
# RankIC evaluation with random sentiment

Load `train_return_data.csv`, assign each row a random sentiment score, compute the RankIC (Spearman rank correlation between the sentiment score and `RET`) for each timestamp in the `date` column, and plot the resulting RankIC series.


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory used for both the input CSV and the saved plot (current working directory).
base_path = Path(".")
# CSV consumed by `load_returns`, which reads its `date` and `RET` columns.
data_path = base_path / "train_return_data.csv"
# Seed for the random sentiment generator so reruns produce the same scores.
seed = 42


In [None]:
def load_returns(data_path: Path) -> pd.DataFrame:
    """Read the return data and keep only rows with a usable date and return.

    Only the ``date`` and ``RET`` columns are read from the CSV. Dates that
    cannot be parsed and returns that are not numeric are coerced to missing
    values, and every row with a missing value in either column is dropped.

    Args:
        data_path: Location of the CSV file.

    Returns:
        A frame with a datetime ``date`` column and a numeric ``RET`` column,
        re-indexed from zero.
    """
    wanted = ["date", "RET"]
    raw = pd.read_csv(data_path, usecols=wanted)
    # errors="coerce" turns unparseable cells into NaT/NaN instead of raising,
    # so they can be filtered out together with genuinely missing values.
    parsed = raw.assign(
        date=pd.to_datetime(raw["date"], errors="coerce"),
        RET=pd.to_numeric(raw["RET"], errors="coerce"),
    )
    return parsed.dropna(subset=wanted).reset_index(drop=True)


def attach_random_sentiment(df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
    """Return a copy of *df* with a random ``sentiment_score`` column.

    One standard-normal draw is generated per row from a generator seeded
    with *seed*, so the same seed and row count always give the same scores.
    The input frame is left untouched.

    Args:
        df: Frame to score; only its length is used.
        seed: Seed passed to ``numpy.random.default_rng``.

    Returns:
        A new frame with the ``sentiment_score`` column added (or replaced).
    """
    scores = np.random.default_rng(seed).standard_normal(len(df))
    # ``assign`` works on a copy, so the caller's frame is never mutated.
    return df.assign(sentiment_score=scores)


def compute_rank_ic(df: pd.DataFrame) -> pd.Series:
    """Compute the RankIC (Spearman correlation of score vs. return) per date.

    Args:
        df: Frame with ``date``, ``sentiment_score`` and ``RET`` columns.

    Returns:
        A float Series named ``rank_ic``, indexed by ``date`` in ascending
        order. A date with fewer than two complete (non-NaN) rows gets NaN.
        If there is nothing to group on (empty frame, or every ``date`` is
        missing), an empty Series with the same name is returned.
    """

    def _rank_ic_for_group(group: pd.DataFrame) -> float:
        # Only rows where both the score and the return are present count.
        valid = group.dropna()
        if len(valid) < 2:
            # A correlation needs at least two observations.
            return np.nan
        return valid["sentiment_score"].corr(valid["RET"], method="spearman")

    grouped = df.groupby("date", sort=True, group_keys=False)[
        ["sentiment_score", "RET"]
    ]
    if grouped.ngroups == 0:
        # With zero groups ``apply`` never calls the helper and hands back an
        # empty DataFrame, so the Series-style ``rename`` below would not
        # produce the promised Series. Return the empty result explicitly.
        return pd.Series(
            index=pd.Index([], dtype=df["date"].dtype, name="date"),
            dtype="float64",
            name="rank_ic",
        )

    return grouped.apply(_rank_ic_for_group).rename("rank_ic").sort_index()


In [None]:
# Run the pipeline: load the cleaned returns, attach the seeded random
# sentiment scores, then compute one RankIC value per date.
returns_df = load_returns(data_path)
scored_df = attach_random_sentiment(returns_df, seed=seed)
rank_ic_series = compute_rank_ic(scored_df)
# Mean RankIC, ignoring dates whose RankIC is NaN.
avg_rank_ic = rank_ic_series.mean(skipna=True)
# Trailing expression: previews the first rows as the notebook cell output.
rank_ic_series.head()


In [None]:
# `count()` excludes NaN entries, so this is the number of dates with a defined RankIC.
print(f"Timestamps with RankIC: {rank_ic_series.count()}")
# Mean RankIC formatted to four decimal places.
print(f"Average RankIC: {avg_rank_ic:.4f}")


In [None]:
# Draw the RankIC series as a line chart with a dashed zero reference line.
fig, ax = plt.subplots(figsize=(12, 4))
rank_ic_series.plot(ax=ax, color="steelblue")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set(
    title="RankIC per timestamp (random sentiment)",
    xlabel="Timestamp",
    ylabel="RankIC (Spearman)",
)
fig.tight_layout()

# Save the figure before showing it, then report where the file was written.
plot_path = base_path / "rank_ic_series.png"
fig.savefig(plot_path, dpi=150)
plt.show()
print(f"RankIC plot saved to: {plot_path.resolve()}")
