In [None]:
import sys
from dotenv import load_dotenv
import os

# Load variables from a .env file into the environment (python-dotenv).
load_dotenv()

# Add the directory named by the ROOT environment variable to the import path
# (presumably so the project's own packages can be imported -- confirm).
# Guarded so that an unset ROOT does not put None on sys.path and so that
# re-running this cell does not append the same entry again.
root = os.getenv("ROOT")
if root is not None and root not in sys.path:
    sys.path.append(root)

In [None]:
import polars as pl
import pandas as pd
import statsmodels.api as sm
import numpy as np
from datetime import date
from silverfund.datasets import CRSPMonthly
from silverfund.datasets import BarraSpecificReturns, CRSPMonthly, MasterMonthly
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Sample window passed as start_date / end_date to the dataset loaders.
# Alternative start kept for reference: date(1995, 7, 31)
start = date(year=2006, month=1, day=1)
end = date(year=2024, month=12, day=31)

## Testing economic theory behind reversal.
 - Use monthly trading volume as a proxy for news events to inform reversal strength.


In [None]:
# Load the full CRSP monthly dataset for the sample window and convert it to pandas.
crsp_monthly = (
    CRSPMonthly(start_date=start, end_date=end)
    .load_all()
    .to_pandas()
)

crsp_monthly

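## Testing Barra residual short-term mean reversal.
 - Each month, sort securities into deciles on the prior month's Barra specific return (and, for comparison, on the prior month's raw return), then compare the equal-weighted decile portfolios.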


In [None]:
# Load the full master monthly dataset for the sample window.
# No .to_pandas() here: the cells below call polars methods (clone, with_columns, pl.col)
# on this frame.
master = (
    MasterMonthly(start_date=start, end_date=end, quiet=False)
    .load_all()
)
master

In [None]:
# Working frame: a clone of `master`. Later cells reassign `df`, not `master`.
df = master.clone()

In [None]:
# Display the working frame.
df

In [None]:
# Reversal signals and universe filter.
#
# Built from `master` rather than from `df`, so re-running this cell always starts from the
# unfiltered data instead of lagging over rows that a previous run already removed.

# Minimum lagged price a row must exceed to be kept.
MIN_LAG_PRICE = 5

df = (
    master
    # shift(1) lags by row position inside each barrid, so rows must be in date order first.
    # NOTE(review): the lag is the previous *row* for the security; if a security has missing
    # months this is not the previous calendar month -- confirm the data has no gaps.
    .sort(by=["barrid", "date"])
    .with_columns(
        # reversal signals: previous specific return and previous raw return
        pl.col("spec_ret").shift(1).over("barrid").alias("barra_rev"),
        pl.col("ret").shift(1).over("barrid").alias("rev"),
        # previous price, used only for the low-price filter
        pl.col("price").shift(1).over("barrid").alias("prclag"),
    )
    # filter low price (rows with a null lagged price are dropped by the comparison as well)
    .filter(pl.col("prclag") > MIN_LAG_PRICE)
    # drop rows where either signal is missing
    .drop_nulls(subset=["rev", "barra_rev"])
)

df

In [None]:
# Bin labels "0" .. "9"; also used by a later cell to select the pivoted columns in order.
labels = list(map(str, range(10)))

# Within each date, cut both reversal signals into 10 quantile bins.
df = df.with_columns(
    pl.col("rev").qcut(10, labels=labels).over("date").alias("bin"),
    pl.col("barra_rev").qcut(10, labels=labels).over("date").alias("barra_bin"),
)

df

In [None]:
# Per-date mean return of each bin (equal-weighted), reshaped to one column per bin,
# with columns in label order and rows in date order.
port = (
    df.group_by(["date", "bin"])
    .agg(pl.col("ret").mean())
    .pivot(on="bin", index="date", values="ret")
    .select(["date"] + labels)
    .sort(by="date")
)

# Same construction using the bins formed on the Barra signal.
barra_port = (
    df.group_by(["date", "barra_bin"])
    .agg(pl.col("ret").mean())
    .pivot(on="barra_bin", index="date", values="ret")
    .select(["date"] + labels)
    .sort(by="date")
)

barra_port

In [None]:
# Add a "spread" column (bin "0" return minus bin "9" return), then go back to long form:
# one row per (date, bin), with "spread" appearing as an extra bin, ordered by date then bin.
port = (
    port.with_columns((pl.col("0") - pl.col("9")).alias("spread"))
    .unpivot(index="date", variable_name="bin", value_name="ret")
    .sort(by=["date", "bin"])
)

barra_port = (
    barra_port.with_columns((pl.col("0") - pl.col("9")).alias("spread"))
    .unpivot(index="date", variable_name="barra_bin", value_name="ret")
    .sort(by=["date", "barra_bin"])
)

barra_port

In [None]:
# Summary statistics per raw-return bin: mean, std, observation count,
# annualized Sharpe (mean * 12 over std * sqrt(12)) and the t-stat of the mean.
result = (
    port.group_by("bin")
    .agg(
        mean=pl.col("ret").mean().cast(pl.Float64),
        std=pl.col("ret").std().cast(pl.Float64),
        count=pl.col("ret").count().cast(pl.Float64),
    )
    .with_columns(
        annualized_sharpe=(
            (pl.col("mean") * 12) / (pl.col("std") * np.sqrt(12))
        ).cast(pl.Float64),
        tstat=(
            pl.col("mean") / (pl.col("std") / pl.col("count").sqrt())
        ).cast(pl.Float64),
    )
    .sort(by="bin")
    # One column per bin, one row per statistic.
    .transpose(include_header=True, column_names="bin", header_name="statistic")
)

print("Monthly results")
result

In [None]:
# Display the long-form Barra-signal portfolio returns.
barra_port

In [None]:
# Summary statistics per Barra-signal bin: mean, std, observation count,
# annualized Sharpe (mean * 12 over std * sqrt(12)) and the t-stat of the mean.
result = (
    barra_port.group_by("barra_bin")
    .agg(
        mean=pl.col("ret").mean().cast(pl.Float64),
        std=pl.col("ret").std().cast(pl.Float64),
        count=pl.col("ret").count().cast(pl.Float64),
    )
    .with_columns(
        annualized_sharpe=(
            (pl.col("mean") * 12) / (pl.col("std") * np.sqrt(12))
        ).cast(pl.Float64),
        tstat=(
            pl.col("mean") / (pl.col("std") / pl.col("count").sqrt())
        ).cast(pl.Float64),
    )
    .sort(by="barra_bin")
    # One column per bin, one row per statistic.
    .transpose(include_header=True, column_names="barra_bin", header_name="statistic")
)

print("Monthly results")
result

In [None]:
# Series for the backtest plots.
# Step 1: logret = log(1 + ret).
# Step 2: cumret = running sum of logret within each bin, times 100 (percent).
# The running sum follows the current row order inside each bin.
port = port.with_columns(
    pl.col("ret").log1p().over("bin").alias("logret")
).with_columns(
    (pl.col("logret").cum_sum().over("bin") * 100).alias("cumret")
)

barra_port = barra_port.with_columns(
    pl.col("ret").log1p().over("barra_bin").alias("logret")
).with_columns(
    (pl.col("logret").cum_sum().over("barra_bin") * 100).alias("cumret")
)

port

In [None]:
# Cumulative performance of bins "0" and "9" and their spread for the raw-return sort.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=port.filter(pl.col("bin").is_in(["0", "9", "spread"])),
    x="date",
    y="cumret",
    hue="bin",
    ax=ax,
)
ax.set_title("Raw-Return Reversal: Decile Portfolio Performance")
ax.set_xlabel("")  # no label on the date axis
ax.set_ylabel("Cumulative Sum Return (%)")  # spelling corrected from "Cummulative"
plt.show()

In [None]:
# Cumulative performance of bins "0", "1", "2", "9" and the spread for the Barra-signal sort.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=barra_port.filter(pl.col("barra_bin").is_in(["0", "1", "2", "9", "spread"])),
    x="date",
    y="cumret",
    hue="barra_bin",
    ax=ax,
)
ax.set_title("Barra-Residual Reversal: Decile Portfolio Performance")
ax.set_xlabel("")  # no label on the date axis
ax.set_ylabel("Cumulative Sum Return (%)")  # spelling corrected from "Cummulative"
plt.show()

In [None]:
# Overlay the two spread series: Barra-signal sort vs. raw-return sort.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=barra_port.filter(pl.col("barra_bin").is_in(["spread"])),
    x="date",
    y="cumret",
    label="Barra Residuals",
    ax=ax,
)
sns.lineplot(
    data=port.filter(pl.col("bin").is_in(["spread"])),
    x="date",
    y="cumret",
    label="Raw Returns",
    ax=ax,
)
ax.legend()
ax.set_title("Decile Spread Portfolio Performance Comparison")
ax.set_xlabel("")  # no label on the date axis
ax.set_ylabel("Cumulative Sum Return (%)")  # spelling corrected from "Cummulative"
plt.show()

## THINGS TO DO:
- Figure out how to merge data to get volume for intuition test
- Test reversal window
- Value-weight the portfolios? Use an optimizer?
- Add features similar to Dipesh's/Miki's (rolling Sharpe)
