In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from data_cleaning import clean_prices
from compute_returns import compute_daily_returns, compute_mu_and_cov, save_outputs

# Make sure both output folders exist before later cells write into them.
for out_dir in ("../Results/processed", "../Results/plots"):
    os.makedirs(out_dir, exist_ok=True)


In [None]:
# Read the raw price CSV and convert its "date" column to datetimes in one chain,
# then hand the frame to the project's cleaning helper.
df = pd.read_csv("../Dataset/all_stocks_5yr.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)
df_clean = clean_prices(df)

# Preview the first rows of the cleaned frame.
df_clean.head()

In [None]:
# Daily returns from the cleaned prices, then μ and Σ (plus the ticker list),
# which are written out by save_outputs.
df_returns = compute_daily_returns(df_clean)
mu, cov, tickers = compute_mu_and_cov(df_returns)
save_outputs(mu, cov, tickers)

# Quick look: how many tickers there are and the first five entries of mu.
(len(tickers), list(mu)[:5])

In [None]:
# Plot normalized close prices for a handful of tickers (kept small for readability).
sample = tickers[:10]  # first 10 tickers
sample_prices = df_clean[df_clean["Name"].isin(sample)]

fig, ax = plt.subplots(figsize=(12, 6))
for name, sub in sample_prices.groupby("Name"):
    ordered = sub.sort_values("date")
    closes = ordered["close"]
    # Divide by the earliest close so each series is shown relative to its first value.
    ax.plot(ordered["date"], closes / closes.iloc[0], label=name, linewidth=1)

ax.set_title("S&P 500 Sample Price Trends (Normalized)")
ax.set_xlabel("Date")
ax.set_ylabel("Normalized Close")
ax.legend(ncol=2, fontsize=8)
fig.tight_layout()
# Save before showing so the written file contains the rendered figure.
fig.savefig("../Results/plots/S&P_price_trends.png", dpi=150)
plt.show()