In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [102]:
folder_path = "../data/processed/sp500/"
combined_df = None

for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        ticker = filename.replace(".csv", "") 
        filepath = os.path.join(folder_path, filename)

        df = pd.read_csv(filepath, usecols=["Price", "Close"])
        df.rename(columns={"Close": ticker}, inplace=True)
        df.set_index("Price", inplace=True)

        if combined_df is None:
            combined_df = df
        else:
            combined_df = combined_df.join(df, how='inner')

combined_df.index.name = "Date"
combined_df.columns.name = None
combined_df = combined_df.drop(combined_df.index[0])

In [132]:
log_returns = np.log(combined_df.astype(float) / combined_df.astype(float).shift(1)).dropna()

In [136]:
log_returns.tail()

Unnamed: 0_level_0,AAPL,AMZN,CAT,GOOGL,JNJ,JPM,MSFT,NEE,TSLA,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-12-24,0.011413,0.017573,0.005949,0.007575,0.003985,0.01631,0.00933,0.005777,0.070991,0.00094
2024-12-26,0.003171,-0.00877,-0.001225,-0.002604,-0.001853,0.00342,-0.002781,-0.007434,-0.017787,0.000845
2024-12-27,-0.013331,-0.014641,-0.006175,-0.014626,-0.003647,-0.008135,-0.017453,-0.003599,-0.050745,-9.4e-05
2024-12-30,-0.013352,-0.01101,-0.005083,-0.007917,-0.011859,-0.007701,-0.013328,-0.004866,-0.033569,-0.006785
2024-12-31,-0.007083,-0.008668,-0.000689,-0.010196,0.00889,0.001628,-0.007869,-0.000976,-0.03305,0.016969


In [145]:
rolling_volatility = log_returns.rolling(window=30).std().dropna()

In [147]:
rolling_volatility.head()

Unnamed: 0_level_0,AAPL,AMZN,CAT,GOOGL,JNJ,JPM,MSFT,NEE,TSLA,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-02-14,0.016556,0.016563,0.015986,0.013114,0.006383,0.011276,0.014394,0.007292,0.061446,0.01471
2020-02-18,0.016856,0.016308,0.015909,0.013024,0.006101,0.011278,0.014045,0.007409,0.062101,0.014772
2020-02-19,0.016969,0.016201,0.015928,0.012302,0.006102,0.011572,0.014042,0.009416,0.062624,0.014772
2020-02-20,0.017083,0.016352,0.015803,0.01236,0.006091,0.011119,0.014311,0.009395,0.063054,0.01477
2020-02-21,0.017491,0.017179,0.015691,0.013151,0.006339,0.011263,0.015708,0.00936,0.062954,0.01471


In [151]:
combined_df = combined_df.astype(float)
peak = combined_df.cummax()
drawdowns = (combined_df - peak) / peak 

In [153]:
drawdowns

Unnamed: 0_level_0,AAPL,AMZN,CAT,GOOGL,JNJ,JPM,MSFT,NEE,TSLA,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2020-01-03,-0.009722,-0.012139,-0.013884,-0.005231,-0.011578,-0.013196,-0.012452,0.000000,0.000000,-0.008039
2020-01-06,-0.001831,0.000000,-0.014549,0.000000,-0.012811,-0.013981,-0.009899,0.000000,0.000000,-0.000423
2020-01-07,-0.006526,0.000000,-0.027569,-0.001932,-0.006782,-0.030744,-0.018926,-0.000869,0.000000,-0.008604
2020-01-08,0.000000,-0.007809,-0.018933,0.000000,-0.006919,-0.023183,-0.003300,-0.001325,0.000000,-0.023554
...,...,...,...,...,...,...,...,...,...,...
2024-12-24,0.000000,-0.016657,-0.118283,-0.002797,-0.150089,-0.031883,-0.056800,-0.155983,-0.036636,-0.144338
2024-12-26,0.000000,-0.025244,-0.119363,-0.005390,-0.151663,-0.028567,-0.059419,-0.162234,-0.053620,-0.143615
2024-12-27,-0.013242,-0.039411,-0.124784,-0.019831,-0.154751,-0.036438,-0.075692,-0.165244,-0.100446,-0.143695
2024-12-30,-0.026330,-0.049929,-0.129222,-0.027560,-0.164716,-0.043829,-0.087930,-0.169295,-0.130142,-0.149485


In [167]:
cumulative = (log_returns + 1).cumprod()

running_max = cumulative.cummax()
drawdown = (cumulative - running_max) / running_max
max_drawdown = drawdown.min()

In [169]:
avg_corr = log_returns.corr().mean()

In [171]:
avg_corr

AAPL     0.533582
AMZN     0.447384
CAT      0.439300
GOOGL    0.511883
JNJ      0.382922
JPM      0.476826
MSFT     0.545175
NEE      0.406328
TSLA     0.375800
XOM      0.383478
dtype: float64

In [173]:
sharpe_ratio = log_returns.mean()/(log_returns.std()) * np.sqrt(252)

In [175]:
max_drawdown

AAPL    -0.351789
AMZN    -0.618939
CAT     -0.401653
GOOGL   -0.479546
JNJ     -0.290193
JPM     -0.467463
MSFT    -0.406100
NEE     -0.490705
TSLA    -0.798813
XOM     -0.582714
dtype: float64