In [None]:
import pandas as pd
from pathlib import Path
import numpy as np

In [None]:
# Project root: the parent of the current working directory
# (presumably the notebook is launched from a sub-folder of the repo - verify).
root = Path.cwd().parent

# Locations of the two input CSV files
vix_path = root.joinpath("data", "external", "VIX_History.csv")
gspc_path = root.joinpath("data", "raw", "^GSPC_full_2023_2025.csv")  # S&P 500

# Read each CSV into its own DataFrame
vix_df = pd.read_csv(vix_path)
sp_df = pd.read_csv(gspc_path)

Subset VIX

In [None]:
# Preview the first five rows of the raw VIX frame
vix_df.head(n=5)

In [None]:
# Parse the month/day/year text in "DATE" and store it back as ISO
# "YYYY-MM-DD" strings in a new lowercase "date" column.
vix_df["date"] = (
    pd.to_datetime(vix_df["DATE"], format="%m/%d/%Y")
    .dt.strftime("%Y-%m-%d")
)

# Quick look at the converted column
vix_df["date"].head()

In [None]:
# Columns to discard: the open/high/low columns and the original "DATE"
# text column (the normalised "date" column is kept).
drop_col = ["OPEN", "HIGH", "LOW", "DATE"]

# Drop them, producing a new frame
vix_df_sub = vix_df.drop(columns=drop_col)

# Keep rows from 2023-04-01 (inclusive) up to, but not including, 2025-06-16.
# "date" holds ISO "YYYY-MM-DD" strings, so plain string comparison orders
# them chronologically.
# .copy() makes the result an independent frame so that columns can be added
# to it later without triggering pandas' SettingWithCopyWarning.
vix_df_sub = vix_df_sub[
    (vix_df_sub["date"] >= "2023-04-01") & (vix_df_sub["date"] < "2025-06-16")
].copy()

In [None]:
# Log growth of the close: ln(CLOSE_t) - ln(CLOSE_{t-1}), where t-1 is the
# previous row (assumes rows are in ascending date order - TODO confirm).
# The first row has no predecessor, so its value is NaN.
# .assign() returns a new frame instead of writing a column into the filtered
# slice, which avoids pandas' SettingWithCopyWarning.
vix_df_sub = vix_df_sub.assign(
    log_growth_closed=lambda frame: np.log(frame["CLOSE"]).diff()
)

# verify
vix_df_sub.head()

In [None]:
# Destination file for the processed VIX series
out_path = root.joinpath("data", "processed", "variables", "log_growth_VIX.csv")

# Save as CSV, leaving the DataFrame index out of the file
vix_df_sub.to_csv(out_path, index=False)

Subset S&P 500

In [None]:
# Parse the month/day/year text in "DATE" and store it back as ISO
# "YYYY-MM-DD" strings in a new lowercase "date" column.
sp_df["date"] = (
    pd.to_datetime(sp_df["DATE"], format="%m/%d/%Y")
    .dt.strftime("%Y-%m-%d")
)

# Quick look at the frame with the new column
sp_df.head()

In [None]:
# Columns to discard: the open/high/low columns and the original "DATE"
# text column (the normalised "date" column is kept).
drop_col = ["OPEN", "HIGH", "LOW", "DATE"]

# Drop them, producing a new frame
sp_df_sub = sp_df.drop(columns=drop_col)

# Keep rows from 2023-04-01 (inclusive) up to, but not including, 2025-06-16.
# "date" holds ISO "YYYY-MM-DD" strings, so plain string comparison orders
# them chronologically. .copy() makes the result an independent frame so the
# column assignment below does not trigger SettingWithCopyWarning.
sp_df_sub = sp_df_sub[
    (sp_df_sub["date"] >= "2023-04-01") & (sp_df_sub["date"] < "2025-06-16")
].copy()

# Log growth of the S&P 500 close: ln(CLOSE_t) - ln(CLOSE_{t-1}), computed
# from this frame's own "CLOSE" column. t-1 is the previous row (assumes rows
# are in ascending date order - TODO confirm); the first row is NaN.
sp_df_sub["log_growth_closed"] = np.log(sp_df_sub["CLOSE"]).diff()

# verify
sp_df_sub.head()

In [None]:
# Destination file for the processed S&P 500 series
sp_out_path = root / "data" / "processed" / "variables" / "log_growth_sp500.csv"

# Write to the S&P-specific path, leaving the DataFrame index out of the file
sp_df_sub.to_csv(sp_out_path, index=False)