## CORACLE-VAR

In [3]:
## Ignore warnings
import warnings
warnings.filterwarnings("ignore")
# Helper imports
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal # For NYSE trading calendar
import os
import sys

from itertools import product
import matplotlib.pyplot as plt

%matplotlib inline

# Resolve the project layout relative to the directory the notebook runs in:
# the project root is the parent of the working directory, and the custom
# modules live in its 'Modules' sub-folder.
current_working_dir = os.getcwd()
print(f"Current Working Directory: {current_working_dir}")
project_root = os.path.split(current_working_dir)[0]
modules_path = os.path.join(project_root, 'Modules')
# Register the modules folder only once so re-running the cell does not
# keep growing sys.path.
if sys.path.count(modules_path) == 0:
    sys.path.append(modules_path)
    print(f"Added to sys.path for custom modules: {modules_path}")

####################################################################
#### NYSE Daily Open-Close Returns
####################################################################
# Build the full path to the returns CSV inside the project's 'Data' folder.
data_folder_path = os.path.join(project_root, 'Data')
data_file_name = "OPCL_20000103_20201231.csv"
data_file_path = os.path.join(data_folder_path, data_file_name)

# Load the file (it is assumed to exist and be readable) and index the rows
# by ticker symbol.
returns_df = pd.read_csv(data_file_path).set_index('ticker')

# Column labels carry a leading 'X' in front of a YYYYMMDD date; strip it
# and normalise the labels to 'YYYY-MM-DD' strings.
returns_df.columns = (
    pd.to_datetime(returns_df.columns.str.lstrip('X'), format='%Y%m%d')
    .strftime('%Y-%m-%d')
)

# Drop every ticker that has a missing value, then flip the frame so that
# rows are dates and columns are tickers (assumed non-empty after dropna).
returns_df_cleaned = returns_df.dropna().T
returns_df_cleaned.index = pd.to_datetime(returns_df_cleaned.index)

print("Data loaded and cleaned. Sample (first 5 rows/cols):")
print(returns_df_cleaned.iloc[:5, :5])
print(f"Shape of the cleaned data: {returns_df_cleaned.shape}")

####################################################################
#### Constructing the dataframe for the Confounding Variables
####################################################################

# Fetch the NYSE calendar and ask it for the trading schedule spanning the
# first through the last date present in the cleaned returns data.
nyse = mcal.get_calendar('NYSE')
start_date, end_date = (
    returns_df_cleaned.index.min(),
    returns_df_cleaned.index.max(),
)
trading_days = nyse.schedule(start_date=start_date, end_date=end_date)
print(trading_days.head(5))

# Keep only the open/close timestamps in an independent frame that shares
# the schedule's index.
trading_days_df = trading_days[['market_open', 'market_close']].copy()
print("Trading days DataFrame created. Sample (first 5 rows):")
print(trading_days_df.head(5))

# First confounding variable: the DFF.csv file from the data folder, read
# with its first column as the index and that index parsed as dates.
confound_1_name = "DFF.csv"
confound_1_path = os.path.join(data_folder_path, confound_1_name)
confound_1_df = pd.read_csv(
    confound_1_path,
    index_col=0,
    parse_dates=True,
)
print("Federal Funds Effective Rate (DFF) loaded and cleaned. Sample (first 5 rows/cols):")
print(confound_1_df.iloc[:5, :5])
print(f"Shape of the cleaned data: {confound_1_df.shape}")

Current Working Directory: c:\Users\hktan\OneDrive - University of California\Codes\ICAIF_25\New Code\Script
Data loaded and cleaned. Sample (first 5 rows/cols):
ticker            AA       ABM       ABT       ADI       ADM
2000-01-03 -0.013042 -0.009188 -0.007117 -0.036071  0.000000
2000-01-04  0.010043  0.012346 -0.012786 -0.044261  0.005277
2000-01-05  0.047628 -0.006192  0.011111  0.014493 -0.015915
2000-01-06 -0.011713  0.000000  0.032553 -0.027719  0.010695
2000-01-07 -0.016118  0.003091  0.028573  0.033654  0.005249
Shape of the cleaned data: (5279, 663)
                         market_open              market_close
2000-01-03 2000-01-03 14:30:00+00:00 2000-01-03 21:00:00+00:00
2000-01-04 2000-01-04 14:30:00+00:00 2000-01-04 21:00:00+00:00
2000-01-05 2000-01-05 14:30:00+00:00 2000-01-05 21:00:00+00:00
2000-01-06 2000-01-06 14:30:00+00:00 2000-01-06 21:00:00+00:00
2000-01-07 2000-01-07 14:30:00+00:00 2000-01-07 21:00:00+00:00
Trading days DataFrame created. Sample (first 5 rows):
