In [3]:
import yfinance as yf
import pandas as pd

# Fetch daily S&P 500 data
sp500_data = yf.download('^GSPC', start='2020-01-01', end='2024-01-01')

# Calculate additional features for S&P 500 data
sp500_data['daily_return'] = sp500_data['Close'].pct_change() #might need
sp500_data['volatility'] = sp500_data['daily_return'].rolling(window=20).std()

###sp500_data['ma_200'] = sp500_data['Close'].rolling(window=200).mean() # for long term


# Calculate the 200-day moving return
sp500_data['200_day_return'] = sp500_data['Close'].pct_change(periods=200)

[*********************100%%**********************]  1 of 1 completed


In [4]:
sp500 = sp500_data.dropna()
sp500.shape

(806, 9)

In [7]:
import pandas_datareader.data as pdr
import yfinance as yf
import pandas as pd

# Fetch economic indicators from FRED
gdp = pdr.get_data_fred('GDP', start='2020-01-01', end='2024-01-01') #quaterly

inflation = pdr.get_data_fred('CPIAUCSL', start='2020-01-01', end='2024-01-01')  # CPI for inflation
unemployment = pdr.get_data_fred('UNRATE', start='2020-01-01', end='2024-01-01')
interest_rate = pdr.get_data_fred('FEDFUNDS', start='2020-01-01', end='2024-01-01')

# Rename columns for clarity
gdp.rename(columns={'GDP': 'GDP'}, inplace=True)
inflation.rename(columns={'CPIAUCSL': 'Inflation'}, inplace=True)
unemployment.rename(columns={'UNRATE': 'Unemployment'}, inplace=True)
interest_rate.rename(columns={'FEDFUNDS': 'Interest_Rate'}, inplace=True)

In [8]:
inflation.head()

Unnamed: 0_level_0,Inflation
DATE,Unnamed: 1_level_1
2020-01-01,258.906
2020-02-01,259.246
2020-03-01,258.15
2020-04-01,256.126
2020-05-01,255.848


In [9]:
GDP = gdp.resample('D').ffill()
GDP.loc['2020-03-28':'2020-04-01'] 

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2020-03-28,21706.513
2020-03-29,21706.513
2020-03-30,21706.513
2020-03-31,21706.513
2020-04-01,19913.143


In [10]:
Inflation = inflation.resample('D').ffill()
Unemployment = unemployment.resample('D').ffill()
Interest_rate = interest_rate.resample('D').ffill()

In [11]:
merged_data = sp500.join([Inflation, Unemployment, Interest_rate], how='left')
merged_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,daily_return,volatility,200_day_return,Inflation,Unemployment,Interest_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-10-16,3493.5,3515.76001,3480.449951,3483.810059,3483.810059,4688030000.0,0.000135,0.012,0.069359,260.249,6.8,0.09
2020-10-19,3493.659912,3502.419922,3419.929932,3426.919922,3426.919922,4091080000.0,-0.01633,0.012336,0.059375,260.249,6.8,0.09
2020-10-20,3439.379883,3476.929932,3435.649902,3443.120117,3443.120117,3917850000.0,0.004727,0.012199,0.060636,260.249,6.8,0.09
2020-10-21,3439.909912,3464.860107,3433.060059,3435.560059,3435.560059,4103960000.0,-0.002196,0.010667,0.061282,260.249,6.8,0.09
2020-10-22,3438.5,3460.530029,3415.340088,3453.48999,3453.48999,4172060000.0,0.005219,0.010679,0.061616,260.249,6.8,0.09


In [12]:
# Finding rows with any NaNs
rows_with_any_nans = merged_data[merged_data.isna().any(axis=1)]
print("Rows with any NaNs:")
print(rows_with_any_nans)

Rows with any NaNs:
Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume, daily_return, volatility, 200_day_return, Inflation, Unemployment, Interest_Rate]
Index: []


In [13]:
from sklearn.preprocessing import StandardScaler
# Separating the features and target
features = merged_data.drop(columns=['200_day_return'])
target = merged_data['200_day_return']
# Standardizing the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Converting the scaled features back to a DataFrame
features_scaled_df = pd.DataFrame(features_scaled, columns=features.columns)

# Adding the target column back to the scaled DataFrame
merged_data_scaled = pd.concat([features_scaled_df, target.reset_index(drop=True)], axis=1)

merged_data_scaled.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,daily_return,volatility,Inflation,Unemployment,Interest_Rate,200_day_return
0,-2.140844,-2.169149,-2.083494,-2.172766,-2.172766,0.343366,-0.028673,0.40516,-1.726933,2.20047,-0.917707,0.069359
1,-2.140342,-2.211381,-2.27208,-2.351238,-2.351238,-0.297361,-1.52324,0.488162,-1.726933,2.20047,-0.917707,0.059375
2,-2.310742,-2.292077,-2.223095,-2.300416,-2.300416,-0.483294,0.388196,0.454288,-1.726933,2.20047,-0.917707,0.060636
3,-2.309078,-2.330288,-2.231166,-2.324133,-2.324133,-0.283536,-0.240232,0.075816,-1.726933,2.20047,-0.917707,0.061282
4,-2.313504,-2.343996,-2.286383,-2.267884,-2.267884,-0.210442,0.43282,0.078566,-1.726933,2.20047,-0.917707,0.061616


In [14]:
# Selecting the relevant columns
data = merged_data_scaled[[ '200_day_return', 'Inflation', 'Unemployment', 'Interest_Rate']]
data.shape


(806, 4)

In [20]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pandas_datareader import data as pdr
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
# Define the model structure



# Define the structure of the Bayesian Network
model = BayesianNetwork([('Unemployment', 'Inflation'),
                         ('Inflation', 'Interest_Rate'),
                            ('Interest_Rate', '200_day_return'),
                              ('Unemployment', '200_day_return')])

# Fit the model using Maximum Likelihood Estimator
model.fit(data, estimator=MaximumLikelihoodEstimator)


# Get the CPDs of the model
cpds = model.get_cpds()

# Print the CPDs
for cpd in cpds:
    print(cpd)

  from .autonotebook import tqdm as notebook_tqdm


+------------------------------------+-----------+
| Unemployment(-0.87316678754688)    | 0.0483871 |
+------------------------------------+-----------+
| Unemployment(-0.7827657045972608)  | 0.130273  |
+------------------------------------+-----------+
| Unemployment(-0.6923646216476417)  | 0.210918  |
+------------------------------------+-----------+
| Unemployment(-0.6019635386980224)  | 0.102978  |
+------------------------------------+-----------+
| Unemployment(-0.5115624557484036)  | 0.104218  |
+------------------------------------+-----------+
| Unemployment(-0.4211613727987844)  | 0.0272953 |
+------------------------------------+-----------+
| Unemployment(-0.3307602898491652)  | 0.0248139 |
+------------------------------------+-----------+
| Unemployment(-0.24035920689954643) | 0.0260546 |
+------------------------------------+-----------+
| Unemployment(0.12124512489893041)  | 0.0260546 |
+------------------------------------+-----------+
| Unemployment(0.30204729079816

In [21]:
# Validate the model
assert model.check_model()

In [None]:
#now need to make the CPDS as new features 
