In [182]:
import yfinance as yf

start_date = '2009-01-01'
end_date = '2019-01-01'

sp500_data = yf.download('^GSPC', start = start_date, end = end_date, interval='1wk' )
nasdaq_data = yf.download('^IXIC', start = start_date, end = end_date, interval='1wk' )
print(sp500_data.head())
print(nasdaq_data.head())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2009-01-01  902.989990  943.849976  899.349976  906.650024  906.650024   
2009-01-08  905.729980  911.929993  836.929993  842.619995  842.619995   
2009-01-15  841.989990  858.130005  804.299988  840.239990  840.239990   
2009-01-22  839.739990  877.859985  806.070007  874.090027  874.090027   
2009-01-29  868.890015  868.890015  812.869995  832.229980  832.229980   

                 Volume  
Date                     
2009-01-01  19559740000  
2009-01-08  25408440000  
2009-01-15  27436450000  
2009-01-22  29268370000  
2009-01-29  28397670000  
                   Open         High          Low        Close    Adj Close  \
Date                                                                          
2009-01-01  1578.869995  1665.630005  1571.979980  1599.060059  1599.060059   
2009-01-08  1590.250000  1617.260010  1485.260010  1489.640015




In [183]:
import pandas as pd
import numpy as np
import requests

api_key = "b4821cc094d8c47919698af6f79cbfdd"

base_url = "https://api.stlouisfed.org/fred/series/observations"

factors = {
    "rf_rate" : {
        "series_id": "DGS5",
        "units" : "lin",
        "frequency" : "w"
    },
    "inflation_rate" : {
        "series_id": "T5YIE",
        "units" : "pch",
        "frequency": "w"
    },
    # "industrial_production" : {
    #     "series_id": "INDPRO",
    #     "units" : "pch",
    #     "frequency": "m"
    # },
    "risk_premium" : {
        "series_id": "TENEXPCHAREARISPRE",
        "units" : "pch",
        "frequency": "m"
    },
    "gdp_growth" : {
        "series_id": "GDPC1",
        "units" : "pch",
        "frequency": "q"
    }
}
for factor, data in factors.items():
    response = requests.request("GET", base_url, params = {
        "api_key" : api_key,
        "series_id" : data['series_id'],
        "file_type" : "json",
        "observation_start" : start_date,
        "observation_end" : end_date,
        "units" : data['units'],
        "frequency" : data['frequency']
    } )
    print(factor)
    factors[factor] = pd.DataFrame.from_dict(response.json()['observations'])[["date", "value"]]
    factors[factor]['date'] = pd.to_datetime(factors[factor]['date'])
    factors[factor]['value'] = factors[factor]['value'].replace('.', np.nan)
    factors[factor] = factors[factor].dropna()
    factors[factor].set_index('date', inplace=True)
    factors[factor]['value'] = pd.to_numeric(factors[factor]['value']).astype(float)/100
    factors[factor] = factors[factor].reindex(sp500_data.index, method='ffill')

rf_rate
inflation_rate
risk_premium
gdp_growth


In [184]:

    
sp500_data['Returns'] = sp500_data['Adj Close'].pct_change()
nasdaq_data['Returns'] = nasdaq_data['Adj Close'].pct_change()

sp500_data['Excess_Returns'] = sp500_data['Returns'] - factors['rf_rate']['value']
nasdaq_data['Excess_Returns'] = nasdaq_data['Returns'] - factors['rf_rate']['value']


# Assume you have economic factor returns in a DataFrame called factors_data
factors_data = pd.DataFrame({
    'inflation_rate': factors['inflation_rate']['value'],
    # 'industrial_production': factors['industrial_production']['value'],
    'risk_premium': factors['risk_premium']['value'],
    'gdp_growth': factors['gdp_growth']['value']
}, index=sp500_data.index)

In [185]:
import statsmodels.api as sm

# Ensure both datasets align in terms of dates
combined_data = pd.concat([sp500_data['Excess_Returns'].rename('S&P500'), nasdaq_data['Excess_Returns'].rename('NASDAQ'), factors_data], axis=1).dropna()

# Set up the independent (X) and dependent (Y) variables
X = combined_data[[ 'inflation_rate', "risk_premium", "gdp_growth"]]  # Add more factors as columns
Y_1 = combined_data['S&P500']  
Y_2 = combined_data['NASDAQ']

# Add a constant term for the regression (intercept)
X = sm.add_constant(X)

In [186]:
# Fit the OLS regression model
model_1 = sm.OLS(Y_1, X).fit()

# Print the results
print(model_1.summary())

                            OLS Regression Results                            
Dep. Variable:                 S&P500   R-squared:                       0.006
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                    0.9952
Date:                Wed, 25 Sep 2024   Prob (F-statistic):              0.395
Time:                        13:44:08   Log-Likelihood:                 1246.5
No. Observations:                 521   AIC:                            -2485.
Df Residuals:                     517   BIC:                            -2468.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -0.0161      0.001    -10.

In [187]:
model_2 = sm.OLS(Y_2, X).fit()
print(model_2.summary())

                            OLS Regression Results                            
Dep. Variable:                 NASDAQ   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.5049
Date:                Wed, 25 Sep 2024   Prob (F-statistic):              0.679
Time:                        13:44:08   Log-Likelihood:                 1191.1
No. Observations:                 521   AIC:                            -2374.
Df Residuals:                     517   BIC:                            -2357.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -0.0148      0.002     -9.