In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os
from dotenv import load_dotenv
import requests
from datetime import timedelta
import json

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import PoissonRegressor
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import numpy as np


In [None]:
import cpi


In [None]:
# Load environment variables from the .env file two directories up;
# override=True lets values from the file replace variables that are already set.
load_dotenv(dotenv_path="../../.env", override=True)

# Raise pandas' column display limit so wide frames are not truncated.
pd.set_option("display.max_columns", 500)

In [None]:
# Number of monthly rows that make up the year-over-year lag.
MONTHS_PER_YEAR = 12
# Columns kept from each raw CPI frame.
CPI_COLUMNS = ['year', 'date', 'value', 'series_id', 'series_items_name']


def _fetch_cpi_series(item_name):
    """Fetch the 'Monthly' periodicity CPI series for `item_name`.

    Returns the series converted to a DataFrame and sorted by its 'date' column.
    """
    return (
        cpi.series.get(items=item_name, periodicity='Monthly')
        .to_dataframe()
        .sort_values(by='date')
    )


def _monthly_with_inflation(raw):
    """Keep the monthly rows of `raw` and add a year-over-year 'inflation' column.

    'inflation' is value / value-12-rows-earlier - 1, so it is only a true
    year-over-year change when the monthly rows have no gaps. The returned
    copy is indexed by the parsed 'date' column.
    """
    monthly = raw[raw['period_type'] == 'monthly'][CPI_COLUMNS].copy()
    monthly['inflation'] = monthly['value'] / monthly['value'].shift(MONTHS_PER_YEAR) - 1
    monthly.index = pd.to_datetime(monthly['date']).values
    return monthly


# Raw (unfiltered) frames, one per CPI item.
college = _fetch_cpi_series('College tuition and fees')
housing = _fetch_cpi_series('Housing')
allItems = _fetch_cpi_series('All items')
energy = _fetch_cpi_series('Energy')
food = _fetch_cpi_series('Food')
medicalCare = _fetch_cpi_series('Medical care')
recreation = _fetch_cpi_series('Recreation')
rent = _fetch_cpi_series('Rent of primary residence')
healthInsurance = _fetch_cpi_series('Health insurance')

raw_series = {
    'college': college,
    'housing': housing,
    'allItems': allItems,
    'energy': energy,
    'food': food,
    'medicalCare': medicalCare,
    'recreation': recreation,
    'rent': rent,
    'healthInsurance': healthInsurance,
}

# `series` holds the processed (monthly rows + inflation) frame for each key.
series = {key: _monthly_with_inflation(raw) for key, raw in raw_series.items()}

# Long format: all items stacked, with 'date' as a string so it can serve as a join key.
allDat = pd.concat(list(series.values()))
allDat['date'] = allDat['date'].astype(str)

# Wide format: one row per date, one column per CPI item.
inflation_cross = pd.crosstab(
    index=allDat['date'],
    columns=allDat['series_items_name'],
    values=allDat['inflation'],
    aggfunc='mean',
)

cpi_cross = pd.crosstab(
    index=allDat['date'],
    columns=allDat['series_items_name'],
    values=allDat['value'],
    aggfunc='mean',
)

# Expose the date index as a string 'month' column for later merges.
cpi_cross['month'] = cpi_cross.index
cpi_cross['month'] = cpi_cross['month'].astype(str)
# Headline inflation: 'All items' index level versus the level 12 rows earlier.
cpi_cross['inflation'] = cpi_cross['All items'] / cpi_cross['All items'].shift(MONTHS_PER_YEAR) - 1


In [None]:
# Display the wide CPI table: one column per CPI item plus the derived 'month' and 'inflation' columns.
cpi_cross

In [None]:
# API key for the price-history request; None when CONSUMER_KEY is not set in the environment.
consumerKey = os.getenv("CONSUMER_KEY")


In [None]:
# Weekly candles for $SPX.X over a 20-year period (see the query string).
url = "https://api.tdameritrade.com/v1/marketdata/$SPX.X/pricehistory?periodType=year&period=20&frequencyType=weekly&frequency=1"
response = requests.get(
    url,
    params={'apikey': consumerKey},
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Surface HTTP errors (bad or missing API key, throttling, ...) here rather
# than as an opaque KeyError on 'candles' below.
response.raise_for_status()

payload = response.json()
candles = payload.get('candles')
if not candles:
    raise ValueError(f"price-history response contained no candles: {payload!r}")

SPY = pd.DataFrame(candles)
# 'datetime' is in epoch milliseconds; keep only the calendar date.
SPY['date'] = pd.to_datetime(SPY['datetime'], unit='ms').dt.date
SPY.index = SPY['date'].values

# First day of each candle's month, as a string, used as the merge key
# against the monthly P/E and CPI tables.
candle_days = pd.to_datetime(SPY['date'])
SPY['month'] = candle_days - pd.to_timedelta(candle_days.dt.day - 1, unit='d')
SPY['month'] = SPY['month'].astype(str)

In [None]:
# Scrape the monthly S&P 500 P/E table; read_html returns a list of tables and the first one is used.
pe_tables = pd.read_html("https://www.multpl.com/s-p-500-pe-ratio/table/by-month")
PE_history = pe_tables[0]
PE_history.columns = ['month', 'sp500 pe']

# Remove the ' estimate' marker from the value text before converting to float.
pe_text = PE_history['sp500 pe'].str.replace(' estimate', '')
PE_history['sp500 pe'] = pe_text.astype(float)

# Store the month as a string (merge key) and index the frame by the parsed date.
month_stamps = pd.to_datetime(PE_history['month'])
PE_history['month'] = month_stamps.astype(str)
PE_history.index = pd.to_datetime(PE_history['month']).values

In [None]:
# Log of the P/E ratio for the first 500 rows of the scraped table.
pe_first_500 = PE_history['sp500 pe'].head(500)
plt.plot(np.log(pe_first_500))
plt.xticks(rotation=90)

In [None]:
# Attach the monthly P/E ratio and the CPI level/inflation to every price row
# via the shared 'month' string key. Left joins keep every price row.
cpi_monthly = cpi_cross[['month', 'inflation', 'All items']]
spy_pe_hist = (
    SPY
    .merge(PE_history, on='month', how='left')
    .merge(cpi_monthly, on='month', how='left')
)
# merge() resets the index, so restore a datetime index from the 'date' column.
spy_pe_hist.index = pd.to_datetime(spy_pe_hist['date']).values

In [None]:
# Close price (left axis) against the S&P 500 P/E ratio (right axis, red).
fig, ax1 = plt.subplots()
ax1.set_ylabel('close')
ax1.plot(spy_pe_hist['close'])
ax2 = ax1.twinx()
ax2.set_ylabel('sp500 pe', color='red')
ax2.plot(spy_pe_hist['sp500 pe'], color='red')

In [None]:
# Fill every missing CPI level / inflation value with the last observed value
# of that column.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on `spy_pe_hist[col]`: that chained in-place form
# operates on an intermediate Series, warns on recent pandas, and leaves the
# frame unchanged under Copy-on-Write.
for macro_col in ('All items', 'inflation'):
    observed = spy_pe_hist[macro_col].dropna()
    if observed.empty:
        raise ValueError(f"column {macro_col!r} has no observed values to fill from")
    spy_pe_hist[macro_col] = spy_pe_hist[macro_col].fillna(observed.values[-1])


In [None]:
# Target: close 52 rows ahead divided by the current close. The candles are
# requested at weekly frequency, so 52 rows is roughly one year forward.
close_now = spy_pe_hist['close']
spy_pe_hist['close_yoy_change'] = close_now.shift(-52) / close_now

# Cap the P/E ratio at 40 so extreme readings do not dominate.
spy_pe_hist['masked sp500 pe'] = spy_pe_hist['sp500 pe'].clip(upper=40)

# Interaction term: capped P/E times the CPI level.
spy_pe_hist['int1'] = spy_pe_hist['masked sp500 pe'] * spy_pe_hist['All items']

In [None]:
# Modelling frame: an independent copy holding only rows with no missing value in any column.
# 'close_yoy_change' is built with shift(-52), so the final 52 rows have no target and are dropped here.
s2 = spy_pe_hist.dropna().copy()

In [None]:
# Model inputs and the target column.
kpi = 'close_yoy_change'
feats = ['masked sp500 pe', 'All items', 'inflation']

X = s2[feats]
y = s2[kpi]

# shuffle=False keeps row order, so the last 5% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.05,
    random_state=42,
    shuffle=False,
)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import PoissonRegressor

# Single-step pipeline around a small gradient-boosted regressor.
# A MinMaxScaler step and a LinearRegression estimator were previously tried
# here and are currently disabled.
xgb_regressor = XGBRegressor(max_depth=3, n_estimators=50, random_state=42)
pipe = Pipeline(steps=[('XGB', xgb_regressor)])

pipe.fit(X_train, y_train)

In [None]:
# Predict on the complete-case frame (train and test rows together) and
# store the result next to the target.
s2_inputs = s2[feats]
pred = pipe.predict(s2_inputs)
s2['pred'] = pred

# Target first, prediction second, both against the date index.
plt.plot(s2[kpi])
plt.plot(s2['pred'])

In [None]:
# Predict on every row of the merged frame, including the latest rows where
# the 52-row-forward target is not yet known.
all_inputs = spy_pe_hist[feats]
pred = pipe.predict(all_inputs)
spy_pe_hist['pred'] = pred

# Target first, prediction second, both against the date index.
plt.plot(spy_pe_hist[kpi])
plt.plot(spy_pe_hist['pred'])

In [None]:
# Display the merged frame, which now also carries the 'pred' column.
spy_pe_hist

In [None]:
import shap

# Explain the fitted XGB step of the pipeline on the unscaled feature columns.
# NOTE(review): if the disabled MinMaxScaler step is re-enabled, the features
# passed here would presumably need the same scaling — confirm before changing.
xgb_step = pipe['XGB']
explainer = shap.Explainer(xgb_step)
shap_values = explainer(spy_pe_hist[feats])




In [None]:

# Waterfall plot of the SHAP values for the row at position 1042.
# NOTE(review): 1042 is a hard-coded position — presumably the most recent row; confirm against len(spy_pe_hist).
shap.plots.waterfall(shap_values[1042])

In [None]:
# Row count of the merged frame; useful for checking the hard-coded row position used in the waterfall cell.
len(spy_pe_hist)