In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 

from sklearn import tree
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
import xgboost as xgb
from xgboost import XGBRegressor
from xgboost import DMatrix
from sklearn.ensemble import RandomForestClassifier

from xgb_utils import *
from xgb_quantile import *

In [2]:
# Load the monthly panel and trim it in a single chain: remove the
# 'Unnamed: 0' and 'Monthly Real GDP Index' columns, drop the first row and
# the last five rows (by index label), then renumber the rows from 0.
data = (
    pd.read_csv('monthly_data.csv')
    .drop(columns=['Unnamed: 0', 'Monthly Real GDP Index'])
    .pipe(lambda frame: frame.drop(index=frame.index[0]))
    .pipe(lambda frame: frame.drop(index=frame.index[-5:]))
    .reset_index(drop=True)
)
data

Unnamed: 0,Date,unemployment_rate,inflation_rate,treasury_yield,GDP_growth,SP500_return,ABBV,AZN,BMY,JNJ,LLY,MRK,NVO,NVS,PFE,ROG
0,2000-02-01,4.1,170.000,6.661000,0.496560,-1.522563,,-12.828964,-13.228004,-16.339821,-11.121498,-21.701151,2.220031,3.838386,-11.226228,54.440789
1,2000-03-01,4.0,171.000,6.519500,0.511258,9.413333,,22.264136,-0.218329,-2.079067,5.804243,0.913712,8.390897,6.420237,14.101954,6.922258
2,2000-04-01,3.8,170.900,6.256522,1.327803,-3.266805,,5.567379,-8.205683,17.437698,23.153694,12.400712,-0.097663,2.559423,15.213674,7.370518
3,2000-05-01,4.0,171.200,5.990526,-0.181797,-1.572223,,-0.148357,5.395746,8.484832,-1.296597,7.374072,20.863985,5.169310,5.638019,-8.163265
4,2000-06-01,4.0,172.200,6.440455,0.305565,1.728613,,10.549735,5.788826,14.239888,31.641749,3.078671,2.813690,8.474599,8.076012,13.131313
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,2022-10-01,3.7,297.987,3.519048,0.280228,8.571722,9.082759,7.239243,8.974528,6.494844,11.980826,18.457777,9.244201,6.735944,6.375682,-2.707956
273,2022-11-01,3.6,298.598,3.983500,0.470134,5.559164,11.215906,15.575584,4.416367,2.316498,2.482801,8.814239,14.479974,10.390734,7.690670,-53.665066
274,2022-12-01,3.5,298.990,3.891000,-0.085161,-6.193593,0.266786,-0.250105,-10.376181,-0.117873,-1.137072,0.753719,8.619586,1.295221,3.090302,9.446071
275,2023-01-01,3.4,300.536,3.616190,0.390254,6.776820,-8.576205,-3.584079,0.972908,-7.489384,-5.928822,-2.549213,2.541749,-0.110227,-13.817335,16.968326


In [3]:
# Macro features: previous-row (one-month) lags, plus the percentage change
# of `unemployment_rate` and `inflation_rate` relative to the previous row.
# `inflation_rate` holds level values in the displayed sample (170.0 ... 300.5),
# so `inflation` is its month-over-month percentage change.
unemp_prev = data['unemployment_rate'].shift(1)
price_prev = data['inflation_rate'].shift(1)

# Keyword order fixes the order in which the new columns are appended.
data = data.assign(
    unemployment_rate_lag=unemp_prev,
    delta_unemp=(data['unemployment_rate'] / unemp_prev - 1) * 100,
    inflation_rate_lag=price_prev,
    inflation=(data['inflation_rate'] / price_prev - 1) * 100,
    GDP_growth_lag=data['GDP_growth'].shift(1),
    SP500_return_lag=data['SP500_return'].shift(1),
)

In [4]:
tickers = ['NVS', 'ABBV', 'AZN', 'BMY', 'JNJ', 'LLY', 'MRK', 'NVO', 'PFE', 'ROG']

# For every ticker add two columns: the series shifted by one row (`<T>_lag`)
# and by two rows (`<T>_lag2`). The comprehension iterates ticker-first so the
# columns are appended as T_lag, T_lag2 for each ticker in list order.
lag_columns = {
    f'{symbol}_{suffix}': data[symbol].shift(periods)
    for symbol in tickers
    for suffix, periods in (('lag', 1), ('lag2', 2))
}
data = data.assign(**lag_columns)

data

Unnamed: 0,Date,unemployment_rate,inflation_rate,treasury_yield,GDP_growth,SP500_return,ABBV,AZN,BMY,JNJ,...,LLY_lag,LLY_lag2,MRK_lag,MRK_lag2,NVO_lag,NVO_lag2,PFE_lag,PFE_lag2,ROG_lag,ROG_lag2
0,2000-02-01,4.1,170.000,6.661000,0.496560,-1.522563,,-12.828964,-13.228004,-16.339821,...,,,,,,,,,,
1,2000-03-01,4.0,171.000,6.519500,0.511258,9.413333,,22.264136,-0.218329,-2.079067,...,-11.121498,,-21.701151,,2.220031,,-11.226228,,54.440789,
2,2000-04-01,3.8,170.900,6.256522,1.327803,-3.266805,,5.567379,-8.205683,17.437698,...,5.804243,-11.121498,0.913712,-21.701151,8.390897,2.220031,14.101954,-11.226228,6.922258,54.440789
3,2000-05-01,4.0,171.200,5.990526,-0.181797,-1.572223,,-0.148357,5.395746,8.484832,...,23.153694,5.804243,12.400712,0.913712,-0.097663,8.390897,15.213674,14.101954,7.370518,6.922258
4,2000-06-01,4.0,172.200,6.440455,0.305565,1.728613,,10.549735,5.788826,14.239888,...,-1.296597,23.153694,7.374072,12.400712,20.863985,-0.097663,5.638019,15.213674,-8.163265,7.370518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,2022-10-01,3.7,297.987,3.519048,0.280228,8.571722,9.082759,7.239243,8.974528,6.494844,...,7.693656,-8.632348,0.890348,-4.454882,-5.591229,-8.581764,-3.250061,-9.758538,-3.448826,-6.956359
273,2022-11-01,3.6,298.598,3.983500,0.470134,5.559164,11.215906,15.575584,4.416367,2.316498,...,11.980826,7.693656,18.457777,0.890348,9.244201,-5.591229,6.375682,-3.250061,-2.707956,-3.448826
274,2022-12-01,3.5,298.990,3.891000,-0.085161,-6.193593,0.266786,-0.250105,-10.376181,-0.117873,...,2.482801,11.980826,8.814239,18.457777,14.479974,9.244201,7.690670,6.375682,-53.665066,-2.707956
275,2023-01-01,3.4,300.536,3.616190,0.390254,6.776820,-8.576205,-3.584079,0.972908,-7.489384,...,-1.137072,2.482801,0.753719,8.814239,8.619586,14.479974,3.090302,7.690670,9.446071,-53.665066


In [5]:
# Chronological split on the ISO-formatted `Date` strings: rows up to and
# including `split_date` are used for training, later rows for testing.
split_date = '2022-02-01'

# NOTE(review): dropna() has no `subset`, so any row with a NaN in ANY column
# is removed from the training frame. ABBV is NaN in the earliest rows shown
# above, so those rows are dropped here even though the NVS cell below
# excludes the ABBV columns from its features -- confirm this is intended.
train_df = data.loc[data['Date'] <= split_date].dropna()
test_df = data.loc[data['Date'] > split_date].reset_index(drop=True)

### NVS

In [6]:
# Build the NVS feature matrices and targets.
# The target itself, the three ABBV columns and the Date column are excluded
# from the features; the same exclusion list is applied to train and test so
# both matrices have identical columns.
nvs_excluded_columns = ['NVS', 'ABBV', 'ABBV_lag', 'ABBV_lag2', 'Date']

X_train = train_df.drop(columns=nvs_excluded_columns)
y_train = train_df['NVS']

X_test = test_df.drop(columns=nvs_excluded_columns)
# Targets for evaluation must come from the held-out frame so that they line
# up row-for-row with X_test (previously this was taken from train_df).
y_test = test_df['NVS']

In [7]:
# Display the dtype of every feature column in the NVS training matrix.
X_train.dtypes

unemployment_rate        float64
inflation_rate           float64
treasury_yield           float64
GDP_growth               float64
SP500_return             float64
AZN                      float64
BMY                      float64
JNJ                      float64
LLY                      float64
MRK                      float64
NVO                      float64
PFE                      float64
ROG                      float64
unemployment_rate_lag    float64
delta_unemp              float64
inflation_rate_lag       float64
inflation                float64
GDP_growth_lag           float64
SP500_return_lag         float64
NVS_lag                  float64
NVS_lag2                 float64
AZN_lag                  float64
AZN_lag2                 float64
BMY_lag                  float64
BMY_lag2                 float64
JNJ_lag                  float64
JNJ_lag2                 float64
LLY_lag                  float64
LLY_lag2                 float64
MRK_lag                  float64
MRK_lag2  

In [8]:
def collect_prediction(X_train,y_train,X_test,y_test,estimator,alpha,model_name):
    """Fit `estimator`, predict on the test features and report the quantile score.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``estimator.fit``.
    X_test : features passed to ``estimator.predict``.
    y_test : targets the predictions are scored against.
    estimator : object exposing ``fit(X, y)`` and ``predict(X)``.
    alpha : quantile level forwarded to ``XGBQuantile.quantile_score`` and
        shown in the printed summary.
    model_name : label used as the prefix of the printed summary.

    Returns
    -------
    The predictions returned by ``estimator.predict(X_test)``.

    Side effects: fits ``estimator`` in place and prints one summary line.
    """
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)

    # Score first, then format, so the print statement stays readable.
    score = XGBQuantile.quantile_score(y_test, predictions, alpha)
    print(f"{model_name} alpha = {alpha:.2f},score = {score:.1f}")

    return predictions

In [9]:
# Fit a lower (0.05) and an upper (`alpha`) quantile model for NVS with the
# same XGBQuantile estimator, re-parameterised between the two fits.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'XGBQuantile' object has no attribute 'silent'". The class
# comes from the local xgb_quantile module; presumably it targets an older
# xgboost API -- confirm the installed xgboost version against that module.
alpha = 0.95

regressor = XGBQuantile(
    n_estimators=100,
    max_depth=3,
    reg_alpha=5.0,
    gamma=0.5,
    reg_lambda=1.0,
)

# Lower bound of the interval.
regressor.set_params(
    quant_alpha=0.05,
    quant_delta=1.0,
    quant_thres=5.0,
    quant_var=3.2,
)
y_lower = collect_prediction(
    X_train, y_train, X_test, y_test,
    estimator=regressor,
    alpha=0.05,
    model_name="Quantile XGB",
)

# Upper bound of the interval: same estimator object, new quantile settings.
regressor.set_params(
    quant_alpha=alpha,
    quant_delta=1.0,
    quant_thres=6.0,
    quant_var=4.2,
)
y_upper = collect_prediction(
    X_train, y_train, X_test, y_test,
    estimator=regressor,
    alpha=alpha,
    model_name="Quantile XGB",
)

AttributeError: 'XGBQuantile' object has no attribute 'silent'