In [1]:
import numpy as np
np.random.seed(1)
import pandas as pd
import scipy

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

from xgboost.sklearn import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

from functools import partial

import matplotlib.pyplot as plt
import seaborn as sns

from xgb_utils import *

In [2]:
data = pd.read_csv('merged_df.csv')
data

Unnamed: 0,Date,ABBV,unemployment_rate,CPI,treasury_yield,GDP_growth,SP500_return,AZN,BMY,JNJ,LLY,MRK,NVO,NVS,PFE,ROG,inflation_change,unemp_change,treasury_yield_change
0,2013-03-01,10.455062,7.5,232.282,1.984211,0.397212,3.337507,14.582205,11.414669,7.978126,4.851792,3.440210,-7.714279,9.067089,5.443922,-0.188640,-0.655,-0.2,0.069449
1,2013-04-01,12.922975,7.6,231.797,1.957500,0.085246,2.374537,3.881563,-3.568848,4.538222,-2.482836,7.359867,11.497419,3.537347,0.727651,-10.457790,-0.485,0.1,-0.026711
2,2013-05-01,-6.438455,7.5,231.893,1.759091,-0.190605,2.361004,-1.290451,16.816781,-1.231989,-4.008663,-0.638313,-8.792401,-2.711502,-6.329562,8.513136,0.096,-0.1,-0.198409
3,2013-06-01,-3.162299,7.5,232.445,1.928182,0.620501,-1.853781,-7.707318,-2.868894,2.762464,-6.788137,-0.535372,-3.805087,-1.463219,3.722003,2.269287,0.552,0.0,0.169091
4,2013-07-01,10.014486,7.3,232.900,2.300000,0.210359,5.724173,7.230424,-3.244595,8.898141,8.122938,4.655361,9.001745,1.272809,4.355584,17.540151,0.455,-0.2,0.371818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,2023-01-01,-8.576205,3.4,300.536,3.616190,0.390254,6.776820,-3.584079,0.972908,-7.489384,-5.928822,-2.549213,2.541749,-0.110227,-13.817335,16.968326,1.546,-0.1,-0.274810
119,2023-02-01,5.152609,3.6,301.648,3.531500,-0.442183,-2.514271,-0.290649,-4.328217,-6.217115,-9.568502,-1.089288,1.592445,-7.172811,-7.286115,5.451681,1.112,0.2,-0.084690
120,2023-03-01,3.554254,3.5,301.808,3.746842,-0.442183,3.313488,8.035329,0.507544,1.862736,10.703390,0.141189,12.873250,9.367574,0.566924,11.025813,0.160,-0.1,0.215342
121,2023-04-01,-5.176628,3.4,302.918,3.663043,-0.442183,1.985238,5.489119,-3.664707,5.612908,15.269915,9.289214,5.836894,16.334413,-4.681371,-1.517467,1.110,-0.1,-0.083799


In [3]:
macros = ['unemployment_rate', 'GDP_growth', 'SP500_return', 'inflation_change', 'unemp_change', 'treasury_yield_change']

for m in macros:
    data[f'{m}_lag'] = data[m].shift(1)
    data[f'{m}_lag2'] = data[m].shift(2)
    data[f'{m}_lag3'] = data[m].shift(3)

In [4]:
data['Quater_dummy'] = 0
data['Quater_rippel'] = 0
data['Date'] = pd.to_datetime(data['Date'])
for index, row in data.iterrows():
    if row['Date'].month in [3, 6, 9, 12]:
        data.loc[index, 'Quater_dummy'] = 1
    if row['Date'].month in [1, 4, 7, 10]:
        data.loc[index, 'Quater_dummy'] = 1

In [5]:
tickers = ['ABBV', 'NVS', 'AZN', 'BMY', 'JNJ', 'LLY', 'MRK', 'NVO', 'PFE', 'ROG']

for t in tickers:
    data[f'{t}_lag'] = data[t].shift(1)
    data[f'{t}_lag2'] = data[t].shift(2)

MACROS

In [6]:
data_macro = data.copy()
to_exclude = []

for t in tickers:
    to_exclude.append(t)
    to_exclude.append(f'{t}_lag')
    to_exclude.append(f'{t}_lag2')

for m in macros:
    to_exclude.append(m)


to_exclude.append('Date')

12 month

In [7]:
dates12m = ['2022-05-01']#, '2022-06-01', '2022-07-01', '2022-08-01']

loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude, dates12m, 12)
print(f'ABBV 12m-loss: {loss} \n')

ABBV 12m-loss: 11.316596733709495 



9 month

In [8]:
dates9m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01']

loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude, dates9m, 9)
print(f'ABBV-9m-loss: {loss} \n')

ABBV-9m-loss: 8.104017057279268 



6 month

In [9]:
dates6m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01']


loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude, dates6m, 6)
print(f'ABBV-6m-loss: {loss} \n')

ABBV-6m-loss: 4.874960316783349 



3 month

In [10]:
dates3m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01']

loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude, dates3m, 3)
print(f'ABBV-3m-loss: {loss} \n')

ABBV-3m-loss: 3.542646130167654 



1 month

In [11]:
dates1m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01']

loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude, dates1m, 1)
print(f'ABBV-1m-loss: {loss} \n')

ABBV-1m-loss: 0.7515972527087004 



STOCKS

In [12]:
data_stocks = data.copy()
to_exclude2 = []

for m in macros:
    to_exclude2.append(m)
    to_exclude2.append(f'{m}_lag')
    to_exclude2.append(f'{m}_lag2')
    to_exclude2.append(f'{m}_lag3')

for t in tickers:
    to_exclude2.append(t)


to_exclude2.append('Date')

In [17]:
dates12m = ['2022-05-01']#, '2022-06-01', '2022-07-01', '2022-08-01']

loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude2, dates12m, 12)
print(f'ABBV 12m-loss: {loss} \n')

ABBV 12m-loss: 9.435762241956123 



9 month

In [18]:
dates9m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01']

loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude2, dates9m, 9)
print(f'ABBV-9m-loss: {loss} \n')

ABBV-9m-loss: 6.92902732930904 



6 month

In [19]:
dates6m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01']


loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude2, dates6m, 6)
print(f'ABBV-6m-loss: {loss} \n')

ABBV-6m-loss: 4.273372612536642 



In [20]:
dates3m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01']

loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude2, dates3m, 3)
print(f'ABBV-3m-loss: {loss} \n')

ABBV-3m-loss: 1.8861132685268274 



In [21]:
dates1m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01']

loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude2, dates1m, 1)
print(f'ABBV-1m-loss: {loss} \n')

ABBV-1m-loss: 0.579863476673398 



MACROS + TICKERS

In [22]:
covs = data.copy()
to_exclude3 = ['Date']

for m in macros:
    to_exclude3.append(m)

for t in tickers:
    to_exclude3.append(t)

12 month

In [23]:
dates12m = ['2022-05-01']#, '2022-06-01', '2022-07-01', '2022-08-01']

loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude3, dates12m, 12)
print(f'ABBV 12m-loss: {loss} \n')

ABBV 12m-loss: 7.299063464241938 



9 month

In [24]:
dates9m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01']

loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude3, dates9m, 9)
print(f'ABBV-9m-loss: {loss} \n')

ABBV-9m-loss: 6.228951556554465 



6 month

In [25]:
dates6m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01']


loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude3, dates6m, 6)
print(f'ABBV-6m-loss: {loss} \n')

ABBV-6m-loss: 4.059930361089115 



3 month

In [26]:
dates3m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01']

loss = get_model_performance(data_stocks, 'ABBV', 0.05, to_exclude3, dates3m, 3)
print(f'ABBV-3m-loss: {loss} \n')

ABBV-3m-loss: 1.7897863604440585 



1 month

In [27]:
dates1m = ['2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01']

loss = get_model_performance(data_macro, 'ABBV', 0.05, to_exclude3, dates1m, 1)
print(f'ABBV-1m-loss: {loss} \n')

ABBV-1m-loss: 0.5474342853332762 

