In [1]:
# Initial Imports:
import pandas as pd
import numpy as np
import os
from datetime import datetime

# To run models:
from xgboost import XGBRegressor, XGBClassifier
from sklearn.metrics import (r2_score, 
                             ConfusionMatrixDisplay, 
                             accuracy_score, 
                             precision_score, 
                             recall_score, 
                             f1_score)

In [2]:
BASE_DIR = os.path.abspath(os.path.dirname('__file__'))

all_data  = pd.read_csv('all_data.csv', index_col='Date', parse_dates=True)

In [3]:
all_data.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,cdd_18-France,cdd_18-Germany,cdd_18-India,cdd_18-Indonesia,...,ret_ETN,ret_GOOGL,ret_HD,ret_HIG,ret_MS,ret_MSFT,ret_PLD,ret_TT,ret_UNH,ret_V
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-09-30,-0.0307,-0.028,0.0056,0.018,-0.006,0.0,7.588,1.484,313.4,233.8,...,,,,,,,,,,
2015-10-31,0.0775,-0.0205,-0.0046,0.0086,0.0055,0.0,0.5981,0.0,280.1,263.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-11-30,0.0056,0.0329,-0.0042,-0.0271,-0.0103,0.0,0.01212,0.0,169.6,256.0,...,0.14482,0.194997,0.159235,0.001563,0.09398,0.236248,0.098971,0.155604,-0.028446,0.136261
2015-12-31,-0.0217,-0.0297,-0.0261,0.0045,0.0003,0.0001,0.0,0.0,88.83,250.7,...,-0.105227,0.019873,-0.007803,-0.047765,-0.072595,0.020791,0.013633,-0.052648,0.048249,-0.018478
2016-01-31,-0.0577,-0.0348,0.0209,0.028,0.0307,0.0001,0.0,0.0,72.22,260.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
all_data = all_data.iloc[1:,]
all_data = all_data.ffill()

In [5]:
stock_list = ['ret_AAPL', 'ret_APTV', 'ret_BAC', 'ret_BALL', 'ret_BEP', 'ret_BKNG', 
              'ret_BMRN', 'ret_COST', 'ret_CVS', 'ret_DE', 'ret_ETN', 'ret_GOOGL', 'ret_HD', 
              'ret_HIG', 'ret_MS', 'ret_MSFT', 'ret_PLD', 'ret_TT', 'ret_UNH', 'ret_V']

In [6]:
for i in stock_list:
    all_data[i] = np.where(all_data[i] > 0, 1, 0) # 'Long = 1', 'Short = -1'

# example : Create C_Score for AAPL

### Note! C_score is climate score for each stock

In [7]:
# Define X and y variables:
X = all_data.drop(stock_list, axis=1)
X = X.drop('ret_Fund' ,axis=1)
y = all_data.loc[:, 'ret_AAPL']

In [8]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

In [9]:
clf = XGBClassifier(booster = 'gbtree', 
                    objective = 'binary:logistic',
                    gamma = 0.05,
                    colsample_bytree = 0.8,
                    colsample_bynode = 0.5,
                    n_estimators = 200, 
                    subsample = 0.8, 
                    min_child_weight = 1, 
                    learning_rate = 0.1)

In [10]:
%%time
clf.fit(X_train, y_train)

CPU times: user 2.31 s, sys: 27 ms, total: 2.33 s
Wall time: 218 ms


In [11]:
predictions = clf.predict(X)

In [12]:
accuracy_score(y, predictions)

0.9310344827586207

In [13]:
precision_score(y, predictions, pos_label=0)

0.9565217391304348

In [14]:
precision_score(y, predictions, pos_label=1)

0.9024390243902439

In [15]:
recall_score(y, predictions, pos_label=0)

0.9166666666666666

In [16]:
recall_score(y, predictions, pos_label=1)

0.9487179487179487

# Calculate C_Score for all stock

In [17]:
stock_list = ['AAPL', 'APTV', 'BAC', 'BALL', 'BEP', 'BKNG', 'BMRN', 'COST', 'CVS', 'DE', 'ETN', 'GOOGL', 'HD', 'HIG', 'MS', 'MSFT', 'PLD', 'TT', 'UNH', 'V']

In [18]:
%%time
for i in stock_list:
    # Define X and y variables:
    X = all_data.drop(stock_list, axis=1)
    X = X.drop('ret_Fund' ,axis=1)
    y = all_data.loc[:, 'ret_'+i]

    # Split into Training/Testing Data:
    split = int(0.8 * len(X))
    X_train = X[: split]
    X_test = X[split:]
    y_train = y[: split]
    y_test = y[split:]

    clf = XGBClassifier(booster = 'gbtree', 
                        objective = 'binary:logistic',
                        gamma = 0.05,
                        colsample_bytree = 0.8,
                        colsample_bynode = 0.5,
                        n_estimators = 200, 
                        subsample = 0.8, 
                        min_child_weight = 1, 
                        learning_rate = 0.1)
    clf.fit(X_train, y_train)
    predictions = clf.predict(X)
    all_data['C_Score_'+i] = predictions

CPU times: user 53.5 s, sys: 155 ms, total: 53.7 s
Wall time: 5.09 s


In [19]:
all_data.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,cdd_18-France,cdd_18-Germany,cdd_18-India,cdd_18-Indonesia,...,C_Score_ETN,C_Score_GOOGL,C_Score_HD,C_Score_HIG,C_Score_MS,C_Score_MSFT,C_Score_PLD,C_Score_TT,C_Score_UNH,C_Score_V
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-10-31,0.0775,-0.0205,-0.0046,0.0086,0.0055,0.0,0.5981,0.0,280.1,263.1,...,0,0,0,0,0,0,0,0,0,0
2015-11-30,0.0056,0.0329,-0.0042,-0.0271,-0.0103,0.0,0.01212,0.0,169.6,256.0,...,1,1,1,1,1,1,1,1,0,1
2015-12-31,-0.0217,-0.0297,-0.0261,0.0045,0.0003,0.0001,0.0,0.0,88.83,250.7,...,0,1,0,0,0,1,1,0,1,0
2016-01-31,-0.0577,-0.0348,0.0209,0.028,0.0307,0.0001,0.0,0.0,72.22,260.8,...,0,0,0,0,0,0,0,0,0,0
2016-02-29,-0.0007,0.0085,-0.0057,0.0328,0.0199,0.0002,0.0,0.0,142.2,230.0,...,1,0,0,0,0,0,0,1,1,0


In [20]:
score  = all_data.columns[[x.startswith("C_Score") for x in all_data.columns]]
c_score = all_data[score]

In [21]:
c_score

Unnamed: 0_level_0,C_Score_AAPL,C_Score_APTV,C_Score_BAC,C_Score_BALL,C_Score_BEP,C_Score_BKNG,C_Score_BMRN,C_Score_COST,C_Score_CVS,C_Score_DE,C_Score_ETN,C_Score_GOOGL,C_Score_HD,C_Score_HIG,C_Score_MS,C_Score_MSFT,C_Score_PLD,C_Score_TT,C_Score_UNH,C_Score_V
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2015-10-31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2015-11-30,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,0,1
2015-12-31,0,0,0,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,0
2016-01-31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2016-02-29,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-31,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1
2022-09-30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2022-10-31,1,1,1,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,1,1
2022-11-30,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1
