### Imports

In [42]:
%pip install -r dependencies.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import requests
import scipy
import importlib
import helper, consts
importlib.reload(consts)
importlib.reload(helper)

<module 'helper' from '/Users/rosy/Clinic/Clinic/code/helper.py'>

### CONSTANTS

In [3]:
ROW = consts.ROW
COL = consts.COL
CLEANED_DATA_PATH = consts.CLEANED_DATA_PATH
DATA_PATH = consts.DATA_PATH_2015
RESPONSE_NAME = consts.RESPONSE_NAME

TRAIN_START_DATE = "20150101"
TRAIN_END_DATE = "20150601" # Up to but not including
TEST_DATE = "20150601"

In [7]:
REGRESSION_TYPES = helper.Regression('OLS').list_all_regression_types()
REGRESSION_TYPES

1: OLS
2: LASSO
3: XGBOOST


### Read data

In [4]:
x_cols = ["rrirpnxm_nt_0", "rrirpnxm_lst15_0","rrirpnxm_lsthrx15_0", "rrirpnxm_toxhr_0", "relvol_nt_0"]
train_df, test_df = helper.get_train_test_df(TRAIN_START_DATE, TRAIN_END_DATE, TEST_DATE, x_cols)

### Interaction Terms

In [5]:
interactingTerms = [["relvol_nt_0", col] for col in x_cols[:-1]]
interactingTerms

[['relvol_nt_0', 'rrirpnxm_nt_0'],
 ['relvol_nt_0', 'rrirpnxm_lst15_0'],
 ['relvol_nt_0', 'rrirpnxm_lsthrx15_0'],
 ['relvol_nt_0', 'rrirpnxm_toxhr_0']]

In [6]:
interactingTerms_df = helper.get_df_with_interaction_terms(train_df, interactingTerms)

In [7]:
interactingTerms_df.head()

Unnamed: 0,rrirpnxm_nt_0,rrirpnxm_lst15_0,rrirpnxm_lsthrx15_0,rrirpnxm_toxhr_0,relvol_nt_0,tonight,"('relvol_nt_0', 'rrirpnxm_nt_0')","('relvol_nt_0', 'rrirpnxm_lst15_0')","('relvol_nt_0', 'rrirpnxm_lsthrx15_0')","('relvol_nt_0', 'rrirpnxm_toxhr_0')"
0,0.0,0.0,0.0,0.0,0.0,-7.3e-05,0.0,0.0,0.0,0.0
1,-0.003448,0.001921,0.001569,-0.013866,0.969491,-0.004306,-0.003343,0.001862,0.001521,-0.013443
2,0.0,-0.00283,-0.00145,0.004245,-0.771546,0.004177,-0.0,0.002183,0.001119,-0.003275
3,0.0,0.0,0.0,0.0,0.0,-7.3e-05,0.0,0.0,0.0,0.0
4,-0.001338,0.005082,-0.000502,-0.010742,-0.931449,-0.007763,0.001246,-0.004734,0.000468,0.010006


In [8]:
test_df.head()

Unnamed: 0,rrirpnxm_nt_0,rrirpnxm_lst15_0,rrirpnxm_lsthrx15_0,rrirpnxm_toxhr_0,relvol_nt_0,tonight
0,-8.7e-05,0.000925,-0.000866,-0.002807,0.432604,0.014909
1,-0.001879,-0.000284,-0.000161,-4.9e-05,1.130768,-0.002456
2,-0.006314,0.002057,-0.003269,-0.000509,-1.257813,0.002018
3,0.008574,0.002091,0.003191,0.016976,0.344154,0.003111
4,-0.005452,-0.001565,0.004289,0.006091,-0.534876,-0.005646


In [32]:
# rosy: making my own interacting terms

col_pairs = [
    ['relvol_nt_0', 'rrirpnxm_nt_0'],
    ["relvol_lst15_0", "rrirpnxm_lst15_0"],
    ["relvol_toxhr_0", "rrirpnxm_toxhr_0"],
    ["relvol_lsthrx15_0", "rrirpnxm_lsthrx15_0"],
]
interaction_terms_train_df = helper.get_df_with_interaction_terms(train_df, col_pairs)
interaction_terms_test_df = helper.get_df_with_interaction_terms(test_df, col_pairs)

[['relvol_nt_0']] missing or already been grouped!
[['relvol_lst15_0']] missing or already been grouped!
[['relvol_toxhr_0']] missing or already been grouped!
[['relvol_lsthrx15_0']] missing or already been grouped!
[['relvol_nt_0']] missing or already been grouped!
[['relvol_lst15_0']] missing or already been grouped!
[['relvol_toxhr_0']] missing or already been grouped!
[['relvol_lsthrx15_0']] missing or already been grouped!


### Transform the data if needed

#### Ordinary Least Squares

In [9]:
ols_regression_model = helper.Regression('OLS')
model_attributes = ols_regression_model.execute(train_df, RESPONSE_NAME, test_df)
model_attributes

array([3.86475959e-02, 4.68771384e-03, 1.99013644e-03, 1.70877863e-03,
       3.94435165e-05, 3.85589377e-05])

In [10]:
ols_regression_model.get_metric()

1. Weighted Correlation:
[[ 1.         -0.04230618]
 [-0.04230618  1.        ]]

2. Weighted Mean Return:
-8.251876590330781e-05

3. Weighted Scale Factor:
[-1.31304471]



#### OLS w/ Interacting Terms

In [11]:
ols_interacting_model = helper.Regression('OLS')
ols_interacting_model.execute(interactingTerms_df, RESPONSE_NAME, interactingTerms_df)
ols_interacting_model.get_metric()

1. Weighted Correlation:
[[1.         0.04006125]
 [0.04006125 1.        ]]

2. Weighted Mean Return:
0.00030635630392555457

3. Weighted Scale Factor:
[1.]



#### LASSO

In [12]:
lasso_model = helper.Regression('LASSO')
lasso_model.execute(train_df, RESPONSE_NAME, test_df)
lasso_model.get_metric()

1. Weighted Correlation:
[[ 1.         -0.04069795]
 [-0.04069795  1.        ]]

2. Weighted Mean Return:
-1.801176844783715e-05

3. Weighted Scale Factor:
[-1.47184094]



### XGBoost



In [13]:
xgb_model = helper.Regression('XGboost')
xgb_model.execute(train_df, RESPONSE_NAME, test_df)
xgb_model.get_metric()



1. Weighted Correlation:
[[ 1.         -0.04230619]
 [-0.04230619  1.        ]]

2. Weighted Mean Return:
-8.251876590330781e-05

3. Weighted Scale Factor:
[-1.313045]



