### Imports

In [633]:
%pip install -r dependencies.txt

Note: you may need to restart the kernel to use updated packages.


In [634]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import requests
import scipy
import importlib
import helper, consts
# Re-import the project-local modules so edits made to consts.py / helper.py
# are picked up without restarting the kernel.
# consts is reloaded before helper.
# NOTE(review): presumably helper reads values from consts, which would make
# this order matter — confirm.
importlib.reload(consts)
# Last expression of the cell: the reloaded module object is displayed as the
# cell output (its repr includes the module's local file path).
importlib.reload(helper)

<module 'helper' from '/Users/hoangchu/Documents/Clinic/code/helper.py'>

### Constants

In [635]:
# Settings shared across the project, pulled from consts so this notebook
# refers to them by short local names.
ROW, COL = consts.ROW, consts.COL
CLEANED_DATA_PATH, RESPONSE_NAME = consts.CLEANED_DATA_PATH, consts.RESPONSE_NAME

# File names of the train/test splits; they are appended directly to
# CLEANED_DATA_PATH when the data is read.
TRAIN_FILE, TEST_FILE = "training_data.csv", "testing_data.csv"

In [636]:
# Ask a throwaway Regression instance which regression types helper supports.
# NOTE(review): the recorded output ("1: OLS", "2: LASSO") looks printed
# rather than returned, so REGRESSION_TYPES may end up as None — confirm in
# helper.Regression.list_all_regression_types.
REGRESSION_TYPES = (
    helper.Regression("OLS")
    .list_all_regression_types()
)
REGRESSION_TYPES

1: OLS
2: LASSO


### Read data

In [637]:
# Load the cleaned train/test splits; each file name is appended directly to
# CLEANED_DATA_PATH (plain string concatenation, no separator is inserted).
# NOTE(review): the preview of train_df shows an "Unnamed: 0" column, i.e. the
# saved row index is read back as a regular column. Unless helper drops it,
# it reaches the models as a feature — consider read_csv(..., index_col=0)
# after confirming how helper.Regression selects its predictors.
train_df, test_df = (
    pd.read_csv(CLEANED_DATA_PATH + csv_name)
    for csv_name in (TRAIN_FILE, TEST_FILE)
)

In [638]:
# Preview the first rows of the training frame as a sanity check on the load.
train_df.head()

Unnamed: 0,wt,today,tonight,tmwam,dn3sttmwmd,dn1,dn4x1,cftorrrelstd_open_0,liqlog_open_0,llirpnxm_am_1,...,tr_md_1,tr_nt_0,tr_nt_1,tr_nt_12to16,tr_nt_17to21,tr_nt_1to3,tr_nt_4to6,tr_nt_7to11,tr_toxhr_0,rrirpnxm_nt_0
0,0.000594,-0.01672,-0.015436,-0.002487,0.011511,-0.038052,0.02553,0.231635,-0.065247,0.002597,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015402
1,0.001446,0.001475,-0.007683,-0.001346,0.027832,-0.006712,0.029269,-0.482915,1.571073,-5.5e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005125
2,6.9e-05,-0.019774,-0.013203,-0.00321,-0.001507,-0.040861,0.005203,0.165615,-2.223915,0.000659,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.006332
3,0.00012,-0.000934,-0.002772,-0.000651,0.005818,-0.004393,0.006443,-1.229984,-1.67097,-0.000372,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001978
4,0.000892,0.004249,0.002662,-0.000931,-0.034163,0.007955,-0.037853,-1.32647,0.342333,-0.000857,...,0.0,0.0,0.0,0.0,0.012116,0.0,0.0,-0.055974,0.0,0.003413


### Interaction Terms

In [639]:
# Column pairs handed to helper.get_df_with_interaction_terms. Apart from the
# first entry, each pair follows the same pattern: "rvdelta_<suffix>" paired
# with "rrirpnxm_<suffix>" for the same suffix.
# NOTE(review): presumably the helper adds one interaction feature per pair —
# confirm against helper.get_df_with_interaction_terms.
#
# Fix: the "nt_1to3" and "dy_4to6" entries previously named "rrirpnxm_1to3"
# and "rrirpnxm_4to6", dropping the "nt_" / "dy_" infix that every sibling
# pair carries. The earlier run reported the dy_4to6 pair as missing even
# though no other "dy" pair was flagged, which is consistent with that typo.
# NOTE(review): pairs whose "rvdelta_*" column is genuinely absent from the
# data may still be reported as missing after this fix — re-run and check
# the helper's messages.
col_pairs = [
    ["relvol_lsthrx15_0", "rrirpnxm_lst15_0"],
    ["rvdelta_toxhr_0", "rrirpnxm_toxhr_0"],
    ["rvdelta_am_1", "rrirpnxm_am_1"],
    ["rvdelta_md_1", "rrirpnxm_md_1"],
    ["rvdelta_pm_1", "rrirpnxm_pm_1"],
    ["rvdelta_nt_1", "rrirpnxm_nt_1"],
    ["rvdelta_dy_1to3", "rrirpnxm_dy_1to3"],
    ["rvdelta_nt_1to3", "rrirpnxm_nt_1to3"],
    ["rvdelta_dy_4to6", "rrirpnxm_dy_4to6"],
    ["rvdelta_nt_4to6", "rrirpnxm_nt_4to6"],
    ["rvdelta_dy_7to11", "rrirpnxm_dy_7to11"],
    ["rvdelta_nt_7to11", "rrirpnxm_nt_7to11"],
    ["rvdelta_dy_12to16", "rrirpnxm_dy_12to16"],
    ["rvdelta_nt_12to16", "rrirpnxm_nt_12to16"],
    ["rvdelta_dy_17to21", "rrirpnxm_dy_17to21"],
    ["rvdelta_nt_17to21", "rrirpnxm_nt_17to21"],
]

# Build the augmented frames from the same pair list for train and test so
# both splits end up with the same set of interaction columns.
interaction_terms_train_df = helper.get_df_with_interaction_terms(train_df, col_pairs)
interaction_terms_test_df = helper.get_df_with_interaction_terms(test_df, col_pairs)

[['rvdelta_pm_1']] missing or already been grouped!
[['rvdelta_nt_1']] missing or already been grouped!
[['rvdelta_nt_1to3']] missing or already been grouped!
[['rvdelta_dy_4to6']] missing or already been grouped!
[['rvdelta_nt_4to6']] missing or already been grouped!
[['rvdelta_nt_7to11']] missing or already been grouped!
[['rvdelta_nt_12to16']] missing or already been grouped!
[['rvdelta_nt_17to21']] missing or already been grouped!
[['rvdelta_pm_1']] missing or already been grouped!
[['rvdelta_nt_1']] missing or already been grouped!
[['rvdelta_nt_1to3']] missing or already been grouped!
[['rvdelta_dy_4to6']] missing or already been grouped!
[['rvdelta_nt_4to6']] missing or already been grouped!
[['rvdelta_nt_7to11']] missing or already been grouped!
[['rvdelta_nt_12to16']] missing or already been grouped!
[['rvdelta_nt_17to21']] missing or already been grouped!


### Transform the data if needed

### Run regressions

#### Ordinary Least Squares

In [640]:
# Baseline: OLS on the original (non-augmented) train/test frames.
ols_regression_model = helper.Regression('OLS')
# execute() receives the training frame, the response column name and the
# test frame.
# NOTE(review): presumably it fits on train and scores on test — confirm in
# helper.Regression.execute.
ols_regression_model.execute(train_df, RESPONSE_NAME, test_df)
# Reports the three metrics seen in the cell output: weighted correlation,
# weighted mean return and weighted scale factor.
ols_regression_model.get_metric()

1. Weighted Correlation:
[[1.         0.72854457]
 [0.72854457 1.        ]]

2. Weighted Mean Return:
0.005221154716981131

3. Weighted Scale Factor:
[0.62839211]



#### OLS with Interaction Terms

In [641]:
# Same OLS setup as the baseline, but run on the frames returned by
# helper.get_df_with_interaction_terms, so the metrics can be compared
# directly against the baseline OLS cell.
ols_interacting_model = helper.Regression('OLS')
ols_interacting_model.execute(interaction_terms_train_df, RESPONSE_NAME, interaction_terms_test_df)
# Reports weighted correlation, weighted mean return and weighted scale factor.
ols_interacting_model.get_metric()

1. Weighted Correlation:
[[1.         0.73491725]
 [0.73491725 1.        ]]

2. Weighted Mean Return:
0.005242186620926244

3. Weighted Scale Factor:
[0.63591755]



#### LASSO

In [642]:
# LASSO on the original (non-augmented) train/test frames, using the same
# execute/get_metric interface as the OLS runs.
lasso_model = helper.Regression('LASSO')
# NOTE(review): the recorded output of this cell repeats the line
# "model = cd_fast.enet_coordinate_descent_gram(" ten times, which looks like
# the tail of scikit-learn coordinate-descent warnings (likely
# ConvergenceWarning). Confirm, and if so consider more iterations or feature
# scaling inside helper before trusting these metrics.
lasso_model.execute(train_df, RESPONSE_NAME, test_df)
# Reports weighted correlation, weighted mean return and weighted scale factor.
lasso_model.get_metric()

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


1. Weighted Correlation:
[[1.        0.7379731]
 [0.7379731 1.       ]]

2. Weighted Mean Return:
0.005155882675814751

3. Weighted Scale Factor:
[1.06295937]

