### Imports

In [25]:
%pip install -r dependencies.txt

INFO: pip is looking at multiple versions of qtpy to determine which version is compatible with other requirements. This could take a while.
Collecting QtPy==2.3.0
  Using cached QtPy-2.3.0-py3-none-any.whl (83 kB)
INFO: pip is looking at multiple versions of qtconsole to determine which version is compatible with other requirements. This could take a while.
Collecting qtconsole==5.4.0
  Using cached qtconsole-5.4.0-py3-none-any.whl (121 kB)
INFO: pip is looking at multiple versions of pyzmq to determine which version is compatible with other requirements. This could take a while.
Collecting pyzmq==25.0.0
  Using cached pyzmq-25.0.0-cp310-cp310-macosx_10_15_universal2.whl (1.8 MB)
INFO: pip is looking at multiple versions of pyyaml to determine which version is compatible with other requirements. This could take a while.
Collecting PyYAML==6.0
  Using cached PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl (173 kB)
INFO: pip is looking at multiple versions of pytz to determine which versio

In [26]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import requests
import scipy
import importlib
import helper, consts
importlib.reload(consts)
importlib.reload(helper)

<module 'helper' from '/Users/rosy/Clinic/Clinic/code/helper.py'>

### CONSTANTS

In [27]:
ROW = consts.ROW
COL = consts.COL
CLEANED_DATA_PATH = consts.CLEANED_DATA_PATH
RESPONSE_NAME = consts.RESPONSE_NAME

TRAIN_FILE = "training_data.csv"
TEST_FILE = "testing_data.csv"

In [28]:
REGRESSION_TYPES = helper.Regression('OLS').list_all_regression_types()
REGRESSION_TYPES

1: OLS
2: LASSO
3: XGBOOST


### Read data

In [29]:
train_df = pd.read_csv(CLEANED_DATA_PATH + TRAIN_FILE)
test_df = pd.read_csv(CLEANED_DATA_PATH + TEST_FILE)

In [30]:
train_df.head()

Unnamed: 0,rrirpnxm_nt_0,rrirpnxm_lst15_0,rrirpnxm_toxhr_0,rrirpnxm_lsthrx15_0,tonight
0,0.009545,-0.003896,-0.003841,-0.003296,-0.009093
1,-0.00319,-0.000876,0.008117,-0.000894,0.007955
2,-0.001437,-9.1e-05,-0.000974,0.000101,0.007004
3,-0.00303,0.000382,0.000696,0.000529,-0.008005
4,-0.003135,-0.000855,-0.000542,-0.001793,0.006914


### Interaction Terms

In [31]:
col_pairs = [
    ['relvol_lsthrx15_0', 'rrirpnxm_lst15_0'],
    ["rvdelta_toxhr_0", "rrirpnxm_toxhr_0"],
    ["rvdelta_am_1", "rrirpnxm_am_1"],
    ["rvdelta_md_1", "rrirpnxm_md_1"],
    ["rvdelta_pm_1", "rrirpnxm_pm_1"],
    ["rvdelta_nt_1", "rrirpnxm_nt_1"],
    ["rvdelta_dy_1to3", "rrirpnxm_dy_1to3"],
    ["rvdelta_nt_1to3", "rrirpnxm_1to3"],
    ["rvdelta_dy_4to6", "rrirpnxm_4to6"],
    ["rvdelta_nt_4to6", "rrirpnxm_nt_4to6"],
    ["rvdelta_dy_7to11", "rrirpnxm_dy_7to11"],
    ["rvdelta_nt_7to11", "rrirpnxm_nt_7to11"],
    ["rvdelta_dy_12to16", "rrirpnxm_dy_12to16"],
    ["rvdelta_nt_12to16", "rrirpnxm_nt_12to16"],
    ["rvdelta_dy_17to21", "rrirpnxm_dy_17to21"],
    ["rvdelta_nt_17to21", "rrirpnxm_nt_17to21"]
]
interaction_terms_train_df = helper.get_df_with_interaction_terms(train_df, col_pairs)
interaction_terms_test_df = helper.get_df_with_interaction_terms(test_df, col_pairs)

[['relvol_lsthrx15_0']] missing or already been grouped!
[['rvdelta_toxhr_0']] missing or already been grouped!
[['rvdelta_am_1']] missing or already been grouped!
[['rvdelta_md_1']] missing or already been grouped!
[['rvdelta_pm_1']] missing or already been grouped!
[['rvdelta_nt_1']] missing or already been grouped!
[['rvdelta_dy_1to3']] missing or already been grouped!
[['rvdelta_nt_1to3']] missing or already been grouped!
[['rvdelta_dy_4to6']] missing or already been grouped!
[['rvdelta_nt_4to6']] missing or already been grouped!
[['rvdelta_dy_7to11']] missing or already been grouped!
[['rvdelta_nt_7to11']] missing or already been grouped!
[['rvdelta_dy_12to16']] missing or already been grouped!
[['rvdelta_nt_12to16']] missing or already been grouped!
[['rvdelta_dy_17to21']] missing or already been grouped!
[['rvdelta_nt_17to21']] missing or already been grouped!
[['relvol_lsthrx15_0']] missing or already been grouped!
[['rvdelta_toxhr_0']] missing or already been grouped!
[['rvdel

In [32]:
# rosy: making my own interacting terms

col_pairs = [
    ['relvol_nt_0', 'rrirpnxm_nt_0'],
    ["relvol_lst15_0", "rrirpnxm_lst15_0"],
    ["relvol_toxhr_0", "rrirpnxm_toxhr_0"],
    ["relvol_lsthrx15_0", "rrirpnxm_lsthrx15_0"],
]
interaction_terms_train_df = helper.get_df_with_interaction_terms(train_df, col_pairs)
interaction_terms_test_df = helper.get_df_with_interaction_terms(test_df, col_pairs)

[['relvol_nt_0']] missing or already been grouped!
[['relvol_lst15_0']] missing or already been grouped!
[['relvol_toxhr_0']] missing or already been grouped!
[['relvol_lsthrx15_0']] missing or already been grouped!
[['relvol_nt_0']] missing or already been grouped!
[['relvol_lst15_0']] missing or already been grouped!
[['relvol_toxhr_0']] missing or already been grouped!
[['relvol_lsthrx15_0']] missing or already been grouped!


### Transform the data if needed

### Run regressions

#### Ordinary Least Squares

In [33]:
ols_regression_model = helper.Regression('OLS')
model_attributes = ols_regression_model.execute(train_df, RESPONSE_NAME, test_df)
model_attributes

array([ 3.82911060e-02,  3.55913794e-03,  1.23452145e-03, -1.34482596e-05,
        2.24528768e-05])

In [34]:
ols_regression_model.get_metric()

1. Weighted Correlation:
[[1.         0.05058978]
 [0.05058978 1.        ]]

2. Weighted Mean Return:
0.0005575588048315321

3. Weighted Scale Factor:
[1.49685605]



#### OLS w/ Interacting Terms

In [35]:
ols_interacting_model = helper.Regression('OLS')
ols_interacting_model.execute(interaction_terms_train_df, RESPONSE_NAME, interaction_terms_test_df)
ols_interacting_model.get_metric()

1. Weighted Correlation:
[[1.         0.05058978]
 [0.05058978 1.        ]]

2. Weighted Mean Return:
0.0005575588048315321

3. Weighted Scale Factor:
[1.49685605]



#### LASSO

In [36]:
lasso_model = helper.Regression('LASSO')
lasso_model.execute(train_df, RESPONSE_NAME, test_df)
lasso_model.get_metric()

1. Weighted Correlation:
[[1.         0.05313098]
 [0.05313098 1.        ]]

2. Weighted Mean Return:
0.0006055581691036236

3. Weighted Scale Factor:
[1.82900052]



### XGBoost



In [37]:
xgb_model = helper.Regression('XGBOOST')
xgb_model.execute(train_df, RESPONSE_NAME, test_df)
xgb_model.get_metric()



1. Weighted Correlation:
[[1.         0.05058978]
 [0.05058978 1.        ]]

2. Weighted Mean Return:
0.0005575588048315321

3. Weighted Scale Factor:
[1.4968572]

