### Imports

In [None]:
%pip install -r dependencies.txt

In [None]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import requests
import scipy
import importlib
import helper, consts
importlib.reload(consts)
importlib.reload(helper)

### CONSTANTS

In [None]:
# Project-wide constants re-exported from the consts module so the cells
# below can use short names.
ROW, COL = consts.ROW, consts.COL
RESPONSE_NAME = consts.RESPONSE_NAME

# Data locations (note: DATA_PATH is the *raw* data path).
DATA_PATH = consts.RAW_DATA_PATH
CLEANED_DATA_PATH = consts.CLEANED_DATA_PATH

# Day to evaluate on (YYYYMMDD-style string).  The training window used to be
# hard-coded here (20150101 up to, but not including, 20150601); it is now
# derived from TEST_DATE via helper.get_train_from_testday.
TEST_DATE = "20170103"

In [None]:
# Ask the helper for the training window that goes with TEST_DATE.  Only the
# first two entries of the returned sequence are used: start, then end.
trainRange = helper.get_train_from_testday(TEST_DATE)
train_start_date, train_end_date = trainRange[0], trainRange[1]

# "start-end" label for the window.
training_range = f"{train_start_date}-{train_end_date}"

In [None]:
# Collect the regression types reported by helper.Regression.  The 'OLS'
# instance is not kept; it only serves to call the listing method.
REGRESSION_TYPES = helper.Regression('OLS').list_all_regression_types()
# Bare expression so the notebook displays the value.
REGRESSION_TYPES

### Read data

In [None]:
# Pick up any edits made to helper.py since the last cell run.
importlib.reload(helper)

# relvol_* predictors; kept as their own list because the interaction-term
# cells reuse it.
relvol_cols = [
    "relvol_nt_0",
    "relvol_lst15_0",
    "relvol_lsthrx15_0",
    "relvol_toxhr_0",
]

# Full predictor list: the four rrirpnxm_* columns first, then the relvol_*
# columns in the same suffix order.
x_cols = [
    "rrirpnxm_nt_0",
    "rrirpnxm_lst15_0",
    "rrirpnxm_lsthrx15_0",
    "rrirpnxm_toxhr_0",
] + relvol_cols

# Load the train/test frames for the resolved window and test day.
train_df, test_df = helper.get_train_test_df(
    train_start_date, train_end_date, TEST_DATE, x_cols
)
train_df.info()


### Interaction Terms

In [None]:
# Pick up any edits made to helper.py since the last cell run.
importlib.reload(helper)

# Pair every relvol_* column with the rrirpnxm_* column that carries the same
# suffix, as [relvol column, rrirpnxm column].  The partner is derived from
# the column name rather than by zipping against x_cols: x_cols also contains
# the relvol columns themselves, so the zip was only correct because it
# silently truncated and x_cols happened to list the rrirpnxm columns first
# and in matching order.
interactingTerms = [
    [relvol, relvol.replace("relvol_", "rrirpnxm_", 1)] for relvol in relvol_cols
]

# Fail loudly if a derived partner was never requested as a predictor.
if not {partner for _, partner in interactingTerms} <= set(x_cols):
    raise ValueError(
        f"interaction partner column(s) missing from x_cols: {interactingTerms}"
    )

# Bare expression so the notebook displays the pairs.
interactingTerms

In [None]:
# Display the training frame's column labels before interaction terms are added.
train_df.columns

In [None]:
# Pick up any edits made to helper.py since the last cell run.
importlib.reload(helper)
# Run the helper on train_df with the column pairs in interactingTerms; the
# result is stored separately as interactingTerms_df.
interactingTerms_df = helper.get_df_with_interaction_terms(train_df, interactingTerms)

In [None]:
# Display the column labels of the frame returned by the interaction-term helper.
interactingTerms_df.columns

In [None]:
# Preview the first rows of the interaction-term frame.
interactingTerms_df.head()

In [None]:
# Preview the first rows of the test frame.
test_df.head()

In [None]:
# rosy: hand-written interaction pairs, each [relvol column, rrirpnxm column].
col_pairs = [
    ["relvol_nt_0", "rrirpnxm_nt_0"],
    ["relvol_lst15_0", "rrirpnxm_lst15_0"],
    ["relvol_toxhr_0", "rrirpnxm_toxhr_0"],
    ["relvol_lsthrx15_0", "rrirpnxm_lsthrx15_0"],
]

# Apply the same pairs to the training frame first, then to the test frame,
# so both frames are built from an identical pair list.
interaction_terms_train_df, interaction_terms_test_df = (
    helper.get_df_with_interaction_terms(frame, col_pairs)
    for frame in (train_df, test_df)
)

### Transform the data if needed

#### Ordinary Least Squares

In [None]:
# Pick up any edits made to helper.py since the last cell run.
importlib.reload(helper)
# Baseline OLS run on the plain train/test frames; no weights argument is
# passed in this call.
ols_regression_model = helper.Regression('OLS')
model_attributes = ols_regression_model.execute(train_df, RESPONSE_NAME, test_df)
# Bare expression so the notebook displays whatever execute() returned.
model_attributes

In [None]:
# Display the metric reported by the baseline OLS model after execute().
ols_regression_model.get_metric()

#### OLS with Interaction Terms

In [None]:
# Weights are derived by the helper from the plain training frame.
weights = helper.get_weights(train_df)
ols_interacting_model = helper.Regression('OLS')

# Train on the interaction-term training frame and pass the matching
# interaction-term *test* frame as the third argument, mirroring the other
# execute() calls (train frame, response name, test frame).  Previously the
# training-derived frame was passed in both positions, so the reported metric
# was computed on the data the model had been fitted to.  Both frames come
# from the same col_pairs list so their interaction columns line up.
# NOTE(review): weights is passed as the 4th positional argument here, while
# the LASSO cell passes cv 4th and weights 5th -- confirm against the
# signature of helper.Regression.execute.
ols_interacting_model.execute(
    interaction_terms_train_df, RESPONSE_NAME, interaction_terms_test_df, weights
)
ols_interacting_model.get_metric()

#### LASSO

In [None]:
# Passed to execute() below; presumably the number of cross-validation
# folds -- TODO confirm in helper.
cv = 10

# Weights are derived by the helper from the plain training frame.
weights = helper.get_weights(train_df)
lasso_model = helper.Regression('LASSO')
# NOTE(review): here cv is the 4th positional argument and weights the 5th,
# whereas the OLS-with-interactions and XGBoost cells pass weights 4th --
# confirm against the signature of helper.Regression.execute.
lasso_model.execute(train_df, RESPONSE_NAME, test_df, cv, weights)
lasso_model.get_metric()

#### XGBoost



In [None]:
# Pick up any edits made to helper.py since the last cell run.
importlib.reload(helper)
# Weights are derived by the helper from the plain training frame.
weights = helper.get_weights(train_df)
# Only constructs the model wrapper; execute() is called in the next cell.
xgb_model = helper.Regression('XGBOOST')

In [None]:
# Run the XGBoost model on the plain train/test frames.  This rebinds
# model_attributes, which the baseline OLS cell also assigns.
# NOTE(review): weights is the 4th positional argument here, while the LASSO
# cell passes cv 4th and weights 5th -- confirm against the signature of
# helper.Regression.execute.
model_attributes = xgb_model.execute(train_df, RESPONSE_NAME, test_df, weights)
xgb_model.get_metric()

Get the feature importances from the fitted XGBoost model and collect them in a DataFrame.

In [None]:
# Read the importances exposed by the underlying fitted model object
# (xgb_model.model), then display them.
feature_importance = xgb_model.model.feature_importances_
feature_importance

In [None]:
# Tabulate the model's importances against the training-frame column labels,
# largest importance first.
# NOTE(review): this assumes train_df.columns lines up one-to-one with
# feature_importance.  If train_df still contains the response column (or any
# other column the model was not trained on) the two lengths will differ and
# the DataFrame constructor will raise -- confirm which columns the helper
# actually fits on.
importance_df = (
    pd.DataFrame({'Feature': train_df.columns, 'Importance': feature_importance})
    .sort_values(by='Importance', ascending=False)
)

importance_df