In [None]:
%load_ext autoreload
%autoreload 2

## Common Library Definitions

import sqlalchemy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.ticker as mtick

# Custom Library Definitions
from CustomLibs.CustomFunctions import plot_corr_heatmap, plot_permutation_importance, sqlcol,save_to_file,load_from_file
from config import Config
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler,SplineTransformer,OneHotEncoder

from CustomLibs.CustomTransformers import filtered_transformer
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import LinearSVR, SVR
from sklearn.ensemble import RandomForestRegressor
from CustomLibs.MultiPipe import MultiPipe
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from pprint import pprint

from sklearn.metrics import mean_absolute_error,root_mean_squared_error,r2_score
import scipy.stats

## SQL Store Definition
# SQLAlchemy engine built from the connection string held in Config;
# the data-loading cell opens its connection from this engine.
engine = sqlalchemy.create_engine(Config.CONN_STR)

# Cutoff read from Config; X / y are sliced up to this label, and the
# test partition starts at date_test_start.
date_val_end=Config.TEST_DATE_CUTOFF
# One calendar day after the cutoff (a DateOffset, so it is calendar-based
# rather than a fixed 24-hour Timedelta).
date_test_start=pd.to_datetime(date_val_end) + pd.DateOffset(days=1)




In [None]:
# Load the preprocessed feature table and the permutation feature ranking
# from the 'Gold' schema.
with engine.connect() as conn:
    df_preproc = pd.read_sql_table('Preprocessed_Features', conn, schema='Gold')
    df_feature_ranks = pd.read_sql_table('PermutationFeatureRanks', conn, schema='Gold')

# Index by date without mutating in place, and force string column names.
df_preproc = df_preproc.set_index('Date')
df_preproc.columns = [str(x) for x in df_preproc.columns]

# The label is taken to be the last column of the table.
# NOTE(review): this relies on the column order of 'Preprocessed_Features' - confirm.
label_field = df_preproc.columns[-1]

# Sort chronologically ONCE and slice each partition once; the original
# re-sorted the frame/series four times to obtain the same partitions.
df_sorted = df_preproc.sort_index()
df_trainval = df_sorted.loc[:date_val_end]
df_holdout = df_sorted.loc[date_test_start:]

# Partition up to the cutoff date (features / label).
X = df_trainval.drop(columns=label_field)
y = df_trainval[label_field]

# Partition starting the day after the cutoff (features / label).
X_test = df_holdout.drop(columns=label_field)
y_test = df_holdout[label_field]

# Feature names ordered by ascending 'Mean Rank'.
feat_ranks = df_feature_ranks.sort_values('Mean Rank')['Feature'].to_list()





In [None]:
pds = MultiPipe()

# Pipeline over the full ranked feature list, followed by a seeded LinearSVR.
# NOTE(review): filtered_transformer presumably restricts the input to the
# listed features - confirm in CustomLibs.CustomTransformers.
svr_pipe = make_pipeline(
    filtered_transformer(feat_ranks),
    LinearSVR(dual='auto', max_iter=10000, random_state=43),
)

# Hyperparameter grid; keys carry the 'linearsvr__' prefix of the pipeline step.
search_grid_1 = {
    'linearsvr__epsilon': [1, 0.1, 0.01, 0.001, 0],
    'linearsvr__C': [0.01, 0.1, 1, 10, 100],
    'linearsvr__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive'],
}

# Metrics recorded during the search; the refit below selects on 'RMS Error'.
scoring = {
    'Mean Absolute Error': 'neg_mean_absolute_error',
    'RMS Error': 'neg_root_mean_squared_error',
    'R^2 Score': 'r2',
}

if not Config.REGEN_GRID_SEARCH_SVR:
    # Reuse the parameters saved by a previous search.
    svr_gs1_parameters = load_from_file('svr_gs1_parameters')
else:
    svr_gridsearch = GridSearchCV(
        svr_pipe,
        search_grid_1,
        scoring=scoring,
        refit='RMS Error',
        cv=pds.CV,
        n_jobs=-1,
        verbose=10,
    )
    svr_gridsearch.fit(X, y)
    # Strip the pipeline step prefix so the dict can be passed to a bare LinearSVR.
    svr_gs1_parameters = {
        param_name.replace('linearsvr__', ''): param_value
        for param_name, param_value in svr_gridsearch.best_params_.items()
    }
    save_to_file(svr_gs1_parameters, 'svr_gs1_parameters')

# Show the searched grid, then the selected parameters.
for grid_key, grid_values in search_grid_1.items():
    print(f'{grid_key}: {grid_values}')

for best_key, best_value in svr_gs1_parameters.items():
    print(f'{best_key}: {best_value}')



In [None]:
# Same search as the full-feature run, restricted to the 20 best-ranked features.
svr_pipe = make_pipeline(
    filtered_transformer(feat_ranks[:20]),
    LinearSVR(dual='auto', max_iter=10000, random_state=43),
)

if not Config.REGEN_GRID_SEARCH_SVR:
    # Reuse the parameters saved by a previous search.
    svr_gs1_parameters_t20 = load_from_file('svr_gs1_parameters_t20')
else:
    svr_gridsearch = GridSearchCV(
        svr_pipe,
        search_grid_1,
        scoring=scoring,
        refit='RMS Error',
        cv=pds.CV,
        n_jobs=-1,
        verbose=10,
    )
    # Only the top-20 columns are passed to the search.
    svr_gridsearch.fit(X[feat_ranks[:20]], y)
    # Strip the pipeline step prefix so the dict can be passed to a bare LinearSVR.
    svr_gs1_parameters_t20 = {
        param_name.replace('linearsvr__', ''): param_value
        for param_name, param_value in svr_gridsearch.best_params_.items()
    }
    save_to_file(svr_gs1_parameters_t20, 'svr_gs1_parameters_t20')

# Show the searched grid, then the selected parameters.
for grid_key, grid_values in search_grid_1.items():
    print(f'{grid_key}: {grid_values}')

for best_key, best_value in svr_gs1_parameters_t20.items():
    print(f'{best_key}: {best_value}')

In [None]:

# Solver settings that both grid searches held fixed while tuning. The tuned
# models must be rebuilt with the same settings: otherwise they are scored
# with the default max_iter and an unseeded solver, which is neither the
# configuration that was selected nor reproducible between runs.
svr_fixed_params = {'dual': 'auto', 'max_iter': 10000, 'random_state': 43}

pds.AddPreProc(filtered_transformer(feat_ranks),'preproc')
pds.Regressors={}
# Baseline keeps LinearSVR's default hyperparameters; only the seed is pinned
# so repeated runs give the same scores.
pds.Regressors['SVR Baseline'] = LinearSVR(random_state=43)
# Tuned values are applied last so they override anything in the fixed settings.
pds.Regressors['SVR GS1']=LinearSVR(**svr_fixed_params).set_params(**svr_gs1_parameters)
pds.Regressors['SVR GS1 T20']=LinearSVR(**svr_fixed_params).set_params(**svr_gs1_parameters_t20)

pds.AddQCSet('preproc','SVR Tuning')

# pds.CalculateScores('SVR Tuning','preproc','Models',X,y)
# _ = pds.GetScores(qc_set_keys=['SVR Tuning'],metric_keys=['R^2 Score','RMS Error','Mean Absolute Error'],verbose=False)

# for a in _:
#     print(a)


In [None]:
# Feature names ordered by ascending 'Mean Rank'.
feat_ranks = df_feature_ranks.sort_values('Mean Rank')['Feature'].to_list()

# Sweep the number of top-ranked features from 70 down to 10 in steps of 10,
# registering one preprocessor + QC set per size and scoring it.
for t in range(70,0,-10):
# for t in range(5,75,10):
    k='Top ' + str(t)
    pp_key='pp'+ str(t)
    top_features=feat_ranks[:t]
    tra=filtered_transformer(top_features)
    pds.AddPreProc(tra,pp_key)
    pds.AddQCSet(pp_key,'SVR Tuning Tests')
    _ = pds.CalculateScores('SVR Tuning Tests',pp_key,k,X[top_features],y,verbose=False)


temp = pds.GetScores(qc_set_keys=['SVR Tuning Tests'],metric_keys=['R^2 Score','RMS Error','Mean Absolute Error'],verbose=False)

# One panel per active metric.
n_metrics=len(pds.active_metrics)
fig,axs=plt.subplots(1,n_metrics,figsize=(0.5+5*n_metrics,4))
# NOTE(review): this label is overwritten with 'Selected Features' after
# GraphScores runs; kept in case GraphScores reads it - confirm.
for ax in axs:
    ax.set_xlabel('Top N Features Selected')
pds.GraphScores(qc_set_key='SVR Tuning Tests',axs=axs)

# Fixed y-ranges per panel.
# NOTE(review): assumes exactly three panels in a fixed metric order - confirm
# against pds.active_metrics.
axs[0].set_ylim(0.015,0.035)
axs[1].set_ylim(0.02,0.045)
axs[2].set_ylim(0.85,1.0)

# Series drawn dashed, both on the plot and in its legend.
# NOTE(review): the 'Random Forest ...' labels look carried over from another
# notebook and may never match a series here - confirm.
dashed_labels={'_child8','_child10','Random Forest RandomSearch T60','Random Forest GridSearch T60'}

for ax in axs:
    # get_legend() returns None when the axes has no legend; treat that as
    # "no legend lines" instead of raising AttributeError.
    leg=ax.get_legend()
    legend_lines=leg.get_lines() if leg is not None else []
    for line in [*ax.get_lines(),*legend_lines]:
        # print(line.get_label())
        if line.get_label() in dashed_labels:
            line.set_linestyle('--')
    ax.set_xlabel('Selected Features')

fig.suptitle('Mean Feature Ranking Metrics, Linear SVR Hyperparameter Tuning',fontsize=12,fontweight='bold')
fig.tight_layout()
fig.savefig('./Output Files/Images/Model Tuning/SVR_tuning_metrics.png',format='png',bbox_inches='tight')
