<div class="alert alert-block alert-success">
    <h1 align="center">Random Search for Hyperparameter Tuning</h1>
    <h3 align="center"> Tabular Time Series</h3>
    <h5 align="center">Github: (https://github.com/MTisMT)</h5>
</div>

In [1]:
import pickle
import requests
import itertools
import numpy as np
import pandas as pd
import xgboost as xgb
from scipy import stats
from scipy.stats.mstats import spearmanr

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*" and
# later releases removed the old names, so use whichever name this install provides.
plt.style.use("seaborn" if "seaborn" in plt.style.available else "seaborn-v0_8")
import lightgbm as lgb
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
 
import tensorflow as tf
from keras import optimizers
from keras import regularizers
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Input

# Time series cross validation

In [2]:
def TS_crossval_block(X,y,k=5):
    """Split time-ordered data into ``k`` blocked (sliding-window) folds.

    The rows are cut into windows of ``X.shape[0] // (k + 1)`` rows. Fold ``i``
    trains on window ``i`` and tests on the window right after it; the last
    fold's test set runs to the end of the data, so it also absorbs any
    remainder rows.

    Parameters
    ----------
    X, y : sliceable containers with matching row order; ``X`` must expose ``.shape``.
    k : int, number of folds (at least 1).

    Returns
    -------
    X_train, X_test, y_train, y_test : dict
        Each maps the fold number (1..k) to the corresponding slice.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    wsize = X.shape[0]//(k+1)
    X_train={}; y_train={}
    X_test={}; y_test={}
    for fold in range(1,k+1):
        train_lb = (fold-1)*wsize
        train_ub = train_lb + wsize
        # An open-ended slice on the last fold keeps the leftover rows in its test set.
        test_ub = None if fold == k else train_ub + wsize
        X_train[fold] = X[train_lb:train_ub]; y_train[fold] = y[train_lb:train_ub]
        X_test[fold] = X[train_ub:test_ub]; y_test[fold] = y[train_ub:test_ub]
    return X_train,X_test,y_train,y_test

In [3]:
def TS_crossval(X,y,k=5):
    """Split time-ordered data into ``k`` expanding-window folds.

    The rows are cut into windows of ``X.shape[0] // (k + 1)`` rows. Fold ``i``
    trains on everything from the start up to the end of window ``i`` and tests
    on the window right after it; the last fold's test set runs to the end of
    the data, so it also absorbs any remainder rows.

    Parameters
    ----------
    X, y : sliceable containers with matching row order; ``X`` must expose ``.shape``.
    k : int, number of folds (at least 1).

    Returns
    -------
    X_train, X_test, y_train, y_test : dict
        Each maps the fold number (1..k) to the corresponding slice.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    wsize = X.shape[0]//(k+1)
    X_train={}; y_train={}
    X_test={}; y_test={}
    for fold in range(1,k+1):
        # The training window always starts at row 0 and grows by one window per fold.
        train_ub = fold*wsize
        # An open-ended slice on the last fold keeps the leftover rows in its test set.
        test_ub = None if fold == k else train_ub + wsize
        X_train[fold] = X[0:train_ub]; y_train[fold] = y[0:train_ub]
        X_test[fold] = X[train_ub:test_ub]; y_test[fold] = y[train_ub:test_ub]
    return X_train,X_test,y_train,y_test

# Score of models

In [4]:
def scorer(y_test, y_pred):
    """Return the Spearman rank correlation between targets and predictions.

    This is the single place where the evaluation metric is defined; replace
    the body with whatever score suits the problem.
    """
    rho, _pvalue = stats.spearmanr(y_test, y_pred)
    return rho

# Deep Learning models

## MLP

In [5]:
def ANN_model(X,y,X_t,y_t,lr=0.005,bs=100, ep=10, actv='relu',
                  min_lr=0.00001, f_lr=0.2, reg=0,h_layers=2):
    """Build, train and score a fully connected regression network.

    Architecture: an input layer sized from ``X.shape[1]``, one
    Dense(1000)+Dropout(0.3) block followed by ``h_layers + 1`` further
    identical blocks, one extra Dropout(0.3), and a single linear output unit.

    Parameters
    ----------
    X, y : training features and targets.
    X_t, y_t : hold-out features and targets, passed to ``fit`` as validation data.
    lr : learning rate for Adam.
    bs : batch size.
    ep : maximum number of epochs.
    actv : activation used by every hidden Dense layer.
    min_lr, f_lr : lower bound and reduction factor for ReduceLROnPlateau.
    reg : strength used for the L1/L2 kernel penalty and the L2 bias penalty.
    h_layers : controls the number of hidden blocks (see above).

    Returns
    -------
    (model, score) : the fitted model, with the value returned by ``fit``
        stored on ``model.hist``, and a dict holding ``'train_score'`` and
        ``'test_score'`` as computed by ``scorer``.
    """
    def _add_hidden_block(net):
        # One regularised 1000-unit Dense layer followed by 30% dropout.
        net.add(Dense(1000,activation=actv, kernel_regularizer=regularizers.l1_l2(l1=reg, l2=reg),
                      bias_regularizer=regularizers.l2(reg)))
        net.add(Dropout(0.3))

    n_features = X.shape[1]
    model = Sequential()
    model.add(Input(shape=(n_features,)))
    _add_hidden_block(model)
    for _ in range(h_layers+1):
        _add_hidden_block(model)

    # A second dropout sits between the last hidden block and the output unit.
    model.add(Dropout(0.3))
    model.add(Dense(1,activation='linear'))

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),loss='mse')

    # Early stopping watches the training loss; the LR schedule watches the validation loss.
    callbacks = [
        EarlyStopping(monitor='loss', restore_best_weights=True, patience=7),
        ReduceLROnPlateau(monitor='val_loss', factor=f_lr,
                          patience=5, min_lr=min_lr),
    ]
    # shuffle=False keeps the rows in their given order during training.
    model.hist = model.fit(X,y,epochs=ep, callbacks=callbacks,
                           validation_data=(X_t,y_t),shuffle=False, batch_size=bs,verbose=1)

    # predict returns a 2-D array; take its first column before scoring.
    train_score = scorer(y,model.predict(X)[:,0])
    test_score = scorer(y_t,model.predict(X_t)[:,0])

    return model, {'train_score': train_score, 'test_score': test_score}

# Conventional ML models

## XGBoost

In [6]:
def xg_boost_r(X_train, y_train,X_test=None ,y_test=None,n_est=300,lr=0.001,max_d=2,subcols=0.5,min_leaf=3):
    """Fit an XGBoost regressor and attach its scores to the returned model.

    Parameters
    ----------
    X_train, y_train : training features and targets.
    X_test, y_test : optional hold-out data; scored only when both are given.
    n_est : number of boosting rounds (``n_estimators``).
    lr : learning rate.
    max_d : maximum tree depth.
    subcols : per-tree column subsampling ratio (``colsample_bytree``).
    min_leaf : forwarded as ``min_child_weight``.

    Returns
    -------
    The fitted ``XGBRegressor`` with ``train_score`` set, plus ``test_score``
    when hold-out data was supplied.
    """
    settings = dict(objective='reg:squarederror', max_depth=max_d, learning_rate=lr,
                    n_estimators=n_est, n_jobs=-1, colsample_bytree=subcols,
                    min_child_weight=min_leaf)
    model = xgb.XGBRegressor(**settings)
    model.fit(X_train, y_train)

    model.train_score = scorer(y_train, model.predict(X_train))

    holdout_missing = X_test is None or y_test is None
    if not holdout_missing:
        model.test_score = scorer(y_test, model.predict(X_test))

    return model

In [None]:
def lightgbm_r(X_train, y_train,X_test=None ,y_test=None,n_est=300,lr=0.01,max_d=2,subcols=0.5,min_leaf=31):
    """Fit a LightGBM regressor and attach its scores to the returned model.

    Parameters
    ----------
    X_train, y_train : training features and targets.
    X_test, y_test : optional hold-out data; scored only when both are given.
    n_est : number of boosting rounds (``n_estimators``).
    lr : learning rate.
    max_d : maximum tree depth.
    subcols : per-tree column subsampling ratio (``colsample_bytree``).
    min_leaf : despite the name, this value is forwarded as ``num_leaves``.

    Returns
    -------
    The fitted ``LGBMRegressor`` with ``train_score`` set, plus ``test_score``
    when hold-out data was supplied.
    """
    settings = dict(max_depth=max_d, learning_rate=lr,
                    n_estimators=n_est, colsample_bytree=subcols,
                    num_leaves=min_leaf)
    model = lgb.LGBMRegressor(**settings)
    model.fit(X_train, y_train)

    model.train_score = scorer(y_train, model.predict(X_train))

    holdout_missing = X_test is None or y_test is None
    if not holdout_missing:
        model.test_score = scorer(y_test, model.predict(X_test))

    return model

# Random Search

In [7]:
def random_search(params,ratio=0.2,seed=None):
    """Draw a random subset of the full hyper-parameter grid.

    Parameters
    ----------
    params : dict mapping a parameter name to the list of values to try.
        Values in each returned combination follow the dict's key order.
    ratio : fraction of the full grid to keep. Values above 1 return the
        whole grid (in random order).
    seed : optional seed for a reproducible draw. When ``None`` the global
        NumPy random state is used, so ``np.random.seed`` is still honoured.

    Returns
    -------
    list of lists, one inner list per selected combination, without repeats.

    Raises
    ------
    ValueError
        If ``ratio`` is negative (a negative count would otherwise be read
        as a from-the-end slice and silently return most of the grid).
    """
    if ratio < 0:
        raise ValueError("ratio must be non-negative, got %r" % (ratio,))

    grid = [list(combo) for combo in itertools.product(*params.values())]
    # Clamp so the reported count can never exceed the grid size.
    num = min(int(ratio * len(grid)), len(grid))

    if seed is None:
        order = np.random.permutation(len(grid))
    else:
        order = np.random.default_rng(seed).permutation(len(grid))
    random_locs = order[:num]

    print(num,"sets of parameters has randomly selected among", len(grid), "possible sets")
    return [grid[i] for i in random_locs]

In [8]:
# Search space for the MLP. Key order matters: random_search emits values in this
# order and Tune_model unpacks every combination positionally as (lr, ep, h_lr, bs).
params = {'lr': [0.05,0.01,0.001,0.0001], 
           'ep':[2,5,10,20,50],
           'h_layer':[0,1,2,4],
           'bs':[32,64]}

# Search space for the classic ML branch of Tune_model. Key order matters here too:
# combinations are unpacked as (subcols, max_d, lr, min_l), and min_l is passed to
# lightgbm_r as min_leaf, which forwards it as num_leaves.
param_cml={'subcols':[0.3,0.5,0.9], 
           'max_d':[2,3,5,7,10,15,None],
           'lr': [0.5,0.1,0.01,0.05,0.001,0.0001],
           'min_l': [10,20]}

# Preview of the sampler: keeps 20% of the 4*5*4*2 = 160 MLP combinations.
random_search(params,ratio=0.2)

32 sets of parameters has randomly selected among 160 possible sets


[[0.05, 10, 0, 64],
 [0.001, 2, 2, 64],
 [0.01, 20, 4, 64],
 [0.01, 50, 2, 64],
 [0.05, 2, 1, 64],
 [0.01, 10, 0, 32],
 [0.01, 20, 2, 32],
 [0.0001, 20, 1, 64],
 [0.01, 50, 0, 32],
 [0.001, 10, 4, 32],
 [0.0001, 10, 0, 32],
 [0.0001, 50, 1, 32],
 [0.01, 20, 1, 64],
 [0.05, 20, 2, 64],
 [0.0001, 10, 1, 32],
 [0.0001, 5, 2, 32],
 [0.001, 10, 0, 64],
 [0.0001, 50, 4, 32],
 [0.001, 50, 4, 32],
 [0.0001, 20, 2, 32],
 [0.05, 50, 2, 64],
 [0.01, 5, 0, 64],
 [0.05, 10, 4, 64],
 [0.001, 20, 1, 64],
 [0.0001, 20, 4, 64],
 [0.05, 50, 2, 32],
 [0.001, 2, 2, 32],
 [0.0001, 20, 4, 32],
 [0.0001, 5, 4, 64],
 [0.001, 20, 2, 32],
 [0.001, 50, 1, 32],
 [0.05, 2, 1, 32]]

# Tune Models

In [9]:
def Tune_model(data, params, ANN= True,random_ratio = 0.2 ,model_name='model', folds=5):
    """Evaluate randomly sampled hyper-parameter sets with time-series cross validation.

    Parameters
    ----------
    data : dict with keys ``'X_train'``, ``'X_test'``, ``'y_train'``, ``'y_test'``;
        each value is indexed by fold number ``1..folds`` (the layout returned
        by ``TS_crossval`` / ``TS_crossval_block``).
    params : dict describing the search grid. It must have exactly four keys
        in the order the selected model expects:
        ``(lr, ep, h_layers, bs)`` when ``ANN`` is true, otherwise
        ``(subcols, max_d, lr, min_l)``.
    ANN : train ``ANN_model`` when true, ``lightgbm_r`` (500 estimators) otherwise.
    random_ratio : fraction of the full grid handed to ``random_search``.
    model_name : accepted for compatibility; not used by this function.
    folds : number of folds to evaluate for every parameter set.

    Returns
    -------
    pandas.DataFrame, indexed from 1, with one row per parameter set: the
    parameter values, the mean/std of test and train scores across folds,
    then the per-fold test scores followed by the per-fold train scores.
    """
    X_train = data['X_train']
    X_test = data['X_test']
    y_train = data['y_train']
    y_test = data['y_test']

    random_parameters = random_search(params,ratio=random_ratio)

    fold_ids = range(1,folds+1)
    columns = (list(params.keys())
               + ['test_score_mean','test_score_std','train_score_mean','train_score_std']
               + [f'test_score_{k}' for k in fold_ids]
               + [f'train_score_{k}' for k in fold_ids])

    print(len(random_parameters),' models each in' , len(X_train) , 'time series cross validation ...')

    rows = []
    for counter, combo in enumerate(random_parameters, start=1):
        test_score=[]
        train_score=[]
        for k in fold_ids:
            print(counter,'HyperParam set', 'in', k,'crossval fold')

            if ANN:
                lr,ep,h_lr,bs = combo
                _, score = ANN_model(X_train[k],y_train[k],X_test[k],y_test[k],
                                     lr=lr,bs=bs, ep=ep, actv='relu',min_lr=0.00001,
                                     f_lr=0.2, reg=0, h_layers=h_lr)
                fold_test = score['test_score']
                fold_train = score['train_score']
            else:
                subcols,max_d,lr,min_l = combo
                model_F_r = lightgbm_r(X_train[k],y_train[k],X_test[k],y_test[k],
                                       n_est=500,lr=lr,max_d=max_d,subcols=subcols,min_leaf=min_l)
                fold_test = model_F_r.test_score
                fold_train = model_F_r.train_score

            test_score.append(fold_test)
            train_score.append(fold_train)

        # Every sampled value goes into the row so its length matches the header.
        rows.append([*combo,
                     np.mean(test_score),np.std(test_score),
                     np.mean(train_score),np.std(train_score),
                     *test_score,*train_score])
    print('Done!')

    # Build the frame once from the collected rows, labelled 1..n.
    Grid_cv = pd.DataFrame(rows, columns=columns)
    Grid_cv.index = pd.RangeIndex(1, len(rows)+1)
    return Grid_cv

# Action

In [10]:
#folds=4
#sub_sample = 200
#X_train,X_test,y_train,y_test=TS_crossval_block(train_data[:sub_sample],train_targets[:sub_sample],k=folds) #Sub
#X_train,X_test,y_train,y_test=TS_crossval_block(train_data,train_targets,k=folds)

In [11]:
#data={'X_train': X_train, 'X_test': X_test,
 #     'y_train': y_train, 'y_test': y_test}

In [12]:
#Grid_cv_r = Tune_model(data, params = params, ANN= True, random_ratio = 0.2 , model_name='ANN', folds = folds)
#Grid_cv_r.to_csv('tune_random_search_r.csv',index=False)