#Perform space hyperparameters grid search

In [0]:
%tensorflow_version 2.x 

In [0]:
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, STATUS_RUNNING
from functools import partial

In [30]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tqdm import tqdm

tf.__version__

'2.0.0'

In [0]:
# Define searched space
space = {
    'batchsize': hp.choice('batchsize', [16, 32, 64, 128, 256, 512, 1024]),
    'epoch': hp.choice('epoch', [10, 20, 30]),
    'nfold': 5,
    'PCA_comp' : hp.choice('PCA_comp', [None, 90, 100, 110, 80, 70]),
    'lr' : hp.choice('lr', [0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1]),
    'beta1' : hp.choice('beta1', [0.9, 0.7, 0.5, 0.3, 0.1]),
    'beta2' : hp.choice('beta2', [0.999,0.995,0.99, 0.95]),
    'amsgrad' : hp.choice('amsgrad', [True, False]),
    'activation' : hp.choice('activation', ['leakyRelu', 'elu', 'linear', 'sigmoid', 'relu'])      
 }

## Load Data

In [0]:
train = pd.read_csv('train_eng.csv')
test = pd.read_csv('test_eng.csv')

In [33]:
print(train.isnull().sum().sum())
print(test.isnull().sum().sum())

0
0


## Apply Kfold Split
Instead of looping on kf.split, take the nfold test split and merge 4 of them to build train data

In [34]:
kf = KFold(n_splits=space['nfold'], random_state=750, shuffle=True)
train_folds = [None for x in range(space['nfold'])]

i=0
for _, test_indexed in kf.split(train):
  train_folds[i] = train.iloc[test_indexed,:]
  i += 1

for item in train_folds:
  print(item.shape)

(4968, 141)
(4968, 141)
(4968, 141)
(4968, 141)
(4968, 141)


In [0]:
def PowerData(df):
    '''
    add for all none categorical data it's square
    '''
    return df
    
    square_columns = [col for col in df.columns[~df.columns.str.startswith('Cat_')]]
    
    #print('Nb columns to square :', len(square_columns))
    
    for col in square_columns:
        df[f'{col}_x2'] = df[col]**2
        df[f'{col}_x3'] = df[col]**3
        df[f'{col}_x0_5'] = df[col]**0.5
        
    return df

Build a normalize that will apply standard scaler and PCA if required

In [0]:
def NormalizeData(train, CVorTest, PCA_comp = None, ScaleCat = False):
    '''
    Normalize data using a standard scaler
    train:
        dataframe that will be use to fit and transformed by the scaler and PCA
    CVorTest:
        dataframe that will be transformed the scaler and PCA
    PCA_comp:
        Number of PCA components to keep, if None, PCA not applied
    ScaleCat:
        Scale or not the categorical columns with the standard scaler
    '''
    
    sc = StandardScaler()
    
    if ScaleCat:
        scale_columns = train.columns
    else:
        scale_columns = [col for col in train.columns[~train.columns.str.startswith('Cat_')]]
          
    #perform feature scaling    
    #print('Nb columns to scale :', len(scale_columns))
    train.loc[:, scale_columns] = sc.fit_transform(train.loc[:, scale_columns]) 
    CVorTest.loc[:, scale_columns] = sc.transform(CVorTest.loc[:, scale_columns]) 
    
    if PCA_comp is None:
        return train.values, CVorTest.values
    
    pca = PCA(PCA_comp)
    train = pca.fit_transform(train)
    CVorTest = pca.transform(CVorTest)
    
    return train, CVorTest

In [0]:
def BuildModel(nb_features, activation = 'leakyRelu', Batchnorm=True):
  '''
    Build NN model
    nb_features:
      number of features in input
    Batchnorm:
      Apply or not batchnorm in hidden layers
    activation:
      Activation to use in hidden layer : leakyRelu, relu, sigmoid, linear, elu
  '''
  model = tf.keras.models.Sequential()

  layers_dim = [512, 1024, 256, 128, 128, 128, 64]

  for dim in layers_dim:
    model.add(tf.keras.layers.Dense(dim, input_shape=(nb_features,)))
    if Batchnorm:
      model.add(tf.keras.layers.BatchNormalization())
    if activation == 'leakyRelu':
      model.add(tf.keras.layers.LeakyReLU(alpha=0.05))
    else:
      model.add(tf.keras.layers.Activation(activation))

  model.add(tf.keras.layers.Dense(4))
  model.add(tf.keras.layers.Activation('softmax'))

  return model

In [0]:
#metrics to compare model with Kaggle scoring
def f1_macro(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float64)
    y_pred = tf.cast(tf.round(y_pred), tf.float64)

    TP = tf.cast(tf.math.count_nonzero(y_pred * y_true, axis=None), tf.float64)
    FP = tf.cast(tf.math.count_nonzero(y_pred * (y_true - 1), axis=None), tf.float64)
    FN = tf.cast(tf.math.count_nonzero((y_pred - 1) * y_true, axis=None), tf.float64)
    
    precision = tf.math.divide_no_nan(TP, TP + FP)
    recall = tf.math.divide_no_nan(TP, TP + FN)
    f1 = tf.math.divide_no_nan(2 * precision * recall,precision + recall)
    return tf.reduce_mean(f1)

def f1_micro(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float64)
    y_pred = tf.cast(tf.round(y_pred), tf.float64)

    TP = tf.cast(tf.math.count_nonzero(y_pred * y_true, axis=0), tf.float64)
    FP = tf.cast(tf.math.count_nonzero(y_pred * (y_true - 1), axis=0), tf.float64)
    FN = tf.cast(tf.math.count_nonzero((y_pred - 1) * y_true, axis=0), tf.float64)
    
    precision = tf.math.divide_no_nan(TP, TP + FP)
    recall = tf.math.divide_no_nan(TP, TP + FN)
    f1 = tf.math.divide_no_nan(2 * precision * recall,precision + recall)
    return tf.reduce_mean(f1)

## Performing the gridsearch
Will use a train/CV split : 80/20

Results will be store on a csv file on google drive

In [39]:
from google.colab import drive
drive.mount('/content/drive')
save_file = '/content/drive/My Drive/Kaggle/EmailClass/gridSearch.csv'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
def evaluateModel(params):
  nfolds = params['nfold']
  loss_CV_folds = [None for i in range(nfolds)]
  f1_macro_CV_folds = [None for i in range(nfolds)]
  f1_micro_CV_folds = [None for i in range(nfolds)]

  for i in range(nfolds):
    CV_block = i
    train_blocks = list(range(nfolds))
    train_blocks.pop(i)

    train = pd.concat([train_folds[j].copy() for j in train_blocks])
    CV = train_folds[CV_block].copy()

    #extract labels & convert to categorical
    labels_train = pd.get_dummies(train['label'])
    labels_CV = pd.get_dummies(CV['label'])

    #remove labels from train
    train.drop(columns=['label'], inplace=True)
    CV.drop(columns=['label'], inplace=True)

    #apply normalization
    train, CV = NormalizeData(PowerData(train), PowerData(CV),PCA_comp=params['PCA_comp'])

    #build model
    model = BuildModel(train.shape[1], activation=params['activation'])
    adam = tf.keras.optimizers.Adam(learning_rate=params['lr'], beta_1=params['beta1'], beta_2=params['beta2'], amsgrad=params['amsgrad'])
    model.compile('adam',loss='categorical_crossentropy', metrics=[f1_macro, f1_micro])

    #run model
    hist = model.fit(train, labels_train.values, validation_data=(CV, labels_CV.values), 
            epochs = params['epoch'], verbose = 0, batch_size=params['batchsize'])
    
    result = model.evaluate(CV, labels_CV.values, batch_size=params['batchsize'], verbose=0)
    loss_CV_folds[i] = result[0]
    f1_macro_CV_folds[i] = result[1]
    f1_micro_CV_folds[i] = result[2]

  print(params)
  print(f1_micro_CV_folds)

  params['CV loss'] = np.asarray(loss_CV_folds).mean()
  params['CV FScore macro'] = np.asarray(f1_macro_CV_folds).mean()
  params['CV FScore micro'] = np.asarray(f1_micro_CV_folds).mean()
  
  with open (save_file, 'a+') as fp:
        fp.write(str(params) +'\n')

  return {
        'loss': 1- params['CV FScore micro'],
        'status': STATUS_OK,
        'stats_running': STATUS_RUNNING
    }

In [0]:
# Trail
trials = Trials()

# Set algoritm parameters
algo = partial(tpe.suggest, 
               n_startup_jobs=-1)

# Seting the number of Evals
MAX_EVALS= 100

# Fit Tree Parzen Estimator
best_vals = fmin(evaluateModel, space=space, verbose=1,
                 algo=algo, max_evals=MAX_EVALS, trials=trials)

# Print best parameters
best_params = space_eval(space, best_vals)

{'PCA_comp': None, 'activation': 'sigmoid', 'amsgrad': False, 'batchsize': 256, 'beta1': 0.1, 'beta2': 0.99, 'epoch': 10, 'lr': 0.003, 'nfold': 5}
[0.932103, 0.93260354, 0.9388524, 0.9274831, 0.9301233]
{'PCA_comp': None, 'activation': 'sigmoid', 'amsgrad': False, 'batchsize': 256, 'beta1': 0.1, 'beta2': 0.99, 'epoch': 10, 'lr': 0.003, 'nfold': 5}
[0.932776, 0.9369172, 0.9352912, 0.9285185, 0.9242047]
{'PCA_comp': 100, 'activation': 'sigmoid', 'amsgrad': False, 'batchsize': 16, 'beta1': 0.9, 'beta2': 0.95, 'epoch': 20, 'lr': 0.003, 'nfold': 5}
[0.8984261, 0.9082019, 0.9021843, 0.89635915, 0.91100997]
{'PCA_comp': None, 'activation': 'elu', 'amsgrad': True, 'batchsize': 512, 'beta1': 0.5, 'beta2': 0.99, 'epoch': 30, 'lr': 0.0001, 'nfold': 5}
[0.94457567, 0.94937074, 0.94813186, 0.94039774, 0.9392177]
{'PCA_comp': 90, 'activation': 'elu', 'amsgrad': True, 'batchsize': 512, 'beta1': 0.5, 'beta2': 0.995, 'epoch': 30, 'lr': 0.0001, 'nfold': 5}
[0.9425166, 0.94393605, 0.94421625, 0.93742764,

In [0]:
best_params