# Retraining Models
As per Nigel's instructions, it is probably best now to take our tuned models and retrain them on the normalisation mode. Hopefully this will still yield good results, considering that this mode has a similar topology.

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
import xgboost
from tensorflow.keras import callbacks
from joblib import load, dump
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

## Load Prerequisites
We need to load the original models, or their architectures if they do not support retraining from their last saved weights. We also need to load the correct data. In this case version 8.0.5 is the normalisation mode data with preselection applied, the ratio of signal to background events forced to 1 and the mass sideband restriction implemented. The $q^2$ veto has also been applied.

In [2]:
# --- Data ---------------------------------------------------------------
base_path = "../data_files"
version = "8.0.5"

# Columns removed from the feature matrix: the label ('category') plus
# three further columns that are not used as model inputs.
non_feature_cols = ['category', 'Lb_M', 'IsSimulated', 'QSQR']

# Training split: first CSV column is the index.
train = pd.read_csv(f'{base_path}/{version}/train.csv', index_col=[0])
X_train = train.drop(columns=non_feature_cols)
y_train = train['category']

# Validation split, prepared exactly like the training split.
val = pd.read_csv(f'{base_path}/{version}/val.csv', index_col=[0])
X_val = val.drop(columns=non_feature_cols)
y_val = val['category']

# --- Models -------------------------------------------------------------
# The previously tuned 9.0.5 models. joblib files are pickles, so only
# load artefacts that come from a trusted source.
nn = tf.keras.models.load_model('../neural_network/models/v9.0.5')

sk_model_dir = '../classification_methods/models'
knn = load(f'{sk_model_dir}/KNN_9.0.5_tune.joblib')
rfc = load(f'{sk_model_dir}/RFC_9.0.5_tune.joblib')
dtc = load(f'{sk_model_dir}/DTC_9.0.5_tune.joblib')
xgb = load(f'{sk_model_dir}/XGB_9.0.5.joblib')

2022-04-07 15:28:18.397762: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-04-07 15:28:18.397816: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-04-07 15:28:18.397867: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (eprexb.ph.bham.ac.uk): /proc/driver/nvidia/version does not exist
2022-04-07 15:28:18.398193: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
ht

## Retrain Models

In [20]:
# Neural Network
# Continue training the loaded network on the 8.0.5 data. Both callbacks use
# their default monitored quantity (Keras default: `val_loss`).

# Stop once the monitored value has not improved by at least 0.0005 for 20
# epochs, and roll the weights back to the best epoch seen.
early_stop = callbacks.EarlyStopping(
    patience=20,
    min_delta=0.0005,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 after 5 epochs without improvement.
# NOTE(review): min_lr=0.001 is a floor on the learning rate; if the loaded
# model already trains at a rate <= 0.001 this callback can never lower it.
# The optimiser settings are not visible here -- TODO confirm.
lr_schedule = callbacks.ReduceLROnPlateau(
    patience=5,
    factor=0.2,
    min_lr=0.001,
)

myCallbacks = [early_stop, lr_schedule]

history = nn.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=80,
    batch_size=512,
    callbacks=myCallbacks,
    verbose=1,
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80


In [21]:
# Persist the retrained network under the 8.0.5 data-version tag.
nn_save_path = 'models/NN_8.0.5'
nn.save(nn_save_path)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
INFO:tensorflow:Assets written to: models/NN_8.0.5/assets


In [15]:
# Other models need to be re-trained from the start: each estimator is
# rebuilt with fixed hyper-parameters and fitted on the 8.0.5 training split.
# The estimator classes are imported in the notebook's top import cell.

# Fixed seed so that the randomised tree-based fits give the same models on
# every Restart & Run All.
SEED = 42

# k-nearest neighbours: no random component, so no seed is needed.
knn = KNeighborsClassifier(n_neighbors=15, leaf_size=30, weights='uniform', p=2)
knn.fit(X_train, y_train)

# Random forest: bootstrap sampling and feature sub-sampling are random.
rfc = RandomForestClassifier(
    n_estimators=80,
    min_weight_fraction_leaf=0.0,
    max_depth=8,
    min_samples_split=2,
    random_state=SEED,
)
rfc.fit(X_train, y_train)

# Single decision tree: seeded so that ties between equally good splits
# are resolved the same way on every run.
dtc = DecisionTreeClassifier(max_depth=6, random_state=SEED)
dtc.fit(X_train, y_train)

DecisionTreeClassifier(max_depth=6)

In [18]:
# Finally re-train the XGB model
# Build a fresh, unfitted classifier that copies the hyper-parameters of the
# loaded model `xgb`; `xgb` itself is left untouched and the retrained model
# lives in `xgb_clf`.
params = xgb.get_params()
xgb_clf = xgboost.XGBClassifier(**params)

# Track AUC on the validation split and stop once it has not improved for
# 20 consecutive boosting rounds.
xgb_fit_options = dict(
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=20,
    eval_metric='auc',
)
xgb_clf.fit(X_train, y_train, **xgb_fit_options)

[0]	validation_0-auc:0.92548


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


[1]	validation_0-auc:0.92880
[2]	validation_0-auc:0.93652
[3]	validation_0-auc:0.94356
[4]	validation_0-auc:0.94524
[5]	validation_0-auc:0.94876
[6]	validation_0-auc:0.95039
[7]	validation_0-auc:0.95117
[8]	validation_0-auc:0.95288
[9]	validation_0-auc:0.95332
[10]	validation_0-auc:0.95425
[11]	validation_0-auc:0.95495
[12]	validation_0-auc:0.95606
[13]	validation_0-auc:0.95633
[14]	validation_0-auc:0.95670
[15]	validation_0-auc:0.95767
[16]	validation_0-auc:0.95833
[17]	validation_0-auc:0.95895
[18]	validation_0-auc:0.95947
[19]	validation_0-auc:0.96015
[20]	validation_0-auc:0.96055
[21]	validation_0-auc:0.96105
[22]	validation_0-auc:0.96152
[23]	validation_0-auc:0.96190
[24]	validation_0-auc:0.96221
[25]	validation_0-auc:0.96265
[26]	validation_0-auc:0.96314
[27]	validation_0-auc:0.96372
[28]	validation_0-auc:0.96410
[29]	validation_0-auc:0.96451
[30]	validation_0-auc:0.96502
[31]	validation_0-auc:0.96550
[32]	validation_0-auc:0.96617
[33]	validation_0-auc:0.96647
[34]	validation_0-a

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eval_metric=['auc', 'logloss'], gamma=0, gpu_id=-1,
              importance_type=None, interaction_constraints='',
              learning_rate=0.05, max_delta_step=0, max_depth=5,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=4, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=1, tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

In [19]:
# Persist the retrained classifiers under the 8.0.5 data-version tag.
dump(knn, 'models/KNN_8.0.5.joblib')
dump(rfc, 'models/RFC_8.0.5.joblib')
dump(dtc, 'models/DTC_8.0.5.joblib')
# Save the retrained booster `xgb_clf`. `xgb` is still the original model
# loaded from the 9.0.5 file, so dumping it would store the un-retrained
# model under the 8.0.5 name.
dump(xgb_clf, 'models/XGB_8.0.5.joblib')

['models/XGB_8.0.5.joblib']