In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import polars as pl
import time
import warnings
import gc
import copy
import pickle
from hurst import compute_Hc
from scipy.signal import hilbert
from scipy.signal import iirfilter, filtfilt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from keras.metrics import KLDivergence
from sklearn.decomposition import PCA
from sklearn.utils import resample

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

import os
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
tf.config.list_physical_devices('GPU')
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras
import keras.backend as K

warnings.filterwarnings("ignore")

Num GPUs Available:  1


In [None]:
# Fabien's paths
directory = 'D:/Kaggle/2024/Harmful_brain_activity_classification/train_models/'

In [None]:
verif_data = pd.read_csv(directory + "verif_headers_order.csv")
verif_headers_order = verif_data.columns

class cScaler:
    def __init__(self):
        self.full_train = StandardScaler()

file = open(directory + "standard_scaler.pickle", 'rb')
scaler = pickle.load(file)
file.close()

In [None]:
data = pd.read_parquet(directory + "Combined_Features_wf_all.parquet")

In [None]:
# data = balanced_data.copy()

Y_data = data.iloc[:,9:15]
print(data.shape)
print(Y_data.columns)
Y_data = Y_data.values

data = data.iloc[:,15:]
data = data.select_dtypes(include=[np.number])
data = data.drop("Total_votes", axis = 1)
print(data.shape)

In [None]:
# strategy for missing values
data = data.replace(np.nan, 0)

Y_data = Y_data / np.sum(Y_data,axis=1,keepdims=True)
X_cols = data.select_dtypes(include=[np.number]).keys()
X_data = data.select_dtypes(include=[np.number])
X_data = scaler.full_train.transform(X_data) # standardization

# Metrics

In [2]:
# assessment metrics
def log(x):
    x[x == 0] = 1e-15
    x[x == 1] = 1-1e-15
    return np.log(x)


def JensenShannonDiv(true_y, pred_y): # Jensen-Shannon Divergence https://towardsdatascience.com/how-to-understand-and-use-jensen-shannon-divergence-b10e11b03fd6
    true_y = tf.convert_to_tensor(true_y, dtype=tf.float32)
    pred_y = tf.convert_to_tensor(pred_y, dtype=tf.float32)
    # removing 0 to avoid divisions by 0.
    maskT = K.equal(true_y, 0)
    maskP = K.equal(pred_y, 0)
    true_y = K.switch(~maskT, true_y, 1e-15)
    pred_y = K.switch(~maskP, pred_y, 1e-15)
    # sum to 1
    true_y = true_y / K.sum(true_y, axis = 0)
    pred_y = pred_y / K.sum(pred_y, axis = 0)
    JSD1 = pred_y*K.log(2* pred_y/ (pred_y+true_y))
    JSD2 = true_y*K.log(2* true_y/ (pred_y+true_y))
    JSD = 0.5*K.sum(JSD1, axis = 1) + 0.5*K.sum(JSD2, axis = 1)
    # JSD should now be a 1D array. It there are NaN, calculating it's sum will fail. However, If I remove the NaN, I may falsely decrease
    # the sum, because I calculate it with less value. My solution to overcome this is to calculate the mean of the 1D array without NaN,
    # and multiplying the mean by the number of values in the 1D array, including NaN
    mask = tf.math.is_nan(JSD)
    JSD = K.mean(JSD[~mask]) * 1000
    return JSD


def wJensenShannonDiv(true_y, pred_y): # Jensen-Shannon Divergence https://towardsdatascience.com/how-to-understand-and-use-jensen-shannon-divergence-b10e11b03fd6
    true_y = tf.convert_to_tensor(true_y, dtype=tf.float32)
    pred_y = tf.convert_to_tensor(pred_y, dtype=tf.float32)
    # removing 0 to avoid divisions by 0.
    maskT = K.equal(true_y, 0)
    maskP = K.equal(pred_y, 0)
    true_y = K.switch(~maskT, true_y, 1e-15)
    pred_y = K.switch(~maskP, pred_y, 1e-15)
    # sum to 1
    true_y = true_y / K.sum(true_y, axis = 0)
    pred_y = pred_y / K.sum(pred_y, axis = 0)
    JSD1 = pred_y*K.log(2* pred_y/ (pred_y+true_y)) * tf.convert_to_tensor([3,1,1,1,1,2], dtype=tf.float32)
    JSD2 = true_y*K.log(2* true_y/ (pred_y+true_y)) * tf.convert_to_tensor([3,1,1,1,1,2], dtype=tf.float32)
    JSD = 0.5*K.sum(JSD1, axis = 1) + 0.5*K.sum(JSD2, axis = 1)
    # JSD should now be a 1D array. It there are NaN, calculating it's sum will fail. However, If I remove the NaN, I may falsely decrease
    # the sum, because I calculate it with less value. My solution to overcome this is to calculate the mean of the 1D array without NaN,
    # and multiplying the mean by the number of values in the 1D array, including NaN
    mask = tf.math.is_nan(JSD)
    JSD = K.mean(JSD[~mask])
    return JSD


# competition metric? 
def kl_divergence(solution, submission, epsilon = 1e-15, micro_average = False, sample_weights = None):
    if not isinstance(solution, pd.DataFrame): solution = pd.DataFrame(solution)
    if not isinstance(submission, pd.DataFrame): submission = pd.DataFrame(submission)   

    for col in solution.columns:

        if not pd.api.types.is_float_dtype(solution[col]):
            solution[col] = solution[col].astype(float)
        submission[col] = np.clip(submission[col], epsilon, 1 - epsilon)

        y_nonzero_indices = solution[col] != 0 
        solution[col] = solution[col].astype(float)
        solution.loc[y_nonzero_indices, col] = solution.loc[y_nonzero_indices, col] * \
                                                np.log(solution.loc[y_nonzero_indices, col] / submission.loc[y_nonzero_indices, col])
        
        solution.loc[~y_nonzero_indices, col] = 0

    if micro_average:
        return np.average(solution.sum(axis=1))#, weights=sample_weights)
    else:
        return np.average(solution.mean())

# Keras model

In [12]:
def dim_reduct(X, npc = 800):
    pca = PCA(npc)
    XtrainPCA = pca.fit_transform(X)
    return pca, XtrainPCA

def apply_PCA(X, pca):
    XtestPCA = pca.transform(X)
    return XtestPCA

In [None]:
batch_size = 64
epochs = 8
verbose = 1

CV_y_pred = pd.DataFrame(np.zeros(Y_data.shape))
PCAmodels = {}
all_perf = {}
NN_models = {}

In [None]:
tf.keras.backend.clear_session()

n_splits = 5

kfold = KFold(n_splits= n_splits, shuffle = True, random_state = 42)

# K-fold Cross Validation model evaluation

n = -1
# for train_idx, test_idx in kfold.split(balanced_data.label_id.unique()):
for train_idx, test_idx in kfold.split(X_data, Y_data):
    n += 1
    print("---", n)

    # mask = balanced_data.label_id.isin(balanced_data.label_id.unique()[train_idx])
    X_train, y_train = X_data[train_idx,:] , Y_data[train_idx,:]
    X_test, y_test = X_data[test_idx,:] , Y_data[test_idx,:]


    PCAmodels[n], X_train = dim_reduct(X_train, 800)
    X_test = apply_PCA(X_test, PCAmodels[n])
    # X_train = np.expand_dims(X_train, axis=2)
    # X_test = np.expand_dims(X_test, axis=2)
    
    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    print("X_test shape", X_test.shape)
    
    # mA0 = keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]), name = "inputA")
    mA0 = keras.layers.Input(shape=X_train.shape[1], name = "inputA")
    mA1 = keras.layers.Dense(1200, activation="gelu")(mA0)
    mA2 = keras.layers.Dropout(0.05)(mA1)
    mA3 = keras.layers.Dense(600, activation="gelu")(mA2)
    mA4 = keras.layers.Dropout(0.05)(mA3)
    mA5 = keras.layers.Dense(200, activation="gelu")(mA4)
    mA6 = keras.layers.Dropout(0.05)(mA5)
    mA7 = keras.layers.Dense(6, activation="softmax")(mA6)

    model = keras.models.Model(inputs=[mA0],outputs=[mA7], name = f'model{n}')

    model.compile(
        loss= keras.losses.KLDivergence(),
        #loss=JensenShannonDiv,
        optimizer=keras.optimizers.Adam(learning_rate=1e-4),
        metrics=[keras.metrics.KLDivergence(name = "KLD")],
        run_eagerly=True
    )
    if n == 0: print(model.summary())
    
    callbacks = [keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)]
    
    model.fit(
        [X_train],
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.15,
        callbacks=callbacks,
        verbose=verbose
    )

    model.save(directory + f"model{n}.keras")

    NN_y_pred = model.predict(X_test)
    CV_y_pred.loc[test_idx] = NN_y_pred

    kld = KLDivergence()
    all_perf[n] = (kld(y_test, CV_y_pred.loc[test_idx,:]),
                   JensenShannonDiv(y_test, CV_y_pred.loc[test_idx]),
                   wJensenShannonDiv(y_test, CV_y_pred.loc[test_idx]))
    print(all_perf[n])
    
    gc.collect()

In [None]:
for i in all_perf.values():
    print(i[0].numpy(), i[1].numpy(), i[2].numpy()*10000)

print("----")
print(kld(Y_data, CV_y_pred),
    JensenShannonDiv(Y_data, CV_y_pred),
    wJensenShannonDiv(Y_data, CV_y_pred))

In [None]:
'''
Attempt 2
Best KLD, JSD
mA0 = keras.layers.Input(shape=X_train.shape[1], name = "inputA")
mA1 = keras.layers.Dense(1200, activation="LeakyReLU")(mA0)
mA2 = keras.layers.Dropout(0.05)(mA1)
mA3 = keras.layers.Dense(600, activation="LeakyReLU")(mA2)
mA4 = keras.layers.Dropout(0.05)(mA3)
mA5 = keras.layers.Dense(200, activation="LeakyReLU")(mA4)
mA6 = keras.layers.Dropout(0.05)(mA5)
mA7 = keras.layers.Dense(6, activation="softmax")(mA6)

loss: KLD
epoch: 7
with PCA 800
batch_size: 128
    
0.27772743 0.38466638
0.29020777 0.3947794
0.29215237 0.40049928
0.30260068 0.41565132
0.28473517 0.38930452
'''

In [None]:
'''
Attempt 3
mA0 = keras.layers.Input(shape=X_train.shape[1], name = "inputA")
mA1 = keras.layers.Dense(1200, activation="LeakyReLU")(mA0)
mA2 = keras.layers.Dropout(0.05)(mA1)
mA3 = keras.layers.Dense(600, activation="LeakyReLU")(mA2)
mA4 = keras.layers.Dense(100, activation="LeakyReLU")(mA3)
mA6 = keras.layers.Dropout(0.05)(mA4)
mA7 = keras.layers.Dense(6, activation="softmax")(mA6)

loss: KLD
epoch: 10
with PCA 800
batch_size: 128

0.2687153 0.012454825 0.19520142814144492
0.31809422 0.013991482 0.2201401730417274
0.31590238 0.014007937 0.2191763633163646
0.30434176 0.013607951 0.21086325432406738
0.30868018 0.013530641 0.21034438759670593
----
tf.Tensor(0.30409643, shape=(), dtype=float32) tf.Tensor(0.0027094362, shape=(), dtype=float32) tf.Tensor(4.229578e-06, shape=(), dtype=float32)
'''

In [None]:
'''
not submitted

mA0 = keras.layers.Input(shape=X_train.shape[1], name = "inputA")
mA3 = keras.layers.Dense(400, activation="gelu")(mA0)
mA3b = keras.layers.Dropout(0.05)(mA3)
mA4 = keras.layers.Dense(100, activation="gelu")(mA3b)
mA5 = keras.layers.Dropout(0.05)(mA4)
mA6 = keras.layers.Dense(25, activation="gelu")(mA5)
mA7 = keras.layers.Dense(6, activation="softmax")(mA6)

loss: KLD
epoch: 30, patience 5
with PCA 800
batch_size: 128

0.28346992 0.012983576 0.19991402950836346
0.29861128 0.012989608 0.20122837668168359
0.30290562 0.012781282 0.19633147530839778
0.29059058 0.012629622 0.19448616512818262
0.29403827 0.012706669 0.1957047606993001
----
tf.Tensor(0.2939548, shape=(), dtype=float32) tf.Tensor(0.0025643937, shape=(), dtype=float32) tf.Tensor(3.951708e-06, shape=(), dtype=float32)
'''

In [None]:
'''
Attempt 4
interesting case because KLD shows better overall perf, but JSD and wJSD show worse perf.


mA0 = keras.layers.Input(shape=X_train.shape[1], name = "inputA")
mA1 = keras.layers.Dense(1200, activation="gelu")(mA0)
mA2 = keras.layers.Dropout(0.05)(mA1)
mA3 = keras.layers.Dense(600, activation="gelu")(mA2)
mA4 = keras.layers.Dropout(0.05)(mA3)
mA5 = keras.layers.Dense(200, activation="gelu")(mA4)
mA6 = keras.layers.Dropout(0.05)(mA5)
mA7 = keras.layers.Dense(6, activation="softmax")(mA6)

loss: KLD
epoch: 3
with PCA 800
batch_size: 64

0.27293953 0.017414736 0.2629776099638548
0.27891082 0.0174157 0.2626997411425691
0.27661416 0.017378118 0.260330707533285
0.2903107 0.018472176 0.2772005245788023
0.2842966 0.018364944 0.27507252525538206
----
tf.Tensor(0.28122807, shape=(), dtype=float32) tf.Tensor(0.0035667575, shape=(), dtype=float32) tf.Tensor(5.3602034e-06, shape=(), dtype=float32)
''''


In [None]:
# save best model
class cPCAmodels:
    def __init__(self):
        self.model = 0

PCAm = cPCAmodels()
PCAm.model = PCAmodels[0]
file = open(directory + "PCAmodel_attempt3.pickle", 'wb')
pickle.dump(PCAm, file)
file.close()

In [None]:
# the best model is model 1
model = keras.models.load_model(directory + "model1.keras")
print(model)

In [None]:
# five more epoch with the entire dataset

n_attempt = 4
batch_size = 124
epochs = 5
verbose = 1

X_train = apply_PCA(X_data, PCAmodels[0])

model.fit(
    [X_train],
    Y_data,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose
)

model.save(directory + f"finalmodel{n_attempt}.keras", save_format='keras')

# Gradient Boosted Trees

In [3]:
import lightgbm as lgb
import xgboost as xgb
import catboost as cb
import optuna
from keras.utils import to_categorical
from sklearn.model_selection import cross_val_score

In [4]:
directory = 'D:/Kaggle/2024/Harmful_brain_activity_classification/train_models/'
data = pd.read_parquet(directory + "Combined_Features_wf_all.parquet")

In [5]:
verif_data = pd.read_csv(directory + "verif_headers_order.csv")
verif_headers_order = verif_data.columns

class cScaler:
    def __init__(self):
        self.full_train = StandardScaler()

file = open(directory + "standard_scaler.pickle", 'rb')
scaler = pickle.load(file)
file.close()

In [6]:
data.expert_consensus.unique()

array(['Seizure', 'GPD', 'LRDA', 'Other', 'GRDA', 'LPD'], dtype=object)

In [7]:
# data = balanced_data.copy()

encoding = {"Seizure" : 0, "GPD": 1, "LRDA": 2, "Other": 3, "GRDA":4, "LPD":5}
decoding = {0: "Seizure", 1: "GPD", 2: "LRDA", 3: "Other", 4: "GRDA", 5: "LPD"}

Y_data_df = data.iloc[:,9:15]
Y_data_df = Y_data_df / Y_data_df.sum(axis=0)
print(Y_data_df.shape)

Y_data = data[["expert_consensus"]].copy()
Y_data["expert_consensus"] = Y_data["expert_consensus"].apply(lambda x: encoding[x])
print(Y_data.value_counts())
print(Y_data.shape)

data = data.iloc[:,15:]
data = data.select_dtypes(include=[np.number])
data = data.drop("Total_votes", axis = 1)
print(data.shape)

# strategy for missing values
data = data.replace(np.nan, 0)

X_cols = data.select_dtypes(include=[np.number]).keys()
X_data = data.select_dtypes(include=[np.number])
X_data = scaler.full_train.transform(X_data) # standardization
X_data = pd.DataFrame(X_data, columns = X_cols)
print(X_data.shape)

(106800, 6)
expert_consensus
0                   20933
4                   18861
3                   18808
1                   16702
2                   16640
5                   14856
dtype: int64
(106800, 1)
(106800, 1410)
(106800, 1410)


In [8]:
def log(x):
    
    # reshaping
    new_x = x
    if isinstance(x, pd.DataFrame) == False: new_x = pd.DataFrame(x)
    
    # replacing
    new_x = new_x.replace(0,1e-15)
    new_x = new_x.replace(1,1-1e-15)
    
    return np.log(new_x.values)


# true_y can be a list, array or pandas dataframe of a single column containing the categories to predict. 
# pred_y has to be an array with each column corresponding to one class probability
# idx stands for index. When one wants to assess the logloss for a single row in a dataframe,
# the row index (number) should be assigned to idx.
# adapt to your outcome
def logloss(true_y, pred_y, idx = None, byclass = False): 

    # reshaping
    ytrue = true_y
    ypred = pred_y
    if isinstance(pred_y, pd.DataFrame) == False: ypred = pd.DataFrame(pred_y)
    if isinstance(ytrue, pd.DataFrame) == False: ytrue = pd.DataFrame(ytrue)
    if ytrue.shape[1] == 1: ytrue = pd.DataFrame(to_categorical(ytrue))
     
    # logloss calculation
    if idx == None:
        val = -(log(ypred)*ytrue).mean()
    else:
        if type(idx) == int: val = -(np.reshape(log(ypred.iloc[idx,:]),(3,)) *ytrue.iloc[idx,:])
        else: val = -(log(ypred.iloc[idx,:])*ytrue.iloc[idx,:]).mean()

    # display results
    if byclass:
        for i in decoding.keys():
            print(f'logloss for {decoding[i]} : {val[i]}')      
    val = np.sum(val)
    print(f'Overall logloss: {val}') 
    
    return val

## Light GBM

In [None]:
def lgb_objective(trial):
  params = {'objective': "multiclass",
    'metric': "multi_logloss",
    'num_class': 6,
    'max_depth': trial.suggest_int('max_depth', 15,30),
    'num_leaves': trial.suggest_int('num_leaves', 40,120),
    'learning_rate': trial.suggest_float('learning_rate', 0.02, 0.035),
    'n_estimators': trial.suggest_int('n_estimators', 150, 275),
    'colsample_bytree': trial.suggest_float('colsample_bytree', 0.02, 0.15),
    'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4,1),
    'reg_alpha': trial.suggest_float('reg_alpha', 0.1, 0.7),
    'reg_lambda': trial.suggest_float('reg_lambda', 4, 8),
    'max_bin': trial.suggest_int('max_bin', 40,70),
    'device_type': "gpu",
    'verbose':-1}
    
  model = lgb.LGBMClassifier(**params);
  score = cross_val_score(model, X_data , Y_data, cv=5, scoring = "neg_log_loss")
  return -score.mean()

study = optuna.create_study(direction='minimize')
study.optimize(lgb_objective, n_trials=20)

In [9]:
lgbparams = {'objective': 'multiclass',
'metric': 'multi_logloss',
'num_class': 6,
'max_depth': 22,
'num_leaves': 56,
'learning_rate': 0.029200818616956066,
'n_estimators': 240,
'colsample_bytree': 0.14529209839987717,
'bagging_fraction': 0.680397874556198,
'reg_alpha': 0.3880867303670045,
'reg_lambda': 7.853855198384489,
'max_bin': 57,
'device_type': 'gpu',
"importance_type": "gain",
'verbose': -1}

CV_predictions = pd.DataFrame(np.zeros([X_data.shape[0], 6]))

LGBM = lgb.LGBMClassifier(**lgbparams)

n_splits = 5
kfold = KFold(n_splits= n_splits, shuffle = True, random_state = 42)

n = -1
# for train_idx, test_idx in kfold.split(balanced_data.label_id.unique()):
for train_idx, test_idx in kfold.split(X_data, Y_data):
    n += 1
    print("---", n)
    
    X_train, y_train = X_data.loc[train_idx,:] , Y_data.loc[train_idx,:]
    X_test, y_test = X_data.loc[test_idx,:] , Y_data.loc[test_idx,:]

    LGBM.fit(X_train, y_train)
    proba = LGBM.predict_proba(X_test)
      
    # build CV results
    CV_predictions.loc[test_idx,:] = proba
    gc.collect()

LL = logloss(Y_data, CV_predictions, byclass = True)
print(f"LGBM log loss: {LL}\n")
kld = KLDivergence()
KLDmetric = kld(Y_data_df, CV_predictions)
print(f"LGBM KLD: {KLDmetric}")

--- 0
--- 1
--- 2
--- 3
--- 4
logloss for Seizure : 0.08989963088863537
logloss for GPD : 0.042386723490068945
logloss for LRDA : 0.05511958265461755
logloss for Other : 0.12302536261757491
logloss for GRDA : 0.06776814264774196
logloss for LPD : 0.06866162342222995
Overall logloss: 0.4468610657208687
LGBM log loss: 0.4468610657208687

LGBM KLD: -0.0004141136014368385


In [14]:
print(CV_predictions)

               0         1         2         3         4         5
0       0.467960  0.030706  0.117178  0.152520  0.039671  0.191966
1       0.726207  0.021470  0.053387  0.091569  0.024286  0.083081
2       0.820036  0.014303  0.039367  0.057771  0.023844  0.044679
3       0.813929  0.014858  0.026494  0.085502  0.014746  0.044470
4       0.815186  0.019549  0.023259  0.079154  0.016919  0.045934
...          ...       ...       ...       ...       ...       ...
106795  0.044618  0.021189  0.775961  0.038914  0.076975  0.042344
106796  0.039969  0.017540  0.822999  0.031166  0.067627  0.020699
106797  0.045350  0.020080  0.793597  0.037814  0.077944  0.025215
106798  0.031221  0.015986  0.793649  0.032816  0.085343  0.040985
106799  0.026354  0.010867  0.838306  0.028975  0.074160  0.021339

[106800 rows x 6 columns]


# Combined models

In [15]:
tf.keras.backend.clear_session()

PCAmodels = {}

batch_size = 128
epochs = 100
verbose = 1

n_splits = 5
n = 0

# K-fold Cross Validation model evaluation

X_train, X_test, y_train, y_test, X_trainLGBM, X_testLGBM  = train_test_split(X_data, Y_data_df, CV_predictions, test_size = 0.2)                                                                       

PCAmodels[n], X_train = dim_reduct(X_train, 800)
X_test = apply_PCA(X_test, PCAmodels[n])
# X_train = np.expand_dims(X_train, axis=2)
# X_test = np.expand_dims(X_test, axis=2)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
print("X_trainLGBM shape", X_trainLGBM.shape)
print("X_testLGBM shape", X_testLGBM.shape)

# mA0 = keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]), name = "inputA")
mA0 = keras.layers.Input(shape=X_train.shape[1], name = "inputA")
mA1 = keras.layers.Dense(1200, activation="gelu")(mA0)
mA2 = keras.layers.Dropout(0.05)(mA1)
mA3 = keras.layers.Dense(600, activation="gelu")(mA2)
mA4 = keras.layers.Dropout(0.05)(mA3)
mA5 = keras.layers.Dense(200, activation="gelu")(mA4)
mA6 = keras.layers.Dropout(0.05)(mA5)
mA7 = keras.layers.Dense(24, activation="gelu")(mA6)

mB0 = keras.layers.Input(shape=X_trainLGBM.shape[1], name = "inputB")

mAB = keras.layers.concatenate([mA7,mB0],name="concatenated_layer")
output_layer = keras.layers.Dense(6, activation = "softmax")(mAB)

model = keras.models.Model(inputs=[mA0,mB0],outputs=[output_layer], name = f'model{n}')

model.compile(
    loss= keras.losses.KLDivergence(),
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=[keras.metrics.KLDivergence(name = "KLD")],
    run_eagerly=True
)
if n == 0: print(model.summary())

callbacks = [keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)]

model.fit(
    [X_train, X_trainLGBM],
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.15,
    callbacks=callbacks,
    verbose=verbose
)

model.save(directory + f"model_combined.keras")

predictions = model.predict([X_test, X_testLGBM])

kld = KLDivergence()
print(kld(y_test, predictions), JensenShannonDiv(y_test, predictions), wJensenShannonDiv(y_test, predictions))

gc.collect()

X_train shape: (85440, 800)
y_train shape: (85440, 6)
X_test shape: (21360, 800)
y_test shape: (21360, 6)
X_trainLGBM shape (85440, 6)
X_testLGBM shape (21360, 6)
Model: "model0"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputA (InputLayer)            [(None, 800)]        0           []                               
                                                                                                  
 dense (Dense)                  (None, 1200)         961200      ['inputA[0][0]']                 
                                                                                                  
 dropout (Dropout)              (None, 1200)         0           ['dense[0][0]']                  
                                                                                                  
 dense_1 (Dense)             

ValueError: Layer "model0" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(32, 800), dtype=float32, numpy=
array([[-5.88153696e+00, -2.38435864e+00,  5.66012335e+00, ...,
         9.61677507e-02, -9.74584520e-02,  8.66410434e-02],
       [-3.09530020e+00,  1.29145203e+01, -1.27189665e+01, ...,
        -1.75717920e-01, -1.99270368e-01,  5.82260899e-02],
       [-1.34647989e+01, -6.06159639e+00,  8.60588551e+00, ...,
         8.07968061e-03, -4.89619970e-02, -2.03010812e-01],
       ...,
       [ 1.85393829e+01, -1.00781107e+01,  1.42456427e+01, ...,
         6.72174469e-02, -3.39383748e-03, -5.51992580e-02],
       [-2.16536369e+01, -2.39834309e+00,  4.86847305e+00, ...,
         2.07342952e-01, -6.52381685e-03,  4.72200364e-02],
       [-1.86684494e+01,  2.44482493e+00, -3.40622544e+00, ...,
        -2.03052163e-01,  3.98116320e-01, -1.07740834e-01]], dtype=float32)>]

In [18]:
predictions = model.predict([X_test, X_testLGBM])

kld = KLDivergence()
print(kld(y_test, predictions), JensenShannonDiv(y_test, predictions), wJensenShannonDiv(y_test, predictions))

gc.collect()

tf.Tensor(-0.00054504786, shape=(), dtype=float32) tf.Tensor(0.009731499, shape=(), dtype=float32) tf.Tensor(1.4608102e-05, shape=(), dtype=float32)


2578

perf combined model: The metrics unanimously described this model as better than the non-combined one
KLDivergence -0.00054504786
JensenShannonDiv: 009731499
wJensenShannonDiv : 1.4608102e-05

In [20]:
# save best model
class cPCAmodels:
    def __init__(self):
        self.model = 0

PCAm = cPCAmodels()
PCAm.model = PCAmodels[0]
file = open(directory + "PCAmodel_attempt5.pickle", 'wb')
pickle.dump(PCAm, file)
file.close()

In [21]:
# the best model is model 1
model = keras.models.load_model(directory + "model_combined.keras")
print(model)

<keras.engine.functional.Functional object at 0x00000242E37F8550>


In [24]:
# five more epoch with the entire dataset

n_attempt = 5
batch_size = 128
epochs = 10
verbose = 1

X_train = apply_PCA(X_data, PCAmodels[0])

model.fit(
    [X_train, CV_predictions],
    Y_data_df,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose
)

model.save(directory + f"finalmodel{n_attempt}.keras", save_format='keras')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
