# **Higgs boson project**

### Setup

In [None]:
import google.colab as gc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Read the data from google drive
gc.drive.mount("/content/drive", force_remount=True)
# Column names that read_data assigns to the CSV: the "Higgs boson" label first,
# followed by 28 feature columns (21 lepton / missing-energy / jet quantities,
# then the 7 "m ..." columns).
frame_names = ["Higgs boson","lepton pT", "lepton eta", "lepton phi", "missing energy magnitude", "missing energy phi", "jet 1 pt",
               "jet 1 eta", "jet 1 phi", "jet 1 b-tag", "jet 2 pt", "jet 2 eta", "jet 2 phi", "jet 2 b-tag", "jet 3 pt",
               "jet 3 eta", "jet 3 phi", "jet 3 b-tag", "jet 4 pt", "jet 4 eta", "jet 4 phi", "jet 4 b-tag", "m jj",
               "m jjj", "m lv", "m jlv", "m bb", "m wbb", "m wwbb"]

def read_data(file_path):
    """Load the CSV at ``file_path`` and return a cleaned DataFrame.

    The columns are named after the module-level ``frame_names`` list, the
    "Higgs boson" label is replaced by its categorical integer codes, object
    columns are converted through ``convert_invalid_types`` and every row that
    still contains a missing value is removed.
    """
    frame = pd.read_csv(file_path, names=frame_names, low_memory=False)

    # Encode the label column as integer category codes.
    label_codes = frame["Higgs boson"].astype("category").cat.codes
    frame["Higgs boson"] = label_codes

    frame = convert_invalid_types(frame)

    # Drop rows with missing values
    return frame.dropna()

def convert_invalid_types(data):
    """Convert object-dtype columns holding more than one distinct value to numbers.

    Double quotes are stripped from string cells before conversion; anything that
    still cannot be parsed becomes NaN (``errors="coerce"``). Cells that are not
    strings (for example floats in a mixed-type column) are passed to
    ``pd.to_numeric`` unchanged instead of going through the ``.str`` accessor,
    which would silently turn them into NaN.

    The frame is modified in place and also returned.
    """
    def _strip_quotes(value):
        # Only strings can carry stray quote characters.
        return value.replace('"', '') if isinstance(value, str) else value

    object_cols = data.select_dtypes(include=["object"])
    mixed_type_cols = object_cols.columns[object_cols.nunique() > 1]
    for col in mixed_type_cols:
        data[col] = pd.to_numeric(data[col].map(_strip_quotes), errors="coerce")
    return data

# Load and clean the dataset stored in the mounted Drive folder.
data = read_data("/content/drive/My Drive/Higgs_Boson_Project/data.csv")

Mounted at /content/drive


### Show the data

For more data visualizations, see the project's Python files.

In [None]:
# Lift the column display limit so every column is shown, then print the
# frame's shape and display per-column summary statistics.
pd.set_option('display.max_columns', None)
print("shape:", data.shape)
data.describe()

shape: (599997, 29)


Unnamed: 0,Higgs boson,lepton pT,lepton eta,lepton phi,missing energy magnitude,missing energy phi,jet 1 pt,jet 1 eta,jet 1 phi,jet 1 b-tag,jet 2 pt,jet 2 eta,jet 2 phi,jet 2 b-tag,jet 3 pt,jet 3 eta,jet 3 phi,jet 3 b-tag,jet 4 pt,jet 4 eta,jet 4 phi,jet 4 b-tag,m jj,m jjj,m lv,m jlv,m bb,m wbb,m wwbb
count,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0,599997.0
mean,0.529284,0.992485,-0.000115,0.000163,0.99802,-0.001152,0.990147,-0.002195,0.000513,1.000378,0.992595,0.001639,0.000405,0.997929,0.992338,0.001851,0.000928,1.001304,0.986216,-0.000309,-0.002131,0.996425,1.034049,1.024419,1.050636,1.01013,0.973343,1.033156,0.959836
std,0.499142,0.565045,1.007857,1.005479,0.599283,1.006755,0.474626,1.010297,1.006594,1.026462,0.500731,1.008571,1.00694,1.04679,0.487112,1.008364,1.005883,1.195057,0.505672,1.008151,1.005563,1.397913,0.669433,0.378087,0.164444,0.398453,0.524886,0.364497,0.313074
min,0.0,0.275,-2.43,-1.74,0.000626,-1.74,0.139,-2.97,-1.74,0.0,0.189,-2.91,-1.74,0.0,0.264,-2.73,-1.74,0.0,0.365,-2.5,-1.74,0.0,0.107,0.245,0.0922,0.157,0.0481,0.303,0.351
25%,0.0,0.591,-0.737,-0.87,0.577,-0.873,0.679,-0.689,-0.868,0.0,0.656,-0.693,-0.871,0.0,0.651,-0.697,-0.87,0.0,0.618,-0.715,-0.872,0.0,0.791,0.846,0.986,0.768,0.674,0.819,0.77
50%,1.0,0.854,-0.00103,0.00264,0.891,-0.00191,0.894,-0.003,-0.00105,1.09,0.89,0.00103,0.000454,0.0,0.898,0.00199,-0.00131,0.0,0.868,-0.000461,-0.00581,0.0,0.895,0.95,0.99,0.917,0.873,0.947,0.872
75%,1.0,1.24,0.738,0.87,1.29,0.872,1.17,0.685,0.87,2.17,1.2,0.697,0.872,2.21,1.22,0.703,0.873,2.55,1.22,0.715,0.868,3.1,1.02,1.08,1.02,1.14,1.14,1.14,1.06
max,1.0,8.71,2.43,1.74,9.9,1.74,8.38,2.97,1.74,2.17,11.6,2.91,1.74,2.21,8.51,2.73,1.74,2.55,11.6,2.5,1.74,3.1,22.3,11.6,5.92,10.5,13.7,8.43,6.26


### Process the data

In [None]:
# The seven "m ..." columns, i.e. the last seven entries of frame_names.
# Presumably intended as a column subset for train_model's `parameters` argument — confirm.
engineered_parameters = ["m jj",  "m jjj", "m lv", "m jlv", "m bb", "m wbb", "m wwbb"]

In [None]:
# Features are every column after the first; the first column ("Higgs boson") is the label.
X = data.iloc[:, 1:]
y = data['Higgs boson']

# Seed the split so the train/test partition, and every metric computed on it,
# is reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Standardise both splits with statistics from the training split only,
# so no information from the test rows enters the scaling.
mean = tf.reduce_mean(X_train, axis=0)
std = tf.math.reduce_std(X_train, axis=0)

X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

### Define the models

#### Stored

In [None]:
def fast_MLP(name = None, compile = True):
  """Build a 7-hidden-layer ReLU MLP (1024 down to 16 units) with a sigmoid output.

  Every hidden Dense layer uses an L1 kernel regulariser of 3e-5 and is followed
  by Dropout(0.25).

  Args:
    name: optional model name passed to ``keras.models.Sequential``.
    compile: when truthy, compile with binary cross-entropy, Adam(0.0005) and
      the accuracy metric.

  Returns:
    The (optionally compiled) Sequential model.
  """
  hidden_units = [1024, 512, 256, 128, 64, 32, 16]
  layers = []
  for units in hidden_units:
    layers.append(keras.layers.Dense(units=units, activation='relu', kernel_regularizer = keras.regularizers.L1(0.00003)))
    layers.append(keras.layers.Dropout(0.25))
  layers.append(keras.layers.Dense(units=1, activation='sigmoid'))

  model = keras.models.Sequential(layers, name)
  if compile:
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.0005),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return model

In [None]:
def deep_MLP(name = None, compile = True):
  """Build an 8-hidden-layer ReLU MLP (256 down to 16 units) with a sigmoid output.

  Every Dense layer, including the output layer, uses an L1 kernel regulariser
  of 3e-5; each hidden layer is followed by Dropout(0.25).

  Args:
    name: optional model name passed to ``keras.models.Sequential``.
    compile: when truthy, compile with binary cross-entropy, Adam(0.0005) and
      the accuracy metric.

  Returns:
    The (optionally compiled) Sequential model.
  """
  hidden_units = [256, 128, 128, 64, 64, 32, 32, 16]
  layers = []
  for units in hidden_units:
    layers.append(keras.layers.Dense(units=units, activation='relu', kernel_regularizer = keras.regularizers.L1(0.00003)))
    layers.append(keras.layers.Dropout(0.25))
  layers.append(keras.layers.Dense(units=1, activation='sigmoid', kernel_regularizer = keras.regularizers.L1(0.00003)))

  model = keras.models.Sequential(layers, name)
  if compile:
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.0005),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return model

In [None]:
def tanh_MLP(name = None, compile = True):
  """Build an 8-hidden-layer tanh MLP (256 down to 16 units) with a sigmoid output.

  Every Dense layer, including the output layer, uses an L1 kernel regulariser
  of 1e-6; each hidden layer is followed by Dropout(0.15).

  Args:
    name: optional model name passed to ``keras.models.Sequential``.
    compile: when truthy, compile with binary cross-entropy, Adam(0.001) and
      the accuracy metric.

  Returns:
    The (optionally compiled) Sequential model.
  """
  hidden_units = [256, 128, 128, 64, 64, 32, 32, 16]
  layers = []
  for units in hidden_units:
    layers.append(keras.layers.Dense(units=units, activation='tanh', kernel_regularizer = keras.regularizers.L1(0.000001)))
    layers.append(keras.layers.Dropout(0.15))
  layers.append(keras.layers.Dense(units=1, activation='sigmoid', kernel_regularizer = keras.regularizers.L1(0.000001)))

  model = keras.models.Sequential(layers, name)
  if compile:
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.001),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return model

In [None]:
def MLP_ensemble(MLP_number, name = None, compile = True):
  """Build an ensemble that averages the outputs of ``MLP_number`` fresh fast_MLP models.

  All members share one input whose width is taken from the module-level
  ``X_train``.

  Args:
    MLP_number: number of fast_MLP members; must be at least 1.
    name: optional name for the resulting ``keras.Model``.
    compile: when truthy, compile with binary cross-entropy, Adam(0.0005) and
      the accuracy metric.

  Returns:
    The (optionally compiled) ensemble model.

  Raises:
    ValueError: if ``MLP_number`` is smaller than 1.
  """
  if MLP_number < 1:
    raise ValueError("MLP_number must be at least 1")

  ensemble_input = keras.Input(shape=(X_train.shape[1],))
  member_outputs = [fast_MLP(compile = False)(ensemble_input) for _ in range(MLP_number)]

  # keras.layers.average refuses a single tensor, so a one-member "ensemble"
  # simply exposes that member's output.
  if MLP_number == 1:
    output = member_outputs[0]
  else:
    output = keras.layers.average(member_outputs)

  ensemble_model = keras.Model(inputs = ensemble_input, outputs = output, name = name)
  if compile:
    ensemble_model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.0005),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return ensemble_model

In [None]:
def get_error_data(model, X, y):
  """Return the samples that ``model`` misclassifies at a 0.5 threshold.

  Args:
    model: object whose ``predict(X, batch_size=..., verbose=...)`` returns an
      array with one probability per row in its first column.
    X: feature DataFrame.
    y: pandas Series of labels aligned with ``X``.

  Returns:
    ``(X_wrong, y_wrong)``: the rows of ``X`` and entries of ``y`` whose
    thresholded prediction differs from the label.
  """
  predicted_labels = (model.predict(X, batch_size = 2048, verbose = 0) > 0.5)[:, 0].astype(int)
  # Compare directly instead of subtracting: NumPy refuses to subtract two
  # boolean arrays, so subtraction breaks as soon as the labels are bool.
  false_predictions = predicted_labels != y.to_numpy()
  return X[false_predictions], y[false_predictions]

def boosted_MLP(X_train, y_train, X_test, y_test, name = None, compile = True):
  """Train two models in a boosting-like fashion and return their averaged ensemble.

  The training data is split 80/20. A fast_MLP is fitted on the larger part;
  the samples it misclassifies there are concatenated with the smaller part and
  used to fit a lilit_1 model. The returned model averages both outputs.

  lilit_1 has two inputs (the first 21 columns and the remaining columns), so
  its data is split the same way here. lilit_1 sizes its inputs from the
  module-level ``X_train``, which therefore must have the same columns as the
  ``X_train`` passed in.

  Args:
    X_train, y_train: training features (DataFrame) and labels (Series).
    X_test, y_test: data used as validation set during both fits.
    name: optional name for the resulting ``keras.Model``.
    compile: when truthy (the default, matching the previous always-compile
      behaviour), compile the ensemble with binary cross-entropy, Adam(0.0005)
      and the accuracy metric.

  Returns:
    The (optionally compiled) ensemble model.
  """
  def split_inputs(frame):
    # Same raw / high-level column split that lilit_1 uses for its two inputs.
    return [frame.iloc[:, 0:21], frame.iloc[:, 21:]]

  X_first, X_second, y_first, y_second = train_test_split(X_train, y_train, test_size = 0.2)

  first = fast_MLP(compile = True)
  print("First model")
  first.fit(
    X_first,
    y_first,
    epochs = 50,
    batch_size = 2048,
    validation_data = (X_test, y_test),
  )

  X_error, y_error = get_error_data(first, X_first, y_first)

  print("Second model")
  second = lilit_1(compile = True)
  second.fit(
    split_inputs(pd.concat([X_error, X_second])),
    pd.concat([y_error, y_second]),
    epochs = 50,
    batch_size = 2048,
    validation_data = (split_inputs(X_test), y_test),
  )

  # Size the shared input from the data passed in, not from a notebook global.
  ensemble_input = keras.Input(shape=(X_train.shape[1],))
  member_outputs = [
    first(ensemble_input),
    second([ensemble_input[:, 0:21], ensemble_input[:, 21:]]),
  ]
  output = keras.layers.average(member_outputs)
  ensemble_model = keras.Model(inputs = ensemble_input, outputs = output, name = name)
  if compile:
    ensemble_model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.0005),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return ensemble_model

In [None]:
def test_MLP(name = None, compiled = True):
  """Build a wide 6-hidden-layer ReLU MLP (8192 down to 256 units) with a sigmoid output.

  The first four hidden layers carry decreasing L1 kernel regularisation and
  the dropout rate shrinks with depth; the last two hidden layers are not
  regularised.

  Args:
    name: optional model name passed to ``keras.models.Sequential``.
    compiled: when truthy, compile with binary cross-entropy, Adam(0.0003) and
      the accuracy metric.

  Returns:
    The (optionally compiled) Sequential model.
  """
  # (units, L1 factor or None, dropout rate) for each hidden layer.
  hidden_spec = [
    (8192, 0.00005, 0.30),
    (4096, 0.00003, 0.20),
    (2048, 0.00002, 0.10),
    (1024, 0.000005, 0.10),
    (512, None, 0.05),
    (256, None, 0.05),
  ]
  layers = []
  for units, l1_factor, dropout_rate in hidden_spec:
    regularizer = keras.regularizers.L1(l1_factor) if l1_factor is not None else None
    layers.append(keras.layers.Dense(units=units, activation='relu', kernel_regularizer = regularizer))
    layers.append(keras.layers.Dropout(dropout_rate))
  layers.append(keras.layers.Dense(units=1, activation='sigmoid'))

  model = keras.models.Sequential(layers, name)
  # Test the `compiled` argument; the bare name `compile` is the Python builtin
  # here and is always truthy, which made compiled=False a no-op.
  if compiled:
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.0003),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return model

#### Sam's copies of lilit

In [None]:
def lilit_1(name = None, compile = True):
  """Build a two-branch model over the first 21 columns and the remaining columns.

  Each branch is a ReLU MLP with dropout; the branch outputs (16 and 8 units)
  are concatenated and passed through an L1-regularised ReLU MLP ending in a
  single sigmoid unit. Input widths are taken from the module-level
  ``X_train``. The model expects its inputs as ``[first_21_columns, remaining_columns]``.

  Args:
    name: optional name for the resulting ``keras.Model``.
    compile: when truthy, compile with binary cross-entropy, Adam(0.0005) and
      the accuracy metric.

  Returns:
    The (optionally compiled) two-input model.
  """
  X_raw = X_train.iloc[:, 0:21]
  X_high_level = X_train.iloc[:, 21:]

  # Branch 1: the first 21 columns.
  inputs_1 = keras.Input(shape=(X_raw.shape[1], ))
  x_1 = inputs_1
  for units, dropout_rate in [(1024, 0.4), (720, 0.2), (512, 0.2), (72, 0.2)]:
    x_1 = keras.layers.Dense(units, activation="relu")(x_1)
    x_1 = keras.layers.Dropout(dropout_rate)(x_1)
  output_1 = keras.layers.Dense(16, activation = "relu")(x_1)

  # Branch 2: the remaining columns.
  inputs_2 = keras.Input(shape=(X_high_level.shape[1], ))
  x_2 = inputs_2
  for units, dropout_rate in [(512, 0.4), (256, 0.2), (128, 0.2)]:
    x_2 = keras.layers.Dense(units, activation="relu")(x_2)
    x_2 = keras.layers.Dropout(dropout_rate)(x_2)
  output_2 = keras.layers.Dense(8, activation = "relu")(x_2)

  # Joint head on the concatenated branch outputs.
  merged = keras.layers.concatenate([output_1, output_2])
  for units in [1024, 512, 256, 128, 64, 8]:
    merged = keras.layers.Dense(units, activation="relu", kernel_regularizer=tf.keras.regularizers.L1(0.00002))(merged)
    merged = keras.layers.Dropout(0.25)(merged)
  merged = keras.layers.Dense(1, activation="sigmoid")(merged)

  model = keras.Model(inputs=[inputs_1, inputs_2], outputs=merged, name = name)

  if compile:
    model.compile(
      loss='binary_crossentropy',
      optimizer=keras.optimizers.Adam(0.0005),
      # A list (not a bare string) is accepted by every Keras version.
      metrics=['accuracy'],
    )

  return model

In [None]:
def lilit_2(name = None, compile = True):
  """Build an MLP whose 16-unit bottleneck is multiplied by a learned 1-unit "attention" value.

  Input width is taken from the module-level ``X_train``.

  Args:
    name: optional name for the resulting model (previously ignored).
    compile: when truthy, compile with the 'adam' optimizer, binary
      cross-entropy and the accuracy metric (previously the model was always
      compiled, which the default preserves).

  Returns:
    The (optionally compiled) model.
  """
  inputs_a = keras.layers.Input(shape=(X_train.shape[1],))
  x_a = inputs_a
  for units, dropout_rate in [(1024, 0.25), (256, 0.2), (16, 0.2)]:
    x_a = keras.layers.Dense(units, activation='relu')(x_a)
    x_a = keras.layers.Dropout(dropout_rate)(x_a)

  # NOTE(review): softmax over a single unit always evaluates to 1, so the
  # Multiply below leaves x_a unchanged. The intended design (e.g. a 16-way
  # softmax or a sigmoid gate) is unclear, so the layer is kept as written —
  # confirm with the author.
  att = keras.layers.Dense(1, activation='softmax')(x_a)
  att_out = keras.layers.Multiply()([x_a, att])

  for units in [512, 64]:
    att_out = keras.layers.Dense(units, activation='relu')(att_out)
    att_out = keras.layers.Dropout(0.2)(att_out)
  outputs_a = keras.layers.Dense(1, activation='sigmoid')(att_out)

  att_model = keras.models.Model(inputs=inputs_a, outputs=outputs_a, name=name)
  if compile:
    att_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  return att_model

In [None]:
def pseudo_final(name = None, compile = True):
  """Build a wide MLP (8192 down to 64 units) with a sigmoid output.

  Only the first Dense layer is L1-regularised (2e-4); dropout decreases from
  0.4 to 0.1 with depth. The 64-unit layer has no activation (it is linear).

  Args:
    name: optional model name passed to ``keras.models.Sequential``.
    compile: when truthy, compile with binary cross-entropy, Adam(0.0005) and
      the accuracy metric.

  Returns:
    The (optionally compiled) Sequential model.
  """
  Dense = keras.layers.Dense
  Dropout = keras.layers.Dropout
  layers = [
    Dense(units=8192, activation='relu', kernel_regularizer = keras.regularizers.L1(0.0002)),
    Dropout(0.4),
    Dense(units=4096, activation='relu'),
    Dropout(0.25),
    Dense(units=1024, activation='relu'),
    Dropout(0.20),
    Dense(units=512, activation='relu'),
    Dropout(0.15),
    # NOTE(review): no activation here, unlike the other hidden layers — confirm this is intended.
    Dense(units=64),
    Dropout(0.1),
    Dense(units=1, activation='sigmoid'),
  ]
  model = keras.models.Sequential(layers, name)
  if compile:
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(0.0005),
        # A list (not a bare string) is accepted by every Keras version.
        metrics=['accuracy'],
    )
  return model

#### Testing

In [None]:
def functional_model():
  """Build and compile a small functional-API model: two 10-unit ReLU layers and a sigmoid output.

  The input width matches the module-level ``X_train``. ``X_train`` already
  excludes the label column, so no column is subtracted; an input one column
  narrower could not be fitted on ``X_train``.

  Returns:
    The model compiled with binary cross-entropy, Adam(0.0005) and the accuracy metric.
  """
  input_layer = keras.Input(shape=(X_train.shape[1],), name='input_layer')
  Layer_1 = keras.layers.Dense(10, activation="relu",name='Layer_1')(input_layer)
  Layer_2 = keras.layers.Dense(10, activation="relu",name='Layer_2')(Layer_1)
  output_layer= keras.layers.Dense(1, activation="sigmoid",name='output_layer')(Layer_2)

  model = keras.models.Model(inputs=input_layer, outputs=output_layer)

  model.compile(
      loss='binary_crossentropy',
      optimizer=keras.optimizers.Adam(0.0005),
      # A list (not a bare string) is accepted by every Keras version.
      metrics=['accuracy'],
  )
  return model

### Train a model

In [None]:
# Registry of trained models keyed by model name; train_model adds entries to it.
models = {}

In [None]:
def train_model(model, parameters = None):
  """Fit ``model`` on the module-level training split and register it in ``models``.

  Trains for 50 epochs with batch size 2048, validating on the test split.
  The weights of the epoch with the best validation accuracy are written to
  ``Saved_models/<model.name>.weights.h5`` on Drive. The ``.weights.h5`` suffix
  is required by Keras 3 for weights-only checkpoints and keeps the checkpoint
  separate from a full model saved as ``<name>.h5``.

  Args:
    model: a compiled Keras model.
    parameters: optional column selection applied to both ``X_train`` and
      ``X_test``; ``None`` uses every column.
  """
  # `is None` rather than `== None`: comparing an array-like selection with
  # `==` is element-wise and has no single truth value.
  use_all_columns = parameters is None
  train_features = X_train if use_all_columns else X_train[parameters]
  test_features = X_test if use_all_columns else X_test[parameters]

  best_weights_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=f'/content/drive/My Drive/Higgs_Boson_Project/Saved_models/{model.name}.weights.h5',
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True
  )

  # `use_multiprocessing` is not passed: it only concerned generator inputs
  # and newer Keras versions no longer accept it in fit().
  model.fit(
    train_features,
    y_train,
    epochs = 50,
    batch_size = 2048,
    validation_data = (test_features, y_test),
    callbacks = [best_weights_checkpoint]
  )
  models[model.name] = model

In [None]:
%%time
# Build the pseudo_final architecture under the name 'model2', train it and
# register it in `models` (train_model stores it under model.name).
train_model(pseudo_final('model2'))

### Evaluate a model

In [None]:
separator = '----------------------------------------------------------'
def show_confusion_matrix(model, X, y, proportions = False):
  """Print ``model``'s confusion matrix on ``(X, y)`` at a 0.5 threshold.

  Rows are the predicted class and columns the true class. With
  ``proportions`` set, counts are divided by the number of labels.
  """
  probabilities = model.predict(X, batch_size = 2048, verbose = 0)
  predicted_labels = (probabilities > 0.5).astype(int)
  print(f"{model.name}'s confusion matrix")

  denominator = y.size if proportions else 1
  # Predictions are passed first, so they index the rows of the matrix.
  table = pd.DataFrame(
    tf.math.confusion_matrix(predicted_labels, y) / denominator,
    columns = ['Not Higgs', 'Higgs'],
    index = ['Predicted not Higgs', 'Predicted Higgs']
  )
  print(table)
  print(separator)

def show_prediction_similarity(model1, model2, X1, X2):
  """Print a cross-tabulation of the two models' thresholded (0.5) predictions.

  Rows are ``model1``'s predicted class on ``X1`` and columns ``model2``'s
  predicted class on ``X2``.
  """
  def predicted_labels(model, features):
    probabilities = model.predict(features, batch_size = 2048, verbose = 0)
    # First (only) output column, as a flat integer vector.
    return (probabilities > 0.5).astype(int)[:, 0]

  labels_1 = predicted_labels(model1, X1)
  labels_2 = predicted_labels(model2, X2)
  agreement = pd.DataFrame(
    tf.math.confusion_matrix(labels_1, labels_2),
    index = [f'Not Higgs {model1.name}', f'Higgs {model1.name}'],
    columns = [f'Not Higgs {model2.name}', f'Higgs {model2.name}']
  )
  print(agreement)
  print(separator)

def get_error_similarity(model1, model2, X1, X2, y):
  """Cross-tabulate which samples each model classifies correctly at a 0.5 threshold.

  Args:
    model1, model2: models exposing ``predict``.
    X1, X2: the features fed to ``model1`` and ``model2`` respectively.
    y: pandas Series of true labels shared by both inputs.

  Returns:
    A 2x2 confusion-matrix tensor. Index 0 means "wrong" and 1 means "correct";
    rows refer to ``model1`` and columns to ``model2``.
  """
  labels = y.to_numpy()

  def is_correct(model, features):
    predicted = (model.predict(features, batch_size = 2048, verbose = 0) > 0.5)[:, 0].astype(int)
    # Compare directly instead of subtracting: NumPy refuses to subtract two
    # boolean arrays, so subtraction breaks as soon as the labels are bool.
    return predicted == labels

  true_predictions_1 = is_correct(model1, X1)
  true_predictions_2 = is_correct(model2, X2)

  return tf.math.confusion_matrix(true_predictions_1, true_predictions_2)

def show_error_similarity(model1, model2, X1, X2, y):
  """Print the mistake/accuracy overlap table of the two models, then the separator."""
  overlap = get_error_similarity(model1, model2, X1, X2, y)
  table = pd.DataFrame(
    overlap,
    index = ['Mistakes model 1', 'Accuracies model 1'],
    columns = ['Mistakes model 2', 'Accuracies model 2']
  )
  print(table)
  print(separator)

def get_similar_error_proportion(model1, model2, X1, X2, y):
  """Return the share of mistaken samples that both models get wrong.

  Computed from get_error_similarity as: samples both models get wrong,
  divided by samples at least one model gets wrong.
  """
  overlap = get_error_similarity(model1, model2, X1, X2, y)
  both_wrong = overlap[0, 0]
  any_wrong = overlap[0, 0] + overlap[0, 1] + overlap[1, 0]
  return both_wrong / any_wrong

def compare(model1, model2, X1, X2, y):
  """Print a side-by-side comparison of two models on the same labels.

  Shows each model's confusion matrix, how their predictions agree, how their
  mistakes overlap, and the proportion of mistakes they share.

  Args:
    model1, model2: models exposing ``predict`` and ``name``.
    X1, X2: the features fed to ``model1`` and ``model2`` respectively.
    y: pandas Series of true labels.

  Returns:
    The shared-mistake proportion from get_similar_error_proportion
    (previously computed and then discarded).
  """
  show_confusion_matrix(model1, X1, y)
  show_confusion_matrix(model2, X2, y)
  show_prediction_similarity(model1, model2, X1, X2)
  show_error_similarity(model1, model2, X1, X2, y)
  shared_mistakes = get_similar_error_proportion(model1, model2, X1, X2, y)
  print(f"Proportion of mistakes shared by both models: {float(shared_mistakes):.4f}")
  print(separator)
  return shared_mistakes

In [None]:
# NOTE(review): 'model1' and 'test4' must already be in `models`; no cell visible in this
# notebook trains models under those names, so this presumably relies on earlier kernel
# state — confirm it survives Restart & Run All.
compare(models['model1'], models['test4'], X_test, X_test, y_test)

model1's confusion matrix
                     Not Higgs    Higgs
Predicted not Higgs    41079.0  14374.0
Predicted Higgs        15122.0  49425.0
----------------------------------------------------------
test4's confusion matrix
                     Not Higgs    Higgs
Predicted not Higgs    41202.0  14379.0
Predicted Higgs        14999.0  49420.0
----------------------------------------------------------
                  Not Higgs test4  Higgs test4
Not Higgs model1            49761         5692
Higgs model1                 5820        58727
----------------------------------------------------------
                    Mistakes model 2  Accuracies model 2
Mistakes model 1               23681                5815
Accuracies model 1              5697               84807
----------------------------------------------------------


In [None]:
name = 'test4'
def evaluate(model, X, y):
  """Evaluate ``model`` on ``(X, y)`` with batch size 2048 and return what ``model.evaluate`` returns.

  The result used to be dropped, so the scores could not be printed or reused.
  """
  return model.evaluate(X, y, batch_size = 2048)

print('training accuracy')
print(evaluate(models[name], X_train, y_train))
print('testing accuracy')
print(evaluate(models[name], X_test, y_test))
show_confusion_matrix(models[name], X_test, y_test)


training accuracy
testing accuracy
test4's confusion matrix
                     Not Higgs    Higgs
Predicted not Higgs    41202.0  14379.0
Predicted Higgs        14999.0  49420.0
----------------------------------------------------------


In [None]:
# Show a model
# models[name].summary(expand_nested = False, show_trainable = True)
# Draw the architecture of models[name] into "<name>.png", including layer shapes.
keras.utils.plot_model(models[name], f"{name}.png", show_shapes=True)

### Save a model

In [None]:
# Make sure that no other model has the same name, to not override another one
# Writes models[model_name] to the shared Saved_models folder on Drive as an .h5 file.
# NOTE(review): `model_name` must be a key of `models`; no cell visible in this notebook
# trains a model named "test3" — confirm.
model_name = "test3"
models[model_name].save(f"/content/drive/My Drive/Higgs_Boson_Project/Saved_models/{model_name}.h5")

### Load a model

In [None]:
# File name (without the .h5 extension) of the saved model loaded in the next cell.
loaded_model_name = "model-76-27"

In [None]:
# Load the saved model and score it on the held-out, standardised test split.
# The full X / y frame is not standardised and also contains the training rows,
# so it would not give a meaningful score for a model fitted on X_train
# (assumes the saved model was trained with this notebook's preprocessing).
loaded = keras.models.load_model(f"/content/drive/My Drive/Higgs_Boson_Project/Saved_models/{loaded_model_name}.h5")
loaded.evaluate(X_test, y_test, batch_size = 2048)
loaded.summary()

### Additional models:

In [None]:
# to prune
# NOTE(review): `tfmot`, `model` and `callbacks` are not imported or defined in any cell
# visible in this notebook. This cell presumably needs the TensorFlow Model Optimization
# package imported as `tfmot`, an existing model bound to `model`, and a callback list
# bound to `callbacks` — confirm before running.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Pruning configuration passed to prune_low_magnitude.
# Presumably a constant 40% sparsity target starting at step 0 — verify against the tfmot docs.
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.4, 0)
}

# Wrap the model for pruning.
model_for_pruning = prune_low_magnitude(model, **pruning_params)

# The wrapped model is compiled again before it is fitted.
model_for_pruning.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy']
)

# Short fit (4 epochs) on the training split, validated on the test split.
model_for_pruning.fit(
    X_train,
    y_train,
    batch_size=2048, 
    epochs=4, 
    validation_data = (X_test, y_test),
    callbacks=callbacks
)

In [None]:
# Final model
# Two-branch architecture: one branch for the first 21 feature columns, one for the
# remaining columns; the branch outputs are concatenated and fed to a joint head.
# The slices below are taken for both splits; only the training slices are used in
# this cell (to size the inputs).
X_raw = X_train.iloc[:, 0:21]
X_high_level = X_train.iloc[:, 21:]

X_test_raw = X_test.iloc[:, 0:21]
X_test_high_level = X_test.iloc[:, 21:]

# Branch over the first 21 columns: ReLU layers 1024 -> 512 -> 256 -> 128 -> 64 -> 32,
# with dropout after the first four.
inputs_raw = keras.Input(shape=(X_raw.shape[1], ))
x_raw = keras.layers.Dense(1024, activation="relu")(inputs_raw)
x_raw = keras.layers.Dropout(0.4)(x_raw)
x_raw = keras.layers.Dense(512, activation = "relu")(x_raw)
x_raw = keras.layers.Dropout(0.2)(x_raw)
x_raw = keras.layers.Dense(256, activation = "relu")(x_raw)
x_raw = keras.layers.Dropout(0.2)(x_raw)
x_raw = keras.layers.Dense(128, activation = "relu")(x_raw)
x_raw = keras.layers.Dropout(0.2)(x_raw)
x_raw = keras.layers.Dense(64, activation = "relu")(x_raw)
output_raw = keras.layers.Dense(32, activation = "relu")(x_raw)

# Branch over the remaining columns: ReLU layers 420 -> 128 -> 64 -> 16.
inputs_high_level = keras.Input(shape=(X_high_level.shape[1], ))
x_high_level = keras.layers.Dense(420, activation="relu")(inputs_high_level)
x_high_level = keras.layers.Dropout(0.4)(x_high_level)
x_high_level = keras.layers.Dense(128, activation = "relu")(x_high_level)
x_high_level = keras.layers.Dropout(0.2)(x_high_level)
x_high_level = keras.layers.Dense(64, activation = "relu")(x_high_level)
x_high_level = keras.layers.Dropout(0.2)(x_high_level)
output_high_level = keras.layers.Dense(16, activation = "relu")(x_high_level)

# Joint head: concatenate the two branch outputs (32 + 16 units), then
# 6000 -> 4096 -> 1024 -> 512 -> 64 ReLU layers (the last two L1-regularised)
# and a single sigmoid output unit.
merged = keras.layers.concatenate([output_raw, output_high_level])
merged = keras.layers.Dense(6000, activation="relu")(merged)
merged = keras.layers.Dropout(0.4)(merged)
merged = keras.layers.Dense(4096, activation="relu")(merged)
merged = keras.layers.Dropout(0.25)(merged)
merged = keras.layers.Dense(1024, activation="relu")(merged)
merged = keras.layers.Dropout(0.2)(merged)
merged = keras.layers.Dense(512, activation="relu", kernel_regularizer=tf.keras.regularizers.L1(0.0002))(merged)
merged = keras.layers.Dropout(0.2)(merged)
merged = keras.layers.Dense(64, activation="relu", kernel_regularizer=tf.keras.regularizers.L1(0.0002))(merged)
merged = keras.layers.Dropout(0.2)(merged)
merged = keras.layers.Dense(1, activation="sigmoid")(merged)

# The model takes its inputs as [first-21-columns, remaining-columns].
final_model = keras.Model(inputs=[inputs_raw, inputs_high_level], outputs=merged)