# **Monk**

In [None]:
# Install packages.
# %pip (rather than !pip) installs into the environment of the running kernel;
# the extras spec is quoted so the shell cannot interpret the square brackets.
%pip install tensorflow_decision_forests
%pip install wurlitzer
%pip install "scikeras[tensorflow]"

In [None]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# MLP
import tensorflow as tf
from tensorflow import keras
from keras import layers

# SVM
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Decision Forest
import tensorflow_decision_forests as tfdf

# Cross-validation
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, train_test_split
from sklearn.metrics import make_scorer, accuracy_score, log_loss

# Import statistics
from statistics import mean, stdev, median

In [None]:
# Attach Google Drive to the Colab runtime so the data files stored there can be read
from google.colab import drive
drive.mount(mountpoint='/content/drive', force_remount=True)

**Definition of functions**

In [None]:
## Definition of loss/accuracy plot functions
def _plot_history_metric(history, metric, title, ylabel, label_stem):
  """Draw one per-epoch training curve and, when recorded, its held-out counterpart.

  Args:
    history: object exposing a `history` dict of per-epoch value lists
      (what `model.fit` returns).
    metric: key of the training series, e.g. 'loss'; the held-out series is
      looked up under 'val_' + metric.
    title: figure title.
    ylabel: label of the y axis.
    label_stem: legend suffix, e.g. 'loss' gives 'Training loss' / 'Test loss'.
  """
  train_values = history.history[metric]
  # Only present when fit() was given validation data
  held_out_values = history.history.get('val_' + metric)
  epochs = range(1, len(train_values) + 1)
  plt.figure(figsize=(9, 5))
  plt.plot(epochs, train_values, label=f'Training {label_stem}', color='royalblue')
  if held_out_values is not None:
    plt.plot(epochs, held_out_values, label=f'Test {label_stem}', linestyle='dashed', color='darkorange')
  plt.title(title, fontsize=14)
  plt.xlabel('Epochs', fontsize=14)
  plt.ylabel(ylabel, fontsize=14)
  plt.xticks(fontsize=12)
  plt.yticks(fontsize=12)
  plt.legend(fontsize=14)
  plt.show()

def loss_plot(history):
  """Plot the training loss and (if recorded) the 'val_loss' series against the epoch number."""
  _plot_history_metric(history, 'loss', 'Training & Test Loss', 'Loss', 'loss')

def accuracy_plot(history):
  """Plot the training accuracy and (if recorded) the 'val_accuracy' series against the epoch number."""
  _plot_history_metric(history, 'accuracy', 'Training & Test Accuracy', 'Accuracy', 'acc')

In [None]:
## Definition of MLP functions

# Building and compiling model
def build_model(activation, kernel_initializer, optimizer, units=2, input_dim=17):
  """Build and compile an MLP with two hidden layers and one sigmoid output unit.

  Args:
    activation: activation function of both hidden layers.
    kernel_initializer: weight initializer of both hidden layers.
    optimizer: optimizer handed to `compile`.
    units: number of units in each hidden layer.
    input_dim: number of input features; defaults to 17, the width that was
      previously hard-coded.

  Returns:
    The compiled model (binary cross-entropy loss, accuracy metric).
  """
  model = keras.Sequential()
  model.add(tf.keras.Input(shape=(input_dim,)))
  # Two identically configured hidden layers
  for _ in range(2):
    model.add(layers.Dense(units=units, activation=activation, kernel_initializer=kernel_initializer))
  # Single sigmoid unit for the binary target
  model.add(layers.Dense(units=1, activation='sigmoid'))
  model.compile(optimizer=optimizer,
                loss=tf.keras.losses.BinaryCrossentropy(),
                metrics=['accuracy'])
  return model

# Training model
def train_model(model, X_train, y_train, X_test, y_test, batch_size=32, epochs=500, callbacks=None):
  """Fit `model` on the training data, scoring (X_test, y_test) after every epoch.

  The training data is reshuffled each epoch. Returns whatever `model.fit`
  returns (the training history).
  """
  fit_options = dict(validation_data=(X_test, y_test),
                     batch_size=batch_size,
                     shuffle=True,
                     epochs=epochs,
                     callbacks=callbacks)
  return model.fit(X_train, y_train, **fit_options)

In [None]:
## Definition of Random Forest

# Building model
def create_rf_model(num_trees, max_depth, min_examples, algorithm='RANDOM', num_candidate_attributes=0, verbose=2):
    """Create an (untrained) TF-DF Random Forest configured for classification.

    `algorithm` is forwarded as the forest's `categorical_algorithm`; every
    other argument is forwarded under its own name.
    """
    hyperparameters = {
        'num_trees': num_trees,
        'max_depth': max_depth,
        'min_examples': min_examples,
        'categorical_algorithm': algorithm,
        'num_candidate_attributes': num_candidate_attributes,
    }
    return tfdf.keras.RandomForestModel(task=tfdf.keras.Task.CLASSIFICATION,
                                        verbose=verbose,
                                        **hyperparameters)

In [None]:
## Definition of Support Vector Machine
def create_svm_model(kernel, C=1, degree=1, gamma='scale', verbose=2):
  """Create an (unfitted) SVC with the given kernel and hyperparameters."""
  svc_options = dict(kernel=kernel, C=C, degree=degree, gamma=gamma, verbose=verbose)
  return SVC(**svc_options)


---
## **Monk 1**

### **Data preparation**

**Training set**

In [None]:
# Loading the training dataset MONK-1
path = '/content/drive/MyDrive/data/monk+s+problems/monks-1.train'
col_names = ['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'ID']

# header=None: combined with `names`, header=0 makes pandas treat the first line of the
# file as a header row and discard it.
# NOTE(review): the UCI MONK's files are assumed to have no header line, so that first
# line is a real example — confirm for this copy of the data.
monk_1_train = pd.read_csv(path, delimiter=' ', header=None, names=col_names).set_index('ID')
monk_1_train.shape

In [None]:
# Preview the first five training rows
monk_1_train.head(n=5)

In [None]:
# Number of distinct values taken by each column
monk_1_train.nunique(axis=0)

In [None]:
# Class balance: how many records fall in each of the two classes
monk_1_train.loc[:, 'class'].value_counts()

In [None]:
# One-hot encode the attribute columns a1..a6 of the training set ('class' is left untouched)
X_train_encoded = pd.get_dummies(data=monk_1_train, columns=col_names[1:-1])
X_train_encoded.shape

In [None]:
# Preview the encoded training frame
X_train_encoded.head(n=5)

In [None]:
# Separate the target from the encoded attributes
y_train_monk1 = X_train_encoded['class']
# Everything after the first column (expected to be 'class') is a feature
X_train_monk1 = X_train_encoded.iloc[:, 1:]

print(f'y shape: {y_train_monk1.shape}')
print(f'X shape: {X_train_monk1.shape}')

**Test set**

In [None]:
# Loading the test dataset MONK-1
path = '/content/drive/MyDrive/data/monk+s+problems/monks-1.test'
col_names = ['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'ID']

# header=None: combined with `names`, header=0 makes pandas treat the first line of the
# file as a header row and discard it.
# NOTE(review): the UCI MONK's files are assumed to have no header line, so that first
# line is a real example — confirm for this copy of the data.
monk_1_test = pd.read_csv(path, delimiter=' ', header=None, names=col_names).set_index('ID')
monk_1_test.shape

In [None]:
# One-hot encode the attribute columns a1..a6 of the test set ('class' is left untouched)
monk_1_test_encoded = pd.get_dummies(data=monk_1_test, columns=col_names[1:-1])
monk_1_test_encoded.shape

In [None]:
# Separate the target from the encoded attributes
y_test_monk1 = monk_1_test_encoded['class']
# Everything after the first column (expected to be 'class') is a feature
X_test_monk1 = monk_1_test_encoded.iloc[:, 1:]

print(f'y shape: {y_test_monk1.shape}')
print(f'X shape: {X_test_monk1.shape}')

### **Multi Layer Perceptron - MLP**

In [None]:
# MONK-1 network: ELU hidden units, He-uniform initialization, Adam with lr=0.04
mlp1 = build_model(activation='elu',
                   kernel_initializer='HeUniform',
                   optimizer=tf.keras.optimizers.Adam(learning_rate=0.04),
                   units=2)

# Train it, scoring the test set after every epoch
mlp1_history = train_model(model=mlp1,
                           X_train=X_train_monk1,
                           y_train=y_train_monk1,
                           X_test=X_test_monk1,
                           y_test=y_test_monk1,
                           batch_size=32,
                           epochs=500)

In [None]:
# Loss curves of the MONK-1 network
loss_plot(history=mlp1_history)

In [None]:
# Accuracy curves of the MONK-1 network
accuracy_plot(history=mlp1_history)

In [None]:
# Final loss and accuracy of the trained network: training set first, then test set
results_TR = mlp1.evaluate(X_train_monk1, y_train_monk1)
print(f"train loss: {results_TR[0]}, train acc: {results_TR[1]}")

results_TS = mlp1.evaluate(X_test_monk1, y_test_monk1)
print(f"test loss: {results_TS[0]}, test acc: {results_TS[1]}")

In [None]:
%%time
# Retrain from freshly initialized weights several times to see how much the
# weight initialization influences the final scores
trials = 5

history_list = []
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []

for trial in range(trials):
    model = build_model(activation='elu',
                        kernel_initializer='HeUniform',
                        optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),
                        units=2)
    history = train_model(model,
                          X_train_monk1, y_train_monk1,
                          X_test_monk1, y_test_monk1,
                          batch_size=32, epochs=500)
    history_list.append(history)

    # Each result is indexed as [loss, accuracy]
    results_train = model.evaluate(x=X_train_monk1, y=y_train_monk1)
    results_test = model.evaluate(x=X_test_monk1, y=y_test_monk1)

    # losses
    train_losses.append(results_train[0])
    test_losses.append(results_test[0])
    # accuracies
    train_accuracies.append(results_train[1])
    test_accuracies.append(results_test[1])

In [None]:
# Summary statistics of the accuracies collected over the repeated trials
print('MLP 1 MODEL')
print()
print('TRAIN ACCURACY')
print(f'train max: {np.amax(train_accuracies)}')
print(f'train mean: {np.mean(train_accuracies)}')
print(f'train median: {np.median(train_accuracies)}')
print(f'train variance: {np.var(train_accuracies)}')
print()
print('TEST ACCURACY')
print(f'test max: {np.amax(test_accuracies)}')
print(f'test mean: {np.mean(test_accuracies)}')
# Median of the test accuracies (not the train ones), labelled accordingly
print(f'test median: {np.median(test_accuracies)}')
print(f'test variance: {np.var(test_accuracies)}')

### **Random Forest**

In [None]:
# Cast every column to string so the Random Forest handles the attributes as
# categorical values instead of numerical ones
monk_1_train_new = monk_1_train.astype('str')
monk_1_test_new = monk_1_test.astype('str')

print(monk_1_train_new.dtypes)
print(monk_1_test_new.dtypes)

In [None]:
# Random Forest for MONK-1: 300 trees, max depth 10, min 1 example
rf1 = create_rf_model(300, 10, 1,
                      algorithm='RANDOM',
                      num_candidate_attributes=0)

# Register accuracy so that evaluate() reports it
rf1.compile(metrics=["accuracy"])

# Fit on the string-typed training frame
rf1.fit(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_1_train_new, label='class'))

In [None]:
# Print the summary of the trained MONK-1 forest
rf1.summary()

In [None]:
# Score the fitted forest on the training set and list every reported metric
print('TRAINING:\n')
evaluation_TR = rf1.evaluate(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_1_train_new, label='class'),
                             return_dict=True)

for name, value in evaluation_TR.items():
  print(f"{name}: {value:.4f}")

In [None]:
# Score the fitted forest on the test set and list every reported metric
print('TEST:\n')
evaluation_TS = rf1.evaluate(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_1_test_new, label='class'),
                             return_dict=True)

for name, value in evaluation_TS.items():
  print(f"{name}: {value:.4f}")

### **Support Vector Machine - SVM**

In [None]:
# SVM for MONK-1: degree-2 polynomial kernel, C=3
svm1 = create_svm_model('poly', C=3, degree=2, gamma='scale')
# Fit on the one-hot encoded training data
svm1.fit(X=X_train_monk1, y=y_train_monk1)

In [None]:
# Accuracy of the fitted SVM on the training set
y_pred_TR = svm1.predict(X_train_monk1)
Accuracy_TR = accuracy_score(y_true=y_train_monk1, y_pred=y_pred_TR)

# Accuracy of the fitted SVM on the test set
y_pred_TS = svm1.predict(X_test_monk1)
Accuracy_TS = accuracy_score(y_true=y_test_monk1, y_pred=y_pred_TS)

print(f'Train Accuracy: {Accuracy_TR}')
print(f'Test Accuracy: {Accuracy_TS}')

---
## **Monk 2**

### **Data preparation**

**Training set**

In [None]:
# Loading the training dataset MONK-2
path = '/content/drive/MyDrive/data/monk+s+problems/monks-2.train'
col_names = ['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'ID']

# header=None: combined with `names`, header=0 makes pandas treat the first line of the
# file as a header row and discard it.
# NOTE(review): the UCI MONK's files are assumed to have no header line, so that first
# line is a real example — confirm for this copy of the data.
monk_2_train = pd.read_csv(path, delimiter=' ', header=None, names=col_names).set_index('ID')
monk_2_train.shape

In [None]:
# Preview the first five training rows
monk_2_train.head(n=5)

In [None]:
# Number of distinct values taken by each column
monk_2_train.nunique(axis=0)

In [None]:
# Class balance: how many records fall in each of the two classes
monk_2_train.loc[:, 'class'].value_counts()

In [None]:
# One-hot encode the attribute columns a1..a6 of the training set ('class' is left untouched)
X_train_encoded = pd.get_dummies(data=monk_2_train, columns=col_names[1:-1])
X_train_encoded.shape

In [None]:
# Preview the encoded training frame
X_train_encoded.head(n=5)

In [None]:
# Separate the target from the encoded attributes
y_train_monk2 = X_train_encoded['class']
# Everything after the first column (expected to be 'class') is a feature
X_train_monk2 = X_train_encoded.iloc[:, 1:]

print(f'y shape: {y_train_monk2.shape}')
print(f'X shape: {X_train_monk2.shape}')

**Test set**

In [None]:
# Loading the test dataset MONK-2
path = '/content/drive/MyDrive/data/monk+s+problems/monks-2.test'
col_names = ['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'ID']

# header=None: combined with `names`, header=0 makes pandas treat the first line of the
# file as a header row and discard it.
# NOTE(review): the UCI MONK's files are assumed to have no header line, so that first
# line is a real example — confirm for this copy of the data.
monk_2_test = pd.read_csv(path, delimiter=' ', header=None, names=col_names).set_index('ID')
monk_2_test.shape

In [None]:
# One-hot encode the attribute columns a1..a6 of the test set ('class' is left untouched)
X_test_encoded = pd.get_dummies(data=monk_2_test, columns=col_names[1:-1])
X_test_encoded.shape

In [None]:
# Preview the encoded test frame
X_test_encoded.head(n=5)

In [None]:
# Separate the target from the encoded attributes
y_test_monk2 = X_test_encoded['class']
# Everything after the first column (expected to be 'class') is a feature
X_test_monk2 = X_test_encoded.iloc[:, 1:]

print(f'y shape: {y_test_monk2.shape}')
print(f'X shape: {X_test_monk2.shape}')

### **Multi Layer Perceptron - MLP**

In [None]:
# MONK-2 network: ELU hidden units, random-uniform initialization, Adam with lr=0.05
mlp2 = build_model(activation='elu',
                   kernel_initializer='RandomUniform',
                   optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),
                   units=2)

# Train it, scoring the test set after every epoch
mlp2_history = train_model(model=mlp2,
                           X_train=X_train_monk2,
                           y_train=y_train_monk2,
                           X_test=X_test_monk2,
                           y_test=y_test_monk2,
                           batch_size=32,
                           epochs=500)

In [None]:
# Loss curves of the MONK-2 network
loss_plot(history=mlp2_history)

In [None]:
# Accuracy curves of the MONK-2 network
accuracy_plot(history=mlp2_history)

In [None]:
# Final loss and accuracy of the trained network: training set first, then test set
results_TR = mlp2.evaluate(X_train_monk2, y_train_monk2)
print(f"train loss: {results_TR[0]}, train acc: {results_TR[1]}")

results_TS = mlp2.evaluate(X_test_monk2, y_test_monk2)
print(f"test loss: {results_TS[0]}, test acc: {results_TS[1]}")

In [None]:
%%time
# Retrain from freshly initialized weights several times to see how much the
# weight initialization influences the final scores
trials = 5

history_list = []
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []

for trial in range(trials):
    model = build_model(activation='elu',
                        kernel_initializer='RandomUniform',
                        optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),
                        units=2)
    history = train_model(model,
                          X_train_monk2, y_train_monk2,
                          X_test_monk2, y_test_monk2,
                          batch_size=32, epochs=500)
    history_list.append(history)

    # Each result is indexed as [loss, accuracy]
    results_train = model.evaluate(x=X_train_monk2, y=y_train_monk2)
    results_test = model.evaluate(x=X_test_monk2, y=y_test_monk2)

    # losses
    train_losses.append(results_train[0])
    test_losses.append(results_test[0])
    # accuracies
    train_accuracies.append(results_train[1])
    test_accuracies.append(results_test[1])

In [None]:
# Summary statistics of the accuracies collected over the repeated trials
print('MLP 2 MODEL')
print()
print('TRAIN ACCURACY')
print(f'train max: {np.amax(train_accuracies)}')
print(f'train mean: {np.mean(train_accuracies)}')
print(f'train median: {np.median(train_accuracies)}')
print(f'train variance: {np.var(train_accuracies)}')
print()
print('TEST ACCURACY')
print(f'test max: {np.amax(test_accuracies)}')
print(f'test mean: {np.mean(test_accuracies)}')
# Median of the test accuracies (not the train ones), labelled accordingly
print(f'test median: {np.median(test_accuracies)}')
print(f'test variance: {np.var(test_accuracies)}')

### **Random Forest**

In [None]:
# Cast every column to string so the Random Forest handles the attributes as
# categorical values instead of numerical ones
monk_2_train_new = monk_2_train.astype('str')
monk_2_test_new = monk_2_test.astype('str')

print(monk_2_train_new.dtypes)
print(monk_2_test_new.dtypes)

In [None]:
# Random Forest for MONK-2: 200 trees, max depth 15, min 1 example
rf2 = create_rf_model(200, 15, 1,
                      algorithm='RANDOM',
                      num_candidate_attributes=0)

# Register accuracy so that evaluate() reports it
rf2.compile(metrics=["accuracy"])

# Fit on the string-typed training frame
rf2.fit(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_2_train_new, label='class'))

In [None]:
# Print the summary of the trained MONK-2 forest
rf2.summary()

In [None]:
# Score the fitted forest on the training set and list every reported metric
print('TRAINING:\n')
evaluation_TR = rf2.evaluate(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_2_train_new, label='class'),
                             return_dict=True)

for name, value in evaluation_TR.items():
  print(f"{name}: {value:.4f}")

In [None]:
# Score the fitted forest on the test set and list every reported metric
print('TEST:\n')
evaluation_TS = rf2.evaluate(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_2_test_new, label='class'),
                             return_dict=True)

for name, value in evaluation_TS.items():
  print(f"{name}: {value:.4f}")

### **Support Vector Machine - SVM**

In [None]:
# SVM for MONK-2: degree-2 polynomial kernel, C=15
svm2 = create_svm_model('poly', C=15, degree=2, gamma='scale')
# Fit on the one-hot encoded training data
svm2.fit(X=X_train_monk2, y=y_train_monk2)

In [None]:
# Accuracy of the fitted SVM on the training set
y_pred_TR = svm2.predict(X_train_monk2)
Accuracy_TR = accuracy_score(y_true=y_train_monk2, y_pred=y_pred_TR)

# Accuracy of the fitted SVM on the test set
y_pred_TS = svm2.predict(X_test_monk2)
Accuracy_TS = accuracy_score(y_true=y_test_monk2, y_pred=y_pred_TS)

print(f'Train Accuracy: {Accuracy_TR}')
print(f'Test Accuracy: {Accuracy_TS}')

---
## **Monk 3**

### **Data preparation**

**Training set**

In [None]:
# Loading the training dataset MONK-3
path = '/content/drive/MyDrive/data/monk+s+problems/monks-3.train'
col_names = ['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'ID']

# header=None: combined with `names`, header=0 makes pandas treat the first line of the
# file as a header row and discard it.
# NOTE(review): the UCI MONK's files are assumed to have no header line, so that first
# line is a real example — confirm for this copy of the data.
monk_3_train = pd.read_csv(path, delimiter=' ', header=None, names=col_names).set_index('ID')
monk_3_train.shape

In [None]:
# Preview the first five training rows
monk_3_train.head(n=5)

In [None]:
# Number of distinct values taken by each column
monk_3_train.nunique(axis=0)

In [None]:
# Class balance: how many records fall in each of the two classes
monk_3_train.loc[:, 'class'].value_counts()

In [None]:
# One-hot encode the attribute columns a1..a6 of the design (training) set ('class' is left untouched)
X_design_encoded = pd.get_dummies(data=monk_3_train, columns=col_names[1:-1])
X_design_encoded.shape

In [None]:
# Preview the encoded design frame
X_design_encoded.head(n=5)

In [None]:
# Separate the target from the encoded attributes
y_design_monk3 = X_design_encoded['class']
# Everything after the first column (expected to be 'class') is a feature
X_design_monk3 = X_design_encoded.iloc[:, 1:]

print(f'y shape: {y_design_monk3.shape}')
print(f'X shape: {X_design_monk3.shape}')

**Test set**

In [None]:
# Loading the test dataset MONK-3
path = '/content/drive/MyDrive/data/monk+s+problems/monks-3.test'
col_names = ['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'ID']

# header=None: combined with `names`, header=0 makes pandas treat the first line of the
# file as a header row and discard it.
# NOTE(review): the UCI MONK's files are assumed to have no header line, so that first
# line is a real example — confirm for this copy of the data.
monk_3_test = pd.read_csv(path, delimiter=' ', header=None, names=col_names).set_index('ID')
monk_3_test.shape

In [None]:
# One-hot encode the attribute columns a1..a6 of the test set ('class' is left untouched)
X_test_encoded = pd.get_dummies(data=monk_3_test, columns=col_names[1:-1])
X_test_encoded.shape

In [None]:
# Preview the encoded test frame
X_test_encoded.head(n=5)

In [None]:
# Separate the target from the encoded attributes
y_test_monk3 = X_test_encoded['class']
# Everything after the first column (expected to be 'class') is a feature
X_test_monk3 = X_test_encoded.iloc[:, 1:]

print(f'y shape: {y_test_monk3.shape}')
print(f'X shape: {X_test_monk3.shape}')

### **Multi Layer Perceptron - MLP**

In [None]:
# MONK-3 network: ELU hidden units, He-uniform initialization, SGD (lr=0.05, momentum=0.01)
mlp3 = build_model(activation='elu',
                   kernel_initializer='HeUniform',
                   optimizer=tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.01),
                   units=2)

# Train it on the full design set, scoring the test set after every epoch
mlp3_history = train_model(model=mlp3,
                           X_train=X_design_monk3,
                           y_train=y_design_monk3,
                           X_test=X_test_monk3,
                           y_test=y_test_monk3,
                           batch_size=32,
                           epochs=500)

In [None]:
# Loss curves of the MONK-3 network
loss_plot(history=mlp3_history)

In [None]:
# Accuracy curves of the MONK-3 network
accuracy_plot(history=mlp3_history)

In [None]:
# Final loss and accuracy of the trained network: design (training) set first, then test set
results_TR = mlp3.evaluate(X_design_monk3, y_design_monk3)
print(f"train loss: {results_TR[0]}, train acc: {results_TR[1]}")

results_TS = mlp3.evaluate(X_test_monk3, y_test_monk3)
print(f"test loss: {results_TS[0]}, test acc: {results_TS[1]}")

In [None]:
%%time
# Retrain from freshly initialized weights several times to see how much the
# weight initialization influences the final scores
trials = 5

history_list = []
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

for trial in range(trials):
    model = build_model(activation='elu',
                        kernel_initializer='HeUniform',
                        optimizer=tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.01),
                        units=2)
    history = train_model(model,
                          X_design_monk3, y_design_monk3,
                          X_test_monk3, y_test_monk3,
                          batch_size=64, epochs=500)

    # Each result is indexed as [loss, accuracy]
    results_train = model.evaluate(x=X_design_monk3, y=y_design_monk3)
    results_test = model.evaluate(x=X_test_monk3, y=y_test_monk3)
    history_list.append(history)

    # losses
    train_losses.append(results_train[0])
    test_losses.append(results_test[0])
    # accuracies
    train_accuracies.append(results_train[1])
    test_accuracies.append(results_test[1])

In [None]:
# Summary statistics of the accuracies collected over the repeated trials
print('MLP 3 MODEL')
print()
print('TRAIN ACCURACY')
print(f'train max: {np.amax(train_accuracies)}')
print(f'train mean: {np.mean(train_accuracies)}')
print(f'train median: {np.median(train_accuracies)}')
print(f'train variance: {np.var(train_accuracies)}')
print()
print('TEST ACCURACY')
print(f'test max: {np.amax(test_accuracies)}')
print(f'test mean: {np.mean(test_accuracies)}')
# Median of the test accuracies (not the train ones), labelled accordingly
print(f'test median: {np.median(test_accuracies)}')
print(f'test variance: {np.var(test_accuracies)}')

### **Multi Layer Perceptron - MLP (Regularization & Early stopping)**

In [None]:
# Early-stopping callback: watches 'val_loss' with a patience of 10 epochs
early_stopping = keras.callbacks.EarlyStopping(patience=10, monitor='val_loss')

In [None]:
# Validation accuracy obtained on each fold
accuracy_per_fold = []

# Number of epochs actually run on each fold (early stopping may end training early)
epochs_per_fold = []

# Stratified 5-fold cross-validator; the fixed random_state keeps the shuffled
# folds identical from one run to the next
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Run the K-fold cross-validation
for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(X_design_monk3, y_design_monk3)):
  print(f"Running fold {fold_idx+1}")

  # Extract the training and validation examples of this fold
  X_train_fold, y_train_fold = X_design_monk3.iloc[train_indices, :], y_design_monk3.iloc[train_indices]
  X_val_fold, y_val_fold = X_design_monk3.iloc[val_indices, :], y_design_monk3.iloc[val_indices]

  # Build a fresh model for every fold
  model = build_model(units=2,
                      activation='elu',
                      kernel_initializer='HeUniform',
                      optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.7, weight_decay=0.01))

  # Train with early stopping on the fold's validation data
  history = model.fit(X_train_fold,
                      y_train_fold,
                      validation_data=(X_val_fold, y_val_fold),
                      batch_size=64,
                      epochs=1000,
                      callbacks=[early_stopping],
                      verbose=0)

  # Evaluate on the validation fold; index 1 of the result is the accuracy
  accuracy = model.evaluate(x=X_val_fold, y=y_val_fold)[1]
  # One history entry is recorded per completed epoch
  n_epochs = len(history.history['val_accuracy'])
  print(f'val_accuracy: {accuracy}')
  print(f'n_epochs: {n_epochs}')
  accuracy_per_fold.append(accuracy)
  epochs_per_fold.append(n_epochs)

In [None]:
# Summarize the cross-validation: mean and standard deviation of the per-fold
# accuracies, and the median of the per-fold epoch counts
print(f"Mean Accuracy: {mean(accuracy_per_fold)}")
print(f"Stdev Accuracy: {stdev(accuracy_per_fold)}")
print(f'Median epochs: {median(sorted(epochs_per_fold))}')

In [None]:
# Regularized MONK-3 network: SGD with momentum 0.7 and weight decay 0.01
model_reg = build_model(activation='elu',
                        kernel_initializer='HeUniform',
                        optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.7, weight_decay=0.01),
                        units=2)

# Train on the full design set, scoring the test set after every epoch
history_reg = train_model(model=model_reg,
                          X_train=X_design_monk3,
                          y_train=y_design_monk3,
                          X_test=X_test_monk3,
                          y_test=y_test_monk3,
                          batch_size=32,
                          epochs=500)

In [None]:
# Loss curves of the regularized network
loss_plot(history=history_reg)

In [None]:
# Accuracy curves of the regularized network
accuracy_plot(history=history_reg)

In [None]:
%%time

# Hold out 30% of the design set for validation (fixed seed, so the split is repeatable)
train, val = train_test_split(X_design_encoded, test_size=0.30, shuffle=True, random_state=42)
print(f'train shape: {train.shape}')
print(f'val shape: {val.shape}')

# Separate target and features in both partitions
# (features = everything after the first column, expected to be 'class')
y_train = train['class']
X_train = train.iloc[:, 1:]
y_val = val['class']
X_val = val.iloc[:, 1:]
print(f'X_train shape: {X_train.shape}, y_train shape: {y_train.shape}')
print(f'X_val shape: {X_val.shape}, y_val shape: {y_val.shape}')

trials = 5

history_list = []
train_losses, train_accuracies = [], []
val_losses, val_accuracies = [], []
test_losses, test_accuracies = [], []

for trial in range(trials):
    model = build_model(activation='elu',
                        kernel_initializer='HeUniform',
                        optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.7, weight_decay=0.01),
                        units=2)
    # The validation partition drives early stopping; the test set is only scored afterwards
    history = train_model(model,
                          X_train, y_train,
                          X_val, y_val,
                          batch_size=32, epochs=500, callbacks=[early_stopping])
    history_list.append(history)

    # Each result is indexed as [loss, accuracy]
    results_train = model.evaluate(x=X_train, y=y_train)
    results_val = model.evaluate(x=X_val, y=y_val)
    results_test = model.evaluate(x=X_test_monk3, y=y_test_monk3)

    # losses
    train_losses.append(results_train[0])
    val_losses.append(results_val[0])
    test_losses.append(results_test[0])
    # accuracies
    train_accuracies.append(results_train[1])
    val_accuracies.append(results_val[1])
    test_accuracies.append(results_test[1])

In [None]:
# Summary statistics (max, mean, median, variance) of the accuracies collected
# over the repeated trials, reported separately for train, validation and test
print('MODEL REGULARIZATION + EARLY STOPPING')
print()
# Training partition
print('TRAIN ACCURACY')
print(f'train max: {np.amax(train_accuracies)}')
print(f'train mean: {np.mean(train_accuracies)}')
print(f'train median: {np.median(train_accuracies)}')
print(f'train variance: {np.var(train_accuracies)}')
print()

# Validation partition
print('VAL ACCURACY')
print(f'val max: {np.amax(val_accuracies)}')
print(f'val mean: {np.mean(val_accuracies)}')
print(f'val median: {np.median(val_accuracies)}')
print(f'val variance: {np.var(val_accuracies)}')
print()


# Test set
print('TEST ACCURACY')
print(f'test max: {np.amax(test_accuracies)}')
print(f'test mean: {np.mean(test_accuracies)}')
print(f'test median: {np.median(test_accuracies)}')
print(f'test variance: {np.var(test_accuracies)}')

### **Random Forest**

In [None]:
# Cast every column to string so the Random Forest handles the attributes as
# categorical values instead of numerical ones
monk_3_train_new = monk_3_train.astype('str')
monk_3_test_new = monk_3_test.astype('str')

print(monk_3_train_new.dtypes)
print(monk_3_test_new.dtypes)

In [None]:
# Validation accuracy on the individual folds.
accuracy_per_fold = []

# Stratified 5-fold cross-validator; the fixed random_state keeps the shuffled
# folds identical from one run to the next
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Run the 5-fold cross-validation. The fold indices are computed on the encoded design
# set and applied positionally to the string-typed frame built from the same rows.
for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(X_design_monk3, y_design_monk3)):
  print(f"Running fold {fold_idx+1}")

  # Extract the training and validation examples (the frame is already string-typed)
  train_fold = monk_3_train_new.iloc[train_indices, :]
  val_fold = monk_3_train_new.iloc[val_indices, :]

  # Specify the model
  model = create_rf_model(num_trees=200,
                          max_depth=30,
                          min_examples=15,
                          num_candidate_attributes=0,
                          algorithm='RANDOM',
                          verbose=0)

  # Register accuracy so that evaluate() reports it
  model.compile(metrics=["accuracy"])

  # Train the model
  model.fit(x=tfdf.keras.pd_dataframe_to_tf_dataset(train_fold, label='class'))

  # Evaluate the model on the validation fold.
  accuracy = model.evaluate(tfdf.keras.pd_dataframe_to_tf_dataset(val_fold, label='class'), return_dict=True)['accuracy']
  print(f'val_accuracy: {accuracy}')
  accuracy_per_fold.append(accuracy)

print(f"Mean Accuracy: {mean(accuracy_per_fold)}")
print(f"Stdev Accuracy: {stdev(accuracy_per_fold)}")

In [None]:
# Random Forest for MONK-3: 200 trees, max depth 30, min 15 examples
rf3 = create_rf_model(200, 30, 15,
                      algorithm='RANDOM',
                      num_candidate_attributes=0)

# Register accuracy so that evaluate() reports it
rf3.compile(metrics=["accuracy"])

# Fit on the string-typed training frame
rf3.fit(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_3_train_new, label='class'))

In [None]:
# Print the summary of the trained MONK-3 forest
rf3.summary()

In [None]:
# Score the fitted forest on the training set and list every reported metric
print('TRAINING:\n')
evaluation_TR = rf3.evaluate(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_3_train_new, label='class'),
                             return_dict=True)

for name, value in evaluation_TR.items():
  print(f"{name}: {value:.4f}")

In [None]:
# Score the fitted forest on the test set and list every reported metric
print('TEST:\n')
evaluation_TS = rf3.evaluate(x=tfdf.keras.pd_dataframe_to_tf_dataset(monk_3_test_new, label='class'),
                             return_dict=True)

for name, value in evaluation_TS.items():
  print(f"{name}: {value:.4f}")

### **Support Vector Machine - SVM**

In [None]:
# Stratified 5-fold splitter; the fixed random_state keeps the shuffled folds
# identical from one run to the next
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Specify the model
model = create_svm_model(kernel='rbf',
                         C=1,
                         gamma='scale')

# Accuracy on each validation fold
val_scores = cross_val_score(model, X_design_monk3, y_design_monk3, cv=kfold, scoring=make_scorer(accuracy_score))
print(val_scores)
print(f'val_mean: {mean(val_scores)}')
print(f'val_stdev: {stdev(val_scores)}')

In [None]:
# SVM for MONK-3: RBF kernel, C=1
svm3 = create_svm_model('rbf', C=1, gamma='scale')

# Fit on the full one-hot encoded design set
svm3.fit(X=X_design_monk3, y=y_design_monk3)

In [None]:
# Accuracy of the fitted SVM on the design (training) set
y_pred_TR = svm3.predict(X_design_monk3)
Accuracy_TR = accuracy_score(y_true=y_design_monk3, y_pred=y_pred_TR)

# Accuracy of the fitted SVM on the test set
y_pred_TS = svm3.predict(X_test_monk3)
Accuracy_TS = accuracy_score(y_true=y_test_monk3, y_pred=y_pred_TS)

print(f'Train Accuracy: {Accuracy_TR}')
print(f'Test Accuracy: {Accuracy_TS}')