In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, matthews_corrcoef, cohen_kappa_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
import kerastuner as kt

In [31]:
# Load data
df = pd.read_excel('Tvet_dataRegular.xlsx')

In [32]:
df

Unnamed: 0,ProfileCode,Gender,Age,Score_E,Score_A,Score_C,Score_N,Score_O,Result,Trade
0,711,Male,21,31,32,38,31,35,85,Auto Mechanic
1,712,Male,16,13,16,21,11,27,79,Auto Mechanic
2,717,Male,22,36,36,38,21,32,80,Auto Mechanic
3,720,Male,17,19,29,26,23,29,95,Auto Mechanic
4,723,Male,17,16,35,30,20,24,80,Auto Mechanic
...,...,...,...,...,...,...,...,...,...,...
546,5615,Male,21,21,34,36,22,21,95,Refrigeration & Air Conditioning
547,5616,Male,19,22,22,36,28,25,96,Refrigeration & Air Conditioning
548,5618,Male,20,22,34,26,8,28,94,Refrigeration & Air Conditioning
549,5620,Male,19,11,23,31,31,26,93,Refrigeration & Air Conditioning


In [33]:
# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])

le_trade = LabelEncoder()
df['Trade'] = le_trade.fit_transform(df['Trade'])

# Split data into features and target
X = df.drop('Trade', axis=1)
y = df['Trade']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert target to categorical
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [34]:
# Build the hyperparameter tuning model
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units_input', min_value=32, max_value=256, step=32), input_dim=X_train.shape[1], activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(hp.Float('dropout_input', 0.1, 0.5, step=0.1)))
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32), activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float(f'dropout_{i}', 0.1, 0.5, step=0.1)))
    model.add(Dense(y_train_categorical.shape[1], activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [36]:
# Fine-tune the best model if necessary
history = best_model.fit(X_train, y_train_categorical, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

NameError: name 'best_model' is not defined

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, matthews_corrcoef, cohen_kappa_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

# Load data
df = pd.read_excel('Tvet_dataRegular.xlsx')

# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])

le_trade = LabelEncoder()
df['Trade'] = le_trade.fit_transform(df['Trade'])

# Split data into features and target
X = df.drop('Trade', axis=1)
y = df['Trade']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert target to categorical
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

# Build the model
def create_model():
    model = Sequential()
    model.add(Dense(256, input_dim=X_train.shape[1], activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(y_train_categorical.shape[1], activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model = create_model()

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

# Train the model
history = model.fit(X_train, y_train_categorical, epochs=100, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping, reduce_lr])

# Make predictions
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)

# Decode the predicted and actual labels
y_test_decoded = le_trade.inverse_transform(y_test)
y_pred_decoded = le_trade.inverse_transform(y_pred)

# Calculate MAE and NMAE
mae = mean_absolute_error(y_test, y_pred)
nmae = mae / (y.max() - y.min())

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
roc_auc = roc_auc_score(to_categorical(y_test), y_pred_proba, average='weighted', multi_class='ovr')
mcc = matthews_corrcoef(y_test, y_pred)
cohen_kappa = cohen_kappa_score(y_test, y_pred)

# Print the evaluation metrics
print(f'Model Accuracy: {accuracy}')
print(f'Model F1 Score: {f1}')
print(f'Model Precision: {precision}')
print(f'Model Recall: {recall}')
print(f'Model ROC AUC: {roc_auc}')
print(f'Model MCC: {mcc}')
print(f'Model Cohen Kappa: {cohen_kappa}')
print(f'Model MAE: {mae}')
print(f'Model NMAE: {nmae}')

# Save predictions to Excel
predictions_df = pd.DataFrame({
    'Actual': y_test_decoded,
    'Predicted': y_pred_decoded
})
predictions_df.to_excel('DeepLearning_Predictions.xlsx', index=False)

# Save evaluation metrics to Excel
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'ROC AUC', 'MCC', 'Cohen Kappa', 'MAE', 'NMAE'],
    'Score': [accuracy, f1, precision, recall, roc_auc, mcc, cohen_kappa, mae, nmae]
})
metrics_df.to_excel('Evaluation_Metrics.xlsx', index=False)


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.1184 - loss: 3.4404 - val_accuracy: 0.2500 - val_loss: 2.3489 - learning_rate: 0.0010
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2122 - loss: 2.9989 - val_accuracy: 0.2727 - val_loss: 2.2932 - learning_rate: 0.0010
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1407 - loss: 2.9890 - val_accuracy: 0.2727 - val_loss: 2.2393 - learning_rate: 0.0010
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2191 - loss: 2.8075 - val_accuracy: 0.2614 - val_loss: 2.1942 - learning_rate: 0.0010
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2213 - loss: 2.6430 - val_accuracy: 0.2727 - val_loss: 2.1513 - learning_rate: 0.0010
Epoch 6/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step -

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [4]:
#More Complex (Working Now)
import os
import shutil
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, matthews_corrcoef, cohen_kappa_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Embedding, Flatten, Concatenate, Activation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt

# Remove the existing tuner project directory if it exists
project_dir = 'keras_tuner_dir/tvet_course_recommender'
if os.path.exists(project_dir):
    shutil.rmtree(project_dir)

# Load data
df = pd.read_excel('Tvet_data.xlsx')

# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])

le_trade = LabelEncoder()
df['Trade'] = le_trade.fit_transform(df['Trade'])

# Split data into features and target
X = df.drop('Trade', axis=1)
y = df['Trade']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert target to categorical
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

# Embedding input dimensions
gender_input_dim = len(le_gender.classes_)

# Build the hyperparameter tuning model
def build_model(hp):
    # Input layers
    profile_code_input = Input(shape=(1,), name='ProfileCode')
    gender_input = Input(shape=(1,), name='Gender')
    age_input = Input(shape=(1,), name='Age')
    score_e_input = Input(shape=(1,), name='Score_E')
    score_a_input = Input(shape=(1,), name='Score_A')
    score_c_input = Input(shape=(1,), name='Score_C')
    score_n_input = Input(shape=(1,), name='Score_N')
    score_o_input = Input(shape=(1,), name='Score_O')

    # Embedding layers
    gender_embedding = Embedding(input_dim=gender_input_dim, output_dim=hp.Int('gender_emb_dim', min_value=4, max_value=16, step=4))(gender_input)
    gender_embedding = Flatten()(gender_embedding)
    
    # Concatenate all inputs
    concatenated = Concatenate()([profile_code_input, gender_embedding, age_input, score_e_input, score_a_input, score_c_input, score_n_input, score_o_input])
    
    # Fully connected layers with attention
    x = Dense(units=hp.Int('units_input', min_value=128, max_value=512, step=64), activation='relu', kernel_regularizer=l2(0.001))(concatenated)
    x = BatchNormalization()(x)
    x = Dropout(hp.Float('dropout_input', 0.3, 0.7, step=0.1))(x)
    
    for i in range(hp.Int('num_layers', 2, 5)):
        x = Dense(units=hp.Int(f'units_{i}', min_value=64, max_value=512, step=64), activation='relu', kernel_regularizer=l2(0.001))(x)
        x = BatchNormalization()(x)
        x = Dropout(hp.Float(f'dropout_{i}', 0.3, 0.7, step=0.1))(x)
    
    # Attention mechanism
    attention = Dense(1, activation='tanh')(x)
    attention = Activation('softmax')(attention)
    x = tf.keras.layers.multiply([x, attention])

    # Output layer
    output = Dense(y_train_categorical.shape[1], activation='softmax')(x)

    model = Model(inputs=[profile_code_input, gender_input, age_input, score_e_input, score_a_input, score_c_input, score_n_input, score_o_input], outputs=output)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Set up the tuner with Bayesian optimization
tuner = kt.BayesianOptimization(
    build_model, 
    objective='val_accuracy', 
    max_trials=5, 
    executions_per_trial=3, 
    directory='keras_tuner_dir', 
    project_name='tvet_course_recommender',
    overwrite=True  # Allow overwriting the existing project
)

# Prepare the data for the model
train_data = {
    'ProfileCode': X_train[:, 0],
    'Gender': X_train[:, 1],
    'Age': X_train[:, 2],
    'Score_E': X_train[:, 3],
    'Score_A': X_train[:, 4],
    'Score_C': X_train[:, 5],
    'Score_N': X_train[:, 6],
    'Score_O': X_train[:, 7],
}

test_data = {
    'ProfileCode': X_test[:, 0],
    'Gender': X_test[:, 1],
    'Age': X_test[:, 2],
    'Score_E': X_test[:, 3],
    'Score_A': X_test[:, 4],
    'Score_C': X_test[:, 5],
    'Score_N': X_test[:, 6],
    'Score_O': X_test[:, 7],
}

# Search for the best hyperparameters
tuner.search(train_data, y_train_categorical, epochs=100, validation_split=0.2, verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Save the best model
best_model.save('best_model.h5')

# Load the best model and recreate the optimizer
best_model = tf.keras.models.load_model('best_model.h5')
best_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune the best model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

history = best_model.fit(train_data, y_train_categorical, epochs=100, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping, reduce_lr])

# Make predictions
y_pred_proba = best_model.predict(test_data)
y_pred = np.argmax(y_pred_proba, axis=1)

# Decode the predicted and actual labels
y_test_decoded = le_trade.inverse_transform(y_test)
y_pred_decoded = le_trade.inverse_transform(y_pred)

# Calculate MAE and NMAE
mae = mean_absolute_error(y_test, y_pred)
nmae = mae / (y.max() - y.min())

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
roc_auc = roc_auc_score(to_categorical(y_test), y_pred_proba, average='weighted', multi_class='ovr')
mcc = matthews_corrcoef(y_test, y_pred)
cohen_kappa = cohen_kappa_score(y_test, y_pred)

# Print the evaluation metrics
print(f'Model Accuracy: {accuracy}')
print(f'Model F1 Score: {f1}')
print(f'Model Precision: {precision}')
print(f'Model Recall: {recall}')
print(f'Model ROC AUC: {roc_auc}')
print(f'Model MCC: {mcc}')
print(f'Model Cohen Kappa: {cohen_kappa}')
print(f'Model MAE: {mae}')
print(f'Model NMAE: {nmae}')

# Save predictions to Excel
predictions_df = pd.DataFrame({
    'Actual': y_test_decoded,
    'Predicted': y_pred_decoded
})
predictions_df.to_excel('DeepLearning_Predictions.xlsx', index=False)

# Save evaluation metrics to Excel
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'ROC AUC', 'MCC', 'Cohen Kappa', 'MAE', 'NMAE'],
    'Score': [accuracy, f1, precision, recall, roc_auc, mcc, cohen_kappa, mae, nmae]
})
metrics_df.to_excel('Evaluation_Metrics.xlsx', index=False)


Trial 5 Complete [00h 00m 28s]
val_accuracy: 0.4027777810891469

Best val_accuracy So Far: 0.5069444477558136
Total elapsed time: 00h 02m 49s


  saveable.load_own_variables(weights_store.get(inner_path))


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 72ms/step - accuracy: 0.8532 - loss: 1.3343 - val_accuracy: 0.4792 - val_loss: 2.4520 - learning_rate: 0.0010
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8450 - loss: 1.2367 - val_accuracy: 0.4792 - val_loss: 2.4437 - learning_rate: 0.0010
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8824 - loss: 1.1662 - val_accuracy: 0.5000 - val_loss: 2.4698 - learning_rate: 0.0010
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8384 - loss: 1.2689 - val_accuracy: 0.5208 - val_loss: 2.4628 - learning_rate: 0.0010
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9097 - loss: 1.1610 - val_accuracy: 0.4792 - val_loss: 2.4670 - learning_rate: 0.0010
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/s



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
Model Accuracy: 0.4666666666666667
Model F1 Score: 0.4581622678396871
Model Precision: 0.5257843137254902
Model Recall: 0.4666666666666667
Model ROC AUC: 0.8402018771798705
Model MCC: 0.3691720066364861
Model Cohen Kappa: 0.3554884189325277
Model MAE: 1.5833333333333333
Model NMAE: 0.2638888888888889


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


June 05, 2024 Updated Code with Malik Sb. Feedback

In [1]:
pip install keras-tuner scikit-learn pandas numpy tensorflow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import shutil
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, matthews_corrcoef, cohen_kappa_score, mean_absolute_error
from sklearn.metrics import ndcg_score, mean_squared_error
from sklearn.metrics import precision_recall_fscore_support
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Embedding, Flatten, Concatenate, Activation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt

# Function to calculate Top N Accuracy
def top_n_accuracy(y_true, y_pred_proba, n=3):
    top_n_preds = np.argsort(y_pred_proba, axis=1)[:, -n:]
    top_n_accuracy = np.mean([1 if y_true[i] in top_n_preds[i] else 0 for i in range(len(y_true))])
    return top_n_accuracy

# Function to calculate Mean Reciprocal Rank (MRR)
def mrr(y_true, y_pred_proba):
    order = np.argsort(y_pred_proba, axis=1)
    ranks = np.where(order == np.expand_dims(y_true, axis=1))[1]
    return np.mean(1.0 / (ranks + 1))

# Function to calculate Precision at K
def precision_at_k(y_true, y_pred_proba, k=5):
    top_k_preds = np.argsort(y_pred_proba, axis=1)[:, -k:]
    precision_at_k = np.mean([1 if y_true[i] in top_k_preds[i] else 0 for i in range(len(y_true))])
    return precision_at_k

# Load data
df = pd.read_excel('Tvet_dataCBT.xlsx')

# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])

le_trade = LabelEncoder()
df['Trade'] = le_trade.fit_transform(df['Trade'])

# Split data into features and target
X = df.drop('Trade', axis=1)
y = df['Trade']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert target to categorical
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

# Embedding input dimensions
gender_input_dim = len(le_gender.classes_)

# Build the hyperparameter tuning model
def build_model(hp):
    # Input layers
    profile_code_input = Input(shape=(1,), name='ProfileCode')
    gender_input = Input(shape=(1,), name='Gender')
    age_input = Input(shape=(1,), name='Age')
    score_e_input = Input(shape=(1,), name='Score_E')
    score_a_input = Input(shape=(1,), name='Score_A')
    score_c_input = Input(shape=(1,), name='Score_C')
    score_n_input = Input(shape=(1,), name='Score_N')
    score_o_input = Input(shape=(1,), name='Score_O')

    # Embedding layers
    gender_embedding = Embedding(input_dim=gender_input_dim, output_dim=hp.Int('gender_emb_dim', min_value=4, max_value=16, step=4))(gender_input)
    gender_embedding = Flatten()(gender_embedding)
    
    # Concatenate all inputs
    concatenated = Concatenate()([profile_code_input, gender_embedding, age_input, score_e_input, score_a_input, score_c_input, score_n_input, score_o_input])
    
    # Fully connected layers with attention
    x = Dense(units=hp.Int('units_input', min_value=128, max_value=512, step=64), activation='relu', kernel_regularizer=l2(0.001))(concatenated)
    x = BatchNormalization()(x)
    x = Dropout(hp.Float('dropout_input', 0.3, 0.7, step=0.1))(x)
    
    for i in range(hp.Int('num_layers', 2, 5)):
        x = Dense(units=hp.Int(f'units_{i}', min_value=64, max_value=512, step=64), activation='relu', kernel_regularizer=l2(0.001))(x)
        x = BatchNormalization()(x)
        x = Dropout(hp.Float(f'dropout_{i}', 0.3, 0.7, step=0.1))(x)
    
    # Attention mechanism
    attention = Dense(1, activation='tanh')(x)
    attention = Activation('softmax')(attention)
    x = tf.keras.layers.multiply([x, attention])

    # Output layer
    output = Dense(y_train_categorical.shape[1], activation='softmax')(x)

    model = Model(inputs=[profile_code_input, gender_input, age_input, score_e_input, score_a_input, score_c_input, score_n_input, score_o_input], outputs=output)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Set up the tuner with Bayesian optimization
tuner = kt.BayesianOptimization(
    build_model, 
    objective='val_accuracy', 
    max_trials=20, 
    executions_per_trial=3, 
    directory='keras_tuner_dir', 
    project_name='tvet_course_recommender',
    overwrite=True  # Allow overwriting the existing project
)

# Prepare the data for the model
train_data = {
    'ProfileCode': X_train[:, 0],
    'Gender': X_train[:, 1],
    'Age': X_train[:, 2],
    'Score_E': X_train[:, 3],
    'Score_A': X_train[:, 4],
    'Score_C': X_train[:, 5],
    'Score_N': X_train[:, 6],
    'Score_O': X_train[:, 7],
}

test_data = {
    'ProfileCode': X_test[:, 0],
    'Gender': X_test[:, 1],
    'Age': X_test[:, 2],
    'Score_E': X_test[:, 3],
    'Score_A': X_test[:, 4],
    'Score_C': X_test[:, 5],
    'Score_N': X_test[:, 6],
    'Score_O': X_test[:, 7],
}

# Search for the best hyperparameters
tuner.search(train_data, y_train_categorical, epochs=100, validation_split=0.2, verbose=1, callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Save the best model
best_model.save('best_model.h5')

# Load the best model and recreate the optimizer
best_model = tf.keras.models.load_model('best_model.h5')
best_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fine-tune the best model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

history = best_model.fit(train_data, y_train_categorical, epochs=100, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping, reduce_lr])

# Make predictions
y_pred_proba = best_model.predict(test_data)
y_pred = np.argmax(y_pred_proba, axis=1)

# Decode the predicted and actual labels
y_test_decoded = le_trade.inverse_transform(y_test)
y_pred_decoded = le_trade.inverse_transform(y_pred)

# Calculate MAE and NMAE
mae = mean_absolute_error(y_test, y_pred)
nmae = mae / (y.max() - y.min())

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate Top N Accuracy
#top_n_acc = top_n_accuracy(y_test, y_pred_proba, n=3)

# Calculate MRR
mrr_score = mrr(y_test, y_pred_proba)

# Calculate NDCG
ndcg = ndcg_score(to_categorical(y_test), y_pred_proba, k=5)

# Calculate Precision at K
#prec_at_k = precision_at_k(y_test, y_pred_proba, k=5)

# Evaluate the model using traditional metrics
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
roc_auc = roc_auc_score(to_categorical(y_test), y_pred_proba, average='weighted', multi_class='ovr')
mcc = matthews_corrcoef(y_test, y_pred)
cohen_kappa = cohen_kappa_score(y_test, y_pred)

# Print the evaluation metrics
print(f'Model Accuracy: {accuracy}')
print(f'Model F1 Score: {f1}')
print(f'Model Precision: {precision}')
print(f'Model Recall: {recall}')
print(f'Model ROC AUC: {roc_auc}')
print(f'Model MCC: {mcc}')
print(f'Model Cohen Kappa: {cohen_kappa}')
print(f'Model MAE: {mae}')
print(f'Model NMAE: {nmae}')
print(f'Model RMSE: {rmse}')
#print(f'Model Top N Accuracy: {top_n_acc}')
print(f'Model MRR: {mrr_score}')
print(f'Model NDCG: {ndcg}')
#print(f'Model Precision at K: {prec_at_k}')

# Save predictions to Excel
predictions_df = pd.DataFrame({
    'Actual': y_test_decoded,
    'Predicted': y_pred_decoded
})
predictions_df.to_excel('DeepLearning_Predictions.xlsx', index=False)

# Save evaluation metrics to Excel
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'ROC AUC', 'MCC', 'Cohen Kappa', 'MAE', 'NMAE', 'RMSE', 'Top N Accuracy', 'MRR', 'NDCG', 'Precision at K'],
    'Score': [accuracy, f1, precision, recall, roc_auc, mcc, cohen_kappa, mae, nmae, rmse, top_n_acc, mrr_score, ndcg, prec_at_k]
})
metrics_df.to_excel('Evaluation_Metrics.xlsx', index=False)


Trial 20 Complete [00h 00m 26s]
val_accuracy: 0.7017543911933899

Best val_accuracy So Far: 0.9122806986172994
Total elapsed time: 00h 08m 35s


  saveable.load_own_variables(weights_store.get(inner_path))


Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 180ms/step - accuracy: 0.9931 - loss: 0.5934 - val_accuracy: 0.8421 - val_loss: 0.8693 - learning_rate: 0.0010
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.9813 - loss: 0.5890 - val_accuracy: 0.8421 - val_loss: 0.8446 - learning_rate: 0.0010
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.9783 - loss: 0.6040 - val_accuracy: 0.9474 - val_loss: 0.7989 - learning_rate: 0.0010
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 1.0000 - loss: 0.5593 - val_accuracy: 0.9474 - val_loss: 0.7630 - learning_rate: 0.0010
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.9635 - loss: 0.6026 - val_accuracy: 0.9474 - val_loss: 0.7292 - learning_rate: 0.0010
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/



Model Accuracy: 0.7391304347826086
Model F1 Score: 0.7328130806391675
Model Precision: 0.7575250836120402
Model Recall: 0.7391304347826086
Model ROC AUC: 0.9248665141113652
Model MCC: 0.5513698630136986
Model Cohen Kappa: 0.5384615384615384
Model MAE: 0.30434782608695654
Model NMAE: 0.15217391304347827
Model RMSE: 0.6255432421712243
Model MRR: 0.3768115942028985
Model NDCG: 0.9037208052795107


NameError: name 'top_n_acc' is not defined