In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib

from keras import Input, Sequential
from keras.src.callbacks import LearningRateScheduler, EarlyStopping
from keras.src.layers import Dense, Dropout, BatchNormalization
from keras.src.regularizers import L2
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier

# Domains column Imputing
## Loading the dataset

In [None]:
# Load the cleaned BGG dataset (an Excel workbook) into a DataFrame.
df = pd.read_excel(io='../../../data/BGG_Cleaned_Data_Set_Imputing.xlsx')

## Data preparation
#### Splitting the data

In [None]:
# The eight domain columns the model predicts (one output per column).
targets = [
    'Strategy Games',
    'Thematic Games',
    'Wargames',
    'Family Games',
    'Customizable Games',
    'Abstract Games',
    'Party Games',
    'Children\'s Games',
]

# The target columns form the label matrix; every remaining column is a feature.
y = df[targets]
X = df.drop(columns=targets)

# Keep 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Defining FNN model

In [None]:
def create_fnn():
    """Build and compile the feed-forward network for multi-label prediction.

    The network stacks three Dense -> BatchNormalization -> Dropout(0.3) stages
    (256, 128 and 64 ReLU units) and ends with one sigmoid unit per entry in
    ``targets``, trained with binary cross-entropy so every label is scored
    independently.

    Reads the notebook-level ``X_train`` (for the number of input features) and
    ``targets`` (for the output width); both must be defined before the call.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input layer instead of the
        # legacy ``input_dim`` keyword on the first Dense layer.
        Input(shape=(X_train.shape[1],)),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(len(targets), activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

#### Learning rate scheduler

In [None]:
def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch.

    Args:
        epoch: Epoch index supplied by the caller.
        lr: Learning rate currently in use.

    Returns:
        ``lr`` unchanged while ``epoch`` is at most 20, otherwise ``lr`` halved.
    """
    return lr * 0.5 if epoch > 20 else lr

#### Early stopping

In [None]:
# Stop training once the validation loss has gone 10 epochs without improving,
# and restore the weights from the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

#### Callbacks: learning-rate schedule and early stopping

In [None]:
# Training callbacks: the learning-rate schedule followed by early stopping.
callbacks = [
    LearningRateScheduler(scheduler),
    early_stopping,
]

#### Setting up the pipeline

In [None]:
# Standardise the features, then train the Keras network through the
# scikit-learn-compatible SciKeras wrapper so both steps fit and predict as one
# estimator.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', KerasClassifier(
        # `model` is the current SciKeras argument for the model-building
        # callable; the older `build_fn` spelling is deprecated.
        model=create_fnn,
        epochs=50,
        batch_size=32,
        verbose=1,
        callbacks=callbacks
    ))
])

#### Training the model

In [None]:
# Validate on the last 20% of the (already shuffled) training rows.
# The split is made inside the classifier step, after StandardScaler has
# transformed the data, so validation loss is measured on features scaled the
# same way as the training batches. Passing (X_test, y_test) as validation_data
# would hand Keras unscaled features and would also let early stopping tune
# itself on the test set.
pipeline.fit(X_train, y_train, clf__validation_split=0.2)

#### Make predictions

In [None]:
# Run the held-out features through the fitted pipeline (scaler, then classifier).
y_pred = pipeline.predict(X=X_test)

## Model evaluation
#### Accuracy

In [None]:
# With a multi-label target, accuracy_score reports subset accuracy: a row
# counts as correct only when every one of its labels is predicted correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred, normalize=True)
accuracy

#### Confusion matrix

In [None]:
# One 2x2 confusion matrix per target label.
matrix = multilabel_confusion_matrix(y_test, y_pred)

for label, mat in zip(targets, matrix):
    # Each matrix is laid out as [[TN, FP], [FN, TP]]: rows are the actual
    # class, columns the predicted class, and index 0 is the negative
    # ("Not <label>") class, so the negative label must come first on both axes.
    class_names = [f'Not {label}', label]
    df_confusion_matrix = pd.DataFrame(mat, index=class_names, columns=class_names)

    plt.figure(figsize=(6, 4))
    sns.heatmap(df_confusion_matrix, annot=True, fmt='g', cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix for {label}')
    plt.show()

#### Classification report

In [None]:
# Per-label precision / recall / F1 summary; zero_division=0 reports 0 for
# metrics that would otherwise divide by zero.
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=targets,
    zero_division=0,
)
print(report)

## Saving the model

In [None]:
# Persist the whole fitted pipeline (scaler and classifier) to disk.
joblib.dump(
    value=pipeline,
    filename='../../../models/saved/domain_imputing_feed_forward_model.pkl',
)