In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import multilabel_confusion_matrix, precision_recall_curve
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Autoencoder network for imputing the domain feature
## Load in the data

In [None]:
# Load the cleaned BGG spreadsheet; the relative path is resolved against the
# kernel's current working directory.
data_path = '../../../data/BGG_Cleaned_Data_Set_Imputing.xlsx'
df = pd.read_excel(data_path)

## Data preparation
#### Splitting the data

In [None]:
# Columns holding the multi-label targets; every other column is used as a feature.
targets = [
    'Strategy Games', 'Thematic Games', 'Wargames', 'Unknown', 'Family Games',
    'Customizable Games', 'Abstract Games', 'Party Games', "Children's Games",
]

X = df.drop(columns=targets)
y = df[targets]

# 80/10/10 split: carve off 20% first, then halve that hold-out into the
# validation and test sets.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=42)

#### Standardize the features

In [None]:
# Fit the scaler on the training split only, then apply the same statistics to
# every split so validation/test data never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

## Building the autoencoder network
#### Autoencoder dimensions

In [None]:
# Size of the bottleneck layer, i.e. of the learned representation.
encoding_dim = 64
# Width of the autoencoder's input and output: one unit per scaled feature column.
input_dim = X_train_scaled.shape[-1]

#### Autoencoder architecture

In [None]:
# Symmetric dense autoencoder: input_dim -> 128 -> encoding_dim -> 128 -> input_dim.
input_layer = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_layer)
# After this line `encoded` refers to the bottleneck output, which is what the
# encoder model is later built from.
encoded = Dense(encoding_dim, activation='relu')(encoded)
decoded = Dense(128, activation='relu')(encoded)
# Linear output: the reconstruction targets are StandardScaler outputs, which are
# centred on zero and unbounded, so a sigmoid (range 0..1) could never reproduce
# negative values or values above 1.
decoded = Dense(input_dim, activation='linear')(decoded)

# Reconstruction is trained with mean squared error against the scaled inputs.
autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

#### Training the autoencoder

In [None]:
# Stop once the validation loss has not improved for 5 epochs and roll the
# model back to its best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Input and target are the same matrix: the network learns to reconstruct its input.
autoencoder.fit(
    X_train_scaled,
    X_train_scaled,
    validation_data=(X_val_scaled, X_val_scaled),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
)

#### Extracting the encoder part

In [None]:
# Reuse the trained layers from the input up to the bottleneck as a stand-alone encoder.
# NOTE(review): this rebinds `autoencoder`, so from here on the name refers to
# the encoder only and the full reconstruction model is no longer reachable
# under that name.
autoencoder = Model(input_layer, encoded)

#### Transforming the input features using the encoder

In [None]:
# Project each scaled split into the bottleneck space; at this point
# `autoencoder` is the encoder-only model.
X_train_encoded, X_val_encoded, X_test_encoded = (
    autoencoder.predict(split)
    for split in (X_train_scaled, X_val_scaled, X_test_scaled)
)

## Building and training the classifier
#### Building the classifier

In [None]:
# Small feed-forward head on top of the encoded features. Each output unit has
# its own sigmoid, so every target column is predicted independently (multi-label).
classifier = Sequential([
    # Explicit Input layer instead of the legacy `input_dim=` keyword on Dense.
    Input(shape=(encoding_dim,)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y_train.shape[1], activation='sigmoid'),
])
# 'binary_accuracy' is spelled out rather than 'accuracy': the generic alias is
# resolved by Keras heuristics and, depending on the Keras version, may resolve
# to categorical (argmax) accuracy for a multi-unit output, which is not
# meaningful for multi-label targets.
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

#### Training the classifier

In [None]:
# Train the classifier on the encoded features. The EarlyStopping callback
# created for the autoencoder run is passed again here.
validation_pair = (X_val_encoded, y_val)
classifier.fit(
    X_train_encoded,
    y_train,
    validation_data=validation_pair,
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
)

## Evaluating the model
#### Accuracy

In [None]:
# Score the classifier on the held-out test split; the result is unpacked as
# the loss followed by the single compiled metric.
test_scores = classifier.evaluate(X_test_encoded, y_test)
loss, accuracy = test_scores
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

#### Multi-label confusion matrix

In [None]:
# Per-label probabilities, thresholded at 0.5 to obtain hard 0/1 predictions.
y_test_pred = classifier.predict(X_test_encoded)
y_test_pred_binary = (y_test_pred > 0.5).astype(int)

# One 2x2 matrix per label, laid out by scikit-learn as [[TN, FP], [FN, TP]]:
# row 0 / column 0 are the negative class, row 1 / column 1 the positive class.
matrix = multilabel_confusion_matrix(y_test, y_test_pred_binary)

for label, mat in zip(targets, matrix):
    # Tick labels follow the negative-then-positive layout described above, so
    # the top-left cell reads as "not this label, predicted not this label".
    class_names = [f'Not {label}', label]
    df_confusion_matrix = pd.DataFrame(mat, index=class_names, columns=class_names)

    plt.figure(figsize=(6, 4))
    sns.heatmap(df_confusion_matrix, annot=True, fmt='g', cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix for {label}')
    plt.show()

#### Precision-recall curves

In [None]:
# One precision-recall curve per target, computed from the predicted
# probabilities rather than the thresholded 0/1 predictions.
plt.figure(figsize=(10, 8))
for i, label in enumerate(y_test.columns):
    precision, recall, _ = precision_recall_curve(y_test.iloc[:, i], y_test_pred[:, i])
    # Use the target's column name in the legend so each curve can be identified.
    plt.plot(recall, precision, label=label)

# Add labels and legend
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')
plt.legend()
plt.grid()
plt.show()