In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder


drive_mount = False
if drive_mount:
    from google.colab import drive
    drive.mount('/content/drive/')
    DATA_DIR = "/content/drive/MyDrive/Colab_Notebooks/COMP0036/datasets"
else:
    DATA_DIR = "../datasets"
EPL_TRAINING_DATA_CSV = DATA_DIR + "/epl-training-processed.csv"


# Read the CSV file
epl_df = pd.read_csv(EPL_TRAINING_DATA_CSV)
epl_df.drop(['Date', 'HomeTeam', 'AwayTeam'], axis=1, inplace=True)

epl_df

Unnamed: 0,HomeTeamExpenditure,HomeTeamIncome,HomeTeamBalance,AwayTeamExpenditure,AwayTeamIncome,AwayTeamBalance,home_rest_time,away_rest_time,HomeFTHG_all_avg_25,AwayFTHG_all_avg_25,...,D_Away,L_Away,GF_Away,GA_Away,GD_Away,Pts_Away,HomeElo,AwayElo,EloDiff,FTR
0,12.00,0.00,-12.00,34.08,0.00,-34.08,98,98,1.24,1.04,...,9.0,17.0,35.0,47.0,-12.0,45.0,1486.372150,1471.813088,14.559062,1
1,6.75,2.65,-4.10,0.00,2.10,2.10,98,98,1.32,0.88,...,13.0,8.0,66.0,38.0,28.0,64.0,1472.842921,1583.004052,-110.161131,0
2,13.88,0.38,-13.50,10.50,1.35,-9.15,98,98,1.52,1.68,...,8.0,16.0,49.0,53.0,-4.0,50.0,1450.497956,1489.364296,-38.866341,1
3,9.08,6.90,-2.18,31.50,11.80,-19.70,99,99,1.24,0.84,...,8.0,6.0,67.0,30.0,37.0,80.0,1499.163815,1688.999370,-189.835554,0
4,34.55,0.23,-34.33,0.40,0.08,-0.33,100,100,1.92,2.16,...,8.0,15.0,48.0,57.0,-9.0,53.0,1590.870126,1516.163605,74.706521,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5615,94.00,169.20,75.20,153.20,44.60,-108.60,7,7,1.04,1.24,...,14.0,5.0,68.0,33.0,35.0,71.0,1548.891535,1700.032741,-151.141206,1
5616,467.80,269.40,-198.40,64.85,10.09,-54.76,7,7,1.12,1.48,...,14.0,9.0,58.0,46.0,12.0,59.0,1563.118212,1602.142634,-39.024422,0
5617,100.35,195.90,95.55,68.47,52.60,-15.87,8,6,2.12,1.36,...,7.0,16.0,55.0,53.0,2.0,52.0,1654.830595,1540.074128,114.756466,1
5618,137.56,156.40,18.84,40.50,82.80,42.30,7,8,1.48,1.76,...,12.0,18.0,34.0,57.0,-23.0,36.0,1569.364767,1484.013329,85.351438,0


## Grid Search

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.metrics import make_scorer, accuracy_score

scaler = StandardScaler()

USE_XGBOOST = False
if USE_XGBOOST:
    from xgboost import XGBClassifier

# Sample DataFrame
df = epl_df.copy()

# Drop rows with NaN values
df = df.dropna()

# Encode the 'FTR' column
target_column = 'FTR'

# Features and Target
X = df.drop(target_column, axis=1)
X = scaler.fit_transform(X)
y = df[target_column]

# Define 5-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Pipeline that includes SMOTE and the classifier
def build_pipeline(classifier, classifier_params):
    return ImbPipeline([
        ('smote', SMOTE()),
        ('classifier', classifier(**classifier_params))
    ])

# Parameter grid for Logistic Regression
logistic_params = {'classifier__multi_class': ['multinomial'], 'classifier__C': [0.1, 1, 10]}

# Parameter grid for Random Forest
forest_params = {'classifier__n_estimators': [100, 200, 300], 'classifier__max_depth': [5, 10, 15], 'classifier__min_samples_split': [2, 5, 10]}

# Parameter grid for SVM
svm_params = {'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['linear', 'rbf']}

logistic_smote_pipeline = build_pipeline(LogisticRegression, {'max_iter': 1000})
rf_smote_pipeline = build_pipeline(RandomForestClassifier, {'random_state': 42})
svm_smote_pipeline = build_pipeline(SVC, {'probability': True})

# Grid search for each model
logistic_grid = GridSearchCV(logistic_smote_pipeline, logistic_params, cv=kf, scoring='accuracy', n_jobs=-1)
forest_grid = GridSearchCV(rf_smote_pipeline, forest_params, cv=kf, scoring='accuracy', n_jobs=-1)
svm_grid = GridSearchCV(svm_smote_pipeline, svm_params, cv=kf, scoring='accuracy', n_jobs=-1)

# Fit the grid search models
logistic_grid.fit(X, y)
print(f"Best Logistic Regression Parameters: {logistic_grid.best_params_}")
print(f"Logistic Regression Best CV Accuracy: {logistic_grid.best_score_:.4f}")

forest_grid.fit(X, y)
print(f"Best Random Forest Parameters: {forest_grid.best_params_}")
print(f"Random Forest Best CV Accuracy: {forest_grid.best_score_:.4f}")

svm_grid.fit(X, y)
print(f"Best SVM Parameters: {svm_grid.best_params_}")
print(f"SVM Best CV Accuracy: {svm_grid.best_score_:.4f}")

# XGBoost Classifier Model with Grid Search
if USE_XGBOOST:
    xgboost_params = {'classifier__n_estimators': [100, 200, 300], 'classifier__max_depth': [3, 5, 7]}
    xgboost_grid = GridSearchCV(build_pipeline(XGBClassifier, {'use_label_encoder': False, 'eval_metric': 'mlogloss'}), xgboost_params, cv=kf, scoring='accuracy', n_jobs=-1)
    xgboost_grid.fit(X, y)
    print(f"Best XGBoost Parameters: {xgboost_grid.best_params_}")
    print(f"XGBoost Best CV Accuracy: {xgboost_grid.best_score_:.4f}")


Best Logistic Regression Parameters: {'classifier__C': 0.1, 'classifier__multi_class': 'multinomial'}
Logistic Regression Best CV Accuracy: 0.5053
Best Random Forest Parameters: {'classifier__max_depth': 15, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 200}
Random Forest Best CV Accuracy: 0.5190
Best SVM Parameters: {'classifier__C': 0.1, 'classifier__kernel': 'rbf'}
SVM Best CV Accuracy: 0.5096


## Cross-Validation

In [31]:
# Logistic Regression Model with Cross-Validation
LOGISTIC_PARAMS = {
    'multi_class': 'multinomial',
    'C': 0.1,
    'solver': 'lbfgs',
    'max_iter': 1000
}
RANDOM_FOREST_PARAMS = {
    'max_depth': 15,
    'min_samples_split': 5,
    'n_estimators': 200
}
SVM_PARAMS = {
    'probability': True, 
    'C': 0.1,
    'kernel': 'rbf'
}

### CV Without SMOTE

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

USE_XGBOOST = False

# Sample DataFrame
df = epl_df.copy()

# Drop rows with NaN values and explicitly create a new copy
df = df.dropna()

# Encode the 'FTR' column
target_column = 'FTR' # W, D, L becomes 0, 1, 2

# Features and Target
X = df.drop(target_column, axis=1)
# X = df.drop(['HomeElo', 'AwayElo'], axis=1)
X = scaler.fit_transform(X)
y = df[target_column]

# Define 5-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


logistic_model = LogisticRegression(**LOGISTIC_PARAMS)
logistic_scores = cross_val_score(logistic_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)

# Random Forest Classifier Model with Cross-Validation
random_forest_model = RandomForestClassifier(**RANDOM_FOREST_PARAMS)
forest_scores = cross_val_score(random_forest_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)

# XGBoost Classifier Model with Cross-Validation
if USE_XGBOOST:
    from xgboost import XGBClassifier
    xgboost_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    xgboost_scores = cross_val_score(xgboost_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)


# SVM Classifier Model with Cross-Validation
svm_model = SVC(**SVM_PARAMS)
svm_scores = cross_val_score(svm_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)


# Displaying the results
print(f"Logistic Regression CV Accuracy: {logistic_scores.mean():.4f} (+/- {logistic_scores.std() * 2:.4f})")
print(f"Random Forest CV Accuracy: {forest_scores.mean():.4f} (+/- {forest_scores.std() * 2:.4f})")
if USE_XGBOOST:
    print(f"XGBoost CV Accuracy: {xgboost_scores.mean():.4f} (+/- {xgboost_scores.std() * 2:.4f})")
print(f"SVM CV Accuracy: {svm_scores.mean():.4f} (+/- {svm_scores.std() * 2:.4f})")


Logistic Regression CV Accuracy: 0.5365 (+/- 0.0153)
Random Forest CV Accuracy: 0.5351 (+/- 0.0130)
SVM CV Accuracy: 0.5399 (+/- 0.0271)


### CV With SMOTE

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

USE_XGBOOST = False

# Sample DataFrame
df = epl_df.copy()

# Drop rows with NaN values and explicitly create a new copy
df = df.dropna()

# Encode the 'FTR' column
target_column = 'FTR' # W, D, L becomes 0, 1, 2

# Features and Target
X = df.drop(target_column, axis=1)
# X = df.drop(['HomeElo', 'AwayElo'], axis=1)
X = scaler.fit_transform(X)
y = df[target_column]

# Define 5-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


logistic_model = build_pipeline(LogisticRegression, LOGISTIC_PARAMS)
logistic_scores = cross_val_score(logistic_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)

# Random Forest Classifier Model with Cross-Validation
random_forest_model = build_pipeline(RandomForestClassifier, RANDOM_FOREST_PARAMS)
forest_scores = cross_val_score(random_forest_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)

# XGBoost Classifier Model with Cross-Validation
if USE_XGBOOST:
    from xgboost import XGBClassifier
    xgboost_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    xgboost_scores = cross_val_score(xgboost_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)


# SVM Classifier Model with Cross-Validation
svm_model = build_pipeline(SVC, SVM_PARAMS)
svm_scores = cross_val_score(svm_model, X, y, cv=kf, scoring='accuracy', n_jobs=-1)


# Displaying the results
print(f"Logistic Regression CV Accuracy: {logistic_scores.mean():.4f} (+/- {logistic_scores.std() * 2:.4f})")
print(f"Random Forest CV Accuracy: {forest_scores.mean():.4f} (+/- {forest_scores.std() * 2:.4f})")
if USE_XGBOOST:
    print(f"XGBoost CV Accuracy: {xgboost_scores.mean():.4f} (+/- {xgboost_scores.std() * 2:.4f})")
print(f"SVM CV Accuracy: {svm_scores.mean():.4f} (+/- {svm_scores.std() * 2:.4f})")


Logistic Regression CV Accuracy: 0.5062 (+/- 0.0197)
Random Forest CV Accuracy: 0.5105 (+/- 0.0189)
SVM CV Accuracy: 0.5121 (+/- 0.0258)


## classification report

### with smote

In [33]:
from sklearn.svm import SVC
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE  # Import SMOTE

scaler = StandardScaler()

df = epl_df.copy()

# Specify the name of the target column
target_column = 'FTR'  # Replace with the name of your target column

# Features and Target
X = df.drop(target_column, axis=1)
X = scaler.fit_transform(X)
y = df[target_column]  # Target

# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

# Apply SMOTE
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Create and train models on the SMOTE-augmented data
logistic_model = LogisticRegression(**LOGISTIC_PARAMS)
logistic_model.fit(X_train_smote, y_train_smote)

random_forest_model = RandomForestClassifier(**RANDOM_FOREST_PARAMS)
random_forest_model.fit(X_train_smote, y_train_smote)

svm_model = SVC(**SVM_PARAMS)  # Ensure probability is set to True if needed
svm_model.fit(X_train_smote, y_train_smote)

if USE_XGBOOST:
    from xgboost import XGBClassifier, plot_importance
    xgboost_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    xgboost_model.fit(X_train_smote, y_train_smote)
    plot_importance(xgboost_model)
    plt.show()


# Predicting on the test set
y_pred = logistic_model.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

print("Model: Logistic Regression")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

y_pred = random_forest_model.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)
print("Model: Random Forest")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

if USE_XGBOOST:
    y_pred = xgboost_model.predict(X_test)

    # Evaluating the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=1)

    print("Model: XGBoost")
    print("Accuracy:", accuracy)
    print("Classification Report:\n", report)

# Predicting on the test set with SVM
y_pred = svm_model.predict(X_test)

# Evaluating the SVM model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

print("Model: Support Vector Machine (SVM)")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Model: Logistic Regression
Accuracy: 0.5115658362989324
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.58      0.55       336
           1       0.30      0.37      0.33       274
           2       0.69      0.54      0.61       514

    accuracy                           0.51      1124
   macro avg       0.50      0.50      0.49      1124
weighted avg       0.54      0.51      0.52      1124

Model: Random Forest
Accuracy: 0.5284697508896797
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.59      0.54       336
           1       0.31      0.27      0.29       274
           2       0.65      0.63      0.64       514

    accuracy                           0.53      1124
   macro avg       0.49      0.49      0.49      1124
weighted avg       0.52      0.53      0.52      1124
Model: Support Vector Machine (SVM)
Accuracy: 0.5169039145907474
Classification Repor

### without smote

In [35]:
from sklearn.svm import SVC
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE  # Import SMOTE

scaler = StandardScaler()

df = epl_df.copy()

# Specify the name of the target column
target_column = 'FTR'  # Replace with the name of your target column

# Features and Target
X = df.drop(target_column, axis=1)
X = scaler.fit_transform(X)
y = df[target_column]  # Target

# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)


# Create and train models on the SMOTE-augmented data
logistic_model = LogisticRegression(**LOGISTIC_PARAMS)
logistic_model.fit(X_train, y_train)

random_forest_model = RandomForestClassifier(**RANDOM_FOREST_PARAMS)
random_forest_model.fit(X_train, y_train)

svm_model = SVC(**SVM_PARAMS)  # Ensure probability is set to True if needed
svm_model.fit(X_train, y_train)

if USE_XGBOOST:
    from xgboost import XGBClassifier, plot_importance
    xgboost_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    xgboost_model.fit(X_train, y_train)
    plot_importance(xgboost_model)
    plt.show()


# Predicting on the test set
y_pred = logistic_model.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

print("Model: Logistic Regression")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

y_pred = random_forest_model.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)
print("Model: Random Forest")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

if USE_XGBOOST:
    y_pred = xgboost_model.predict(X_test)

    # Evaluating the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=1)

    print("Model: XGBoost")
    print("Accuracy:", accuracy)
    print("Classification Report:\n", report)

# Predicting on the test set with SVM
y_pred = svm_model.predict(X_test)

# Evaluating the SVM model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

print("Model: Support Vector Machine (SVM)")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Model: Logistic Regression
Accuracy: 0.550711743772242
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.54      0.53       329
           1       0.33      0.11      0.16       266
           2       0.60      0.78      0.68       529

    accuracy                           0.55      1124
   macro avg       0.48      0.48      0.46      1124
weighted avg       0.51      0.55      0.51      1124

Model: Random Forest
Accuracy: 0.5364768683274022
Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.51      0.50       329
           1       0.30      0.06      0.11       266
           2       0.57      0.79      0.67       529

    accuracy                           0.54      1124
   macro avg       0.46      0.45      0.42      1124
weighted avg       0.49      0.54      0.48      1124
Model: Support Vector Machine (SVM)
Accuracy: 0.5462633451957295
Classification Report

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from collections import Counter

# Define the MLP class
class MLP(nn.Module):
    def __init__(self, input_dim, layer_num, output_dim, hidden_dim, dropout_prob=0.4):
        super(MLP, self).__init__()
        layers = []

        # Input layer
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout_prob))

        # Hidden layers
        for _ in range(layer_num - 2):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))

        # Output layer
        layers.append(nn.Linear(hidden_dim, output_dim))

        self.layers = nn.Sequential(*layers)

    def forward(self, input):
        for layer in self.layers:
            input = layer(input)
        return input

def calculate_accuracy(y_true, y_pred):
    predicted = torch.argmax(y_pred, dim=1)
    correct = (predicted == y_true).float()
    return correct.sum() / len(correct)

# Function to evaluate the model on the test set
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    all_y_pred = []
    all_y_true = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred_tensor = model(X_batch)
            y_pred = torch.argmax(y_pred_tensor, dim=1)
            all_y_pred.extend(y_pred.cpu().numpy())
            all_y_true.extend(y_batch.cpu().numpy())

    return all_y_true, all_y_pred

# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assume 'df' is your DataFrame
df = epl_df.copy()

# Preprocess DataFrame
X = df.iloc[:, :-1].values  # All columns except last
y = df.iloc[:, -1].values  # Last column

scaler = StandardScaler()
X = scaler.fit_transform(X)

# Encode labels ('H', 'D', 'A') to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Convert to PyTorch tensors (keep them on CPU for now)
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y_encoded, dtype=torch.long)

# Create a custom dataset
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Initial train-test split
train_X, test_X, train_y, test_y = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# Apply SMOTE to the training set to balance it
smote = SMOTE(random_state=42)
train_X_resampled, train_y_resampled = smote.fit_resample(train_X, train_y)

# Further split the resampled train set into train and validation sets
train_X_resampled, val_X, train_y_resampled, val_y = train_test_split(train_X_resampled, train_y_resampled, test_size=0.25, random_state=42) # 0.25 x 0.8 = 0.2

# Create custom datasets with the resampled data
train_dataset = CustomDataset(train_X_resampled, train_y_resampled)
val_dataset = CustomDataset(val_X, val_y)
test_dataset = CustomDataset(test_X, test_y)

# Create DataLoaders for train, validation, and test sets
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

best_accuracy = 0
best_accuracy_report = None

# Model Parameters
input_dim = X.shape[1]  # Number of features
output_dim = 3  # For three categories
layer_num = 5  # Number of layers
hidden_dim = 40  # Number of neurons in hidden layer
learning_rate = 0.001
epochs = 30
weight_decay = 1e-2  # L2 regularization factor

for run in range(100):
  model = MLP(input_dim, layer_num, output_dim, hidden_dim).to(device)
  optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  criterion = nn.CrossEntropyLoss()

  # Training and Validation Loop
  for epoch in range(epochs):

    # Training Phase
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    # Validation Phase
    model.eval()
    total_val_loss = 0
    total_val_correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

  # Evaluate the model
  y_test_np, y_pred = evaluate_model(model, test_loader)

  # Calculate accuracy and F1 score
  accuracy = accuracy_score(y_test_np, y_pred)
  report = classification_report(y_test_np, y_pred, zero_division=1)

  # Update best scores and reports
  if accuracy > best_accuracy:
      best_accuracy = accuracy
      best_accuracy_report = report

print("Best Accuracy Report:")
print(best_accuracy_report)
