In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import optim
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, roc_curve, f1_score

# Workbook that holds the cross-validation folds.
excel_file_path = "./Data/Fine-tuning/866-reaction-mean-std-cv-1-20-encoding-vector.xlsx"


def _features_and_labels(frame):
    """Return ``(X, y)``: X is every column except the first and last, y is the last column."""
    return frame.iloc[:, 1:-1].values, frame.iloc[:, -1].values


# Only the 'FullCV_1' sheet is read, so `data` is a single DataFrame.
# `syn` holds the extra samples from the CSV (SMOTE-generated, going by its file name).
data = pd.read_excel(excel_file_path, sheet_name='FullCV_1')
syn = pd.read_csv("./Data/Fine-tuning/866-reaction-mean-std-encoding-vector-regular-smote-synthetic.csv")

# Fixed positional split: rows [0, 605) train, [605, 692) validation, [692, end) test.
train, val, test = data.iloc[:605], data.iloc[605:692], data.iloc[692:]

# The CSV rows are appended to the training split only; validation and test
# consist solely of rows from the workbook.
train_syn = pd.concat([train, syn], axis=0, ignore_index=True)

X_train, y_train = _features_and_labels(train_syn)
X_val, y_val = _features_and_labels(val)
X_test, y_test = _features_and_labels(test)

class NN(nn.Module):
    """Fully connected binary classifier ending in a sigmoid.

    Each hidden stage is ``Linear -> ReLU -> Dropout``; the head is
    ``Linear(..., 1) -> Sigmoid``, so ``forward`` returns one value per
    input row, suitable for ``nn.BCELoss``.

    Args:
        input_dim: Number of input features (default 400).
        hidden_dims: Width of each hidden layer, in order (default (300, 150)).
        dropout: Dropout probability applied after every hidden layer.
        seed: Value passed to ``torch.manual_seed`` before the layers are
            created, or ``None`` to leave the global RNG untouched.

    Calling ``NN()`` with no arguments builds exactly the original
    400 -> 300 -> 150 -> 1 network with seed 0.
    """

    def __init__(self, input_dim=400, hidden_dims=(300, 150), dropout=0.2, seed=0):
        super().__init__()
        # Seeding here resets the *global* torch RNG, so every instance built
        # with the same seed starts from identical weights. Pass seed=None to
        # opt out of that side effect.
        if seed is not None:
            torch.manual_seed(seed)

        layers = []
        in_features = input_dim
        for out_features in hidden_dims:
            layers.extend([
                nn.Linear(in_features, out_features),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            in_features = out_features
        layers.extend([nn.Linear(in_features, 1), nn.Sigmoid()])

        # The attribute keeps its historical name so state_dict keys
        # ("regressor.<idx>.weight", ...) stay loadable from older checkpoints.
        self.regressor = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid output for ``x``; the last dimension has size 1."""
        return self.regressor(x)

# Wrap every split as float32 tensors. Label vectors get a trailing axis so
# they are column vectors of shape (N, 1), matching the model's output.
X_train_tensor, y_train_tensor = torch.FloatTensor(X_train), torch.FloatTensor(y_train)[:, None]
X_val_tensor, y_val_tensor = torch.FloatTensor(X_val), torch.FloatTensor(y_val)[:, None]
X_test_tensor, y_test_tensor = torch.FloatTensor(X_test), torch.FloatTensor(y_test)[:, None]

# Hyperparameter grid: every (learning rate, epoch budget) pair is trained
# from scratch and scored on the validation split.
learning_rates = [0.1, 0.01, 0.001, 0.0001]
num_epochs = [200, 500, 800, 1000, 1500]
best_lr = None
best_num_epochs = None
# Start below any reachable accuracy so the first configuration is always
# recorded; starting at 0 with a strict ">" would leave the "best" variables
# unset whenever every run scores exactly 0.0.
best_val_acc = -1.0
best_train_acc = 0
best_model_state = None

# BCELoss holds no state, so a single instance serves every run.
criterion = nn.BCELoss()

for lr in learning_rates:
    for epochs in num_epochs:
        model = NN()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        # Full-batch training: one optimizer step per epoch on the whole training set.
        model.train()
        for epoch in range(epochs):
            optimizer.zero_grad()
            y_pred = model(X_train_tensor)
            # squeeze(1) drops only the output axis, so a single-sample batch
            # stays 1-D instead of collapsing to a 0-d tensor.
            loss = criterion(y_pred.squeeze(1), y_train_tensor.squeeze(1))
            loss.backward()
            optimizer.step()

        # Score with dropout disabled and without building a graph.
        model.eval()
        with torch.no_grad():
            # Calculate accuracy on validation set (threshold 0.5)
            y_pred_val = (model(X_val_tensor).squeeze(1).numpy() > 0.5).astype(int)
            val_acc = accuracy_score(y_val, y_pred_val)

            # Calculate accuracy on training set
            y_pred_train = (model(X_train_tensor).squeeze(1).numpy() > 0.5).astype(int)
            train_acc = accuracy_score(y_train, y_pred_train)

        # Strict ">" keeps the earliest configuration on ties.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_train_acc = train_acc
            best_lr = lr
            best_num_epochs = epochs
            # state_dict() hands back references to the live parameter tensors;
            # clone them so the snapshot cannot change if this model is trained again.
            best_model_state = {
                name: tensor.clone() for name, tensor in model.state_dict().items()
            }

        print(f"Learning Rate: {lr}, Epochs: {epochs}, Train Accuracy: {train_acc:.4f}, Validation Accuracy: {val_acc:.4f}")

print(f"Best learning rate: {best_lr}")
print(f"Best number of epochs: {best_num_epochs}")
print(f"Best Train Accuracy: {best_train_acc:.4f}")
print(f"Best Validation Accuracy: {best_val_acc:.4f}")


# Restore the weights of the configuration that scored best on validation.
# `model` is the last network built by the grid search; every NN() shares the
# same architecture, so the saved state loads into it directly.
model.load_state_dict(best_model_state)

# Final training on train + validation set with best hyperparameters.
# Training continues from the restored weights rather than from a fresh model.
X_train_val = np.vstack((X_train, X_val))
y_train_val = np.concatenate((y_train, y_val))
X_train_val_tensor = torch.FloatTensor(X_train_val)
y_train_val_tensor = torch.FloatTensor(y_train_val).unsqueeze(1)
# Defined here so this stage does not depend on a variable leaked from the search loop.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=best_lr)

# Use the selected epoch budget. The bare name `epochs` is only the stale
# grid-search loop variable (its final value), not the chosen setting.
model.train()
for epoch in range(best_num_epochs):
    optimizer.zero_grad()
    y_pred_train_val = model(X_train_val_tensor)
    loss = criterion(y_pred_train_val.squeeze(1), y_train_val_tensor.squeeze(1))
    loss.backward()
    optimizer.step()

# Evaluate on test set
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_tensor)
    # Hard labels come from rounding the sigmoid output; the raw outputs are
    # kept as scores for the AUC.
    y_pred_test_binary = torch.round(y_pred_test).detach().numpy()
    y_pred_test_prob = y_pred_test.detach().numpy()

    test_acc = accuracy_score(y_test, y_pred_test_binary)
    test_auc = roc_auc_score(y_test, y_pred_test_prob)
    test_f1 = f1_score(y_test, y_pred_test_binary)
    test_cm = confusion_matrix(y_test, y_pred_test_binary)
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test AUC: {test_auc:.4f}")
    print(f"Test F1: {test_f1:.4f}")
    print(f"Test Confusion Matrix:\n{test_cm}")