In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split


In [None]:
# Load the heart-disease table and integer-encode its string-valued columns.
df = pd.read_csv('heart.csv')

# Category label -> integer code, per column. Every label gets its own code;
# in particular "TA" is 3 so it is not merged with "NAP" (1).
CATEGORY_CODES = {
    "Sex": {'M': 0, 'F': 1},
    "ChestPainType": {"ATA": 0, "NAP": 1, "ASY": 2, "TA": 3},
    "RestingECG": {"Normal": 0, "ST": 1, "LVH": 2},
    "ExerciseAngina": {'N': 0, 'Y': 1},
    "ST_Slope": {"Up": 0, "Flat": 1, "Down": 2},
}

# Assign the result back instead of calling replace(..., inplace=True) on
# df[column]: the in-place form operates on a temporary Series and is not
# guaranteed to update df (it is a no-op under pandas Copy-on-Write).
for column, codes in CATEGORY_CODES.items():
    df[column] = df[column].replace(codes)

In [None]:
# Columns of df used as model inputs. The list length also determines the
# network's input dimension, and the order fixes the tensor column order.
features = [
    "ST_Slope",
    "ChestPainType",
    "ExerciseAngina",
    "Cholesterol",
    "MaxHR",
    "Oldpeak",
    "Sex",
    "FastingBS",
    "Age",
    "RestingBP",
]

In [None]:
# Hold out 20% of the rows for testing, then split the remaining 80% into
# 75% train / 25% validation (60/20/20 overall).
# Both calls are seeded so that re-running the notebook yields the same
# partitions; without a seed on the second call the validation set changes
# on every run.
X, x_test, Y, y_test = train_test_split(df[features], df['HeartDisease'], train_size=0.8, test_size=0.2, random_state=42)
X_train, x_valid, Y_train, y_valid = train_test_split(X, Y, train_size=0.75, test_size=0.25, random_state=42)

In [None]:
class ANN(nn.Module):
    """Fully connected network with two hidden layers.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    The final layer has no activation, so the output is raw scores (logits).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output units.
        hidden_dim: Width shared by both hidden layers.
        dropout_prob: Drop probability passed to the shared ``nn.Dropout``.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dim: int, dropout_prob: float) -> None:
        super().__init__()
        # Layers are created in input-to-output order; the same Dropout and
        # ReLU modules are reused after each hidden layer.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.fc3 = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout_prob)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through both hidden layers and return the output layer's scores."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(self.activation(layer(hidden)))
        return self.fc3(hidden)

In [None]:
# Convert the train/validation splits to float32 tensors for PyTorch.
# (An earlier variant additionally tiled each tensor 10x with .repeat(...);
# it is disabled.)

def _to_float32_tensor(data):
    """Return ``data`` as a float32 tensor.

    Accepts a pandas DataFrame/Series (converted via ``.values``) or a tensor
    (cast to float32). Accepting tensors makes this cell safe to re-run:
    ``X_train`` is rebound from a DataFrame to a tensor below, and a second
    run would otherwise fail on ``.values.astype``.
    """
    if torch.is_tensor(data):
        return data.float()
    return torch.from_numpy(data.values.astype(np.float32))


X_train = _to_float32_tensor(X_train)
y_train = _to_float32_tensor(Y_train)
X_val = _to_float32_tensor(x_valid)
y_val = _to_float32_tensor(y_valid)

In [None]:
from loa import Lion

In [None]:
def _validation_accuracy(model, X_val, y_val):
    """Return the fraction of validation samples classified correctly.

    Switches the model to eval mode so dropout is disabled while scoring,
    and thresholds the sigmoid of the model output by rounding.
    """
    model.eval()
    with torch.no_grad():
        predicted = torch.round(torch.sigmoid(model(X_val)))
        correct = (predicted == y_val.unsqueeze(1)).sum().item()
    return correct / len(y_val)


def train(model, X_train, y_train, X_val, y_val, num_epochs, batch_size, learning_rate):
    """Train ``model`` with AdamW on a binary-cross-entropy-with-logits loss.

    Mini-batches are taken as consecutive slices of ``X_train``/``y_train``
    (no shuffling). After every epoch the validation accuracy is computed and
    a progress line is printed.

    Args:
        model: Module mapping a batch of inputs to one logit per sample.
        X_train, y_train: Training inputs and 1-D float targets.
        X_val, y_val: Validation inputs and 1-D float targets.
        num_epochs: Number of passes over the training data.
        batch_size: Number of samples per optimizer step.
        learning_rate: Learning rate passed to AdamW.

    Returns:
        Validation accuracy measured after the final epoch. If
        ``num_epochs`` is 0, the accuracy of the untouched model.

    Note:
        The model is left in eval mode when this function returns.
    """
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    n_samples = len(X_train)

    accuracy = None
    for epoch in range(num_epochs):
        # Re-enable dropout: the validation pass below puts the model in eval mode.
        model.train()
        running_loss = 0.0
        for i in range(0, n_samples, batch_size):
            batch_X, batch_y = X_train[i:i+batch_size], y_train[i:i+batch_size]
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight it by the batch size so
            # that dividing by n_samples below gives a true per-sample mean
            # (the last batch may be smaller than batch_size).
            running_loss += loss.item() * len(batch_X)

        # Calculate validation accuracy (dropout disabled)
        accuracy = _validation_accuracy(model, X_val, y_val)

        # max(..., 1) avoids a ZeroDivisionError when the training set is empty.
        print(f"Epoch {epoch+1}/{num_epochs}, loss={running_loss/max(n_samples, 1)}, accuracy={accuracy}")

    if accuracy is None:
        # No epoch ran; still return a meaningful score instead of failing.
        accuracy = _validation_accuracy(model, X_val, y_val)

    return accuracy

In [None]:
import optuna

# Define the objective function to optimize
def objective(trial):
    """Optuna objective: train one ANN with sampled hyperparameters.

    Samples the hidden width, dropout probability, learning rate and batch
    size from ``trial``, trains a fresh ``ANN`` for 150 epochs on the
    module-level training tensors, and scores it on the validation tensors.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        ``1.0 - accuracy`` (the validation error rate), since the study
        minimizes the objective.
    """
    # Sample hyperparameters
    hidden_dim = trial.suggest_int("hidden_dim", 10, 400, log=True)
    dropout_prob = trial.suggest_float("dropout_prob", 0.0, 0.5)

    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    # and samples from the same log-uniform range.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_int("batch_size", 8, 16, log=True)

    # Create the model and train it
    model = ANN(input_dim=len(features), output_dim=1, hidden_dim=hidden_dim, dropout_prob=dropout_prob)
    accuracy = train(model, X_train, y_train, X_val, y_val, num_epochs=150, batch_size=batch_size, learning_rate=learning_rate)

    return 1.0 - accuracy  # Optuna minimizes the objective function, so we need to return 1.0 - accuracy

In [None]:
# Create an Optuna study and optimize the objective function.
# Seed both PyTorch (weight initialisation, dropout masks) and the Optuna
# sampler so that, for a fixed data split, the sequential search is
# repeatable. TPESampler is the sampler create_study uses by default, so only
# the seed changes.
torch.manual_seed(42)
study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=10)

In [None]:
# Report the winning trial. The study stores 1 - accuracy as its objective
# value, so convert the best value back to an accuracy before printing.
best_hyperparameters = study.best_params
best_accuracy = 1.0 - study.best_value
print(f"Best hyperparameters: {best_hyperparameters}")
print(f"Best accuracy: {best_accuracy}")

In [None]:
# X = df[[x for x in df.columns if x != 'HeartDisease']]
# y = df['HeartDisease']

# from mrmr import mrmr_classif
# selected_features = mrmr_classif(X=X, y=y, K=10)