In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import optuna
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

# Load Dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Drop 'name' column and separate features (X) and target (y)
X = df.drop(columns=['name', 'status'])
y = df['status']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Step 3: Optuna for Hyperparameter Tuning ###

# SVM Model
def objective_svm(trial):
    C = trial.suggest_loguniform('C', 1e-3, 1e3)
    gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly'])
    
    model = SVC(C=C, gamma=gamma, kernel=kernel)
    model.fit(X_train_scaled, y_train)
    
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# XGBoost Model
def objective_xgb(trial):
    param = {
        'booster': 'gbtree',
        'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
        'eta': trial.suggest_loguniform('eta', 0.01, 0.2),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
    }
    
    dtrain = xgb.DMatrix(X_train_scaled, label=y_train)
    model = xgb.train(param, dtrain)
    
    dtest = xgb.DMatrix(X_test_scaled)
    y_pred = (model.predict(dtest) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Random Forest Model
def objective_rf(trial):
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    max_depth = trial.suggest_int('max_depth', 2, 32, log=True)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 16)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 16)
    
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, 
                                   min_samples_split=min_samples_split, 
                                   min_samples_leaf=min_samples_leaf)
    model.fit(X_train_scaled, y_train)
    
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

### Step 4: Running Optuna Optimization for Each Model ###

# SVM Optimization
study_svm = optuna.create_study(direction='maximize')
study_svm.optimize(objective_svm, n_trials=1000)
print(f"SVM Best Hyperparameters: {study_svm.best_params}")

# XGBoost Optimization
study_xgb = optuna.create_study(direction='maximize')
study_xgb.optimize(objective_xgb, n_trials=1000)
print(f"XGBoost Best Hyperparameters: {study_xgb.best_params}")

# Random Forest Optimization
study_rf = optuna.create_study(direction='maximize')
study_rf.optimize(objective_rf, n_trials=1000)
print(f"Random Forest Best Hyperparameters: {study_rf.best_params}")

### Step 5: Evaluating and Comparing the Models ###

# SVM Model Evaluation
svm_model = SVC(**study_svm.best_params)
svm_model.fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)
print("SVM Classification Report:")
print(classification_report(y_test, y_pred_svm))

# XGBoost Model Evaluation
dtrain = xgb.DMatrix(X_train_scaled, label=y_train)
xgb_model = xgb.train(study_xgb.best_params, dtrain)
dtest = xgb.DMatrix(X_test_scaled)
y_pred_xgb = (xgb_model.predict(dtest) > 0.5).astype(int)
print("XGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb))

# Random Forest Model Evaluation
rf_model = RandomForestClassifier(**study_rf.best_params)
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)
print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred_rf))


# Train the best model
train_best_model(best_model, X_train_tensor, y_train_tensor)

# Test the best model
best_model.eval()
with torch.no_grad():
    y_pred_best = best_model(X_test_tensor)
    y_pred_best = (y_pred_best > 0.5).float()
    print("Best PCNN Classification Report:")
    print(classification_report(y_test, y_pred_best.numpy()))


[I 2024-10-16 22:34:06,709] A new study created in memory with name: no-name-a2fd5517-b4b6-47e5-8a75-b815dcfad957
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
[I 2024-10-16 22:34:06,719] Trial 0 finished with value: 0.9230769230769231 and parameters: {'C': 170.64914336005103, 'gamma': 0.0005046995848121491, 'kernel': 'rbf'}. Best is trial 0 with value: 0.9230769230769231.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
[I 2024-10-16 22:34:06,724] Trial 1 finished with value: 0.8717948717948718 and parameters: {'C': 4.359715729224048, 'gamma': 0.00044645317767739626, 'kernel': 'linear'}. Best is trial 0 with value: 0.9230769230769231.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
[I 2024-10-16 22:34:06,727] Trial 2 finished with value: 0.8205128205128205 and parameters: {'C': 0.011405395834875551, 'gamma': 0.02091451841032

SVM Best Hyperparameters: {'C': 6.247538698508372, 'gamma': 0.0945777536102647, 'kernel': 'rbf'}


  'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
  'eta': trial.suggest_loguniform('eta', 0.01, 0.2),
  'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2024-10-16 22:34:14,227] Trial 20 finished with value: 0.8205128205128205 and parameters: {'lambda': 0.009310239170799003, 'alpha': 0.03653880321892493, 'eta': 0.010535364231481109, 'max_depth': 7, 'min_child_weight': 3, 'gamma': 7.760409791683023e-06, 'colsample_bytree': 0.5751550224955673}. Best is trial 2 with value: 0.9487179487179487.
  'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
  'eta': trial.suggest_loguniform('eta', 0.01, 0.2),
  'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2024-10-16 22:34:14,243] Trial 21 finished with 

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from sklearn.metrics import accuracy_score, classification_report

# Define PCNN Model with dynamic hyperparameters
class PCNN(nn.Module):
    def __init__(self, num_filters, kernel_size):
        super(PCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=22, out_channels=num_filters, kernel_size=kernel_size, padding=0)
        self.fc1 = nn.Linear(num_filters, 1)  # Adjust according to the output size after conv

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = x.view(x.size(0), -1)  # Flatten before passing to the fully connected layer
        x = torch.sigmoid(self.fc1(x))  # Sigmoid for binary classification
        return x

# Reshape the input to have the format (batch_size, in_channels, sequence_length)
X_train_cnn = X_train_scaled.reshape(X_train_scaled.shape[0], 22, 1)  # 22 features as in_channels, 1 sequence length
X_test_cnn = X_test_scaled.reshape(X_test_scaled.shape[0], 22, 1)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_cnn, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_cnn, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Objective function for Optuna
def objective(trial):
    # Hyperparameter suggestions
    num_filters = trial.suggest_int('num_filters', 32, 128, step=32)
    kernel_size = 1  # Since sequence length is 1, we fix kernel size to 1
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)

    # Instantiate the model
    model = PCNN(num_filters=num_filters, kernel_size=kernel_size)
    
    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    model.train()
    epochs = 20  # You can adjust the number of epochs based on your need
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

    # Test the model
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor)
        y_pred = (y_pred > 0.5).float()

    # Calculate accuracy
    accuracy = accuracy_score(y_test_tensor.numpy(), y_pred.numpy())
    return accuracy

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Best hyperparameters
print(f"Best hyperparameters: {study.best_params}")

# Evaluate the model with the best hyperparameters
best_num_filters = study.best_params['num_filters']
best_lr = study.best_params['lr']

# Instantiate and train the best model
best_model = PCNN(num_filters=best_num_filters, kernel_size=1)
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=best_lr)

# Train the best model
def train_best_model(model, X_train, y_train, epochs=100):
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

# Train the best model
train_best_model(best_model, X_train_tensor, y_train_tensor)

# Test the best model
best_model.eval()
with torch.no_grad():
    y_pred_best = best_model(X_test_tensor)
    y_pred_best = (y_pred_best > 0.5).float()
    print("Best PCNN Classification Report:")
    print(classification_report(y_test, y_pred_best.numpy()))


[I 2024-10-14 14:29:21,806] A new study created in memory with name: no-name-a923f615-d60c-4028-9e9e-7cf4ac5b2286
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-14 14:29:21,836] Trial 0 finished with value: 0.8205128205128205 and parameters: {'num_filters': 96, 'lr': 0.0038536200869348346}. Best is trial 0 with value: 0.8205128205128205.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-14 14:29:21,856] Trial 1 finished with value: 0.8205128205128205 and parameters: {'num_filters': 32, 'lr': 0.0003838703607349182}. Best is trial 0 with value: 0.8205128205128205.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-14 14:29:21,887] Trial 2 finished with value: 0.8974358974358975 and parameters: {'num_filters': 128, 'lr': 0.008521550798334096}. Best is trial 2 with value: 0.8974358974358975.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-14 14:29:21,914] Trial 3 finished with value: 0.5641025641025

Best hyperparameters: {'num_filters': 128, 'lr': 0.008521550798334096}
Best PCNN Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.71      0.83         7
           1       0.94      1.00      0.97        32

    accuracy                           0.95        39
   macro avg       0.97      0.86      0.90        39
weighted avg       0.95      0.95      0.95        39



In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from sklearn.metrics import accuracy_score, classification_report

# Define the Novel Parkinson Classification Neural Network (PCNN) Model
class NovelPCNN(nn.Module):
    def __init__(self, num_filters1, num_filters2, kernel_size1, kernel_size2, fc_neurons):
        super(NovelPCNN, self).__init__()
        # First convolutional layer
        self.conv1 = nn.Conv1d(in_channels=22, out_channels=num_filters1, kernel_size=kernel_size1, padding=1)
        # Second convolutional layer
        self.conv2 = nn.Conv1d(in_channels=num_filters1, out_channels=num_filters2, kernel_size=kernel_size2, padding=1)
        # Store fc_neurons as an instance variable
        self.fc_neurons = fc_neurons
        # We won't set the fully connected input size here yet; it will be dynamically calculated later.
        self.fc1 = None  # Placeholder for fully connected layer
        self.fc2 = nn.Linear(fc_neurons, 1)  # Output layer

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.conv2(x)
        x = torch.relu(x)
        # Dynamically calculate the flattened size
        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer

        # Set up the fully connected layer only when the input size is known
        if self.fc1 is None:
            self.fc1 = nn.Linear(x.size(1), self.fc_neurons)  # Use instance variable self.fc_neurons

        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))  # Sigmoid for binary classification
        return x

# Reshape the input to have the format (batch_size, in_channels, sequence_length)
X_train_cnn = X_train_scaled.reshape(X_train_scaled.shape[0], 22, 1)  # 22 features as in_channels, 1 sequence length
X_test_cnn = X_test_scaled.reshape(X_test_scaled.shape[0], 22, 1)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_cnn, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_cnn, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Objective function for Optuna
def objective(trial):
    # Hyperparameter suggestions
    num_filters1 = trial.suggest_int('num_filters1', 32, 128, step=32)
    num_filters2 = trial.suggest_int('num_filters2', 32, 128, step=32)
    kernel_size1 = trial.suggest_int('kernel_size1', 1, 3)
    kernel_size2 = trial.suggest_int('kernel_size2', 1, 3)
    fc_neurons = trial.suggest_int('fc_neurons', 50, 200, step=50)
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)

    # Instantiate the model
    model = NovelPCNN(num_filters1=num_filters1, num_filters2=num_filters2, 
                      kernel_size1=kernel_size1, kernel_size2=kernel_size2, 
                      fc_neurons=fc_neurons)
    
    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    model.train()
    epochs = 20  # You can adjust the number of epochs based on your need
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

    # Test the model
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor)
        y_pred = (y_pred > 0.5).float()

    # Calculate accuracy
    accuracy = accuracy_score(y_test_tensor.numpy(), y_pred.numpy())
    return accuracy

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Best hyperparameters
print(f"Best hyperparameters: {study.best_params}")

# Evaluate the model with the best hyperparameters
best_num_filters1 = study.best_params['num_filters1']
best_num_filters2 = study.best_params['num_filters2']
best_kernel_size1 = study.best_params['kernel_size1']
best_kernel_size2 = study.best_params['kernel_size2']
best_fc_neurons = study.best_params['fc_neurons']
best_lr = study.best_params['lr']

# Instantiate and train the best model
best_model = NovelPCNN(num_filters1=best_num_filters1, num_filters2=best_num_filters2,
                       kernel_size1=best_kernel_size1, kernel_size2=best_kernel_size2,
                       fc_neurons=best_fc_neurons)
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=best_lr)

# Train the best model
def train_best_model(model, X_train, y_train, epochs=100):
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()


[I 2024-10-16 22:32:11,083] A new study created in memory with name: no-name-e20a40df-bd35-4d72-bdd5-983669d49830
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-16 22:32:11,244] Trial 0 finished with value: 0.8205128205128205 and parameters: {'num_filters1': 96, 'num_filters2': 64, 'kernel_size1': 2, 'kernel_size2': 2, 'fc_neurons': 200, 'lr': 0.00027645684916877133}. Best is trial 0 with value: 0.8205128205128205.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-16 22:32:11,322] Trial 1 finished with value: 0.8205128205128205 and parameters: {'num_filters1': 96, 'num_filters2': 64, 'kernel_size1': 1, 'kernel_size2': 2, 'fc_neurons': 150, 'lr': 2.5657476956732957e-05}. Best is trial 0 with value: 0.8205128205128205.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-16 22:32:11,372] Trial 2 finished with value: 0.8205128205128205 and parameters: {'num_filters1': 96, 'num_filters2': 64, 'kernel_size1': 2, 'kernel_size2

Best hyperparameters: {'num_filters1': 128, 'num_filters2': 96, 'kernel_size1': 1, 'kernel_size2': 3, 'fc_neurons': 200, 'lr': 0.006521864021838692}
Best PCNN Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.57      0.73         7
           1       0.91      1.00      0.96        32

    accuracy                           0.92        39
   macro avg       0.96      0.79      0.84        39
weighted avg       0.93      0.92      0.91        39



ImportError: cannot import name 'plot_roc_curve' from 'sklearn.metrics' (/opt/anaconda3/lib/python3.12/site-packages/sklearn/metrics/__init__.py)

ImportError: cannot import name 'plot_roc_curve' from 'sklearn.metrics' (/opt/anaconda3/lib/python3.12/site-packages/sklearn/metrics/__init__.py)

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import optuna
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, MaxPooling1D, Dropout, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb

# Load Dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)

# Drop 'name' column and separate features (X) and target (y)
X = df.drop(columns=['name', 'status'])
y = df['status']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape input data for CNN
X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# CNN Model (TensorFlow/Keras)
def create_cnn_model(trial):
    model = Sequential()
    model.add(Conv1D(filters=trial.suggest_int('filters', 32, 256),
                     kernel_size=trial.suggest_int('kernel_size', 2, 5),
                     activation='relu',
                     input_shape=(X_train_scaled.shape[1], 1)))
    model.add(GlobalAveragePooling1D())
    model.add(Dense(trial.suggest_int('dense_units', 32, 256), activation='relu'))
    model.add(Dropout(trial.suggest_float('dropout', 0.1, 0.5)))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer=Adam(learning_rate=trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

def objective_cnn(trial):
    model = create_cnn_model(trial)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(X_train_reshaped, y_train,
                        epochs=100,
                        batch_size=trial.suggest_categorical('batch_size', [16, 32, 64]),
                        validation_split=0.2,
                        callbacks=[early_stopping],
                        verbose=0)
    return model.evaluate(X_test_reshaped, y_test, verbose=0)[1]  # Return accuracy

# NovelPCNN Model (PyTorch)
class NovelPCNN(nn.Module):
    def __init__(self, num_filters1, num_filters2, kernel_size1, kernel_size2, fc_neurons):
        super(NovelPCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=22, out_channels=num_filters1, kernel_size=kernel_size1, padding=1)
        self.conv2 = nn.Conv1d(in_channels=num_filters1, out_channels=num_filters2, kernel_size=kernel_size2, padding=1)
        self.fc_neurons = fc_neurons
        self.fc1 = None
        self.fc2 = nn.Linear(fc_neurons, 1)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.view(x.size(0), -1)
        if self.fc1 is None:
            self.fc1 = nn.Linear(x.size(1), self.fc_neurons)
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

def objective_novelpcnn(trial):
    num_filters1 = trial.suggest_int('num_filters1', 32, 128, step=32)
    num_filters2 = trial.suggest_int('num_filters2', 32, 128, step=32)
    kernel_size1 = trial.suggest_int('kernel_size1', 1, 3)
    kernel_size2 = trial.suggest_int('kernel_size2', 1, 3)
    fc_neurons = trial.suggest_int('fc_neurons', 50, 200, step=50)
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)

    model = NovelPCNN(num_filters1, num_filters2, kernel_size1, kernel_size2, fc_neurons)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    X_train_tensor = torch.tensor(X_train_scaled.reshape(X_train_scaled.shape[0], 22, 1), dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test_scaled.reshape(X_test_scaled.shape[0], 22, 1), dtype=torch.float32)

    for epoch in range(100):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor)
        y_pred = (y_pred > 0.5).float()
    return accuracy_score(y_test, y_pred.numpy())

# Traditional ML Models
def objective_svm(trial):
    C = trial.suggest_loguniform('C', 1e-3, 1e3)
    gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly'])
    
    model = SVC(C=C, gamma=gamma, kernel=kernel)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

def objective_rf(trial):
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    max_depth = trial.suggest_int('max_depth', 2, 32, log=True)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 16)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 16)
    
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, 
                                   min_samples_split=min_samples_split, 
                                   min_samples_leaf=min_samples_leaf)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

def objective_xgb(trial):
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 9),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
    }
    
    model = xgb.XGBClassifier(**param, random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

# Additional Models
def objective_adaboost(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 500)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
    
    model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

def objective_dt(trial):
    max_depth = trial.suggest_int('max_depth', 1, 32)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)
    
    model = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split,
                                   min_samples_leaf=min_samples_leaf, random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

def objective_gb(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 500)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
    max_depth = trial.suggest_int('max_depth', 1, 32)
    
    model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate,
                                       max_depth=max_depth, random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

def objective_knn(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 20)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    return accuracy_score(y_test, y_pred)

# Optimize and evaluate all models
models = {
    'CNN': objective_cnn,
    'NovelPCNN': objective_novelpcnn,
    'SVM': objective_svm,
    'Random Forest': objective_rf,
    'XGBoost': objective_xgb,
    'AdaBoost': objective_adaboost,
    'Decision Tree': objective_dt,
    'Gradient Boosting': objective_gb,
    'KNN': objective_knn
}

results = {}

for name, objective in models.items():
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    print(f"\n{name} Best Hyperparameters: {study.best_params}")
    print(f"{name} Best Accuracy: {study.best_value:.4f}")
    results[name] = study.best_value

# Ensemble Model
best_models = {
    'AdaBoost': AdaBoostClassifier(**study.best_params, random_state=42),
    'KNN': KNeighborsClassifier(**study.best_params),
    'XGBoost': xgb.XGBClassifier(**study.best_params, random_state=42)
}

ensemble_model = VotingClassifier(estimators=[(name, model) for name, model in best_models.items()], voting='soft')
ensemble_model.fit(X_train_scaled, y_train)
y_pred_ensemble = ensemble_model.predict(X_test_scaled)
ensemble_accuracy = accuracy_score(y_test, y_pred_ensemble)
results['Ensemble'] = ensemble_accuracy

print("\nEnsemble Model Classification Report:")
print(classification_report(y_test, y_pred_ensemble))

# Final Results
print("\nFinal Results:")
for name, accuracy in results.items():
    print(f"{name}: {accuracy:.4f}")

print("\nBest performing model:")
best_model = max(results, key=results.get)
print(f"{best_model} with accuracy: {results[best_model]:.4f}")



[I 2024-10-23 22:23:28,483] A new study created in memory with name: no-name-af336cbb-bedd-4948-87a5-97a634774e80
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  model.compile(optimizer=Adam(learning_rate=trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)),
[I 2024-10-23 22:23:29,328] Trial 0 finished with value: 0.8717948794364929 and parameters: {'filters': 72, 'kernel_size': 2, 'dense_units': 208, 'dropout': 0.10579676853512537, 'learning_rate': 0.00939960898320955, 'batch_size': 32}. Best is trial 0 with value: 0.8717948794364929.
[I 2024-10-23 22:23:30,283] Trial 1 finished with value: 0.8717948794364929 and parameters: {'filters': 224, 'kernel_size': 4, 'dense_units': 73, 'dropout': 0.2205382193426603, 'learning_rate': 0.0010731746688067154, 'batch_size': 32}. Best is trial 0 with value: 0.8717948794364929.
[I 2024-10-23 22:23:31,515] Trial 2 finished with value: 0.9230769276618958 and parameters: {'filters': 184, 'kernel_size': 4, 'dense_units': 235,


CNN Best Hyperparameters: {'filters': 184, 'kernel_size': 4, 'dense_units': 235, 'dropout': 0.3143844428263541, 'learning_rate': 0.0007632988202132982, 'batch_size': 64}
CNN Best Accuracy: 0.9231


[I 2024-10-23 22:24:39,444] Trial 0 finished with value: 0.9487179487179487 and parameters: {'num_filters1': 128, 'num_filters2': 32, 'kernel_size1': 2, 'kernel_size2': 3, 'fc_neurons': 200, 'lr': 0.006683824928597849}. Best is trial 0 with value: 0.9487179487179487.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-23 22:24:39,942] Trial 1 finished with value: 0.8205128205128205 and parameters: {'num_filters1': 32, 'num_filters2': 32, 'kernel_size1': 3, 'kernel_size2': 1, 'fc_neurons': 50, 'lr': 1.806015423825017e-05}. Best is trial 0 with value: 0.9487179487179487.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-23 22:24:40,906] Trial 2 finished with value: 0.1794871794871795 and parameters: {'num_filters1': 32, 'num_filters2': 32, 'kernel_size1': 3, 'kernel_size2': 3, 'fc_neurons': 200, 'lr': 1.056080799469607e-05}. Best is trial 0 with value: 0.9487179487179487.
  learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
[I 2024-10-23 


NovelPCNN Best Hyperparameters: {'num_filters1': 128, 'num_filters2': 32, 'kernel_size1': 2, 'kernel_size2': 3, 'fc_neurons': 200, 'lr': 0.006683824928597849}
NovelPCNN Best Accuracy: 0.9487


[I 2024-10-23 22:25:29,853] Trial 10 finished with value: 0.8974358974358975 and parameters: {'C': 894.3584496538774, 'gamma': 0.03691707725809518, 'kernel': 'linear'}. Best is trial 0 with value: 0.9230769230769231.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
[I 2024-10-23 22:25:29,859] Trial 11 finished with value: 0.8717948717948718 and parameters: {'C': 1.5607175419639605, 'gamma': 0.0417173702088733, 'kernel': 'linear'}. Best is trial 0 with value: 0.9230769230769231.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
[I 2024-10-23 22:25:29,865] Trial 12 finished with value: 0.9230769230769231 and parameters: {'C': 805.2316885887125, 'gamma': 0.02442281791580995, 'kernel': 'rbf'}. Best is trial 0 with value: 0.9230769230769231.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-4, 1e0)
[I 2024-10-23 22:25:29,871] Trial 13 finished 


SVM Best Hyperparameters: {'C': 195.99406437605438, 'gamma': 0.011184877027930737, 'kernel': 'rbf'}
SVM Best Accuracy: 0.9487


[I 2024-10-23 22:25:30,539] Trial 0 finished with value: 0.8974358974358975 and parameters: {'n_estimators': 872, 'max_depth': 2, 'min_samples_split': 12, 'min_samples_leaf': 8}. Best is trial 0 with value: 0.8974358974358975.
[I 2024-10-23 22:25:30,646] Trial 1 finished with value: 0.8974358974358975 and parameters: {'n_estimators': 206, 'max_depth': 8, 'min_samples_split': 10, 'min_samples_leaf': 16}. Best is trial 0 with value: 0.8974358974358975.
[I 2024-10-23 22:25:30,940] Trial 2 finished with value: 0.9487179487179487 and parameters: {'n_estimators': 560, 'max_depth': 6, 'min_samples_split': 12, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.9487179487179487.
[I 2024-10-23 22:25:31,325] Trial 3 finished with value: 0.8974358974358975 and parameters: {'n_estimators': 783, 'max_depth': 2, 'min_samples_split': 16, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.9487179487179487.
[I 2024-10-23 22:25:31,789] Trial 4 finished with value: 0.8974358974358975 and parameters:


Random Forest Best Hyperparameters: {'n_estimators': 560, 'max_depth': 6, 'min_samples_split': 12, 'min_samples_leaf': 1}
Random Forest Best Accuracy: 0.9487


[I 2024-10-23 22:25:44,476] Trial 2 finished with value: 0.8717948717948718 and parameters: {'max_depth': 1, 'learning_rate': 0.6164976219287805, 'n_estimators': 151, 'min_child_weight': 2, 'subsample': 0.9305035462572879, 'colsample_bytree': 0.5133150370971957}. Best is trial 1 with value: 0.8974358974358975.
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 1.0),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2024-10-23 22:25:44,551] Trial 3 finished with value: 0.8717948717948718 and parameters: {'max_depth': 1, 'learning_rate': 0.5189979995961106, 'n_estimators': 357, 'min_child_weight': 9, 'subsample': 0.7459715793267012, 'colsample_bytree': 0.8112681969449212}. Best is trial 1 with value: 0.8974358974358975.
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 1.0),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.sugges


XGBoost Best Hyperparameters: {'max_depth': 8, 'learning_rate': 0.005316035849768041, 'n_estimators': 291, 'min_child_weight': 3, 'subsample': 0.9172651716624177, 'colsample_bytree': 0.9214817143723445}
XGBoost Best Accuracy: 0.9487


[I 2024-10-23 22:25:52,043] Trial 1 finished with value: 0.8461538461538461 and parameters: {'n_estimators': 63, 'learning_rate': 0.2586310070290599}. Best is trial 0 with value: 0.8461538461538461.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
[I 2024-10-23 22:25:52,135] Trial 2 finished with value: 0.9230769230769231 and parameters: {'n_estimators': 125, 'learning_rate': 0.0018511347101234587}. Best is trial 2 with value: 0.9230769230769231.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
[I 2024-10-23 22:25:52,202] Trial 3 finished with value: 0.9230769230769231 and parameters: {'n_estimators': 85, 'learning_rate': 0.0015542416789566347}. Best is trial 2 with value: 0.9230769230769231.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
[I 2024-10-23 22:25:52,279] Trial 4 finished with value: 0.9230769230769231 and parameters: {'n_estimators': 101, 'learning_rate': 0.001375888976085486}. Best is trial 2 with value: 0


AdaBoost Best Hyperparameters: {'n_estimators': 125, 'learning_rate': 0.0018511347101234587}
AdaBoost Best Accuracy: 0.9231


[I 2024-10-23 22:25:59,107] Trial 22 finished with value: 0.9230769230769231 and parameters: {'max_depth': 28, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 12 with value: 0.9487179487179487.
[I 2024-10-23 22:25:59,116] Trial 23 finished with value: 0.9487179487179487 and parameters: {'max_depth': 30, 'min_samples_split': 3, 'min_samples_leaf': 6}. Best is trial 12 with value: 0.9487179487179487.
[I 2024-10-23 22:25:59,125] Trial 24 finished with value: 0.9487179487179487 and parameters: {'max_depth': 22, 'min_samples_split': 10, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.9487179487179487.
[I 2024-10-23 22:25:59,133] Trial 25 finished with value: 0.8717948717948718 and parameters: {'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 4}. Best is trial 12 with value: 0.9487179487179487.
[I 2024-10-23 22:25:59,141] Trial 26 finished with value: 0.9487179487179487 and parameters: {'max_depth': 29, 'min_samples_split': 8, 'min_samples_leaf': 6}. Best is 


Decision Tree Best Hyperparameters: {'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 7}
Decision Tree Best Accuracy: 0.9487


[I 2024-10-23 22:25:59,575] Trial 1 finished with value: 0.9230769230769231 and parameters: {'n_estimators': 477, 'learning_rate': 0.38360543904366734, 'max_depth': 22}. Best is trial 0 with value: 0.9230769230769231.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
[I 2024-10-23 22:25:59,844] Trial 2 finished with value: 0.8205128205128205 and parameters: {'n_estimators': 289, 'learning_rate': 0.0010950893631182564, 'max_depth': 14}. Best is trial 0 with value: 0.9230769230769231.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
[I 2024-10-23 22:26:00,048] Trial 3 finished with value: 0.9230769230769231 and parameters: {'n_estimators': 224, 'learning_rate': 0.0038453785182879, 'max_depth': 31}. Best is trial 0 with value: 0.9230769230769231.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1.0)
[I 2024-10-23 22:26:00,213] Trial 4 finished with value: 0.9230769230769231 and parameters: {'n_estimators': 183, 'learning_rate': 0


Gradient Boosting Best Hyperparameters: {'n_estimators': 60, 'learning_rate': 0.1559758287176522, 'max_depth': 3}
Gradient Boosting Best Accuracy: 0.9487


[I 2024-10-23 22:26:07,095] Trial 13 finished with value: 0.8974358974358975 and parameters: {'n_neighbors': 4, 'weights': 'uniform'}. Best is trial 7 with value: 0.9743589743589743.
[I 2024-10-23 22:26:07,110] Trial 14 finished with value: 0.9230769230769231 and parameters: {'n_neighbors': 3, 'weights': 'uniform'}. Best is trial 7 with value: 0.9743589743589743.
[I 2024-10-23 22:26:07,127] Trial 15 finished with value: 0.9230769230769231 and parameters: {'n_neighbors': 10, 'weights': 'uniform'}. Best is trial 7 with value: 0.9743589743589743.
[I 2024-10-23 22:26:07,144] Trial 16 finished with value: 0.8717948717948718 and parameters: {'n_neighbors': 6, 'weights': 'uniform'}. Best is trial 7 with value: 0.9743589743589743.
[I 2024-10-23 22:26:07,161] Trial 17 finished with value: 0.9743589743589743 and parameters: {'n_neighbors': 1, 'weights': 'uniform'}. Best is trial 7 with value: 0.9743589743589743.
[I 2024-10-23 22:26:07,181] Trial 18 finished with value: 0.8717948717948718 and par


KNN Best Hyperparameters: {'n_neighbors': 1, 'weights': 'uniform'}
KNN Best Accuracy: 0.9744


TypeError: AdaBoostClassifier.__init__() got an unexpected keyword argument 'n_neighbors'