In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import StandardScaler


In [2]:
Wine_r = pd.read_csv('winequality-red.csv', sep=";")
Wine_w = pd.read_csv('winequality-white.csv', sep=";")

In [3]:
Wine_r['is_red'] = 1  # 1 indicates red wine
Wine_w['is_red'] = 0  # 0 indicates white wine

# Concatenate the two datasets
wine_combined = pd.concat([Wine_r, Wine_w], axis=0, ignore_index=True)
X = wine_combined.drop("is_red", axis=1)  # Features
y = wine_combined["is_red"]  # Target



# Random Forest

#### Hyperparam Tuning

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Oversample the minority class using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Define parameter grid
param_grid = {
    "n_estimators": [100, 200, 300, 400, 500],
    "max_features": ["sqrt", "log2", None],
    "criterion": ["gini", "entropy"]
}

# Initialize Random Forest Classifier
rf = RandomForestClassifier(random_state=42)

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_grid,
    n_iter=20,  # Number of combinations to try
    scoring='accuracy',
    cv=3,  # 3-fold cross-validation
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# Fit RandomizedSearchCV
random_search.fit(X_resampled, y_resampled)

# Best parameters
print("Best Parameters:", random_search.best_params_)

# Evaluate on test set using the best model
best_rf = random_search.best_estimator_
y_pred = best_rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Set Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))




Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best Parameters: {'n_estimators': 500, 'max_features': 'log2', 'criterion': 'gini'}
Test Set Accuracy: 99.62%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       959
           1       1.00      0.99      0.99       341

    accuracy                           1.00      1300
   macro avg       1.00      0.99      1.00      1300
weighted avg       1.00      1.00      1.00      1300



### 80/20 Split

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import numpy as np

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# To store results for each test
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Train-test split with a different random seed
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=i)
    
    # Oversample the minority class using SMOTE
    smote = SMOTE(random_state=i)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    
    # Train Random Forest Classifier
    classifier = RandomForestClassifier(n_estimators=500, class_weight='balanced', max_features = 'log2', criterion = 'gini', random_state=i)
    classifier.fit(X_resampled, y_resampled)
    
    # Make predictions
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })
    
    # Print results for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Display overall results
import pandas as pd
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
average_accuracy = results_df["Accuracy"].mean()
print(f"Average Accuracy across the 3 trials: {average_accuracy * 100:.2f}%")



--- Trial 1 ---




Accuracy: 99.54%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       989
           1       0.99      0.99      0.99       311

    accuracy                           1.00      1300
   macro avg       0.99      0.99      0.99      1300
weighted avg       1.00      1.00      1.00      1300


--- Trial 2 ---




Accuracy: 99.54%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1003
           1       0.99      0.99      0.99       297

    accuracy                           1.00      1300
   macro avg       0.99      0.99      0.99      1300
weighted avg       1.00      1.00      1.00      1300


--- Trial 3 ---




Accuracy: 99.85%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       968
           1       1.00      0.99      1.00       332

    accuracy                           1.00      1300
   macro avg       1.00      1.00      1.00      1300
weighted avg       1.00      1.00      1.00      1300


Summary of Results Across Trials:
   Trial  Accuracy  Precision (Class 1)  Recall (Class 1)  F1-Score (Class 1)
0      1  0.995385             0.987220          0.993569            0.990385
1      2  0.995385             0.989899          0.989899            0.989899
2      3  0.998462             1.000000          0.993976            0.996979
Average Accuracy across the 3 trials: 99.64%


### 50/50

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import numpy as np

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# To store results for each test
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Train-test split with a different random seed
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, random_state=i)
    
    # Oversample the minority class using SMOTE
    smote = SMOTE(random_state=i)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    
    # Train Random Forest Classifier
    classifier = RandomForestClassifier(n_estimators=500, class_weight='balanced', max_features = 'log2', criterion = 'gini', random_state=i)
    classifier.fit(X_resampled, y_resampled)
    
    # Make predictions
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })
    
    # Print results for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Display overall results
import pandas as pd
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
average_accuracy = results_df["Accuracy"].mean()
print(f"Average Accuracy across the 3 trials: {average_accuracy * 100:.2f}%")



--- Trial 1 ---




Accuracy: 99.57%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2440
           1       0.99      0.99      0.99       809

    accuracy                           1.00      3249
   macro avg       1.00      0.99      0.99      3249
weighted avg       1.00      1.00      1.00      3249


--- Trial 2 ---




Accuracy: 99.60%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2453
           1       1.00      0.99      0.99       796

    accuracy                           1.00      3249
   macro avg       1.00      0.99      0.99      3249
weighted avg       1.00      1.00      1.00      3249


--- Trial 3 ---




Accuracy: 99.60%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2412
           1       1.00      0.99      0.99       837

    accuracy                           1.00      3249
   macro avg       1.00      0.99      0.99      3249
weighted avg       1.00      1.00      1.00      3249


Summary of Results Across Trials:
   Trial  Accuracy  Precision (Class 1)  Recall (Class 1)  F1-Score (Class 1)
0      1  0.995691             0.993789          0.988875            0.991326
1      2  0.995999             0.997459          0.986181            0.991788
2      3  0.995999             0.997585          0.986858            0.992192
Average Accuracy across the 3 trials: 99.59%


### 20/80

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import numpy as np

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# To store results for each test
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Train-test split with a different random seed
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.8, random_state=i)
    
    # Oversample the minority class using SMOTE
    smote = SMOTE(random_state=i)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    
    # Train Random Forest Classifier
    classifier = RandomForestClassifier(n_estimators=500, class_weight='balanced', max_features = 'log2', criterion = 'gini', random_state=i)
    classifier.fit(X_resampled, y_resampled)
    
    # Make predictions
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })
    
    # Print results for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Display overall results
import pandas as pd
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
average_accuracy = results_df["Accuracy"].mean()
print(f"Average Accuracy across the 3 trials: {average_accuracy * 100:.2f}%")



--- Trial 1 ---




Accuracy: 99.40%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      3904
           1       0.99      0.98      0.99      1294

    accuracy                           0.99      5198
   macro avg       0.99      0.99      0.99      5198
weighted avg       0.99      0.99      0.99      5198


--- Trial 2 ---




Accuracy: 99.25%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      3904
           1       1.00      0.97      0.98      1294

    accuracy                           0.99      5198
   macro avg       0.99      0.99      0.99      5198
weighted avg       0.99      0.99      0.99      5198


--- Trial 3 ---




Accuracy: 99.25%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      3912
           1       0.99      0.98      0.98      1286

    accuracy                           0.99      5198
   macro avg       0.99      0.99      0.99      5198
weighted avg       0.99      0.99      0.99      5198


Summary of Results Across Trials:
   Trial  Accuracy  Precision (Class 1)  Recall (Class 1)  F1-Score (Class 1)
0      1  0.994036             0.992206          0.983771            0.987971
1      2  0.992497             0.996047          0.973725            0.984760
2      3  0.992497             0.989788          0.979782            0.984760
Average Accuracy across the 3 trials: 99.30%


## SVM

#### Hyperparam Tuning

In [8]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
import pandas as pd

# Separate features and target

# Define the parameter grid for tuning
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],  # Regularization parameter
}

# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the SVM model
svm_model = SVC(class_weight='balanced', random_state=42)

# Perform RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=svm_model,
    param_distributions=param_grid,
    n_iter=20,  # Number of random combinations to try
    cv=3,  # 3-fold cross-validation
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# Fit RandomizedSearchCV
random_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", random_search.best_params_)

# Evaluate the best model
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

# Calculate accuracy and classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(report)



Fitting 3 folds for each of 5 candidates, totalling 15 fits
Best Parameters: {'C': 1000}
Accuracy: 98.31%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       959
           1       0.96      0.98      0.97       341

    accuracy                           0.98      1300
   macro avg       0.98      0.98      0.98      1300
weighted avg       0.98      0.98      0.98      1300



### 80/20

In [9]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd

# To store results for each test
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Perform train-test split with different random seeds
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)
    
    # Train SVM classifier
    classifier = SVC(kernel='linear', class_weight='balanced', C = 1000, random_state=i)
    classifier.fit(X_train, y_train)
    
    # Make predictions
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })
    
    # Print results for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Display overall results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
average_accuracy = results_df["Accuracy"].mean()
print(f"Average Accuracy across the 3 trials: {average_accuracy * 100:.2f}%")




--- Trial 1 ---
Accuracy: 98.92%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       989
           1       0.97      0.99      0.98       311

    accuracy                           0.99      1300
   macro avg       0.98      0.99      0.99      1300
weighted avg       0.99      0.99      0.99      1300


--- Trial 2 ---
Accuracy: 98.31%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      1003
           1       0.94      0.98      0.96       297

    accuracy                           0.98      1300
   macro avg       0.97      0.98      0.98      1300
weighted avg       0.98      0.98      0.98      1300


--- Trial 3 ---
Accuracy: 98.85%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       968
           1       0.96      0.99      0.98       332

    accurac

### 50/50

In [10]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd


# To store results for each test
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Perform train-test split with different random seeds
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=i)
    
    # Train SVM classifier
    classifier = SVC(kernel='linear', class_weight='balanced', C = 1000, random_state=i)
    classifier.fit(X_train, y_train)
    
    # Make predictions
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })
    
    # Print results for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Display overall results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
average_accuracy = results_df["Accuracy"].mean()
print(f"Average Accuracy across the 3 trials: {average_accuracy * 100:.2f}%")



--- Trial 1 ---
Accuracy: 98.74%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2440
           1       0.97      0.98      0.97       809

    accuracy                           0.99      3249
   macro avg       0.98      0.99      0.98      3249
weighted avg       0.99      0.99      0.99      3249


--- Trial 2 ---
Accuracy: 98.31%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      2453
           1       0.94      0.99      0.97       796

    accuracy                           0.98      3249
   macro avg       0.97      0.98      0.98      3249
weighted avg       0.98      0.98      0.98      3249


--- Trial 3 ---
Accuracy: 98.09%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      2412
           1       0.94      0.99      0.96       837

    accurac

### 20/80

In [11]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd


# To store results for each test
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Perform train-test split with different random seeds
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=i)
    
    # Train SVM classifier
    classifier = SVC(kernel='linear', class_weight='balanced',C =1000, random_state=i)
    classifier.fit(X_train, y_train)
    
    # Make predictions
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })
    
    # Print results for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Display overall results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
average_accuracy = results_df["Accuracy"].mean()
print(f"Average Accuracy across the 3 trials: {average_accuracy * 100:.2f}%")




--- Trial 1 ---
Accuracy: 98.69%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3904
           1       0.97      0.97      0.97      1294

    accuracy                           0.99      5198
   macro avg       0.98      0.98      0.98      5198
weighted avg       0.99      0.99      0.99      5198


--- Trial 2 ---
Accuracy: 98.58%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3904
           1       0.96      0.98      0.97      1294

    accuracy                           0.99      5198
   macro avg       0.98      0.99      0.98      5198
weighted avg       0.99      0.99      0.99      5198


--- Trial 3 ---
Accuracy: 97.98%
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      3912
           1       0.94      0.98      0.96      1286

    accurac

## ANN

#### Hyperparam Tuning

In [12]:
from sklearn.model_selection import ParameterGrid
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Define hyperparameter grid
param_grid = {
    "lr": [0.001, 0.01, 0.1],                # Learning rates to tune
    "hidden_size_1": [32, 64, 128],          # Hidden layer 1 sizes to tune
    "hidden_size_2": [16, 32, 64],           # Hidden layer 2 sizes to tune
}

# Convert the grid to a list of parameter combinations
grid = list(ParameterGrid(param_grid))

# To store results
tuning_results = []

# Iterate over all combinations of hyperparameters
for params in grid:
    print(f"\nTesting Parameters: {params}")

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train.to_numpy(), dtype=torch.long)
    y_test = torch.tensor(y_test.to_numpy(), dtype=torch.long)

    # Define model with hyperparameters
    class ANN(nn.Module):
        def __init__(self, input_size):
            super(ANN, self).__init__()
            self.fc1 = nn.Linear(input_size, params["hidden_size_1"])
            self.fc2 = nn.Linear(params["hidden_size_1"], params["hidden_size_2"])
            self.fc3 = nn.Linear(params["hidden_size_2"], 2)  # 2 output classes

        def forward(self, x):
            x = torch.relu(self.fc1(x))
            x = torch.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model = ANN(input_size=X_train.shape[1])

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params["lr"])

    # Training loop
    epochs = 50  # Fixed number of epochs
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

    # Evaluate the model
    with torch.no_grad():
        y_pred = model(X_test).argmax(dim=1)
        accuracy = (y_pred == y_test).float().mean().item()

    # Append results
    tuning_results.append({
        "Params": params,
        "Accuracy": accuracy
    })

# Convert results to DataFrame and display the best parameters
results_df = pd.DataFrame(tuning_results)
best_result = results_df.loc[results_df["Accuracy"].idxmax()]
print("\nBest Parameters and Results:")
print(best_result)

best_params = best_result["Params"]
best_accuracy = best_result["Accuracy"]

print("\nBest Parameters:")
print(f"Hidden Layer 1 Size: {best_params['hidden_size_1']}")
print(f"Hidden Layer 2 Size: {best_params['hidden_size_2']}")
print(f"Learning Rate: {best_params['lr']}")
print(f"\nBest Accuracy: {best_accuracy * 100:.2f}%")




Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 16, 'lr': 0.001}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 16, 'lr': 0.01}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 16, 'lr': 0.1}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 32, 'lr': 0.001}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 32, 'lr': 0.01}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 32, 'lr': 0.1}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 64, 'lr': 0.001}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 64, 'lr': 0.01}

Testing Parameters: {'hidden_size_1': 32, 'hidden_size_2': 64, 'lr': 0.1}

Testing Parameters: {'hidden_size_1': 64, 'hidden_size_2': 16, 'lr': 0.001}

Testing Parameters: {'hidden_size_1': 64, 'hidden_size_2': 16, 'lr': 0.01}

Testing Parameters: {'hidden_size_1': 64, 'hidden_size_2': 16, 'lr': 0.1}

Testing Parameters: {'hidden_size_1': 64, 'hidden_size_2': 32, 'lr': 0.001}

Testing Pa

### 80/20

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Define the ANN
class ANN(nn.Module):
    def __init__(self, input_size):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# To store results
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Split data with a different random seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=i
    )
    
    # Scale data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train.to_numpy(), dtype=torch.long)
    y_test = torch.tensor(y_test.to_numpy(), dtype=torch.long)

    # Initialize the model
    model = ANN(input_size=X_train.shape[1])

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.1)

    # Train the model
    for epoch in range(100):  # Adjust the number of epochs as needed
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/100], Loss: {loss.item():.4f}")

    # Evaluate the model
    with torch.no_grad():
        y_pred = model(X_test).argmax(dim=1)
        accuracy = (y_pred == y_test).float().mean().item()
        print(f"Accuracy: {accuracy:.4f}")

    # Append results
    results.append({"Trial": i + 1, "Accuracy": accuracy})

# Summary of results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
print(f"Average Accuracy: {results_df['Accuracy'].mean() * 100:.2f}%")



--- Trial 1 ---
Epoch [10/100], Loss: 0.0377
Epoch [20/100], Loss: 0.0457
Epoch [30/100], Loss: 0.0174
Epoch [40/100], Loss: 0.0127
Epoch [50/100], Loss: 0.0103
Epoch [60/100], Loss: 0.0081
Epoch [70/100], Loss: 0.0061
Epoch [80/100], Loss: 0.0035
Epoch [90/100], Loss: 0.0017
Epoch [100/100], Loss: 0.0006
Accuracy: 0.9938

--- Trial 2 ---
Epoch [10/100], Loss: 0.0585
Epoch [20/100], Loss: 0.0498
Epoch [30/100], Loss: 0.0206
Epoch [40/100], Loss: 0.0161
Epoch [50/100], Loss: 0.0123
Epoch [60/100], Loss: 0.0103
Epoch [70/100], Loss: 0.0090
Epoch [80/100], Loss: 0.0080
Epoch [90/100], Loss: 0.0070
Epoch [100/100], Loss: 0.0061
Accuracy: 0.9954

--- Trial 3 ---
Epoch [10/100], Loss: 0.0355
Epoch [20/100], Loss: 0.0865
Epoch [30/100], Loss: 0.0656
Epoch [40/100], Loss: 0.0213
Epoch [50/100], Loss: 0.0128
Epoch [60/100], Loss: 0.0095
Epoch [70/100], Loss: 0.0073
Epoch [80/100], Loss: 0.0060
Epoch [90/100], Loss: 0.0050
Epoch [100/100], Loss: 0.0041
Accuracy: 0.9931

Summary of Results Acros

### 50/50

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Define the ANN
class ANN(nn.Module):
    def __init__(self, input_size):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# To store results
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Split data with a different random seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=i
    )
    
    # Scale data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train.to_numpy(), dtype=torch.long)
    y_test = torch.tensor(y_test.to_numpy(), dtype=torch.long)

    # Initialize the model
    model = ANN(input_size=X_train.shape[1])

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.1)

    # Train the model
    for epoch in range(100):  # Adjust the number of epochs as needed
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/100], Loss: {loss.item():.4f}")

    # Evaluate the model
    with torch.no_grad():
        y_pred = model(X_test).argmax(dim=1)
        accuracy = (y_pred == y_test).float().mean().item()
        print(f"Accuracy: {accuracy:.4f}")

    # Append results
    results.append({"Trial": i + 1, "Accuracy": accuracy})

# Summary of results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
print(f"Average Accuracy: {results_df['Accuracy'].mean() * 100:.2f}%")



--- Trial 1 ---
Epoch [10/100], Loss: 0.0516
Epoch [20/100], Loss: 0.0762
Epoch [30/100], Loss: 0.0518
Epoch [40/100], Loss: 0.0203
Epoch [50/100], Loss: 0.0133
Epoch [60/100], Loss: 0.0107
Epoch [70/100], Loss: 0.0088
Epoch [80/100], Loss: 0.0077
Epoch [90/100], Loss: 0.0071
Epoch [100/100], Loss: 0.0064
Accuracy: 0.9954

--- Trial 2 ---
Epoch [10/100], Loss: 0.0587
Epoch [20/100], Loss: 0.0214
Epoch [30/100], Loss: 0.0122
Epoch [40/100], Loss: 0.0065
Epoch [50/100], Loss: 0.0033
Epoch [60/100], Loss: 0.0017
Epoch [70/100], Loss: 0.0007
Epoch [80/100], Loss: 0.0002
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0001
Accuracy: 0.9960

--- Trial 3 ---
Epoch [10/100], Loss: 0.0896
Epoch [20/100], Loss: 0.0590
Epoch [30/100], Loss: 0.0162
Epoch [40/100], Loss: 0.0065
Epoch [50/100], Loss: 0.0039
Epoch [60/100], Loss: 0.0027
Epoch [70/100], Loss: 0.0019
Epoch [80/100], Loss: 0.0015
Epoch [90/100], Loss: 0.0012
Epoch [100/100], Loss: 0.0011
Accuracy: 0.9960

Summary of Results Acros

### 20/80

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Define the ANN
class ANN(nn.Module):
    def __init__(self, input_size):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# To store results
results = []

# Perform 3 random tests
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")
    
    # Split data with a different random seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.8, random_state=i
    )
    
    # Scale data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train.to_numpy(), dtype=torch.long)
    y_test = torch.tensor(y_test.to_numpy(), dtype=torch.long)

    # Initialize the model
    model = ANN(input_size=X_train.shape[1])

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.1)

    # Train the model
    for epoch in range(100):  # Adjust the number of epochs as needed
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/100], Loss: {loss.item():.4f}")

    # Evaluate the model
    with torch.no_grad():
        y_pred = model(X_test).argmax(dim=1)
        accuracy = (y_pred == y_test).float().mean().item()
        print(f"Accuracy: {accuracy:.4f}")

    # Append results
    results.append({"Trial": i + 1, "Accuracy": accuracy})

# Summary of results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)
print(f"Average Accuracy: {results_df['Accuracy'].mean() * 100:.2f}%")



--- Trial 1 ---
Epoch [10/100], Loss: 0.0901
Epoch [20/100], Loss: 0.0393
Epoch [30/100], Loss: 0.0077
Epoch [40/100], Loss: 0.0030
Epoch [50/100], Loss: 0.0009
Epoch [60/100], Loss: 0.0003
Epoch [70/100], Loss: 0.0001
Epoch [80/100], Loss: 0.0001
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0000
Accuracy: 0.9906

--- Trial 2 ---
Epoch [10/100], Loss: 0.0363
Epoch [20/100], Loss: 0.0195
Epoch [30/100], Loss: 0.0099
Epoch [40/100], Loss: 0.0068
Epoch [50/100], Loss: 0.0037
Epoch [60/100], Loss: 0.0011
Epoch [70/100], Loss: 0.0003
Epoch [80/100], Loss: 0.0001
Epoch [90/100], Loss: 0.0001
Epoch [100/100], Loss: 0.0001
Accuracy: 0.9896

--- Trial 3 ---
Epoch [10/100], Loss: 0.0321
Epoch [20/100], Loss: 0.0046
Epoch [30/100], Loss: 0.0007
Epoch [40/100], Loss: 0.0001
Epoch [50/100], Loss: 0.0000
Epoch [60/100], Loss: 0.0000
Epoch [70/100], Loss: 0.0000
Epoch [80/100], Loss: 0.0000
Epoch [90/100], Loss: 0.0000
Epoch [100/100], Loss: 0.0000
Accuracy: 0.9933

Summary of Results Acros

## XG Boost

In [18]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import Counter
from sklearn.metrics import roc_auc_score


### Hyperparam Tuning Using Normal Looping

In [19]:
best_params = None
best_auc = 0
scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1]) * 0.8

for n_estimators in [100, 200, 300]:
    for max_depth in [3, 5, 7]:
        for learning_rate in [0.01, 0.05, 0.1]:
            xgb = XGBClassifier(
                scale_pos_weight=scale_pos_weight,
                n_estimators=n_estimators,
                max_depth=max_depth,
                learning_rate=learning_rate,
                random_state=42,
                eval_metric='logloss'
            )
            xgb.fit(X_train, y_train)
            y_proba = xgb.predict_proba(X_test)[:, 1]
            auc = roc_auc_score(y_test, y_proba)
            if auc > best_auc:
                best_auc = auc
                best_params = {
                    'n_estimators': n_estimators,
                    'max_depth': max_depth,
                    'learning_rate': learning_rate
                }
print(f"Best Parameters: {best_params}")


Best Parameters: {'n_estimators': 300, 'max_depth': 3, 'learning_rate': 0.1}


### 80/20

In [20]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np


# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# To store results
results = []

# Perform 3 trials
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")

    # Split the data with a different random seed each time
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=i
    )

    # Calculate scale_pos_weight to handle class imbalance
    scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1]) * 0.8

    # Initialize and train the XGBoost classifier
    xgb = XGBClassifier(
        scale_pos_weight=scale_pos_weight,
        n_estimators=300,
        max_depth=3,
        learning_rate=0.1,
        random_state=i,
        use_label_encoder=False,
        eval_metric='logloss'
    )
    xgb.fit(X_train, y_train)

    # Make predictions
    y_pred = xgb.predict(X_test)
    y_proba = xgb.predict_proba(X_test)[:, 1]  # For ROC-AUC

    # Calculate metrics
    accuracy = (y_pred == y_test).mean()
    roc_auc = roc_auc_score(y_test, y_proba)
    report = classification_report(y_test, y_pred, output_dict=True)

    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "ROC-AUC": roc_auc,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })

    # Print metrics for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"ROC-AUC Score: {roc_auc:.2f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Summarize results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)

# Calculate and print the average metrics across trials
average_accuracy = results_df["Accuracy"].mean()
average_roc_auc = results_df["ROC-AUC"].mean()
print(f"\nAverage Accuracy across trials: {average_accuracy * 100:.2f}%")
print(f"Average ROC-AUC across trials: {average_roc_auc:.2f}")



--- Trial 1 ---
Accuracy: 99.23%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       989
           1       0.97      0.99      0.98       311

    accuracy                           0.99      1300
   macro avg       0.99      0.99      0.99      1300
weighted avg       0.99      0.99      0.99      1300


--- Trial 2 ---


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Accuracy: 99.46%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1003
           1       0.99      0.99      0.99       297

    accuracy                           0.99      1300
   macro avg       0.99      0.99      0.99      1300
weighted avg       0.99      0.99      0.99      1300


--- Trial 3 ---
Accuracy: 99.77%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       968
           1       0.99      1.00      1.00       332

    accuracy                           1.00      1300
   macro avg       1.00      1.00      1.00      1300
weighted avg       1.00      1.00      1.00      1300


Summary of Results Across Trials:
   Trial  Accuracy   ROC-AUC  Precision (Class 1)  Recall (Class 1)  \
0      1  0.992308  0.999854             0.974763          0.993569   
1      2  0.994615  0.996727    

Parameters: { "use_label_encoder" } are not used.



### 50/50

In [21]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np


# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# To store results
results = []

# Perform 3 trials
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")

    # Split the data with a different random seed each time
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.5, random_state=i
    )

    # Calculate scale_pos_weight to handle class imbalance
    scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1]) * 0.8

    # Initialize and train the XGBoost classifier
    xgb = XGBClassifier(
        scale_pos_weight=scale_pos_weight,
        n_estimators=300,
        max_depth=3,
        learning_rate=0.1,
        random_state=i,
        use_label_encoder=False,
        eval_metric='logloss'
    )
    xgb.fit(X_train, y_train)

    # Make predictions
    y_pred = xgb.predict(X_test)
    y_proba = xgb.predict_proba(X_test)[:, 1]  # For ROC-AUC

    # Calculate metrics
    accuracy = (y_pred == y_test).mean()
    roc_auc = roc_auc_score(y_test, y_proba)
    report = classification_report(y_test, y_pred, output_dict=True)

    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "ROC-AUC": roc_auc,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })

    # Print metrics for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"ROC-AUC Score: {roc_auc:.2f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Summarize results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)

# Calculate and print the average metrics across trials
average_accuracy = results_df["Accuracy"].mean()
average_roc_auc = results_df["ROC-AUC"].mean()
print(f"\nAverage Accuracy across trials: {average_accuracy * 100:.2f}%")
print(f"Average ROC-AUC across trials: {average_roc_auc:.2f}")



--- Trial 1 ---
Accuracy: 99.63%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2440
           1       0.99      0.99      0.99       809

    accuracy                           1.00      3249
   macro avg       1.00      1.00      1.00      3249
weighted avg       1.00      1.00      1.00      3249


--- Trial 2 ---


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Accuracy: 99.57%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2453
           1       0.99      0.99      0.99       796

    accuracy                           1.00      3249
   macro avg       1.00      0.99      0.99      3249
weighted avg       1.00      1.00      1.00      3249


--- Trial 3 ---
Accuracy: 99.54%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2412
           1       0.99      0.99      0.99       837

    accuracy                           1.00      3249
   macro avg       0.99      0.99      0.99      3249
weighted avg       1.00      1.00      1.00      3249


Summary of Results Across Trials:
   Trial  Accuracy   ROC-AUC  Precision (Class 1)  Recall (Class 1)  \
0      1  0.996307  0.999679             0.992583          0.992583   
1      2  0.995691  0.998149    

Parameters: { "use_label_encoder" } are not used.



### 20/80

In [22]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np


# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# To store results
results = []

# Perform 3 trials
for i in range(3):
    print(f"\n--- Trial {i + 1} ---")

    # Split the data with a different random seed each time
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.8, random_state=i
    )

    # Calculate scale_pos_weight to handle class imbalance
    scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1]) * 0.8

    # Initialize and train the XGBoost classifier
    xgb = XGBClassifier(
        scale_pos_weight=scale_pos_weight,
        n_estimators=300,
        max_depth=3,
        learning_rate=0.1,
        random_state=i,
        use_label_encoder=False,
        eval_metric='logloss'
    )
    xgb.fit(X_train, y_train)

    # Make predictions
    y_pred = xgb.predict(X_test)
    y_proba = xgb.predict_proba(X_test)[:, 1]  # For ROC-AUC

    # Calculate metrics
    accuracy = (y_pred == y_test).mean()
    roc_auc = roc_auc_score(y_test, y_proba)
    report = classification_report(y_test, y_pred, output_dict=True)

    # Append results
    results.append({
        "Trial": i + 1,
        "Accuracy": accuracy,
        "ROC-AUC": roc_auc,
        "Precision (Class 1)": report["1"]["precision"],
        "Recall (Class 1)": report["1"]["recall"],
        "F1-Score (Class 1)": report["1"]["f1-score"]
    })

    # Print metrics for this trial
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"ROC-AUC Score: {roc_auc:.2f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Summarize results
results_df = pd.DataFrame(results)
print("\nSummary of Results Across Trials:")
print(results_df)

# Calculate and print the average metrics across trials
average_accuracy = results_df["Accuracy"].mean()
average_roc_auc = results_df["ROC-AUC"].mean()
print(f"\nAverage Accuracy across trials: {average_accuracy * 100:.2f}%")
print(f"Average ROC-AUC across trials: {average_roc_auc:.2f}")



--- Trial 1 ---
Accuracy: 99.27%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3904
           1       0.99      0.99      0.99      1294

    accuracy                           0.99      5198
   macro avg       0.99      0.99      0.99      5198
weighted avg       0.99      0.99      0.99      5198


--- Trial 2 ---
Accuracy: 99.06%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3904
           1       0.98      0.98      0.98      1294

    accuracy                           0.99      5198
   macro avg       0.99      0.99      0.99      5198
weighted avg       0.99      0.99      0.99      5198


--- Trial 3 ---


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Accuracy: 99.19%
ROC-AUC Score: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      3912
           1       0.99      0.98      0.98      1286

    accuracy                           0.99      5198
   macro avg       0.99      0.99      0.99      5198
weighted avg       0.99      0.99      0.99      5198


Summary of Results Across Trials:
   Trial  Accuracy   ROC-AUC  Precision (Class 1)  Recall (Class 1)  \
0      1  0.992689  0.997363             0.985317          0.985317   
1      2  0.990573  0.998557             0.983683          0.978362   
2      3  0.991920  0.996204             0.988994          0.978227   

   F1-Score (Class 1)  
0            0.985317  
1            0.981015  
2            0.983581  

Average Accuracy across trials: 99.17%
Average ROC-AUC across trials: 1.00
