In [117]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

def model_comparison(df, target_column, test_size=0.2, random_state=42, cv=5):
    # Prepare the data
    X = df.drop(target_column, axis=1)
    y = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Dictionary of models and their reduced hyperparameter grids
    models = {
        'Logistic Regression': (LogisticRegression(max_iter=1000), {
            'C': [0.01, 0.1, 1],
            'solver': ['liblinear', 'lbfgs'],
            'class_weight': [None, 'balanced']
        }),
        'KNN': (KNeighborsClassifier(), {
            'n_neighbors': [3, 5, 7],
            'weights': ['uniform', 'distance'],
            'p': [1, 2]
        }),
        'Decision Tree': (DecisionTreeClassifier(), {
            'max_depth': [None, 10, 20, 40],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
        }),
        'Random Forest': (RandomForestClassifier(), {
            'n_estimators': [100, 200, 400],
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [2, 4]
        }),
        'Gradient Boosting': (GradientBoostingClassifier(), {
            'n_estimators': [100, 200, 400],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 4]
        }),
        'XGBoost': (XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'), {
            'n_estimators': [100, 200, 400],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 4],
            'gamma': [0, 0.1]
        }),
        'LightGBM': (LGBMClassifier(), {
            'n_estimators': [100, 200, 400],
            'learning_rate': [0.01, 0.05, 0.1],
            'num_leaves': [31, 50, 70]
        }),
        'CatBoost': (CatBoostClassifier(verbose=0), {
            'iterations': [100, 200, 400],
            'learning_rate': [0.01, 0.05, 0.1],
            'depth': [4, 6, 8]
        })
    }
    # Dictionary to store results
    results = {}

    for name, (model, param_grid) in models.items():
        start_time = time.time()
        
        # Perform GridSearchCV
        grid_search = GridSearchCV(model, param_grid=param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
        grid_search.fit(X_train_scaled, y_train)
        
        # Get the best model
        best_model = grid_search.best_estimator_

        # Measure training time for best parameters
        best_param_train_start = time.time()
        best_model.fit(X_train_scaled, y_train)
        best_param_train_time = time.time() - best_param_train_start

        # Measure inference time for best parameters
        inference_start_time = time.time()
        y_pred = best_model.predict(X_test_scaled)
        inference_time = time.time() - inference_start_time
        
        # Calculate total computation time
        computation_time = time.time() - start_time
        
        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        if len(np.unique(y)) == 2:  # Binary classification
            auc = roc_auc_score(y_test, best_model.predict_proba(X_test_scaled)[:, 1])
        else:  # Multiclass classification
            auc = roc_auc_score(y_test, best_model.predict_proba(X_test_scaled), multi_class='ovr', average='macro')

        # Perform cross-validation
        cv_scores = cross_val_score(best_model, X_train_scaled, y_train, cv=cv, scoring='accuracy')

        results[name] = {
            'Accuracy': accuracy,
            'AUC Score': auc,
            'CV Mean Accuracy': np.mean(cv_scores),
            'CV Std Accuracy': np.std(cv_scores),
            'Training Time (Best Params)': best_param_train_time,
            'Inference Time (Best Params)': inference_time,
            'Computation Time (Total)': computation_time,
            'Best Parameters': grid_search.best_params_
        }

    # Convert results to a DataFrame
    result_df = pd.DataFrame(results).T

    return result_df


In [118]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.neural_network import MLPClassifier

def mlp_comparison(X, y, result_df, test_size=0.2, random_state=42, cv=3):
    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Define the MLP model and its hyperparameter grid
    mlp = MLPClassifier(max_iter=1000, random_state=random_state)
    param_grid = {
        'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'sgd'],
        'alpha': [0.0001, 0.001, 0.01],
        'learning_rate': ['constant', 'adaptive']
    }

    # Perform GridSearchCV with StratifiedKFold
    start_time = time.time()
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=random_state)
    grid_search = GridSearchCV(mlp, param_grid=param_grid, cv=skf, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train_scaled, y_train)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Measure training time for best parameters
    best_param_train_start = time.time()
    best_model.fit(X_train_scaled, y_train)
    training_time = time.time() - best_param_train_start

    # Measure inference time for best parameters
    inference_start_time = time.time()
    y_pred = best_model.predict(X_test_scaled)
    inference_time = time.time() - inference_start_time

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    if len(np.unique(y)) == 2:  # Binary classification
        auc = roc_auc_score(y_test, best_model.predict_proba(X_test_scaled)[:, 1])
    else:  # Multiclass classification
        auc = roc_auc_score(y_test, best_model.predict_proba(X_test_scaled), multi_class='ovr', average='macro')

    # Perform cross-validation
    cv_scores = cross_val_score(best_model, X_train_scaled, y_train, cv=cv, scoring='accuracy')

    # Store results in the existing result DataFrame
    result_df.loc['MLP'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': np.mean(cv_scores),
        'CV Std Accuracy': np.std(cv_scores),
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': grid_search.best_params_
    }

    return result_df, grid_search.best_params_

In [119]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def dnn_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    # Define the DNN model
    class DNN(nn.Module):
        def __init__(self, input_dim, hidden_dims, output_dim):
            super(DNN, self).__init__()
            layers = []
            for hidden_dim in hidden_dims:
                layers.append(nn.Linear(input_dim, hidden_dim))
                layers.append(nn.ReLU())
                input_dim = hidden_dim
            layers.append(nn.Linear(input_dim, output_dim))
            self.network = nn.Sequential(*layers)
        
        def forward(self, x):
            return self.network(x)

    # Define the objective function for Optuna
    def objective(trial):
        # Define hyperparameters to tune
        hidden_dims = [trial.suggest_int(f'hidden_dim_{i}', 32, 256) for i in range(3)]
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the model
        input_dim = X_train.shape[1]
        output_dim = len(np.unique(y))
        model = DNN(input_dim, hidden_dims, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Start timing the entire process
    start_time = time.time()

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final model with the best hyperparameters
    input_dim = X_train.shape[1]
    output_dim = len(np.unique(y))
    best_model = DNN(input_dim, [best_params[f'hidden_dim_{i}'] for i in range(3)], output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        # Convert to numpy for metric calculation
        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:  # Multi-class classification
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = DNN(input_dim, [best_params[f'hidden_dim_{i}'] for i in range(3)], output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['DNN'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params

In [120]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def dcn_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class CrossLayer(nn.Module):
        def __init__(self, input_dim):
            super(CrossLayer, self).__init__()
            self.weight = nn.Parameter(torch.Tensor(input_dim, 1))
            self.bias = nn.Parameter(torch.Tensor(input_dim, 1))
            nn.init.xavier_uniform_(self.weight)
            nn.init.zeros_(self.bias)

        def forward(self, x0, x):
            x = x.unsqueeze(2)
            x0 = x0.unsqueeze(2)
            interaction = torch.matmul(x0, torch.matmul(x.transpose(1, 2), self.weight))
            return x0.squeeze(2) + interaction.squeeze(2) + self.bias.T

    class DCN(nn.Module):
        def __init__(self, input_dim, cross_layers, hidden_layers, output_dim):
            super(DCN, self).__init__()
            self.cross_layers = nn.ModuleList([CrossLayer(input_dim) for _ in range(cross_layers)])
            
            deep_layers = []
            for i in range(len(hidden_layers)):
                if i == 0:
                    deep_layers.append(nn.Linear(input_dim, hidden_layers[i]))
                else:
                    deep_layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
                deep_layers.append(nn.ReLU())
            self.deep_net = nn.Sequential(*deep_layers)
            
            self.final_layer = nn.Linear(input_dim + hidden_layers[-1], output_dim)

        def forward(self, x):
            cross_out = x
            for layer in self.cross_layers:
                cross_out = layer(x, cross_out)
            deep_out = self.deep_net(x)
            concat_out = torch.cat([cross_out, deep_out], dim=1)
            return self.final_layer(concat_out)

    def objective(trial):
        cross_layers = trial.suggest_int('cross_layers', 1, 5)
        hidden_layers = [trial.suggest_int(f'hidden_layer_{i}', 32, 256) for i in range(3)]
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        input_dim = X_train.shape[1]
        output_dim = len(np.unique(y))
        model = DCN(input_dim, cross_layers, hidden_layers, output_dim).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    start_time = time.time()

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    best_params = study.best_params

    input_dim = X_train.shape[1]
    output_dim = len(np.unique(y))
    best_model = DCN(input_dim, best_params['cross_layers'], 
                     [best_params[f'hidden_layer_{i}'] for i in range(3)], 
                     output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = DCN(input_dim, best_params['cross_layers'], 
                         [best_params[f'hidden_layer_{i}'] for i in range(3)], 
                         output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    computation_time = time.time() - start_time

    result_df.loc['DCN'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params



In [121]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def wide_and_deep_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class WideAndDeepNetwork(nn.Module):
        def __init__(self, input_dim, hidden_layers, output_dim):
            super(WideAndDeepNetwork, self).__init__()
            
            # Wide part
            self.wide = nn.Linear(input_dim, output_dim)
            
            # Deep part
            deep_layers = []
            for i in range(len(hidden_layers)):
                if i == 0:
                    deep_layers.append(nn.Linear(input_dim, hidden_layers[i]))
                else:
                    deep_layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
                deep_layers.append(nn.ReLU())
            deep_layers.append(nn.Linear(hidden_layers[-1], output_dim))
            self.deep = nn.Sequential(*deep_layers)

        def forward(self, x):
            wide_out = self.wide(x)
            deep_out = self.deep(x)
            return wide_out + deep_out

    def objective(trial):
        hidden_layers = [trial.suggest_int(f'hidden_layer_{i}', 32, 256) for i in range(3)]
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        input_dim = X_train.shape[1]
        output_dim = len(np.unique(y))
        model = WideAndDeepNetwork(input_dim, hidden_layers, output_dim).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    start_time = time.time()

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    best_params = study.best_params

    input_dim = X_train.shape[1]
    output_dim = len(np.unique(y))
    best_model = WideAndDeepNetwork(input_dim, 
                                    [best_params[f'hidden_layer_{i}'] for i in range(3)], 
                                    output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = WideAndDeepNetwork(input_dim, 
                                        [best_params[f'hidden_layer_{i}'] for i in range(3)], 
                                        output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    computation_time = time.time() - start_time

    result_df.loc['Wide_and_Deep'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params

In [122]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from xgboost import XGBClassifier
import optuna

def xgb_nn_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    start_time = time.time()

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    class NeuralNetwork(nn.Module):
        def __init__(self, input_dim, hidden_layers, output_dim):
            super(NeuralNetwork, self).__init__()
            layers = []
            prev_dim = input_dim
            for hidden_dim in hidden_layers:
                layers.append(nn.Linear(prev_dim, hidden_dim))
                layers.append(nn.ReLU())
                prev_dim = hidden_dim
            layers.append(nn.Linear(prev_dim, output_dim))
            self.network = nn.Sequential(*layers)
        
        def forward(self, x):
            return self.network(x)

    def objective(trial):
        # Define hyperparameters to tune for XGBoost
        xgb_params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
        }

        # Train XGBoost model
        xgb_model = XGBClassifier(**xgb_params, use_label_encoder=False, eval_metric='mlogloss')
        xgb_model.fit(X_train_scaled, y_train)
        
        # Extract features using XGBoost
        X_train_transformed = xgb_model.apply(X_train_scaled)
        X_test_transformed = xgb_model.apply(X_test_scaled)
        
        # Convert to PyTorch tensors
        X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
        X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)
        y_train_tensor = torch.LongTensor(y_train).to(device)
        y_test_tensor = torch.LongTensor(y_test).to(device)
        
        # Define hyperparameters to tune for Neural Network
        hidden_layers = []
        for i in range(3):  # Allow up to 3 hidden layers
            if trial.suggest_categorical(f'use_hidden_layer_{i}', [True, False]):
                hidden_layers.append(trial.suggest_int(f'hidden_layer_{i}', 32, 256))
        
        nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the Neural Network model
        input_dim = X_train_transformed.shape[1]
        output_dim = len(np.unique(y))
        model = NeuralNetwork(input_dim, hidden_layers, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=nn_learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_transformed_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final XGBoost model with the best hyperparameters
    xgb_best_params = {
        'n_estimators': best_params['n_estimators'],
        'max_depth': best_params['max_depth'],
        'learning_rate': best_params['xgb_learning_rate'],
        'subsample': best_params['subsample'],
        'colsample_bytree': best_params['colsample_bytree']
    }
    xgb_model = XGBClassifier(**xgb_best_params, use_label_encoder=False, eval_metric='mlogloss')
    xgb_model.fit(X_train_scaled, y_train)

    # Extract features using XGBoost
    X_train_transformed = xgb_model.apply(X_train_scaled)
    X_test_transformed = xgb_model.apply(X_test_scaled)

    # Convert to PyTorch tensors
    X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
    X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    # Train the final Neural Network model with the best hyperparameters
    nn_best_params = {
        'hidden_layers': [],
        'learning_rate': best_params['nn_learning_rate'],
        'batch_size': best_params['batch_size'],
        'num_epochs': best_params['num_epochs']
    }

    for i in range(3):  # Assuming max 3 hidden layers
        if f'use_hidden_layer_{i}' in best_params and best_params[f'use_hidden_layer_{i}']:
            nn_best_params['hidden_layers'].append(best_params[f'hidden_layer_{i}'])

    input_dim = X_train_transformed.shape[1]
    output_dim = len(np.unique(y))
    best_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=nn_best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(nn_best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_transformed_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:  # Multi-class classification
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        xgb_model.fit(X_fold_train, y_fold_train)
        X_fold_train_transformed = xgb_model.apply(X_fold_train)
        X_fold_val_transformed = xgb_model.apply(X_fold_val)

        X_fold_train_tensor = torch.FloatTensor(X_fold_train_transformed).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val_transformed).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=nn_best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

        for epoch in range(nn_best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    computation_time = time.time() - start_time

    result_df.loc['XGBoost + NN'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [123]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from lightgbm import LGBMClassifier
import optuna

def lgbm_nn_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    class NeuralNetwork(nn.Module):
        def __init__(self, input_dim, hidden_layers, output_dim):
            super(NeuralNetwork, self).__init__()
            layers = []
            for i in range(len(hidden_layers)):
                if i == 0:
                    layers.append(nn.Linear(input_dim, hidden_layers[i]))
                else:
                    layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
                layers.append(nn.ReLU())
            layers.append(nn.Linear(hidden_layers[-1], output_dim))
            self.network = nn.Sequential(*layers)
        
        def forward(self, x):
            return self.network(x)

    def objective(trial):
        # Define hyperparameters to tune for LightGBM
        lgb_params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('lgb_learning_rate', 1e-4, 1e-1, log=True),
            'num_leaves': trial.suggest_int('num_leaves', 20, 100),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
        }

        # Train LightGBM model
        lgb_model = LGBMClassifier(**lgb_params)
        lgb_model.fit(X_train_scaled, y_train)
        
        # Extract features using LightGBM
        X_train_transformed = lgb_model.predict_proba(X_train_scaled)
        X_test_transformed = lgb_model.predict_proba(X_test_scaled)
        
        # Convert to PyTorch tensors
        X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
        X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)
        y_train_tensor = torch.LongTensor(y_train).to(device)
        y_test_tensor = torch.LongTensor(y_test).to(device)
        
        # Define hyperparameters to tune for Neural Network
        hidden_layers = [trial.suggest_int(f'hidden_layer_{i}', 32, 256) for i in range(3)]
        nn_learning_rate = trial.suggest_float('nn_learning_rate', 1e-4, 1e-1, log=True)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the Neural Network model
        input_dim = X_train_transformed.shape[1]
        output_dim = len(np.unique(y))
        model = NeuralNetwork(input_dim, hidden_layers, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=nn_learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_transformed_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final LightGBM model with the best hyperparameters
    lgb_best_params = {
        'n_estimators': best_params['n_estimators'],
        'max_depth': best_params['max_depth'],
        'learning_rate': best_params['lgb_learning_rate'],
        'num_leaves': best_params['num_leaves'],
        'subsample': best_params['subsample'],
        'colsample_bytree': best_params['colsample_bytree']
    }
    lgb_model = LGBMClassifier(**lgb_best_params)
    lgb_model.fit(X_train_scaled, y_train)

    # Extract features using LightGBM
    X_train_transformed = lgb_model.predict_proba(X_train_scaled)
    X_test_transformed = lgb_model.predict_proba(X_test_scaled)

    # Convert to PyTorch tensors
    X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
    X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    # Train the final Neural Network model with the best hyperparameters
    nn_best_params = {
        'hidden_layers': [best_params[f'hidden_layer_{i}'] for i in range(3)],
        'learning_rate': best_params['nn_learning_rate'],
        'batch_size': best_params['batch_size'],
        'num_epochs': best_params['num_epochs']
    }
    input_dim = X_train_transformed.shape[1]
    output_dim = len(np.unique(y))
    best_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=nn_best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(nn_best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_transformed_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:  # Multi-class classification
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        lgb_model.fit(X_fold_train, y_fold_train)
        X_fold_train_transformed = lgb_model.predict_proba(X_fold_train)
        X_fold_val_transformed = lgb_model.predict_proba(X_fold_val)

        X_fold_train_tensor = torch.FloatTensor(X_fold_train_transformed).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val_transformed).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=nn_best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

        for epoch in range(nn_best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['LightGBM + NN'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params

In [124]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def autoint_nn_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class AutoInt(nn.Module):
        def __init__(self, input_dim, embedding_dim, num_heads, num_layers):
            super(AutoInt, self).__init__()
            self.embedding = nn.Linear(input_dim, embedding_dim)
            self.attention_layers = nn.ModuleList([
                nn.MultiheadAttention(embedding_dim, num_heads) for _ in range(num_layers)
            ])
            self.fc = nn.Linear(embedding_dim, 1)

        def forward(self, x):
            x = self.embedding(x).unsqueeze(1)
            for attn_layer in self.attention_layers:
                x, _ = attn_layer(x, x, x)
            x = x.squeeze(1)
            x = self.fc(x)
            return x

    class NeuralNetwork(nn.Module):
        def __init__(self, input_dim, hidden_layers, output_dim):
            super(NeuralNetwork, self).__init__()
            layers = []
            for i in range(len(hidden_layers)):
                if i == 0:
                    layers.append(nn.Linear(input_dim, hidden_layers[i]))
                else:
                    layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
                layers.append(nn.ReLU())
            layers.append(nn.Linear(hidden_layers[-1], output_dim))
            self.network = nn.Sequential(*layers)
        
        def forward(self, x):
            return self.network(x)

    def objective(trial):
        # Define hyperparameters to tune for AutoInt
        num_heads = trial.suggest_int('num_heads', 1, 8)
        embedding_dim = trial.suggest_int('embedding_dim', num_heads, 64, step=num_heads)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        
        # Train AutoInt model
        autoint_model = AutoInt(X_train.shape[1], embedding_dim, num_heads, num_layers).to(device)
        optimizer = optim.Adam(autoint_model.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor.float().unsqueeze(1))
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        
        for epoch in range(10):  # Fixed number of epochs for AutoInt
            autoint_model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = autoint_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        # Extract features using AutoInt
        autoint_model.eval()
        with torch.no_grad():
            X_train_transformed = autoint_model.embedding(X_train_tensor).cpu().numpy()
            X_test_transformed = autoint_model.embedding(X_test_tensor).cpu().numpy()
        
        # Convert to PyTorch tensors
        X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
        X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)
        
        # Define hyperparameters to tune for Neural Network
        hidden_layers = [trial.suggest_int(f'hidden_layer_{i}', 32, 256) for i in range(3)]
        nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the Neural Network model
        input_dim = X_train_transformed.shape[1]
        output_dim = len(np.unique(y))
        model = NeuralNetwork(input_dim, hidden_layers, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=nn_learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_transformed_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final AutoInt model with the best hyperparameters
    embedding_dim = best_params['embedding_dim']
    num_heads = best_params['num_heads']
    num_layers = best_params['num_layers']
    autoint_model = AutoInt(X_train.shape[1], embedding_dim, num_heads, num_layers).to(device)
    optimizer = optim.Adam(autoint_model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor.float().unsqueeze(1))
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    for epoch in range(10):  # Fixed number of epochs for AutoInt
        autoint_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = autoint_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Extract features using AutoInt
    autoint_model.eval()
    with torch.no_grad():
        X_train_transformed = autoint_model.embedding(X_train_tensor).cpu().numpy()
        X_test_transformed = autoint_model.embedding(X_test_tensor).cpu().numpy()

    # Convert to PyTorch tensors
    X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
    X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)

    # Train the final Neural Network model with the best hyperparameters
    nn_best_params = {
        'hidden_layers': [best_params[f'hidden_layer_{i}'] for i in range(3)],
        'learning_rate': best_params['nn_learning_rate'],
        'batch_size': best_params['batch_size'],
        'num_epochs': best_params['num_epochs']
    }
    input_dim = X_train_transformed.shape[1]
    output_dim = len(np.unique(y))
    best_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=nn_best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(nn_best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_transformed_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:  # Multi-class classification
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_autoint_model = AutoInt(X_fold_train.shape[1], embedding_dim, num_heads, num_layers).to(device)
        fold_optimizer = optim.Adam(fold_autoint_model.parameters(), lr=0.001)
        fold_criterion = nn.MSELoss()
        fold_train_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor.float().unsqueeze(1))
        fold_train_loader = DataLoader(fold_train_dataset, batch_size=32, shuffle=True)

        for epoch in range(10):  # Fixed number of epochs for AutoInt
            fold_autoint_model.train()
            for batch_X, batch_y in fold_train_loader:
                fold_optimizer.zero_grad()
                outputs = fold_autoint_model(batch_X)
                loss = fold_criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_autoint_model.eval()
        with torch.no_grad():
            X_fold_train_transformed = fold_autoint_model.embedding(X_fold_train_tensor).cpu().numpy()
            X_fold_val_transformed = fold_autoint_model.embedding(X_fold_val_tensor).cpu().numpy()

        X_fold_train_transformed_tensor = torch.FloatTensor(X_fold_train_transformed).to(device)
        X_fold_val_transformed_tensor = torch.FloatTensor(X_fold_val_transformed).to(device)

        fold_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=nn_best_params['learning_rate'])
        fold_train_dataset = TensorDataset(X_fold_train_transformed_tensor, y_fold_train_tensor)
        fold_train_loader = DataLoader(fold_train_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

        for epoch in range(nn_best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_train_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_transformed_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['AutoInt + NN'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params

In [125]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def ft_transformer_nn_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class FTTransformer(nn.Module):
        def __init__(self, input_dim, embedding_dim, num_heads, num_layers):
            super(FTTransformer, self).__init__()
            self.embedding = nn.Linear(input_dim, embedding_dim)
            self.transformer_layers = nn.ModuleList([
                nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads) for _ in range(num_layers)
            ])
            self.fc = nn.Linear(embedding_dim, input_dim)

        def forward(self, x):
            x = self.embedding(x).unsqueeze(1)
            for transformer_layer in self.transformer_layers:
                x = transformer_layer(x)
            x = x.squeeze(1)
            x = self.fc(x)
            return x

    class NeuralNetwork(nn.Module):
        def __init__(self, input_dim, hidden_layers, output_dim):
            super(NeuralNetwork, self).__init__()
            layers = []
            for i in range(len(hidden_layers)):
                if i == 0:
                    layers.append(nn.Linear(input_dim, hidden_layers[i]))
                else:
                    layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
                layers.append(nn.ReLU())
            layers.append(nn.Linear(hidden_layers[-1], output_dim))
            self.network = nn.Sequential(*layers)
        
        def forward(self, x):
            return self.network(x)

    def objective(trial):
        # Define hyperparameters to tune for FT-Transformer
        num_heads = trial.suggest_int('num_heads', 1, 8)
        embedding_dim = trial.suggest_int('embedding_dim', num_heads, 64, step=num_heads)
        num_layers = trial.suggest_int('num_layers', 1, 3)
        
        # Train FT-Transformer model
        ft_transformer_model = FTTransformer(X_train.shape[1], embedding_dim, num_heads, num_layers).to(device)
        optimizer = optim.Adam(ft_transformer_model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()
        
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        
        for epoch in range(10):  # Fixed number of epochs for FT-Transformer
            ft_transformer_model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = ft_transformer_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        # Extract features using FT-Transformer
        ft_transformer_model.eval()
        with torch.no_grad():
            X_train_transformed = ft_transformer_model.embedding(X_train_tensor).cpu().numpy()
            X_test_transformed = ft_transformer_model.embedding(X_test_tensor).cpu().numpy()
        
        # Convert to PyTorch tensors
        X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
        X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)
        
        # Define hyperparameters to tune for Neural Network
        hidden_layers = [trial.suggest_int(f'hidden_layer_{i}', 32, 256) for i in range(3)]
        nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the Neural Network model
        input_dim = X_train_transformed.shape[1]
        output_dim = len(np.unique(y))
        model = NeuralNetwork(input_dim, hidden_layers, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=nn_learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_transformed_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final FT-Transformer model with the best hyperparameters
    embedding_dim = best_params['embedding_dim']
    num_heads = best_params['num_heads']
    num_layers = best_params['num_layers']
    ft_transformer_model = FTTransformer(X_train.shape[1], embedding_dim, num_heads, num_layers).to(device)
    optimizer = optim.Adam(ft_transformer_model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    for epoch in range(10):  # Fixed number of epochs for FT-Transformer
        ft_transformer_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = ft_transformer_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Extract features using FT-Transformer
    ft_transformer_model.eval()
    with torch.no_grad():
        X_train_transformed = ft_transformer_model.embedding(X_train_tensor).cpu().numpy()
        X_test_transformed = ft_transformer_model.embedding(X_test_tensor).cpu().numpy()

    # Convert to PyTorch tensors
    X_train_transformed_tensor = torch.FloatTensor(X_train_transformed).to(device)
    X_test_transformed_tensor = torch.FloatTensor(X_test_transformed).to(device)

    # Train the final Neural Network model with the best hyperparameters
    nn_best_params = {
        'hidden_layers': [best_params[f'hidden_layer_{i}'] for i in range(3)],
        'learning_rate': best_params['nn_learning_rate'],
        'batch_size': best_params['batch_size'],
        'num_epochs': best_params['num_epochs']
    }
    input_dim = X_train_transformed.shape[1]
    output_dim = len(np.unique(y))
    best_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=nn_best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_transformed_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(nn_best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_transformed_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:  # Multi-class classification
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_ft_transformer_model = FTTransformer(X_fold_train.shape[1], embedding_dim, num_heads, num_layers).to(device)
        fold_optimizer = optim.Adam(fold_ft_transformer_model.parameters(), lr=0.001)
        fold_criterion = nn.CrossEntropyLoss()

        fold_train_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_train_loader = DataLoader(fold_train_dataset, batch_size=32, shuffle=True)

        for epoch in range(10):
            fold_ft_transformer_model.train()
            for batch_X, batch_y in fold_train_loader:
                fold_optimizer.zero_grad()
                outputs = fold_ft_transformer_model(batch_X)
                loss = fold_criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_ft_transformer_model.eval()
        with torch.no_grad():
            X_fold_train_transformed = fold_ft_transformer_model.embedding(X_fold_train_tensor).cpu().numpy()
            X_fold_val_transformed = fold_ft_transformer_model.embedding(X_fold_val_tensor).cpu().numpy()

        X_fold_train_transformed_tensor = torch.FloatTensor(X_fold_train_transformed).to(device)
        X_fold_val_transformed_tensor = torch.FloatTensor(X_fold_val_transformed).to(device)

        fold_model = NeuralNetwork(input_dim, nn_best_params['hidden_layers'], output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=nn_best_params['learning_rate'])
        fold_train_dataset = TensorDataset(X_fold_train_transformed_tensor, y_fold_train_tensor)
        fold_train_loader = DataLoader(fold_train_dataset, batch_size=nn_best_params['batch_size'], shuffle=True)

        for epoch in range(nn_best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_train_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_transformed_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['FT-Transformer'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [126]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def neural_architecture_search(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class NeuralNetwork(nn.Module):
        def __init__(self, input_dim, hidden_layers, output_dim):
            super(NeuralNetwork, self).__init__()
            layers = []
            for i in range(len(hidden_layers)):
                if i == 0:
                    layers.append(nn.Linear(input_dim, hidden_layers[i]))
                else:
                    layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
                layers.append(nn.ReLU())
            layers.append(nn.Linear(hidden_layers[-1], output_dim))
            self.network = nn.Sequential(*layers)
        
        def forward(self, x):
            return self.network(x)

    def objective(trial):
        # Define hyperparameters to tune for Neural Network
        num_layers = trial.suggest_int('num_layers', 1, 5)
        hidden_layers = [trial.suggest_int(f'hidden_layer_{i}', 32, 256) for i in range(num_layers)]
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the Neural Network model
        input_dim = X_train.shape[1]
        output_dim = len(np.unique(y))
        model = NeuralNetwork(input_dim, hidden_layers, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final Neural Network model with the best hyperparameters
    input_dim = X_train.shape[1]
    output_dim = len(np.unique(y))
    best_model = NeuralNetwork(input_dim, 
                               [best_params[f'hidden_layer_{i}'] for i in range(best_params['num_layers'])], 
                               output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        inference_time = time.time() - inference_start_time

        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            proba = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            auc = roc_auc_score(y_true, proba)
        else:  # Multi-class classification
            proba = torch.softmax(outputs, dim=1).cpu().numpy()
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = NeuralNetwork(input_dim, 
                                   [best_params[f'hidden_layer_{i}'] for i in range(best_params['num_layers'])], 
                                   output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['Neural Architecture Search'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [142]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def node_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class NODE(nn.Module):
        def __init__(self, input_dim, num_layers, num_trees, tree_dim, output_dim):
            super(NODE, self).__init__()
            self.layers = nn.ModuleList()
            for _ in range(num_layers):
                layer = nn.ModuleList()
                for _ in range(num_trees):
                    tree = nn.Sequential(
                        nn.Linear(input_dim, tree_dim),
                        nn.ReLU(),
                        nn.Linear(tree_dim, 1)
                    )
                    layer.append(tree)
                self.layers.append(layer)
            self.output = nn.Linear(num_layers * num_trees, output_dim)

        def forward(self, x):
            tree_outputs = []
            for layer in self.layers:
                layer_outputs = []
                for tree in layer:
                    layer_outputs.append(tree(x))
                layer_output = torch.cat(layer_outputs, dim=1)
                tree_outputs.append(layer_output)
            x = torch.cat(tree_outputs, dim=1)
            return self.output(x)

    def objective(trial):
        # Define hyperparameters to tune for NODE
        num_layers = trial.suggest_int('num_layers', 1, 5)
        num_trees = trial.suggest_int('num_trees', 1, 10)
        tree_dim = trial.suggest_int('tree_dim', 8, 64)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the NODE model
        input_dim = X_train.shape[1]
        output_dim = len(np.unique(y))
        model = NODE(input_dim, num_layers, num_trees, tree_dim, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final NODE model with the best hyperparameters
    input_dim = X_train.shape[1]
    output_dim = len(np.unique(y))
    best_model = NODE(input_dim, 
                      best_params['num_layers'], 
                      best_params['num_trees'], 
                      best_params['tree_dim'], 
                      output_dim).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        inference_time = time.time() - inference_start_time

        # Convert to numpy for metric calculation
        y_true = y_test_tensor.cpu().numpy()
        
        _, predicted = torch.max(outputs, 1)
        y_pred = predicted.cpu().numpy()
        proba = torch.softmax(outputs, dim=1).cpu().numpy()

        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            auc = roc_auc_score(y_true, proba[:, 1])
        else:  # Multi-class classification
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = NODE(input_dim, 
                          best_params['num_layers'], 
                          best_params['num_trees'], 
                          best_params['tree_dim'], 
                          output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['NODE'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [143]:
import pandas as pd
import numpy as np
import time
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from pytorch_tabnet.tab_model import TabNetClassifier
import optuna

def tabnet_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    def objective(trial):
        # Define hyperparameters to tune for TabNet
        n_d = trial.suggest_int('n_d', 8, 64)
        n_a = trial.suggest_int('n_a', 8, 64)
        n_steps = trial.suggest_int('n_steps', 3, 10)
        gamma = trial.suggest_float('gamma', 1.0, 2.0)
        lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the TabNet model
        model = TabNetClassifier(
            n_d=n_d,
            n_a=n_a,
            n_steps=n_steps,
            gamma=gamma,
            lambda_sparse=lambda_sparse,
            optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=learning_rate),
            device_name=device
        )

        # Training
        model.fit(
            X_train=X_train_scaled, y_train=y_train,
            eval_set=[(X_test_scaled, y_test)],
            eval_name=['val'],
            eval_metric=['accuracy'],
            max_epochs=num_epochs,
            patience=10,
            batch_size=batch_size,
            virtual_batch_size=batch_size // 2,
            num_workers=0,
            drop_last=False
        )

        # Evaluation
        y_pred = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final TabNet model with the best hyperparameters
    best_model = TabNetClassifier(
        n_d=best_params['n_d'],
        n_a=best_params['n_a'],
        n_steps=best_params['n_steps'],
        gamma=best_params['gamma'],
        lambda_sparse=best_params['lambda_sparse'],
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=best_params['learning_rate']),
        device_name=device
    )

    training_start_time = time.time()
    best_model.fit(
        X_train=X_train_scaled, y_train=y_train,
        eval_set=[(X_test_scaled, y_test)],
        eval_name=['val'],
        eval_metric=['accuracy'],
        max_epochs=best_params['num_epochs'],
        patience=10,
        batch_size=best_params['batch_size'],
        virtual_batch_size=best_params['batch_size'] // 2,
        num_workers=0,
        drop_last=False
    )
    training_time = time.time() - training_start_time

    # Evaluation
    y_pred = best_model.predict(X_test_scaled)
    inference_start_time = time.time()
    y_pred_proba = best_model.predict_proba(X_test_scaled)
    inference_time = time.time() - inference_start_time

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    if len(np.unique(y)) == 2:  # Binary classification
        auc = roc_auc_score(y_test, y_pred_proba[:, 1])
    else:  # Multiclass classification
        auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        fold_model = TabNetClassifier(
            n_d=best_params['n_d'],
            n_a=best_params['n_a'],
            n_steps=best_params['n_steps'],
            gamma=best_params['gamma'],
            lambda_sparse=best_params['lambda_sparse'],
            optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=best_params['learning_rate']),
            device_name=device
        )

        fold_model.fit(
            X_train=X_fold_train, y_train=y_fold_train,
            eval_set=[(X_fold_val, y_fold_val)],
            eval_name=['val'],
            eval_metric=['accuracy'],
            max_epochs=best_params['num_epochs'],
            patience=10,
            batch_size=best_params['batch_size'],
            virtual_batch_size=best_params['batch_size'] // 2,
            num_workers=0,
            drop_last=False
        )

        y_fold_pred = fold_model.predict(X_fold_val)
        fold_accuracy = accuracy_score(y_fold_val, y_fold_pred)
        cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['TabNet'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [144]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def kan_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class KAN(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim):
            super(KAN, self).__init__()
            self.hidden_layer = nn.Linear(input_dim, hidden_dim)
            self.activation = nn.ReLU()
            self.output_layer = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            h = self.activation(self.hidden_layer(x))
            out = self.output_layer(h)
            return out

    def objective(trial):
        # Define hyperparameters to tune for KAN
        hidden_dim = trial.suggest_int('hidden_dim', 32, 256)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the KAN model
        input_dim = X_train.shape[1]
        output_dim = len(np.unique(y))
        model = KAN(input_dim, hidden_dim, output_dim).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final KAN model with the best hyperparameters
    input_dim = X_train.shape[1]
    output_dim = len(np.unique(y))
    best_model = KAN(input_dim, 
                     best_params['hidden_dim'], 
                     output_dim).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        inference_time = time.time() - inference_start_time

        # Convert to numpy for metric calculation
        y_true = y_test_tensor.cpu().numpy()
        
        _, predicted = torch.max(outputs, 1)
        y_pred = predicted.cpu().numpy()
        proba = torch.softmax(outputs, dim=1).cpu().numpy()

        accuracy = accuracy_score(y_true, y_pred)
        
        if output_dim == 2:  # Binary classification
            auc = roc_auc_score(y_true, proba[:, 1])
        else:  # Multi-class classification
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = KAN(input_dim, best_params['hidden_dim'], output_dim).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['KAN'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [145]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import optuna

def saint_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    y_test_tensor = torch.LongTensor(y_test).to(device)

    class SAINT(nn.Module):
        def __init__(self, input_dim, num_classes, dim, depth, heads, mlp_dim, dropout=0.1):
            super(SAINT, self).__init__()
            self.embeds = nn.Linear(input_dim, dim)
            self.transformer = nn.TransformerEncoder(
                nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=mlp_dim, dropout=dropout),
                num_layers=depth
            )
            self.mlp_head = nn.Sequential(
                nn.LayerNorm(dim),
                nn.Linear(dim, num_classes)
            )

        def forward(self, x):
            x = self.embeds(x)
            x = x.unsqueeze(1)  # Add sequence dimension
            x = self.transformer(x)
            x = x.squeeze(1)  # Remove sequence dimension
            return self.mlp_head(x)

    def objective(trial):
        # Define hyperparameters to tune for SAINT
        heads = trial.suggest_int('heads', 1, 8)
        dim = trial.suggest_int('dim', heads, 256, step=heads)  # Ensure dim is divisible by heads
        depth = trial.suggest_int('depth', 1, 6)
        mlp_dim = trial.suggest_int('mlp_dim', 32, 256)
        dropout = trial.suggest_float('dropout', 0.0, 0.5)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the SAINT model
        input_dim = X_train.shape[1]
        num_classes = len(np.unique(y))
        model = SAINT(input_dim, num_classes, dim, depth, heads, mlp_dim, dropout).to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training
        for epoch in range(num_epochs):
            model.train()
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test_tensor)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final SAINT model with the best hyperparameters
    input_dim = X_train.shape[1]
    num_classes = len(np.unique(y))
    best_model = SAINT(input_dim, num_classes, 
                       best_params['dim'], 
                       best_params['depth'], 
                       best_params['heads'], 
                       best_params['mlp_dim'], 
                       best_params['dropout']).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)

    training_start_time = time.time()
    for epoch in range(best_params['num_epochs']):
        best_model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = best_model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    training_time = time.time() - training_start_time

    # Evaluation
    best_model.eval()
    with torch.no_grad():
        inference_start_time = time.time()
        outputs = best_model(X_test_tensor)
        inference_time = time.time() - inference_start_time

        # Convert to numpy for metric calculation
        y_true = y_test_tensor.cpu().numpy()
        
        _, predicted = torch.max(outputs, 1)
        y_pred = predicted.cpu().numpy()
        proba = torch.softmax(outputs, dim=1).cpu().numpy()

        accuracy = accuracy_score(y_true, y_pred)
        
        if num_classes == 2:  # Binary classification
            auc = roc_auc_score(y_true, proba[:, 1])
        else:  # Multi-class classification
            auc = roc_auc_score(y_true, proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        X_fold_train_tensor = torch.FloatTensor(X_fold_train).to(device)
        y_fold_train_tensor = torch.LongTensor(y_fold_train).to(device)
        X_fold_val_tensor = torch.FloatTensor(X_fold_val).to(device)
        y_fold_val_tensor = torch.LongTensor(y_fold_val).to(device)

        fold_model = SAINT(input_dim, num_classes, 
                           best_params['dim'], 
                           best_params['depth'], 
                           best_params['heads'], 
                           best_params['mlp_dim'], 
                           best_params['dropout']).to(device)
        fold_optimizer = optim.Adam(fold_model.parameters(), lr=best_params['learning_rate'])
        fold_dataset = TensorDataset(X_fold_train_tensor, y_fold_train_tensor)
        fold_loader = DataLoader(fold_dataset, batch_size=best_params['batch_size'], shuffle=True)

        for epoch in range(best_params['num_epochs']):
            fold_model.train()
            for batch_X, batch_y in fold_loader:
                fold_optimizer.zero_grad()
                outputs = fold_model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                fold_optimizer.step()

        fold_model.eval()
        with torch.no_grad():
            outputs = fold_model(X_fold_val_tensor)
            _, predicted = torch.max(outputs, 1)
            fold_accuracy = accuracy_score(y_fold_val_tensor.cpu().numpy(), predicted.cpu().numpy())
            cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['SAINT'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params



In [146]:
import pandas as pd
import numpy as np
import time
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from pytorch_tabnet.tab_model import TabNetClassifier
import optuna

def vime_comparison(X, y, result_df, test_size=0.2, random_state=42, n_trials=10, n_folds=5):
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Ensure X and y are numpy arrays
    X = X.values if isinstance(X, pd.DataFrame) else X
    y = y.values if isinstance(y, pd.Series) else y

    # Stratified train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    def objective(trial):
        # Define hyperparameters to tune for VIME-like model (using TabNet as proxy)
        n_d = trial.suggest_int('n_d', 8, 64)
        n_a = trial.suggest_int('n_a', 8, 64)
        n_steps = trial.suggest_int('n_steps', 3, 10)
        gamma = trial.suggest_float('gamma', 1.0, 2.0)
        lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        num_epochs = trial.suggest_int('num_epochs', 10, 100)

        # Create the TabNet model
        model = TabNetClassifier(
            n_d=n_d,
            n_a=n_a,
            n_steps=n_steps,
            gamma=gamma,
            lambda_sparse=lambda_sparse,
            optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=learning_rate),
            device_name=device
        )

        # Training
        model.fit(
            X_train=X_train_scaled, y_train=y_train,
            eval_set=[(X_test_scaled, y_test)],
            eval_name=['val'],
            eval_metric=['accuracy'],
            max_epochs=num_epochs,
            patience=10,
            batch_size=batch_size,
            virtual_batch_size=batch_size // 2,
            num_workers=0,
            drop_last=False
        )

        # Evaluation
        y_pred = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)

        return accuracy

    # Perform hyperparameter tuning with Optuna
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Get the best hyperparameters
    best_params = study.best_params

    # Train the final TabNet model with the best hyperparameters
    best_model = TabNetClassifier(
        n_d=best_params['n_d'],
        n_a=best_params['n_a'],
        n_steps=best_params['n_steps'],
        gamma=best_params['gamma'],
        lambda_sparse=best_params['lambda_sparse'],
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=best_params['learning_rate']),
        device_name=device
    )

    training_start_time = time.time()
    best_model.fit(
        X_train=X_train_scaled, y_train=y_train,
        eval_set=[(X_test_scaled, y_test)],
        eval_name=['val'],
        eval_metric=['accuracy'],
        max_epochs=best_params['num_epochs'],
        patience=10,
        batch_size=best_params['batch_size'],
        virtual_batch_size=best_params['batch_size'] // 2,
        num_workers=0,
        drop_last=False
    )
    training_time = time.time() - training_start_time

    # Evaluation
    y_pred = best_model.predict(X_test_scaled)
    inference_start_time = time.time()
    y_pred_proba = best_model.predict_proba(X_test_scaled)
    inference_time = time.time() - inference_start_time

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    if len(np.unique(y)) == 2:  # Binary classification
        auc = roc_auc_score(y_test, y_pred_proba[:, 1])
    else:  # Multiclass classification
        auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='macro')

    # Cross-validation
    cv_scores = []
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    for fold, (train_index, val_index) in enumerate(skf.split(X_train_scaled, y_train)):
        X_fold_train, X_fold_val = X_train_scaled[train_index], X_train_scaled[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

        fold_model = TabNetClassifier(
            n_d=best_params['n_d'],
            n_a=best_params['n_a'],
            n_steps=best_params['n_steps'],
            gamma=best_params['gamma'],
            lambda_sparse=best_params['lambda_sparse'],
            optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=best_params['learning_rate']),
            device_name=device
        )

        fold_model.fit(
            X_train=X_fold_train, y_train=y_fold_train,
            eval_set=[(X_fold_val, y_fold_val)],
            eval_name=['val'],
            eval_metric=['accuracy'],
            max_epochs=best_params['num_epochs'],
            patience=10,
            batch_size=best_params['batch_size'],
            virtual_batch_size=best_params['batch_size'] // 2,
            num_workers=0,
            drop_last=False
        )

        y_fold_pred = fold_model.predict(X_fold_val)
        fold_accuracy = accuracy_score(y_fold_val, y_fold_pred)
        cv_scores.append(fold_accuracy)

    cv_mean_accuracy = np.mean(cv_scores)
    cv_std_accuracy = np.std(cv_scores)

    # Calculate total computation time
    computation_time = time.time() - start_time

    # Store results in the existing result DataFrame
    result_df.loc['VIME'] = {
        'Accuracy': accuracy,
        'AUC Score': auc,
        'CV Mean Accuracy': cv_mean_accuracy,
        'CV Std Accuracy': cv_std_accuracy,
        'Training Time (Best Params)': training_time,
        'Inference Time (Best Params)': inference_time,
        'Computation Time (Total)': computation_time,
        'Best Parameters': best_params
    }

    return result_df, best_params


In [147]:
import numpy as np
import pandas as pd
from scipy.stats import yeojohnson

def yeojohnson_transform(X):
    if isinstance(X, pd.DataFrame):
        X_transformed = X.copy()
        for column in X_transformed.columns:
            X_transformed[column], _ = yeojohnson(X_transformed[column])
    else:
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        X_transformed = np.zeros_like(X)
        for i in range(X.shape[1]):
            X_transformed[:, i], _ = yeojohnson(X[:, i])
    
    return X_transformed

# Function call example
def apply_yeojohnson(X, y):
    X_transformed = yeojohnson_transform(X)
    return X_transformed, y

In [148]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from collections import Counter

def apply_smote_to_training(X, y, test_size=0.2, random_state=42):

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
    

    # Apply SMOTE to the training data only
    smote = SMOTE(random_state=random_state)
    X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
    
    # Combine the SMOTE-augmented training data with the original test data
    X_combined = np.vstack((X_train_smote, X_test))
    y_combined = np.concatenate((y_train_smote, y_test))
    
    # If X was originally a DataFrame, convert X_combined back to DataFrame
    if isinstance(X, pd.DataFrame):
        X_combined = pd.DataFrame(X_combined, columns=X.columns)
    
    return X_combined, y_combined




In [149]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd

def encode_categorical_data(df, target_column='Y'):
    df_encoded = df.copy()
    le = LabelEncoder()
    categorical_cols = df_encoded.select_dtypes(include=['object', 'category']).columns
    categorical_cols = [col for col in categorical_cols if col != target_column]
    
    for column in categorical_cols:
        if df_encoded[column].dtype == 'object' or df_encoded[column].dtype.name == 'category':
            df_encoded[column] = le.fit_transform(df_encoded[column].astype(str))
    
    df_encoded[target_column], _ = pd.factorize(df_encoded[target_column])
    
    return df_encoded

In [150]:
file_prefix = "apple"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
if len(df) >= 1405:
    df = df.sample(n=1405, random_state=42, replace=True)
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



KeyboardInterrupt: 

In [None]:
file_prefix = "heart"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
if len(df) >= 1405:
    df = df.sample(n=1405, random_state=42, replace=True)
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 109, number of negative: 133
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 246
[LightGBM] [Info] Number of data points in the train set: 242, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.450413 -> initscore=-0.199001
[LightGBM] [Info] Start training from score -0.199001
[LightGBM] [Info] Number of positive: 109, number of negative: 133
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 246
[LightGBM] [Info] Number of data points in the train set: 242, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.450413 -> initscore=-0.199001
[LightGBM] [Info] Start training from score -0.199001
[LightGBM] [Info] Number o

[I 2024-08-05 14:36:46,489] A new study created in memory with name: no-name-eced5878-3fb8-44cb-84aa-1c48d5eb66e7


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.001995                          0.0   
KNN                                    0.000998                     0.001994   

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:36:47,409] Trial 0 finished with value: 0.7846153846153846 and parameters: {'hidden_dim_0': 99, 'hidden_dim_1': 112, 'hidden_dim_2': 105, 'learning_rate': 0.018460524339003227, 'batch_size': 256, 'num_epochs': 64}. Best is trial 0 with value: 0.7846153846153846.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:36:48,549] Trial 1 finished with value: 0.8307692307692308 and parameters: {'hidden_dim_0': 216, 'hidden_dim_1': 249, 'hidden_dim_2': 85, 'learning_rate': 0.0008517532638797331, 'batch_size': 32, 'num_epochs': 28}. Best is trial 1 with value: 0.8307692307692308.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:36:50,024] Trial 2 finished with value: 0.7692307692307693 and parameters: {'hidden_dim_0': 121, 'hidden_dim_1': 232, 'hidden_dim_2': 63, 'learning_rate': 0.0005452352385443556, 'batch_size': 256, 'num_epoch

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.001995                          0.0   
KNN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:05,727] Trial 0 finished with value: 0.8461538461538461 and parameters: {'cross_layers': 1, 'hidden_layer_0': 205, 'hidden_layer_1': 36, 'hidden_layer_2': 76, 'learning_rate': 0.02255854313749633, 'batch_size': 64, 'num_epochs': 69}. Best is trial 0 with value: 0.8461538461538461.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:07,747] Trial 1 finished with value: 0.8153846153846154 and parameters: {'cross_layers': 1, 'hidden_layer_0': 220, 'hidden_layer_1': 198, 'hidden_layer_2': 219, 'learning_rate': 0.00019138650243547548, 'batch_size': 128, 'num_epochs': 79}. Best is trial 0 with value: 0.8461538461538461.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:09,989] Trial 2 finished with value: 0.7384615384615385 and parameters: {'cross_layers': 4, 'hidden_layer_0': 247, 'hidden_layer_1': 94, 'hidden_layer_2':

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   
DCN                       0.8  0.903409         0.807692        0.040339   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logisti

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:27,848] Trial 0 finished with value: 0.8307692307692308 and parameters: {'hidden_layer_0': 191, 'hidden_layer_1': 140, 'hidden_layer_2': 204, 'learning_rate': 0.0760188332296495, 'batch_size': 256, 'num_epochs': 29}. Best is trial 0 with value: 0.8307692307692308.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:28,927] Trial 1 finished with value: 0.7384615384615385 and parameters: {'hidden_layer_0': 80, 'hidden_layer_1': 203, 'hidden_layer_2': 102, 'learning_rate': 0.09735773635409173, 'batch_size': 256, 'num_epochs': 70}. Best is trial 0 with value: 0.8307692307692308.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:32,168] Trial 2 finished with value: 0.8307692307692308 and parameters: {'hidden_layer_0': 89, 'hidden_layer_1': 143, 'hidden_layer_2': 112, 'learning_rate': 0.0005798919894294969, 'batch_size':

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   
DCN                       0.8  0.903409         0.807692        0.040339   
Wide_and_Deep        0.738462  0.885417         0.830769        0.044522   

           

  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:46,008] Trial 0 finished with value: 0.8153846153846154 and parameters: {'n_estimators': 144, 'max_depth': 9, 'xgb_learning_rate': 0.002499126916001507, 'subsample': 0.9645190127415659, 'colsample_bytree': 0.630609102076196, 'use_hidden_layer_0': False, 'use_hidden_layer_1': True, 'hidden_layer_1': 70, 'use_hidden_layer_2': True, 'hidden_layer_2': 141, 'nn_learning_rate': 0.003987980449143117, 'batch_size': 64, 'num_epochs': 40}. Best is trial 0 with value: 0.8153846153846154.
  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:37:46,988] Trial 1 finished with value: 0.7384615384615385 and para

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   
DCN                       0.8  0.903409         0.807692        0.040339   
Wide_and_Deep        0.738462  0.885417         0.830769        0.044522   
XGBoost + NN

[I 2024-08-05 14:38:11,115] Trial 0 finished with value: 0.7846153846153846 and parameters: {'n_estimators': 103, 'max_depth': 9, 'lgb_learning_rate': 0.02272792537474905, 'num_leaves': 49, 'subsample': 0.9090289280443524, 'colsample_bytree': 0.7966866126178318, 'hidden_layer_0': 37, 'hidden_layer_1': 54, 'hidden_layer_2': 128, 'nn_learning_rate': 0.019043705660556992, 'batch_size': 256, 'num_epochs': 63}. Best is trial 0 with value: 0.7846153846153846.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:12,744] Trial 1 finished with value: 0.7846153846153846 and parameters: {'n_estimators': 143, 'max_depth': 5, 'lgb_learning_rate': 0.01376898158285796, 'num_leaves': 89, 'subsample': 0.6437422059401541, 'colsample_bytree': 0.6400979211129809, 'hidden_layer_0': 35, 'hidden_layer_1': 193, 'hidden_layer_2': 40, 'nn_learning_rate': 0.034900831640750765, 'batch_size': 32, 'num_epochs': 49}. Best is trial 0 with value: 0.7846153846153846.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:13,315] Trial 2 finished with value: 0.8307692307692308 and parameters: {'n_estimators': 184, 'max_depth': 8, 'lgb_learning_rate': 0.04154693102393806, 'num_leaves': 100, 'subsample': 0.6322522424362469, 'colsample_bytree': 0.5087999950569316, 'hidden_layer_0': 243, 'hidden_layer_1': 190, 'hidden_layer_2': 233, 'nn_learning_rate': 0.00024336402047071934, 'batch_size': 64, 'num_epochs': 10}. Best is trial 2 with value: 0.8307692307692308.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:14,739] Trial 3 finished with value: 0.8461538461538461 and parameters: {'n_estimators': 293, 'max_depth': 6, 'lgb_learning_rate': 0.0007960676413054438, 'num_leaves': 29, 'subsample': 0.7034202746782126, 'colsample_bytree': 0.7045399010181819, 'hidden_layer_0': 173, 'hidden_layer_1': 144, 'hidden_layer_2': 200, 'nn_learning_rate': 0.0016002407536632537, 'batch_size': 64, 'num_epochs': 50}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:15,110] Trial 4 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 248, 'max_depth': 8, 'lgb_learning_rate': 0.002070895175524996, 'num_leaves': 68, 'subsample': 0.9954940848696812, 'colsample_bytree': 0.5678937661916427, 'hidden_layer_0': 45, 'hidden_layer_1': 104, 'hidden_layer_2': 231, 'nn_learning_rate': 0.007786257188551187, 'batch_size': 256, 'num_epochs': 23}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034846 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:16,558] Trial 5 finished with value: 0.8461538461538461 and parameters: {'n_estimators': 115, 'max_depth': 9, 'lgb_learning_rate': 0.002617041074274871, 'num_leaves': 50, 'subsample': 0.7590535135094937, 'colsample_bytree': 0.7603412127434337, 'hidden_layer_0': 120, 'hidden_layer_1': 49, 'hidden_layer_2': 77, 'nn_learning_rate': 0.04766466983127491, 'batch_size': 128, 'num_epochs': 86}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:17,145] Trial 6 finished with value: 0.8307692307692308 and parameters: {'n_estimators': 291, 'max_depth': 7, 'lgb_learning_rate': 0.07801897637863622, 'num_leaves': 20, 'subsample': 0.952767854918148, 'colsample_bytree': 0.8811374847891835, 'hidden_layer_0': 224, 'hidden_layer_1': 232, 'hidden_layer_2': 78, 'nn_learning_rate': 0.001466632152384165, 'batch_size': 128, 'num_epochs': 24}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:19,154] Trial 7 finished with value: 0.49230769230769234 and parameters: {'n_estimators': 213, 'max_depth': 9, 'lgb_learning_rate': 0.00011067108174227844, 'num_leaves': 90, 'subsample': 0.6974457008095957, 'colsample_bytree': 0.8930061022518286, 'hidden_layer_0': 152, 'hidden_layer_1': 72, 'hidden_layer_2': 93, 'nn_learning_rate': 0.07835152375955051, 'batch_size': 64, 'num_epochs': 84}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000078 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:20,469] Trial 8 finished with value: 0.8307692307692308 and parameters: {'n_estimators': 105, 'max_depth': 6, 'lgb_learning_rate': 0.0005581723540632012, 'num_leaves': 55, 'subsample': 0.5742277105482356, 'colsample_bytree': 0.8465207861799234, 'hidden_layer_0': 210, 'hidden_layer_1': 190, 'hidden_layer_2': 201, 'nn_learning_rate': 0.0008964422229949746, 'batch_size': 256, 'num_epochs': 89}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000074 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


[I 2024-08-05 14:38:22,221] Trial 9 finished with value: 0.8307692307692308 and parameters: {'n_estimators': 186, 'max_depth': 8, 'lgb_learning_rate': 0.004185443220896242, 'num_leaves': 51, 'subsample': 0.9281111019952394, 'colsample_bytree': 0.7555100391579879, 'hidden_layer_0': 226, 'hidden_layer_1': 34, 'hidden_layer_2': 247, 'nn_learning_rate': 0.00014352827535753292, 'batch_size': 128, 'num_epochs': 95}. Best is trial 3 with value: 0.8461538461538461.


[LightGBM] [Info] Number of positive: 128, number of negative: 132
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000109 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 284
[LightGBM] [Info] Number of data points in the train set: 260, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772
[LightGBM] [Info] Number of positive: 102, number of negative: 106
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 250
[LightGBM] [Info] Number of data points in the train set: 208, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.490385 -> initscore=-0.038466
[LightGBM] [Info] Start training from score -0.038466
[LightGBM] [Info] Number o

[I 2024-08-05 14:38:30,393] A new study created in memory with name: no-name-060381c5-1ed3-4410-a9ee-ae79d3301620


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   
DCN                       0.8  0.903409         0.807692        0.040339   
Wide_and_Deep        0.738462  0.885417         0.830769        0.044522   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:38:31,913] Trial 0 finished with value: 0.8 and parameters: {'num_heads': 7, 'embedding_dim': 21, 'num_layers': 3, 'hidden_layer_0': 58, 'hidden_layer_1': 38, 'hidden_layer_2': 253, 'nn_learning_rate': 0.0009492756666689487, 'batch_size': 256, 'num_epochs': 52}. Best is trial 0 with value: 0.8.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:38:33,913] Trial 1 finished with value: 0.7538461538461538 and parameters: {'num_heads': 6, 'embedding_dim': 48, 'num_layers': 1, 'hidden_layer_0': 191, 'hidden_layer_1': 215, 'hidden_layer_2': 118, 'nn_learning_rate': 0.0004420125225747476, 'batch_size': 128, 'num_epochs': 82}. Best is trial 0 with value: 0.8.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:38:35,024] Trial 2 finished with value: 0.7230769230769231 and parameters: {'num_heads': 2, 'embedding_dim

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   
DCN                       0.8  0.903409         0.807692        0.040339   
Wide_and_Deep        0.738462  0.885417         0.830769        0.044522   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:39:05,286] Trial 0 finished with value: 0.8 and parameters: {'num_heads': 2, 'embedding_dim': 28, 'num_layers': 1, 'hidden_layer_0': 189, 'hidden_layer_1': 36, 'hidden_layer_2': 216, 'nn_learning_rate': 0.0047680483036902895, 'batch_size': 256, 'num_epochs': 30}. Best is trial 0 with value: 0.8.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:39:07,201] Trial 1 finished with value: 0.8153846153846154 and parameters: {'num_heads': 6, 'embedding_dim': 30, 'num_layers': 2, 'hidden_layer_0': 164, 'hidden_layer_1': 41, 'hidden_layer_2': 250, 'nn_learning_rate': 0.020421454076107216, 'batch_size': 32, 'num_epochs': 11}. Best is trial 1 with value: 0.8153846153846154.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:39:09,847] Trial 2 finished with value: 0.8461538461538461 and parameters: {'num_heads': 5, '

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.836066  0.927802         0.826276         0.05041   
KNN                  0.836066  0.931573         0.838435        0.045479   
Decision Tree        0.852459  0.854526         0.760544        0.047249   
Random Forest        0.868852   0.93319         0.813861         0.03034   
Gradient Boosting    0.786885   0.90194         0.809694        0.028769   
XGBoost              0.770492   0.90625         0.814031        0.013309   
LightGBM             0.868852   0.91056         0.814031        0.013309   
CatBoost             0.836066  0.920259          0.83869        0.033851   
MLP                  0.815385  0.902462         0.861668        0.040573   
DNN                  0.830769  0.910985         0.807692        0.034401   
DCN                       0.8  0.903409         0.807692        0.040339   
Wide_and_Deep        0.738462  0.885417         0.830769        0.044522   
XGBoost + NN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:39:49,372] Trial 0 finished with value: 0.8 and parameters: {'num_layers': 5, 'hidden_layer_0': 173, 'hidden_layer_1': 242, 'hidden_layer_2': 120, 'hidden_layer_3': 256, 'hidden_layer_4': 58, 'learning_rate': 0.015448446351329718, 'batch_size': 128, 'num_epochs': 44}. Best is trial 0 with value: 0.8.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:39:50,327] Trial 1 finished with value: 0.8461538461538461 and parameters: {'num_layers': 3, 'hidden_layer_0': 134, 'hidden_layer_1': 116, 'hidden_layer_2': 201, 'learning_rate': 0.0822192445433533, 'batch_size': 256, 'num_epochs': 74}. Best is trial 1 with value: 0.8461538461538461.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:39:51,456] Trial 2 finished with value: 0.7846153846153846 and parameters: {'num_layers': 3, 'hidden_layer_0': 35, 'hidden_layer_1': 213, 'hidden_

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.836066  0.927802         0.826276   
KNN                         0.836066  0.931573         0.838435   
Decision Tree               0.852459  0.854526         0.760544   
Random Forest               0.868852   0.93319         0.813861   
Gradient Boosting           0.786885   0.90194         0.809694   
XGBoost                     0.770492   0.90625         0.814031   
LightGBM                    0.868852   0.91056         0.814031   
CatBoost                    0.836066  0.920259          0.83869   
MLP                         0.815385  0.902462         0.861668   
DNN                         0.830769  0.910985         0.807692   
DCN                              0.8  0.903409         0.807692   
Wide_and_Deep               0.738462  0.885417         0.830769   
XGBoost + NN                0.846154  0.926136         0.811538   
LightGBM + NN               0.846154  0.912879         0.82692

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:40:04,857] Trial 0 finished with value: 0.8153846153846154 and parameters: {'hidden_dim': 40, 'learning_rate': 0.05872810925710183, 'batch_size': 128, 'num_epochs': 61}. Best is trial 0 with value: 0.8153846153846154.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:40:05,330] Trial 1 finished with value: 0.8153846153846154 and parameters: {'hidden_dim': 125, 'learning_rate': 0.000491771743561688, 'batch_size': 256, 'num_epochs': 56}. Best is trial 0 with value: 0.8153846153846154.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:40:06,117] Trial 2 finished with value: 0.8 and parameters: {'hidden_dim': 120, 'learning_rate': 0.0009960877208362421, 'batch_size': 64, 'num_epochs': 61}. Best is trial 0 with value: 0.8153846153846154.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:4

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.836066  0.927802         0.826276   
KNN                         0.836066  0.931573         0.838435   
Decision Tree               0.852459  0.854526         0.760544   
Random Forest               0.868852   0.93319         0.813861   
Gradient Boosting           0.786885   0.90194         0.809694   
XGBoost                     0.770492   0.90625         0.814031   
LightGBM                    0.868852   0.91056         0.814031   
CatBoost                    0.836066  0.920259          0.83869   
MLP                         0.815385  0.902462         0.861668   
DNN                         0.830769  0.910985         0.807692   
DCN                              0.8  0.903409         0.807692   
Wide_and_Deep               0.738462  0.885417         0.830769   
XGBoost + NN                0.846154  0.926136         0.811538   
LightGBM + NN               0.846154  0.912879         0.82692

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:40:17,163] Trial 0 finished with value: 0.7846153846153846 and parameters: {'num_layers': 5, 'num_trees': 2, 'tree_dim': 23, 'learning_rate': 0.00024309895268353408, 'batch_size': 64, 'num_epochs': 84}. Best is trial 0 with value: 0.7846153846153846.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:40:19,708] Trial 1 finished with value: 0.8153846153846154 and parameters: {'num_layers': 5, 'num_trees': 4, 'tree_dim': 64, 'learning_rate': 0.002419152474894677, 'batch_size': 128, 'num_epochs': 40}. Best is trial 1 with value: 0.8153846153846154.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:40:23,390] Trial 2 finished with value: 0.8307692307692308 and parameters: {'num_layers': 2, 'num_trees': 9, 'tree_dim': 19, 'learning_rate': 0.004942203163182991, 'batch_size': 256, 'num_epochs': 97}. Best is trial 2 with value: 0.

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.836066  0.927802         0.826276   
KNN                         0.836066  0.931573         0.838435   
Decision Tree               0.852459  0.854526         0.760544   
Random Forest               0.868852   0.93319         0.813861   
Gradient Boosting           0.786885   0.90194         0.809694   
XGBoost                     0.770492   0.90625         0.814031   
LightGBM                    0.868852   0.91056         0.814031   
CatBoost                    0.836066  0.920259          0.83869   
MLP                         0.815385  0.902462         0.861668   
DNN                         0.830769  0.910985         0.807692   
DCN                              0.8  0.903409         0.807692   
Wide_and_Deep               0.738462  0.885417         0.830769   
XGBoost + NN                0.846154  0.926136         0.811538   
LightGBM + NN               0.846154  0.912879         0.82692

  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.39521 | val_accuracy: 0.4     |  0:00:00s
epoch 1  | loss: 2.33374 | val_accuracy: 0.49231 |  0:00:00s
epoch 2  | loss: 2.25598 | val_accuracy: 0.43077 |  0:00:00s
epoch 3  | loss: 2.06991 | val_accuracy: 0.44615 |  0:00:00s
epoch 4  | loss: 1.81803 | val_accuracy: 0.47692 |  0:00:01s
epoch 5  | loss: 1.69788 | val_accuracy: 0.43077 |  0:00:01s
epoch 6  | loss: 1.60041 | val_accuracy: 0.46154 |  0:00:01s
epoch 7  | loss: 1.51641 | val_accuracy: 0.50769 |  0:00:01s
epoch 8  | loss: 1.43862 | val_accuracy: 0.44615 |  0:00:01s
epoch 9  | loss: 1.29836 | val_accuracy: 0.4     |  0:00:02s
epoch 10 | loss: 1.20131 | val_accuracy: 0.44615 |  0:00:02s
epoch 11 | loss: 1.06568 | val_accuracy: 0.47692 |  0:00:02s
epoch 12 | loss: 1.05675 | val_accuracy: 0.49231 |  0:00:02s
epoch 13 | loss: 0.98585 | val_accuracy: 0.47692 |  0:00:03s
epoch 14 | loss: 0.78582 | val_accuracy: 0.6     |  0:00:03s
epoch 15 | loss: 0.92693 | val_accuracy: 0.61538 |  0:00:03s
epoch 16 | loss: 0.87098

[I 2024-08-05 14:40:58,163] Trial 0 finished with value: 0.676923076923077 and parameters: {'n_d': 25, 'n_a': 45, 'n_steps': 6, 'gamma': 1.5103802649047307, 'lambda_sparse': 1.2137184419326321e-05, 'learning_rate': 0.0010368237356642152, 'batch_size': 256, 'num_epochs': 43}. Best is trial 0 with value: 0.676923076923077.


epoch 28 | loss: 0.58473 | val_accuracy: 0.6     |  0:00:06s

Early stopping occurred at epoch 28 with best_epoch = 18 and best_val_accuracy = 0.67692


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.68549 | val_accuracy: 0.38462 |  0:00:00s
epoch 1  | loss: 1.15789 | val_accuracy: 0.56923 |  0:00:00s
epoch 2  | loss: 1.44904 | val_accuracy: 0.56923 |  0:00:00s
epoch 3  | loss: 1.10372 | val_accuracy: 0.66154 |  0:00:00s
epoch 4  | loss: 0.64539 | val_accuracy: 0.8     |  0:00:01s
epoch 5  | loss: 0.66315 | val_accuracy: 0.75385 |  0:00:01s
epoch 6  | loss: 0.59228 | val_accuracy: 0.70769 |  0:00:01s
epoch 7  | loss: 0.57664 | val_accuracy: 0.72308 |  0:00:01s
epoch 8  | loss: 0.61616 | val_accuracy: 0.67692 |  0:00:01s
epoch 9  | loss: 0.65263 | val_accuracy: 0.75385 |  0:00:02s
epoch 10 | loss: 0.55934 | val_accuracy: 0.73846 |  0:00:02s
epoch 11 | loss: 0.58173 | val_accuracy: 0.72308 |  0:00:02s
epoch 12 | loss: 0.57014 | val_accuracy: 0.69231 |  0:00:02s
epoch 13 | loss: 0.71911 | val_accuracy: 0.70769 |  0:00:03s
epoch 14 | loss: 0.78034 | val_accuracy: 0.69231 |  0:00:03s


[I 2024-08-05 14:41:01,583] Trial 1 finished with value: 0.8 and parameters: {'n_d': 38, 'n_a': 13, 'n_steps': 7, 'gamma': 1.9471066073829557, 'lambda_sparse': 8.443073062487446e-05, 'learning_rate': 0.014953720346443183, 'batch_size': 256, 'num_epochs': 52}. Best is trial 1 with value: 0.8.



Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_accuracy = 0.8


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 3.03377 | val_accuracy: 0.70769 |  0:00:00s
epoch 1  | loss: 1.09837 | val_accuracy: 0.75385 |  0:00:02s
epoch 2  | loss: 1.0549  | val_accuracy: 0.76923 |  0:00:03s
epoch 3  | loss: 0.77136 | val_accuracy: 0.75385 |  0:00:04s
epoch 4  | loss: 0.62837 | val_accuracy: 0.69231 |  0:00:05s
epoch 5  | loss: 0.64348 | val_accuracy: 0.72308 |  0:00:06s
epoch 6  | loss: 0.58982 | val_accuracy: 0.75385 |  0:00:06s
epoch 7  | loss: 0.5569  | val_accuracy: 0.75385 |  0:00:08s
epoch 8  | loss: 0.69084 | val_accuracy: 0.75385 |  0:00:08s
epoch 9  | loss: 0.61706 | val_accuracy: 0.69231 |  0:00:09s
epoch 10 | loss: 0.65035 | val_accuracy: 0.78462 |  0:00:10s
epoch 11 | loss: 0.5839  | val_accuracy: 0.84615 |  0:00:11s
epoch 12 | loss: 0.48698 | val_accuracy: 0.72308 |  0:00:13s
epoch 13 | loss: 0.5085  | val_accuracy: 0.76923 |  0:00:14s
epoch 14 | loss: 0.47326 | val_accuracy: 0.72308 |  0:00:15s
epoch 15 | loss: 0.43113 | val_accuracy: 0.73846 |  0:00:15s
epoch 16 | loss: 0.47703

[I 2024-08-05 14:41:23,464] Trial 2 finished with value: 0.8461538461538461 and parameters: {'n_d': 61, 'n_a': 56, 'n_steps': 7, 'gamma': 1.019884803442702, 'lambda_sparse': 1.6486340315032598e-05, 'learning_rate': 0.006723114873712845, 'batch_size': 32, 'num_epochs': 75}. Best is trial 2 with value: 0.8461538461538461.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.75247 | val_accuracy: 0.64615 |  0:00:00s
epoch 1  | loss: 0.55994 | val_accuracy: 0.64615 |  0:00:00s
epoch 2  | loss: 0.57164 | val_accuracy: 0.75385 |  0:00:01s
epoch 3  | loss: 0.6158  | val_accuracy: 0.8     |  0:00:01s
epoch 4  | loss: 0.56899 | val_accuracy: 0.70769 |  0:00:02s
epoch 5  | loss: 0.55131 | val_accuracy: 0.67692 |  0:00:02s
epoch 6  | loss: 0.47894 | val_accuracy: 0.64615 |  0:00:03s
epoch 7  | loss: 0.43889 | val_accuracy: 0.64615 |  0:00:03s
epoch 8  | loss: 0.43469 | val_accuracy: 0.64615 |  0:00:04s
epoch 9  | loss: 0.44873 | val_accuracy: 0.63077 |  0:00:04s
epoch 10 | loss: 0.3767  | val_accuracy: 0.73846 |  0:00:04s
epoch 11 | loss: 0.35236 | val_accuracy: 0.8     |  0:00:05s
epoch 12 | loss: 0.48712 | val_accuracy: 0.76923 |  0:00:05s


[I 2024-08-05 14:41:29,768] Trial 3 finished with value: 0.8 and parameters: {'n_d': 48, 'n_a': 52, 'n_steps': 3, 'gamma': 1.772850067387795, 'lambda_sparse': 1.5175943635585027e-06, 'learning_rate': 0.0052708973895925345, 'batch_size': 32, 'num_epochs': 50}. Best is trial 2 with value: 0.8461538461538461.


epoch 13 | loss: 0.32737 | val_accuracy: 0.72308 |  0:00:06s

Early stopping occurred at epoch 13 with best_epoch = 3 and best_val_accuracy = 0.8


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.42161 | val_accuracy: 0.58462 |  0:00:00s
epoch 1  | loss: 0.90736 | val_accuracy: 0.58462 |  0:00:00s
epoch 2  | loss: 0.8007  | val_accuracy: 0.56923 |  0:00:00s
epoch 3  | loss: 0.76254 | val_accuracy: 0.61538 |  0:00:01s
epoch 4  | loss: 0.70322 | val_accuracy: 0.67692 |  0:00:01s
epoch 5  | loss: 0.66406 | val_accuracy: 0.81538 |  0:00:02s
epoch 6  | loss: 0.6319  | val_accuracy: 0.66154 |  0:00:02s
epoch 7  | loss: 0.62995 | val_accuracy: 0.73846 |  0:00:03s
epoch 8  | loss: 0.61193 | val_accuracy: 0.76923 |  0:00:03s
epoch 9  | loss: 0.57333 | val_accuracy: 0.70769 |  0:00:03s
epoch 10 | loss: 0.53938 | val_accuracy: 0.69231 |  0:00:04s
epoch 11 | loss: 0.48907 | val_accuracy: 0.72308 |  0:00:04s
epoch 12 | loss: 0.52388 | val_accuracy: 0.75385 |  0:00:04s
epoch 13 | loss: 0.50456 | val_accuracy: 0.78462 |  0:00:05s
epoch 14 | loss: 0.46101 | val_accuracy: 0.73846 |  0:00:05s
epoch 15 | loss: 0.4551  | val_accuracy: 0.75385 |  0:00:05s

Early stopping occurred

[I 2024-08-05 14:41:35,829] Trial 4 finished with value: 0.8153846153846154 and parameters: {'n_d': 13, 'n_a': 51, 'n_steps': 10, 'gamma': 1.925523207371039, 'lambda_sparse': 2.0943261295610206e-05, 'learning_rate': 0.008340761022420063, 'batch_size': 256, 'num_epochs': 33}. Best is trial 2 with value: 0.8461538461538461.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.01789 | val_accuracy: 0.55385 |  0:00:00s
epoch 1  | loss: 1.6578  | val_accuracy: 0.47692 |  0:00:00s
epoch 2  | loss: 1.69078 | val_accuracy: 0.47692 |  0:00:01s
epoch 3  | loss: 1.61889 | val_accuracy: 0.49231 |  0:00:01s
epoch 4  | loss: 1.51459 | val_accuracy: 0.52308 |  0:00:01s
epoch 5  | loss: 1.52787 | val_accuracy: 0.53846 |  0:00:01s
epoch 6  | loss: 1.58869 | val_accuracy: 0.46154 |  0:00:02s
epoch 7  | loss: 1.45688 | val_accuracy: 0.46154 |  0:00:02s
epoch 8  | loss: 1.45758 | val_accuracy: 0.41538 |  0:00:02s
epoch 9  | loss: 1.51065 | val_accuracy: 0.44615 |  0:00:02s


[I 2024-08-05 14:41:39,227] Trial 5 finished with value: 0.5538461538461539 and parameters: {'n_d': 36, 'n_a': 24, 'n_steps': 6, 'gamma': 1.7995967695937787, 'lambda_sparse': 0.00015538975344226463, 'learning_rate': 0.00016085301650661593, 'batch_size': 128, 'num_epochs': 67}. Best is trial 2 with value: 0.8461538461538461.


epoch 10 | loss: 1.48686 | val_accuracy: 0.43077 |  0:00:03s

Early stopping occurred at epoch 10 with best_epoch = 0 and best_val_accuracy = 0.55385


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 4.5019  | val_accuracy: 0.52308 |  0:00:00s
epoch 1  | loss: 1.5758  | val_accuracy: 0.56923 |  0:00:00s
epoch 2  | loss: 1.10001 | val_accuracy: 0.56923 |  0:00:01s
epoch 3  | loss: 1.17054 | val_accuracy: 0.61538 |  0:00:01s
epoch 4  | loss: 0.90823 | val_accuracy: 0.75385 |  0:00:01s
epoch 5  | loss: 0.86636 | val_accuracy: 0.72308 |  0:00:02s
epoch 6  | loss: 0.67553 | val_accuracy: 0.72308 |  0:00:02s
epoch 7  | loss: 0.61364 | val_accuracy: 0.72308 |  0:00:02s
epoch 8  | loss: 0.52609 | val_accuracy: 0.72308 |  0:00:03s
epoch 9  | loss: 0.56996 | val_accuracy: 0.78462 |  0:00:03s
epoch 10 | loss: 0.37145 | val_accuracy: 0.76923 |  0:00:03s
epoch 11 | loss: 0.38212 | val_accuracy: 0.8     |  0:00:04s
epoch 12 | loss: 0.42091 | val_accuracy: 0.70769 |  0:00:04s
epoch 13 | loss: 0.39882 | val_accuracy: 0.8     |  0:00:04s
epoch 14 | loss: 0.41461 | val_accuracy: 0.75385 |  0:00:05s
epoch 15 | loss: 0.36134 | val_accuracy: 0.76923 |  0:00:05s
epoch 16 | loss: 0.32973

[I 2024-08-05 14:41:51,457] Trial 6 finished with value: 0.8615384615384616 and parameters: {'n_d': 61, 'n_a': 19, 'n_steps': 9, 'gamma': 1.2432563410652697, 'lambda_sparse': 8.150043893542066e-05, 'learning_rate': 0.006999676630697713, 'batch_size': 256, 'num_epochs': 40}. Best is trial 6 with value: 0.8615384615384616.


epoch 35 | loss: 0.43945 | val_accuracy: 0.78462 |  0:00:11s

Early stopping occurred at epoch 35 with best_epoch = 25 and best_val_accuracy = 0.86154


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.64858 | val_accuracy: 0.56923 |  0:00:00s
epoch 1  | loss: 2.13563 | val_accuracy: 0.66154 |  0:00:00s
epoch 2  | loss: 0.85502 | val_accuracy: 0.6     |  0:00:00s
epoch 3  | loss: 0.83967 | val_accuracy: 0.75385 |  0:00:00s
epoch 4  | loss: 0.52689 | val_accuracy: 0.78462 |  0:00:00s
epoch 5  | loss: 0.37552 | val_accuracy: 0.64615 |  0:00:00s
epoch 6  | loss: 0.37724 | val_accuracy: 0.66154 |  0:00:00s
epoch 7  | loss: 0.29291 | val_accuracy: 0.66154 |  0:00:01s
epoch 8  | loss: 0.31583 | val_accuracy: 0.69231 |  0:00:01s
epoch 9  | loss: 0.3217  | val_accuracy: 0.66154 |  0:00:01s
epoch 10 | loss: 0.33145 | val_accuracy: 0.67692 |  0:00:01s
epoch 11 | loss: 0.32373 | val_accuracy: 0.70769 |  0:00:01s
epoch 12 | loss: 0.32635 | val_accuracy: 0.75385 |  0:00:01s
epoch 13 | loss: 0.30012 | val_accuracy: 0.69231 |  0:00:01s


[I 2024-08-05 14:41:53,389] Trial 7 finished with value: 0.7846153846153846 and parameters: {'n_d': 51, 'n_a': 33, 'n_steps': 3, 'gamma': 1.3224424366872936, 'lambda_sparse': 0.00019005620770259622, 'learning_rate': 0.03638576740383167, 'batch_size': 256, 'num_epochs': 59}. Best is trial 6 with value: 0.8615384615384616.


epoch 14 | loss: 0.30391 | val_accuracy: 0.64615 |  0:00:01s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_accuracy = 0.78462


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.77947 | val_accuracy: 0.53846 |  0:00:00s
epoch 1  | loss: 1.03163 | val_accuracy: 0.69231 |  0:00:00s
epoch 2  | loss: 0.94312 | val_accuracy: 0.6     |  0:00:00s
epoch 3  | loss: 1.05401 | val_accuracy: 0.6     |  0:00:01s
epoch 4  | loss: 0.87769 | val_accuracy: 0.66154 |  0:00:01s
epoch 5  | loss: 0.74653 | val_accuracy: 0.73846 |  0:00:01s
epoch 6  | loss: 0.51568 | val_accuracy: 0.66154 |  0:00:02s
epoch 7  | loss: 0.44131 | val_accuracy: 0.70769 |  0:00:02s
epoch 8  | loss: 0.5923  | val_accuracy: 0.66154 |  0:00:02s
epoch 9  | loss: 0.48889 | val_accuracy: 0.76923 |  0:00:03s
epoch 10 | loss: 0.57677 | val_accuracy: 0.69231 |  0:00:03s
epoch 11 | loss: 0.49488 | val_accuracy: 0.56923 |  0:00:03s
epoch 12 | loss: 0.57789 | val_accuracy: 0.66154 |  0:00:03s


[I 2024-08-05 14:41:57,793] Trial 8 finished with value: 0.7692307692307693 and parameters: {'n_d': 8, 'n_a': 33, 'n_steps': 9, 'gamma': 1.6176458157301528, 'lambda_sparse': 0.00021385897086305386, 'learning_rate': 0.04961382142384742, 'batch_size': 128, 'num_epochs': 14}. Best is trial 6 with value: 0.8615384615384616.


epoch 13 | loss: 0.54108 | val_accuracy: 0.72308 |  0:00:04s
Stop training because you reached max_epochs = 14 with best_epoch = 9 and best_val_accuracy = 0.76923


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.98058 | val_accuracy: 0.52308 |  0:00:00s
epoch 1  | loss: 1.43057 | val_accuracy: 0.69231 |  0:00:02s
epoch 2  | loss: 1.15099 | val_accuracy: 0.64615 |  0:00:02s
epoch 3  | loss: 1.23763 | val_accuracy: 0.70769 |  0:00:04s
epoch 4  | loss: 0.89479 | val_accuracy: 0.69231 |  0:00:05s
epoch 5  | loss: 1.16308 | val_accuracy: 0.64615 |  0:00:05s
epoch 6  | loss: 1.00536 | val_accuracy: 0.67692 |  0:00:06s
epoch 7  | loss: 0.93063 | val_accuracy: 0.66154 |  0:00:07s
epoch 8  | loss: 0.94772 | val_accuracy: 0.64615 |  0:00:08s
epoch 9  | loss: 0.81614 | val_accuracy: 0.70769 |  0:00:09s
epoch 10 | loss: 0.94647 | val_accuracy: 0.73846 |  0:00:10s
epoch 11 | loss: 0.76673 | val_accuracy: 0.67692 |  0:00:11s
epoch 12 | loss: 0.9648  | val_accuracy: 0.70769 |  0:00:12s
epoch 13 | loss: 0.97474 | val_accuracy: 0.69231 |  0:00:13s
epoch 14 | loss: 1.18861 | val_accuracy: 0.64615 |  0:00:14s
epoch 15 | loss: 1.43494 | val_accuracy: 0.63077 |  0:00:15s
epoch 16 | loss: 1.1648 

[I 2024-08-05 14:42:34,230] Trial 9 finished with value: 0.8 and parameters: {'n_d': 57, 'n_a': 12, 'n_steps': 10, 'gamma': 1.5184412272713093, 'lambda_sparse': 1.3631803347512485e-06, 'learning_rate': 0.003958759659932973, 'batch_size': 32, 'num_epochs': 99}. Best is trial 6 with value: 0.8615384615384616.


epoch 0  | loss: 4.5019  | val_accuracy: 0.52308 |  0:00:00s
epoch 1  | loss: 1.5758  | val_accuracy: 0.56923 |  0:00:00s
epoch 2  | loss: 1.10001 | val_accuracy: 0.56923 |  0:00:00s
epoch 3  | loss: 1.17054 | val_accuracy: 0.61538 |  0:00:01s
epoch 4  | loss: 0.90823 | val_accuracy: 0.75385 |  0:00:01s
epoch 5  | loss: 0.86636 | val_accuracy: 0.72308 |  0:00:01s
epoch 6  | loss: 0.67553 | val_accuracy: 0.72308 |  0:00:01s
epoch 7  | loss: 0.61364 | val_accuracy: 0.72308 |  0:00:02s
epoch 8  | loss: 0.52609 | val_accuracy: 0.72308 |  0:00:02s
epoch 9  | loss: 0.56996 | val_accuracy: 0.78462 |  0:00:02s
epoch 10 | loss: 0.37145 | val_accuracy: 0.76923 |  0:00:03s
epoch 11 | loss: 0.38212 | val_accuracy: 0.8     |  0:00:03s
epoch 12 | loss: 0.42091 | val_accuracy: 0.70769 |  0:00:03s
epoch 13 | loss: 0.39882 | val_accuracy: 0.8     |  0:00:03s
epoch 14 | loss: 0.41461 | val_accuracy: 0.75385 |  0:00:04s
epoch 15 | loss: 0.36134 | val_accuracy: 0.76923 |  0:00:04s
epoch 16 | loss: 0.32973



epoch 0  | loss: 4.40477 | val_accuracy: 0.55769 |  0:00:00s
epoch 1  | loss: 2.5076  | val_accuracy: 0.63462 |  0:00:00s
epoch 2  | loss: 1.44232 | val_accuracy: 0.67308 |  0:00:00s
epoch 3  | loss: 0.90976 | val_accuracy: 0.76923 |  0:00:00s
epoch 4  | loss: 0.99558 | val_accuracy: 0.67308 |  0:00:01s
epoch 5  | loss: 1.09738 | val_accuracy: 0.65385 |  0:00:01s
epoch 6  | loss: 1.22148 | val_accuracy: 0.69231 |  0:00:01s
epoch 7  | loss: 0.80035 | val_accuracy: 0.76923 |  0:00:01s
epoch 8  | loss: 0.64273 | val_accuracy: 0.84615 |  0:00:01s
epoch 9  | loss: 0.66945 | val_accuracy: 0.84615 |  0:00:01s
epoch 10 | loss: 0.60359 | val_accuracy: 0.80769 |  0:00:02s
epoch 11 | loss: 0.53285 | val_accuracy: 0.80769 |  0:00:02s
epoch 12 | loss: 0.54889 | val_accuracy: 0.76923 |  0:00:02s
epoch 13 | loss: 0.46438 | val_accuracy: 0.78846 |  0:00:02s
epoch 14 | loss: 0.40713 | val_accuracy: 0.78846 |  0:00:02s
epoch 15 | loss: 0.43589 | val_accuracy: 0.75    |  0:00:02s
epoch 16 | loss: 0.38027



epoch 0  | loss: 4.47075 | val_accuracy: 0.57692 |  0:00:00s
epoch 1  | loss: 2.45177 | val_accuracy: 0.55769 |  0:00:00s
epoch 2  | loss: 1.32912 | val_accuracy: 0.57692 |  0:00:00s
epoch 3  | loss: 0.83658 | val_accuracy: 0.57692 |  0:00:00s
epoch 4  | loss: 1.08769 | val_accuracy: 0.57692 |  0:00:00s
epoch 5  | loss: 1.34801 | val_accuracy: 0.59615 |  0:00:01s
epoch 6  | loss: 1.36632 | val_accuracy: 0.63462 |  0:00:01s
epoch 7  | loss: 1.0148  | val_accuracy: 0.69231 |  0:00:01s
epoch 8  | loss: 0.7687  | val_accuracy: 0.73077 |  0:00:01s
epoch 9  | loss: 0.42984 | val_accuracy: 0.78846 |  0:00:01s
epoch 10 | loss: 0.64953 | val_accuracy: 0.78846 |  0:00:02s
epoch 11 | loss: 0.60999 | val_accuracy: 0.73077 |  0:00:02s
epoch 12 | loss: 0.6946  | val_accuracy: 0.76923 |  0:00:02s
epoch 13 | loss: 0.65182 | val_accuracy: 0.75    |  0:00:02s
epoch 14 | loss: 0.68368 | val_accuracy: 0.78846 |  0:00:02s
epoch 15 | loss: 0.62625 | val_accuracy: 0.80769 |  0:00:02s
epoch 16 | loss: 0.60126



epoch 0  | loss: 4.37533 | val_accuracy: 0.51923 |  0:00:00s
epoch 1  | loss: 2.57124 | val_accuracy: 0.51923 |  0:00:00s
epoch 2  | loss: 1.68787 | val_accuracy: 0.61538 |  0:00:00s
epoch 3  | loss: 1.07993 | val_accuracy: 0.65385 |  0:00:00s
epoch 4  | loss: 0.87001 | val_accuracy: 0.51923 |  0:00:00s
epoch 5  | loss: 0.8328  | val_accuracy: 0.59615 |  0:00:01s
epoch 6  | loss: 0.6432  | val_accuracy: 0.67308 |  0:00:01s
epoch 7  | loss: 0.53023 | val_accuracy: 0.69231 |  0:00:01s
epoch 8  | loss: 0.53802 | val_accuracy: 0.78846 |  0:00:01s
epoch 9  | loss: 0.51587 | val_accuracy: 0.76923 |  0:00:01s
epoch 10 | loss: 0.53825 | val_accuracy: 0.75    |  0:00:02s
epoch 11 | loss: 0.36417 | val_accuracy: 0.73077 |  0:00:02s
epoch 12 | loss: 0.4159  | val_accuracy: 0.71154 |  0:00:02s
epoch 13 | loss: 0.37308 | val_accuracy: 0.80769 |  0:00:02s
epoch 14 | loss: 0.33748 | val_accuracy: 0.76923 |  0:00:02s
epoch 15 | loss: 0.38243 | val_accuracy: 0.69231 |  0:00:02s
epoch 16 | loss: 0.33222



epoch 0  | loss: 4.41358 | val_accuracy: 0.51923 |  0:00:00s
epoch 1  | loss: 2.6275  | val_accuracy: 0.5     |  0:00:00s
epoch 2  | loss: 1.36612 | val_accuracy: 0.63462 |  0:00:00s
epoch 3  | loss: 1.13318 | val_accuracy: 0.65385 |  0:00:00s
epoch 4  | loss: 1.00211 | val_accuracy: 0.61538 |  0:00:00s
epoch 5  | loss: 1.02315 | val_accuracy: 0.61538 |  0:00:01s
epoch 6  | loss: 0.78591 | val_accuracy: 0.67308 |  0:00:01s
epoch 7  | loss: 0.62909 | val_accuracy: 0.71154 |  0:00:01s
epoch 8  | loss: 0.67163 | val_accuracy: 0.80769 |  0:00:01s
epoch 9  | loss: 0.73817 | val_accuracy: 0.76923 |  0:00:01s
epoch 10 | loss: 0.54918 | val_accuracy: 0.76923 |  0:00:02s
epoch 11 | loss: 0.52321 | val_accuracy: 0.78846 |  0:00:02s
epoch 12 | loss: 0.46798 | val_accuracy: 0.84615 |  0:00:02s
epoch 13 | loss: 0.41767 | val_accuracy: 0.76923 |  0:00:02s
epoch 14 | loss: 0.52638 | val_accuracy: 0.80769 |  0:00:02s
epoch 15 | loss: 0.50285 | val_accuracy: 0.82692 |  0:00:02s
epoch 16 | loss: 0.34552



epoch 0  | loss: 4.14642 | val_accuracy: 0.55769 |  0:00:00s
epoch 1  | loss: 2.75714 | val_accuracy: 0.53846 |  0:00:00s
epoch 2  | loss: 1.12296 | val_accuracy: 0.59615 |  0:00:00s
epoch 3  | loss: 0.9625  | val_accuracy: 0.59615 |  0:00:00s
epoch 4  | loss: 0.95455 | val_accuracy: 0.75    |  0:00:00s
epoch 5  | loss: 0.62419 | val_accuracy: 0.71154 |  0:00:01s
epoch 6  | loss: 0.70716 | val_accuracy: 0.78846 |  0:00:01s
epoch 7  | loss: 0.49354 | val_accuracy: 0.71154 |  0:00:01s
epoch 8  | loss: 0.67833 | val_accuracy: 0.76923 |  0:00:01s
epoch 9  | loss: 0.6036  | val_accuracy: 0.80769 |  0:00:01s
epoch 10 | loss: 0.53694 | val_accuracy: 0.80769 |  0:00:02s
epoch 11 | loss: 0.4517  | val_accuracy: 0.86538 |  0:00:02s
epoch 12 | loss: 0.6546  | val_accuracy: 0.80769 |  0:00:02s
epoch 13 | loss: 0.45669 | val_accuracy: 0.80769 |  0:00:02s
epoch 14 | loss: 0.49719 | val_accuracy: 0.80769 |  0:00:02s
epoch 15 | loss: 0.53364 | val_accuracy: 0.82692 |  0:00:02s
epoch 16 | loss: 0.52556

[I 2024-08-05 14:43:06,023] A new study created in memory with name: no-name-9598d981-beaa-4f94-87a1-4fea65c63b62


epoch 21 | loss: 0.4276  | val_accuracy: 0.75    |  0:00:03s

Early stopping occurred at epoch 21 with best_epoch = 11 and best_val_accuracy = 0.86538
                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.836066  0.927802         0.826276   
KNN                         0.836066  0.931573         0.838435   
Decision Tree               0.852459  0.854526         0.760544   
Random Forest               0.868852   0.93319         0.813861   
Gradient Boosting           0.786885   0.90194         0.809694   
XGBoost                     0.770492   0.90625         0.814031   
LightGBM                    0.868852   0.91056         0.814031   
CatBoost                    0.836066  0.920259          0.83869   
MLP                         0.815385  0.902462         0.861668   
DNN                         0.830769  0.910985         0.807692   
DCN                              0.8  0.903409         0.807692   
Wide_and_Deep               0.738462  0.88541

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:43:11,471] Trial 0 finished with value: 0.8 and parameters: {'heads': 1, 'dim': 171, 'depth': 3, 'mlp_dim': 124, 'dropout': 0.24620502729414845, 'learning_rate': 0.0028975788628894093, 'batch_size': 256, 'num_epochs': 61}. Best is trial 0 with value: 0.8.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:43:14,177] Trial 1 finished with value: 0.8923076923076924 and parameters: {'heads': 7, 'dim': 28, 'depth': 1, 'mlp_dim': 64, 'dropout': 0.35963609517883444, 'learning_rate': 0.026520826504125926, 'batch_size': 128, 'num_epochs': 72}. Best is trial 1 with value: 0.8923076923076924.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:43:16,864] Trial 2 finished with value: 0.8307692307692308 and parameters: {'heads': 4, 'dim': 232, 'depth': 1, 'mlp_dim': 125, 'dropout': 0.2796307062013726, 'learning_rate': 0.0014535208367573

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.836066  0.927802         0.826276   
KNN                         0.836066  0.931573         0.838435   
Decision Tree               0.852459  0.854526         0.760544   
Random Forest               0.868852   0.93319         0.813861   
Gradient Boosting           0.786885   0.90194         0.809694   
XGBoost                     0.770492   0.90625         0.814031   
LightGBM                    0.868852   0.91056         0.814031   
CatBoost                    0.836066  0.920259          0.83869   
MLP                         0.815385  0.902462         0.861668   
DNN                         0.830769  0.910985         0.807692   
DCN                              0.8  0.903409         0.807692   
Wide_and_Deep               0.738462  0.885417         0.830769   
XGBoost + NN                0.846154  0.926136         0.811538   
LightGBM + NN               0.846154  0.912879         0.82692

In [137]:
file_prefix = "loan"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
if len(df) >= 1405:
    df = df.sample(n=1405, random_state=42, replace=True)
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 416, number of negative: 708
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1311
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370107 -> initscore=-0.531759
[LightGBM] [Info] Start training from score -0.531759
[LightGBM] [Info] Number of positive: 416, number of negative: 708
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000073 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1311
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.370107 -> initscore=-0.531759
[LightGBM] [Info] Start training from score -0.531759
[LightGBM] [Info] Numb

[I 2024-08-05 14:52:23,220] A new study created in memory with name: no-name-30aa07d0-1988-4bb5-8a07-0e7927680111


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.005984                          0.0   
KNN                                    0.005984                     0.012966   

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:52:26,409] Trial 0 finished with value: 0.9590643274853801 and parameters: {'hidden_dim_0': 59, 'hidden_dim_1': 223, 'hidden_dim_2': 46, 'learning_rate': 0.0013419667841887818, 'batch_size': 128, 'num_epochs': 48}. Best is trial 0 with value: 0.9590643274853801.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:52:30,465] Trial 1 finished with value: 0.956140350877193 and parameters: {'hidden_dim_0': 51, 'hidden_dim_1': 122, 'hidden_dim_2': 81, 'learning_rate': 0.015089682718181403, 'batch_size': 32, 'num_epochs': 24}. Best is trial 0 with value: 0.9590643274853801.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:52:45,684] Trial 2 finished with value: 0.9736842105263158 and parameters: {'hidden_dim_0': 44, 'hidden_dim_1': 172, 'hidden_dim_2': 61, 'learning_rate': 0.013878902197115182, 'batch_size': 32, 'num_epochs': 67

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.005984                          0.0   
KNN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:55:17,975] Trial 0 finished with value: 0.9678362573099415 and parameters: {'cross_layers': 4, 'hidden_layer_0': 113, 'hidden_layer_1': 178, 'hidden_layer_2': 131, 'learning_rate': 0.004560833944370086, 'batch_size': 32, 'num_epochs': 60}. Best is trial 0 with value: 0.9678362573099415.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:55:22,353] Trial 1 finished with value: 0.9473684210526315 and parameters: {'cross_layers': 3, 'hidden_layer_0': 66, 'hidden_layer_1': 37, 'hidden_layer_2': 158, 'learning_rate': 0.00034753325048463, 'batch_size': 256, 'num_epochs': 66}. Best is trial 0 with value: 0.9678362573099415.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 14:55:34,402] Trial 2 finished with value: 0.9736842105263158 and parameters: {'cross_layers': 2, 'hidden_layer_0': 179, 'hidden_layer_1': 194, 'hidden_layer_2': 

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   
DCN                  0.973684  0.994893         0.952442        0.015896   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logisti

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:00:23,679] Trial 0 finished with value: 0.9766081871345029 and parameters: {'hidden_layer_0': 202, 'hidden_layer_1': 117, 'hidden_layer_2': 141, 'learning_rate': 0.005030807470546504, 'batch_size': 128, 'num_epochs': 70}. Best is trial 0 with value: 0.9766081871345029.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:00:32,074] Trial 1 finished with value: 0.9590643274853801 and parameters: {'hidden_layer_0': 111, 'hidden_layer_1': 38, 'hidden_layer_2': 99, 'learning_rate': 0.016780183200491395, 'batch_size': 32, 'num_epochs': 51}. Best is trial 0 with value: 0.9766081871345029.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:00:34,287] Trial 2 finished with value: 0.9649122807017544 and parameters: {'hidden_layer_0': 185, 'hidden_layer_1': 115, 'hidden_layer_2': 187, 'learning_rate': 0.0004725189384325412, 'batch_size

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   
DCN                  0.973684  0.994893         0.952442        0.015896   
Wide_and_Deep        0.976608  0.997772         0.966335        0.014557   

           

  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:01:52,319] Trial 0 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 214, 'max_depth': 10, 'xgb_learning_rate': 0.09640267098295356, 'subsample': 0.6120806922905797, 'colsample_bytree': 0.6533901311886325, 'use_hidden_layer_0': True, 'hidden_layer_0': 39, 'use_hidden_layer_1': True, 'hidden_layer_1': 79, 'use_hidden_layer_2': False, 'nn_learning_rate': 0.0057490646001789715, 'batch_size': 256, 'num_epochs': 40}. Best is trial 0 with value: 0.9736842105263158.
  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:02:08,377] Trial 1 finished with value: 0.9736842105263158 and pa

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   
DCN                  0.973684  0.994893         0.952442        0.015896   
Wide_and_Deep        0.976608  0.997772         0.966335        0.014557   
XGBoost + NN

[I 2024-08-05 15:04:47,413] Trial 0 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 188, 'max_depth': 8, 'lgb_learning_rate': 0.0044223184744593405, 'num_leaves': 70, 'subsample': 0.9250227581997881, 'colsample_bytree': 0.8345794570019824, 'hidden_layer_0': 150, 'hidden_layer_1': 216, 'hidden_layer_2': 98, 'nn_learning_rate': 0.01428684669470048, 'batch_size': 128, 'num_epochs': 72}. Best is trial 0 with value: 0.9736842105263158.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:04:54,672] Trial 1 finished with value: 0.5233918128654971 and parameters: {'n_estimators': 58, 'max_depth': 6, 'lgb_learning_rate': 0.00010775524113480543, 'num_leaves': 100, 'subsample': 0.6464564233962652, 'colsample_bytree': 0.7421099362250427, 'hidden_layer_0': 155, 'hidden_layer_1': 167, 'hidden_layer_2': 227, 'nn_learning_rate': 0.00010120108081531486, 'batch_size': 128, 'num_epochs': 85}. Best is trial 0 with value: 0.9736842105263158.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:11,063] Trial 2 finished with value: 0.9824561403508771 and parameters: {'n_estimators': 62, 'max_depth': 8, 'lgb_learning_rate': 0.07452426030609892, 'num_leaves': 68, 'subsample': 0.5690658411033189, 'colsample_bytree': 0.8017644470762813, 'hidden_layer_0': 181, 'hidden_layer_1': 94, 'hidden_layer_2': 88, 'nn_learning_rate': 0.0021583575086083725, 'batch_size': 32, 'num_epochs': 93}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:14,957] Trial 3 finished with value: 0.5233918128654971 and parameters: {'n_estimators': 112, 'max_depth': 9, 'lgb_learning_rate': 0.00014152460646043699, 'num_leaves': 100, 'subsample': 0.666444753550206, 'colsample_bytree': 0.7460790991838413, 'hidden_layer_0': 181, 'hidden_layer_1': 253, 'hidden_layer_2': 187, 'nn_learning_rate': 0.003950623344344799, 'batch_size': 64, 'num_epochs': 27}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:16,561] Trial 4 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 81, 'max_depth': 9, 'lgb_learning_rate': 0.0035978734579593494, 'num_leaves': 35, 'subsample': 0.6840532641074484, 'colsample_bytree': 0.8569342839240937, 'hidden_layer_0': 149, 'hidden_layer_1': 154, 'hidden_layer_2': 88, 'nn_learning_rate': 0.0006073767962767494, 'batch_size': 64, 'num_epochs': 13}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000340 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:24,910] Trial 5 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 148, 'max_depth': 4, 'lgb_learning_rate': 0.0005728100300107727, 'num_leaves': 75, 'subsample': 0.6037799423768931, 'colsample_bytree': 0.9682019857537856, 'hidden_layer_0': 84, 'hidden_layer_1': 47, 'hidden_layer_2': 156, 'nn_learning_rate': 0.004791058638347856, 'batch_size': 32, 'num_epochs': 49}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:40,633] Trial 6 finished with value: 0.9766081871345029 and parameters: {'n_estimators': 97, 'max_depth': 7, 'lgb_learning_rate': 0.0014736790274804815, 'num_leaves': 34, 'subsample': 0.579803857953229, 'colsample_bytree': 0.9216943048478572, 'hidden_layer_0': 56, 'hidden_layer_1': 53, 'hidden_layer_2': 185, 'nn_learning_rate': 0.04782706389323639, 'batch_size': 32, 'num_epochs': 87}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:47,055] Trial 7 finished with value: 0.9766081871345029 and parameters: {'n_estimators': 202, 'max_depth': 7, 'lgb_learning_rate': 0.0009232355465840531, 'num_leaves': 60, 'subsample': 0.6223228426697012, 'colsample_bytree': 0.9418823314305397, 'hidden_layer_0': 212, 'hidden_layer_1': 244, 'hidden_layer_2': 179, 'nn_learning_rate': 0.00021718785743275967, 'batch_size': 64, 'num_epochs': 46}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:53,658] Trial 8 finished with value: 0.9766081871345029 and parameters: {'n_estimators': 281, 'max_depth': 6, 'lgb_learning_rate': 0.0016868351454687417, 'num_leaves': 90, 'subsample': 0.5340005618922721, 'colsample_bytree': 0.9414461542294179, 'hidden_layer_0': 59, 'hidden_layer_1': 85, 'hidden_layer_2': 126, 'nn_learning_rate': 0.0012981521378194168, 'batch_size': 64, 'num_epochs': 65}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000142 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374


[I 2024-08-05 15:05:59,320] Trial 9 finished with value: 0.9707602339181286 and parameters: {'n_estimators': 89, 'max_depth': 10, 'lgb_learning_rate': 0.0002474992308842253, 'num_leaves': 64, 'subsample': 0.8249289051922049, 'colsample_bytree': 0.5431666988103223, 'hidden_layer_0': 182, 'hidden_layer_1': 205, 'hidden_layer_2': 88, 'nn_learning_rate': 0.0008432684186959213, 'batch_size': 256, 'num_epochs': 97}. Best is trial 2 with value: 0.9824561403508771.


[LightGBM] [Info] Number of positive: 654, number of negative: 713
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1994
[LightGBM] [Info] Number of data points in the train set: 1367, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478420 -> initscore=-0.086374
[LightGBM] [Info] Start training from score -0.086374
[LightGBM] [Info] Number of positive: 523, number of negative: 570
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1939
[LightGBM] [Info] Number of data points in the train set: 1093, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.478500 -> initscore=-0.086055
[LightGBM] [Info] Start training from score -0.086055
[LightGBM] [Info] Numb

[I 2024-08-05 15:07:49,024] A new study created in memory with name: no-name-8d13b4e9-fd96-485e-a0c2-a529684aae5d


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   
DCN                  0.973684  0.994893         0.952442        0.015896   
Wide_and_Deep        0.976608  0.997772         0.966335        0.014557   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:08:02,089] Trial 0 finished with value: 0.9502923976608187 and parameters: {'num_heads': 4, 'embedding_dim': 20, 'num_layers': 3, 'hidden_layer_0': 37, 'hidden_layer_1': 169, 'hidden_layer_2': 202, 'nn_learning_rate': 0.01828843420119821, 'batch_size': 128, 'num_epochs': 94}. Best is trial 0 with value: 0.9502923976608187.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:08:30,010] Trial 1 finished with value: 0.9649122807017544 and parameters: {'num_heads': 2, 'embedding_dim': 44, 'num_layers': 2, 'hidden_layer_0': 170, 'hidden_layer_1': 235, 'hidden_layer_2': 57, 'nn_learning_rate': 0.008385729417559832, 'batch_size': 32, 'num_epochs': 73}. Best is trial 1 with value: 0.9649122807017544.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:08:46,302] Trial 2 finished with value: 0.9766081871345029 and pa

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   
DCN                  0.973684  0.994893         0.952442        0.015896   
Wide_and_Deep        0.976608  0.997772         0.966335        0.014557   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:11:48,278] Trial 0 finished with value: 0.9327485380116959 and parameters: {'num_heads': 5, 'embedding_dim': 60, 'num_layers': 3, 'hidden_layer_0': 105, 'hidden_layer_1': 192, 'hidden_layer_2': 250, 'nn_learning_rate': 0.023639282418998556, 'batch_size': 128, 'num_epochs': 11}. Best is trial 0 with value: 0.9327485380116959.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:12:17,037] Trial 1 finished with value: 0.935672514619883 and parameters: {'num_heads': 7, 'embedding_dim': 56, 'num_layers': 2, 'hidden_layer_0': 116, 'hidden_layer_1': 35, 'hidden_layer_2': 120, 'nn_learning_rate': 0.044880364057275905, 'batch_size': 32, 'num_epochs': 69}. Best is trial 1 with value: 0.935672514619883.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:12:52,494] Trial 2 finished with value: 0.9736842105263158 and pa

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.925267  0.977813         0.933286        0.015355   
KNN                  0.918149  0.976972         0.902119        0.011749   
Decision Tree        0.960854  0.955485         0.968857        0.007968   
Random Forest        0.982206  0.998431         0.975968        0.006707   
Gradient Boosting    0.967972  0.994453         0.975968        0.010775   
XGBoost              0.967972  0.998123         0.978639        0.010316   
LightGBM             0.992883  0.999496         0.979544        0.008233   
CatBoost             0.982206  0.998879         0.976865        0.007123   
MLP                  0.979532  0.996847         0.961966        0.006262   
DNN                  0.976608  0.994105         0.956097        0.013916   
DCN                  0.973684  0.994893         0.952442        0.015896   
Wide_and_Deep        0.976608  0.997772         0.966335        0.014557   
XGBoost + NN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:16:05,707] Trial 0 finished with value: 0.9532163742690059 and parameters: {'num_layers': 3, 'hidden_layer_0': 252, 'hidden_layer_1': 61, 'hidden_layer_2': 119, 'learning_rate': 0.03044554832561144, 'batch_size': 64, 'num_epochs': 34}. Best is trial 0 with value: 0.9532163742690059.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:16:08,531] Trial 1 finished with value: 0.9766081871345029 and parameters: {'num_layers': 1, 'hidden_layer_0': 187, 'learning_rate': 0.004634988971356651, 'batch_size': 64, 'num_epochs': 30}. Best is trial 1 with value: 0.9766081871345029.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:16:14,298] Trial 2 finished with value: 0.9619883040935673 and parameters: {'num_layers': 2, 'hidden_layer_0': 48, 'hidden_layer_1': 130, 'learning_rate': 0.0011392679588025246, 'batch_size': 64, 'num_epochs':

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.925267  0.977813         0.933286   
KNN                         0.918149  0.976972         0.902119   
Decision Tree               0.960854  0.955485         0.968857   
Random Forest               0.982206  0.998431         0.975968   
Gradient Boosting           0.967972  0.994453         0.975968   
XGBoost                     0.967972  0.998123         0.978639   
LightGBM                    0.992883  0.999496         0.979544   
CatBoost                    0.982206  0.998879         0.976865   
MLP                         0.979532  0.996847         0.961966   
DNN                         0.976608  0.994105         0.956097   
DCN                         0.973684  0.994893         0.952442   
Wide_and_Deep               0.976608  0.997772         0.966335   
XGBoost + NN                0.982456  0.998766          0.96853   
LightGBM + NN               0.982456  0.993214         0.97511

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:17:27,805] Trial 0 finished with value: 0.9444444444444444 and parameters: {'hidden_dim': 218, 'learning_rate': 0.00011903289650220827, 'batch_size': 256, 'num_epochs': 88}. Best is trial 0 with value: 0.9444444444444444.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:17:33,890] Trial 1 finished with value: 0.9707602339181286 and parameters: {'hidden_dim': 158, 'learning_rate': 0.0035659745595738096, 'batch_size': 32, 'num_epochs': 60}. Best is trial 1 with value: 0.9707602339181286.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:17:40,146] Trial 2 finished with value: 0.9678362573099415 and parameters: {'hidden_dim': 80, 'learning_rate': 0.0017030837553959287, 'batch_size': 64, 'num_epochs': 88}. Best is trial 1 with value: 0.9707602339181286.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.925267  0.977813         0.933286   
KNN                         0.918149  0.976972         0.902119   
Decision Tree               0.960854  0.955485         0.968857   
Random Forest               0.982206  0.998431         0.975968   
Gradient Boosting           0.967972  0.994453         0.975968   
XGBoost                     0.967972  0.998123         0.978639   
LightGBM                    0.992883  0.999496         0.979544   
CatBoost                    0.982206  0.998879         0.976865   
MLP                         0.979532  0.996847         0.961966   
DNN                         0.976608  0.994105         0.956097   
DCN                         0.973684  0.994893         0.952442   
Wide_and_Deep               0.976608  0.997772         0.966335   
XGBoost + NN                0.982456  0.998766          0.96853   
LightGBM + NN               0.982456  0.993214         0.97511

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:18:34,638] Trial 0 finished with value: 0.9678362573099415 and parameters: {'num_layers': 5, 'num_trees': 2, 'tree_dim': 19, 'learning_rate': 0.006545410185470304, 'batch_size': 256, 'num_epochs': 44}. Best is trial 0 with value: 0.9678362573099415.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:18:52,721] Trial 1 finished with value: 0.9532163742690059 and parameters: {'num_layers': 3, 'num_trees': 9, 'tree_dim': 19, 'learning_rate': 0.00023526322039910388, 'batch_size': 64, 'num_epochs': 33}. Best is trial 0 with value: 0.9678362573099415.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:19:01,157] Trial 2 finished with value: 0.9707602339181286 and parameters: {'num_layers': 3, 'num_trees': 1, 'tree_dim': 53, 'learning_rate': 0.015790011293048353, 'batch_size': 128, 'num_epochs': 96}. Best is trial 2 with value: 0.

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.925267  0.977813         0.933286   
KNN                         0.918149  0.976972         0.902119   
Decision Tree               0.960854  0.955485         0.968857   
Random Forest               0.982206  0.998431         0.975968   
Gradient Boosting           0.967972  0.994453         0.975968   
XGBoost                     0.967972  0.998123         0.978639   
LightGBM                    0.992883  0.999496         0.979544   
CatBoost                    0.982206  0.998879         0.976865   
MLP                         0.979532  0.996847         0.961966   
DNN                         0.976608  0.994105         0.956097   
DCN                         0.973684  0.994893         0.952442   
Wide_and_Deep               0.976608  0.997772         0.966335   
XGBoost + NN                0.982456  0.998766          0.96853   
LightGBM + NN               0.982456  0.993214         0.97511

  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.64676 | val_accuracy: 0.69883 |  0:00:00s
epoch 1  | loss: 0.35667 | val_accuracy: 0.77778 |  0:00:01s
epoch 2  | loss: 0.3042  | val_accuracy: 0.85965 |  0:00:01s
epoch 3  | loss: 0.22232 | val_accuracy: 0.88889 |  0:00:02s
epoch 4  | loss: 0.20434 | val_accuracy: 0.92105 |  0:00:03s
epoch 5  | loss: 0.20462 | val_accuracy: 0.90058 |  0:00:03s
epoch 6  | loss: 0.16111 | val_accuracy: 0.92398 |  0:00:04s
epoch 7  | loss: 0.16355 | val_accuracy: 0.94444 |  0:00:04s
epoch 8  | loss: 0.15922 | val_accuracy: 0.93275 |  0:00:05s
epoch 9  | loss: 0.15936 | val_accuracy: 0.92398 |  0:00:06s
epoch 10 | loss: 0.17106 | val_accuracy: 0.9152  |  0:00:06s
epoch 11 | loss: 0.16418 | val_accuracy: 0.9269  |  0:00:07s
epoch 12 | loss: 0.14525 | val_accuracy: 0.94444 |  0:00:07s
epoch 13 | loss: 0.13613 | val_accuracy: 0.95029 |  0:00:08s
epoch 14 | loss: 0.13596 | val_accuracy: 0.94444 |  0:00:08s
epoch 15 | loss: 0.1546  | val_accuracy: 0.94737 |  0:00:09s
epoch 16 | loss: 0.14161

[I 2024-08-05 15:28:28,590] Trial 0 finished with value: 0.9619883040935673 and parameters: {'n_d': 59, 'n_a': 53, 'n_steps': 3, 'gamma': 1.8352640712368729, 'lambda_sparse': 0.00012896837571696582, 'learning_rate': 0.008885394152753207, 'batch_size': 256, 'num_epochs': 49}. Best is trial 0 with value: 0.9619883040935673.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.28801 | val_accuracy: 0.81287 |  0:00:02s
epoch 1  | loss: 0.26201 | val_accuracy: 0.90936 |  0:00:05s
epoch 2  | loss: 0.2271  | val_accuracy: 0.92105 |  0:00:07s
epoch 3  | loss: 0.22446 | val_accuracy: 0.93275 |  0:00:11s
epoch 4  | loss: 0.20673 | val_accuracy: 0.90058 |  0:00:14s
epoch 5  | loss: 0.21722 | val_accuracy: 0.9269  |  0:00:16s
epoch 6  | loss: 0.17285 | val_accuracy: 0.9386  |  0:00:19s
epoch 7  | loss: 0.16543 | val_accuracy: 0.92105 |  0:00:22s
epoch 8  | loss: 0.20625 | val_accuracy: 0.91813 |  0:00:25s
epoch 9  | loss: 0.18093 | val_accuracy: 0.92398 |  0:00:27s
epoch 10 | loss: 0.17784 | val_accuracy: 0.9152  |  0:00:30s
epoch 11 | loss: 0.2031  | val_accuracy: 0.94444 |  0:00:32s
epoch 12 | loss: 0.20566 | val_accuracy: 0.93275 |  0:00:35s
Stop training because you reached max_epochs = 13 with best_epoch = 11 and best_val_accuracy = 0.94444


[I 2024-08-05 15:29:06,232] Trial 1 finished with value: 0.9444444444444444 and parameters: {'n_d': 58, 'n_a': 42, 'n_steps': 6, 'gamma': 1.0301209317996087, 'lambda_sparse': 2.119537650866248e-06, 'learning_rate': 0.06081264863098829, 'batch_size': 64, 'num_epochs': 13}. Best is trial 0 with value: 0.9619883040935673.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.81765 | val_accuracy: 0.6345  |  0:00:01s
epoch 1  | loss: 0.44253 | val_accuracy: 0.7924  |  0:00:02s
epoch 2  | loss: 0.32401 | val_accuracy: 0.82456 |  0:00:04s
epoch 3  | loss: 0.27153 | val_accuracy: 0.90058 |  0:00:05s
epoch 4  | loss: 0.21645 | val_accuracy: 0.9269  |  0:00:06s
epoch 5  | loss: 0.1999  | val_accuracy: 0.90058 |  0:00:06s
epoch 6  | loss: 0.19614 | val_accuracy: 0.93275 |  0:00:07s
epoch 7  | loss: 0.18776 | val_accuracy: 0.94737 |  0:00:08s
epoch 8  | loss: 0.17025 | val_accuracy: 0.9386  |  0:00:09s
epoch 9  | loss: 0.15034 | val_accuracy: 0.94737 |  0:00:09s
epoch 10 | loss: 0.17199 | val_accuracy: 0.94444 |  0:00:10s
epoch 11 | loss: 0.1764  | val_accuracy: 0.94737 |  0:00:11s
epoch 12 | loss: 0.17187 | val_accuracy: 0.95322 |  0:00:12s
epoch 13 | loss: 0.14797 | val_accuracy: 0.96784 |  0:00:13s
epoch 14 | loss: 0.16591 | val_accuracy: 0.93275 |  0:00:13s
epoch 15 | loss: 0.17415 | val_accuracy: 0.95322 |  0:00:14s
epoch 16 | loss: 0.14392

[I 2024-08-05 15:29:28,479] Trial 2 finished with value: 0.9678362573099415 and parameters: {'n_d': 17, 'n_a': 24, 'n_steps': 4, 'gamma': 1.4697780777114533, 'lambda_sparse': 0.0007252556551310239, 'learning_rate': 0.03628622273954209, 'batch_size': 128, 'num_epochs': 44}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.04382 | val_accuracy: 0.59357 |  0:00:01s
epoch 1  | loss: 1.33768 | val_accuracy: 0.67836 |  0:00:02s
epoch 2  | loss: 0.94444 | val_accuracy: 0.74854 |  0:00:03s
epoch 3  | loss: 0.55321 | val_accuracy: 0.83041 |  0:00:05s
epoch 4  | loss: 0.62514 | val_accuracy: 0.82456 |  0:00:06s
epoch 5  | loss: 0.62125 | val_accuracy: 0.84795 |  0:00:07s
epoch 6  | loss: 0.23373 | val_accuracy: 0.86257 |  0:00:09s
epoch 7  | loss: 0.20611 | val_accuracy: 0.92398 |  0:00:10s
epoch 8  | loss: 0.20504 | val_accuracy: 0.88304 |  0:00:12s
epoch 9  | loss: 0.20117 | val_accuracy: 0.92105 |  0:00:14s
epoch 10 | loss: 0.26609 | val_accuracy: 0.93567 |  0:00:15s
epoch 11 | loss: 0.22284 | val_accuracy: 0.94152 |  0:00:17s
epoch 12 | loss: 0.17823 | val_accuracy: 0.9386  |  0:00:18s
epoch 13 | loss: 0.14953 | val_accuracy: 0.9152  |  0:00:20s
epoch 14 | loss: 0.16717 | val_accuracy: 0.9386  |  0:00:21s
epoch 15 | loss: 0.15516 | val_accuracy: 0.8655  |  0:00:22s
epoch 16 | loss: 0.17133

[I 2024-08-05 15:30:01,101] Trial 3 finished with value: 0.9532163742690059 and parameters: {'n_d': 54, 'n_a': 21, 'n_steps': 8, 'gamma': 1.9502912810396966, 'lambda_sparse': 1.3335478055486316e-05, 'learning_rate': 0.03349206281270708, 'batch_size': 256, 'num_epochs': 22}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.64115 | val_accuracy: 0.71345 |  0:00:01s
epoch 1  | loss: 0.36947 | val_accuracy: 0.75439 |  0:00:03s
epoch 2  | loss: 0.29745 | val_accuracy: 0.79825 |  0:00:04s
epoch 3  | loss: 0.31206 | val_accuracy: 0.84503 |  0:00:06s
epoch 4  | loss: 0.2586  | val_accuracy: 0.85965 |  0:00:07s
epoch 5  | loss: 0.26952 | val_accuracy: 0.90351 |  0:00:10s
epoch 6  | loss: 0.22679 | val_accuracy: 0.90351 |  0:00:12s
epoch 7  | loss: 0.22086 | val_accuracy: 0.91228 |  0:00:14s
epoch 8  | loss: 0.18968 | val_accuracy: 0.94444 |  0:00:16s
epoch 9  | loss: 0.20579 | val_accuracy: 0.9269  |  0:00:18s
epoch 10 | loss: 0.20064 | val_accuracy: 0.9386  |  0:00:20s
epoch 11 | loss: 0.21479 | val_accuracy: 0.92105 |  0:00:22s
epoch 12 | loss: 0.19232 | val_accuracy: 0.94152 |  0:00:24s
epoch 13 | loss: 0.20328 | val_accuracy: 0.9269  |  0:00:25s
epoch 14 | loss: 0.20172 | val_accuracy: 0.9269  |  0:00:26s
epoch 15 | loss: 0.19308 | val_accuracy: 0.94737 |  0:00:28s
epoch 16 | loss: 0.15806

[I 2024-08-05 15:30:43,506] Trial 4 finished with value: 0.956140350877193 and parameters: {'n_d': 14, 'n_a': 34, 'n_steps': 7, 'gamma': 1.3526770744239287, 'lambda_sparse': 4.687671879386102e-06, 'learning_rate': 0.04457198238558082, 'batch_size': 128, 'num_epochs': 25}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.29132 | val_accuracy: 0.63158 |  0:00:01s
epoch 1  | loss: 1.01665 | val_accuracy: 0.72222 |  0:00:03s
epoch 2  | loss: 0.76042 | val_accuracy: 0.72222 |  0:00:05s
epoch 3  | loss: 0.50223 | val_accuracy: 0.84211 |  0:00:09s
epoch 4  | loss: 0.42411 | val_accuracy: 0.89474 |  0:00:11s
epoch 5  | loss: 0.26413 | val_accuracy: 0.89766 |  0:00:13s
epoch 6  | loss: 0.2266  | val_accuracy: 0.87427 |  0:00:16s
epoch 7  | loss: 0.2055  | val_accuracy: 0.8655  |  0:00:18s
epoch 8  | loss: 0.27741 | val_accuracy: 0.90643 |  0:00:20s
epoch 9  | loss: 0.20234 | val_accuracy: 0.94737 |  0:00:22s
epoch 10 | loss: 0.17498 | val_accuracy: 0.94737 |  0:00:24s
epoch 11 | loss: 0.16014 | val_accuracy: 0.94152 |  0:00:27s
epoch 12 | loss: 0.17353 | val_accuracy: 0.92105 |  0:00:29s
epoch 13 | loss: 0.17881 | val_accuracy: 0.93567 |  0:00:31s
epoch 14 | loss: 0.16374 | val_accuracy: 0.95322 |  0:00:33s
epoch 15 | loss: 0.14283 | val_accuracy: 0.96199 |  0:00:35s
epoch 16 | loss: 0.14806

[I 2024-08-05 15:31:29,513] Trial 5 finished with value: 0.9619883040935673 and parameters: {'n_d': 28, 'n_a': 40, 'n_steps': 10, 'gamma': 1.6658225695209086, 'lambda_sparse': 0.00031793925719272246, 'learning_rate': 0.04297609422074005, 'batch_size': 128, 'num_epochs': 21}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.76591 | val_accuracy: 0.88304 |  0:00:03s
epoch 1  | loss: 0.40781 | val_accuracy: 0.92105 |  0:00:06s
epoch 2  | loss: 0.29857 | val_accuracy: 0.89766 |  0:00:08s
epoch 3  | loss: 0.25082 | val_accuracy: 0.94444 |  0:00:11s
epoch 4  | loss: 0.26849 | val_accuracy: 0.94444 |  0:00:14s
epoch 5  | loss: 0.23705 | val_accuracy: 0.93567 |  0:00:16s
epoch 6  | loss: 0.23845 | val_accuracy: 0.92105 |  0:00:19s
epoch 7  | loss: 0.22609 | val_accuracy: 0.94444 |  0:00:22s
epoch 8  | loss: 0.2202  | val_accuracy: 0.94152 |  0:00:24s
epoch 9  | loss: 0.24615 | val_accuracy: 0.94152 |  0:00:27s
epoch 10 | loss: 0.22212 | val_accuracy: 0.94737 |  0:00:30s
epoch 11 | loss: 0.20383 | val_accuracy: 0.94737 |  0:00:33s
epoch 12 | loss: 0.23438 | val_accuracy: 0.93567 |  0:00:36s
epoch 13 | loss: 0.25391 | val_accuracy: 0.94152 |  0:00:39s
epoch 14 | loss: 0.20122 | val_accuracy: 0.93275 |  0:00:41s
epoch 15 | loss: 0.22752 | val_accuracy: 0.94737 |  0:00:44s
epoch 16 | loss: 0.20202

[I 2024-08-05 15:32:50,535] Trial 6 finished with value: 0.956140350877193 and parameters: {'n_d': 64, 'n_a': 12, 'n_steps': 4, 'gamma': 1.6730601659577984, 'lambda_sparse': 2.7072881580062227e-06, 'learning_rate': 0.020110678579529997, 'batch_size': 32, 'num_epochs': 66}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.28209 | val_accuracy: 0.53801 |  0:00:03s
epoch 1  | loss: 2.0145  | val_accuracy: 0.55263 |  0:00:05s
epoch 2  | loss: 1.95531 | val_accuracy: 0.55848 |  0:00:07s
epoch 3  | loss: 1.7085  | val_accuracy: 0.55263 |  0:00:09s
epoch 4  | loss: 1.55202 | val_accuracy: 0.56725 |  0:00:11s
epoch 5  | loss: 1.49091 | val_accuracy: 0.5848  |  0:00:13s
epoch 6  | loss: 1.41674 | val_accuracy: 0.5848  |  0:00:15s
epoch 7  | loss: 1.26153 | val_accuracy: 0.5848  |  0:00:17s
epoch 8  | loss: 1.1834  | val_accuracy: 0.5848  |  0:00:20s
epoch 9  | loss: 1.13678 | val_accuracy: 0.59064 |  0:00:21s
epoch 10 | loss: 1.07214 | val_accuracy: 0.59064 |  0:00:24s
epoch 11 | loss: 1.01239 | val_accuracy: 0.61404 |  0:00:26s
epoch 12 | loss: 1.02501 | val_accuracy: 0.63743 |  0:00:29s
epoch 13 | loss: 0.97449 | val_accuracy: 0.63158 |  0:00:31s
epoch 14 | loss: 0.93699 | val_accuracy: 0.6462  |  0:00:34s
epoch 15 | loss: 0.90203 | val_accuracy: 0.65497 |  0:00:36s
epoch 16 | loss: 0.83666

[I 2024-08-05 15:34:51,761] Trial 7 finished with value: 0.8625730994152047 and parameters: {'n_d': 60, 'n_a': 53, 'n_steps': 7, 'gamma': 1.1145187318140184, 'lambda_sparse': 0.0002253134736040413, 'learning_rate': 0.00011856146656343564, 'batch_size': 128, 'num_epochs': 54}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.92098 | val_accuracy: 0.77778 |  0:00:02s
epoch 1  | loss: 0.6598  | val_accuracy: 0.7807  |  0:00:05s
epoch 2  | loss: 0.4949  | val_accuracy: 0.79825 |  0:00:08s
epoch 3  | loss: 0.67804 | val_accuracy: 0.82456 |  0:00:11s
epoch 4  | loss: 0.46818 | val_accuracy: 0.86257 |  0:00:13s
epoch 5  | loss: 0.37185 | val_accuracy: 0.92105 |  0:00:16s
epoch 6  | loss: 0.38057 | val_accuracy: 0.88304 |  0:00:19s
epoch 7  | loss: 0.31946 | val_accuracy: 0.9152  |  0:00:21s
epoch 8  | loss: 0.31359 | val_accuracy: 0.91228 |  0:00:24s
epoch 9  | loss: 0.28138 | val_accuracy: 0.90058 |  0:00:28s
epoch 10 | loss: 0.3225  | val_accuracy: 0.88304 |  0:00:30s
epoch 11 | loss: 0.33006 | val_accuracy: 0.90351 |  0:00:33s
epoch 12 | loss: 0.27801 | val_accuracy: 0.91813 |  0:00:36s
epoch 13 | loss: 0.25959 | val_accuracy: 0.92398 |  0:00:41s
epoch 14 | loss: 0.2492  | val_accuracy: 0.90643 |  0:00:46s
epoch 15 | loss: 0.24004 | val_accuracy: 0.92105 |  0:00:51s
epoch 16 | loss: 0.24162

[I 2024-08-05 15:36:52,672] Trial 8 finished with value: 0.956140350877193 and parameters: {'n_d': 62, 'n_a': 40, 'n_steps': 7, 'gamma': 1.5054192614921227, 'lambda_sparse': 1.4641750856363394e-05, 'learning_rate': 0.004634860140397341, 'batch_size': 64, 'num_epochs': 72}. Best is trial 2 with value: 0.9678362573099415.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.78138 | val_accuracy: 0.71345 |  0:00:03s
epoch 1  | loss: 0.53269 | val_accuracy: 0.78363 |  0:00:07s
epoch 2  | loss: 0.37549 | val_accuracy: 0.82456 |  0:00:10s
epoch 3  | loss: 0.36554 | val_accuracy: 0.85088 |  0:00:13s
epoch 4  | loss: 0.3028  | val_accuracy: 0.92105 |  0:00:15s
epoch 5  | loss: 0.28507 | val_accuracy: 0.90643 |  0:00:18s
epoch 6  | loss: 0.30698 | val_accuracy: 0.85965 |  0:00:22s
epoch 7  | loss: 0.29659 | val_accuracy: 0.90351 |  0:00:26s
epoch 8  | loss: 0.25653 | val_accuracy: 0.9386  |  0:00:29s
epoch 9  | loss: 0.28367 | val_accuracy: 0.9269  |  0:00:33s
epoch 10 | loss: 0.25519 | val_accuracy: 0.9152  |  0:00:36s
epoch 11 | loss: 0.24617 | val_accuracy: 0.91813 |  0:00:39s
epoch 12 | loss: 0.21251 | val_accuracy: 0.92398 |  0:00:42s
epoch 13 | loss: 0.20044 | val_accuracy: 0.94152 |  0:00:45s
epoch 14 | loss: 0.20938 | val_accuracy: 0.90351 |  0:00:48s
epoch 15 | loss: 0.22387 | val_accuracy: 0.91228 |  0:00:52s
epoch 16 | loss: 0.22561

[I 2024-08-05 15:38:07,295] Trial 9 finished with value: 0.9415204678362573 and parameters: {'n_d': 27, 'n_a': 62, 'n_steps': 8, 'gamma': 1.5006894785028875, 'lambda_sparse': 3.9276080580510344e-05, 'learning_rate': 0.009246356034622935, 'batch_size': 64, 'num_epochs': 71}. Best is trial 2 with value: 0.9678362573099415.


epoch 0  | loss: 0.81765 | val_accuracy: 0.6345  |  0:00:00s
epoch 1  | loss: 0.44253 | val_accuracy: 0.7924  |  0:00:01s
epoch 2  | loss: 0.32401 | val_accuracy: 0.82456 |  0:00:01s
epoch 3  | loss: 0.27153 | val_accuracy: 0.90058 |  0:00:02s
epoch 4  | loss: 0.21645 | val_accuracy: 0.9269  |  0:00:03s
epoch 5  | loss: 0.1999  | val_accuracy: 0.90058 |  0:00:03s
epoch 6  | loss: 0.19614 | val_accuracy: 0.93275 |  0:00:04s
epoch 7  | loss: 0.18776 | val_accuracy: 0.94737 |  0:00:04s
epoch 8  | loss: 0.17025 | val_accuracy: 0.9386  |  0:00:05s
epoch 9  | loss: 0.15034 | val_accuracy: 0.94737 |  0:00:05s
epoch 10 | loss: 0.17199 | val_accuracy: 0.94444 |  0:00:06s
epoch 11 | loss: 0.1764  | val_accuracy: 0.94737 |  0:00:07s
epoch 12 | loss: 0.17187 | val_accuracy: 0.95322 |  0:00:07s
epoch 13 | loss: 0.14797 | val_accuracy: 0.96784 |  0:00:08s
epoch 14 | loss: 0.16591 | val_accuracy: 0.93275 |  0:00:08s
epoch 15 | loss: 0.17415 | val_accuracy: 0.95322 |  0:00:09s
epoch 16 | loss: 0.14392



epoch 0  | loss: 0.81986 | val_accuracy: 0.66058 |  0:00:00s
epoch 1  | loss: 0.39422 | val_accuracy: 0.79197 |  0:00:01s
epoch 2  | loss: 0.24542 | val_accuracy: 0.89051 |  0:00:01s
epoch 3  | loss: 0.17815 | val_accuracy: 0.92701 |  0:00:02s
epoch 4  | loss: 0.18307 | val_accuracy: 0.94161 |  0:00:02s
epoch 5  | loss: 0.19028 | val_accuracy: 0.90511 |  0:00:03s
epoch 6  | loss: 0.19275 | val_accuracy: 0.91241 |  0:00:03s
epoch 7  | loss: 0.16279 | val_accuracy: 0.91241 |  0:00:04s
epoch 8  | loss: 0.1371  | val_accuracy: 0.90146 |  0:00:04s
epoch 9  | loss: 0.14433 | val_accuracy: 0.92701 |  0:00:05s
epoch 10 | loss: 0.17783 | val_accuracy: 0.93431 |  0:00:05s
epoch 11 | loss: 0.16077 | val_accuracy: 0.93796 |  0:00:06s
epoch 12 | loss: 0.16464 | val_accuracy: 0.95985 |  0:00:07s
epoch 13 | loss: 0.16275 | val_accuracy: 0.9635  |  0:00:07s
epoch 14 | loss: 0.15018 | val_accuracy: 0.94526 |  0:00:07s
epoch 15 | loss: 0.15452 | val_accuracy: 0.9635  |  0:00:08s
epoch 16 | loss: 0.14111



epoch 0  | loss: 0.77858 | val_accuracy: 0.73723 |  0:00:00s
epoch 1  | loss: 0.40499 | val_accuracy: 0.80657 |  0:00:01s
epoch 2  | loss: 0.34533 | val_accuracy: 0.82482 |  0:00:01s
epoch 3  | loss: 0.22718 | val_accuracy: 0.89416 |  0:00:01s
epoch 4  | loss: 0.25114 | val_accuracy: 0.93066 |  0:00:02s
epoch 5  | loss: 0.25582 | val_accuracy: 0.89781 |  0:00:02s
epoch 6  | loss: 0.20164 | val_accuracy: 0.9562  |  0:00:03s
epoch 7  | loss: 0.23738 | val_accuracy: 0.95985 |  0:00:04s
epoch 8  | loss: 0.17286 | val_accuracy: 0.95985 |  0:00:04s
epoch 9  | loss: 0.18819 | val_accuracy: 0.95985 |  0:00:05s
epoch 10 | loss: 0.15764 | val_accuracy: 0.95255 |  0:00:05s
epoch 11 | loss: 0.17949 | val_accuracy: 0.95985 |  0:00:06s
epoch 12 | loss: 0.18522 | val_accuracy: 0.95985 |  0:00:06s
epoch 13 | loss: 0.16051 | val_accuracy: 0.9562  |  0:00:07s
epoch 14 | loss: 0.18366 | val_accuracy: 0.9635  |  0:00:07s
epoch 15 | loss: 0.15607 | val_accuracy: 0.95985 |  0:00:08s
epoch 16 | loss: 0.16611



epoch 0  | loss: 0.79827 | val_accuracy: 0.55311 |  0:00:00s
epoch 1  | loss: 0.45607 | val_accuracy: 0.71429 |  0:00:01s
epoch 2  | loss: 0.35949 | val_accuracy: 0.8315  |  0:00:02s
epoch 3  | loss: 0.26355 | val_accuracy: 0.91209 |  0:00:02s
epoch 4  | loss: 0.21516 | val_accuracy: 0.86447 |  0:00:03s
epoch 5  | loss: 0.22973 | val_accuracy: 0.93407 |  0:00:04s
epoch 6  | loss: 0.21109 | val_accuracy: 0.92674 |  0:00:05s
epoch 7  | loss: 0.19636 | val_accuracy: 0.93407 |  0:00:06s
epoch 8  | loss: 0.18694 | val_accuracy: 0.95971 |  0:00:07s
epoch 9  | loss: 0.18585 | val_accuracy: 0.93773 |  0:00:07s
epoch 10 | loss: 0.18237 | val_accuracy: 0.95604 |  0:00:08s
epoch 11 | loss: 0.15706 | val_accuracy: 0.94872 |  0:00:09s
epoch 12 | loss: 0.13514 | val_accuracy: 0.95238 |  0:00:09s
epoch 13 | loss: 0.18537 | val_accuracy: 0.92674 |  0:00:10s
epoch 14 | loss: 0.14742 | val_accuracy: 0.94872 |  0:00:11s
epoch 15 | loss: 0.16445 | val_accuracy: 0.94505 |  0:00:11s
epoch 16 | loss: 0.14501



epoch 0  | loss: 0.73026 | val_accuracy: 0.71429 |  0:00:00s
epoch 1  | loss: 0.30567 | val_accuracy: 0.8022  |  0:00:01s
epoch 2  | loss: 0.21816 | val_accuracy: 0.89011 |  0:00:01s
epoch 3  | loss: 0.18734 | val_accuracy: 0.91575 |  0:00:02s
epoch 4  | loss: 0.16383 | val_accuracy: 0.92674 |  0:00:02s
epoch 5  | loss: 0.15363 | val_accuracy: 0.93773 |  0:00:03s
epoch 6  | loss: 0.18193 | val_accuracy: 0.9011  |  0:00:04s
epoch 7  | loss: 0.13736 | val_accuracy: 0.93773 |  0:00:04s
epoch 8  | loss: 0.16919 | val_accuracy: 0.95604 |  0:00:05s
epoch 9  | loss: 0.14718 | val_accuracy: 0.95604 |  0:00:06s
epoch 10 | loss: 0.13242 | val_accuracy: 0.94505 |  0:00:06s
epoch 11 | loss: 0.14459 | val_accuracy: 0.95971 |  0:00:07s
epoch 12 | loss: 0.1654  | val_accuracy: 0.95971 |  0:00:08s
epoch 13 | loss: 0.12894 | val_accuracy: 0.96703 |  0:00:09s
epoch 14 | loss: 0.13914 | val_accuracy: 0.96703 |  0:00:10s
epoch 15 | loss: 0.12922 | val_accuracy: 0.96703 |  0:00:10s
epoch 16 | loss: 0.12295



epoch 0  | loss: 0.75833 | val_accuracy: 0.71795 |  0:00:00s
epoch 1  | loss: 0.34478 | val_accuracy: 0.87912 |  0:00:01s
epoch 2  | loss: 0.24007 | val_accuracy: 0.91941 |  0:00:01s
epoch 3  | loss: 0.17123 | val_accuracy: 0.92674 |  0:00:02s
epoch 4  | loss: 0.19451 | val_accuracy: 0.94505 |  0:00:03s
epoch 5  | loss: 0.16308 | val_accuracy: 0.91941 |  0:00:03s
epoch 6  | loss: 0.17113 | val_accuracy: 0.91209 |  0:00:04s
epoch 7  | loss: 0.18351 | val_accuracy: 0.91209 |  0:00:04s
epoch 8  | loss: 0.14936 | val_accuracy: 0.92674 |  0:00:05s
epoch 9  | loss: 0.15847 | val_accuracy: 0.91575 |  0:00:05s
epoch 10 | loss: 0.18791 | val_accuracy: 0.92308 |  0:00:06s
epoch 11 | loss: 0.16746 | val_accuracy: 0.92674 |  0:00:06s
epoch 12 | loss: 0.14353 | val_accuracy: 0.91209 |  0:00:07s
epoch 13 | loss: 0.13819 | val_accuracy: 0.91209 |  0:00:08s
epoch 14 | loss: 0.14064 | val_accuracy: 0.92308 |  0:00:08s

Early stopping occurred at epoch 14 with best_epoch = 4 and best_val_accuracy = 0.94

[I 2024-08-05 15:39:34,802] A new study created in memory with name: no-name-21d6a05e-e73c-4481-822b-edd45365ceca


                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.925267  0.977813         0.933286   
KNN                         0.918149  0.976972         0.902119   
Decision Tree               0.960854  0.955485         0.968857   
Random Forest               0.982206  0.998431         0.975968   
Gradient Boosting           0.967972  0.994453         0.975968   
XGBoost                     0.967972  0.998123         0.978639   
LightGBM                    0.992883  0.999496         0.979544   
CatBoost                    0.982206  0.998879         0.976865   
MLP                         0.979532  0.996847         0.961966   
DNN                         0.976608  0.994105         0.956097   
DCN                         0.973684  0.994893         0.952442   
Wide_and_Deep               0.976608  0.997772         0.966335   
XGBoost + NN                0.982456  0.998766          0.96853   
LightGBM + NN               0.982456  0.993214         0.97511

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:43:45,628] Trial 0 finished with value: 0.8391812865497076 and parameters: {'heads': 4, 'dim': 212, 'depth': 5, 'mlp_dim': 185, 'dropout': 0.2060749310863922, 'learning_rate': 0.0007646500984566662, 'batch_size': 32, 'num_epochs': 92}. Best is trial 0 with value: 0.8391812865497076.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:43:48,216] Trial 1 finished with value: 0.9327485380116959 and parameters: {'heads': 6, 'dim': 54, 'depth': 2, 'mlp_dim': 145, 'dropout': 0.3065411290798077, 'learning_rate': 0.00018625267439508766, 'batch_size': 256, 'num_epochs': 11}. Best is trial 1 with value: 0.9327485380116959.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:44:12,690] Trial 2 finished with value: 0.9707602339181286 and parameters: {'heads': 5, 'dim': 95, 'depth': 2, 'mlp_dim': 191, 'dropout': 0.005056518660957909, 'lea

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.925267  0.977813         0.933286   
KNN                         0.918149  0.976972         0.902119   
Decision Tree               0.960854  0.955485         0.968857   
Random Forest               0.982206  0.998431         0.975968   
Gradient Boosting           0.967972  0.994453         0.975968   
XGBoost                     0.967972  0.998123         0.978639   
LightGBM                    0.992883  0.999496         0.979544   
CatBoost                    0.982206  0.998879         0.976865   
MLP                         0.979532  0.996847         0.961966   
DNN                         0.976608  0.994105         0.956097   
DCN                         0.973684  0.994893         0.952442   
Wide_and_Deep               0.976608  0.997772         0.966335   
XGBoost + NN                0.982456  0.998766          0.96853   
LightGBM + NN               0.982456  0.993214         0.97511

In [138]:
file_prefix = "machine"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
if len(df) >= 1405:
    df = df.sample(n=1405, random_state=42, replace=True)
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 449, number of negative: 306
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 216
[LightGBM] [Info] Number of data points in the train set: 755, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.594702 -> initscore=0.383438
[LightGBM] [Info] Start training from score 0.383438
[LightGBM] [Info] Number of positive: 449, number of negative: 306
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 216
[LightGBM] [Info] Number of data points in the train set: 755, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.594702 -> initscore=0.383438
[LightGBM] [Info] Start training from score 0.383438
[LightGBM] [Info] Number of posi

[I 2024-08-05 15:55:16,136] A new study created in memory with name: no-name-2d335f0b-bcf2-489d-ac4a-c66b9fcd442a


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.004987                     0.000997   
KNN                                    0.000997                     0.002992   

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:55:19,176] Trial 0 finished with value: 0.9023255813953488 and parameters: {'hidden_dim_0': 141, 'hidden_dim_1': 86, 'hidden_dim_2': 223, 'learning_rate': 0.030142039671637005, 'batch_size': 64, 'num_epochs': 44}. Best is trial 0 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:55:22,393] Trial 1 finished with value: 0.8930232558139535 and parameters: {'hidden_dim_0': 72, 'hidden_dim_1': 196, 'hidden_dim_2': 243, 'learning_rate': 0.002358279760605127, 'batch_size': 32, 'num_epochs': 25}. Best is trial 0 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:55:28,919] Trial 2 finished with value: 0.8930232558139535 and parameters: {'hidden_dim_0': 230, 'hidden_dim_1': 84, 'hidden_dim_2': 104, 'learning_rate': 0.0005631677484584645, 'batch_size': 64, 'num_epochs':

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.004987                     0.000997   
KNN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:56:33,232] Trial 0 finished with value: 0.9023255813953488 and parameters: {'cross_layers': 4, 'hidden_layer_0': 142, 'hidden_layer_1': 245, 'hidden_layer_2': 48, 'learning_rate': 0.00028256075664875847, 'batch_size': 32, 'num_epochs': 44}. Best is trial 0 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:56:35,107] Trial 1 finished with value: 0.8930232558139535 and parameters: {'cross_layers': 5, 'hidden_layer_0': 249, 'hidden_layer_1': 52, 'hidden_layer_2': 250, 'learning_rate': 0.0026762945783688754, 'batch_size': 256, 'num_epochs': 30}. Best is trial 0 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:56:36,080] Trial 2 finished with value: 0.9069767441860465 and parameters: {'cross_layers': 2, 'hidden_layer_0': 42, 'hidden_layer_1': 75, 'hidden_layer_2'

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   
DCN                  0.893023  0.963574         0.912362        0.014425   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logisti

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:57:12,935] Trial 0 finished with value: 0.8976744186046511 and parameters: {'hidden_layer_0': 97, 'hidden_layer_1': 193, 'hidden_layer_2': 86, 'learning_rate': 0.00982127547776246, 'batch_size': 64, 'num_epochs': 87}. Best is trial 0 with value: 0.8976744186046511.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:57:15,196] Trial 1 finished with value: 0.9023255813953488 and parameters: {'hidden_layer_0': 68, 'hidden_layer_1': 79, 'hidden_layer_2': 113, 'learning_rate': 0.000555630889440022, 'batch_size': 128, 'num_epochs': 44}. Best is trial 1 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:57:16,910] Trial 2 finished with value: 0.8976744186046511 and parameters: {'hidden_layer_0': 40, 'hidden_layer_1': 179, 'hidden_layer_2': 106, 'learning_rate': 0.023626094717428894, 'batch_size': 25

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   
DCN                  0.893023  0.963574         0.912362        0.014425   
Wide_and_Deep        0.883721  0.960152         0.913518        0.015986   

           

  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:58:09,696] Trial 0 finished with value: 0.8837209302325582 and parameters: {'n_estimators': 73, 'max_depth': 6, 'xgb_learning_rate': 0.00015031325848728627, 'subsample': 0.6527651860136146, 'colsample_bytree': 0.7867696473150069, 'use_hidden_layer_0': False, 'use_hidden_layer_1': True, 'hidden_layer_1': 201, 'use_hidden_layer_2': False, 'nn_learning_rate': 0.0001225583803028, 'batch_size': 256, 'num_epochs': 38}. Best is trial 0 with value: 0.8837209302325582.
  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 15:58:10,937] Trial 1 finished with value: 0.9023255813953488 and parameters: {'n_estimato

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   
DCN                  0.893023  0.963574         0.912362        0.014425   
Wide_and_Deep        0.883721  0.960152         0.913518        0.015986   
XGBoost + NN

[I 2024-08-05 15:58:42,401] Trial 0 finished with value: 0.9023255813953488 and parameters: {'n_estimators': 211, 'max_depth': 10, 'lgb_learning_rate': 0.00018578494055889654, 'num_leaves': 27, 'subsample': 0.8763521815466333, 'colsample_bytree': 0.6483684159056777, 'hidden_layer_0': 235, 'hidden_layer_1': 44, 'hidden_layer_2': 251, 'nn_learning_rate': 0.005242750155384842, 'batch_size': 128, 'num_epochs': 79}. Best is trial 0 with value: 0.9023255813953488.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:58:46,193] Trial 1 finished with value: 0.5162790697674419 and parameters: {'n_estimators': 179, 'max_depth': 6, 'lgb_learning_rate': 0.0002035933621308802, 'num_leaves': 88, 'subsample': 0.5492677666101498, 'colsample_bytree': 0.6098307932098086, 'hidden_layer_0': 120, 'hidden_layer_1': 191, 'hidden_layer_2': 93, 'nn_learning_rate': 0.022127449075542715, 'batch_size': 32, 'num_epochs': 28}. Best is trial 0 with value: 0.9023255813953488.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:58:49,508] Trial 2 finished with value: 0.9069767441860465 and parameters: {'n_estimators': 199, 'max_depth': 8, 'lgb_learning_rate': 0.015127283072437084, 'num_leaves': 77, 'subsample': 0.8718975112548851, 'colsample_bytree': 0.6218879753739841, 'hidden_layer_0': 135, 'hidden_layer_1': 181, 'hidden_layer_2': 177, 'nn_learning_rate': 0.00020125103777636418, 'batch_size': 256, 'num_epochs': 47}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:58:55,874] Trial 3 finished with value: 0.8930232558139535 and parameters: {'n_estimators': 245, 'max_depth': 7, 'lgb_learning_rate': 0.0014720222166155837, 'num_leaves': 55, 'subsample': 0.6829041763550172, 'colsample_bytree': 0.7842654935757418, 'hidden_layer_0': 64, 'hidden_layer_1': 128, 'hidden_layer_2': 125, 'nn_learning_rate': 0.0025673477238046132, 'batch_size': 128, 'num_epochs': 91}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:59:10,127] Trial 4 finished with value: 0.8883720930232558 and parameters: {'n_estimators': 182, 'max_depth': 8, 'lgb_learning_rate': 0.0042566597192444195, 'num_leaves': 76, 'subsample': 0.7856362289288812, 'colsample_bytree': 0.6203046713870296, 'hidden_layer_0': 237, 'hidden_layer_1': 252, 'hidden_layer_2': 198, 'nn_learning_rate': 0.0004015724133394843, 'batch_size': 32, 'num_epochs': 47}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009358 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:59:13,462] Trial 5 finished with value: 0.8976744186046511 and parameters: {'n_estimators': 284, 'max_depth': 8, 'lgb_learning_rate': 0.07520204493099683, 'num_leaves': 54, 'subsample': 0.8783084636329034, 'colsample_bytree': 0.9532888300152481, 'hidden_layer_0': 178, 'hidden_layer_1': 153, 'hidden_layer_2': 224, 'nn_learning_rate': 0.021487873964809577, 'batch_size': 256, 'num_epochs': 22}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000298 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:59:17,961] Trial 6 finished with value: 0.8837209302325582 and parameters: {'n_estimators': 90, 'max_depth': 5, 'lgb_learning_rate': 0.005775021096146096, 'num_leaves': 69, 'subsample': 0.5091472107745048, 'colsample_bytree': 0.9977354746889041, 'hidden_layer_0': 256, 'hidden_layer_1': 138, 'hidden_layer_2': 230, 'nn_learning_rate': 0.04428927826539831, 'batch_size': 256, 'num_epochs': 71}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:59:20,707] Trial 7 finished with value: 0.8883720930232558 and parameters: {'n_estimators': 84, 'max_depth': 8, 'lgb_learning_rate': 0.008938666467370607, 'num_leaves': 61, 'subsample': 0.5508773351286536, 'colsample_bytree': 0.6401283105781812, 'hidden_layer_0': 101, 'hidden_layer_1': 35, 'hidden_layer_2': 117, 'nn_learning_rate': 0.000632951842198883, 'batch_size': 128, 'num_epochs': 32}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:59:23,641] Trial 8 finished with value: 0.8883720930232558 and parameters: {'n_estimators': 115, 'max_depth': 10, 'lgb_learning_rate': 0.008201893360076025, 'num_leaves': 25, 'subsample': 0.5345264889814592, 'colsample_bytree': 0.9892893750338112, 'hidden_layer_0': 239, 'hidden_layer_1': 128, 'hidden_layer_2': 125, 'nn_learning_rate': 0.09750152341686597, 'batch_size': 128, 'num_epochs': 27}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000216 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089


[I 2024-08-05 15:59:26,529] Trial 9 finished with value: 0.9023255813953488 and parameters: {'n_estimators': 151, 'max_depth': 8, 'lgb_learning_rate': 0.0014771216363316858, 'num_leaves': 40, 'subsample': 0.5624143010906493, 'colsample_bytree': 0.604483894927405, 'hidden_layer_0': 250, 'hidden_layer_1': 219, 'hidden_layer_2': 175, 'nn_learning_rate': 0.018088132571815407, 'batch_size': 256, 'num_epochs': 48}. Best is trial 2 with value: 0.9069767441860465.


[LightGBM] [Info] Number of positive: 440, number of negative: 416
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363
[LightGBM] [Info] Number of data points in the train set: 856, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514019 -> initscore=0.056089
[LightGBM] [Info] Start training from score 0.056089
[LightGBM] [Info] Number of positive: 352, number of negative: 332
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 322
[LightGBM] [Info] Number of data points in the train set: 684, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.514620 -> initscore=0.058496
[LightGBM] [Info] Start training from score 0.058496
[LightGBM] [Info] Number of posi

[I 2024-08-05 15:59:53,138] A new study created in memory with name: no-name-a4d1a067-1a07-42ef-88c0-7a1449044a2a


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   
DCN                  0.893023  0.963574         0.912362        0.014425   
Wide_and_Deep        0.883721  0.960152         0.913518        0.015986   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:00:00,603] Trial 0 finished with value: 0.8883720930232558 and parameters: {'num_heads': 3, 'embedding_dim': 27, 'num_layers': 3, 'hidden_layer_0': 67, 'hidden_layer_1': 126, 'hidden_layer_2': 34, 'nn_learning_rate': 0.02144609154016537, 'batch_size': 128, 'num_epochs': 36}. Best is trial 0 with value: 0.8883720930232558.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:00:05,914] Trial 1 finished with value: 0.8837209302325582 and parameters: {'num_heads': 2, 'embedding_dim': 58, 'num_layers': 2, 'hidden_layer_0': 166, 'hidden_layer_1': 91, 'hidden_layer_2': 232, 'nn_learning_rate': 0.00018027790647758563, 'batch_size': 128, 'num_epochs': 38}. Best is trial 0 with value: 0.8883720930232558.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:00:10,882] Trial 2 finished with value: 0.8697674418604651 and 

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   
DCN                  0.893023  0.963574         0.912362        0.014425   
Wide_and_Deep        0.883721  0.960152         0.913518        0.015986   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:03:02,739] Trial 0 finished with value: 0.8837209302325582 and parameters: {'num_heads': 3, 'embedding_dim': 42, 'num_layers': 3, 'hidden_layer_0': 147, 'hidden_layer_1': 49, 'hidden_layer_2': 236, 'nn_learning_rate': 0.000461633468141971, 'batch_size': 64, 'num_epochs': 65}. Best is trial 0 with value: 0.8837209302325582.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:03:22,948] Trial 1 finished with value: 0.9023255813953488 and parameters: {'num_heads': 1, 'embedding_dim': 9, 'num_layers': 2, 'hidden_layer_0': 206, 'hidden_layer_1': 152, 'hidden_layer_2': 174, 'nn_learning_rate': 0.0008105890270654898, 'batch_size': 32, 'num_epochs': 96}. Best is trial 1 with value: 0.9023255813953488.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:03:30,506] Trial 2 finished with value: 0.8883720930232558 and p

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.867725  0.945684         0.915232        0.019376   
KNN                  0.857143  0.911652         0.917881        0.014868   
Decision Tree        0.846561  0.893002         0.886093        0.019823   
Random Forest        0.883598  0.954586         0.915232        0.016962   
Gradient Boosting     0.89418  0.955601         0.913907        0.011082   
XGBoost              0.888889  0.956277         0.913907        0.016754   
LightGBM             0.862434  0.935542         0.911258        0.027337   
CatBoost             0.878307  0.949966         0.919205         0.02347   
MLP                  0.883721  0.960239         0.921736        0.005868   
DNN                  0.883721  0.935248         0.908867        0.008046   
DCN                  0.893023  0.963574         0.912362        0.014425   
Wide_and_Deep        0.883721  0.960152         0.913518        0.015986   
XGBoost + NN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:06:16,400] Trial 0 finished with value: 0.8930232558139535 and parameters: {'num_layers': 5, 'hidden_layer_0': 206, 'hidden_layer_1': 51, 'hidden_layer_2': 250, 'hidden_layer_3': 181, 'hidden_layer_4': 219, 'learning_rate': 0.0007116033303322644, 'batch_size': 64, 'num_epochs': 40}. Best is trial 0 with value: 0.8930232558139535.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:06:19,622] Trial 1 finished with value: 0.9023255813953488 and parameters: {'num_layers': 4, 'hidden_layer_0': 195, 'hidden_layer_1': 48, 'hidden_layer_2': 206, 'hidden_layer_3': 180, 'learning_rate': 0.006437469513675124, 'batch_size': 256, 'num_epochs': 64}. Best is trial 1 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:06:23,745] Trial 2 finished with value: 0.8930232558139535 and parameters: {'num_layers': 3,

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.867725  0.945684         0.915232   
KNN                         0.857143  0.911652         0.917881   
Decision Tree               0.846561  0.893002         0.886093   
Random Forest               0.883598  0.954586         0.915232   
Gradient Boosting            0.89418  0.955601         0.913907   
XGBoost                     0.888889  0.956277         0.913907   
LightGBM                    0.862434  0.935542         0.911258   
CatBoost                    0.878307  0.949966         0.919205   
MLP                         0.883721  0.960239         0.921736   
DNN                         0.883721  0.935248         0.908867   
DCN                         0.893023  0.963574         0.912362   
Wide_and_Deep               0.883721  0.960152         0.913518   
XGBoost + NN                0.893023  0.940662         0.907738   
LightGBM + NN               0.897674  0.961452         0.91471

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:07:11,172] Trial 0 finished with value: 0.8930232558139535 and parameters: {'hidden_dim': 255, 'learning_rate': 0.017370088774020083, 'batch_size': 128, 'num_epochs': 88}. Best is trial 0 with value: 0.8930232558139535.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:07:14,480] Trial 1 finished with value: 0.9023255813953488 and parameters: {'hidden_dim': 151, 'learning_rate': 0.039370831999493996, 'batch_size': 128, 'num_epochs': 95}. Best is trial 1 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:07:16,888] Trial 2 finished with value: 0.8930232558139535 and parameters: {'hidden_dim': 85, 'learning_rate': 0.008982620496452987, 'batch_size': 128, 'num_epochs': 73}. Best is trial 1 with value: 0.9023255813953488.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.867725  0.945684         0.915232   
KNN                         0.857143  0.911652         0.917881   
Decision Tree               0.846561  0.893002         0.886093   
Random Forest               0.883598  0.954586         0.915232   
Gradient Boosting            0.89418  0.955601         0.913907   
XGBoost                     0.888889  0.956277         0.913907   
LightGBM                    0.862434  0.935542         0.911258   
CatBoost                    0.878307  0.949966         0.919205   
MLP                         0.883721  0.960239         0.921736   
DNN                         0.883721  0.935248         0.908867   
DCN                         0.893023  0.963574         0.912362   
Wide_and_Deep               0.883721  0.960152         0.913518   
XGBoost + NN                0.893023  0.940662         0.907738   
LightGBM + NN               0.897674  0.961452         0.91471

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:07:59,880] Trial 0 finished with value: 0.8976744186046511 and parameters: {'num_layers': 2, 'num_trees': 8, 'tree_dim': 29, 'learning_rate': 0.00039555105363215195, 'batch_size': 64, 'num_epochs': 54}. Best is trial 0 with value: 0.8976744186046511.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:08:04,851] Trial 1 finished with value: 0.8976744186046511 and parameters: {'num_layers': 2, 'num_trees': 5, 'tree_dim': 57, 'learning_rate': 0.00011553071545447813, 'batch_size': 32, 'num_epochs': 23}. Best is trial 0 with value: 0.8976744186046511.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:08:26,096] Trial 2 finished with value: 0.8790697674418605 and parameters: {'num_layers': 2, 'num_trees': 8, 'tree_dim': 60, 'learning_rate': 0.02765848966481464, 'batch_size': 32, 'num_epochs': 69}. Best is trial 0 with value: 0.8

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.867725  0.945684         0.915232   
KNN                         0.857143  0.911652         0.917881   
Decision Tree               0.846561  0.893002         0.886093   
Random Forest               0.883598  0.954586         0.915232   
Gradient Boosting            0.89418  0.955601         0.913907   
XGBoost                     0.888889  0.956277         0.913907   
LightGBM                    0.862434  0.935542         0.911258   
CatBoost                    0.878307  0.949966         0.919205   
MLP                         0.883721  0.960239         0.921736   
DNN                         0.883721  0.935248         0.908867   
DCN                         0.893023  0.963574         0.912362   
Wide_and_Deep               0.883721  0.960152         0.913518   
XGBoost + NN                0.893023  0.940662         0.907738   
LightGBM + NN               0.897674  0.961452         0.91471

  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.60004 | val_accuracy: 0.46512 |  0:00:00s
epoch 1  | loss: 2.51821 | val_accuracy: 0.45581 |  0:00:01s
epoch 2  | loss: 2.23647 | val_accuracy: 0.51163 |  0:00:02s
epoch 3  | loss: 2.17869 | val_accuracy: 0.53953 |  0:00:03s
epoch 4  | loss: 2.02681 | val_accuracy: 0.50233 |  0:00:04s
epoch 5  | loss: 1.95148 | val_accuracy: 0.53488 |  0:00:04s
epoch 6  | loss: 1.88962 | val_accuracy: 0.47907 |  0:00:05s
epoch 7  | loss: 1.78346 | val_accuracy: 0.51163 |  0:00:06s
epoch 8  | loss: 1.74427 | val_accuracy: 0.51163 |  0:00:07s
epoch 9  | loss: 1.61833 | val_accuracy: 0.53488 |  0:00:08s
epoch 10 | loss: 1.66919 | val_accuracy: 0.55349 |  0:00:09s
epoch 11 | loss: 1.47528 | val_accuracy: 0.54884 |  0:00:09s
epoch 12 | loss: 1.55747 | val_accuracy: 0.56744 |  0:00:10s
epoch 13 | loss: 1.41541 | val_accuracy: 0.5907  |  0:00:11s
epoch 14 | loss: 1.39808 | val_accuracy: 0.6093  |  0:00:12s
epoch 15 | loss: 1.34349 | val_accuracy: 0.64186 |  0:00:13s
epoch 16 | loss: 1.28832

[I 2024-08-05 16:11:20,289] Trial 0 finished with value: 0.7488372093023256 and parameters: {'n_d': 45, 'n_a': 56, 'n_steps': 9, 'gamma': 1.7634244095308778, 'lambda_sparse': 1.298996583747124e-06, 'learning_rate': 0.00023257590346168315, 'batch_size': 256, 'num_epochs': 35}. Best is trial 0 with value: 0.7488372093023256.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.29858 | val_accuracy: 0.50698 |  0:00:00s
epoch 1  | loss: 1.92451 | val_accuracy: 0.52093 |  0:00:01s
epoch 2  | loss: 1.59523 | val_accuracy: 0.56744 |  0:00:02s
epoch 3  | loss: 1.29945 | val_accuracy: 0.6093  |  0:00:02s
epoch 4  | loss: 1.11777 | val_accuracy: 0.61395 |  0:00:03s
epoch 5  | loss: 0.96377 | val_accuracy: 0.62791 |  0:00:03s
epoch 6  | loss: 0.77029 | val_accuracy: 0.63256 |  0:00:04s
epoch 7  | loss: 0.74631 | val_accuracy: 0.68837 |  0:00:05s
epoch 8  | loss: 0.6302  | val_accuracy: 0.69302 |  0:00:05s
epoch 9  | loss: 0.54659 | val_accuracy: 0.71628 |  0:00:06s
epoch 10 | loss: 0.55801 | val_accuracy: 0.72093 |  0:00:07s
epoch 11 | loss: 0.51282 | val_accuracy: 0.74419 |  0:00:07s
epoch 12 | loss: 0.46444 | val_accuracy: 0.78605 |  0:00:08s
epoch 13 | loss: 0.46719 | val_accuracy: 0.78605 |  0:00:09s
epoch 14 | loss: 0.39335 | val_accuracy: 0.7814  |  0:00:09s
epoch 15 | loss: 0.41279 | val_accuracy: 0.8093  |  0:00:10s
epoch 16 | loss: 0.33306

[I 2024-08-05 16:11:59,231] Trial 1 finished with value: 0.9162790697674419 and parameters: {'n_d': 13, 'n_a': 18, 'n_steps': 7, 'gamma': 1.0652514258060441, 'lambda_sparse': 0.0002331471903711399, 'learning_rate': 0.0008973129865278344, 'batch_size': 128, 'num_epochs': 58}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.89246 | val_accuracy: 0.48372 |  0:00:01s
epoch 1  | loss: 1.46742 | val_accuracy: 0.52558 |  0:00:03s
epoch 2  | loss: 1.22568 | val_accuracy: 0.57674 |  0:00:04s
epoch 3  | loss: 0.98942 | val_accuracy: 0.61395 |  0:00:05s
epoch 4  | loss: 0.8463  | val_accuracy: 0.64186 |  0:00:07s
epoch 5  | loss: 0.79205 | val_accuracy: 0.64651 |  0:00:08s
epoch 6  | loss: 0.71542 | val_accuracy: 0.66512 |  0:00:09s
epoch 7  | loss: 0.61698 | val_accuracy: 0.69767 |  0:00:11s
epoch 8  | loss: 0.64021 | val_accuracy: 0.72558 |  0:00:13s
epoch 9  | loss: 0.5691  | val_accuracy: 0.74884 |  0:00:14s
epoch 10 | loss: 0.49622 | val_accuracy: 0.77209 |  0:00:16s
epoch 11 | loss: 0.55676 | val_accuracy: 0.75349 |  0:00:17s
epoch 12 | loss: 0.47942 | val_accuracy: 0.77674 |  0:00:19s
epoch 13 | loss: 0.46659 | val_accuracy: 0.7814  |  0:00:20s
epoch 14 | loss: 0.51493 | val_accuracy: 0.7907  |  0:00:22s
epoch 15 | loss: 0.42814 | val_accuracy: 0.80465 |  0:00:23s
epoch 16 | loss: 0.48908

[I 2024-08-05 16:12:48,336] Trial 2 finished with value: 0.8697674418604651 and parameters: {'n_d': 24, 'n_a': 62, 'n_steps': 6, 'gamma': 1.5909590611354925, 'lambda_sparse': 1.2357244386775995e-06, 'learning_rate': 0.0005359975980491221, 'batch_size': 64, 'num_epochs': 35}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.89553 | val_accuracy: 0.73488 |  0:00:00s
epoch 1  | loss: 0.36906 | val_accuracy: 0.7907  |  0:00:01s
epoch 2  | loss: 0.29352 | val_accuracy: 0.87442 |  0:00:01s
epoch 3  | loss: 0.25751 | val_accuracy: 0.86047 |  0:00:02s
epoch 4  | loss: 0.24665 | val_accuracy: 0.87442 |  0:00:02s
epoch 5  | loss: 0.24253 | val_accuracy: 0.87442 |  0:00:03s
epoch 6  | loss: 0.23983 | val_accuracy: 0.88372 |  0:00:03s
epoch 7  | loss: 0.21146 | val_accuracy: 0.89767 |  0:00:04s
epoch 8  | loss: 0.20534 | val_accuracy: 0.89302 |  0:00:04s
epoch 9  | loss: 0.19807 | val_accuracy: 0.89767 |  0:00:05s
epoch 10 | loss: 0.21559 | val_accuracy: 0.88372 |  0:00:05s
epoch 11 | loss: 0.20116 | val_accuracy: 0.88372 |  0:00:06s
epoch 12 | loss: 0.19485 | val_accuracy: 0.88372 |  0:00:06s
epoch 13 | loss: 0.17688 | val_accuracy: 0.90698 |  0:00:07s
epoch 14 | loss: 0.18274 | val_accuracy: 0.88372 |  0:00:07s
epoch 15 | loss: 0.20218 | val_accuracy: 0.89767 |  0:00:08s
epoch 16 | loss: 0.19621

[I 2024-08-05 16:13:00,771] Trial 3 finished with value: 0.9069767441860465 and parameters: {'n_d': 36, 'n_a': 40, 'n_steps': 4, 'gamma': 1.592492338139086, 'lambda_sparse': 7.495063025528939e-06, 'learning_rate': 0.053799138916747306, 'batch_size': 128, 'num_epochs': 88}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.27456 | val_accuracy: 0.6093  |  0:00:01s
epoch 1  | loss: 1.0075  | val_accuracy: 0.65581 |  0:00:02s
epoch 2  | loss: 0.86231 | val_accuracy: 0.72558 |  0:00:04s
epoch 3  | loss: 0.67927 | val_accuracy: 0.74419 |  0:00:05s
epoch 4  | loss: 0.5935  | val_accuracy: 0.77674 |  0:00:06s
epoch 5  | loss: 0.60003 | val_accuracy: 0.79535 |  0:00:08s
epoch 6  | loss: 0.45045 | val_accuracy: 0.8093  |  0:00:09s
epoch 7  | loss: 0.46333 | val_accuracy: 0.8186  |  0:00:11s
epoch 8  | loss: 0.45899 | val_accuracy: 0.84186 |  0:00:12s
epoch 9  | loss: 0.46372 | val_accuracy: 0.84651 |  0:00:14s
epoch 10 | loss: 0.41775 | val_accuracy: 0.86047 |  0:00:15s
epoch 11 | loss: 0.43807 | val_accuracy: 0.86512 |  0:00:16s
epoch 12 | loss: 0.37312 | val_accuracy: 0.88837 |  0:00:17s
epoch 13 | loss: 0.35437 | val_accuracy: 0.88372 |  0:00:19s
epoch 14 | loss: 0.38405 | val_accuracy: 0.87907 |  0:00:20s
epoch 15 | loss: 0.36518 | val_accuracy: 0.88837 |  0:00:22s
epoch 16 | loss: 0.35488

[I 2024-08-05 16:13:33,586] Trial 4 finished with value: 0.8883720930232558 and parameters: {'n_d': 40, 'n_a': 40, 'n_steps': 6, 'gamma': 1.2465173751009724, 'lambda_sparse': 0.00043733272358946755, 'learning_rate': 0.0004068013008438339, 'batch_size': 64, 'num_epochs': 76}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 3.14065 | val_accuracy: 0.78605 |  0:00:00s
epoch 1  | loss: 0.83821 | val_accuracy: 0.80465 |  0:00:01s
epoch 2  | loss: 0.69579 | val_accuracy: 0.78605 |  0:00:02s
epoch 3  | loss: 0.76181 | val_accuracy: 0.80465 |  0:00:02s
epoch 4  | loss: 0.89774 | val_accuracy: 0.80465 |  0:00:03s
epoch 5  | loss: 1.26445 | val_accuracy: 0.75814 |  0:00:04s
epoch 6  | loss: 0.68714 | val_accuracy: 0.7814  |  0:00:04s
epoch 7  | loss: 0.89658 | val_accuracy: 0.86977 |  0:00:05s
epoch 8  | loss: 0.53699 | val_accuracy: 0.75349 |  0:00:06s
epoch 9  | loss: 0.42202 | val_accuracy: 0.86977 |  0:00:06s
epoch 10 | loss: 0.41841 | val_accuracy: 0.85116 |  0:00:07s
epoch 11 | loss: 0.33794 | val_accuracy: 0.82326 |  0:00:08s
epoch 12 | loss: 0.27198 | val_accuracy: 0.84186 |  0:00:08s
epoch 13 | loss: 0.24873 | val_accuracy: 0.83256 |  0:00:09s
epoch 14 | loss: 0.27173 | val_accuracy: 0.85581 |  0:00:10s
epoch 15 | loss: 0.24087 | val_accuracy: 0.86512 |  0:00:10s
epoch 16 | loss: 0.22442

[I 2024-08-05 16:13:57,197] Trial 5 finished with value: 0.9023255813953488 and parameters: {'n_d': 59, 'n_a': 29, 'n_steps': 8, 'gamma': 1.9699427800389462, 'lambda_sparse': 1.0679289740745368e-06, 'learning_rate': 0.03921274600344708, 'batch_size': 256, 'num_epochs': 90}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.45551 | val_accuracy: 0.52558 |  0:00:00s
epoch 1  | loss: 2.34307 | val_accuracy: 0.52558 |  0:00:01s
epoch 2  | loss: 2.23183 | val_accuracy: 0.53023 |  0:00:01s
epoch 3  | loss: 2.1862  | val_accuracy: 0.52093 |  0:00:02s
epoch 4  | loss: 2.19372 | val_accuracy: 0.52093 |  0:00:02s
epoch 5  | loss: 2.09779 | val_accuracy: 0.52093 |  0:00:03s
epoch 6  | loss: 2.01858 | val_accuracy: 0.52093 |  0:00:03s
epoch 7  | loss: 1.9361  | val_accuracy: 0.52093 |  0:00:04s
epoch 8  | loss: 1.91821 | val_accuracy: 0.52093 |  0:00:05s
epoch 9  | loss: 1.85501 | val_accuracy: 0.52093 |  0:00:05s
epoch 10 | loss: 1.77674 | val_accuracy: 0.52093 |  0:00:06s
epoch 11 | loss: 1.74937 | val_accuracy: 0.51628 |  0:00:06s
epoch 12 | loss: 1.68305 | val_accuracy: 0.51163 |  0:00:07s

Early stopping occurred at epoch 12 with best_epoch = 2 and best_val_accuracy = 0.53023


[I 2024-08-05 16:14:05,100] Trial 6 finished with value: 0.5302325581395348 and parameters: {'n_d': 34, 'n_a': 25, 'n_steps': 5, 'gamma': 1.2767753832142326, 'lambda_sparse': 0.0002807061312865466, 'learning_rate': 0.0001207983399941284, 'batch_size': 128, 'num_epochs': 34}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.50944 | val_accuracy: 0.82791 |  0:00:00s
epoch 1  | loss: 0.29718 | val_accuracy: 0.84651 |  0:00:00s
epoch 2  | loss: 0.26219 | val_accuracy: 0.85116 |  0:00:01s
epoch 3  | loss: 0.25528 | val_accuracy: 0.86512 |  0:00:01s
epoch 4  | loss: 0.23228 | val_accuracy: 0.88372 |  0:00:02s
epoch 5  | loss: 0.21717 | val_accuracy: 0.88837 |  0:00:02s
epoch 6  | loss: 0.21811 | val_accuracy: 0.90698 |  0:00:02s
epoch 7  | loss: 0.22797 | val_accuracy: 0.89767 |  0:00:03s
epoch 8  | loss: 0.21252 | val_accuracy: 0.88837 |  0:00:03s
epoch 9  | loss: 0.1889  | val_accuracy: 0.87907 |  0:00:04s
epoch 10 | loss: 0.18899 | val_accuracy: 0.88372 |  0:00:04s
epoch 11 | loss: 0.21908 | val_accuracy: 0.87907 |  0:00:04s
epoch 12 | loss: 0.20739 | val_accuracy: 0.89302 |  0:00:05s
epoch 13 | loss: 0.20309 | val_accuracy: 0.88837 |  0:00:05s
epoch 14 | loss: 0.21023 | val_accuracy: 0.90698 |  0:00:06s
epoch 15 | loss: 0.20888 | val_accuracy: 0.90233 |  0:00:06s


[I 2024-08-05 16:14:12,175] Trial 7 finished with value: 0.9069767441860465 and parameters: {'n_d': 12, 'n_a': 10, 'n_steps': 4, 'gamma': 1.1256065333811425, 'lambda_sparse': 2.9207802252400244e-05, 'learning_rate': 0.044898829759345345, 'batch_size': 128, 'num_epochs': 86}. Best is trial 1 with value: 0.9162790697674419.


epoch 16 | loss: 0.20063 | val_accuracy: 0.87442 |  0:00:06s

Early stopping occurred at epoch 16 with best_epoch = 6 and best_val_accuracy = 0.90698


  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.80162 | val_accuracy: 0.81395 |  0:00:02s
epoch 1  | loss: 0.43294 | val_accuracy: 0.83721 |  0:00:04s
epoch 2  | loss: 0.32823 | val_accuracy: 0.87907 |  0:00:06s
epoch 3  | loss: 0.36951 | val_accuracy: 0.86512 |  0:00:07s
epoch 4  | loss: 0.30588 | val_accuracy: 0.85581 |  0:00:09s
epoch 5  | loss: 0.29523 | val_accuracy: 0.88372 |  0:00:11s
epoch 6  | loss: 0.28707 | val_accuracy: 0.86512 |  0:00:13s
epoch 7  | loss: 0.28609 | val_accuracy: 0.88372 |  0:00:15s
epoch 8  | loss: 0.24491 | val_accuracy: 0.88372 |  0:00:17s
epoch 9  | loss: 0.25588 | val_accuracy: 0.88372 |  0:00:19s
epoch 10 | loss: 0.21603 | val_accuracy: 0.89302 |  0:00:21s
epoch 11 | loss: 0.24134 | val_accuracy: 0.89302 |  0:00:23s
epoch 12 | loss: 0.24184 | val_accuracy: 0.87442 |  0:00:25s
epoch 13 | loss: 0.23579 | val_accuracy: 0.89767 |  0:00:27s
epoch 14 | loss: 0.21923 | val_accuracy: 0.87907 |  0:00:29s
epoch 15 | loss: 0.23564 | val_accuracy: 0.86047 |  0:00:31s
epoch 16 | loss: 0.22185

[I 2024-08-05 16:14:52,247] Trial 8 finished with value: 0.9023255813953488 and parameters: {'n_d': 35, 'n_a': 37, 'n_steps': 5, 'gamma': 1.821550712309526, 'lambda_sparse': 3.9752958606979065e-06, 'learning_rate': 0.029821673939605206, 'batch_size': 32, 'num_epochs': 20}. Best is trial 1 with value: 0.9162790697674419.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.81809 | val_accuracy: 0.72093 |  0:00:01s
epoch 1  | loss: 0.51724 | val_accuracy: 0.83721 |  0:00:02s
epoch 2  | loss: 0.45472 | val_accuracy: 0.85116 |  0:00:03s
epoch 3  | loss: 0.39648 | val_accuracy: 0.84651 |  0:00:05s
epoch 4  | loss: 0.3786  | val_accuracy: 0.85581 |  0:00:06s
epoch 5  | loss: 0.32339 | val_accuracy: 0.85581 |  0:00:07s
epoch 6  | loss: 0.34543 | val_accuracy: 0.87907 |  0:00:09s
epoch 7  | loss: 0.29082 | val_accuracy: 0.87907 |  0:00:10s
epoch 8  | loss: 0.28236 | val_accuracy: 0.88372 |  0:00:11s
epoch 9  | loss: 0.28189 | val_accuracy: 0.88837 |  0:00:13s
epoch 10 | loss: 0.27052 | val_accuracy: 0.88837 |  0:00:14s
epoch 11 | loss: 0.28068 | val_accuracy: 0.89767 |  0:00:16s
epoch 12 | loss: 0.25622 | val_accuracy: 0.90233 |  0:00:17s
epoch 13 | loss: 0.25488 | val_accuracy: 0.90698 |  0:00:18s
epoch 14 | loss: 0.24309 | val_accuracy: 0.90698 |  0:00:20s
epoch 15 | loss: 0.23079 | val_accuracy: 0.89302 |  0:00:21s
epoch 16 | loss: 0.24257

[I 2024-08-05 16:15:23,834] Trial 9 finished with value: 0.9069767441860465 and parameters: {'n_d': 16, 'n_a': 46, 'n_steps': 6, 'gamma': 1.1285791287931088, 'lambda_sparse': 1.9838560324528737e-06, 'learning_rate': 0.0018821880822365957, 'batch_size': 64, 'num_epochs': 23}. Best is trial 1 with value: 0.9162790697674419.


epoch 0  | loss: 2.29858 | val_accuracy: 0.50698 |  0:00:00s
epoch 1  | loss: 1.92451 | val_accuracy: 0.52093 |  0:00:01s
epoch 2  | loss: 1.59523 | val_accuracy: 0.56744 |  0:00:01s
epoch 3  | loss: 1.29945 | val_accuracy: 0.6093  |  0:00:02s
epoch 4  | loss: 1.11777 | val_accuracy: 0.61395 |  0:00:03s
epoch 5  | loss: 0.96377 | val_accuracy: 0.62791 |  0:00:03s
epoch 6  | loss: 0.77029 | val_accuracy: 0.63256 |  0:00:04s
epoch 7  | loss: 0.74631 | val_accuracy: 0.68837 |  0:00:05s
epoch 8  | loss: 0.6302  | val_accuracy: 0.69302 |  0:00:05s
epoch 9  | loss: 0.54659 | val_accuracy: 0.71628 |  0:00:06s
epoch 10 | loss: 0.55801 | val_accuracy: 0.72093 |  0:00:07s
epoch 11 | loss: 0.51282 | val_accuracy: 0.74419 |  0:00:07s
epoch 12 | loss: 0.46444 | val_accuracy: 0.78605 |  0:00:08s
epoch 13 | loss: 0.46719 | val_accuracy: 0.78605 |  0:00:09s
epoch 14 | loss: 0.39335 | val_accuracy: 0.7814  |  0:00:09s
epoch 15 | loss: 0.41279 | val_accuracy: 0.8093  |  0:00:10s
epoch 16 | loss: 0.33306



epoch 0  | loss: 2.35298 | val_accuracy: 0.52326 |  0:00:00s
epoch 1  | loss: 1.92552 | val_accuracy: 0.58721 |  0:00:01s
epoch 2  | loss: 1.77091 | val_accuracy: 0.56395 |  0:00:01s
epoch 3  | loss: 1.48289 | val_accuracy: 0.56395 |  0:00:02s
epoch 4  | loss: 1.19797 | val_accuracy: 0.5814  |  0:00:02s
epoch 5  | loss: 1.05966 | val_accuracy: 0.61628 |  0:00:03s
epoch 6  | loss: 0.93149 | val_accuracy: 0.65698 |  0:00:03s
epoch 7  | loss: 0.8645  | val_accuracy: 0.6686  |  0:00:04s
epoch 8  | loss: 0.74184 | val_accuracy: 0.66279 |  0:00:05s
epoch 9  | loss: 0.66877 | val_accuracy: 0.66279 |  0:00:05s
epoch 10 | loss: 0.65535 | val_accuracy: 0.7093  |  0:00:06s
epoch 11 | loss: 0.56055 | val_accuracy: 0.71512 |  0:00:06s
epoch 12 | loss: 0.55576 | val_accuracy: 0.75581 |  0:00:07s
epoch 13 | loss: 0.52199 | val_accuracy: 0.77907 |  0:00:07s
epoch 14 | loss: 0.50244 | val_accuracy: 0.79651 |  0:00:08s
epoch 15 | loss: 0.45095 | val_accuracy: 0.7907  |  0:00:09s
epoch 16 | loss: 0.44972



epoch 0  | loss: 2.33001 | val_accuracy: 0.54386 |  0:00:00s
epoch 1  | loss: 1.98526 | val_accuracy: 0.53801 |  0:00:01s
epoch 2  | loss: 1.63235 | val_accuracy: 0.62573 |  0:00:02s
epoch 3  | loss: 1.39452 | val_accuracy: 0.63743 |  0:00:02s
epoch 4  | loss: 1.20219 | val_accuracy: 0.66667 |  0:00:03s
epoch 5  | loss: 1.05085 | val_accuracy: 0.67836 |  0:00:03s
epoch 6  | loss: 0.90278 | val_accuracy: 0.68421 |  0:00:04s
epoch 7  | loss: 0.84449 | val_accuracy: 0.69006 |  0:00:05s
epoch 8  | loss: 0.76786 | val_accuracy: 0.7076  |  0:00:05s
epoch 9  | loss: 0.70628 | val_accuracy: 0.74854 |  0:00:06s
epoch 10 | loss: 0.62647 | val_accuracy: 0.74269 |  0:00:06s
epoch 11 | loss: 0.57974 | val_accuracy: 0.76023 |  0:00:07s
epoch 12 | loss: 0.59308 | val_accuracy: 0.77193 |  0:00:08s
epoch 13 | loss: 0.50945 | val_accuracy: 0.78947 |  0:00:08s
epoch 14 | loss: 0.48104 | val_accuracy: 0.81871 |  0:00:09s
epoch 15 | loss: 0.44974 | val_accuracy: 0.83626 |  0:00:09s
epoch 16 | loss: 0.47025



epoch 0  | loss: 2.44963 | val_accuracy: 0.50877 |  0:00:00s
epoch 1  | loss: 2.03852 | val_accuracy: 0.53801 |  0:00:01s
epoch 2  | loss: 1.76105 | val_accuracy: 0.5848  |  0:00:01s
epoch 3  | loss: 1.48413 | val_accuracy: 0.61404 |  0:00:02s
epoch 4  | loss: 1.37062 | val_accuracy: 0.63158 |  0:00:03s
epoch 5  | loss: 1.16767 | val_accuracy: 0.69006 |  0:00:03s
epoch 6  | loss: 1.02174 | val_accuracy: 0.73684 |  0:00:04s
epoch 7  | loss: 0.88879 | val_accuracy: 0.76608 |  0:00:04s
epoch 8  | loss: 0.78079 | val_accuracy: 0.76608 |  0:00:05s
epoch 9  | loss: 0.70157 | val_accuracy: 0.76023 |  0:00:05s
epoch 10 | loss: 0.624   | val_accuracy: 0.78363 |  0:00:06s
epoch 11 | loss: 0.59524 | val_accuracy: 0.78947 |  0:00:06s
epoch 12 | loss: 0.57487 | val_accuracy: 0.78363 |  0:00:07s
epoch 13 | loss: 0.54472 | val_accuracy: 0.77778 |  0:00:08s
epoch 14 | loss: 0.45835 | val_accuracy: 0.77193 |  0:00:08s
epoch 15 | loss: 0.46746 | val_accuracy: 0.78947 |  0:00:09s
epoch 16 | loss: 0.50619



epoch 0  | loss: 2.31377 | val_accuracy: 0.51462 |  0:00:00s
epoch 1  | loss: 2.04394 | val_accuracy: 0.50877 |  0:00:01s
epoch 2  | loss: 1.74471 | val_accuracy: 0.50877 |  0:00:01s
epoch 3  | loss: 1.49479 | val_accuracy: 0.51462 |  0:00:02s
epoch 4  | loss: 1.26079 | val_accuracy: 0.53216 |  0:00:02s
epoch 5  | loss: 1.16333 | val_accuracy: 0.54386 |  0:00:03s
epoch 6  | loss: 0.98871 | val_accuracy: 0.59649 |  0:00:03s
epoch 7  | loss: 0.89695 | val_accuracy: 0.61988 |  0:00:04s
epoch 8  | loss: 0.7388  | val_accuracy: 0.60819 |  0:00:05s
epoch 9  | loss: 0.68825 | val_accuracy: 0.68421 |  0:00:05s
epoch 10 | loss: 0.68512 | val_accuracy: 0.67836 |  0:00:06s
epoch 11 | loss: 0.586   | val_accuracy: 0.69591 |  0:00:06s
epoch 12 | loss: 0.61249 | val_accuracy: 0.73099 |  0:00:07s
epoch 13 | loss: 0.5095  | val_accuracy: 0.73099 |  0:00:08s
epoch 14 | loss: 0.52159 | val_accuracy: 0.76023 |  0:00:08s
epoch 15 | loss: 0.47512 | val_accuracy: 0.74854 |  0:00:09s
epoch 16 | loss: 0.42561



epoch 0  | loss: 2.32747 | val_accuracy: 0.51462 |  0:00:00s
epoch 1  | loss: 2.03111 | val_accuracy: 0.51462 |  0:00:01s
epoch 2  | loss: 1.74432 | val_accuracy: 0.56725 |  0:00:01s
epoch 3  | loss: 1.50115 | val_accuracy: 0.5848  |  0:00:02s
epoch 4  | loss: 1.17137 | val_accuracy: 0.62573 |  0:00:02s
epoch 5  | loss: 1.10357 | val_accuracy: 0.63743 |  0:00:03s
epoch 6  | loss: 0.95991 | val_accuracy: 0.67251 |  0:00:04s
epoch 7  | loss: 0.82014 | val_accuracy: 0.69591 |  0:00:04s
epoch 8  | loss: 0.74791 | val_accuracy: 0.73684 |  0:00:05s
epoch 9  | loss: 0.61677 | val_accuracy: 0.75439 |  0:00:05s
epoch 10 | loss: 0.57564 | val_accuracy: 0.78947 |  0:00:06s
epoch 11 | loss: 0.56688 | val_accuracy: 0.77778 |  0:00:06s
epoch 12 | loss: 0.53995 | val_accuracy: 0.80117 |  0:00:07s
epoch 13 | loss: 0.47725 | val_accuracy: 0.82456 |  0:00:08s
epoch 14 | loss: 0.48994 | val_accuracy: 0.82456 |  0:00:08s
epoch 15 | loss: 0.48189 | val_accuracy: 0.85965 |  0:00:09s
epoch 16 | loss: 0.44553

[I 2024-08-05 16:18:14,155] A new study created in memory with name: no-name-bd0fa84b-a905-4d94-ad41-9547195cdef0


                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.867725  0.945684         0.915232   
KNN                         0.857143  0.911652         0.917881   
Decision Tree               0.846561  0.893002         0.886093   
Random Forest               0.883598  0.954586         0.915232   
Gradient Boosting            0.89418  0.955601         0.913907   
XGBoost                     0.888889  0.956277         0.913907   
LightGBM                    0.862434  0.935542         0.911258   
CatBoost                    0.878307  0.949966         0.919205   
MLP                         0.883721  0.960239         0.921736   
DNN                         0.883721  0.935248         0.908867   
DCN                         0.893023  0.963574         0.912362   
Wide_and_Deep               0.883721  0.960152         0.913518   
XGBoost + NN                0.893023  0.940662         0.907738   
LightGBM + NN               0.897674  0.961452         0.91471

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:19:34,699] Trial 0 finished with value: 0.48372093023255813 and parameters: {'heads': 3, 'dim': 234, 'depth': 5, 'mlp_dim': 143, 'dropout': 0.04781469994094323, 'learning_rate': 0.06275454795338709, 'batch_size': 32, 'num_epochs': 47}. Best is trial 0 with value: 0.48372093023255813.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:19:35,717] Trial 1 finished with value: 0.8790697674418605 and parameters: {'heads': 1, 'dim': 10, 'depth': 1, 'mlp_dim': 236, 'dropout': 0.2519492654352334, 'learning_rate': 0.0005678794920130348, 'batch_size': 256, 'num_epochs': 18}. Best is trial 1 with value: 0.8790697674418605.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:19:43,475] Trial 2 finished with value: 0.8837209302325582 and parameters: {'heads': 2, 'dim': 40, 'depth': 2, 'mlp_dim': 146, 'dropout': 0.14848789937636014, 'lear

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.867725  0.945684         0.915232   
KNN                         0.857143  0.911652         0.917881   
Decision Tree               0.846561  0.893002         0.886093   
Random Forest               0.883598  0.954586         0.915232   
Gradient Boosting            0.89418  0.955601         0.913907   
XGBoost                     0.888889  0.956277         0.913907   
LightGBM                    0.862434  0.935542         0.911258   
CatBoost                    0.878307  0.949966         0.919205   
MLP                         0.883721  0.960239         0.921736   
DNN                         0.883721  0.935248         0.908867   
DCN                         0.893023  0.963574         0.912362   
Wide_and_Deep               0.883721  0.960152         0.913518   
XGBoost + NN                0.893023  0.940662         0.907738   
LightGBM + NN               0.897674  0.961452         0.91471

In [139]:
file_prefix = "red"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
if len(df) >= 1405:
    df = df.sample(n=1405, random_state=42, replace=True)
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 537, number of negative: 587
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000167 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 918
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.477758 -> initscore=-0.089027
[LightGBM] [Info] Start training from score -0.089027
[LightGBM] [Info] Number of positive: 537, number of negative: 587
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 918
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.477758 -> initscore=-0.089027
[LightGBM] [Info] Start training from score -0.089027
[LightGBM] [Info] Number

[I 2024-08-05 16:35:40,073] A new study created in memory with name: no-name-7e66aa0c-fa00-4ed2-8535-11f2c2ee2d83


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.003988                          0.0   
KNN                                    0.002992                     0.006981   

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:35:45,648] Trial 0 finished with value: 0.8703071672354948 and parameters: {'hidden_dim_0': 198, 'hidden_dim_1': 150, 'hidden_dim_2': 140, 'learning_rate': 0.009509590924985739, 'batch_size': 128, 'num_epochs': 76}. Best is trial 0 with value: 0.8703071672354948.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:35:47,101] Trial 1 finished with value: 0.7406143344709898 and parameters: {'hidden_dim_0': 203, 'hidden_dim_1': 55, 'hidden_dim_2': 32, 'learning_rate': 0.00014044041866849267, 'batch_size': 128, 'num_epochs': 21}. Best is trial 0 with value: 0.8703071672354948.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:35:49,273] Trial 2 finished with value: 0.8327645051194539 and parameters: {'hidden_dim_0': 238, 'hidden_dim_1': 39, 'hidden_dim_2': 159, 'learning_rate': 0.00407399920046271, 'batch_size': 128, 'num_epoch

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.003988                          0.0   
KNN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:36:57,115] Trial 0 finished with value: 0.8839590443686007 and parameters: {'cross_layers': 1, 'hidden_layer_0': 178, 'hidden_layer_1': 75, 'hidden_layer_2': 154, 'learning_rate': 0.0021807867337274266, 'batch_size': 128, 'num_epochs': 85}. Best is trial 0 with value: 0.8839590443686007.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:36:59,589] Trial 1 finished with value: 0.7918088737201365 and parameters: {'cross_layers': 1, 'hidden_layer_0': 241, 'hidden_layer_1': 54, 'hidden_layer_2': 243, 'learning_rate': 0.012394638265171091, 'batch_size': 64, 'num_epochs': 19}. Best is trial 0 with value: 0.8839590443686007.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:37:00,857] Trial 2 finished with value: 0.7542662116040956 and parameters: {'cross_layers': 3, 'hidden_layer_0': 217, 'hidden_layer_1': 56, 'hidden_layer_2':

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   
DCN                   0.87372  0.921202         0.871571        0.007737   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logisti

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:38:33,602] Trial 0 finished with value: 0.8771331058020477 and parameters: {'hidden_layer_0': 63, 'hidden_layer_1': 184, 'hidden_layer_2': 205, 'learning_rate': 0.009975289563756802, 'batch_size': 256, 'num_epochs': 44}. Best is trial 0 with value: 0.8771331058020477.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:38:42,676] Trial 1 finished with value: 0.8805460750853242 and parameters: {'hidden_layer_0': 239, 'hidden_layer_1': 138, 'hidden_layer_2': 220, 'learning_rate': 0.0004352197675509856, 'batch_size': 32, 'num_epochs': 43}. Best is trial 1 with value: 0.8805460750853242.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:38:48,506] Trial 2 finished with value: 0.8532423208191127 and parameters: {'hidden_layer_0': 38, 'hidden_layer_1': 250, 'hidden_layer_2': 200, 'learning_rate': 0.0058701482193410865, 'batch_siz

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   
DCN                   0.87372  0.921202         0.871571        0.007737   
Wide_and_Deep         0.87372  0.924697         0.875841         0.01422   

           

  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:40:17,279] Trial 0 finished with value: 0.8703071672354948 and parameters: {'n_estimators': 184, 'max_depth': 10, 'xgb_learning_rate': 0.09347328243285082, 'subsample': 0.6223811688788973, 'colsample_bytree': 0.5177685004782948, 'use_hidden_layer_0': False, 'use_hidden_layer_1': False, 'use_hidden_layer_2': True, 'hidden_layer_2': 81, 'nn_learning_rate': 0.0003962304454732642, 'batch_size': 32, 'num_epochs': 100}. Best is trial 0 with value: 0.8703071672354948.
  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:40:25,809] Trial 1 finished with value: 0.6825938566552902 and parameters: {'n_estimat

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   
DCN                   0.87372  0.921202         0.871571        0.007737   
Wide_and_Deep         0.87372  0.924697         0.875841         0.01422   
XGBoost + NN

[I 2024-08-05 16:42:03,884] Trial 0 finished with value: 0.8839590443686007 and parameters: {'n_estimators': 200, 'max_depth': 6, 'lgb_learning_rate': 0.05852186124924521, 'num_leaves': 98, 'subsample': 0.9384889018215415, 'colsample_bytree': 0.6238470003933929, 'hidden_layer_0': 193, 'hidden_layer_1': 127, 'hidden_layer_2': 143, 'nn_learning_rate': 0.05447065770557885, 'batch_size': 64, 'num_epochs': 26}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:42:09,527] Trial 1 finished with value: 0.8839590443686007 and parameters: {'n_estimators': 130, 'max_depth': 8, 'lgb_learning_rate': 0.03731044537498617, 'num_leaves': 98, 'subsample': 0.8328686978176625, 'colsample_bytree': 0.5025657064458324, 'hidden_layer_0': 41, 'hidden_layer_1': 41, 'hidden_layer_2': 173, 'nn_learning_rate': 0.000162176380139197, 'batch_size': 64, 'num_epochs': 66}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:42:22,845] Trial 2 finished with value: 0.7303754266211604 and parameters: {'n_estimators': 101, 'max_depth': 4, 'lgb_learning_rate': 0.001623084380217426, 'num_leaves': 90, 'subsample': 0.7067257830279958, 'colsample_bytree': 0.9709541533405355, 'hidden_layer_0': 135, 'hidden_layer_1': 230, 'hidden_layer_2': 204, 'nn_learning_rate': 0.00947467425403387, 'batch_size': 64, 'num_epochs': 96}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:42:24,906] Trial 3 finished with value: 0.7372013651877133 and parameters: {'n_estimators': 288, 'max_depth': 3, 'lgb_learning_rate': 0.00038884483485432755, 'num_leaves': 58, 'subsample': 0.6842073284948539, 'colsample_bytree': 0.6797461198437256, 'hidden_layer_0': 174, 'hidden_layer_1': 188, 'hidden_layer_2': 77, 'nn_learning_rate': 0.0003020455146230712, 'batch_size': 32, 'num_epochs': 10}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004488 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:42:30,947] Trial 4 finished with value: 0.78839590443686 and parameters: {'n_estimators': 143, 'max_depth': 7, 'lgb_learning_rate': 0.00027787898876008825, 'num_leaves': 30, 'subsample': 0.5120087568618505, 'colsample_bytree': 0.6520335939905932, 'hidden_layer_0': 174, 'hidden_layer_1': 93, 'hidden_layer_2': 64, 'nn_learning_rate': 0.002690124667897203, 'batch_size': 64, 'num_epochs': 62}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001742 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:42:49,159] Trial 5 finished with value: 0.7440273037542662 and parameters: {'n_estimators': 218, 'max_depth': 3, 'lgb_learning_rate': 0.0028591361740450193, 'num_leaves': 31, 'subsample': 0.9262318355430337, 'colsample_bytree': 0.619180730844414, 'hidden_layer_0': 107, 'hidden_layer_1': 212, 'hidden_layer_2': 162, 'nn_learning_rate': 0.040060633606208214, 'batch_size': 32, 'num_epochs': 84}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:43:02,057] Trial 6 finished with value: 0.7815699658703071 and parameters: {'n_estimators': 289, 'max_depth': 10, 'lgb_learning_rate': 0.00013646067180737685, 'num_leaves': 35, 'subsample': 0.6008042888742465, 'colsample_bytree': 0.7993700898167841, 'hidden_layer_0': 238, 'hidden_layer_1': 183, 'hidden_layer_2': 213, 'nn_learning_rate': 0.0022123816378083775, 'batch_size': 64, 'num_epochs': 96}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:43:06,118] Trial 7 finished with value: 0.7406143344709898 and parameters: {'n_estimators': 248, 'max_depth': 3, 'lgb_learning_rate': 0.0031219569027135094, 'num_leaves': 73, 'subsample': 0.6105023363924706, 'colsample_bytree': 0.7928818165894082, 'hidden_layer_0': 87, 'hidden_layer_1': 165, 'hidden_layer_2': 179, 'nn_learning_rate': 0.0003943688661699188, 'batch_size': 32, 'num_epochs': 21}. Best is trial 0 with value: 0.8839590443686007.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000145 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:43:16,154] Trial 8 finished with value: 0.8873720136518771 and parameters: {'n_estimators': 125, 'max_depth': 8, 'lgb_learning_rate': 0.07605377114609456, 'num_leaves': 68, 'subsample': 0.9825955595752316, 'colsample_bytree': 0.9885123395000996, 'hidden_layer_0': 213, 'hidden_layer_1': 158, 'hidden_layer_2': 178, 'nn_learning_rate': 0.0237810718338925, 'batch_size': 32, 'num_epochs': 43}. Best is trial 8 with value: 0.8873720136518771.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549


[I 2024-08-05 16:43:17,943] Trial 9 finished with value: 0.8088737201365188 and parameters: {'n_estimators': 223, 'max_depth': 9, 'lgb_learning_rate': 0.00041434485502950193, 'num_leaves': 40, 'subsample': 0.7781850696004784, 'colsample_bytree': 0.6688315278484578, 'hidden_layer_0': 193, 'hidden_layer_1': 223, 'hidden_layer_2': 178, 'nn_learning_rate': 0.013337256021463831, 'batch_size': 64, 'num_epochs': 12}. Best is trial 8 with value: 0.8873720136518771.


[LightGBM] [Info] Number of positive: 578, number of negative: 590
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000189 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 990
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494863 -> initscore=-0.020549
[LightGBM] [Info] Start training from score -0.020549
[LightGBM] [Info] Number of positive: 462, number of negative: 472
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 934, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.494647 -> initscore=-0.021414
[LightGBM] [Info] Start training from score -0.021414
[LightGBM] [Info] Number 

[I 2024-08-05 16:44:14,211] A new study created in memory with name: no-name-c3940eb4-86a9-4015-b52e-ce66a600a011


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   
DCN                   0.87372  0.921202         0.871571        0.007737   
Wide_and_Deep         0.87372  0.924697         0.875841         0.01422   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:44:18,926] Trial 0 finished with value: 0.764505119453925 and parameters: {'num_heads': 3, 'embedding_dim': 30, 'num_layers': 2, 'hidden_layer_0': 65, 'hidden_layer_1': 242, 'hidden_layer_2': 172, 'nn_learning_rate': 0.0004110782719172624, 'batch_size': 256, 'num_epochs': 38}. Best is trial 0 with value: 0.764505119453925.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:44:25,894] Trial 1 finished with value: 0.7167235494880546 and parameters: {'num_heads': 6, 'embedding_dim': 42, 'num_layers': 1, 'hidden_layer_0': 50, 'hidden_layer_1': 221, 'hidden_layer_2': 256, 'nn_learning_rate': 0.06139526713717291, 'batch_size': 256, 'num_epochs': 97}. Best is trial 0 with value: 0.764505119453925.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:44:36,391] Trial 2 finished with value: 0.5460750853242321 and par

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   
DCN                   0.87372  0.921202         0.871571        0.007737   
Wide_and_Deep         0.87372  0.924697         0.875841         0.01422   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:46:14,846] Trial 0 finished with value: 0.8498293515358362 and parameters: {'num_heads': 6, 'embedding_dim': 6, 'num_layers': 2, 'hidden_layer_0': 218, 'hidden_layer_1': 140, 'hidden_layer_2': 97, 'nn_learning_rate': 0.0017419785821041417, 'batch_size': 32, 'num_epochs': 56}. Best is trial 0 with value: 0.8498293515358362.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:46:24,863] Trial 1 finished with value: 0.7542662116040956 and parameters: {'num_heads': 4, 'embedding_dim': 60, 'num_layers': 2, 'hidden_layer_0': 111, 'hidden_layer_1': 174, 'hidden_layer_2': 126, 'nn_learning_rate': 0.00035376628150675014, 'batch_size': 256, 'num_epochs': 33}. Best is trial 0 with value: 0.8498293515358362.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:46:36,012] Trial 2 finished with value: 0.8703071672354948 an

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.736655  0.815283         0.754448        0.007097   
KNN                  0.886121  0.959857         0.869222        0.016732   
Decision Tree        0.871886  0.873841         0.847861        0.020758   
Random Forest        0.882562  0.958151         0.881683        0.021651   
Gradient Boosting     0.88968  0.950891         0.885246        0.015404   
XGBoost              0.896797  0.952674         0.878127        0.012646   
LightGBM             0.886121  0.952929         0.875452        0.012179   
CatBoost             0.907473  0.957412         0.885238        0.020116   
MLP                  0.880546  0.903029         0.856153        0.010632   
DNN                  0.887372  0.925489          0.87415        0.013898   
DCN                   0.87372  0.921202         0.871571        0.007737   
Wide_and_Deep         0.87372  0.924697         0.875841         0.01422   
XGBoost + NN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:48:52,367] Trial 0 finished with value: 0.8020477815699659 and parameters: {'num_layers': 3, 'hidden_layer_0': 180, 'hidden_layer_1': 71, 'hidden_layer_2': 208, 'learning_rate': 0.00028142414142106757, 'batch_size': 128, 'num_epochs': 60}. Best is trial 0 with value: 0.8020477815699659.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:49:01,040] Trial 1 finished with value: 0.8225255972696246 and parameters: {'num_layers': 5, 'hidden_layer_0': 59, 'hidden_layer_1': 213, 'hidden_layer_2': 141, 'hidden_layer_3': 171, 'hidden_layer_4': 53, 'learning_rate': 0.020415918039068632, 'batch_size': 128, 'num_epochs': 88}. Best is trial 1 with value: 0.8225255972696246.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:49:03,169] Trial 2 finished with value: 0.7508532423208191 and parameters: {'num_layers': 2, 'hidden_layer_0': 193

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.736655  0.815283         0.754448   
KNN                         0.886121  0.959857         0.869222   
Decision Tree               0.871886  0.873841         0.847861   
Random Forest               0.882562  0.958151         0.881683   
Gradient Boosting            0.88968  0.950891         0.885246   
XGBoost                     0.896797  0.952674         0.878127   
LightGBM                    0.886121  0.952929         0.875452   
CatBoost                    0.907473  0.957412         0.885238   
MLP                         0.880546  0.903029         0.856153   
DNN                         0.887372  0.925489          0.87415   
DCN                          0.87372  0.921202         0.871571   
Wide_and_Deep                0.87372  0.924697         0.875841   
XGBoost + NN                 0.87372  0.931687         0.858758   
LightGBM + NN                0.87372  0.901911         0.88700

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:50:49,563] Trial 0 finished with value: 0.7303754266211604 and parameters: {'hidden_dim': 129, 'learning_rate': 0.0038884887286175307, 'batch_size': 256, 'num_epochs': 27}. Best is trial 0 with value: 0.7303754266211604.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:50:50,365] Trial 1 finished with value: 0.7440273037542662 and parameters: {'hidden_dim': 231, 'learning_rate': 0.003131766261255994, 'batch_size': 32, 'num_epochs': 10}. Best is trial 1 with value: 0.7440273037542662.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:50:54,065] Trial 2 finished with value: 0.8532423208191127 and parameters: {'hidden_dim': 250, 'learning_rate': 0.01463144393383424, 'batch_size': 64, 'num_epochs': 60}. Best is trial 2 with value: 0.8532423208191127.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.736655  0.815283         0.754448   
KNN                         0.886121  0.959857         0.869222   
Decision Tree               0.871886  0.873841         0.847861   
Random Forest               0.882562  0.958151         0.881683   
Gradient Boosting            0.88968  0.950891         0.885246   
XGBoost                     0.896797  0.952674         0.878127   
LightGBM                    0.886121  0.952929         0.875452   
CatBoost                    0.907473  0.957412         0.885238   
MLP                         0.880546  0.903029         0.856153   
DNN                         0.887372  0.925489          0.87415   
DCN                          0.87372  0.921202         0.871571   
Wide_and_Deep                0.87372  0.924697         0.875841   
XGBoost + NN                 0.87372  0.931687         0.858758   
LightGBM + NN                0.87372  0.901911         0.88700

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:51:47,893] Trial 0 finished with value: 0.764505119453925 and parameters: {'num_layers': 3, 'num_trees': 5, 'tree_dim': 41, 'learning_rate': 0.0005870031675304745, 'batch_size': 64, 'num_epochs': 87}. Best is trial 0 with value: 0.764505119453925.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:51:49,925] Trial 1 finished with value: 0.7918088737201365 and parameters: {'num_layers': 5, 'num_trees': 1, 'tree_dim': 40, 'learning_rate': 0.015679396190945705, 'batch_size': 256, 'num_epochs': 35}. Best is trial 1 with value: 0.7918088737201365.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 16:52:25,745] Trial 2 finished with value: 0.8873720136518771 and parameters: {'num_layers': 5, 'num_trees': 6, 'tree_dim': 50, 'learning_rate': 0.02044143349611657, 'batch_size': 64, 'num_epochs': 98}. Best is trial 2 with value: 0.88737

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.736655  0.815283         0.754448   
KNN                         0.886121  0.959857         0.869222   
Decision Tree               0.871886  0.873841         0.847861   
Random Forest               0.882562  0.958151         0.881683   
Gradient Boosting            0.88968  0.950891         0.885246   
XGBoost                     0.896797  0.952674         0.878127   
LightGBM                    0.886121  0.952929         0.875452   
CatBoost                    0.907473  0.957412         0.885238   
MLP                         0.880546  0.903029         0.856153   
DNN                         0.887372  0.925489          0.87415   
DCN                          0.87372  0.921202         0.871571   
Wide_and_Deep                0.87372  0.924697         0.875841   
XGBoost + NN                 0.87372  0.931687         0.858758   
LightGBM + NN                0.87372  0.901911         0.88700

  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.42826 | val_accuracy: 0.5529  |  0:00:01s
epoch 1  | loss: 1.20266 | val_accuracy: 0.54608 |  0:00:03s
epoch 2  | loss: 1.06829 | val_accuracy: 0.54608 |  0:00:04s
epoch 3  | loss: 0.95296 | val_accuracy: 0.59386 |  0:00:06s
epoch 4  | loss: 0.88342 | val_accuracy: 0.5802  |  0:00:07s
epoch 5  | loss: 0.86449 | val_accuracy: 0.56655 |  0:00:09s
epoch 6  | loss: 0.79935 | val_accuracy: 0.58362 |  0:00:10s
epoch 7  | loss: 0.74839 | val_accuracy: 0.61433 |  0:00:12s
epoch 8  | loss: 0.77389 | val_accuracy: 0.59727 |  0:00:13s
epoch 9  | loss: 0.74338 | val_accuracy: 0.62457 |  0:00:15s
epoch 10 | loss: 0.71361 | val_accuracy: 0.63823 |  0:00:17s
epoch 11 | loss: 0.66346 | val_accuracy: 0.6587  |  0:00:18s
epoch 12 | loss: 0.70518 | val_accuracy: 0.66553 |  0:00:20s
epoch 13 | loss: 0.69106 | val_accuracy: 0.66212 |  0:00:21s
epoch 14 | loss: 0.71272 | val_accuracy: 0.65529 |  0:00:23s
epoch 15 | loss: 0.66789 | val_accuracy: 0.67918 |  0:00:24s
epoch 16 | loss: 0.7055 

[I 2024-08-05 16:57:07,583] Trial 0 finished with value: 0.6791808873720137 and parameters: {'n_d': 46, 'n_a': 28, 'n_steps': 3, 'gamma': 1.1124995832317377, 'lambda_sparse': 0.00015093343031598766, 'learning_rate': 0.00012382888649758187, 'batch_size': 32, 'num_epochs': 20}. Best is trial 0 with value: 0.6791808873720137.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 4.53646 | val_accuracy: 0.50171 |  0:00:01s
epoch 1  | loss: 4.34503 | val_accuracy: 0.49488 |  0:00:02s
epoch 2  | loss: 4.18584 | val_accuracy: 0.49147 |  0:00:04s
epoch 3  | loss: 3.99454 | val_accuracy: 0.49147 |  0:00:05s
epoch 4  | loss: 3.95054 | val_accuracy: 0.49147 |  0:00:06s
epoch 5  | loss: 3.87047 | val_accuracy: 0.48464 |  0:00:08s
epoch 6  | loss: 3.65334 | val_accuracy: 0.48805 |  0:00:09s
epoch 7  | loss: 3.4039  | val_accuracy: 0.50512 |  0:00:11s
epoch 8  | loss: 3.23757 | val_accuracy: 0.50512 |  0:00:12s
epoch 9  | loss: 3.169   | val_accuracy: 0.51536 |  0:00:14s
epoch 10 | loss: 3.14359 | val_accuracy: 0.49147 |  0:00:16s
epoch 11 | loss: 2.93423 | val_accuracy: 0.48464 |  0:00:17s
epoch 12 | loss: 2.64251 | val_accuracy: 0.52218 |  0:00:19s
epoch 13 | loss: 2.66896 | val_accuracy: 0.51536 |  0:00:20s
epoch 14 | loss: 2.63011 | val_accuracy: 0.49829 |  0:00:21s
epoch 15 | loss: 2.43072 | val_accuracy: 0.50171 |  0:00:23s
epoch 16 | loss: 2.22988

[I 2024-08-05 16:57:42,350] Trial 1 finished with value: 0.5221843003412969 and parameters: {'n_d': 43, 'n_a': 37, 'n_steps': 9, 'gamma': 1.4073131656127047, 'lambda_sparse': 0.00019277100367513696, 'learning_rate': 0.00013699998888669384, 'batch_size': 128, 'num_epochs': 92}. Best is trial 0 with value: 0.6791808873720137.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.9077  | val_accuracy: 0.50512 |  0:00:00s
epoch 1  | loss: 2.46059 | val_accuracy: 0.50512 |  0:00:01s
epoch 2  | loss: 2.12198 | val_accuracy: 0.50853 |  0:00:01s
epoch 3  | loss: 1.86259 | val_accuracy: 0.51536 |  0:00:02s
epoch 4  | loss: 1.52035 | val_accuracy: 0.51536 |  0:00:03s
epoch 5  | loss: 1.27502 | val_accuracy: 0.53242 |  0:00:03s
epoch 6  | loss: 1.01936 | val_accuracy: 0.55631 |  0:00:04s
epoch 7  | loss: 0.84762 | val_accuracy: 0.57338 |  0:00:05s
epoch 8  | loss: 0.77058 | val_accuracy: 0.62457 |  0:00:05s
epoch 9  | loss: 0.72476 | val_accuracy: 0.63481 |  0:00:06s
epoch 10 | loss: 0.67557 | val_accuracy: 0.64846 |  0:00:06s
epoch 11 | loss: 0.64467 | val_accuracy: 0.64164 |  0:00:07s
epoch 12 | loss: 0.63751 | val_accuracy: 0.6314  |  0:00:08s
epoch 13 | loss: 0.6102  | val_accuracy: 0.62799 |  0:00:08s
epoch 14 | loss: 0.59817 | val_accuracy: 0.6587  |  0:00:09s
epoch 15 | loss: 0.57571 | val_accuracy: 0.67235 |  0:00:10s
epoch 16 | loss: 0.56674

[I 2024-08-05 16:58:03,179] Trial 2 finished with value: 0.7474402730375427 and parameters: {'n_d': 26, 'n_a': 42, 'n_steps': 4, 'gamma': 1.1999341761958173, 'lambda_sparse': 2.1589607983276974e-06, 'learning_rate': 0.0006322359727102824, 'batch_size': 128, 'num_epochs': 61}. Best is trial 2 with value: 0.7474402730375427.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 3.6051  | val_accuracy: 0.49488 |  0:00:01s
epoch 1  | loss: 2.59937 | val_accuracy: 0.51536 |  0:00:03s
epoch 2  | loss: 1.87051 | val_accuracy: 0.57679 |  0:00:06s
epoch 3  | loss: 1.38367 | val_accuracy: 0.57679 |  0:00:08s
epoch 4  | loss: 1.15988 | val_accuracy: 0.6041  |  0:00:09s
epoch 5  | loss: 1.09082 | val_accuracy: 0.61092 |  0:00:11s
epoch 6  | loss: 1.01295 | val_accuracy: 0.66212 |  0:00:14s
epoch 7  | loss: 0.91463 | val_accuracy: 0.68259 |  0:00:16s
epoch 8  | loss: 0.93726 | val_accuracy: 0.6587  |  0:00:17s
epoch 9  | loss: 0.91464 | val_accuracy: 0.65188 |  0:00:19s
epoch 10 | loss: 0.86961 | val_accuracy: 0.66894 |  0:00:21s
epoch 11 | loss: 0.78892 | val_accuracy: 0.64846 |  0:00:23s
epoch 12 | loss: 0.84908 | val_accuracy: 0.66212 |  0:00:25s
epoch 13 | loss: 0.75974 | val_accuracy: 0.66212 |  0:00:27s
epoch 14 | loss: 0.75807 | val_accuracy: 0.67235 |  0:00:29s
epoch 15 | loss: 0.78493 | val_accuracy: 0.67235 |  0:00:31s
epoch 16 | loss: 0.78527

[I 2024-08-05 16:59:48,771] Trial 3 finished with value: 0.757679180887372 and parameters: {'n_d': 62, 'n_a': 62, 'n_steps': 10, 'gamma': 1.0385725811024762, 'lambda_sparse': 4.843864282621532e-05, 'learning_rate': 0.0005013950359170847, 'batch_size': 128, 'num_epochs': 58}. Best is trial 3 with value: 0.757679180887372.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.47095 | val_accuracy: 0.62116 |  0:00:01s
epoch 1  | loss: 0.86178 | val_accuracy: 0.67235 |  0:00:02s
epoch 2  | loss: 0.69124 | val_accuracy: 0.66894 |  0:00:03s
epoch 3  | loss: 0.6584  | val_accuracy: 0.6587  |  0:00:05s
epoch 4  | loss: 0.64156 | val_accuracy: 0.64846 |  0:00:06s
epoch 5  | loss: 0.67024 | val_accuracy: 0.67577 |  0:00:07s
epoch 6  | loss: 0.60918 | val_accuracy: 0.70307 |  0:00:09s
epoch 7  | loss: 0.5716  | val_accuracy: 0.67235 |  0:00:10s
epoch 8  | loss: 0.57655 | val_accuracy: 0.69283 |  0:00:12s
epoch 9  | loss: 0.53355 | val_accuracy: 0.74744 |  0:00:13s
epoch 10 | loss: 0.53664 | val_accuracy: 0.72014 |  0:00:14s
epoch 11 | loss: 0.5126  | val_accuracy: 0.72696 |  0:00:16s
epoch 12 | loss: 0.49899 | val_accuracy: 0.72355 |  0:00:17s
epoch 13 | loss: 0.4706  | val_accuracy: 0.71331 |  0:00:18s
epoch 14 | loss: 0.52689 | val_accuracy: 0.69625 |  0:00:20s
epoch 15 | loss: 0.56428 | val_accuracy: 0.71331 |  0:00:21s
epoch 16 | loss: 0.47429

[I 2024-08-05 17:00:16,561] Trial 4 finished with value: 0.7474402730375427 and parameters: {'n_d': 44, 'n_a': 10, 'n_steps': 9, 'gamma': 1.074249338417025, 'lambda_sparse': 6.15113127874922e-05, 'learning_rate': 0.006068901327300934, 'batch_size': 128, 'num_epochs': 86}. Best is trial 3 with value: 0.757679180887372.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.01247 | val_accuracy: 0.57338 |  0:00:01s
epoch 1  | loss: 0.6669  | val_accuracy: 0.64505 |  0:00:03s
epoch 2  | loss: 0.62775 | val_accuracy: 0.64846 |  0:00:05s
epoch 3  | loss: 0.53128 | val_accuracy: 0.67235 |  0:00:06s
epoch 4  | loss: 0.53997 | val_accuracy: 0.67918 |  0:00:08s
epoch 5  | loss: 0.55491 | val_accuracy: 0.67235 |  0:00:10s
epoch 6  | loss: 0.57192 | val_accuracy: 0.73379 |  0:00:12s
epoch 7  | loss: 0.52288 | val_accuracy: 0.70648 |  0:00:13s
epoch 8  | loss: 0.49478 | val_accuracy: 0.69966 |  0:00:15s
epoch 9  | loss: 0.53053 | val_accuracy: 0.70307 |  0:00:17s
epoch 10 | loss: 0.52211 | val_accuracy: 0.70648 |  0:00:19s
epoch 11 | loss: 0.51444 | val_accuracy: 0.7099  |  0:00:20s
epoch 12 | loss: 0.52007 | val_accuracy: 0.71672 |  0:00:22s
epoch 13 | loss: 0.49566 | val_accuracy: 0.72014 |  0:00:24s
epoch 14 | loss: 0.49498 | val_accuracy: 0.68601 |  0:00:26s
epoch 15 | loss: 0.52898 | val_accuracy: 0.69283 |  0:00:27s
epoch 16 | loss: 0.49166

[I 2024-08-05 17:00:46,980] Trial 5 finished with value: 0.7337883959044369 and parameters: {'n_d': 46, 'n_a': 51, 'n_steps': 6, 'gamma': 1.1628611963347204, 'lambda_sparse': 7.530459122699947e-05, 'learning_rate': 0.02828498224825726, 'batch_size': 64, 'num_epochs': 38}. Best is trial 3 with value: 0.757679180887372.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.7404  | val_accuracy: 0.67577 |  0:00:00s
epoch 1  | loss: 0.57605 | val_accuracy: 0.61775 |  0:00:01s
epoch 2  | loss: 0.55145 | val_accuracy: 0.63823 |  0:00:02s
epoch 3  | loss: 0.55773 | val_accuracy: 0.69283 |  0:00:03s
epoch 4  | loss: 0.51069 | val_accuracy: 0.68259 |  0:00:04s
epoch 5  | loss: 0.53267 | val_accuracy: 0.6587  |  0:00:04s
epoch 6  | loss: 0.52277 | val_accuracy: 0.70648 |  0:00:05s
epoch 7  | loss: 0.53324 | val_accuracy: 0.70648 |  0:00:06s
epoch 8  | loss: 0.50884 | val_accuracy: 0.71672 |  0:00:07s
epoch 9  | loss: 0.51545 | val_accuracy: 0.68942 |  0:00:07s
epoch 10 | loss: 0.50361 | val_accuracy: 0.7099  |  0:00:08s
epoch 11 | loss: 0.49526 | val_accuracy: 0.70648 |  0:00:09s
epoch 12 | loss: 0.48113 | val_accuracy: 0.70648 |  0:00:10s
epoch 13 | loss: 0.47259 | val_accuracy: 0.75768 |  0:00:11s
epoch 14 | loss: 0.47975 | val_accuracy: 0.71331 |  0:00:11s
epoch 15 | loss: 0.47498 | val_accuracy: 0.70307 |  0:00:12s
epoch 16 | loss: 0.49111

[I 2024-08-05 17:01:06,320] Trial 6 finished with value: 0.757679180887372 and parameters: {'n_d': 27, 'n_a': 43, 'n_steps': 3, 'gamma': 1.0166565449726666, 'lambda_sparse': 3.4314686229859243e-06, 'learning_rate': 0.048372988775517954, 'batch_size': 64, 'num_epochs': 36}. Best is trial 3 with value: 0.757679180887372.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.1806  | val_accuracy: 0.63481 |  0:00:00s
epoch 1  | loss: 0.71227 | val_accuracy: 0.59727 |  0:00:00s
epoch 2  | loss: 0.57965 | val_accuracy: 0.61433 |  0:00:01s
epoch 3  | loss: 0.52176 | val_accuracy: 0.61433 |  0:00:01s
epoch 4  | loss: 0.4924  | val_accuracy: 0.68601 |  0:00:01s
epoch 5  | loss: 0.47888 | val_accuracy: 0.69966 |  0:00:02s
epoch 6  | loss: 0.45381 | val_accuracy: 0.7099  |  0:00:02s
epoch 7  | loss: 0.44706 | val_accuracy: 0.73379 |  0:00:03s
epoch 8  | loss: 0.42424 | val_accuracy: 0.69966 |  0:00:03s
epoch 9  | loss: 0.41766 | val_accuracy: 0.72696 |  0:00:03s
epoch 10 | loss: 0.40836 | val_accuracy: 0.72696 |  0:00:04s
epoch 11 | loss: 0.42007 | val_accuracy: 0.71331 |  0:00:04s
epoch 12 | loss: 0.41846 | val_accuracy: 0.7099  |  0:00:05s
epoch 13 | loss: 0.40863 | val_accuracy: 0.69625 |  0:00:05s
epoch 14 | loss: 0.39444 | val_accuracy: 0.69625 |  0:00:05s
epoch 15 | loss: 0.39321 | val_accuracy: 0.7099  |  0:00:06s
epoch 16 | loss: 0.37763

[I 2024-08-05 17:01:13,549] Trial 7 finished with value: 0.7406143344709898 and parameters: {'n_d': 58, 'n_a': 41, 'n_steps': 3, 'gamma': 1.2588698351716066, 'lambda_sparse': 5.728044360072223e-06, 'learning_rate': 0.03695940838527392, 'batch_size': 256, 'num_epochs': 18}. Best is trial 3 with value: 0.757679180887372.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.938   | val_accuracy: 0.48805 |  0:00:00s
epoch 1  | loss: 0.95045 | val_accuracy: 0.46075 |  0:00:01s
epoch 2  | loss: 0.92961 | val_accuracy: 0.45392 |  0:00:01s
epoch 3  | loss: 0.91855 | val_accuracy: 0.49829 |  0:00:02s
epoch 4  | loss: 0.89427 | val_accuracy: 0.52218 |  0:00:02s
epoch 5  | loss: 0.86679 | val_accuracy: 0.53584 |  0:00:03s
epoch 6  | loss: 0.85444 | val_accuracy: 0.54266 |  0:00:04s
epoch 7  | loss: 0.8705  | val_accuracy: 0.5529  |  0:00:04s
epoch 8  | loss: 0.8343  | val_accuracy: 0.53242 |  0:00:05s
epoch 9  | loss: 0.82485 | val_accuracy: 0.55973 |  0:00:05s
epoch 10 | loss: 0.81786 | val_accuracy: 0.54949 |  0:00:06s
epoch 11 | loss: 0.80542 | val_accuracy: 0.56314 |  0:00:06s
epoch 12 | loss: 0.77731 | val_accuracy: 0.54949 |  0:00:07s
epoch 13 | loss: 0.80036 | val_accuracy: 0.5802  |  0:00:08s
Stop training because you reached max_epochs = 14 with best_epoch = 13 and best_val_accuracy = 0.5802


[I 2024-08-05 17:01:22,132] Trial 8 finished with value: 0.5802047781569966 and parameters: {'n_d': 11, 'n_a': 40, 'n_steps': 4, 'gamma': 1.9045823534651638, 'lambda_sparse': 3.3856239583933703e-06, 'learning_rate': 0.0001298478178506225, 'batch_size': 128, 'num_epochs': 14}. Best is trial 3 with value: 0.757679180887372.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.69439 | val_accuracy: 0.60751 |  0:00:04s
epoch 1  | loss: 1.64446 | val_accuracy: 0.63481 |  0:00:09s
epoch 2  | loss: 0.80054 | val_accuracy: 0.56655 |  0:00:13s
epoch 3  | loss: 0.66694 | val_accuracy: 0.62799 |  0:00:18s
epoch 4  | loss: 0.5969  | val_accuracy: 0.64164 |  0:00:22s
epoch 5  | loss: 0.61563 | val_accuracy: 0.6587  |  0:00:27s
epoch 6  | loss: 0.60053 | val_accuracy: 0.68601 |  0:00:32s
epoch 7  | loss: 0.61047 | val_accuracy: 0.67918 |  0:00:37s
epoch 8  | loss: 0.57267 | val_accuracy: 0.63823 |  0:00:41s
epoch 9  | loss: 0.56873 | val_accuracy: 0.7099  |  0:00:46s
epoch 10 | loss: 0.55338 | val_accuracy: 0.69966 |  0:00:51s
epoch 11 | loss: 0.54497 | val_accuracy: 0.7099  |  0:00:55s
epoch 12 | loss: 0.54837 | val_accuracy: 0.69283 |  0:01:00s
epoch 13 | loss: 0.54642 | val_accuracy: 0.69625 |  0:01:05s
epoch 14 | loss: 0.53055 | val_accuracy: 0.69625 |  0:01:09s
epoch 15 | loss: 0.54768 | val_accuracy: 0.72355 |  0:01:14s
epoch 16 | loss: 0.53357

[I 2024-08-05 17:04:03,645] Trial 9 finished with value: 0.7337883959044369 and parameters: {'n_d': 52, 'n_a': 61, 'n_steps': 10, 'gamma': 1.5612554786511423, 'lambda_sparse': 2.82535236190168e-05, 'learning_rate': 0.029210990773204007, 'batch_size': 32, 'num_epochs': 34}. Best is trial 3 with value: 0.757679180887372.


epoch 0  | loss: 3.6051  | val_accuracy: 0.49488 |  0:00:01s
epoch 1  | loss: 2.59937 | val_accuracy: 0.51536 |  0:00:04s
epoch 2  | loss: 1.87051 | val_accuracy: 0.57679 |  0:00:06s
epoch 3  | loss: 1.38367 | val_accuracy: 0.57679 |  0:00:08s
epoch 4  | loss: 1.15988 | val_accuracy: 0.6041  |  0:00:09s
epoch 5  | loss: 1.09082 | val_accuracy: 0.61092 |  0:00:12s
epoch 6  | loss: 1.01295 | val_accuracy: 0.66212 |  0:00:14s
epoch 7  | loss: 0.91463 | val_accuracy: 0.68259 |  0:00:16s
epoch 8  | loss: 0.93726 | val_accuracy: 0.6587  |  0:00:18s
epoch 9  | loss: 0.91464 | val_accuracy: 0.65188 |  0:00:20s
epoch 10 | loss: 0.86961 | val_accuracy: 0.66894 |  0:00:22s
epoch 11 | loss: 0.78892 | val_accuracy: 0.64846 |  0:00:24s
epoch 12 | loss: 0.84908 | val_accuracy: 0.66212 |  0:00:26s
epoch 13 | loss: 0.75974 | val_accuracy: 0.66212 |  0:00:27s
epoch 14 | loss: 0.75807 | val_accuracy: 0.67235 |  0:00:29s
epoch 15 | loss: 0.78493 | val_accuracy: 0.67235 |  0:00:31s
epoch 16 | loss: 0.78527



epoch 0  | loss: 3.57494 | val_accuracy: 0.48718 |  0:00:02s
epoch 1  | loss: 2.71853 | val_accuracy: 0.55556 |  0:00:03s
epoch 2  | loss: 1.86778 | val_accuracy: 0.5641  |  0:00:05s
epoch 3  | loss: 1.28678 | val_accuracy: 0.61111 |  0:00:07s
epoch 4  | loss: 1.17863 | val_accuracy: 0.65385 |  0:00:08s
epoch 5  | loss: 1.11476 | val_accuracy: 0.67949 |  0:00:10s
epoch 6  | loss: 1.00141 | val_accuracy: 0.67949 |  0:00:11s
epoch 7  | loss: 1.06601 | val_accuracy: 0.67521 |  0:00:13s
epoch 8  | loss: 1.00929 | val_accuracy: 0.68803 |  0:00:15s
epoch 9  | loss: 0.86284 | val_accuracy: 0.67949 |  0:00:17s
epoch 10 | loss: 0.93424 | val_accuracy: 0.73077 |  0:00:18s
epoch 11 | loss: 0.90408 | val_accuracy: 0.73504 |  0:00:20s
epoch 12 | loss: 0.82996 | val_accuracy: 0.73504 |  0:00:21s
epoch 13 | loss: 0.77894 | val_accuracy: 0.7265  |  0:00:23s
epoch 14 | loss: 0.73854 | val_accuracy: 0.70513 |  0:00:25s
epoch 15 | loss: 0.76258 | val_accuracy: 0.73504 |  0:00:26s
epoch 16 | loss: 0.74797



epoch 0  | loss: 3.67642 | val_accuracy: 0.46581 |  0:00:01s
epoch 1  | loss: 2.76174 | val_accuracy: 0.55128 |  0:00:03s
epoch 2  | loss: 2.07277 | val_accuracy: 0.61538 |  0:00:04s
epoch 3  | loss: 1.48025 | val_accuracy: 0.6453  |  0:00:06s
epoch 4  | loss: 1.28714 | val_accuracy: 0.63675 |  0:00:08s
epoch 5  | loss: 1.16602 | val_accuracy: 0.68803 |  0:00:09s
epoch 6  | loss: 1.03726 | val_accuracy: 0.66239 |  0:00:11s
epoch 7  | loss: 0.92548 | val_accuracy: 0.65812 |  0:00:13s
epoch 8  | loss: 1.03113 | val_accuracy: 0.68803 |  0:00:14s
epoch 9  | loss: 0.9239  | val_accuracy: 0.69658 |  0:00:16s
epoch 10 | loss: 0.95463 | val_accuracy: 0.70513 |  0:00:18s
epoch 11 | loss: 0.91025 | val_accuracy: 0.70513 |  0:00:19s
epoch 12 | loss: 0.8665  | val_accuracy: 0.74786 |  0:00:21s
epoch 13 | loss: 0.79418 | val_accuracy: 0.76068 |  0:00:23s
epoch 14 | loss: 0.79204 | val_accuracy: 0.76496 |  0:00:24s
epoch 15 | loss: 0.82991 | val_accuracy: 0.76923 |  0:00:26s
epoch 16 | loss: 0.77943



epoch 0  | loss: 3.6838  | val_accuracy: 0.46581 |  0:00:01s
epoch 1  | loss: 2.62987 | val_accuracy: 0.49573 |  0:00:03s
epoch 2  | loss: 1.97057 | val_accuracy: 0.58547 |  0:00:04s
epoch 3  | loss: 1.43765 | val_accuracy: 0.5812  |  0:00:06s
epoch 4  | loss: 1.14015 | val_accuracy: 0.63675 |  0:00:08s
epoch 5  | loss: 1.09323 | val_accuracy: 0.66667 |  0:00:09s
epoch 6  | loss: 1.05487 | val_accuracy: 0.64103 |  0:00:11s
epoch 7  | loss: 0.99222 | val_accuracy: 0.67094 |  0:00:13s
epoch 8  | loss: 0.94333 | val_accuracy: 0.67949 |  0:00:14s
epoch 9  | loss: 0.89116 | val_accuracy: 0.69658 |  0:00:16s
epoch 10 | loss: 0.89535 | val_accuracy: 0.66239 |  0:00:18s
epoch 11 | loss: 0.88557 | val_accuracy: 0.72222 |  0:00:19s
epoch 12 | loss: 0.89883 | val_accuracy: 0.73504 |  0:00:21s
epoch 13 | loss: 0.82643 | val_accuracy: 0.7265  |  0:00:23s
epoch 14 | loss: 0.81967 | val_accuracy: 0.69658 |  0:00:24s
epoch 15 | loss: 0.798   | val_accuracy: 0.7265  |  0:00:26s
epoch 16 | loss: 0.73857



epoch 0  | loss: 3.6291  | val_accuracy: 0.51931 |  0:00:01s
epoch 1  | loss: 2.67227 | val_accuracy: 0.58369 |  0:00:03s
epoch 2  | loss: 1.99572 | val_accuracy: 0.60515 |  0:00:04s
epoch 3  | loss: 1.61653 | val_accuracy: 0.56652 |  0:00:06s
epoch 4  | loss: 1.19135 | val_accuracy: 0.61373 |  0:00:08s
epoch 5  | loss: 1.14758 | val_accuracy: 0.63948 |  0:00:09s
epoch 6  | loss: 1.00644 | val_accuracy: 0.62661 |  0:00:11s
epoch 7  | loss: 0.93841 | val_accuracy: 0.66524 |  0:00:13s
epoch 8  | loss: 0.88802 | val_accuracy: 0.6867  |  0:00:14s
epoch 9  | loss: 0.84776 | val_accuracy: 0.69957 |  0:00:16s
epoch 10 | loss: 0.9258  | val_accuracy: 0.69957 |  0:00:18s
epoch 11 | loss: 0.80053 | val_accuracy: 0.71245 |  0:00:19s
epoch 12 | loss: 0.85644 | val_accuracy: 0.70815 |  0:00:21s
epoch 13 | loss: 0.78358 | val_accuracy: 0.71674 |  0:00:22s
epoch 14 | loss: 0.7931  | val_accuracy: 0.71674 |  0:00:24s
epoch 15 | loss: 0.77642 | val_accuracy: 0.70815 |  0:00:26s
epoch 16 | loss: 0.75413



epoch 0  | loss: 3.57096 | val_accuracy: 0.49785 |  0:00:01s
epoch 1  | loss: 2.57699 | val_accuracy: 0.59227 |  0:00:03s
epoch 2  | loss: 1.99501 | val_accuracy: 0.6309  |  0:00:05s
epoch 3  | loss: 1.52672 | val_accuracy: 0.67811 |  0:00:06s
epoch 4  | loss: 1.21139 | val_accuracy: 0.72532 |  0:00:08s
epoch 5  | loss: 1.08047 | val_accuracy: 0.70386 |  0:00:10s
epoch 6  | loss: 1.02252 | val_accuracy: 0.73391 |  0:00:11s
epoch 7  | loss: 0.92226 | val_accuracy: 0.69528 |  0:00:13s
epoch 8  | loss: 0.86331 | val_accuracy: 0.71245 |  0:00:14s
epoch 9  | loss: 0.90707 | val_accuracy: 0.74249 |  0:00:16s
epoch 10 | loss: 0.87984 | val_accuracy: 0.72103 |  0:00:18s
epoch 11 | loss: 0.76381 | val_accuracy: 0.6824  |  0:00:20s
epoch 12 | loss: 0.91947 | val_accuracy: 0.72103 |  0:00:21s
epoch 13 | loss: 0.75675 | val_accuracy: 0.73391 |  0:00:23s
epoch 14 | loss: 0.7701  | val_accuracy: 0.74249 |  0:00:24s
epoch 15 | loss: 0.7361  | val_accuracy: 0.7382  |  0:00:26s
epoch 16 | loss: 0.75436

[I 2024-08-05 17:10:54,327] A new study created in memory with name: no-name-825c5f65-4825-4a79-a241-076c30222408


                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.736655  0.815283         0.754448   
KNN                         0.886121  0.959857         0.869222   
Decision Tree               0.871886  0.873841         0.847861   
Random Forest               0.882562  0.958151         0.881683   
Gradient Boosting            0.88968  0.950891         0.885246   
XGBoost                     0.896797  0.952674         0.878127   
LightGBM                    0.886121  0.952929         0.875452   
CatBoost                    0.907473  0.957412         0.885238   
MLP                         0.880546  0.903029         0.856153   
DNN                         0.887372  0.925489          0.87415   
DCN                          0.87372  0.921202         0.871571   
Wide_and_Deep                0.87372  0.924697         0.875841   
XGBoost + NN                 0.87372  0.931687         0.858758   
LightGBM + NN                0.87372  0.901911         0.88700

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:11:06,568] Trial 0 finished with value: 0.6860068259385665 and parameters: {'heads': 5, 'dim': 140, 'depth': 1, 'mlp_dim': 152, 'dropout': 0.2366729190801864, 'learning_rate': 0.020564149420318787, 'batch_size': 128, 'num_epochs': 59}. Best is trial 0 with value: 0.6860068259385665.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:11:27,145] Trial 1 finished with value: 0.7781569965870307 and parameters: {'heads': 3, 'dim': 15, 'depth': 2, 'mlp_dim': 85, 'dropout': 0.2239530984814862, 'learning_rate': 0.0008349904221785114, 'batch_size': 64, 'num_epochs': 89}. Best is trial 1 with value: 0.7781569965870307.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:11:49,198] Trial 2 finished with value: 0.4948805460750853 and parameters: {'heads': 8, 'dim': 120, 'depth': 3, 'mlp_dim': 250, 'dropout': 0.2392658275394774, 'learnin

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.736655  0.815283         0.754448   
KNN                         0.886121  0.959857         0.869222   
Decision Tree               0.871886  0.873841         0.847861   
Random Forest               0.882562  0.958151         0.881683   
Gradient Boosting            0.88968  0.950891         0.885246   
XGBoost                     0.896797  0.952674         0.878127   
LightGBM                    0.886121  0.952929         0.875452   
CatBoost                    0.907473  0.957412         0.885238   
MLP                         0.880546  0.903029         0.856153   
DNN                         0.887372  0.925489          0.87415   
DCN                          0.87372  0.921202         0.871571   
Wide_and_Deep                0.87372  0.924697         0.875841   
XGBoost + NN                 0.87372  0.931687         0.858758   
LightGBM + NN                0.87372  0.901911         0.88700

In [140]:
file_prefix = "water"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
if len(df) >= 1405:
    df = df.sample(n=1405, random_state=42, replace=True)
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 218, number of negative: 906
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2770
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.193950 -> initscore=-1.424544
[LightGBM] [Info] Start training from score -1.424544
[LightGBM] [Info] Number of positive: 218, number of negative: 906
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2770
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.193950 -> initscore=-1.424544
[LightGBM] [Info] Start training from score -1.424544
[LightGBM] [Info] Numb

[I 2024-08-05 17:41:09,387] A new study created in memory with name: no-name-c6e22a97-ab89-41a9-a261-943d4039e325


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.002993                          0.0   
KNN                                    0.001996                     0.004986   

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:41:13,648] Trial 0 finished with value: 0.9786223277909739 and parameters: {'hidden_dim_0': 134, 'hidden_dim_1': 221, 'hidden_dim_2': 164, 'learning_rate': 0.06916851521910337, 'batch_size': 128, 'num_epochs': 41}. Best is trial 0 with value: 0.9786223277909739.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:41:30,353] Trial 1 finished with value: 0.6769596199524941 and parameters: {'hidden_dim_0': 150, 'hidden_dim_1': 230, 'hidden_dim_2': 82, 'learning_rate': 0.061909971816620196, 'batch_size': 64, 'num_epochs': 89}. Best is trial 0 with value: 0.9786223277909739.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:41:34,229] Trial 2 finished with value: 0.9786223277909739 and parameters: {'hidden_dim_0': 249, 'hidden_dim_1': 34, 'hidden_dim_2': 187, 'learning_rate': 0.010046052319977072, 'batch_size': 128, 'num_epochs'

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.002993                          0.0   
KNN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:43:05,910] Trial 0 finished with value: 0.9833729216152018 and parameters: {'cross_layers': 4, 'hidden_layer_0': 53, 'hidden_layer_1': 192, 'hidden_layer_2': 92, 'learning_rate': 0.0046284577138425665, 'batch_size': 64, 'num_epochs': 90}. Best is trial 0 with value: 0.9833729216152018.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:43:16,287] Trial 1 finished with value: 0.9833729216152018 and parameters: {'cross_layers': 3, 'hidden_layer_0': 140, 'hidden_layer_1': 156, 'hidden_layer_2': 86, 'learning_rate': 0.024721967011831308, 'batch_size': 128, 'num_epochs': 80}. Best is trial 0 with value: 0.9833729216152018.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:43:32,572] Trial 2 finished with value: 0.9786223277909739 and parameters: {'cross_layers': 3, 'hidden_layer_0': 32, 'hidden_layer_1': 190, 'hidden_layer_2': 

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   
DCN                  0.978622  0.997909         0.963095        0.012286   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logisti

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:44:54,684] Trial 0 finished with value: 0.9691211401425178 and parameters: {'hidden_layer_0': 44, 'hidden_layer_1': 60, 'hidden_layer_2': 229, 'learning_rate': 0.0003537788616396478, 'batch_size': 128, 'num_epochs': 46}. Best is trial 0 with value: 0.9691211401425178.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:44:56,531] Trial 1 finished with value: 0.9833729216152018 and parameters: {'hidden_layer_0': 198, 'hidden_layer_1': 235, 'hidden_layer_2': 75, 'learning_rate': 0.00628568315603586, 'batch_size': 256, 'num_epochs': 24}. Best is trial 1 with value: 0.9833729216152018.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:44:57,269] Trial 2 finished with value: 0.9809976247030879 and parameters: {'hidden_layer_0': 81, 'hidden_layer_1': 89, 'hidden_layer_2': 74, 'learning_rate': 0.003857053793983834, 'batch_size': 2

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   
DCN                  0.978622  0.997909         0.963095        0.012286   
Wide_and_Deep        0.978622  0.997205         0.961905        0.015567   

           

  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:46:00,422] Trial 0 finished with value: 0.9524940617577197 and parameters: {'n_estimators': 65, 'max_depth': 8, 'xgb_learning_rate': 0.0015358140356985458, 'subsample': 0.6411172962761726, 'colsample_bytree': 0.5688935455379807, 'use_hidden_layer_0': True, 'hidden_layer_0': 227, 'use_hidden_layer_1': True, 'hidden_layer_1': 117, 'use_hidden_layer_2': False, 'nn_learning_rate': 0.0004184149996814555, 'batch_size': 256, 'num_epochs': 63}. Best is trial 0 with value: 0.9524940617577197.
  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:46:08,920] Trial 1 finished with value: 0.5415676959619953 and 

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   
DCN                  0.978622  0.997909         0.963095        0.012286   
Wide_and_Deep        0.978622  0.997205         0.961905        0.015567   
XGBoost + NN

[I 2024-08-05 17:48:00,781] Trial 0 finished with value: 0.9691211401425178 and parameters: {'n_estimators': 50, 'max_depth': 10, 'lgb_learning_rate': 0.01651362869896309, 'num_leaves': 99, 'subsample': 0.9765610736865655, 'colsample_bytree': 0.5754167552772246, 'hidden_layer_0': 153, 'hidden_layer_1': 184, 'hidden_layer_2': 205, 'nn_learning_rate': 0.0010626399493598208, 'batch_size': 32, 'num_epochs': 36}. Best is trial 0 with value: 0.9691211401425178.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:48:03,779] Trial 1 finished with value: 0.8907363420427553 and parameters: {'n_estimators': 253, 'max_depth': 3, 'lgb_learning_rate': 0.0005032359932552024, 'num_leaves': 22, 'subsample': 0.9386725377982326, 'colsample_bytree': 0.9557707175538537, 'hidden_layer_0': 81, 'hidden_layer_1': 44, 'hidden_layer_2': 213, 'nn_learning_rate': 0.0005326063709893331, 'batch_size': 256, 'num_epochs': 49}. Best is trial 0 with value: 0.9691211401425178.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000377 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:48:10,843] Trial 2 finished with value: 0.9501187648456056 and parameters: {'n_estimators': 136, 'max_depth': 5, 'lgb_learning_rate': 0.00012414578670086055, 'num_leaves': 91, 'subsample': 0.7069100905082698, 'colsample_bytree': 0.702621163322799, 'hidden_layer_0': 157, 'hidden_layer_1': 248, 'hidden_layer_2': 123, 'nn_learning_rate': 0.0016653315732621252, 'batch_size': 256, 'num_epochs': 94}. Best is trial 0 with value: 0.9691211401425178.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000351 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:48:27,742] Trial 3 finished with value: 0.9097387173396675 and parameters: {'n_estimators': 129, 'max_depth': 3, 'lgb_learning_rate': 0.0005009282043413882, 'num_leaves': 63, 'subsample': 0.8259104278757172, 'colsample_bytree': 0.6402176834063582, 'hidden_layer_0': 78, 'hidden_layer_1': 215, 'hidden_layer_2': 237, 'nn_learning_rate': 0.009417893403377519, 'batch_size': 64, 'num_epochs': 100}. Best is trial 0 with value: 0.9691211401425178.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000310 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:48:35,065] Trial 4 finished with value: 0.9501187648456056 and parameters: {'n_estimators': 290, 'max_depth': 6, 'lgb_learning_rate': 0.00018543415636284853, 'num_leaves': 88, 'subsample': 0.8192727269818755, 'colsample_bytree': 0.7442103357421186, 'hidden_layer_0': 111, 'hidden_layer_1': 83, 'hidden_layer_2': 36, 'nn_learning_rate': 0.0013201189492352003, 'batch_size': 128, 'num_epochs': 92}. Best is trial 0 with value: 0.9691211401425178.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:48:52,680] Trial 5 finished with value: 0.9809976247030879 and parameters: {'n_estimators': 187, 'max_depth': 6, 'lgb_learning_rate': 0.06236292919897812, 'num_leaves': 33, 'subsample': 0.5027304461333527, 'colsample_bytree': 0.6036859615527888, 'hidden_layer_0': 57, 'hidden_layer_1': 227, 'hidden_layer_2': 82, 'nn_learning_rate': 0.04676338363796537, 'batch_size': 64, 'num_epochs': 78}. Best is trial 5 with value: 0.9809976247030879.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000380 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:49:09,290] Trial 6 finished with value: 0.9311163895486936 and parameters: {'n_estimators': 209, 'max_depth': 9, 'lgb_learning_rate': 0.00015489997906605139, 'num_leaves': 87, 'subsample': 0.6215921494961418, 'colsample_bytree': 0.8888764438988633, 'hidden_layer_0': 228, 'hidden_layer_1': 223, 'hidden_layer_2': 64, 'nn_learning_rate': 0.0070414382321843235, 'batch_size': 32, 'num_epochs': 56}. Best is trial 5 with value: 0.9809976247030879.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000428 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:49:12,429] Trial 7 finished with value: 0.9334916864608076 and parameters: {'n_estimators': 221, 'max_depth': 7, 'lgb_learning_rate': 0.001167201310575697, 'num_leaves': 65, 'subsample': 0.7171475788473485, 'colsample_bytree': 0.9190652143269711, 'hidden_layer_0': 45, 'hidden_layer_1': 240, 'hidden_layer_2': 222, 'nn_learning_rate': 0.00011791281773031218, 'batch_size': 64, 'num_epochs': 18}. Best is trial 5 with value: 0.9809976247030879.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000358 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:49:15,708] Trial 8 finished with value: 0.9501187648456056 and parameters: {'n_estimators': 249, 'max_depth': 7, 'lgb_learning_rate': 0.0005756184641615182, 'num_leaves': 23, 'subsample': 0.7976898471958008, 'colsample_bytree': 0.7792865816640062, 'hidden_layer_0': 231, 'hidden_layer_1': 225, 'hidden_layer_2': 205, 'nn_learning_rate': 0.07016264829990697, 'batch_size': 256, 'num_epochs': 39}. Best is trial 5 with value: 0.9809976247030879.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054


[I 2024-08-05 17:49:20,385] Trial 9 finished with value: 0.9667458432304038 and parameters: {'n_estimators': 242, 'max_depth': 6, 'lgb_learning_rate': 0.004149772311249264, 'num_leaves': 54, 'subsample': 0.5065694297626314, 'colsample_bytree': 0.7566562156474357, 'hidden_layer_0': 204, 'hidden_layer_1': 47, 'hidden_layer_2': 165, 'nn_learning_rate': 0.09952244186578194, 'batch_size': 256, 'num_epochs': 75}. Best is trial 5 with value: 0.9809976247030879.


[LightGBM] [Info] Number of positive: 770, number of negative: 910
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000327 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5021
[LightGBM] [Info] Number of data points in the train set: 1680, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054
[LightGBM] [Info] Number of positive: 616, number of negative: 728
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5012
[LightGBM] [Info] Number of data points in the train set: 1344, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.458333 -> initscore=-0.167054
[LightGBM] [Info] Start training from score -0.167054
[LightGBM] [Info] Numb

[I 2024-08-05 17:50:21,235] A new study created in memory with name: no-name-591f0e56-08e7-41b4-a9dc-08b5643b2491


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   
DCN                  0.978622  0.997909         0.963095        0.012286   
Wide_and_Deep        0.978622  0.997205         0.961905        0.015567   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:50:27,572] Trial 0 finished with value: 0.828978622327791 and parameters: {'num_heads': 4, 'embedding_dim': 4, 'num_layers': 2, 'hidden_layer_0': 67, 'hidden_layer_1': 85, 'hidden_layer_2': 108, 'nn_learning_rate': 0.0006552620574685227, 'batch_size': 256, 'num_epochs': 54}. Best is trial 0 with value: 0.828978622327791.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:50:36,584] Trial 1 finished with value: 0.9809976247030879 and parameters: {'num_heads': 2, 'embedding_dim': 54, 'num_layers': 3, 'hidden_layer_0': 89, 'hidden_layer_1': 109, 'hidden_layer_2': 176, 'nn_learning_rate': 0.015128025952575846, 'batch_size': 256, 'num_epochs': 67}. Best is trial 1 with value: 0.9809976247030879.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:51:01,655] Trial 2 finished with value: 0.9833729216152018 and par

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   
DCN                  0.978622  0.997909         0.963095        0.012286   
Wide_and_Deep        0.978622  0.997205         0.961905        0.015567   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:53:16,113] Trial 0 finished with value: 0.6555819477434679 and parameters: {'num_heads': 3, 'embedding_dim': 18, 'num_layers': 1, 'hidden_layer_0': 160, 'hidden_layer_1': 111, 'hidden_layer_2': 148, 'nn_learning_rate': 0.06481906693821284, 'batch_size': 128, 'num_epochs': 47}. Best is trial 0 with value: 0.6555819477434679.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:53:37,618] Trial 1 finished with value: 0.9786223277909739 and parameters: {'num_heads': 3, 'embedding_dim': 36, 'num_layers': 2, 'hidden_layer_0': 234, 'hidden_layer_1': 245, 'hidden_layer_2': 193, 'nn_learning_rate': 0.01770693268941067, 'batch_size': 128, 'num_epochs': 94}. Best is trial 1 with value: 0.9786223277909739.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:53:49,870] Trial 2 finished with value: 0.9809976247030879 and 

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.871886  0.861541         0.862099        0.008892   
KNN                  0.864769  0.887051         0.854075        0.010557   
Decision Tree        0.928826  0.900774         0.944849        0.009936   
Random Forest        0.939502  0.983462          0.92794        0.012979   
Gradient Boosting     0.94306  0.991027         0.956413        0.009873   
XGBoost              0.953737  0.992435         0.962647        0.016988   
LightGBM             0.953737  0.991291         0.963536        0.014147   
CatBoost              0.97153  0.994194         0.953754        0.013908   
MLP                  0.985748  0.998273         0.956548         0.00803   
DNN                  0.988124  0.997841          0.96131         0.00996   
DCN                  0.978622  0.997909         0.963095        0.012286   
Wide_and_Deep        0.978622  0.997205         0.961905        0.015567   
XGBoost + NN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:57:10,412] Trial 0 finished with value: 0.9833729216152018 and parameters: {'num_layers': 4, 'hidden_layer_0': 71, 'hidden_layer_1': 144, 'hidden_layer_2': 198, 'hidden_layer_3': 207, 'learning_rate': 0.019582026596354208, 'batch_size': 256, 'num_epochs': 80}. Best is trial 0 with value: 0.9833729216152018.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:57:14,977] Trial 1 finished with value: 0.9857482185273159 and parameters: {'num_layers': 5, 'hidden_layer_0': 71, 'hidden_layer_1': 65, 'hidden_layer_2': 190, 'hidden_layer_3': 46, 'hidden_layer_4': 76, 'learning_rate': 0.006138888509638095, 'batch_size': 128, 'num_epochs': 42}. Best is trial 1 with value: 0.9857482185273159.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:57:21,143] Trial 2 finished with value: 0.9786223277909739 and parameters: {'num_layers': 3, 'h

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.871886  0.861541         0.862099   
KNN                         0.864769  0.887051         0.854075   
Decision Tree               0.928826  0.900774         0.944849   
Random Forest               0.939502  0.983462          0.92794   
Gradient Boosting            0.94306  0.991027         0.956413   
XGBoost                     0.953737  0.992435         0.962647   
LightGBM                    0.953737  0.991291         0.963536   
CatBoost                     0.97153  0.994194         0.953754   
MLP                         0.985748  0.998273         0.956548   
DNN                         0.988124  0.997841          0.96131   
DCN                         0.978622  0.997909         0.963095   
Wide_and_Deep               0.978622  0.997205         0.961905   
XGBoost + NN                0.541568       0.5         0.704762   
LightGBM + NN               0.978622  0.992512           0.962

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:58:59,402] Trial 0 finished with value: 0.9501187648456056 and parameters: {'hidden_dim': 86, 'learning_rate': 0.0007034481781349946, 'batch_size': 256, 'num_epochs': 69}. Best is trial 0 with value: 0.9501187648456056.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:58:59,848] Trial 1 finished with value: 0.9643705463182898 and parameters: {'hidden_dim': 210, 'learning_rate': 0.007151941035651966, 'batch_size': 256, 'num_epochs': 10}. Best is trial 1 with value: 0.9643705463182898.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 17:59:03,379] Trial 2 finished with value: 0.9809976247030879 and parameters: {'hidden_dim': 75, 'learning_rate': 0.001280659883373202, 'batch_size': 64, 'num_epochs': 64}. Best is trial 2 with value: 0.9809976247030879.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.871886  0.861541         0.862099   
KNN                         0.864769  0.887051         0.854075   
Decision Tree               0.928826  0.900774         0.944849   
Random Forest               0.939502  0.983462          0.92794   
Gradient Boosting            0.94306  0.991027         0.956413   
XGBoost                     0.953737  0.992435         0.962647   
LightGBM                    0.953737  0.991291         0.963536   
CatBoost                     0.97153  0.994194         0.953754   
MLP                         0.985748  0.998273         0.956548   
DNN                         0.988124  0.997841          0.96131   
DCN                         0.978622  0.997909         0.963095   
Wide_and_Deep               0.978622  0.997205         0.961905   
XGBoost + NN                0.541568       0.5         0.704762   
LightGBM + NN               0.978622  0.992512           0.962

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:00:41,784] Trial 0 finished with value: 0.9786223277909739 and parameters: {'num_layers': 3, 'num_trees': 6, 'tree_dim': 27, 'learning_rate': 0.04792803597657599, 'batch_size': 32, 'num_epochs': 99}. Best is trial 0 with value: 0.9786223277909739.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:00:48,724] Trial 1 finished with value: 0.9786223277909739 and parameters: {'num_layers': 4, 'num_trees': 2, 'tree_dim': 45, 'learning_rate': 0.004188628504095935, 'batch_size': 256, 'num_epochs': 70}. Best is trial 0 with value: 0.9786223277909739.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:00:58,836] Trial 2 finished with value: 0.9477434679334917 and parameters: {'num_layers': 3, 'num_trees': 4, 'tree_dim': 63, 'learning_rate': 0.0003905339031085652, 'batch_size': 128, 'num_epochs': 47}. Best is trial 0 with value: 0.97

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.871886  0.861541         0.862099   
KNN                         0.864769  0.887051         0.854075   
Decision Tree               0.928826  0.900774         0.944849   
Random Forest               0.939502  0.983462          0.92794   
Gradient Boosting            0.94306  0.991027         0.956413   
XGBoost                     0.953737  0.992435         0.962647   
LightGBM                    0.953737  0.991291         0.963536   
CatBoost                     0.97153  0.994194         0.953754   
MLP                         0.985748  0.998273         0.956548   
DNN                         0.988124  0.997841          0.96131   
DCN                         0.978622  0.997909         0.963095   
Wide_and_Deep               0.978622  0.997205         0.961905   
XGBoost + NN                0.541568       0.5         0.704762   
LightGBM + NN               0.978622  0.992512           0.962

  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.78008 | val_accuracy: 0.65796 |  0:00:00s
epoch 1  | loss: 0.55137 | val_accuracy: 0.73159 |  0:00:01s
epoch 2  | loss: 0.50887 | val_accuracy: 0.78147 |  0:00:02s
epoch 3  | loss: 0.44744 | val_accuracy: 0.81235 |  0:00:03s
epoch 4  | loss: 0.41861 | val_accuracy: 0.82423 |  0:00:03s
epoch 5  | loss: 0.39271 | val_accuracy: 0.80523 |  0:00:04s
epoch 6  | loss: 0.37276 | val_accuracy: 0.82898 |  0:00:05s
epoch 7  | loss: 0.36284 | val_accuracy: 0.8361  |  0:00:06s
epoch 8  | loss: 0.34256 | val_accuracy: 0.84561 |  0:00:06s
epoch 9  | loss: 0.32502 | val_accuracy: 0.85511 |  0:00:07s
epoch 10 | loss: 0.28019 | val_accuracy: 0.88361 |  0:00:08s
epoch 11 | loss: 0.2967  | val_accuracy: 0.89311 |  0:00:09s
epoch 12 | loss: 0.24897 | val_accuracy: 0.91211 |  0:00:09s
epoch 13 | loss: 0.24202 | val_accuracy: 0.90261 |  0:00:10s
epoch 14 | loss: 0.24715 | val_accuracy: 0.90499 |  0:00:11s
epoch 15 | loss: 0.22002 | val_accuracy: 0.89549 |  0:00:12s
epoch 16 | loss: 0.21345

[I 2024-08-05 18:02:59,425] Trial 0 finished with value: 0.9596199524940617 and parameters: {'n_d': 33, 'n_a': 54, 'n_steps': 3, 'gamma': 1.3158680017266198, 'lambda_sparse': 8.193163174162726e-05, 'learning_rate': 0.002861960153556385, 'batch_size': 128, 'num_epochs': 94}. Best is trial 0 with value: 0.9596199524940617.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.86985 | val_accuracy: 0.55107 |  0:00:01s
epoch 1  | loss: 1.75744 | val_accuracy: 0.53919 |  0:00:02s
epoch 2  | loss: 1.50621 | val_accuracy: 0.55582 |  0:00:03s
epoch 3  | loss: 1.36137 | val_accuracy: 0.52019 |  0:00:04s
epoch 4  | loss: 1.23624 | val_accuracy: 0.52257 |  0:00:06s
epoch 5  | loss: 1.11351 | val_accuracy: 0.52732 |  0:00:07s
epoch 6  | loss: 1.05487 | val_accuracy: 0.54394 |  0:00:08s
epoch 7  | loss: 1.03262 | val_accuracy: 0.57482 |  0:00:10s
epoch 8  | loss: 1.00784 | val_accuracy: 0.5677  |  0:00:11s
epoch 9  | loss: 0.98151 | val_accuracy: 0.58907 |  0:00:12s
epoch 10 | loss: 0.93528 | val_accuracy: 0.5962  |  0:00:13s
epoch 11 | loss: 0.87889 | val_accuracy: 0.58195 |  0:00:15s
epoch 12 | loss: 0.88405 | val_accuracy: 0.56532 |  0:00:16s
epoch 13 | loss: 0.8683  | val_accuracy: 0.57957 |  0:00:17s
epoch 14 | loss: 0.88612 | val_accuracy: 0.59145 |  0:00:18s
epoch 15 | loss: 0.83675 | val_accuracy: 0.59145 |  0:00:20s
epoch 16 | loss: 0.78747

[I 2024-08-05 18:03:34,329] Trial 1 finished with value: 0.6579572446555819 and parameters: {'n_d': 27, 'n_a': 32, 'n_steps': 6, 'gamma': 1.2348697207811474, 'lambda_sparse': 2.4188279106388276e-05, 'learning_rate': 0.0003036741330312083, 'batch_size': 128, 'num_epochs': 27}. Best is trial 0 with value: 0.9596199524940617.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.81388 | val_accuracy: 0.66271 |  0:00:00s
epoch 1  | loss: 0.5537  | val_accuracy: 0.70309 |  0:00:01s
epoch 2  | loss: 0.53402 | val_accuracy: 0.74347 |  0:00:02s
epoch 3  | loss: 0.4534  | val_accuracy: 0.80048 |  0:00:02s
epoch 4  | loss: 0.41425 | val_accuracy: 0.85036 |  0:00:03s
epoch 5  | loss: 0.35927 | val_accuracy: 0.87648 |  0:00:04s
epoch 6  | loss: 0.302   | val_accuracy: 0.88836 |  0:00:05s
epoch 7  | loss: 0.29546 | val_accuracy: 0.91924 |  0:00:05s
epoch 8  | loss: 0.25542 | val_accuracy: 0.90024 |  0:00:06s
epoch 9  | loss: 0.2382  | val_accuracy: 0.91211 |  0:00:07s
epoch 10 | loss: 0.2483  | val_accuracy: 0.92162 |  0:00:07s
epoch 11 | loss: 0.23243 | val_accuracy: 0.90261 |  0:00:08s
epoch 12 | loss: 0.21344 | val_accuracy: 0.92874 |  0:00:09s
epoch 13 | loss: 0.19355 | val_accuracy: 0.93112 |  0:00:10s
epoch 14 | loss: 0.19187 | val_accuracy: 0.94774 |  0:00:10s
epoch 15 | loss: 0.18448 | val_accuracy: 0.92637 |  0:00:11s
epoch 16 | loss: 0.1659 

[I 2024-08-05 18:04:04,205] Trial 2 finished with value: 0.9809976247030879 and parameters: {'n_d': 27, 'n_a': 51, 'n_steps': 3, 'gamma': 1.575016598489016, 'lambda_sparse': 1.2465755398477497e-05, 'learning_rate': 0.0076289127924148645, 'batch_size': 128, 'num_epochs': 46}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.51394 | val_accuracy: 0.56057 |  0:00:00s
epoch 1  | loss: 1.12461 | val_accuracy: 0.60808 |  0:00:01s
epoch 2  | loss: 0.92125 | val_accuracy: 0.65558 |  0:00:02s
epoch 3  | loss: 0.73102 | val_accuracy: 0.68171 |  0:00:04s
epoch 4  | loss: 0.61617 | val_accuracy: 0.72922 |  0:00:05s
epoch 5  | loss: 0.59214 | val_accuracy: 0.76485 |  0:00:06s
epoch 6  | loss: 0.57484 | val_accuracy: 0.70546 |  0:00:07s
epoch 7  | loss: 0.54126 | val_accuracy: 0.76247 |  0:00:08s
epoch 8  | loss: 0.53402 | val_accuracy: 0.74584 |  0:00:09s
epoch 9  | loss: 0.52993 | val_accuracy: 0.72447 |  0:00:10s
epoch 10 | loss: 0.51274 | val_accuracy: 0.75297 |  0:00:11s
Stop training because you reached max_epochs = 11 with best_epoch = 5 and best_val_accuracy = 0.76485


[I 2024-08-05 18:04:16,631] Trial 3 finished with value: 0.7648456057007126 and parameters: {'n_d': 36, 'n_a': 38, 'n_steps': 8, 'gamma': 1.8510663902075055, 'lambda_sparse': 2.078677330550548e-05, 'learning_rate': 0.00392957726235692, 'batch_size': 256, 'num_epochs': 11}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.85755 | val_accuracy: 0.57482 |  0:00:00s
epoch 1  | loss: 0.68148 | val_accuracy: 0.61283 |  0:00:01s
epoch 2  | loss: 0.65072 | val_accuracy: 0.67458 |  0:00:02s
epoch 3  | loss: 0.58091 | val_accuracy: 0.70309 |  0:00:02s
epoch 4  | loss: 0.55486 | val_accuracy: 0.71021 |  0:00:03s
epoch 5  | loss: 0.51738 | val_accuracy: 0.71734 |  0:00:04s
epoch 6  | loss: 0.51641 | val_accuracy: 0.72447 |  0:00:05s
epoch 7  | loss: 0.49836 | val_accuracy: 0.70546 |  0:00:05s
epoch 8  | loss: 0.46621 | val_accuracy: 0.71496 |  0:00:06s
epoch 9  | loss: 0.45452 | val_accuracy: 0.71734 |  0:00:07s
epoch 10 | loss: 0.44753 | val_accuracy: 0.76722 |  0:00:07s
epoch 11 | loss: 0.44323 | val_accuracy: 0.7886  |  0:00:08s
epoch 12 | loss: 0.42499 | val_accuracy: 0.79572 |  0:00:09s
epoch 13 | loss: 0.40012 | val_accuracy: 0.7981  |  0:00:09s
epoch 14 | loss: 0.38444 | val_accuracy: 0.78147 |  0:00:10s
epoch 15 | loss: 0.36098 | val_accuracy: 0.7981  |  0:00:11s
epoch 16 | loss: 0.37028

[I 2024-08-05 18:04:42,677] Trial 4 finished with value: 0.8669833729216152 and parameters: {'n_d': 42, 'n_a': 36, 'n_steps': 3, 'gamma': 1.7220879590591411, 'lambda_sparse': 1.895317589021574e-06, 'learning_rate': 0.0009030470967727872, 'batch_size': 128, 'num_epochs': 96}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.1893  | val_accuracy: 0.57245 |  0:00:02s
epoch 1  | loss: 0.78131 | val_accuracy: 0.68171 |  0:00:05s
epoch 2  | loss: 0.74002 | val_accuracy: 0.69834 |  0:00:07s
epoch 3  | loss: 0.68735 | val_accuracy: 0.67696 |  0:00:10s
epoch 4  | loss: 0.64269 | val_accuracy: 0.70784 |  0:00:12s
epoch 5  | loss: 0.68918 | val_accuracy: 0.65321 |  0:00:15s
epoch 6  | loss: 0.59334 | val_accuracy: 0.75297 |  0:00:18s
epoch 7  | loss: 0.68353 | val_accuracy: 0.73634 |  0:00:20s
epoch 8  | loss: 0.608   | val_accuracy: 0.72922 |  0:00:23s
epoch 9  | loss: 0.56071 | val_accuracy: 0.73872 |  0:00:25s
epoch 10 | loss: 0.54383 | val_accuracy: 0.75772 |  0:00:28s
epoch 11 | loss: 0.51798 | val_accuracy: 0.77435 |  0:00:31s
epoch 12 | loss: 0.5437  | val_accuracy: 0.74584 |  0:00:33s
epoch 13 | loss: 0.52212 | val_accuracy: 0.76485 |  0:00:36s
epoch 14 | loss: 0.504   | val_accuracy: 0.7886  |  0:00:38s
epoch 15 | loss: 0.52928 | val_accuracy: 0.75297 |  0:00:41s
epoch 16 | loss: 0.5268 

[I 2024-08-05 18:06:13,864] Trial 5 finished with value: 0.850356294536817 and parameters: {'n_d': 28, 'n_a': 64, 'n_steps': 7, 'gamma': 1.8712340147112796, 'lambda_sparse': 0.0005940236341695327, 'learning_rate': 0.0029401700700387636, 'batch_size': 64, 'num_epochs': 35}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 0.96999 | val_accuracy: 0.62233 |  0:00:02s
epoch 1  | loss: 0.61361 | val_accuracy: 0.67221 |  0:00:05s
epoch 2  | loss: 0.59919 | val_accuracy: 0.72447 |  0:00:07s
epoch 3  | loss: 0.54763 | val_accuracy: 0.78147 |  0:00:10s
epoch 4  | loss: 0.5183  | val_accuracy: 0.77197 |  0:00:13s
epoch 5  | loss: 0.5235  | val_accuracy: 0.74584 |  0:00:15s
epoch 6  | loss: 0.47071 | val_accuracy: 0.76722 |  0:00:18s
epoch 7  | loss: 0.44283 | val_accuracy: 0.78385 |  0:00:21s
epoch 8  | loss: 0.47305 | val_accuracy: 0.74347 |  0:00:23s
epoch 9  | loss: 0.47642 | val_accuracy: 0.77197 |  0:00:26s
epoch 10 | loss: 0.50346 | val_accuracy: 0.7601  |  0:00:29s
epoch 11 | loss: 0.43601 | val_accuracy: 0.82423 |  0:00:31s
epoch 12 | loss: 0.4203  | val_accuracy: 0.83373 |  0:00:34s
epoch 13 | loss: 0.41489 | val_accuracy: 0.85511 |  0:00:37s
epoch 14 | loss: 0.43118 | val_accuracy: 0.84086 |  0:00:39s
epoch 15 | loss: 0.42146 | val_accuracy: 0.82185 |  0:00:42s
epoch 16 | loss: 0.39175

[I 2024-08-05 18:07:10,473] Trial 6 finished with value: 0.8574821852731591 and parameters: {'n_d': 9, 'n_a': 33, 'n_steps': 9, 'gamma': 1.2205976839509358, 'lambda_sparse': 9.780284525104682e-05, 'learning_rate': 0.02164065946773165, 'batch_size': 64, 'num_epochs': 21}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.03638 | val_accuracy: 0.63658 |  0:00:04s
epoch 1  | loss: 0.93473 | val_accuracy: 0.65558 |  0:00:08s
epoch 2  | loss: 0.75544 | val_accuracy: 0.67933 |  0:00:12s
epoch 3  | loss: 0.72563 | val_accuracy: 0.67458 |  0:00:16s
epoch 4  | loss: 0.66974 | val_accuracy: 0.66746 |  0:00:21s
epoch 5  | loss: 0.61116 | val_accuracy: 0.68171 |  0:00:25s
epoch 6  | loss: 0.61441 | val_accuracy: 0.73397 |  0:00:29s
epoch 7  | loss: 0.60559 | val_accuracy: 0.69834 |  0:00:34s
epoch 8  | loss: 0.57013 | val_accuracy: 0.73634 |  0:00:38s
epoch 9  | loss: 0.52125 | val_accuracy: 0.77672 |  0:00:42s
epoch 10 | loss: 0.54944 | val_accuracy: 0.76485 |  0:00:46s
epoch 11 | loss: 0.50623 | val_accuracy: 0.78622 |  0:00:50s
epoch 12 | loss: 0.46209 | val_accuracy: 0.7791  |  0:00:55s
epoch 13 | loss: 0.46111 | val_accuracy: 0.78147 |  0:00:59s
epoch 14 | loss: 0.44929 | val_accuracy: 0.82185 |  0:01:03s
epoch 15 | loss: 0.43325 | val_accuracy: 0.83848 |  0:01:07s
epoch 16 | loss: 0.46537

[I 2024-08-05 18:11:53,422] Trial 7 finished with value: 0.9714964370546318 and parameters: {'n_d': 28, 'n_a': 49, 'n_steps': 7, 'gamma': 1.8766487221363977, 'lambda_sparse': 0.0006675400645434709, 'learning_rate': 0.0058113942255016895, 'batch_size': 32, 'num_epochs': 87}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.76699 | val_accuracy: 0.51781 |  0:00:01s
epoch 1  | loss: 1.60447 | val_accuracy: 0.51781 |  0:00:03s
epoch 2  | loss: 1.59605 | val_accuracy: 0.53682 |  0:00:04s
epoch 3  | loss: 1.54787 | val_accuracy: 0.53207 |  0:00:06s
epoch 4  | loss: 1.42637 | val_accuracy: 0.54394 |  0:00:07s
epoch 5  | loss: 1.48927 | val_accuracy: 0.51544 |  0:00:09s
epoch 6  | loss: 1.35796 | val_accuracy: 0.50831 |  0:00:10s
epoch 7  | loss: 1.45367 | val_accuracy: 0.52732 |  0:00:12s
epoch 8  | loss: 1.34717 | val_accuracy: 0.51544 |  0:00:13s
epoch 9  | loss: 1.35595 | val_accuracy: 0.53207 |  0:00:15s
epoch 10 | loss: 1.32709 | val_accuracy: 0.54394 |  0:00:16s
epoch 11 | loss: 1.23012 | val_accuracy: 0.54632 |  0:00:18s
epoch 12 | loss: 1.20813 | val_accuracy: 0.58432 |  0:00:19s
epoch 13 | loss: 1.15357 | val_accuracy: 0.5772  |  0:00:21s
epoch 14 | loss: 1.13791 | val_accuracy: 0.59145 |  0:00:22s
epoch 15 | loss: 1.10194 | val_accuracy: 0.57957 |  0:00:24s
epoch 16 | loss: 1.09427

[I 2024-08-05 18:12:20,107] Trial 8 finished with value: 0.6009501187648456 and parameters: {'n_d': 62, 'n_a': 37, 'n_steps': 6, 'gamma': 1.6201562407007255, 'lambda_sparse': 1.0384834490944147e-05, 'learning_rate': 0.00010708685120389549, 'batch_size': 128, 'num_epochs': 17}. Best is trial 2 with value: 0.9809976247030879.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.29181 | val_accuracy: 0.55344 |  0:00:02s
epoch 1  | loss: 0.58301 | val_accuracy: 0.77435 |  0:00:04s
epoch 2  | loss: 0.51059 | val_accuracy: 0.7791  |  0:00:06s
epoch 3  | loss: 0.43529 | val_accuracy: 0.79572 |  0:00:08s
epoch 4  | loss: 0.45393 | val_accuracy: 0.82423 |  0:00:10s
epoch 5  | loss: 0.46067 | val_accuracy: 0.80523 |  0:00:12s
epoch 6  | loss: 0.43696 | val_accuracy: 0.80998 |  0:00:14s
epoch 7  | loss: 0.43929 | val_accuracy: 0.84086 |  0:00:16s
epoch 8  | loss: 0.41706 | val_accuracy: 0.84086 |  0:00:18s
epoch 9  | loss: 0.38072 | val_accuracy: 0.87173 |  0:00:20s
epoch 10 | loss: 0.36995 | val_accuracy: 0.87886 |  0:00:22s
epoch 11 | loss: 0.32455 | val_accuracy: 0.91211 |  0:00:24s
epoch 12 | loss: 0.29282 | val_accuracy: 0.88124 |  0:00:27s
epoch 13 | loss: 0.29754 | val_accuracy: 0.90499 |  0:00:29s
epoch 14 | loss: 0.27137 | val_accuracy: 0.90974 |  0:00:31s
epoch 15 | loss: 0.27407 | val_accuracy: 0.89786 |  0:00:33s
epoch 16 | loss: 0.21496

[I 2024-08-05 18:14:03,987] Trial 9 finished with value: 0.9881235154394299 and parameters: {'n_d': 52, 'n_a': 56, 'n_steps': 5, 'gamma': 1.25933998458459, 'lambda_sparse': 1.7488156171776915e-06, 'learning_rate': 0.09809841423191722, 'batch_size': 64, 'num_epochs': 82}. Best is trial 9 with value: 0.9881235154394299.


epoch 0  | loss: 1.29181 | val_accuracy: 0.55344 |  0:00:02s
epoch 1  | loss: 0.58301 | val_accuracy: 0.77435 |  0:00:04s
epoch 2  | loss: 0.51059 | val_accuracy: 0.7791  |  0:00:06s
epoch 3  | loss: 0.43529 | val_accuracy: 0.79572 |  0:00:08s
epoch 4  | loss: 0.45393 | val_accuracy: 0.82423 |  0:00:10s
epoch 5  | loss: 0.46067 | val_accuracy: 0.80523 |  0:00:13s
epoch 6  | loss: 0.43696 | val_accuracy: 0.80998 |  0:00:15s
epoch 7  | loss: 0.43929 | val_accuracy: 0.84086 |  0:00:17s
epoch 8  | loss: 0.41706 | val_accuracy: 0.84086 |  0:00:19s
epoch 9  | loss: 0.38072 | val_accuracy: 0.87173 |  0:00:21s
epoch 10 | loss: 0.36995 | val_accuracy: 0.87886 |  0:00:23s
epoch 11 | loss: 0.32455 | val_accuracy: 0.91211 |  0:00:25s
epoch 12 | loss: 0.29282 | val_accuracy: 0.88124 |  0:00:28s
epoch 13 | loss: 0.29754 | val_accuracy: 0.90499 |  0:00:30s
epoch 14 | loss: 0.27137 | val_accuracy: 0.90974 |  0:00:32s
epoch 15 | loss: 0.27407 | val_accuracy: 0.89786 |  0:00:34s
epoch 16 | loss: 0.21496



epoch 0  | loss: 1.38288 | val_accuracy: 0.64286 |  0:00:01s
epoch 1  | loss: 0.57713 | val_accuracy: 0.80357 |  0:00:03s
epoch 2  | loss: 0.53368 | val_accuracy: 0.72321 |  0:00:05s
epoch 3  | loss: 0.48937 | val_accuracy: 0.76786 |  0:00:06s
epoch 4  | loss: 0.48496 | val_accuracy: 0.83333 |  0:00:08s
epoch 5  | loss: 0.49387 | val_accuracy: 0.78274 |  0:00:10s
epoch 6  | loss: 0.52512 | val_accuracy: 0.73214 |  0:00:11s
epoch 7  | loss: 0.54728 | val_accuracy: 0.80357 |  0:00:13s
epoch 8  | loss: 0.52968 | val_accuracy: 0.80357 |  0:00:15s
epoch 9  | loss: 0.49363 | val_accuracy: 0.81548 |  0:00:16s
epoch 10 | loss: 0.48266 | val_accuracy: 0.83631 |  0:00:18s
epoch 11 | loss: 0.45182 | val_accuracy: 0.71429 |  0:00:20s
epoch 12 | loss: 0.48429 | val_accuracy: 0.79167 |  0:00:21s
epoch 13 | loss: 0.48161 | val_accuracy: 0.84226 |  0:00:23s
epoch 14 | loss: 0.46544 | val_accuracy: 0.8244  |  0:00:25s
epoch 15 | loss: 0.4692  | val_accuracy: 0.8006  |  0:00:26s
epoch 16 | loss: 0.4118 



epoch 0  | loss: 1.56676 | val_accuracy: 0.67857 |  0:00:01s
epoch 1  | loss: 0.65907 | val_accuracy: 0.77976 |  0:00:03s
epoch 2  | loss: 0.53966 | val_accuracy: 0.79464 |  0:00:05s
epoch 3  | loss: 0.49402 | val_accuracy: 0.79167 |  0:00:06s
epoch 4  | loss: 0.52019 | val_accuracy: 0.78274 |  0:00:08s
epoch 5  | loss: 0.45172 | val_accuracy: 0.80357 |  0:00:09s
epoch 6  | loss: 0.44777 | val_accuracy: 0.82738 |  0:00:11s
epoch 7  | loss: 0.41476 | val_accuracy: 0.77083 |  0:00:13s
epoch 8  | loss: 0.44487 | val_accuracy: 0.86607 |  0:00:14s
epoch 9  | loss: 0.37832 | val_accuracy: 0.7619  |  0:00:16s
epoch 10 | loss: 0.39866 | val_accuracy: 0.84524 |  0:00:18s
epoch 11 | loss: 0.39669 | val_accuracy: 0.85119 |  0:00:19s
epoch 12 | loss: 0.38325 | val_accuracy: 0.8631  |  0:00:21s
epoch 13 | loss: 0.34421 | val_accuracy: 0.88393 |  0:00:23s
epoch 14 | loss: 0.33623 | val_accuracy: 0.81845 |  0:00:24s
epoch 15 | loss: 0.34128 | val_accuracy: 0.8244  |  0:00:26s
epoch 16 | loss: 0.36298



epoch 0  | loss: 1.283   | val_accuracy: 0.63988 |  0:00:01s
epoch 1  | loss: 0.60551 | val_accuracy: 0.69643 |  0:00:03s
epoch 2  | loss: 0.54523 | val_accuracy: 0.77083 |  0:00:04s
epoch 3  | loss: 0.47271 | val_accuracy: 0.69643 |  0:00:06s
epoch 4  | loss: 0.40081 | val_accuracy: 0.80655 |  0:00:08s
epoch 5  | loss: 0.40475 | val_accuracy: 0.8006  |  0:00:10s
epoch 6  | loss: 0.43311 | val_accuracy: 0.78869 |  0:00:11s
epoch 7  | loss: 0.42547 | val_accuracy: 0.79762 |  0:00:13s
epoch 8  | loss: 0.37884 | val_accuracy: 0.8244  |  0:00:14s
epoch 9  | loss: 0.38675 | val_accuracy: 0.8006  |  0:00:16s
epoch 10 | loss: 0.35053 | val_accuracy: 0.82143 |  0:00:18s
epoch 11 | loss: 0.35182 | val_accuracy: 0.8244  |  0:00:19s
epoch 12 | loss: 0.33827 | val_accuracy: 0.83929 |  0:00:21s
epoch 13 | loss: 0.33788 | val_accuracy: 0.8125  |  0:00:23s
epoch 14 | loss: 0.31385 | val_accuracy: 0.80357 |  0:00:24s
epoch 15 | loss: 0.31424 | val_accuracy: 0.83929 |  0:00:26s
epoch 16 | loss: 0.29874



epoch 0  | loss: 1.36169 | val_accuracy: 0.65476 |  0:00:01s
epoch 1  | loss: 0.6352  | val_accuracy: 0.70536 |  0:00:03s
epoch 2  | loss: 0.4915  | val_accuracy: 0.64583 |  0:00:05s
epoch 3  | loss: 0.43215 | val_accuracy: 0.83333 |  0:00:06s
epoch 4  | loss: 0.44289 | val_accuracy: 0.83333 |  0:00:08s
epoch 5  | loss: 0.41008 | val_accuracy: 0.83631 |  0:00:09s
epoch 6  | loss: 0.43502 | val_accuracy: 0.82143 |  0:00:11s
epoch 7  | loss: 0.45078 | val_accuracy: 0.81845 |  0:00:13s
epoch 8  | loss: 0.42384 | val_accuracy: 0.79762 |  0:00:15s
epoch 9  | loss: 0.43032 | val_accuracy: 0.8006  |  0:00:16s
epoch 10 | loss: 0.43416 | val_accuracy: 0.86012 |  0:00:18s
epoch 11 | loss: 0.39751 | val_accuracy: 0.8631  |  0:00:19s
epoch 12 | loss: 0.41822 | val_accuracy: 0.83333 |  0:00:21s
epoch 13 | loss: 0.41428 | val_accuracy: 0.82738 |  0:00:23s
epoch 14 | loss: 0.38011 | val_accuracy: 0.86905 |  0:00:24s
epoch 15 | loss: 0.41704 | val_accuracy: 0.83036 |  0:00:26s
epoch 16 | loss: 0.42196



epoch 0  | loss: 1.38353 | val_accuracy: 0.68452 |  0:00:01s
epoch 1  | loss: 0.54795 | val_accuracy: 0.77083 |  0:00:03s
epoch 2  | loss: 0.5152  | val_accuracy: 0.77381 |  0:00:04s
epoch 3  | loss: 0.47752 | val_accuracy: 0.75298 |  0:00:06s
epoch 4  | loss: 0.39811 | val_accuracy: 0.79464 |  0:00:08s
epoch 5  | loss: 0.39928 | val_accuracy: 0.80655 |  0:00:09s
epoch 6  | loss: 0.37572 | val_accuracy: 0.8244  |  0:00:11s
epoch 7  | loss: 0.34347 | val_accuracy: 0.84821 |  0:00:13s
epoch 8  | loss: 0.31888 | val_accuracy: 0.83631 |  0:00:14s
epoch 9  | loss: 0.3636  | val_accuracy: 0.8006  |  0:00:16s
epoch 10 | loss: 0.3283  | val_accuracy: 0.86607 |  0:00:18s
epoch 11 | loss: 0.34345 | val_accuracy: 0.81548 |  0:00:19s
epoch 12 | loss: 0.3532  | val_accuracy: 0.8244  |  0:00:21s
epoch 13 | loss: 0.35629 | val_accuracy: 0.77083 |  0:00:22s
epoch 14 | loss: 0.39899 | val_accuracy: 0.80357 |  0:00:24s
epoch 15 | loss: 0.38545 | val_accuracy: 0.83036 |  0:00:26s
epoch 16 | loss: 0.38895

[I 2024-08-05 18:19:30,051] A new study created in memory with name: no-name-e7a172ef-cfc8-4756-8e34-740280264f1b


                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.871886  0.861541         0.862099   
KNN                         0.864769  0.887051         0.854075   
Decision Tree               0.928826  0.900774         0.944849   
Random Forest               0.939502  0.983462          0.92794   
Gradient Boosting            0.94306  0.991027         0.956413   
XGBoost                     0.953737  0.992435         0.962647   
LightGBM                    0.953737  0.991291         0.963536   
CatBoost                     0.97153  0.994194         0.953754   
MLP                         0.985748  0.998273         0.956548   
DNN                         0.988124  0.997841          0.96131   
DCN                         0.978622  0.997909         0.963095   
Wide_and_Deep               0.978622  0.997205         0.961905   
XGBoost + NN                0.541568       0.5         0.704762   
LightGBM + NN               0.978622  0.992512           0.962

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:19:36,117] Trial 0 finished with value: 0.9738717339667459 and parameters: {'heads': 7, 'dim': 196, 'depth': 3, 'mlp_dim': 236, 'dropout': 0.228319200157021, 'learning_rate': 0.0003629086941593016, 'batch_size': 256, 'num_epochs': 12}. Best is trial 0 with value: 0.9738717339667459.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:20:32,694] Trial 1 finished with value: 0.684085510688836 and parameters: {'heads': 2, 'dim': 66, 'depth': 5, 'mlp_dim': 96, 'dropout': 0.14598094903150444, 'learning_rate': 0.00359812326841539, 'batch_size': 64, 'num_epochs': 69}. Best is trial 0 with value: 0.9738717339667459.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:21:33,113] Trial 2 finished with value: 0.5415676959619953 and parameters: {'heads': 6, 'dim': 216, 'depth': 4, 'mlp_dim': 113, 'dropout': 0.46906775637683346, 'learning

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.871886  0.861541         0.862099   
KNN                         0.864769  0.887051         0.854075   
Decision Tree               0.928826  0.900774         0.944849   
Random Forest               0.939502  0.983462          0.92794   
Gradient Boosting            0.94306  0.991027         0.956413   
XGBoost                     0.953737  0.992435         0.962647   
LightGBM                    0.953737  0.991291         0.963536   
CatBoost                     0.97153  0.994194         0.953754   
MLP                         0.985748  0.998273         0.956548   
DNN                         0.988124  0.997841          0.96131   
DCN                         0.978622  0.997909         0.963095   
Wide_and_Deep               0.978622  0.997205         0.961905   
XGBoost + NN                0.541568       0.5         0.704762   
LightGBM + NN               0.978622  0.992512           0.962

In [141]:
file_prefix = "weather"  # Change this to any word you like
df =  pd.read_csv(f'Dataset/{file_prefix}.csv')
df =  encode_categorical_data(df)
X = df.drop('Y', axis=1)
y = df['Y']
X, y = apply_yeojohnson(X, y)
X, y = apply_smote_to_training(X, y)

result = model_comparison(df, 'Y')
print(result)
result, best_params = mlp_comparison(X, y, result)
print(result)
result, best_params = dnn_comparison(X, y, result)
print(result)
result, best_params = dcn_comparison(X, y, result)
print(result)
result, best_params = wide_and_deep_comparison(X, y, result)
print(result)
result, best_params = xgb_nn_comparison(X, y, result)
print(result)
result, best_params = lgbm_nn_comparison(X, y, result)
print(result)
result, best_params = autoint_nn_comparison(X, y, result)
print(result)
result, best_params = ft_transformer_nn_comparison(X, y, result)
print(result)
result, best_params = neural_architecture_search(X, y, result)
print(result)
result, best_params = kan_comparison(X, y, result)
print(result)
result, best_params = node_comparison(X, y, result)
print(result)
result, best_params = tabnet_comparison(X, y, result)
print(result)
result, best_params = saint_comparison(X, y, result)
print(result)

result.to_csv(f'result/comparison/classification/{file_prefix}_result.csv', index=True)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000236 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 639
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 10
[LightGBM] [Info] Start training from score -1.281646
[LightGBM] [Info] Start training from score -1.426227
[LightGBM] [Info] Start training from score -1.415177
[LightGBM] [Info] Start training from score -1.429938
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 639
[LightGBM] [Info] Number of data points in the train set: 1124, number of used features: 10
[LightGBM] [Info] Start training from score -1.281646
[LightGBM] [Info] Start training from score -1.426227
[LightGBM] [Info] Start training from score -1.415177
[LightGBM] [Info] Start training from score -1.4

[I 2024-08-05 18:47:51,147] A new study created in memory with name: no-name-55628b14-fa4c-4317-91b5-25ed556d7757


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.010972                     0.000996   
KNN                                    0.002992                     0.005983   

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:47:52,365] Trial 0 finished with value: 0.9335548172757475 and parameters: {'hidden_dim_0': 155, 'hidden_dim_1': 81, 'hidden_dim_2': 190, 'learning_rate': 0.005087570447831714, 'batch_size': 128, 'num_epochs': 17}. Best is trial 0 with value: 0.9335548172757475.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:47:55,950] Trial 1 finished with value: 0.9136212624584718 and parameters: {'hidden_dim_0': 34, 'hidden_dim_1': 124, 'hidden_dim_2': 228, 'learning_rate': 0.02030329692430608, 'batch_size': 64, 'num_epochs': 37}. Best is trial 0 with value: 0.9335548172757475.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:48:10,111] Trial 2 finished with value: 0.9036544850498339 and parameters: {'hidden_dim_0': 149, 'hidden_dim_1': 209, 'hidden_dim_2': 200, 'learning_rate': 0.043642025526224364, 'batch_size': 64, 'num_epochs':

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logistic Regression                    0.010972                     0.000996   
KNN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:49:25,023] Trial 0 finished with value: 0.9435215946843853 and parameters: {'cross_layers': 3, 'hidden_layer_0': 177, 'hidden_layer_1': 183, 'hidden_layer_2': 76, 'learning_rate': 0.0038500109097153344, 'batch_size': 128, 'num_epochs': 93}. Best is trial 0 with value: 0.9435215946843853.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:49:27,327] Trial 1 finished with value: 0.8837209302325582 and parameters: {'cross_layers': 2, 'hidden_layer_0': 33, 'hidden_layer_1': 94, 'hidden_layer_2': 172, 'learning_rate': 0.0006840100687007838, 'batch_size': 256, 'num_epochs': 42}. Best is trial 0 with value: 0.9435215946843853.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:49:39,102] Trial 2 finished with value: 0.9169435215946844 and parameters: {'cross_layers': 4, 'hidden_layer_0': 66, 'hidden_layer_1': 215, 'hidden_layer_2'

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   
DCN                  0.930233  0.979752           0.9225        0.009354   

                    Training Time (Best Params) Inference Time (Best Params)  \
Logisti

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:51:19,106] Trial 0 finished with value: 0.9302325581395349 and parameters: {'hidden_layer_0': 95, 'hidden_layer_1': 247, 'hidden_layer_2': 203, 'learning_rate': 0.001208249203661858, 'batch_size': 64, 'num_epochs': 79}. Best is trial 0 with value: 0.9302325581395349.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:51:24,440] Trial 1 finished with value: 0.9169435215946844 and parameters: {'hidden_layer_0': 48, 'hidden_layer_1': 164, 'hidden_layer_2': 100, 'learning_rate': 0.03721447085035683, 'batch_size': 128, 'num_epochs': 80}. Best is trial 0 with value: 0.9302325581395349.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:51:35,389] Trial 2 finished with value: 0.9169435215946844 and parameters: {'hidden_layer_0': 154, 'hidden_layer_1': 180, 'hidden_layer_2': 62, 'learning_rate': 0.002170363009016053, 'batch_size': 

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   
DCN                  0.930233  0.979752           0.9225        0.009354   
Wide_and_Deep         0.92691  0.980429           0.9225        0.010737   

           

  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:53:04,830] Trial 0 finished with value: 0.9435215946843853 and parameters: {'n_estimators': 155, 'max_depth': 8, 'xgb_learning_rate': 0.002034683451998202, 'subsample': 0.6958406650988738, 'colsample_bytree': 0.7494304421949056, 'use_hidden_layer_0': False, 'use_hidden_layer_1': False, 'use_hidden_layer_2': False, 'nn_learning_rate': 0.06482820521770243, 'batch_size': 256, 'num_epochs': 75}. Best is trial 0 with value: 0.9435215946843853.
  'learning_rate': trial.suggest_loguniform('xgb_learning_rate', 1e-4, 1e-1),
Parameters: { "use_label_encoder" } are not used.

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:53:15,046] Trial 1 finished with value: 0.8704318936877077 and parameters: {'n_estimators': 217, 'max_depth':

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   
DCN                  0.930233  0.979752           0.9225        0.009354   
Wide_and_Deep         0.92691  0.980429           0.9225        0.010737   
XGBoost + NN

[I 2024-08-05 18:54:34,439] Trial 0 finished with value: 0.4186046511627907 and parameters: {'n_estimators': 130, 'max_depth': 9, 'lgb_learning_rate': 0.00030578765760566043, 'num_leaves': 65, 'subsample': 0.7330554387992254, 'colsample_bytree': 0.7562403892120108, 'hidden_layer_0': 221, 'hidden_layer_1': 173, 'hidden_layer_2': 154, 'nn_learning_rate': 0.056035479588634765, 'batch_size': 256, 'num_epochs': 47}. Best is trial 0 with value: 0.4186046511627907.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:54:38,191] Trial 1 finished with value: 0.9169435215946844 and parameters: {'n_estimators': 101, 'max_depth': 5, 'lgb_learning_rate': 0.006782594039377417, 'num_leaves': 70, 'subsample': 0.841063634782272, 'colsample_bytree': 0.7734130706536662, 'hidden_layer_0': 54, 'hidden_layer_1': 115, 'hidden_layer_2': 88, 'nn_learning_rate': 0.0011702439320829103, 'batch_size': 64, 'num_epochs': 39}. Best is trial 1 with value: 0.9169435215946844.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:54:40,196] Trial 2 finished with value: 0.9501661129568106 and parameters: {'n_estimators': 171, 'max_depth': 7, 'lgb_learning_rate': 0.0831814806342246, 'num_leaves': 21, 'subsample': 0.5432073026120943, 'colsample_bytree': 0.9386326408657575, 'hidden_layer_0': 80, 'hidden_layer_1': 146, 'hidden_layer_2': 74, 'nn_learning_rate': 0.004640847466225095, 'batch_size': 256, 'num_epochs': 31}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:54:47,406] Trial 3 finished with value: 0.9368770764119602 and parameters: {'n_estimators': 124, 'max_depth': 4, 'lgb_learning_rate': 0.04476011794840235, 'num_leaves': 72, 'subsample': 0.5408412126223936, 'colsample_bytree': 0.7241618541131097, 'hidden_layer_0': 111, 'hidden_layer_1': 195, 'hidden_layer_2': 182, 'nn_learning_rate': 0.0006306617566476257, 'batch_size': 64, 'num_epochs': 60}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000125 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:54:52,912] Trial 4 finished with value: 0.9069767441860465 and parameters: {'n_estimators': 279, 'max_depth': 9, 'lgb_learning_rate': 0.0003078519932744617, 'num_leaves': 58, 'subsample': 0.9077198794219161, 'colsample_bytree': 0.8628942251978691, 'hidden_layer_0': 180, 'hidden_layer_1': 140, 'hidden_layer_2': 91, 'nn_learning_rate': 0.00027311146256349546, 'batch_size': 128, 'num_epochs': 66}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:55:11,217] Trial 5 finished with value: 0.9435215946843853 and parameters: {'n_estimators': 264, 'max_depth': 9, 'lgb_learning_rate': 0.02560986131023241, 'num_leaves': 74, 'subsample': 0.6955973908846138, 'colsample_bytree': 0.6168001070894384, 'hidden_layer_0': 217, 'hidden_layer_1': 164, 'hidden_layer_2': 104, 'nn_learning_rate': 0.0032951573837203736, 'batch_size': 32, 'num_epochs': 86}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000109 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:55:16,685] Trial 6 finished with value: 0.9102990033222591 and parameters: {'n_estimators': 147, 'max_depth': 5, 'lgb_learning_rate': 0.0005476242887919761, 'num_leaves': 73, 'subsample': 0.655526514674687, 'colsample_bytree': 0.8461355649724654, 'hidden_layer_0': 169, 'hidden_layer_1': 234, 'hidden_layer_2': 76, 'nn_learning_rate': 0.02949003446435111, 'batch_size': 256, 'num_epochs': 100}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:55:23,869] Trial 7 finished with value: 0.9401993355481728 and parameters: {'n_estimators': 226, 'max_depth': 9, 'lgb_learning_rate': 0.07849805473932417, 'num_leaves': 77, 'subsample': 0.5362570809513426, 'colsample_bytree': 0.6037556938599677, 'hidden_layer_0': 94, 'hidden_layer_1': 50, 'hidden_layer_2': 93, 'nn_learning_rate': 0.010724805295183625, 'batch_size': 64, 'num_epochs': 77}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000118 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:55:27,079] Trial 8 finished with value: 0.9102990033222591 and parameters: {'n_estimators': 235, 'max_depth': 6, 'lgb_learning_rate': 0.001858198438495912, 'num_leaves': 44, 'subsample': 0.9487161949058833, 'colsample_bytree': 0.8298830139287023, 'hidden_layer_0': 73, 'hidden_layer_1': 192, 'hidden_layer_2': 252, 'nn_learning_rate': 0.04188841013064104, 'batch_size': 64, 'num_epochs': 24}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000123 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983


[I 2024-08-05 18:55:39,911] Trial 9 finished with value: 0.8803986710963455 and parameters: {'n_estimators': 64, 'max_depth': 3, 'lgb_learning_rate': 0.002199829292168629, 'num_leaves': 75, 'subsample': 0.7582318907433913, 'colsample_bytree': 0.9392262962327416, 'hidden_layer_0': 102, 'hidden_layer_1': 170, 'hidden_layer_2': 234, 'nn_learning_rate': 0.0038879127476306573, 'batch_size': 32, 'num_epochs': 61}. Best is trial 2 with value: 0.9501661129568106.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000119 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 1200, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.392983
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 709
[LightGBM] [Info] Number of data points in the train set: 960, number of used features: 10
[LightGBM] [Info] Start training from score -1.369765
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.398873
[LightGBM] [Info] Start training from score -1.39

[I 2024-08-05 18:55:50,510] A new study created in memory with name: no-name-62a90355-34b6-436e-b9c8-1f0b33934405


                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   
DCN                  0.930233  0.979752           0.9225        0.009354   
Wide_and_Deep         0.92691  0.980429           0.9225        0.010737   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:55:54,934] Trial 0 finished with value: 0.920265780730897 and parameters: {'num_heads': 4, 'embedding_dim': 16, 'num_layers': 2, 'hidden_layer_0': 118, 'hidden_layer_1': 82, 'hidden_layer_2': 69, 'nn_learning_rate': 0.0031827428445577698, 'batch_size': 128, 'num_epochs': 27}. Best is trial 0 with value: 0.920265780730897.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:56:00,231] Trial 1 finished with value: 0.9269102990033222 and parameters: {'num_heads': 6, 'embedding_dim': 42, 'num_layers': 3, 'hidden_layer_0': 181, 'hidden_layer_1': 177, 'hidden_layer_2': 143, 'nn_learning_rate': 0.008686183787890246, 'batch_size': 256, 'num_epochs': 28}. Best is trial 1 with value: 0.9269102990033222.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:56:05,829] Trial 2 finished with value: 0.8704318936877077 and p

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   
DCN                  0.930233  0.979752           0.9225        0.009354   
Wide_and_Deep         0.92691  0.980429           0.9225        0.010737   
XGBoost + NN

  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:57:46,548] Trial 0 finished with value: 0.8903654485049833 and parameters: {'num_heads': 8, 'embedding_dim': 24, 'num_layers': 2, 'hidden_layer_0': 129, 'hidden_layer_1': 226, 'hidden_layer_2': 126, 'nn_learning_rate': 0.00016948220941437605, 'batch_size': 64, 'num_epochs': 51}. Best is trial 0 with value: 0.8903654485049833.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:57:58,297] Trial 1 finished with value: 0.9335548172757475 and parameters: {'num_heads': 2, 'embedding_dim': 22, 'num_layers': 2, 'hidden_layer_0': 197, 'hidden_layer_1': 106, 'hidden_layer_2': 212, 'nn_learning_rate': 0.008666025495407903, 'batch_size': 64, 'num_epochs': 48}. Best is trial 1 with value: 0.9335548172757475.
  nn_learning_rate = trial.suggest_loguniform('nn_learning_rate', 1e-4, 1e-1)
[I 2024-08-05 18:58:09,515] Trial 2 finished with value: 0.9368770764119602 an

                     Accuracy AUC Score CV Mean Accuracy CV Std Accuracy  \
Logistic Regression  0.846975  0.936392         0.847877        0.012241   
KNN                  0.914591  0.976787         0.908385        0.018048   
Decision Tree        0.925267  0.951957         0.910147        0.018065   
Random Forest        0.946619  0.995741         0.926171        0.015496   
Gradient Boosting    0.935943  0.994383         0.928849        0.017494   
XGBoost              0.935943  0.994944         0.931516         0.01599   
LightGBM             0.939502  0.996228         0.930623        0.015492   
CatBoost             0.932384  0.993246         0.922611        0.017416   
MLP                   0.92691  0.989907             0.93        0.012416   
DNN                  0.933555  0.979618         0.925833        0.007169   
DCN                  0.930233  0.979752           0.9225        0.009354   
Wide_and_Deep         0.92691  0.980429           0.9225        0.010737   
XGBoost + NN

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:00:37,058] Trial 0 finished with value: 0.9235880398671097 and parameters: {'num_layers': 4, 'hidden_layer_0': 212, 'hidden_layer_1': 230, 'hidden_layer_2': 111, 'hidden_layer_3': 242, 'learning_rate': 0.0008468400166191659, 'batch_size': 128, 'num_epochs': 73}. Best is trial 0 with value: 0.9235880398671097.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:00:42,899] Trial 1 finished with value: 0.893687707641196 and parameters: {'num_layers': 2, 'hidden_layer_0': 209, 'hidden_layer_1': 248, 'learning_rate': 0.00015547375656743535, 'batch_size': 128, 'num_epochs': 72}. Best is trial 0 with value: 0.9235880398671097.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:00:57,658] Trial 2 finished with value: 0.9169435215946844 and parameters: {'num_layers': 4, 'hidden_layer_0': 169, 'hidden_layer_1': 172, 'hidden_layer_2': 

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.846975  0.936392         0.847877   
KNN                         0.914591  0.976787         0.908385   
Decision Tree               0.925267  0.951957         0.910147   
Random Forest               0.946619  0.995741         0.926171   
Gradient Boosting           0.935943  0.994383         0.928849   
XGBoost                     0.935943  0.994944         0.931516   
LightGBM                    0.939502  0.996228         0.930623   
CatBoost                    0.932384  0.993246         0.922611   
MLP                          0.92691  0.989907             0.93   
DNN                         0.933555  0.979618         0.925833   
DCN                         0.930233  0.979752           0.9225   
Wide_and_Deep                0.92691  0.980429           0.9225   
XGBoost + NN                 0.92691  0.973848         0.931667   
LightGBM + NN               0.953488  0.982328           0.937

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:02:31,180] Trial 0 finished with value: 0.7873754152823921 and parameters: {'hidden_dim': 42, 'learning_rate': 0.00047062757468444705, 'batch_size': 256, 'num_epochs': 46}. Best is trial 0 with value: 0.7873754152823921.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:02:32,440] Trial 1 finished with value: 0.813953488372093 and parameters: {'hidden_dim': 211, 'learning_rate': 0.00027672719156023046, 'batch_size': 256, 'num_epochs': 36}. Best is trial 1 with value: 0.813953488372093.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:02:33,697] Trial 2 finished with value: 0.9302325581395349 and parameters: {'hidden_dim': 237, 'learning_rate': 0.03385327373365779, 'batch_size': 256, 'num_epochs': 30}. Best is trial 2 with value: 0.9302325581395349.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.846975  0.936392         0.847877   
KNN                         0.914591  0.976787         0.908385   
Decision Tree               0.925267  0.951957         0.910147   
Random Forest               0.946619  0.995741         0.926171   
Gradient Boosting           0.935943  0.994383         0.928849   
XGBoost                     0.935943  0.994944         0.931516   
LightGBM                    0.939502  0.996228         0.930623   
CatBoost                    0.932384  0.993246         0.922611   
MLP                          0.92691  0.989907             0.93   
DNN                         0.933555  0.979618         0.925833   
DCN                         0.930233  0.979752           0.9225   
Wide_and_Deep                0.92691  0.980429           0.9225   
XGBoost + NN                 0.92691  0.973848         0.931667   
LightGBM + NN               0.953488  0.982328           0.937

[I 2024-08-05 19:03:54,995] Trial 0 finished with value: 0.9169435215946844 and parameters: {'num_layers': 5, 'num_trees': 9, 'tree_dim': 28, 'learning_rate': 0.019658148029781285, 'batch_size': 32, 'num_epochs': 53}. Best is trial 0 with value: 0.9169435215946844.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:04:06,878] Trial 1 finished with value: 0.7807308970099668 and parameters: {'num_layers': 4, 'num_trees': 3, 'tree_dim': 11, 'learning_rate': 0.00011007774861855892, 'batch_size': 128, 'num_epochs': 90}. Best is trial 0 with value: 0.9169435215946844.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:04:15,030] Trial 2 finished with value: 0.9302325581395349 and parameters: {'num_layers': 5, 'num_trees': 2, 'tree_dim': 64, 'learning_rate': 0.017411749610624618, 'batch_size': 64, 'num_epochs': 52}. Best is trial 2 with value: 0.9302325581395349.
  learning_rate = trial.suggest_loguniform('learning_ra

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.846975  0.936392         0.847877   
KNN                         0.914591  0.976787         0.908385   
Decision Tree               0.925267  0.951957         0.910147   
Random Forest               0.946619  0.995741         0.926171   
Gradient Boosting           0.935943  0.994383         0.928849   
XGBoost                     0.935943  0.994944         0.931516   
LightGBM                    0.939502  0.996228         0.930623   
CatBoost                    0.932384  0.993246         0.922611   
MLP                          0.92691  0.989907             0.93   
DNN                         0.933555  0.979618         0.925833   
DCN                         0.930233  0.979752           0.9225   
Wide_and_Deep                0.92691  0.980429           0.9225   
XGBoost + NN                 0.92691  0.973848         0.931667   
LightGBM + NN               0.953488  0.982328           0.937

  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 5.07935 | val_accuracy: 0.35216 |  0:00:02s
epoch 1  | loss: 2.34342 | val_accuracy: 0.5515  |  0:00:05s
epoch 2  | loss: 1.40437 | val_accuracy: 0.65781 |  0:00:07s
epoch 3  | loss: 1.05329 | val_accuracy: 0.69767 |  0:00:10s
epoch 4  | loss: 0.94426 | val_accuracy: 0.75415 |  0:00:12s
epoch 5  | loss: 0.90987 | val_accuracy: 0.73422 |  0:00:15s
epoch 6  | loss: 0.81989 | val_accuracy: 0.80066 |  0:00:17s
epoch 7  | loss: 0.76519 | val_accuracy: 0.81395 |  0:00:20s
epoch 8  | loss: 0.72253 | val_accuracy: 0.81728 |  0:00:23s
epoch 9  | loss: 0.76731 | val_accuracy: 0.83056 |  0:00:26s
epoch 10 | loss: 0.72874 | val_accuracy: 0.82392 |  0:00:28s
epoch 11 | loss: 0.67568 | val_accuracy: 0.84718 |  0:00:31s
epoch 12 | loss: 0.61755 | val_accuracy: 0.84053 |  0:00:34s
epoch 13 | loss: 0.65868 | val_accuracy: 0.85382 |  0:00:36s
epoch 14 | loss: 0.65163 | val_accuracy: 0.80731 |  0:00:39s
epoch 15 | loss: 0.64403 | val_accuracy: 0.77076 |  0:00:42s
epoch 16 | loss: 0.60348

[I 2024-08-05 19:06:57,473] Trial 0 finished with value: 0.8870431893687708 and parameters: {'n_d': 47, 'n_a': 36, 'n_steps': 10, 'gamma': 1.2760850065223903, 'lambda_sparse': 8.515360245851288e-06, 'learning_rate': 0.0017623485198515027, 'batch_size': 64, 'num_epochs': 50}. Best is trial 0 with value: 0.8870431893687708.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.5591  | val_accuracy: 0.23588 |  0:00:01s
epoch 1  | loss: 2.0706  | val_accuracy: 0.31561 |  0:00:02s
epoch 2  | loss: 1.70501 | val_accuracy: 0.43189 |  0:00:04s
epoch 3  | loss: 1.39095 | val_accuracy: 0.50831 |  0:00:05s
epoch 4  | loss: 1.21637 | val_accuracy: 0.52159 |  0:00:06s
epoch 5  | loss: 1.17256 | val_accuracy: 0.56811 |  0:00:08s
epoch 6  | loss: 1.05903 | val_accuracy: 0.60133 |  0:00:09s
epoch 7  | loss: 0.97784 | val_accuracy: 0.66113 |  0:00:11s
epoch 8  | loss: 0.92038 | val_accuracy: 0.65116 |  0:00:12s
epoch 9  | loss: 0.87104 | val_accuracy: 0.71429 |  0:00:14s
epoch 10 | loss: 0.82107 | val_accuracy: 0.74419 |  0:00:15s
epoch 11 | loss: 0.85067 | val_accuracy: 0.77076 |  0:00:17s
epoch 12 | loss: 0.81911 | val_accuracy: 0.78073 |  0:00:18s
epoch 13 | loss: 0.72001 | val_accuracy: 0.80399 |  0:00:20s
epoch 14 | loss: 0.72834 | val_accuracy: 0.80731 |  0:00:21s
epoch 15 | loss: 0.74126 | val_accuracy: 0.81063 |  0:00:22s
epoch 16 | loss: 0.68005

[I 2024-08-05 19:07:32,659] Trial 1 finished with value: 0.8504983388704319 and parameters: {'n_d': 18, 'n_a': 46, 'n_steps': 5, 'gamma': 1.5143136992955126, 'lambda_sparse': 0.0005945156504542123, 'learning_rate': 0.0007808866199240725, 'batch_size': 64, 'num_epochs': 24}. Best is trial 0 with value: 0.8870431893687708.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.62986 | val_accuracy: 0.60465 |  0:00:00s
epoch 1  | loss: 0.88612 | val_accuracy: 0.75415 |  0:00:01s
epoch 2  | loss: 0.75156 | val_accuracy: 0.80066 |  0:00:02s
epoch 3  | loss: 0.65008 | val_accuracy: 0.82724 |  0:00:03s
epoch 4  | loss: 0.59518 | val_accuracy: 0.82724 |  0:00:04s
epoch 5  | loss: 0.5352  | val_accuracy: 0.83056 |  0:00:04s
epoch 6  | loss: 0.52753 | val_accuracy: 0.8206  |  0:00:05s
epoch 7  | loss: 0.46641 | val_accuracy: 0.83721 |  0:00:06s
epoch 8  | loss: 0.4716  | val_accuracy: 0.84053 |  0:00:07s
epoch 9  | loss: 0.40498 | val_accuracy: 0.84385 |  0:00:08s
epoch 10 | loss: 0.38528 | val_accuracy: 0.85714 |  0:00:08s
epoch 11 | loss: 0.39435 | val_accuracy: 0.85714 |  0:00:09s
epoch 12 | loss: 0.34145 | val_accuracy: 0.85714 |  0:00:10s
epoch 13 | loss: 0.31645 | val_accuracy: 0.87043 |  0:00:11s
epoch 14 | loss: 0.3717  | val_accuracy: 0.8804  |  0:00:12s
epoch 15 | loss: 0.31527 | val_accuracy: 0.87708 |  0:00:13s
epoch 16 | loss: 0.32882

[I 2024-08-05 19:08:07,352] Trial 2 finished with value: 0.920265780730897 and parameters: {'n_d': 63, 'n_a': 10, 'n_steps': 3, 'gamma': 1.2249926687622161, 'lambda_sparse': 0.0003772097415076976, 'learning_rate': 0.0019051635664489265, 'batch_size': 64, 'num_epochs': 42}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 2.98348 | val_accuracy: 0.16611 |  0:00:02s
epoch 1  | loss: 2.74952 | val_accuracy: 0.21262 |  0:00:04s
epoch 2  | loss: 2.47397 | val_accuracy: 0.2093  |  0:00:06s
epoch 3  | loss: 2.39924 | val_accuracy: 0.19934 |  0:00:08s
epoch 4  | loss: 2.18737 | val_accuracy: 0.22924 |  0:00:10s
epoch 5  | loss: 2.01445 | val_accuracy: 0.25914 |  0:00:12s
epoch 6  | loss: 1.87478 | val_accuracy: 0.33887 |  0:00:14s
epoch 7  | loss: 1.71198 | val_accuracy: 0.34551 |  0:00:16s
epoch 8  | loss: 1.60574 | val_accuracy: 0.37542 |  0:00:18s
epoch 9  | loss: 1.46573 | val_accuracy: 0.45183 |  0:00:20s
epoch 10 | loss: 1.42341 | val_accuracy: 0.48837 |  0:00:22s
epoch 11 | loss: 1.4202  | val_accuracy: 0.52492 |  0:00:24s
epoch 12 | loss: 1.33402 | val_accuracy: 0.56146 |  0:00:26s
epoch 13 | loss: 1.32473 | val_accuracy: 0.61794 |  0:00:28s
epoch 14 | loss: 1.25113 | val_accuracy: 0.59468 |  0:00:30s
epoch 15 | loss: 1.16936 | val_accuracy: 0.62458 |  0:00:32s
epoch 16 | loss: 1.19131

[I 2024-08-05 19:10:01,661] Trial 3 finished with value: 0.7807308970099668 and parameters: {'n_d': 36, 'n_a': 27, 'n_steps': 4, 'gamma': 1.5922790334334165, 'lambda_sparse': 0.0006042910436743288, 'learning_rate': 0.0001430292002982192, 'batch_size': 32, 'num_epochs': 86}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 4.39141 | val_accuracy: 0.2691  |  0:00:00s
epoch 1  | loss: 4.19231 | val_accuracy: 0.26578 |  0:00:01s
epoch 2  | loss: 3.97399 | val_accuracy: 0.28571 |  0:00:01s
epoch 3  | loss: 3.79885 | val_accuracy: 0.28571 |  0:00:02s
epoch 4  | loss: 3.63599 | val_accuracy: 0.25581 |  0:00:02s
epoch 5  | loss: 3.49104 | val_accuracy: 0.29236 |  0:00:03s
epoch 6  | loss: 3.41396 | val_accuracy: 0.32226 |  0:00:03s
epoch 7  | loss: 3.16697 | val_accuracy: 0.3289  |  0:00:04s
epoch 8  | loss: 3.06226 | val_accuracy: 0.31561 |  0:00:04s
epoch 9  | loss: 2.93929 | val_accuracy: 0.32558 |  0:00:05s
epoch 10 | loss: 2.73612 | val_accuracy: 0.33223 |  0:00:06s
epoch 11 | loss: 2.75807 | val_accuracy: 0.33887 |  0:00:06s
epoch 12 | loss: 2.46077 | val_accuracy: 0.34551 |  0:00:07s
epoch 13 | loss: 2.3627  | val_accuracy: 0.38538 |  0:00:07s
epoch 14 | loss: 2.28019 | val_accuracy: 0.38538 |  0:00:08s
epoch 15 | loss: 2.12022 | val_accuracy: 0.39867 |  0:00:08s
epoch 16 | loss: 2.05979

[I 2024-08-05 19:10:18,412] Trial 4 finished with value: 0.6511627906976745 and parameters: {'n_d': 55, 'n_a': 37, 'n_steps': 5, 'gamma': 1.2242812070657014, 'lambda_sparse': 1.0380094374302126e-06, 'learning_rate': 0.00016049406918851867, 'batch_size': 256, 'num_epochs': 29}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.24217 | val_accuracy: 0.6711  |  0:00:02s
epoch 1  | loss: 0.83478 | val_accuracy: 0.79734 |  0:00:04s
epoch 2  | loss: 0.6592  | val_accuracy: 0.81395 |  0:00:07s
epoch 3  | loss: 0.59495 | val_accuracy: 0.75748 |  0:00:09s
epoch 4  | loss: 0.60879 | val_accuracy: 0.83056 |  0:00:11s
epoch 5  | loss: 0.60242 | val_accuracy: 0.82392 |  0:00:14s
epoch 6  | loss: 0.63929 | val_accuracy: 0.81395 |  0:00:16s
epoch 7  | loss: 0.60362 | val_accuracy: 0.82724 |  0:00:18s
epoch 8  | loss: 0.61424 | val_accuracy: 0.86047 |  0:00:21s
epoch 9  | loss: 0.51274 | val_accuracy: 0.86379 |  0:00:23s
epoch 10 | loss: 0.49058 | val_accuracy: 0.84718 |  0:00:26s
epoch 11 | loss: 0.50306 | val_accuracy: 0.84385 |  0:00:28s
epoch 12 | loss: 0.51612 | val_accuracy: 0.84718 |  0:00:30s
epoch 13 | loss: 0.50814 | val_accuracy: 0.83056 |  0:00:33s
epoch 14 | loss: 0.4675  | val_accuracy: 0.84053 |  0:00:35s
epoch 15 | loss: 0.4898  | val_accuracy: 0.84718 |  0:00:38s
epoch 16 | loss: 0.51361

[I 2024-08-05 19:11:07,113] Trial 5 finished with value: 0.8637873754152824 and parameters: {'n_d': 32, 'n_a': 21, 'n_steps': 5, 'gamma': 1.418718911766582, 'lambda_sparse': 0.0001573802701452926, 'learning_rate': 0.06415144628169532, 'batch_size': 32, 'num_epochs': 48}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.81582 | val_accuracy: 0.61462 |  0:00:00s
epoch 1  | loss: 0.99259 | val_accuracy: 0.68439 |  0:00:02s
epoch 2  | loss: 0.7971  | val_accuracy: 0.76744 |  0:00:03s
epoch 3  | loss: 0.72514 | val_accuracy: 0.74751 |  0:00:04s
epoch 4  | loss: 0.63497 | val_accuracy: 0.74419 |  0:00:05s
epoch 5  | loss: 0.66684 | val_accuracy: 0.82724 |  0:00:06s
epoch 6  | loss: 0.61673 | val_accuracy: 0.84718 |  0:00:07s
epoch 7  | loss: 0.53499 | val_accuracy: 0.84053 |  0:00:08s
epoch 8  | loss: 0.53797 | val_accuracy: 0.86711 |  0:00:09s
epoch 9  | loss: 0.45951 | val_accuracy: 0.85714 |  0:00:10s
epoch 10 | loss: 0.52052 | val_accuracy: 0.87043 |  0:00:11s
epoch 11 | loss: 0.40204 | val_accuracy: 0.86379 |  0:00:12s
epoch 12 | loss: 0.45536 | val_accuracy: 0.84385 |  0:00:14s
epoch 13 | loss: 0.42151 | val_accuracy: 0.88704 |  0:00:14s
epoch 14 | loss: 0.40376 | val_accuracy: 0.87375 |  0:00:15s
epoch 15 | loss: 0.40377 | val_accuracy: 0.89701 |  0:00:16s
epoch 16 | loss: 0.38772

[I 2024-08-05 19:11:26,122] Trial 6 finished with value: 0.8970099667774086 and parameters: {'n_d': 56, 'n_a': 45, 'n_steps': 5, 'gamma': 1.8477827752300038, 'lambda_sparse': 0.00024560607905856313, 'learning_rate': 0.007870965986027267, 'batch_size': 128, 'num_epochs': 18}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 3.22403 | val_accuracy: 0.24917 |  0:00:01s
epoch 1  | loss: 2.21979 | val_accuracy: 0.3588  |  0:00:02s
epoch 2  | loss: 1.60989 | val_accuracy: 0.51163 |  0:00:04s
epoch 3  | loss: 1.39098 | val_accuracy: 0.57475 |  0:00:05s
epoch 4  | loss: 1.19471 | val_accuracy: 0.65781 |  0:00:07s
epoch 5  | loss: 1.01537 | val_accuracy: 0.69767 |  0:00:08s
epoch 6  | loss: 0.91668 | val_accuracy: 0.71429 |  0:00:10s
epoch 7  | loss: 0.90427 | val_accuracy: 0.70764 |  0:00:11s
epoch 8  | loss: 0.85966 | val_accuracy: 0.75083 |  0:00:12s
epoch 9  | loss: 0.84848 | val_accuracy: 0.75415 |  0:00:14s
epoch 10 | loss: 0.77218 | val_accuracy: 0.77741 |  0:00:15s
epoch 11 | loss: 0.70932 | val_accuracy: 0.78405 |  0:00:17s
epoch 12 | loss: 0.74708 | val_accuracy: 0.79734 |  0:00:18s
epoch 13 | loss: 0.68807 | val_accuracy: 0.80399 |  0:00:20s
epoch 14 | loss: 0.6805  | val_accuracy: 0.7907  |  0:00:21s
epoch 15 | loss: 0.67845 | val_accuracy: 0.81395 |  0:00:23s
epoch 16 | loss: 0.66944

[I 2024-08-05 19:12:04,074] Trial 7 finished with value: 0.8438538205980066 and parameters: {'n_d': 41, 'n_a': 9, 'n_steps': 5, 'gamma': 1.4259336695212221, 'lambda_sparse': 9.042130544573978e-05, 'learning_rate': 0.0010048012925397728, 'batch_size': 64, 'num_epochs': 25}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 1.82739 | val_accuracy: 0.66113 |  0:00:03s
epoch 1  | loss: 0.86834 | val_accuracy: 0.79734 |  0:00:06s
epoch 2  | loss: 0.72381 | val_accuracy: 0.78073 |  0:00:09s
epoch 3  | loss: 0.67447 | val_accuracy: 0.8206  |  0:00:12s
epoch 4  | loss: 0.6666  | val_accuracy: 0.82724 |  0:00:15s
epoch 5  | loss: 0.56545 | val_accuracy: 0.84053 |  0:00:18s
epoch 6  | loss: 0.56701 | val_accuracy: 0.8505  |  0:00:22s
epoch 7  | loss: 0.5303  | val_accuracy: 0.83721 |  0:00:25s
epoch 8  | loss: 0.5248  | val_accuracy: 0.89037 |  0:00:28s
epoch 9  | loss: 0.50921 | val_accuracy: 0.84053 |  0:00:31s
epoch 10 | loss: 0.56213 | val_accuracy: 0.84718 |  0:00:34s
epoch 11 | loss: 0.5588  | val_accuracy: 0.82724 |  0:00:37s
epoch 12 | loss: 0.49184 | val_accuracy: 0.84718 |  0:00:40s
epoch 13 | loss: 0.49555 | val_accuracy: 0.83721 |  0:00:44s
epoch 14 | loss: 0.52471 | val_accuracy: 0.83721 |  0:00:47s
epoch 15 | loss: 0.47208 | val_accuracy: 0.87708 |  0:00:50s
epoch 16 | loss: 0.52463

[I 2024-08-05 19:13:05,120] Trial 8 finished with value: 0.8903654485049833 and parameters: {'n_d': 15, 'n_a': 41, 'n_steps': 7, 'gamma': 1.084193456898707, 'lambda_sparse': 0.00011084569692861578, 'learning_rate': 0.0061614620108009195, 'batch_size': 32, 'num_epochs': 62}. Best is trial 2 with value: 0.920265780730897.
  lambda_sparse = trial.suggest_loguniform('lambda_sparse', 1e-6, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)


epoch 0  | loss: 8.01257 | val_accuracy: 0.24252 |  0:00:01s
epoch 1  | loss: 7.85489 | val_accuracy: 0.22591 |  0:00:02s
epoch 2  | loss: 7.58892 | val_accuracy: 0.21927 |  0:00:03s
epoch 3  | loss: 7.21578 | val_accuracy: 0.20598 |  0:00:04s
epoch 4  | loss: 6.98196 | val_accuracy: 0.19934 |  0:00:06s
epoch 5  | loss: 6.53923 | val_accuracy: 0.20266 |  0:00:07s
epoch 6  | loss: 6.44323 | val_accuracy: 0.20598 |  0:00:08s
epoch 7  | loss: 6.09983 | val_accuracy: 0.19934 |  0:00:09s
epoch 8  | loss: 5.8303  | val_accuracy: 0.2093  |  0:00:10s
epoch 9  | loss: 5.71629 | val_accuracy: 0.21927 |  0:00:12s
epoch 10 | loss: 5.62288 | val_accuracy: 0.22591 |  0:00:13s

Early stopping occurred at epoch 10 with best_epoch = 0 and best_val_accuracy = 0.24252


[I 2024-08-05 19:13:19,303] Trial 9 finished with value: 0.2425249169435216 and parameters: {'n_d': 61, 'n_a': 34, 'n_steps': 9, 'gamma': 1.7195223215704283, 'lambda_sparse': 0.0005157105700373835, 'learning_rate': 0.0001951729022635888, 'batch_size': 256, 'num_epochs': 89}. Best is trial 2 with value: 0.920265780730897.


epoch 0  | loss: 1.62986 | val_accuracy: 0.60465 |  0:00:00s
epoch 1  | loss: 0.88612 | val_accuracy: 0.75415 |  0:00:01s
epoch 2  | loss: 0.75156 | val_accuracy: 0.80066 |  0:00:02s
epoch 3  | loss: 0.65008 | val_accuracy: 0.82724 |  0:00:03s
epoch 4  | loss: 0.59518 | val_accuracy: 0.82724 |  0:00:04s
epoch 5  | loss: 0.5352  | val_accuracy: 0.83056 |  0:00:05s
epoch 6  | loss: 0.52753 | val_accuracy: 0.8206  |  0:00:06s
epoch 7  | loss: 0.46641 | val_accuracy: 0.83721 |  0:00:06s
epoch 8  | loss: 0.4716  | val_accuracy: 0.84053 |  0:00:07s
epoch 9  | loss: 0.40498 | val_accuracy: 0.84385 |  0:00:08s
epoch 10 | loss: 0.38528 | val_accuracy: 0.85714 |  0:00:09s
epoch 11 | loss: 0.39435 | val_accuracy: 0.85714 |  0:00:10s
epoch 12 | loss: 0.34145 | val_accuracy: 0.85714 |  0:00:11s
epoch 13 | loss: 0.31645 | val_accuracy: 0.87043 |  0:00:12s
epoch 14 | loss: 0.3717  | val_accuracy: 0.8804  |  0:00:13s
epoch 15 | loss: 0.31527 | val_accuracy: 0.87708 |  0:00:14s
epoch 16 | loss: 0.32882



epoch 0  | loss: 1.72192 | val_accuracy: 0.5875  |  0:00:00s
epoch 1  | loss: 0.96141 | val_accuracy: 0.7875  |  0:00:01s
epoch 2  | loss: 0.82703 | val_accuracy: 0.8125  |  0:00:02s
epoch 3  | loss: 0.73843 | val_accuracy: 0.79583 |  0:00:02s
epoch 4  | loss: 0.62201 | val_accuracy: 0.84167 |  0:00:03s
epoch 5  | loss: 0.62518 | val_accuracy: 0.8375  |  0:00:04s
epoch 6  | loss: 0.59971 | val_accuracy: 0.84583 |  0:00:04s
epoch 7  | loss: 0.55427 | val_accuracy: 0.85833 |  0:00:05s
epoch 8  | loss: 0.53009 | val_accuracy: 0.85417 |  0:00:06s
epoch 9  | loss: 0.50098 | val_accuracy: 0.87083 |  0:00:06s
epoch 10 | loss: 0.52989 | val_accuracy: 0.87083 |  0:00:07s
epoch 11 | loss: 0.48165 | val_accuracy: 0.86667 |  0:00:08s
epoch 12 | loss: 0.45683 | val_accuracy: 0.875   |  0:00:08s
epoch 13 | loss: 0.43744 | val_accuracy: 0.88333 |  0:00:09s
epoch 14 | loss: 0.37439 | val_accuracy: 0.90417 |  0:00:10s
epoch 15 | loss: 0.40221 | val_accuracy: 0.8875  |  0:00:10s
epoch 16 | loss: 0.36946



epoch 0  | loss: 1.69431 | val_accuracy: 0.575   |  0:00:00s
epoch 1  | loss: 0.96272 | val_accuracy: 0.72917 |  0:00:01s
epoch 2  | loss: 0.83639 | val_accuracy: 0.75417 |  0:00:02s
epoch 3  | loss: 0.7154  | val_accuracy: 0.82083 |  0:00:02s
epoch 4  | loss: 0.64974 | val_accuracy: 0.85    |  0:00:03s
epoch 5  | loss: 0.61858 | val_accuracy: 0.84167 |  0:00:04s
epoch 6  | loss: 0.55733 | val_accuracy: 0.84583 |  0:00:04s
epoch 7  | loss: 0.58382 | val_accuracy: 0.86667 |  0:00:05s
epoch 8  | loss: 0.52457 | val_accuracy: 0.86667 |  0:00:06s
epoch 9  | loss: 0.49014 | val_accuracy: 0.8625  |  0:00:06s
epoch 10 | loss: 0.46786 | val_accuracy: 0.86667 |  0:00:07s
epoch 11 | loss: 0.44269 | val_accuracy: 0.86667 |  0:00:07s
epoch 12 | loss: 0.41973 | val_accuracy: 0.86667 |  0:00:08s
epoch 13 | loss: 0.43099 | val_accuracy: 0.87917 |  0:00:09s
epoch 14 | loss: 0.39316 | val_accuracy: 0.84583 |  0:00:09s
epoch 15 | loss: 0.38945 | val_accuracy: 0.85417 |  0:00:10s
epoch 16 | loss: 0.37322



epoch 0  | loss: 1.70793 | val_accuracy: 0.57917 |  0:00:00s
epoch 1  | loss: 0.93388 | val_accuracy: 0.73333 |  0:00:01s
epoch 2  | loss: 0.73895 | val_accuracy: 0.75833 |  0:00:01s
epoch 3  | loss: 0.63195 | val_accuracy: 0.78333 |  0:00:02s
epoch 4  | loss: 0.60391 | val_accuracy: 0.8125  |  0:00:03s
epoch 5  | loss: 0.57357 | val_accuracy: 0.82917 |  0:00:03s
epoch 6  | loss: 0.51421 | val_accuracy: 0.85417 |  0:00:04s
epoch 7  | loss: 0.47399 | val_accuracy: 0.8375  |  0:00:05s
epoch 8  | loss: 0.42501 | val_accuracy: 0.86667 |  0:00:05s
epoch 9  | loss: 0.46539 | val_accuracy: 0.875   |  0:00:06s
epoch 10 | loss: 0.43254 | val_accuracy: 0.875   |  0:00:07s
epoch 11 | loss: 0.41445 | val_accuracy: 0.8875  |  0:00:08s
epoch 12 | loss: 0.40792 | val_accuracy: 0.88333 |  0:00:08s
epoch 13 | loss: 0.35522 | val_accuracy: 0.875   |  0:00:09s
epoch 14 | loss: 0.36359 | val_accuracy: 0.88333 |  0:00:10s
epoch 15 | loss: 0.33073 | val_accuracy: 0.8875  |  0:00:10s
epoch 16 | loss: 0.35991



epoch 0  | loss: 1.6956  | val_accuracy: 0.62083 |  0:00:00s
epoch 1  | loss: 1.04864 | val_accuracy: 0.73333 |  0:00:01s
epoch 2  | loss: 0.80001 | val_accuracy: 0.7875  |  0:00:01s
epoch 3  | loss: 0.68642 | val_accuracy: 0.81667 |  0:00:02s
epoch 4  | loss: 0.65028 | val_accuracy: 0.82083 |  0:00:03s
epoch 5  | loss: 0.56207 | val_accuracy: 0.82917 |  0:00:03s
epoch 6  | loss: 0.53791 | val_accuracy: 0.84167 |  0:00:04s
epoch 7  | loss: 0.51794 | val_accuracy: 0.84167 |  0:00:05s
epoch 8  | loss: 0.45807 | val_accuracy: 0.85833 |  0:00:05s
epoch 9  | loss: 0.44796 | val_accuracy: 0.84167 |  0:00:06s
epoch 10 | loss: 0.41533 | val_accuracy: 0.82083 |  0:00:07s
epoch 11 | loss: 0.34171 | val_accuracy: 0.875   |  0:00:08s
epoch 12 | loss: 0.3944  | val_accuracy: 0.87083 |  0:00:08s
epoch 13 | loss: 0.36316 | val_accuracy: 0.85833 |  0:00:09s
epoch 14 | loss: 0.375   | val_accuracy: 0.85417 |  0:00:09s
epoch 15 | loss: 0.42313 | val_accuracy: 0.85833 |  0:00:10s
epoch 16 | loss: 0.35633



epoch 0  | loss: 1.68508 | val_accuracy: 0.62917 |  0:00:00s
epoch 1  | loss: 0.93315 | val_accuracy: 0.75833 |  0:00:01s
epoch 2  | loss: 0.71862 | val_accuracy: 0.80833 |  0:00:01s
epoch 3  | loss: 0.64224 | val_accuracy: 0.825   |  0:00:02s
epoch 4  | loss: 0.58843 | val_accuracy: 0.825   |  0:00:03s
epoch 5  | loss: 0.54892 | val_accuracy: 0.8625  |  0:00:04s
epoch 6  | loss: 0.51558 | val_accuracy: 0.87917 |  0:00:04s
epoch 7  | loss: 0.4844  | val_accuracy: 0.88333 |  0:00:05s
epoch 8  | loss: 0.47551 | val_accuracy: 0.85833 |  0:00:06s
epoch 9  | loss: 0.46249 | val_accuracy: 0.85833 |  0:00:06s
epoch 10 | loss: 0.44723 | val_accuracy: 0.8625  |  0:00:07s
epoch 11 | loss: 0.47626 | val_accuracy: 0.86667 |  0:00:07s
epoch 12 | loss: 0.40574 | val_accuracy: 0.85    |  0:00:08s
epoch 13 | loss: 0.43343 | val_accuracy: 0.8625  |  0:00:09s
epoch 14 | loss: 0.43196 | val_accuracy: 0.87917 |  0:00:09s
epoch 15 | loss: 0.41089 | val_accuracy: 0.87083 |  0:00:10s
epoch 16 | loss: 0.36974

[I 2024-08-05 19:15:56,150] A new study created in memory with name: no-name-a90e6a8a-012b-4352-9501-16f2c24a6b75


                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.846975  0.936392         0.847877   
KNN                         0.914591  0.976787         0.908385   
Decision Tree               0.925267  0.951957         0.910147   
Random Forest               0.946619  0.995741         0.926171   
Gradient Boosting           0.935943  0.994383         0.928849   
XGBoost                     0.935943  0.994944         0.931516   
LightGBM                    0.939502  0.996228         0.930623   
CatBoost                    0.932384  0.993246         0.922611   
MLP                          0.92691  0.989907             0.93   
DNN                         0.933555  0.979618         0.925833   
DCN                         0.930233  0.979752           0.9225   
Wide_and_Deep                0.92691  0.980429           0.9225   
XGBoost + NN                 0.92691  0.973848         0.931667   
LightGBM + NN               0.953488  0.982328           0.937

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:16:00,466] Trial 0 finished with value: 0.8438538205980066 and parameters: {'heads': 3, 'dim': 24, 'depth': 4, 'mlp_dim': 43, 'dropout': 0.19640388738635278, 'learning_rate': 0.0002773849920978986, 'batch_size': 256, 'num_epochs': 19}. Best is trial 0 with value: 0.8438538205980066.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:16:19,061] Trial 1 finished with value: 0.8471760797342193 and parameters: {'heads': 1, 'dim': 232, 'depth': 3, 'mlp_dim': 124, 'dropout': 0.39272031126274065, 'learning_rate': 0.001920741134079693, 'batch_size': 64, 'num_epochs': 27}. Best is trial 1 with value: 0.8471760797342193.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
[I 2024-08-05 19:16:52,632] Trial 2 finished with value: 0.9235880398671097 and parameters: {'heads': 2, 'dim': 182, 'depth': 4, 'mlp_dim': 64, 'dropout': 0.30841897162532345, 'learn

                            Accuracy AUC Score CV Mean Accuracy  \
Logistic Regression         0.846975  0.936392         0.847877   
KNN                         0.914591  0.976787         0.908385   
Decision Tree               0.925267  0.951957         0.910147   
Random Forest               0.946619  0.995741         0.926171   
Gradient Boosting           0.935943  0.994383         0.928849   
XGBoost                     0.935943  0.994944         0.931516   
LightGBM                    0.939502  0.996228         0.930623   
CatBoost                    0.932384  0.993246         0.922611   
MLP                          0.92691  0.989907             0.93   
DNN                         0.933555  0.979618         0.925833   
DCN                         0.930233  0.979752           0.9225   
Wide_and_Deep                0.92691  0.980429           0.9225   
XGBoost + NN                 0.92691  0.973848         0.931667   
LightGBM + NN               0.953488  0.982328           0.937