In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeClassifier
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
import numpy as np
import pandas as pd
import xgboost as xgb
from hyperopt import hp, tpe, fmin, Trials, space_eval
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
!pip install optuna
import optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


GENERATE DATASET

In [None]:
# Settings for the synthetic classification dataset; `random_state` is also
# reused as the seed for the split and the searches further down.
n_samples = 10_000
n_features = 20
n_informative = 10
n_redundant = 5
n_clusters_per_class = 2
weights = [0.7]
class_sep = 0.8
random_state = 61

In [None]:
# Build the synthetic feature matrix X and label vector y from the settings above
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_informative,
    n_redundant=n_redundant,
    n_clusters_per_class=n_clusters_per_class,
    weights=weights,
    class_sep=class_sep,
    random_state=random_state,
)

In [None]:
# Perturb every feature value with Gaussian noise (mean 0, std 0.5).
# The noise is drawn from a generator seeded with `random_state` so the noisy
# dataset is identical on every fresh run (the global NumPy RNG is unseeded).
# NOTE: the update is in place, so re-running this cell without regenerating X
# adds the noise a second time.
noise_rng = np.random.default_rng(random_state)
X += noise_rng.normal(0, 0.5, X.shape)

In [None]:
# Hold out 20% of the samples as the test split; seeded for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)

XGBOOST

In [None]:
# Base XGBoost estimator handed to the grid search and the randomized search
xgb_classifier = xgb.XGBClassifier(
    tree_method="hist",
    device="cuda",
    max_bin=64,
    nthread=2,
    random_state=random_state,
)

In [None]:
# Discrete candidate values shared by GridSearchCV and RandomizedSearchCV
param_grid_xgb = dict(
    n_estimators=[100, 200, 300, 400, 500],
    max_depth=[3, 6, 9, 12, 15],
    learning_rate=[0.1, 0.01, 0.001, 0.0001],
)

# hyperopt search space for the Bayesian (TPE) runs.
# NOTE(review): hp.randint's upper bound appears to be exclusive, so 500 trees /
# depth 15 from the grid above would never be sampled here — confirm.
space_xgb = dict(
    n_estimators=hp.randint('n_estimators', 50, 500),
    max_depth=hp.randint('max_depth', 2, 15),
    learning_rate=hp.loguniform('learning_rate', np.log(0.0001), np.log(0.1)),
)


# Define the objective function to minimize
def objective_xgb(params):
    """hyperopt objective for XGBoost: negative mean 5-fold CV accuracy.

    Args:
        params: dict of XGBClassifier keyword arguments sampled from the
            search space (n_estimators, max_depth, learning_rate).

    Returns:
        The negated mean cross-validated accuracy on (X_train, y_train);
        negated because hyperopt's fmin minimizes its objective.
    """
    # Build a new dict instead of writing into `params`, so the dict owned by
    # the caller is never mutated. The fixed settings still take precedence.
    model_params = {**params, 'tree_method': 'hist', 'device': 'cuda'}
    model = xgb.XGBClassifier(**model_params, random_state=random_state)
    fold_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    return -fold_scores.mean()  # Minimize negative accuracy


In [None]:
# Grid search
%%time
grid_search_xgb = GridSearchCV(xgb_classifier, param_grid_xgb, cv=5, scoring='accuracy', verbose=2)
grid_search_xgb.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=   0.5s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=   0.1s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=   0.1s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=100; total time=   0.1s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time=   0.3s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time=   0.3s
[CV] END ...learning_rate=0.

In [None]:
# Report the winning grid-search configuration and its cross-validation score
grid_best_params = grid_search_xgb.best_params_
grid_best_score = grid_search_xgb.best_score_
print("Grid Search Best Parameters:", grid_best_params)
print("Grid Search Best Accuracy:", grid_best_score)

Grid Search Best Parameters: {'learning_rate': 0.1, 'max_depth': 9, 'n_estimators': 500}
Grid Search Best Accuracy: 0.95475


In [None]:
# Score the best grid-search estimator on the held-out test split
final_model_xgb = grid_search_xgb.best_estimator_
test_accuracy_xgb = accuracy_score(y_test, final_model_xgb.predict(X_test))
print("Final Model Test Accuracy:", test_accuracy_xgb)

Final Model Test Accuracy: 0.9645


In [None]:
# Random search
%%time
random_search_xgb = RandomizedSearchCV(xgb_classifier, param_grid_xgb, n_iter=20, cv=5, scoring='accuracy', random_state=random_state, verbose=2)
random_search_xgb.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] END learning_rate=0.0001, max_depth=6, n_estimators=400; total time=   0.8s
[CV] END learning_rate=0.0001, max_depth=6, n_estimators=400; total time=   0.7s
[CV] END learning_rate=0.0001, max_depth=6, n_estimators=400; total time=   0.7s
[CV] END learning_rate=0.0001, max_depth=6, n_estimators=400; total time=   0.7s
[CV] END learning_rate=0.0001, max_depth=6, n_estimators=400; total time=   0.7s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=200; total time=   0.2s
[CV] END .learning_rate=0.001, max_depth=6, n_estimators=100; total time=   0.2s
[CV] END .learning_rate=0.001, max_depth=6, n_e

In [None]:
# Report the winning randomized-search configuration and its cross-validation score
random_best_params = random_search_xgb.best_params_
random_best_score = random_search_xgb.best_score_
print("Random Search Best Parameters:", random_best_params)
print("Random Search Best Accuracy:", random_best_score)

Random Search Best Parameters: {'n_estimators': 500, 'max_depth': 15, 'learning_rate': 0.1}
Random Search Best Accuracy: 0.9546250000000001


In [None]:
# Score the best randomized-search estimator on the held-out test split
final_model_xgb_random = random_search_xgb.best_estimator_
test_accuracy_xgb_random = accuracy_score(y_test, final_model_xgb_random.predict(X_test))
print("Final Model (Random Search) Test Accuracy:", test_accuracy_xgb_random)

Final Model (Random Search) Test Accuracy: 0.966


In [None]:
# Bayesian optimization
%%time
trials_xgb = Trials()
best_xgb = fmin(objective_xgb, space_xgb, algo=tpe.suggest, max_evals=50, trials=trials_xgb, show_progressbar=True, rstate=np.random.default_rng(random_state))

100%|██████████| 50/50 [05:12<00:00,  6.25s/trial, best loss: -0.954375]
CPU times: user 5min 18s, sys: 1.8 s, total: 5min 19s
Wall time: 5min 12s


In [None]:
# `best_xgb` is the dict returned by fmin for the 50-evaluation run
print(
    "Bayesian optimization Best Parameters:",
    best_xgb,
)

Bayesian optimization Best Parameters: {'learning_rate': 0.06387345052193515, 'max_depth': 9, 'n_estimators': 437}


In [None]:
# Map the raw fmin result back onto the search space to get usable hyperparameters
best_params_xgb = space_eval(space_xgb, best_xgb)

# Train a new model with the best hyperparameters on the full training set
final_model_xgb_bayesian = xgb.XGBClassifier(**best_params_xgb, random_state=random_state)
final_model_xgb_bayesian.fit(X_train, y_train)

# Evaluate the best model on the test set.
# Stored under its own name so the grid-search result kept in
# `test_accuracy_xgb` is not overwritten and the methods stay comparable.
test_predictions_xgb = final_model_xgb_bayesian.predict(X_test)
test_accuracy_xgb_bayesian = accuracy_score(y_test, test_predictions_xgb)
print("Test Accuracy (Bayesian Optimization):", test_accuracy_xgb_bayesian)

Test Accuracy (Bayesian Optimization): 0.9655


In [None]:
#bayesian with less attempts
%%time
trials_xgb2 = Trials()
best_xgb2 = fmin(objective_xgb, space_xgb, algo=tpe.suggest, max_evals=20, trials=trials_xgb2, show_progressbar=True, rstate=np.random.default_rng(random_state))

100%|██████████| 20/20 [02:10<00:00,  6.54s/trial, best loss: -0.952]
CPU times: user 2min 14s, sys: 715 ms, total: 2min 14s
Wall time: 2min 10s


In [None]:
# `best_xgb2` is the dict returned by fmin for the 20-evaluation run
print(
    "Bayesian optimization Best Parameters:",
    best_xgb2,
)

Bayesian optimization Best Parameters: {'learning_rate': 0.029929252719907394, 'max_depth': 9, 'n_estimators': 443}


In [None]:
# Map the raw fmin result of the 20-evaluation run back onto the search space
best_params_xgb2 = space_eval(space_xgb, best_xgb2)

# Fit a fresh classifier with those hyperparameters on the whole training split
final_model_xgb_bayesian2 = xgb.XGBClassifier(
    **best_params_xgb2,
    random_state=random_state,
)
final_model_xgb_bayesian2.fit(X_train, y_train)

# Accuracy of that classifier on the held-out test split
test_predictions_xgb2 = final_model_xgb_bayesian2.predict(X_test)
test_accuracy_xgb2 = accuracy_score(y_test, test_predictions_xgb2)
print("Test Accuracy (Bayesian Optimization):", test_accuracy_xgb2)

Test Accuracy (Bayesian Optimization): 0.964


RIDGE CLASSIFIER

In [None]:
# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Default-configured RidgeClassifier; serves as the base estimator of the grid search
ridge_clf = RidgeClassifier()

In [None]:
# Candidate regularization strengths for the grid search
param_grid_ridge = dict(alpha=[0.01, 0.1, 1.0, 10.0, 100.0, 1000.0])

# hyperopt search space covering the same range, sampled log-uniformly
space_ridge = dict(
    alpha=hp.loguniform('alpha', np.log(0.01), np.log(1000)),
)

# Define the objective function to minimize (negative accuracy for maximization)
def objective_ridge(params):
    """hyperopt objective for RidgeClassifier: negative mean 5-fold CV accuracy.

    Args:
        params: dict with key 'alpha', the regularization strength to evaluate.

    Returns:
        The negated mean cross-validated accuracy on the scaled training
        split; negated because hyperopt's fmin minimizes its objective.
    """
    # Only cross-validation on the training split is needed here: the previous
    # extra fit and test-set prediction produced a value that was never used.
    model = RidgeClassifier(alpha=params['alpha'])
    fold_scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='accuracy')
    return -fold_scores.mean()





In [None]:
# grid search

In [None]:
# Set up the GridSearchCV with 5-fold cross-validation
%%time
grid_search_ridge = GridSearchCV(estimator=ridge_clf, param_grid=param_grid_ridge, cv=5, scoring='accuracy')

# Perform grid search on the training data
grid_search_ridge.fit(X_train_scaled, y_train)



CPU times: user 264 ms, sys: 208 ms, total: 472 ms
Wall time: 267 ms


In [None]:
# Pull the winning hyperparameters and the corresponding estimator out of the grid search
best_params_ridge = grid_search_ridge.best_params_
best_model_ridge = grid_search_ridge.best_estimator_
print("Best Parameters:", best_params_ridge)

Best Parameters: {'alpha': 0.01}


In [None]:
# Test-split predictions of the grid-search winner and their accuracy
y_pred_ridge = best_model_ridge.predict(X_test_scaled)
accuracy_ridge = accuracy_score(
    y_test,
    y_pred_ridge,
)
print("Test Accuracy:", accuracy_ridge)

Test Accuracy: 0.8505


In [None]:
# Bayesian optimization

In [None]:
# Set up the Trials object
%%time
trials_ridge = Trials()
# Perform Bayesian optimization using hyperopt
best_params_ridge = fmin(fn=objective_ridge, space=space_ridge, algo=tpe.suggest, max_evals=100, trials=trials_ridge, show_progressbar=True, rstate=np.random.default_rng(random_state))



100%|██████████| 100/100 [00:05<00:00, 17.95trial/s, best loss: -0.8487500000000001]
CPU times: user 5.95 s, sys: 4.5 s, total: 10.5 s
Wall time: 5.58 s


In [None]:
# Read the selected regularization strength out of the stored best parameters
best_alpha_ridge = best_params_ridge['alpha']
print(
    "Best alpha:",
    best_alpha_ridge,
)

Best alpha: 46.26579490027193


In [None]:
# Fit a RidgeClassifier with the selected alpha on the scaled training split
best_model_bay_ridge = RidgeClassifier(alpha=best_alpha_ridge).fit(X_train_scaled, y_train)

# Accuracy of that model on the scaled test split
y_pred_bay_ridge = best_model_bay_ridge.predict(X_test_scaled)
accuracy_bay_ridge = accuracy_score(y_test, y_pred_bay_ridge)
print("Test Accuracy:", accuracy_bay_ridge)

Test Accuracy: 0.851


NEURAL NETWORKS

In [None]:
# Convert the splits to tensors: float32 features, int64 labels.
# torch.as_tensor with an explicit dtype leaves an already-converted tensor
# as is, so re-running this cell does not re-wrap tensors in torch.tensor().
# NOTE: this rebinds X_train/X_test/y_train/y_test; the earlier sklearn cells
# were run on the original arrays.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)
y_test = torch.as_tensor(y_test, dtype=torch.long)


In [None]:
class NeuralNetwork(nn.Module):
    """Feed-forward network with one hidden layer: Linear -> ReLU -> Linear.

    The forward pass returns the output of the second linear layer directly;
    no activation is applied to it.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two linear layers and the ReLU between them."""
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run `x` through fc1, ReLU and fc2 and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


In [None]:
def objective(trial):
    """Optuna objective: train a NeuralNetwork for 10 epochs and return validation accuracy.

    Hyperparameters sampled from `trial`: 'hidden_dim' (int, 32-256),
    'lr' (log-scaled float, 1e-4 to 1e-2) and 'batch_size' (32, 64 or 128).

    The score is computed on a validation subset carved out of the training
    data, not on the test split, so the test accuracy reported after tuning
    is not biased by the hyperparameter selection.

    Args:
        trial: the optuna trial used to sample hyperparameters.

    Returns:
        Accuracy (fraction in [0, 1]) on the validation subset.
    """
    # Sample hyperparameters to optimize
    hidden_dim = trial.suggest_int('hidden_dim', 32, 256)
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    # Split the training data 80/20 into fit/validation parts. The permutation
    # uses its own seeded generator so every trial is scored on the same rows.
    n_total = len(X_train)
    n_val = max(1, int(0.2 * n_total))
    split_gen = torch.Generator().manual_seed(random_state)
    perm = torch.randperm(n_total, generator=split_gen)
    val_idx, fit_idx = perm[:n_val], perm[n_val:]
    X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
    X_val, y_val = X_train[val_idx], y_train[val_idx]

    # Create data loader over the fit part only
    train_loader = DataLoader(TensorDataset(X_fit, y_fit), batch_size=batch_size, shuffle=True)

    # Initialize model and optimizer
    model = NeuralNetwork(input_dim=n_features, hidden_dim=hidden_dim, output_dim=2)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    num_epochs = 10
    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_X)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluate on validation set
    model.eval()
    with torch.no_grad():
        predicted = torch.argmax(model(X_val), dim=1)
        accuracy = (predicted == y_val).sum().item() / len(y_val)

    return accuracy

In [None]:
#random search
%%time
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2024-04-18 16:49:01,639] A new study created in memory with name: no-name-b28659c9-f4f7-48d9-ab77-1ea0c094b167
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
[I 2024-04-18 16:49:08,154] Trial 0 finished with value: 0.967 and parameters: {'hidden_dim': 193, 'lr': 0.0006560966716344451, 'batch_size': 32}. Best is trial 0 with value: 0.967.
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
[I 2024-04-18 16:49:10,547] Trial 1 finished with value: 0.9595 and parameters: {'hidden_dim': 76, 'lr': 0.005201897434103237, 'batch_size': 64}. Best is trial 0 with value: 0.967.
[I 2024-04-18 16:49:11,792] Trial 2 finished with value: 0.949 and parameters: {'hidden_dim': 53, 'lr': 0.000765079135413452, 'batch_size': 128}. Best is trial 0 with value: 0.967.
[I 2024-04-18 16:49:13,050] Trial 3 finished with value: 0.9585 and parameters: {'hidden_dim': 126, 'lr': 0.0009166491536694051, 'batch_size': 128}. Best is trial 0 with value: 0.967.
[I 2024-04-18 16:49:16,372] Trial 4 finished with value: 0

CPU times: user 2min 18s, sys: 1.56 s, total: 2min 20s
Wall time: 2min 21s


In [None]:
# Hyperparameters of the best trial in `study`
best_params = study.best_params
print("Best Parameters:", best_params)

best_hidden_dim, best_lr, best_batch_size = (
    best_params[key] for key in ('hidden_dim', 'lr', 'batch_size')
)

# Fresh network, optimizer and loss configured with those hyperparameters
best_model = NeuralNetwork(
    input_dim=n_features,
    hidden_dim=best_hidden_dim,
    output_dim=2,
)
best_optimizer = optim.Adam(best_model.parameters(), lr=best_lr)
best_criterion = nn.CrossEntropyLoss()

Best Parameters: {'hidden_dim': 224, 'lr': 0.001193744259773048, 'batch_size': 32}


In [None]:
# Shuffled mini-batches over the training tensors, using the selected batch size
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(
    train_dataset,
    batch_size=best_batch_size,
    shuffle=True,
)

In [None]:
# Train the selected configuration for 50 passes over the training loader
num_epochs = 50
best_model.train()  # nothing in the loop leaves training mode, so set it once
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        best_optimizer.zero_grad()            # clear gradients from the previous step
        output = best_model(batch_X)
        loss = best_criterion(output, batch_y)
        loss.backward()
        best_optimizer.step()

In [None]:
# Test-split accuracy of the trained network
best_model.eval()
with torch.no_grad():
    y_pred = best_model(X_test)

# Predicted class = index of the largest output score in each row
predicted = torch.argmax(y_pred, dim=1)
n_correct = (predicted == y_test).sum().item()
final_accuracy = n_correct / len(y_test)

print("Final Test Accuracy:", final_accuracy)

Final Test Accuracy: 0.9665


In [None]:
#bayesian

In [None]:
%%time
study_bay = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study_bay.optimize(objective, n_trials=50)

[I 2024-04-18 16:52:30,977] A new study created in memory with name: no-name-18eb9e66-fa7f-44d3-a476-6c1dae77f448
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
[I 2024-04-18 16:52:34,849] Trial 0 finished with value: 0.96 and parameters: {'hidden_dim': 159, 'lr': 0.002064908851864655, 'batch_size': 32}. Best is trial 0 with value: 0.96.
[I 2024-04-18 16:52:38,519] Trial 1 finished with value: 0.9645 and parameters: {'hidden_dim': 33, 'lr': 0.003911060783597867, 'batch_size': 32}. Best is trial 1 with value: 0.9645.
[I 2024-04-18 16:52:41,851] Trial 2 finished with value: 0.966 and parameters: {'hidden_dim': 37, 'lr': 0.0023943885232297762, 'batch_size': 32}. Best is trial 2 with value: 0.966.
[I 2024-04-18 16:52:45,283] Trial 3 finished with value: 0.9645 and parameters: {'hidden_dim': 51, 'lr': 0.0030933498339749425, 'batch_size': 32}. Best is trial 2 with value: 0.966.
[I 2024-04-18 16:52:49,438] Trial 4 finished with value: 0.9635 and parameters: {'hidden_dim': 197, 'lr': 0.0006

CPU times: user 2min 26s, sys: 1.43 s, total: 2min 27s
Wall time: 2min 28s


In [None]:
# Hyperparameters of the best trial in `study_bay`
best_params_bay = study_bay.best_params
print("Best Parameters:", best_params_bay)

best_hidden_dim_bay, best_lr_bay, best_batch_size_bay = (
    best_params_bay[key] for key in ('hidden_dim', 'lr', 'batch_size')
)

# Fresh network, optimizer and loss configured with those hyperparameters
best_model_bay = NeuralNetwork(
    input_dim=n_features,
    hidden_dim=best_hidden_dim_bay,
    output_dim=2,
)
best_optimizer_bay = optim.Adam(best_model_bay.parameters(), lr=best_lr_bay)
best_criterion_bay = nn.CrossEntropyLoss()

Best Parameters: {'hidden_dim': 220, 'lr': 0.004848470753441942, 'batch_size': 128}


In [None]:
# Shuffled mini-batches over the existing `train_dataset`, using the batch size picked by the TPE study
train_loader_bay = DataLoader(
    train_dataset,
    batch_size=best_batch_size_bay,
    shuffle=True,
)

In [None]:
# Train the TPE-selected configuration for 50 passes over its training loader
num_epochs = 50
best_model_bay.train()  # nothing in the loop leaves training mode, so set it once
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader_bay:
        best_optimizer_bay.zero_grad()        # clear gradients from the previous step
        output_bay = best_model_bay(batch_X)
        loss_bay = best_criterion_bay(output_bay, batch_y)
        loss_bay.backward()
        best_optimizer_bay.step()

In [None]:
# Test-split accuracy of the network trained with the TPE-selected hyperparameters
best_model_bay.eval()
with torch.no_grad():
    y_pred_bay = best_model_bay(X_test)

# Predicted class = index of the largest output score in each row
predicted_bay = torch.argmax(y_pred_bay, dim=1)
n_correct_bay = (predicted_bay == y_test).sum().item()
final_accuracy_bay = n_correct_bay / len(y_test)

print("Final Test Accuracy:", final_accuracy_bay)

Final Test Accuracy: 0.96
