In [1]:
#Import packages

import numpy as np
from numpy.typing import NDArray
import pickle
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


def onehot2cat(y:NDArray) -> NDArray:
    '''Collapse one-hot encoded rows into integer class indices.

    For every row of `y` the index of the largest entry along axis 1 is
    returned, so a (n_samples, n_classes) input yields n_samples indices.
    '''
    class_idx = np.argmax(y, axis=1)
    return class_idx


def loadDataset(fn:str, toCat:bool=False) -> "tuple[NDArray, NDArray]":
    '''Load a pickled dataset and return its features and labels.

    The object stored in `fn` is indexed with the keys 'X' and 'Y'.

    Parameters
    ----------
    fn : path of the pickle file to read.
    toCat : when True, the 'Y' entry is passed through onehot2cat (argmax
        over axis 1) before being returned; when False it is returned as
        stored.

    Returns
    -------
    (X, y) : the 'X' entry and the (optionally converted) 'Y' entry.

    Raises
    ------
    OSError : if `fn` cannot be opened for reading.
    KeyError : if the unpickled mapping lacks the 'X' or 'Y' key.
    '''
    # NOTE: pickle.load can execute arbitrary code -- only open trusted files.
    with open(fn, 'rb') as f:
        data = pickle.load(f)

    X = data['X']
    y = onehot2cat(data['Y']) if toCat else data['Y']

    return X, y


def saveSKLModel(fn:str, model) -> None:
    '''Serialize `model` with pickle into the file `fn`.

    The file is opened in binary write mode, so existing content is replaced.
    '''
    with open(fn, 'wb') as sink:
        pickle.Pickler(sink).dump(model)
        

In [2]:
#Read the pickled dataset (labels converted to categorical) and report both shapes

fnt = 'wtdt-part.pickle'
X, y = loadDataset(fnt, toCat=True)
print(X.shape, y.shape, sep='\n')

(14000, 787)
(14000,)


In [5]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

# Import necessary libraries
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

# X and y are the arrays produced by the dataset-loading cell (loadDataset with toCat=True).

# Base estimator; the objective function overrides its hyperparameters on every trial
model = LogisticRegression()

# Search space explored by the Bayesian optimizer
space = [
    Real(1e-5, 1e0, prior="log-uniform", name="C"),  # Inverse regularization strength
    Integer(100, 1000, name="max_iter"),           # Maximum solver iterations
    # Stopping tolerance, sampled on a log scale around scikit-learn's 1e-4 default.
    # A uniform [0, 1] range spends nearly every trial on tolerances so loose that
    # the solver stops almost immediately, which also makes max_iter irrelevant.
    Real(1e-6, 1e-2, prior="log-uniform", name="tol"),
]

# Objective minimized by gp_minimize: negative mean cross-validated accuracy
@use_named_args(space)
def objective(**params):
    '''Score one hyperparameter configuration.

    `params` holds one value per dimension of `space`, keyed by the
    dimension name. The values are applied to the shared `model`, which is
    then scored with 5-fold cross-validated accuracy on the training split
    only, so the held-out X_test / y_test never influence hyperparameter
    selection.

    Returns the negated mean accuracy, because gp_minimize minimizes.
    '''
    model.set_params(**params)  # mutates the module-level estimator in place
    fold_scores = cross_val_score(
        model, X_train, y_train, cv=5, scoring="accuracy", n_jobs=-1
    )
    return -np.mean(fold_scores)

# Run Bayesian optimization (Gaussian-process surrogate) over the search space
result = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=50,  # Number of objective evaluations; each one is a full 5-fold CV fit
    random_state=42,
)

# result.x is a bare list ordered like `space`; pair every value with its
# dimension name so the report is readable without looking up that ordering
best_params = {dim.name: value for dim, value in zip(space, result.x)}
print("Best parameters:", ", ".join(f"{name}={value}" for name, value in best_params.items()))
print("Best score:", -result.fun)  # undo the sign flip applied by the objective


KeyboardInterrupt: 