In [2]:
!pip install optuna
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import optuna
import optuna.visualization as optuna_viz
import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.read_csv('wineq.csv')
X = data.drop('quality', axis=1)
y = data['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

def objective(trial):
    classifier_name = trial.suggest_categorical('classifier', ['RandomForest', 'SVM'])
    if classifier_name == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 10, 200)
        max_depth = trial.suggest_int('max_depth', 2, 40, log=True)
        clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    else:
        C = trial.suggest_loguniform('C', 1e-5, 1e5)
        gamma = trial.suggest_loguniform('gamma', 1e-5, 1e5)
        clf = SVC(C=C, gamma=gamma, random_state=42)

    numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
    categorical_features = X.select_dtypes(include=['object']).columns

    # Hyperparameters for numeric transformer
    num_imputer_strategy = trial.suggest_categorical('num_imputer_strategy', ['mean', 'median', 'most_frequent'])
    num_scaler = trial.suggest_categorical('num_scaler', ['StandardScaler', 'MinMaxScaler'])

    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy=num_imputer_strategy)),
        ('scaler', StandardScaler() if num_scaler == 'StandardScaler' else MinMaxScaler())
    ])

    # Hyperparameters for categorical transformer
    cat_imputer_strategy = trial.suggest_categorical('cat_imputer_strategy', ['most_frequent', 'constant'])
    cat_encoder_handle_unknown = trial.suggest_categorical('cat_encoder_handle_unknown', ['error', 'ignore'])

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy=cat_imputer_strategy)),
        ('onehot', OneHotEncoder(handle_unknown=cat_encoder_handle_unknown))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ])

    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                ('classifier', clf)])

    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy

# Before Optimization
base_pipeline = Pipeline(steps=[
    ('preprocessor', ColumnTransformer(transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), X.select_dtypes(include=['int64', 'float64']).columns),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='error'))
        ]), X.select_dtypes(include=['object']).columns)
    ])),
    ('classifier', RandomForestClassifier(random_state=42))
])

base_pipeline.fit(X_train, y_train)
y_base_pred = base_pipeline.predict(X_test)
accuracy_base = accuracy_score(y_test, y_base_pred)

print("Base Accuracy: {}".format(accuracy_base))

# Optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# After Optimization
best_trial = study.best_trial
best_params = best_trial.params

print('Optimized Accuracy: {}'.format(best_trial.value))
print("Best Hyperparameters: {}".format(best_params))

optuna_viz.plot_optimization_history(study)
optuna_viz.plot_slice(study)




[I 2023-12-13 07:18:06,726] A new study created in memory with name: no-name-a0f1e759-0b39-411c-8c32-d03a96ff4292


Base Accuracy: 0.71875



suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.

[I 2023-12-13 07:18:07,032] Trial 0 finished with value: 0.421875 and parameters: {'classifier': 'SVM', 'C': 1.2748464830659763e-05, 'gamma': 2.002695552642629, 'num_imputer_strategy': 'mean', 'num_scaler': 'StandardScaler', 'cat_imputer_strategy': 'constant', 'cat_encoder_handle_unknown': 'error'}. Best is trial 0 with value: 0.421875.
[I 2023-12-13 07:18:08,034] Trial 1 finished with value: 0.659375 and parameters: {'classifier': 'RandomForest', 'n_estimators': 184, 'max_depth': 5, 'num_imputer_strategy': 'most_frequent', 'num_scaler': 'StandardScaler', 'cat_imputer_strategy': 'most_frequent', 'cat_encoder_

Optimized Accuracy: 0.746875
Best Hyperparameters: {'classifier': 'RandomForest', 'n_estimators': 103, 'max_depth': 11, 'num_imputer_strategy': 'most_frequent', 'num_scaler': 'StandardScaler', 'cat_imputer_strategy': 'most_frequent', 'cat_encoder_handle_unknown': 'ignore'}
