In [6]:
# !pip install bayesian-optimization



In [30]:
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from sklearn.metrics import r2_score, accuracy_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

In [37]:
# Read the wine-quality CSV into a DataFrame and report its (rows, columns)
class_data = pd.read_csv('wine_quality_data.csv')
print(class_data.shape)

# The label is taken to be whichever column comes last in the file
# NOTE(review): this is an assumption about the CSV layout — confirm against the data
target_column = class_data.columns[-1]

# Label column on its own; features are every remaining column
y_class = class_data[target_column]
X_class = class_data.drop(columns=target_column)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
)


(1143, 12)


In [31]:
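# NOTE: disabled alternative loader for 'stroke_data.csv', kept commented out for reference.
# The shape shown under this cell is left over from an earlier run in which it was active.
# # Load the dataset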
# class_data = pd.read_csv('stroke_data.csv')
# print(class_data.shape)

# # Filter numerical columns
# numerical_columns = class_data.select_dtypes(include=['number']).columns
# class_data['bmi'] = class_data['bmi'].fillna(class_data['bmi'].mean())

# # Assume the target variable is the last column
# target_column = numerical_columns[-1]

# # Split the dataset into features (X) and target variable (y) using only numerical columns
# X_class = class_data[numerical_columns[:-1]]  # Exclude the target column
# y_class = class_data[target_column]

# # Split the data into training and testing sets
# X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)

(5110, 12)


In [38]:
#@title HPO-GS-CLASS

def run_grid_search(estimator, param_grid, X_train, y_train, X_test, y_test,
                    cv=3, scoring='accuracy'):
    """Grid-search ``estimator`` over ``param_grid`` and score the winner on the test split.

    Parameters
    ----------
    estimator : scikit-learn classifier
        Unfitted estimator handed to ``GridSearchCV``.
    param_grid : dict
        Mapping of hyperparameter name -> list of candidate values.
    X_train, y_train
        Data the cross-validated search is fitted on.
    X_test, y_test
        Held-out data used only to score the selected estimator.
    cv : int, default 3
        Number of cross-validation folds passed to ``GridSearchCV``.
    scoring : str, default 'accuracy'
        Metric ``GridSearchCV`` uses to rank candidates.

    Returns
    -------
    tuple
        ``(search, predictions, test_accuracy)``: the fitted ``GridSearchCV``
        object, its best estimator's predictions for ``X_test``, and the
        ``accuracy_score`` of those predictions against ``y_test``.

    Prints the test accuracy and the best parameter combination.
    """
    search = GridSearchCV(estimator, param_grid, cv=cv, scoring=scoring)
    search.fit(X_train, y_train)
    predictions = search.best_estimator_.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)
    print("\nAccuracy for Classification:", test_accuracy)
    print("Best Parameters for Classification:", search.best_params_)
    return search, predictions, test_accuracy


# One split shared by all three searches. The original cell recomputed this exact
# split (same inputs, same random_state) before every model, inside the timed region.
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(X_class, y_class, test_size=0.2, random_state=42)
gs_split_args = (X_class_train, y_class_train, X_class_test, y_class_test)

# Every search is kept here so later cells can compare models; the legacy names
# `grid_classifier`, `y_class_pred` and `accuracy` still end up holding the last (GBM) run.
gs_class_results = {}

# Hyperparameter tuning grid for Decision Tree Classifier
dt_classifier_params = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# Decision Tree Classifier on class_data
dt_classifier = DecisionTreeClassifier(random_state=0)
# perf_counter is a monotonic clock intended for measuring durations,
# unlike time.time(), which follows the (adjustable) system clock.
dtcgs_s = time.perf_counter()
grid_classifier, y_class_pred, accuracy = run_grid_search(dt_classifier, dt_classifier_params, *gs_split_args)
dtcgs_e = time.perf_counter()
print(f"GS : DTC - {dtcgs_e-dtcgs_s}")
gs_class_results['DTC'] = {'search': grid_classifier, 'accuracy': accuracy, 'seconds': dtcgs_e - dtcgs_s}

# Hyperparameter tuning grid for Random Forest Classifier
rf_classifier_params = {
    'n_estimators': [10, 20, 30],
    'max_depth': [15, 20, 30, 50],
    'min_samples_leaf': [1, 2, 4, 8],
    'bootstrap': [True, False]
}
# Random Forest Classifier on class_data
rf_classifier = RandomForestClassifier(random_state=0)
rfcgs_s = time.perf_counter()
grid_classifier, y_class_pred, accuracy = run_grid_search(rf_classifier, rf_classifier_params, *gs_split_args)
rfcgs_e = time.perf_counter()
print(f"GS : RFC - {rfcgs_e-rfcgs_s}")
gs_class_results['RFC'] = {'search': grid_classifier, 'accuracy': accuracy, 'seconds': rfcgs_e - rfcgs_s}

# Hyperparameter tuning grid for GBM Classifier
gbm_classifier_params = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.05, 0.1, 0.2],
    'min_samples_split': [2, 3, 4]
}
# GBM Classifier on class_data
gbm_classifier = GradientBoostingClassifier(random_state=0)
gbcgs_s = time.perf_counter()
grid_classifier, y_class_pred, accuracy = run_grid_search(gbm_classifier, gbm_classifier_params, *gs_split_args)
gbcgs_e = time.perf_counter()
print(f"GS : GBC - {gbcgs_e-gbcgs_s}")
gs_class_results['GBC'] = {'search': grid_classifier, 'accuracy': accuracy, 'seconds': gbcgs_e - gbcgs_s}


Accuracy for Classification: 0.62882096069869
Best Parameters for Classification: {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2}
GS : DTC - 0.9445114135742188

Accuracy for Classification: 0.6375545851528385
Best Parameters for Classification: {'bootstrap': True, 'max_depth': 15, 'min_samples_leaf': 1, 'n_estimators': 10}
GS : RFC - 17.632131099700928

Accuracy for Classification: 0.6375545851528385
Best Parameters for Classification: {'learning_rate': 0.05, 'min_samples_split': 2, 'n_estimators': 50}
GS : GBC - 123.91177272796631


In [27]:
#@title HPO-BO-CLASS

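# NOTE(review): this whole cell is commented out; the table printed below it is from an earlier run.
# Each objective below fits on the training split and returns accuracy on X_test_class / y_test_class,
# so the optimiser tunes directly against the test split. Score on a validation split or with
# cross-validation instead before re-enabling, otherwise the reported accuracy is optimistic.
# dtcbo_s = time.time()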
# # Decision Tree Classifier
# def optimize_dtc(max_depth, min_samples_split, min_samples_leaf):
#     dtc = DecisionTreeClassifier(max_depth=int(max_depth), min_samples_split=int(min_samples_split),
#                                   min_samples_leaf=int(min_samples_leaf))
#     dtc.fit(X_train_class, y_train_class)
#     y_pred = dtc.predict(X_test_class)
#     return accuracy_score(y_test_class, y_pred)

# dtc_bounds = {'max_depth': (1, 50), 'min_samples_split': (2, 20),
#               'min_samples_leaf': (1, 20)}

# bayes_dtc = BayesianOptimization(f=optimize_dtc, pbounds=dtc_bounds, random_state=42)
# bayes_dtc.maximize(init_points=10, n_iter=10)
# print("Decision Tree Classifier: Best parameters -", bayes_dtc.max)
# print("Accuracy:", bayes_dtc.max['target'])
# dtcbo_e = time.time()
# print(f"BO : DTC - {dtcbo_e-dtcbo_s}")
# print()

# rfcbo_s = time.time()
# # Random Forest Classifier
# def optimize_rfc(n_estimators, max_depth, min_samples_split, min_samples_leaf):
#     rfc = RandomForestClassifier(n_estimators=int(n_estimators), max_depth=int(max_depth),
#                                   min_samples_split=int(min_samples_split), min_samples_leaf=int(min_samples_leaf))
#     rfc.fit(X_train_class, y_train_class)
#     y_pred = rfc.predict(X_test_class)
#     return accuracy_score(y_test_class, y_pred)

# rfc_bounds = {'n_estimators': (10, 100), 'max_depth': (1, 50), 'min_samples_split': (2, 20),
#               'min_samples_leaf': (1, 20)}

# bayes_rfc = BayesianOptimization(f=optimize_rfc, pbounds=rfc_bounds, random_state=42)
# bayes_rfc.maximize(init_points=10, n_iter=10)
# print("Random Forest Classifier: Best parameters -", bayes_rfc.max)
# print("Accuracy:", bayes_rfc.max['target'])
# rfcbo_e = time.time()
# print(f"BO : RFC - {rfcbo_e-rfcbo_s}")
# print()

# gbcbo_s = time.time()
# # Gradient Boosting Classifier
# def optimize_gbc(n_estimators, learning_rate, max_depth, min_samples_split, min_samples_leaf, subsample):
#     gbc = GradientBoostingClassifier(n_estimators=int(n_estimators), learning_rate=learning_rate,
#                                      max_depth=int(max_depth), min_samples_split=int(min_samples_split),
#                                      min_samples_leaf=int(min_samples_leaf), subsample=subsample)
#     gbc.fit(X_train_class, y_train_class)
#     y_pred = gbc.predict(X_test_class)
#     return accuracy_score(y_test_class, y_pred)

# gbc_bounds = {'n_estimators': (10, 100), 'learning_rate': (0.001, 1.0), 'max_depth': (1, 50),
#               'min_samples_split': (2, 20), 'min_samples_leaf': (1, 20), 'subsample': (0.1, 1.0)}

# bayes_gbc = BayesianOptimization(f=optimize_gbc, pbounds=gbc_bounds, random_state=42)
# bayes_gbc.maximize(init_points=10, n_iter=10)
# print("GBM Classifier: Best parameters -", bayes_gbc.max)
# print("Accuracy:", bayes_gbc.max['target'])
# gbcbo_e = time.time()
# print(f"BO : GBC - {gbcbo_e-gbcbo_s}")

|   iter    |  target   | max_depth | min_sa... | min_sa... |
-------------------------------------------------------------
| [0m1        [0m | [0m0.9403   [0m | [0m19.35    [0m | [0m19.06    [0m | [0m15.18    [0m |
| [0m2        [0m | [0m0.91     [0m | [0m30.33    [0m | [0m3.964    [0m | [0m4.808    [0m |
| [0m3        [0m | [0m0.9393   [0m | [0m3.846    [0m | [0m17.46    [0m | [0m12.82    [0m |
| [0m4        [0m | [0m0.9305   [0m | [0m35.7     [0m | [0m1.391    [0m | [0m19.46    [0m |
| [0m5        [0m | [0m0.9295   [0m | [0m41.79    [0m | [0m5.034    [0m | [0m5.273    [0m |
| [0m6        [0m | [0m0.9315   [0m | [0m9.987    [0m | [0m6.781    [0m | [0m11.45    [0m |
| [0m7        [0m | [0m0.9315   [0m | [0m22.17    [0m | [0m6.533    [0m | [0m13.01    [0m |
| [0m8        [0m | [0m0.9344   [0m | [0m7.835    [0m | [0m6.551    [0m | [0m8.595    [0m |
| [0m9        [0m | [0m0.9403   [0m | [0m23.35    [0m 