In [2]:
#!pip install bayesian-optimization

Collecting bayesian-optimization
  Downloading bayesian_optimization-1.4.3-py3-none-any.whl (18 kB)
Collecting colorama>=0.4.6 (from bayesian-optimization)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-1.4.3 colorama-0.4.6


In [None]:
#@title HPO-GS-TEST
import pandas as pd
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.svm import SVR, SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, accuracy_score
from sklearn.model_selection import train_test_split

# Read both datasets from CSV files located in the working directory.
reg_data = pd.read_csv('powerplant_energy_data.csv')
class_data = pd.read_csv('thyroid_cancer_data.csv')

# Regression task: 'energy_output' is the target, all other columns are features.
y_reg = reg_data['energy_output']
X_reg = reg_data.drop(columns='energy_output')

# Classification task: 'diagnosis' is the target, all other columns are features.
y_class = class_data['diagnosis']
X_class = class_data.drop(columns='diagnosis')

# --- Decision trees: exhaustive grid search ---------------------------------
# Regressor and classifier are tuned over the same 4 x 3 x 3 = 36 combinations.
dt_regressor_params = dict(
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)
dt_classifier_params = dict(
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Regression on reg_data: 20% of the rows are held out for the final R2 check.
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)
dt_regressor = DecisionTreeRegressor(random_state=0)
grid_regressor = GridSearchCV(
    estimator=dt_regressor,
    param_grid=dt_regressor_params,
    scoring='neg_mean_squared_error',  # model selection uses 3-fold CV MSE
    cv=3,
)
grid_regressor.fit(X_reg_train, y_reg_train)
# With the default refit=True, predict() delegates to best_estimator_.
y_reg_pred = grid_regressor.predict(X_reg_test)
r2 = r2_score(y_reg_test, y_reg_pred)
print("R2 score for Regression:", r2)
print("Best Parameters for Regression:", grid_regressor.best_params_)

# Classification on class_data: same 80/20 split recipe, selection by CV accuracy.
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)
dt_classifier = DecisionTreeClassifier(random_state=0)
grid_classifier = GridSearchCV(
    estimator=dt_classifier,
    param_grid=dt_classifier_params,
    scoring='accuracy',
    cv=3,
)
grid_classifier.fit(X_class_train, y_class_train)
y_class_pred = grid_classifier.predict(X_class_test)
accuracy = accuracy_score(y_class_test, y_class_pred)
print("\nAccuracy for Classification:", accuracy)
print("Best Parameters for Classification:", grid_classifier.best_params_)

# Blank line separating this model family's report from the next one.
print()

# --- Random forests: exhaustive grid search ---------------------------------
# 3 x 4 x 4 x 2 = 96 combinations for the regressor.
rf_regressor_params = {
    'n_estimators': [10, 20, 30],
    'max_depth': [15, 20, 30, 50],
    'min_samples_leaf': [1, 2, 4, 8],
    'bootstrap': [True, False],
}
# The classifier searches the same values; the lists are copied so the two
# grids remain independent objects.
rf_classifier_params = {
    name: list(candidates) for name, candidates in rf_regressor_params.items()
}

# Regression on reg_data (same seed as the other sections, hence the same split).
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)
rf_regressor = RandomForestRegressor(random_state=0)
grid_regressor = GridSearchCV(
    estimator=rf_regressor,
    param_grid=rf_regressor_params,
    scoring='neg_mean_squared_error',
    cv=3,
)
grid_regressor.fit(X_reg_train, y_reg_train)
# With the default refit=True, predict() delegates to best_estimator_.
y_reg_pred = grid_regressor.predict(X_reg_test)
r2 = r2_score(y_reg_test, y_reg_pred)
print("R2 score for Regression:", r2)
print("Best Parameters for Regression:", grid_regressor.best_params_)

# Classification on class_data.
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)
rf_classifier = RandomForestClassifier(random_state=0)
grid_classifier = GridSearchCV(
    estimator=rf_classifier,
    param_grid=rf_classifier_params,
    scoring='accuracy',
    cv=3,
)
grid_classifier.fit(X_class_train, y_class_train)
y_class_pred = grid_classifier.predict(X_class_test)
accuracy = accuracy_score(y_class_test, y_class_pred)
print("\nAccuracy for Classification:", accuracy)
print("Best Parameters for Classification:", grid_classifier.best_params_)

# Blank line separating this model family's report from the next one.
print()

# --- Gradient boosting: exhaustive grid search ------------------------------
# 3 x 3 x 3 = 27 combinations for the regressor.
gbm_regressor_params = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.05, 0.1, 0.2],
    'min_samples_split': [2, 3, 4],
}
# The classifier searches the same values; the lists are copied so the two
# grids remain independent objects.
gbm_classifier_params = {
    name: list(candidates) for name, candidates in gbm_regressor_params.items()
}

# Regression on reg_data (same seed as the other sections, hence the same split).
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)
gbm_regressor = GradientBoostingRegressor(random_state=0)
grid_regressor = GridSearchCV(
    estimator=gbm_regressor,
    param_grid=gbm_regressor_params,
    scoring='neg_mean_squared_error',
    cv=3,
)
grid_regressor.fit(X_reg_train, y_reg_train)
# With the default refit=True, predict() delegates to best_estimator_.
y_reg_pred = grid_regressor.predict(X_reg_test)
r2 = r2_score(y_reg_test, y_reg_pred)
print("R2 score for Regression:", r2)
print("Best Parameters for Regression:", grid_regressor.best_params_)

# Classification on class_data.
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)
gbm_classifier = GradientBoostingClassifier(random_state=0)
grid_classifier = GridSearchCV(
    estimator=gbm_classifier,
    param_grid=gbm_classifier_params,
    scoring='accuracy',
    cv=3,
)
grid_classifier.fit(X_class_train, y_class_train)
y_class_pred = grid_classifier.predict(X_class_test)
accuracy = accuracy_score(y_class_test, y_class_pred)
print("\nAccuracy for Classification:", accuracy)
print("Best Parameters for Classification:", grid_classifier.best_params_)

# Blank line separating this model family's report from the next one.
print()

# --- Support vector machines: exhaustive grid search ------------------------
# NOTE(review): the features are passed to SVR/SVC exactly as loaded; no
# scaling step is visible in this cell. Confirm that this is intended.

# SVR grid: 3 kernels x 3 C values x 3 epsilon values = 27 combinations.
svm_regressor_params = dict(
    kernel=['linear', 'poly', 'rbf'],
    C=[0.1, 1, 10],
    epsilon=[0.1, 0.2, 0.5],
)

# SVC grid: 3 kernels x 3 C values x 2 gamma modes x 2 class weights = 36 combinations.
svm_classifier_params = dict(
    kernel=['linear', 'poly', 'rbf'],
    C=[0.1, 1, 10],
    gamma=['scale', 'auto'],
    class_weight=['balanced', None],
)

# Regression on reg_data (same seed as the other sections, hence the same split).
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)
svm_regressor = SVR()
grid_regressor = GridSearchCV(
    estimator=svm_regressor,
    param_grid=svm_regressor_params,
    scoring='neg_mean_squared_error',
    cv=3,
)
grid_regressor.fit(X_reg_train, y_reg_train)
# With the default refit=True, predict() delegates to best_estimator_.
y_reg_pred = grid_regressor.predict(X_reg_test)
r2 = r2_score(y_reg_test, y_reg_pred)
print("R2 score for Regression:", r2)
print("Best Parameters for Regression:", grid_regressor.best_params_)

# Classification on class_data.
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)
svm_classifier = SVC()
grid_classifier = GridSearchCV(
    estimator=svm_classifier,
    param_grid=svm_classifier_params,
    scoring='accuracy',
    cv=3,
)
grid_classifier.fit(X_class_train, y_class_train)
y_class_pred = grid_classifier.predict(X_class_test)
accuracy = accuracy_score(y_class_test, y_class_pred)
print("\nAccuracy for Classification:", accuracy)
print("Best Parameters for Classification:", grid_classifier.best_params_)

In [None]:
#@title HPO-BO-REGRESSION
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR

# Regression dataset: 'energy_output' is the target, the remaining columns are features.
reg_data = pd.read_csv('powerplant_energy_data.csv')
y_reg = reg_data['energy_output']
X_reg = reg_data.drop(columns='energy_output')

# 80/20 hold-out split with a fixed seed; the objective functions below read
# these four names as module-level globals.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Decision Tree Regressor
def optimize_dtr(max_depth, min_samples_split, min_samples_leaf):
    """Objective for Bayesian optimisation of a DecisionTreeRegressor.

    The optimiser proposes real-valued points, so every hyperparameter is
    truncated to an int before it is handed to scikit-learn.

    Args:
        max_depth: Proposed maximum tree depth (float, truncated to int).
        min_samples_split: Proposed minimum samples to split a node
            (float, truncated to int).
        min_samples_leaf: Proposed minimum samples per leaf
            (float, truncated to int).

    Returns:
        R2 score of the fitted tree on (X_test_reg, y_test_reg).
    """
    # Fixed seed: the same hyperparameters now always produce the same score,
    # so the optimiser sees a deterministic objective (matches the
    # random_state=0 used for the grid-search estimators).
    dtr = DecisionTreeRegressor(
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        random_state=0,
    )
    dtr.fit(X_train_reg, y_train_reg)
    # NOTE(review): the score being maximised is computed on the held-out test
    # split, so the reported best score is tuned to that split.
    return r2_score(y_test_reg, dtr.predict(X_test_reg))


# Continuous search box; lower bounds keep the int()-truncated values valid
# (max_depth >= 1, min_samples_split >= 2, min_samples_leaf >= 1).
dtr_bounds = {
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
}

# 10 random probes followed by 10 guided iterations.
bayes_dtr = BayesianOptimization(f=optimize_dtr, pbounds=dtr_bounds, random_state=42)
bayes_dtr.maximize(init_points=10, n_iter=10)

print()

# Random Forest Regressor
def optimize_rfr(n_estimators, max_depth, min_samples_split, min_samples_leaf):
    """Objective for Bayesian optimisation of a RandomForestRegressor.

    The optimiser proposes real-valued points, so every hyperparameter is
    truncated to an int before it is handed to scikit-learn.

    Args:
        n_estimators: Proposed number of trees (float, truncated to int).
        max_depth: Proposed maximum tree depth (float, truncated to int).
        min_samples_split: Proposed minimum samples to split a node
            (float, truncated to int).
        min_samples_leaf: Proposed minimum samples per leaf
            (float, truncated to int).

    Returns:
        R2 score of the fitted forest on (X_test_reg, y_test_reg).
    """
    # Fixed seed: without it every evaluation draws different bootstrap
    # samples, so identical hyperparameters would score differently from call
    # to call and the run could not be reproduced.
    rfr = RandomForestRegressor(
        n_estimators=int(n_estimators),
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        random_state=0,
    )
    rfr.fit(X_train_reg, y_train_reg)
    # NOTE(review): the score being maximised is computed on the held-out test
    # split, so the reported best score is tuned to that split.
    return r2_score(y_test_reg, rfr.predict(X_test_reg))


# Continuous search box; lower bounds keep the int()-truncated values valid.
rfr_bounds = {
    'n_estimators': (5, 50),
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
}

# 10 random probes followed by 10 guided iterations.
bayes_rfr = BayesianOptimization(f=optimize_rfr, pbounds=rfr_bounds, random_state=42)
bayes_rfr.maximize(init_points=10, n_iter=10)

print()

# Gradient Boosting Regressor
def optimize_gbr(n_estimators, learning_rate, max_depth, min_samples_split, min_samples_leaf, subsample=1.0):
    """Objective for Bayesian optimisation of a GradientBoostingRegressor.

    The optimiser proposes real-valued points; the integer hyperparameters are
    truncated with int(), while learning_rate and subsample are used as given.

    Args:
        n_estimators: Proposed number of boosting stages (float, truncated to int).
        learning_rate: Proposed shrinkage factor.
        max_depth: Proposed depth of each tree (float, truncated to int).
        min_samples_split: Proposed minimum samples to split a node
            (float, truncated to int).
        min_samples_leaf: Proposed minimum samples per leaf
            (float, truncated to int).
        subsample: Fraction of training rows used per stage. Defaults to 1.0
            so the function can also be called without it.

    Returns:
        R2 score of the fitted model on (X_test_reg, y_test_reg).
    """
    # subsample was previously accepted but never forwarded to the estimator.
    # random_state pins the row subsampling so equal inputs give equal scores.
    gbr = GradientBoostingRegressor(
        n_estimators=int(n_estimators),
        learning_rate=learning_rate,
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        subsample=subsample,
        random_state=0,
    )
    gbr.fit(X_train_reg, y_train_reg)
    # NOTE(review): the score being maximised is computed on the held-out test
    # split, so the reported best score is tuned to that split.
    return r2_score(y_test_reg, gbr.predict(X_test_reg))


# The optimiser calls the objective with exactly the keys of this dict as
# keyword arguments. 'subsample' was missing here while being a required
# parameter of optimize_gbr, which made maximize() fail with a TypeError.
# NOTE(review): the (0.5, 1.0) range for subsample is a choice made in this
# fix; adjust if a different range is wanted.
gbr_bounds = {
    'n_estimators': (5, 50),
    'learning_rate': (0.001, 1.0),
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
    'subsample': (0.5, 1.0),
}

# 10 random probes followed by 10 guided iterations.
bayes_gbr = BayesianOptimization(f=optimize_gbr, pbounds=gbr_bounds, random_state=42)
bayes_gbr.maximize(init_points=10, n_iter=10)

print()

# Report the best probe of each optimiser: the full `max` record (target and
# params) followed by the target on its own, with a blank line between models.
_reg_reports = [
    ("Decision Tree Regressor: Best parameters -", bayes_dtr),
    ("Random Forest Regressor: Best parameters -", bayes_rfr),
    ("GBM Regressor: Best parameters -", bayes_gbr),
]
for _position, (_heading, _optimizer) in enumerate(_reg_reports):
    if _position > 0:
        print()
    print(_heading, _optimizer.max)
    print("R-squared Score:", _optimizer.max['target'])

|   iter    |  target   | max_depth | min_sa... | min_sa... |
-------------------------------------------------------------
| [0m1        [0m | [0m0.9477   [0m | [0m19.35    [0m | [0m19.06    [0m | [0m15.18    [0m |
| [0m2        [0m | [0m0.9413   [0m | [0m30.33    [0m | [0m3.964    [0m | [0m4.808    [0m |
| [0m3        [0m | [0m0.9079   [0m | [0m3.846    [0m | [0m17.46    [0m | [0m12.82    [0m |
| [0m4        [0m | [0m0.9433   [0m | [0m35.7     [0m | [0m1.391    [0m | [0m19.46    [0m |
| [0m5        [0m | [0m0.9442   [0m | [0m41.79    [0m | [0m5.034    [0m | [0m5.273    [0m |
| [0m6        [0m | [0m0.9474   [0m | [0m9.987    [0m | [0m6.781    [0m | [0m11.45    [0m |
| [0m7        [0m | [0m0.9462   [0m | [0m22.17    [0m | [0m6.533    [0m | [0m13.01    [0m |
| [0m8        [0m | [0m0.9426   [0m | [0m7.835    [0m | [0m6.551    [0m | [0m8.595    [0m |
| [0m9        [0m | [0m0.9474   [0m | [0m23.35    [0m 

In [None]:
#@title HPO-BO-CLASSIFICATION
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

# Classification dataset: 'diagnosis' is the target, the remaining columns are features.
class_data = pd.read_csv('thyroid_cancer_data.csv')
y_class = class_data['diagnosis']
X_class = class_data.drop(columns='diagnosis')

# 80/20 hold-out split with a fixed seed; the objective functions below read
# these four names as module-level globals.
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)

# Decision Tree Classifier
def optimize_dtc(max_depth, min_samples_split, min_samples_leaf):
    """Objective for Bayesian optimisation of a DecisionTreeClassifier.

    The optimiser proposes real-valued points, so every hyperparameter is
    truncated to an int before it is handed to scikit-learn.

    Args:
        max_depth: Proposed maximum tree depth (float, truncated to int).
        min_samples_split: Proposed minimum samples to split a node
            (float, truncated to int).
        min_samples_leaf: Proposed minimum samples per leaf
            (float, truncated to int).

    Returns:
        Accuracy of the fitted tree on (X_test_class, y_test_class).
    """
    # Fixed seed: the same hyperparameters now always produce the same score,
    # so the optimiser sees a deterministic objective (matches the
    # random_state=0 used for the grid-search estimators).
    dtc = DecisionTreeClassifier(
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        random_state=0,
    )
    dtc.fit(X_train_class, y_train_class)
    # NOTE(review): the score being maximised is computed on the held-out test
    # split, so the reported best score is tuned to that split.
    return accuracy_score(y_test_class, dtc.predict(X_test_class))


# Continuous search box; lower bounds keep the int()-truncated values valid
# (max_depth >= 1, min_samples_split >= 2, min_samples_leaf >= 1).
dtc_bounds = {
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
}

# 10 random probes followed by 10 guided iterations.
bayes_dtc = BayesianOptimization(f=optimize_dtc, pbounds=dtc_bounds, random_state=42)
bayes_dtc.maximize(init_points=10, n_iter=10)

print()

# Random Forest Classifier
def optimize_rfc(n_estimators, max_depth, min_samples_split, min_samples_leaf):
    """Objective for Bayesian optimisation of a RandomForestClassifier.

    The optimiser proposes real-valued points, so every hyperparameter is
    truncated to an int before it is handed to scikit-learn.

    Args:
        n_estimators: Proposed number of trees (float, truncated to int).
        max_depth: Proposed maximum tree depth (float, truncated to int).
        min_samples_split: Proposed minimum samples to split a node
            (float, truncated to int).
        min_samples_leaf: Proposed minimum samples per leaf
            (float, truncated to int).

    Returns:
        Accuracy of the fitted forest on (X_test_class, y_test_class).
    """
    # Fixed seed: without it every evaluation draws different bootstrap
    # samples, so identical hyperparameters would score differently from call
    # to call and the run could not be reproduced.
    rfc = RandomForestClassifier(
        n_estimators=int(n_estimators),
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        random_state=0,
    )
    rfc.fit(X_train_class, y_train_class)
    # NOTE(review): the score being maximised is computed on the held-out test
    # split, so the reported best score is tuned to that split.
    return accuracy_score(y_test_class, rfc.predict(X_test_class))


# Continuous search box; lower bounds keep the int()-truncated values valid.
rfc_bounds = {
    'n_estimators': (5, 50),
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
}

# 10 random probes followed by 10 guided iterations.
bayes_rfc = BayesianOptimization(f=optimize_rfc, pbounds=rfc_bounds, random_state=42)
bayes_rfc.maximize(init_points=10, n_iter=10)

print()

# Gradient Boosting Classifier
def optimize_gbc(n_estimators, learning_rate, max_depth, min_samples_split, min_samples_leaf, subsample=1.0):
    """Objective for Bayesian optimisation of a GradientBoostingClassifier.

    The optimiser proposes real-valued points; the integer hyperparameters are
    truncated with int(), while learning_rate and subsample are used as given.

    Args:
        n_estimators: Proposed number of boosting stages (float, truncated to int).
        learning_rate: Proposed shrinkage factor.
        max_depth: Proposed depth of each tree (float, truncated to int).
        min_samples_split: Proposed minimum samples to split a node
            (float, truncated to int).
        min_samples_leaf: Proposed minimum samples per leaf
            (float, truncated to int).
        subsample: Fraction of training rows used per stage. Defaults to 1.0
            so the function can also be called without it.

    Returns:
        Accuracy of the fitted model on (X_test_class, y_test_class).
    """
    # subsample was previously accepted but never forwarded to the estimator.
    # random_state pins the row subsampling so equal inputs give equal scores.
    gbc = GradientBoostingClassifier(
        n_estimators=int(n_estimators),
        learning_rate=learning_rate,
        max_depth=int(max_depth),
        min_samples_split=int(min_samples_split),
        min_samples_leaf=int(min_samples_leaf),
        subsample=subsample,
        random_state=0,
    )
    gbc.fit(X_train_class, y_train_class)
    # NOTE(review): the score being maximised is computed on the held-out test
    # split, so the reported best score is tuned to that split.
    return accuracy_score(y_test_class, gbc.predict(X_test_class))


# The optimiser calls the objective with exactly the keys of this dict as
# keyword arguments. 'subsample' was missing here while being a required
# parameter of optimize_gbc, which made maximize() fail with a TypeError.
# NOTE(review): the (0.5, 1.0) range for subsample is a choice made in this
# fix; adjust if a different range is wanted.
gbc_bounds = {
    'n_estimators': (5, 50),
    'learning_rate': (0.001, 1.0),
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
    'subsample': (0.5, 1.0),
}

# 10 random probes followed by 10 guided iterations.
bayes_gbc = BayesianOptimization(f=optimize_gbc, pbounds=gbc_bounds, random_state=42)
bayes_gbc.maximize(init_points=10, n_iter=10)

print()

# Report the best probe of each optimiser: the full `max` record (target and
# params) followed by the target on its own, with a blank line between models.
_class_reports = [
    ("Decision Tree Classifier: Best parameters -", bayes_dtc),
    ("Random Forest Classifier: Best parameters -", bayes_rfc),
    ("GBM Classifier: Best parameters -", bayes_gbc),
]
for _position, (_heading, _optimizer) in enumerate(_class_reports):
    if _position > 0:
        print()
    print(_heading, _optimizer.max)
    print("Accuracy:", _optimizer.max['target'])

|   iter    |  target   | max_depth | min_sa... | min_sa... |
-------------------------------------------------------------
| [0m1        [0m | [0m0.9561   [0m | [0m19.35    [0m | [0m19.06    [0m | [0m15.18    [0m |
| [0m2        [0m | [0m0.9035   [0m | [0m30.33    [0m | [0m3.964    [0m | [0m4.808    [0m |
| [0m3        [0m | [0m0.9123   [0m | [0m3.846    [0m | [0m17.46    [0m | [0m12.82    [0m |
| [0m4        [0m | [0m0.9386   [0m | [0m35.7     [0m | [0m1.391    [0m | [0m19.46    [0m |
| [0m5        [0m | [0m0.9211   [0m | [0m41.79    [0m | [0m5.034    [0m | [0m5.273    [0m |
| [0m6        [0m | [0m0.9211   [0m | [0m9.987    [0m | [0m6.781    [0m | [0m11.45    [0m |
| [0m7        [0m | [0m0.9211   [0m | [0m22.17    [0m | [0m6.533    [0m | [0m13.01    [0m |
| [0m8        [0m | [0m0.9298   [0m | [0m7.835    [0m | [0m6.551    [0m | [0m8.595    [0m |
| [0m9        [0m | [0m0.9561   [0m | [0m23.35    [0m 