In [None]:
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer, SimpleImputer
from scipy.stats import zscore
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#@title Data Cleaning
class DataCleaning:
    """Run a configurable sequence of cleaning steps over a DataFrame.

    Steps run in a fixed order, each only when its option is set:
    duplicate removal, missing-value imputation, outlier replacement,
    datetime extraction / scaling, categorical encoding.

    Args:
        duplicates: When truthy, drop duplicate rows.
        missing_num: Numeric imputation mode for ``MissingValues``
            (``'auto'`` or ``'knn'``); falsy disables it.
        missing_categ: Non-numeric imputation mode for ``MissingValues``
            (``'auto'``, ``'logreg'`` or ``'most_frequent'``); falsy
            disables it.
        outliers_method: Any truthy value enables IQR outlier replacement;
            the value itself does not select a method.
        scaler: ``'MinMax'``, ``'Standard'`` or ``'Robust'``; falsy disables
            scaling.
        extract_datetime: Column name, or list of column names, to split
            into year/month/day parts. ``True`` selects the columns that
            already have a datetime dtype. Falsy disables extraction.
        encode_categ: ``'auto'`` to encode every text column, or a list of
            column names to encode.
    """

    def __init__(self, duplicates=True, missing_num=None, missing_categ=None, outliers_method=None, scaler=None, extract_datetime=False, encode_categ=None):
        self.duplicates = duplicates
        self.missing_num = missing_num
        self.missing_categ = missing_categ
        self.outliers_method = outliers_method
        self.scaler = scaler
        self.extract_datetime = extract_datetime
        self.encode_categ = encode_categ

    def fit_transform(self, df):
        """Apply the enabled steps and return the cleaned frame.

        The input frame is copied first, so the caller's object is left
        untouched.

        Args:
            df: DataFrame to clean.

        Returns:
            A new DataFrame. Columns that were ``float64`` are cast back to
            ``float64``; columns that were ``int64`` are cast back only when
            that loses no information.
        """
        df = df.copy()
        original_dtypes = df.dtypes

        if self.duplicates:
            df = Duplicates().handle(df)
        if self.missing_num or self.missing_categ:
            df = MissingValues(self.missing_num, self.missing_categ).handle(df)
        if self.outliers_method:
            df = Outliers().handle(df)
        if self.scaler or self.extract_datetime:
            df = Adjust(self.scaler, self.extract_datetime).handle(df)
        if self.encode_categ:
            df = EncodeCateg(self.encode_categ).handle(df)

        return self._restore_dtypes(df, original_dtypes)

    @staticmethod
    def _is_lossless_int(series):
        """Return True when every value is present and has no fractional part."""
        values = pd.to_numeric(series, errors='coerce')
        if values.isna().any():
            return False
        return bool((values % 1 == 0).all())

    def _restore_dtypes(self, df, original_dtypes):
        """Cast surviving float64/int64 columns back to their input dtype."""
        for col in df.columns:
            # Columns created by a step (date parts, dummies) have no
            # original dtype to go back to.
            if col not in original_dtypes.index:
                continue
            target = original_dtypes[col]
            if target == np.float64:
                df[col] = df[col].astype(target)
            elif target == np.int64 and self._is_lossless_int(df[col]):
                # Scaled values or a fractional median would be truncated by
                # an integer cast, so such columns stay as they are.
                df[col] = df[col].astype(target)
        return df


class Duplicates:
    """Duplicate-row removal step."""

    def handle(self, df):
        """Drop duplicate rows from ``df`` in place, renumber, and return it."""
        df.drop_duplicates(inplace=True, ignore_index=True)
        return df


class MissingValues:
    """Missing-value imputation step.

    Args:
        missing_num: ``'auto'`` or ``'knn'`` to impute numeric columns with
            ``KNNImputer``; any other value leaves them alone.
        missing_categ: ``'most_frequent'`` to fill non-numeric columns with
            their most frequent value; ``'auto'`` or ``'logreg'`` to fill
            them with ``SimpleImputer``'s ``'constant'`` strategy; any other
            value leaves them alone.
    """

    def __init__(self, missing_num=None, missing_categ=None):
        self.missing_num = missing_num
        self.missing_categ = missing_categ

    def handle(self, df, _n_neighbors=5):
        """Impute missing values in ``df`` and return it.

        Args:
            df: DataFrame to impute; columns are overwritten in place.
            _n_neighbors: ``n_neighbors`` passed to ``KNNImputer``.

        Returns:
            ``df`` itself.
        """
        if not (self.missing_num or self.missing_categ):
            return df
        if not df.isna().any().any():
            return df
        if self.missing_num:
            df = self._handle_missing_num(df, _n_neighbors)
        if self.missing_categ:
            df = self._handle_missing_categ(df, _n_neighbors)
        return df

    def _handle_missing_num(self, df, _n_neighbors):
        """Fill gaps in numeric columns, keeping each column's dtype."""
        if self.missing_num not in ('auto', 'knn'):
            return df
        for col in df.select_dtypes(include=np.number).columns:
            column = df[col]
            missing = column.isna()
            # Nothing to fill, or nothing to learn from.
            if not missing.any() or missing.all():
                continue
            # NOTE(review): each column is imputed on its own, so KNN has no
            # other feature to measure distance with; per the scikit-learn
            # docs the column average is then used. Confirm this is intended.
            imputer = KNNImputer(n_neighbors=_n_neighbors)
            filled = imputer.fit_transform(df[[col]])[:, 0]
            if pd.api.types.is_integer_dtype(column.dtype):
                # Only integer-typed columns are rounded; float data keeps
                # its precision.
                df[col] = pd.Series(filled, index=df.index).round().astype(column.dtype)
            else:
                df[col] = filled
        return df

    def _handle_missing_categ(self, df, _n_neighbors):
        """Fill gaps in non-numeric columns with ``SimpleImputer``."""
        if self.missing_categ not in ('auto', 'logreg', 'most_frequent'):
            return df
        if self.missing_categ == 'most_frequent':
            strategy = 'most_frequent'
        else:
            strategy = 'constant'
        for col in df.select_dtypes(exclude=np.number).columns:
            if not df[col].isna().any():
                continue
            filled = SimpleImputer(strategy=strategy).fit_transform(df[[col]])
            # The imputer may drop a column that is entirely missing.
            if filled.shape[1] == 0:
                continue
            df[col] = filled[:, 0]
        return df


class Outliers:
    """IQR-based outlier step for numeric columns."""

    def handle(self, df):
        """Replace outliers in ``df`` with the column medians and return it."""
        return self.replace_outliers(df)

    @staticmethod
    def _iqr_bounds(values):
        """Return ``(Q1 - 1.5*IQR, Q3 + 1.5*IQR)`` for a Series or DataFrame."""
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        return q1 - 1.5 * iqr, q3 + 1.5 * iqr

    def detect_outliers(self, df):
        """Return a boolean Series marking rows with an outlier in any numeric column.

        Non-numeric columns are ignored.
        """
        numeric = df.select_dtypes(include=np.number)
        lower_bound, upper_bound = self._iqr_bounds(numeric)
        return ((numeric < lower_bound) | (numeric > upper_bound)).any(axis=1)

    def replace_outliers(self, df, replacement_value=None):
        """Overwrite values outside the 1.5*IQR fences.

        Args:
            df: DataFrame to modify; numeric columns are overwritten in place.
            replacement_value: Scalar used for every column, or a mapping /
                Series keyed by column name. Defaults to the column medians.

        Returns:
            ``df`` itself.
        """
        if replacement_value is None:
            replacement_value = df.median(numeric_only=True)
        per_column = not np.isscalar(replacement_value)
        for col in df.select_dtypes(include=np.number).columns:
            lower_bound, upper_bound = self._iqr_bounds(df[col])
            is_outlier = (df[col] < lower_bound) | (df[col] > upper_bound)
            # Missing comparisons (nullable dtypes) count as "not an outlier".
            mask = is_outlier.to_numpy(dtype=bool, na_value=False)
            fill = replacement_value[col] if per_column else replacement_value
            df[col] = np.where(mask, fill, df[col])
        return df


class Adjust:
    """Datetime extraction and numeric scaling step.

    Args:
        scaler: ``'MinMax'``, ``'Standard'`` or ``'Robust'``; the matching
            ``sklearn.preprocessing`` ``<name>Scaler`` is used.
        extract_datetime: Column name, list of column names, ``True`` for
            columns that already have a datetime dtype, or falsy for none.
    """

    _SCALERS = ('MinMax', 'Standard', 'Robust')

    def __init__(self, scaler=None, extract_datetime=False):
        self.scaler = scaler
        self.extract_datetime = extract_datetime

    def handle(self, df):
        """Extract date parts first, then scale the numeric columns.

        Because extraction runs first, the generated year/month/day columns
        are scaled together with the other numeric columns.
        """
        if not (self.scaler or self.extract_datetime):
            return df
        df = self._convert_datetime(df)
        if self.scaler:
            df = self._scale(df)
        return df

    def _scale(self, df):
        """Scale the numeric columns of ``df`` with the configured scaler."""
        import warnings

        if self.scaler not in self._SCALERS:
            warnings.warn(
                f"Adjust: unknown scaler {self.scaler!r}; expected one of "
                f"{self._SCALERS}. No scaling applied.",
                stacklevel=2,
            )
            return df
        numeric_cols = df.select_dtypes(include=np.number).columns
        if len(numeric_cols) == 0 or df.empty:
            return df
        scaler = getattr(preprocessing, self.scaler + 'Scaler')()
        # Text columns cannot be scaled, so only numeric ones are passed on.
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
        return df

    def _datetime_columns(self, df):
        """Resolve ``extract_datetime`` to an ordered list of existing columns."""
        spec = self.extract_datetime
        if spec is True:
            return [c for c in df.columns
                    if pd.api.types.is_datetime64_any_dtype(df[c])]
        if not spec:
            return []
        if isinstance(spec, str):
            # A bare string is one column name, not a collection of characters.
            spec = [spec]
        wanted = set(spec)
        return [c for c in df.columns if c in wanted]

    def _convert_datetime(self, df):
        """Replace each selected column with ``_year``/``_month``/``_day`` columns."""
        import warnings

        for col in self._datetime_columns(df):
            try:
                parsed = pd.to_datetime(df[col], errors='coerce')
            except (ValueError, TypeError, OverflowError) as exc:
                # Best effort: an unparseable column is reported and kept.
                warnings.warn(
                    f"Adjust: could not parse column {col!r} as datetime "
                    f"({exc}); leaving it unchanged.",
                    stacklevel=2,
                )
                continue
            df[f'{col}_year'] = parsed.dt.year
            df[f'{col}_month'] = parsed.dt.month
            df[f'{col}_day'] = parsed.dt.day
            df.drop(columns=[col], inplace=True)
        return df


class EncodeCateg:
    """Categorical encoding step.

    Text columns with at most ``ONE_HOT_MAX_CATEGORIES`` distinct values are
    one-hot encoded (first level dropped); larger ones are label encoded.

    Args:
        encode_categ: ``'auto'`` for every text column, or a list/tuple of
            column names. Falsy disables encoding.
    """

    ONE_HOT_MAX_CATEGORIES = 10

    def __init__(self, encode_categ=None):
        self.encode_categ = encode_categ

    def handle(self, df):
        """Encode the configured columns and return the resulting frame.

        One-hot encoding builds a new frame, so callers must use the return
        value rather than rely on ``df`` being changed in place.
        """
        if not self.encode_categ:
            return df
        if self.encode_categ == 'auto':
            return self._auto_encode(df)
        if isinstance(self.encode_categ, (list, tuple)):
            for col in self.encode_categ:
                if col in df.columns:
                    df = self._auto_encode(df, col)
        return df

    @staticmethod
    def _is_text(series):
        """Return True for object- or string-typed columns."""
        dtype = series.dtype
        return pd.api.types.is_object_dtype(dtype) or isinstance(dtype, pd.StringDtype)

    def _encode_column(self, df, col):
        """Encode one column and return the (possibly new) frame."""
        if df[col].nunique(dropna=False) <= self.ONE_HOT_MAX_CATEGORIES:
            return pd.get_dummies(df, columns=[col], prefix=[col], drop_first=True)
        df[col] = LabelEncoder().fit_transform(df[col])
        return df

    def _auto_encode(self, df, col=None):
        """Encode ``col`` if it is a text column, or every text column when ``col`` is None."""
        if col is not None:
            targets = [col] if self._is_text(df[col]) else []
        else:
            targets = [c for c in df.columns if self._is_text(df[c])]
        for name in targets:
            df = self._encode_column(df, name)
        return df

In [None]:
# Load the raw loan dataset and show its schema before any cleaning.
data = pd.read_csv(filepath_or_buffer="loan_data.csv")
print('original data info :')
# DataFrame.info() prints its report itself; the outer print() then emits
# the "None" it returns.
print(data.info())
def detect_outliers_iqr(data):
    """Collect the values lying outside the 1.5*IQR fences.

    Only columns whose dtype is int64 or float64 are examined.

    Returns:
        A DataFrame with one column per examined column, holding that
        column's out-of-range values at their original row labels.
    """
    collected = pd.DataFrame()
    for name in data.columns:
        series = data[name]
        if series.dtype not in ('int64', 'float64'):
            continue
        first_quartile = series.quantile(0.25)
        third_quartile = series.quantile(0.75)
        whisker = 1.5 * (third_quartile - first_quartile)
        out_of_range = (series < first_quartile - whisker) | (series > third_quartile + whisker)
        collected = pd.concat([collected, series[out_of_range]], axis=1)
    return collected

# Report, per numeric column, whether the raw data contains any IQR outlier.
outliers = detect_outliers_iqr(data)
print()
print('Outliers in original dataset :')
print(outliers.any(axis=0))

original data info :
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4587 entries, 0 to 4586
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ApplicantIncome    4157 non-null   float64
 1   CoapplicantIncome  4587 non-null   float64
 2   LoanAmount         4145 non-null   float64
 3   Loan_Amount_Term   4587 non-null   int64  
 4   Credit_History     4587 non-null   int64  
 5   Education          4587 non-null   object 
 6   Property_Area      4587 non-null   object 
 7   Loan_Status        4587 non-null   object 
dtypes: float64(3), int64(2), object(3)
memory usage: 286.8+ KB
None

Outliers in original dataset :
ApplicantIncome       True
CoapplicantIncome     True
LoanAmount            True
Loan_Amount_Term     False
Credit_History       False
dtype: bool


In [None]:
# NOTE(review): 'minMax' is not one of the scaler names Adjust.handle accepts
# ('MinMax', 'Standard', 'Robust'), so no scaling is applied, and the loaded
# dataset has no 'year' column, so no datetime parts are extracted.
clean_data = DataCleaning(
    duplicates=True,
    missing_num='knn',
    missing_categ='most_frequent',
    outliers_method=True,
    scaler='minMax',
    extract_datetime='year',
    encode_categ='auto',
)

In [None]:
# Run the configured pipeline and show the schema of the result.
cleaned_data = clean_data.fit_transform(df=data)
print('cleaned data info :')
# DataFrame.info() prints its report itself; print() then emits its None.
print(cleaned_data.info())

def detect_outliers_iqr(data):
    """Collect the values lying outside the 1.5*IQR fences.

    Only columns whose dtype is int64 or float64 are examined.

    Returns:
        A DataFrame with one column per examined column, holding that
        column's out-of-range values at their original row labels.
    """
    collected = pd.DataFrame()
    for name in data.columns:
        series = data[name]
        if series.dtype not in ('int64', 'float64'):
            continue
        first_quartile = series.quantile(0.25)
        third_quartile = series.quantile(0.75)
        whisker = 1.5 * (third_quartile - first_quartile)
        out_of_range = (series < first_quartile - whisker) | (series > third_quartile + whisker)
        collected = pd.concat([collected, series[out_of_range]], axis=1)
    return collected

# Report, per numeric column, whether any IQR outlier is left after cleaning.
outliers = detect_outliers_iqr(cleaned_data)
print()
print('Outliers in cleaned dataset :')
print(outliers.any(axis=0))

cleaned data info :
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4587 entries, 0 to 4586
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ApplicantIncome    4587 non-null   float64
 1   CoapplicantIncome  4587 non-null   float64
 2   LoanAmount         4587 non-null   float64
 3   Loan_Amount_Term   4587 non-null   int64  
 4   Credit_History     4587 non-null   int64  
 5   Education          4587 non-null   object 
 6   Property_Area      4587 non-null   object 
 7   Loan_Status        4587 non-null   object 
dtypes: float64(3), int64(2), object(3)
memory usage: 286.8+ KB
None

Outliers in cleaned dataset :
ApplicantIncome      False
CoapplicantIncome    False
LoanAmount           False
Loan_Amount_Term     False
Credit_History       False
dtype: bool


In [None]:
#@title HPO-GridSearch
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score

################################################################################

#Decision Tree HPO
# X and y are not defined in this cell; they must already exist when it runs.
# NOTE(review): a regressor and a classifier are tuned on the same y --
# confirm which of the two matches the target.

#Decision Tree Regressor
dt_regressor_params = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

dt_regressor = DecisionTreeRegressor(random_state=0)
grid_regressor = GridSearchCV(
    estimator=dt_regressor,
    param_grid=dt_regressor_params,
    scoring='neg_mean_squared_error',
    cv=3,
)
grid_regressor.fit(X, y)

print("Decision Tree Regressor Best Parameters:")
print(grid_regressor.best_params_)
# best_score_ is the negated MSE, so flip the sign for display.
print("MSE:" + str(-grid_regressor.best_score_))

#Decision Tree Classifier
dt_classifier_params = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

dt_classifier = DecisionTreeClassifier(random_state=0)
grid_classifier = GridSearchCV(
    estimator=dt_classifier,
    param_grid=dt_classifier_params,
    scoring='accuracy',
    cv=3,
)
grid_classifier.fit(X, y)

print("\nDecision Tree Classifier Best Parameters:")
print(grid_classifier.best_params_)
print("Accuracy:" + str(grid_classifier.best_score_))

################################################################################

#Random Forest HPO
# X and y are not defined in this cell; they must already exist when it runs.

#Random Forest Regressor
# 'squared_error' / 'absolute_error' are the current names of the criteria
# formerly spelled 'mse' / 'mae'; recent scikit-learn releases reject the old
# spellings.
rf_regressor_params = {
    'n_estimators': [10, 20, 30],
    'max_depth': [15, 20, 30, 50],
    'min_samples_leaf': [1, 2, 4, 8],
    "bootstrap": [True, False],
    "criterion": ['squared_error', 'absolute_error']
}

rf_regressor = RandomForestRegressor(random_state=0)
grid_regressor = GridSearchCV(rf_regressor, rf_regressor_params, cv=3, scoring='neg_mean_squared_error')
grid_regressor.fit(X, y)

print("Random Forest Regressor Best Parameters:")
print(grid_regressor.best_params_)
# best_score_ is the negated MSE, so flip the sign for display.
print("MSE:" + str(-grid_regressor.best_score_))

#Random Forest Classifier
rf_classifier_params = {
    'n_estimators': [10, 20, 30],
    'max_depth': [15, 20, 30, 50],
    'min_samples_leaf': [1, 2, 4, 8],
    "bootstrap": [True, False],
    "criterion": ['gini', 'entropy']
}

rf_classifier = RandomForestClassifier(random_state=0)
grid_classifier = GridSearchCV(rf_classifier, rf_classifier_params, cv=3, scoring='accuracy')
grid_classifier.fit(X, y)

print("\nRandom Forest Classifier Best Parameters:")
print(grid_classifier.best_params_)
print("Accuracy:" + str(grid_classifier.best_score_))

################################################################################

#Gradient Boost Machine HPO
# X and y are not defined in this cell; they must already exist when it runs.

#GBM Regressor
gbm_regressor_params = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 3, 4]
}

# Seeded like the tree and forest estimators in this cell, so that repeated
# runs select the same parameters.
gbm_regressor = GradientBoostingRegressor(random_state=0)
grid_regressor = GridSearchCV(gbm_regressor, gbm_regressor_params, cv=3, scoring='neg_mean_squared_error')
grid_regressor.fit(X, y)

print("GBM Regressor Best Parameters:")
print(grid_regressor.best_params_)
# best_score_ is the negated MSE, so flip the sign for display.
print("MSE:" + str(-grid_regressor.best_score_))

#GBM Classifier
gbm_classifier_params = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 3, 4],
    'subsample': [0.8, 0.9, 1.0]
}

gbm_classifier = GradientBoostingClassifier(random_state=0)
grid_classifier = GridSearchCV(gbm_classifier, gbm_classifier_params, cv=3, scoring='accuracy')
grid_classifier.fit(X, y)

print("\nGBM Classifier Best Parameters:")
print(grid_classifier.best_params_)
print("Accuracy:" + str(grid_classifier.best_score_))

################################################################################

#Support Vector Machine HPO
# X and y are not defined in this cell; they must already exist when it runs.

#SVM Regressor
svm_regressor_params = {
    'kernel': ['linear', 'poly', 'rbf'],
    'C': [0.1, 1, 10],
    'epsilon': [0.1, 0.2, 0.5]
}

svm_regressor = SVR()
grid_regressor = GridSearchCV(
    estimator=svm_regressor,
    param_grid=svm_regressor_params,
    scoring='neg_mean_squared_error',
    cv=3,
)
grid_regressor.fit(X, y)

print("SVM Regressor Best Parameters:")
print(grid_regressor.best_params_)
# best_score_ is the negated MSE, so flip the sign for display.
print("MSE:" + str(-grid_regressor.best_score_))

#SVM Classifier
svm_classifier_params = {
    'kernel': ['linear', 'poly', 'rbf'],
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto'],
    'class_weight': ['balanced', None]
}

svm_classifier = SVC()
grid_classifier = GridSearchCV(
    estimator=svm_classifier,
    param_grid=svm_classifier_params,
    scoring='accuracy',
    cv=3,
)
grid_classifier.fit(X, y)

print("\nSVM Classifier Best Parameters:")
print(grid_classifier.best_params_)
print("Accuracy:" + str(grid_classifier.best_score_))

################################################################################

In [None]:
#@title HPO-skopt
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

################################################################################

#Decision Tree HPO
# X and y are not defined in this cell; they must already exist when it runs.

#Hyperparameter tuning for Decision Tree Regressor
# 'squared_error' / 'absolute_error' are the current names of the criteria
# formerly spelled 'mse' / 'mae'; recent scikit-learn releases reject the old
# spellings.
dtr_params = {
    'max_depth': Integer(1, 50),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'criterion': Categorical(['squared_error', 'absolute_error'])
}

dtr = DecisionTreeRegressor(random_state=42)
bayes_dtr = BayesSearchCV(dtr, dtr_params, n_iter=20, cv=3, scoring='neg_mean_squared_error', random_state=42)
# Uses the same capital-X feature matrix as every other search in the file.
bayes_dtr.fit(X, y)

print("Decision Tree Regressor: Best parameters -", bayes_dtr.best_params_)
# best_score_ is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_dtr.best_score_)

#Hyperparameter tuning for Decision Tree Classifier
dtc_params = {
    'max_depth': Integer(1, 50),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'criterion': Categorical(['gini', 'entropy'])
}

dtc = DecisionTreeClassifier(random_state=42)
bayes_dtc = BayesSearchCV(dtc, dtc_params, n_iter=20, cv=3, scoring='accuracy', random_state=42)
bayes_dtc.fit(X, y)

print("Decision Tree Classifier: Best parameters -", bayes_dtc.best_params_)
print("Accuracy:", bayes_dtc.best_score_)

################################################################################

#Random Forest HPO
# X and y are not defined in this cell; they must already exist when it runs.

#Hyperparameter tuning for Random Forest Regressor
# 'squared_error' / 'absolute_error' are the current names of the criteria
# formerly spelled 'mse' / 'mae'; recent scikit-learn releases reject the old
# spellings.
rfr_params = {
    'n_estimators': Integer(10, 100),
    'max_depth': Integer(1, 50),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'criterion': Categorical(['squared_error', 'absolute_error'])
}

rfr = RandomForestRegressor(random_state=42)
bayes_rfr = BayesSearchCV(rfr, rfr_params, n_iter=20, cv=3, scoring='neg_mean_squared_error', random_state=42)
bayes_rfr.fit(X, y)

print("Random Forest Regressor: Best parameters -", bayes_rfr.best_params_)
# best_score_ is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_rfr.best_score_)

#Hyperparameter tuning for Random Forest Classifier
rfc_params = {
    'n_estimators': Integer(10, 100),
    'max_depth': Integer(1, 50),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'criterion': Categorical(['gini', 'entropy'])
}

rfc = RandomForestClassifier(random_state=42)
bayes_rfc = BayesSearchCV(rfc, rfc_params, n_iter=20, cv=3, scoring='accuracy', random_state=42)
bayes_rfc.fit(X, y)

print("Random Forest Classifier: Best parameters -", bayes_rfc.best_params_)
print("Accuracy:", bayes_rfc.best_score_)

################################################################################

#Gradient Boost Machine HPO
# X and y are not defined in this cell; they must already exist when it runs.

#Hyperparameter tuning for GBM Regressor
gbr_params = {
    'n_estimators': Integer(10, 100),
    'learning_rate': Real(0.001, 1.0, 'log-uniform'),
    'max_depth': Integer(1, 50),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'subsample': Real(0.1, 1.0, 'uniform')
}

gbr = GradientBoostingRegressor(random_state=42)
bayes_gbr = BayesSearchCV(gbr, gbr_params, n_iter=20, cv=3, scoring='neg_mean_squared_error', random_state=42)
bayes_gbr.fit(X, y)

print("GBM Regressor: Best parameters -", bayes_gbr.best_params_)
# best_score_ is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_gbr.best_score_)

#Hyperparameter tuning for GBM Classifier
gbc_params = {
    'n_estimators': Integer(10, 100),
    'learning_rate': Real(0.001, 1.0, 'log-uniform'),
    'max_depth': Integer(1, 50),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'subsample': Real(0.1, 1.0, 'uniform')
}

gbc = GradientBoostingClassifier(random_state=42)
bayes_gbc = BayesSearchCV(gbc, gbc_params, n_iter=20, cv=3, scoring='accuracy', random_state=42)
bayes_gbc.fit(X, y)

print("GBM Classifier: Best parameters -", bayes_gbc.best_params_)
print("Accuracy:", bayes_gbc.best_score_)

################################################################################

#Support Vector Machine HPO
# X and y are not defined in this cell; they must already exist when it runs.

#Hyperparameter tuning for SVM Regressor
svr_params = {
    'C': Real(1e-6, 1e+6, prior='log-uniform'),
    'kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid']),
    'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'epsilon': Real(1e-6, 1e+1, prior='log-uniform')
}

svr = SVR()
bayes_svr = BayesSearchCV(svr, svr_params, n_iter=20, cv=3, scoring='neg_mean_squared_error', random_state=42)
bayes_svr.fit(X, y)

print("SVM Regressor: Best parameters -", bayes_svr.best_params_)
# best_score_ is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_svr.best_score_)

#Hyperparameter tuning for SVM Classifier
svc_params = {
    'C': Real(1e-6, 1e+6, prior='log-uniform'),
    'kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid']),
    'gamma': Real(1e-6, 1e+1, prior='log-uniform')
}

svc = SVC()
bayes_svc = BayesSearchCV(svc, svc_params, n_iter=20, cv=3, scoring='accuracy', random_state=42)
bayes_svc.fit(X, y)

print("SVM Classifier: Best parameters -", bayes_svc.best_params_)
print("Accuracy:", bayes_svc.best_score_)

################################################################################

In [None]:
#@title HPO-BayesianOptimization
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from bayes_opt import BayesianOptimization

################################################################################

# Decision Tree HPO
# X and y are not defined in this cell; they must already exist when it runs.
# The pbounds below are numeric (low, high) ranges, so each split criterion is
# searched as an index into a list and decoded inside the objective.

# Hyperparameter tuning for Decision Tree Regressor
dtr_criteria = ['squared_error', 'absolute_error']

def optimize_dtr(max_depth, min_samples_split, min_samples_leaf, criterion):
    """Objective for the regressor search.

    ``criterion`` may be a criterion name or a numeric index into
    ``dtr_criteria``. Returns the mean 3-fold negated MSE, so a larger value
    is a better model -- which is what ``maximize`` looks for.
    """
    if not isinstance(criterion, str):
        criterion = dtr_criteria[int(round(criterion))]
    dtr = DecisionTreeRegressor(max_depth=int(max_depth), min_samples_split=int(min_samples_split),
                                 min_samples_leaf=int(min_samples_leaf), criterion=criterion,
                                 random_state=42)
    return cross_val_score(dtr, X, y, cv=3, scoring='neg_mean_squared_error').mean()

dtr_bounds = {'max_depth': (1, 50), 'min_samples_split': (2, 20),
              'min_samples_leaf': (1, 20), 'criterion': (0, len(dtr_criteria) - 1)}

bayes_dtr = BayesianOptimization(f=optimize_dtr, pbounds=dtr_bounds, random_state=42)
bayes_dtr.maximize(init_points=10, n_iter=10)

print("Decision Tree Regressor: Best parameters -", bayes_dtr.max)
print("Decision Tree Regressor: Best criterion -",
      dtr_criteria[int(round(bayes_dtr.max['params']['criterion']))])
# The target is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_dtr.max['target'])

# Hyperparameter tuning for Decision Tree Classifier
dtc_criteria = ['gini', 'entropy']

def optimize_dtc(max_depth, min_samples_split, min_samples_leaf, criterion):
    """Objective for the classifier search: mean 3-fold accuracy.

    ``criterion`` may be a criterion name or a numeric index into
    ``dtc_criteria``.
    """
    if not isinstance(criterion, str):
        criterion = dtc_criteria[int(round(criterion))]
    dtc = DecisionTreeClassifier(max_depth=int(max_depth), min_samples_split=int(min_samples_split),
                                  min_samples_leaf=int(min_samples_leaf), criterion=criterion,
                                  random_state=42)
    return cross_val_score(dtc, X, y, cv=3, scoring='accuracy').mean()

dtc_bounds = {'max_depth': (1, 50), 'min_samples_split': (2, 20),
              'min_samples_leaf': (1, 20), 'criterion': (0, len(dtc_criteria) - 1)}

bayes_dtc = BayesianOptimization(f=optimize_dtc, pbounds=dtc_bounds, random_state=42)
bayes_dtc.maximize(init_points=10, n_iter=10)

print("Decision Tree Classifier: Best parameters -", bayes_dtc.max)
print("Decision Tree Classifier: Best criterion -",
      dtc_criteria[int(round(bayes_dtc.max['params']['criterion']))])
print("Accuracy:", bayes_dtc.max['target'])

################################################################################

# Random Forest HPO
# X and y are not defined in this cell; they must already exist when it runs.
# The pbounds below are numeric (low, high) ranges, so each split criterion is
# searched as an index into a list and decoded inside the objective.

# Hyperparameter tuning for Random Forest Regressor
rfr_criteria = ['squared_error', 'absolute_error']

def optimize_rfr(n_estimators, max_depth, min_samples_split, min_samples_leaf, criterion):
    """Objective for the regressor search.

    ``criterion`` may be a criterion name or a numeric index into
    ``rfr_criteria``. Returns the mean 3-fold negated MSE, so a larger value
    is a better model -- which is what ``maximize`` looks for.
    """
    if not isinstance(criterion, str):
        criterion = rfr_criteria[int(round(criterion))]
    rfr = RandomForestRegressor(n_estimators=int(n_estimators), max_depth=int(max_depth),
                                 min_samples_split=int(min_samples_split), min_samples_leaf=int(min_samples_leaf),
                                 criterion=criterion, random_state=42)
    return cross_val_score(rfr, X, y, cv=3, scoring='neg_mean_squared_error').mean()

rfr_bounds = {'n_estimators': (10, 100), 'max_depth': (1, 50), 'min_samples_split': (2, 20),
              'min_samples_leaf': (1, 20), 'criterion': (0, len(rfr_criteria) - 1)}

bayes_rfr = BayesianOptimization(f=optimize_rfr, pbounds=rfr_bounds, random_state=42)
bayes_rfr.maximize(init_points=10, n_iter=10)

print("Random Forest Regressor: Best parameters -", bayes_rfr.max)
print("Random Forest Regressor: Best criterion -",
      rfr_criteria[int(round(bayes_rfr.max['params']['criterion']))])
# The target is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_rfr.max['target'])

# Hyperparameter tuning for Random Forest Classifier
rfc_criteria = ['gini', 'entropy']

def optimize_rfc(n_estimators, max_depth, min_samples_split, min_samples_leaf, criterion):
    """Objective for the classifier search: mean 3-fold accuracy.

    ``criterion`` may be a criterion name or a numeric index into
    ``rfc_criteria``.
    """
    if not isinstance(criterion, str):
        criterion = rfc_criteria[int(round(criterion))]
    rfc = RandomForestClassifier(n_estimators=int(n_estimators), max_depth=int(max_depth),
                                  min_samples_split=int(min_samples_split), min_samples_leaf=int(min_samples_leaf),
                                  criterion=criterion, random_state=42)
    return cross_val_score(rfc, X, y, cv=3, scoring='accuracy').mean()

rfc_bounds = {'n_estimators': (10, 100), 'max_depth': (1, 50), 'min_samples_split': (2, 20),
              'min_samples_leaf': (1, 20), 'criterion': (0, len(rfc_criteria) - 1)}

bayes_rfc = BayesianOptimization(f=optimize_rfc, pbounds=rfc_bounds, random_state=42)
bayes_rfc.maximize(init_points=10, n_iter=10)

print("Random Forest Classifier: Best parameters -", bayes_rfc.max)
print("Random Forest Classifier: Best criterion -",
      rfc_criteria[int(round(bayes_rfc.max['params']['criterion']))])
print("Accuracy:", bayes_rfc.max['target'])

################################################################################

# Gradient Boost Machine HPO
# X and y are not defined in this cell; they must already exist when it runs.

# Hyperparameter tuning for GBM Regressor
def optimize_gbr(n_estimators, learning_rate, max_depth, min_samples_split, min_samples_leaf, subsample):
    """Objective for the regressor search.

    Returns the mean 3-fold negated MSE, so a larger value is a better
    model -- which is what ``maximize`` looks for.
    """
    gbr = GradientBoostingRegressor(n_estimators=int(n_estimators), learning_rate=learning_rate,
                                     max_depth=int(max_depth), min_samples_split=int(min_samples_split),
                                     min_samples_leaf=int(min_samples_leaf), subsample=subsample,
                                     random_state=42)
    return cross_val_score(gbr, X, y, cv=3, scoring='neg_mean_squared_error').mean()

gbr_bounds = {'n_estimators': (10, 100), 'learning_rate': (0.001, 1.0), 'max_depth': (1, 50),
              'min_samples_split': (2, 20), 'min_samples_leaf': (1, 20), 'subsample': (0.1, 1.0)}

bayes_gbr = BayesianOptimization(f=optimize_gbr, pbounds=gbr_bounds, random_state=42)
bayes_gbr.maximize(init_points=10, n_iter=10)

print("GBM Regressor: Best parameters -", bayes_gbr.max)
# The target is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_gbr.max['target'])

# Hyperparameter tuning for GBM Classifier
def optimize_gbc(n_estimators, learning_rate, max_depth, min_samples_split, min_samples_leaf, subsample):
    """Objective for the classifier search: mean 3-fold accuracy."""
    gbc = GradientBoostingClassifier(n_estimators=int(n_estimators), learning_rate=learning_rate,
                                     max_depth=int(max_depth), min_samples_split=int(min_samples_split),
                                     min_samples_leaf=int(min_samples_leaf), subsample=subsample,
                                     random_state=42)
    return cross_val_score(gbc, X, y, cv=3, scoring='accuracy').mean()

gbc_bounds = {'n_estimators': (10, 100), 'learning_rate': (0.001, 1.0), 'max_depth': (1, 50),
              'min_samples_split': (2, 20), 'min_samples_leaf': (1, 20), 'subsample': (0.1, 1.0)}

bayes_gbc = BayesianOptimization(f=optimize_gbc, pbounds=gbc_bounds, random_state=42)
bayes_gbc.maximize(init_points=10, n_iter=10)

print("GBM Classifier: Best parameters -", bayes_gbc.max)
print("Accuracy:", bayes_gbc.max['target'])

################################################################################

# Support Vector Machine HPO
# X and y are not defined in this cell; they must already exist when it runs.
# The pbounds below are numeric (low, high) ranges, so the kernel is searched
# as an index into this list and decoded inside the objectives.
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# Hyperparameter tuning for SVM Regressor
def optimize_svr(C, kernel, gamma, epsilon):
    """Objective for the regressor search.

    ``kernel`` may be a kernel name or a numeric index into ``svm_kernels``.
    Returns the mean 3-fold negated MSE, so a larger value is a better
    model -- which is what ``maximize`` looks for.
    """
    if not isinstance(kernel, str):
        kernel = svm_kernels[int(round(kernel))]
    svr = SVR(C=C, kernel=kernel, gamma=gamma, epsilon=epsilon)
    return cross_val_score(svr, X, y, cv=3, scoring='neg_mean_squared_error').mean()

svr_bounds = {'C': (1e-6, 1e+6), 'kernel': (0, len(svm_kernels) - 1),
              'gamma': (1e-6, 1e+1), 'epsilon': (1e-6, 1e+1)}

bayes_svr = BayesianOptimization(f=optimize_svr, pbounds=svr_bounds, random_state=42)
bayes_svr.maximize(init_points=10, n_iter=10)

print("SVM Regressor: Best parameters -", bayes_svr.max)
print("SVM Regressor: Best kernel -",
      svm_kernels[int(round(bayes_svr.max['params']['kernel']))])
# The target is the negated MSE; the sign flip makes this the plain MSE.
print("Mean Squared Error:", -bayes_svr.max['target'])

# Hyperparameter tuning for SVM Classifier
def optimize_svc(C, kernel, gamma):
    """Objective for the classifier search: mean 3-fold accuracy.

    ``kernel`` may be a kernel name or a numeric index into ``svm_kernels``.
    """
    if not isinstance(kernel, str):
        kernel = svm_kernels[int(round(kernel))]
    svc = SVC(C=C, kernel=kernel, gamma=gamma)
    return cross_val_score(svc, X, y, cv=3, scoring='accuracy').mean()

svc_bounds = {'C': (1e-6, 1e+6), 'kernel': (0, len(svm_kernels) - 1), 'gamma': (1e-6, 1e+1)}

bayes_svc = BayesianOptimization(f=optimize_svc, pbounds=svc_bounds, random_state=42)
bayes_svc.maximize(init_points=10, n_iter=10)

print("SVM Classifier: Best parameters -", bayes_svc.max)
print("SVM Classifier: Best kernel -",
      svm_kernels[int(round(bayes_svc.max['params']['kernel']))])
print("Accuracy:", bayes_svc.max['target'])

################################################################################