In [None]:
import warnings

import numpy as np
import pandas as pd
from scipy.stats import zscore
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler, StandardScaler, PolynomialFeatures
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

<h1>DATA PREPROCESSING</h1>

In [None]:
#@title preprocessing_classes
class Duplicates:
    """Cleaning stage that removes exact duplicate rows from a DataFrame.

    ``fit`` and ``transform`` are pass-through stubs; the actual work is done
    by :meth:`handle`.

    Parameters
    ----------
    duplicates : bool, default True
        When falsy, :meth:`handle` returns its input untouched.
    """

    def __init__(self, duplicates=True):
        self.duplicates = duplicates

    def fit(self, X, y=None):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def handle(self, df):
        """Return ``df`` without duplicate rows and with a fresh 0..n-1 index.

        The input frame is not modified. The number of removed rows is stored
        in ``self.count_duplicates``.
        """
        if not self.duplicates:
            return df
        # ignore_index=True already renumbers the rows, so no extra
        # reset_index call is needed.
        deduplicated = df.drop_duplicates(ignore_index=True)
        self.count_duplicates = len(df) - len(deduplicated)
        return deduplicated

--*
class MissingValues:
    """Cleaning stage that imputes or deletes missing values in a DataFrame.

    Numeric and non-numeric columns are configured independently. ``fit`` and
    ``transform`` are pass-through stubs; the work is done by :meth:`handle`.

    Parameters
    ----------
    missing_num : str or None
        Strategy for numeric columns: ``'auto'`` (linear regression first,
        then KNN for whatever is still missing), ``'linreg'``, ``'knn'``,
        ``'mean'``, ``'median'``, ``'most_frequent'`` or ``'delete'``.
        A falsy value disables numeric handling; unknown strings are ignored.
    missing_categ : str or None
        Strategy for non-numeric columns: ``'auto'`` (logistic regression
        first, then KNN), ``'logreg'``, ``'knn'``, ``'most_frequent'`` or
        ``'delete'``. A falsy value disables it; unknown strings are ignored.
    """

    def __init__(self, missing_num=None, missing_categ=None):
        self.missing_num = missing_num
        self.missing_categ = missing_categ

    def fit(self, X, y=None):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def handle(self, df, _n_neighbors=5):
        """Apply the configured strategies and return the cleaned frame.

        Rows that are entirely empty are dropped first and the index is
        renumbered. The total number of missing cells found in the input is
        stored in ``self.count_missing``. The input frame itself is not
        modified; when nothing has to be done it is returned as-is.

        ``_n_neighbors`` is forwarded to ``KNNImputer`` for the KNN strategies.
        """
        if not (self.missing_num or self.missing_categ):
            return df
        self.count_missing = df.isna().sum().sum()
        if self.count_missing == 0:
            return df

        # The imputers below align on the index, so it must be renumbered
        # after all-empty rows have been removed.
        df = df.dropna(how='all').reset_index(drop=True)

        if self.missing_num:  # numeric data
            if self.missing_num in ('auto', 'linreg'):
                df = self._lin_regression_impute(df, LinearRegression())
            if self.missing_num in ('auto', 'knn'):
                imputer = KNNImputer(n_neighbors=_n_neighbors)
                df = self._impute(df, imputer, type='num')
            elif self.missing_num in ('mean', 'median', 'most_frequent'):
                imputer = SimpleImputer(strategy=self.missing_num)
                df = self._impute(df, imputer, type='num')
            elif self.missing_num == 'delete':
                df = self._delete(df, type='num')

        if self.missing_categ:  # categorical data
            if self.missing_categ in ('auto', 'logreg'):
                df = self._log_regression_impute(df, LogisticRegression())
            if self.missing_categ in ('auto', 'knn'):
                imputer = KNNImputer(n_neighbors=_n_neighbors)
                df = self._impute(df, imputer, type='categ')
            elif self.missing_categ == 'most_frequent':
                imputer = SimpleImputer(strategy=self.missing_categ)
                df = self._impute(df, imputer, type='categ')
            elif self.missing_categ == 'delete':
                df = self._delete(df, type='categ')
        return df

    @staticmethod
    def _is_integral(series):
        """Tell whether every non-missing value of ``series`` is a whole number."""
        return bool((series.fillna(-9999) % 1 == 0).all())

    @staticmethod
    def _fit_transform_column(column, imputer):
        """Run ``imputer`` on one column and return a Series aligned with it."""
        filled = imputer.fit_transform(np.array(column).reshape(-1, 1))
        # Re-using the column's index keeps the result aligned even when the
        # frame's index has gaps; a shape mismatch raises here and is
        # reported by the caller.
        return pd.Series(np.asarray(filled).ravel(), index=column.index)

    @staticmethod
    def _split_by_missing(df, feature):
        """Split ``df`` into (rows with ``feature`` known, rows with it missing).

        Only rows whose other columns are all present are kept, because they
        are used as model inputs.
        """
        others = [col for col in df.columns if col != feature]
        is_missing = df[feature].isnull()
        return df[~is_missing].dropna(subset=others), df[is_missing].dropna(subset=others)

    def _impute(self, df, imputer, type):
        """Fill missing values column by column with ``imputer``.

        ``type='num'`` processes numeric columns, any other value processes
        the non-numeric ones. Non-numeric columns are label-encoded for the
        imputer and decoded afterwards. A column whose imputation fails is
        left untouched and a warning is emitted.
        """
        cols_num = df.select_dtypes(include=np.number).columns
        want_numeric = type == 'num'
        for feature in df.columns:
            if (feature in cols_num) != want_numeric:
                continue
            column = df[feature]
            if column.isna().sum() == 0:
                continue
            try:
                if want_numeric:
                    imputed = self._fit_transform_column(column, imputer)
                    if self._is_integral(column):
                        # Keep integer columns integer after imputation.
                        imputed = imputed.round().astype('Int64')
                else:
                    codes = {label: code for code, label in enumerate(column.dropna().unique())}
                    labels = {code: label for label, code in codes.items()}
                    imputed = self._fit_transform_column(column.map(codes), imputer)
                    imputed = imputed.round().astype('Int64').map(labels)
                # Assign only once everything succeeded so that a failure
                # never leaves a half-encoded column behind.
                df[feature] = imputed
            except Exception as exc:
                warnings.warn(f'Imputation failed for feature "{feature}": {exc}')
        return df

    def _lin_regression_impute(self, df, model):
        """Predict missing numeric values from the remaining columns.

        Non-numeric columns are temporarily replaced by numeric codes so they
        can act as predictors. For every numeric column, ``model`` is fitted
        (behind a ``StandardScaler``) on the rows where the column is known
        and used to predict the rows where it is missing. The target is
        log-transformed when all known values are strictly positive. Columns
        for which fitting fails are left untouched and a warning is emitted.
        """
        cols_num = df.select_dtypes(include=np.number).columns
        mapping = {}
        for feature in df.columns:
            if feature not in cols_num:
                # Every distinct entry of the column gets a numeric code.
                mapping[feature] = {k: i for i, k in enumerate(df[feature])}
                df[feature] = df[feature].map(mapping[feature])

        for feature in cols_num:
            try:
                train_df, test_df = self._split_by_missing(df, feature)
                if len(test_df.index) == 0:
                    continue
                pipe = make_pipeline(StandardScaler(), model)
                X_train = train_df.drop(columns=feature)
                X_test = test_df.drop(columns=feature)
                y = train_df[feature]
                # log() is only defined for strictly positive targets.
                use_log = bool((y > 0).all())
                fitted = pipe.fit(X_train, np.log(y) if use_log else y)
                pred = fitted.predict(X_test)
                if use_log:
                    pred = np.exp(pred)
                if self._is_integral(df[feature]):
                    pred = np.round(pred)
                df.loc[test_df.index, feature] = pred
            except Exception as exc:
                warnings.warn(f'Linear-regression imputation failed for feature "{feature}": {exc}')

        # Restore the original values of the encoded columns.
        for feature, codes in mapping.items():
            inverse = {code: label for label, code in codes.items()}
            df[feature] = df[feature].map(inverse)
        return df

    def _log_regression_impute(self, df, model):
        """Predict missing categorical values from the remaining columns.

        Non-numeric columns are label-encoded with missing entries kept
        missing. For every such column, ``model`` is fitted (behind a
        ``StandardScaler``) on the rows where the label is known and used to
        predict the rows where it is missing; only rows whose other columns
        are complete take part. Columns for which fitting fails are left
        untouched and a warning is emitted.
        """
        cols_num = df.select_dtypes(include=np.number).columns
        cols_categ = [col for col in df.columns if col not in cols_num]
        mapping = {}
        for feature in cols_categ:
            mapping[feature] = {k: i for i, k in enumerate(df[feature].dropna().unique())}
            df[feature] = df[feature].map(mapping[feature])

        for feature in cols_categ:
            try:
                train_df, test_df = self._split_by_missing(df, feature)
                if len(test_df.index) == 0:
                    continue
                pipe = make_pipeline(StandardScaler(), model)
                fitted = pipe.fit(train_df.drop(columns=feature), train_df[feature].astype('int'))
                df.loc[test_df.index, feature] = fitted.predict(test_df.drop(columns=feature))
            except Exception as exc:
                warnings.warn(f'Logistic-regression imputation failed for feature "{feature}": {exc}')

        # Decode the labels; entries that could not be predicted stay missing.
        for feature, codes in mapping.items():
            inverse = {code: label for label, code in codes.items()}
            df[feature] = df[feature].map(inverse)
        return df

    def _delete(self, df, type):
        """Drop rows with a missing value in the selected kind of column.

        ``type='num'`` looks at numeric columns, any other value looks at the
        non-numeric ones. The index of the surviving rows is preserved.
        """
        cols_num = df.select_dtypes(include=np.number).columns
        if type == 'num':
            subset = [col for col in df.columns if col in cols_num]
        else:
            subset = [col for col in df.columns if col not in cols_num]
        if not subset:
            return df
        return df.dropna(subset=subset)


from scipy.stats import zscore
class Outliers:
    """Cleaning stage that removes rows containing outliers.

    ``fit`` and ``transform`` are pass-through stubs; the work is done by
    :meth:`handle`.

    Parameters
    ----------
    method : str or None
        ``'zscore'``, ``'iqr'`` or ``'manual'``. A falsy value disables the
        stage; any other value leaves the data untouched.
    threshold : float or None
        Cut-off for ``'zscore'`` (absolute z-score) and ``'iqr'`` (multiple of
        the inter-quartile range). ``None`` selects 3.0 for z-scores and 1.5
        for IQR.
    manual_dict : dict or None
        For ``'manual'``: maps a column name to the list of values that are
        to be treated as outliers in that column.
    """

    # Conventional cut-offs used when no explicit threshold is configured.
    _DEFAULT_THRESHOLDS = {'zscore': 3.0, 'iqr': 1.5}

    def __init__(self, method=None, threshold=None, manual_dict=None):
        self.method = method
        self.threshold = threshold
        self.manual_dict = manual_dict

    def fit(self, X, y=None):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def handle(self, df):
        """Return ``df`` without the rows flagged by the configured method.

        The number of flagged values is stored in ``self.count_outliers``.
        The input frame is not modified.
        """
        if not self.method:
            return df
        self.count_outliers = 0
        detectors = {
            'zscore': self._zscore_outliers,
            'iqr': self._iqr_outliers,
            'manual': self._manual_outliers,
        }
        detector = detectors.get(self.method)
        if detector is not None:
            df, self.count_outliers = detector(df)
        return df

    def _threshold_for(self, method):
        """Return the configured threshold or the default one for ``method``."""
        threshold = getattr(self, 'threshold', None)
        return self._DEFAULT_THRESHOLDS[method] if threshold is None else threshold

    def _zscore_outliers(self, df):
        """Drop rows whose absolute z-score exceeds the threshold.

        Numeric columns are processed one after another on the already
        filtered frame. Missing values are ignored when computing the scores
        and are never flagged. Returns ``(filtered_df, outlier_count)``.
        """
        threshold = self._threshold_for('zscore')
        outlier_count = 0
        for feature in df.select_dtypes(include=np.number).columns:
            values = df[feature].astype(float).to_numpy()
            # A constant column yields 0/0; the resulting NaN scores simply
            # flag nothing.
            with np.errstate(invalid='ignore', divide='ignore'):
                abs_z_scores = np.abs(np.asarray(zscore(values, nan_policy='omit'), dtype=float))
            is_outlier = abs_z_scores > threshold
            outlier_count += int(is_outlier.sum())
            df = df[~is_outlier]
        return df, outlier_count

    def _iqr_outliers(self, df):
        """Drop rows lying more than ``threshold`` IQRs outside the quartiles.

        Numeric columns are processed one after another on the already
        filtered frame. Missing values are never flagged. Returns
        ``(filtered_df, outlier_count)``.
        """
        threshold = self._threshold_for('iqr')
        outlier_count = 0
        for feature in df.select_dtypes(include=np.number).columns:
            q1 = df[feature].quantile(0.25)
            q3 = df[feature].quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - (threshold * iqr)
            upper_bound = q3 + (threshold * iqr)
            is_outlier = (df[feature] < lower_bound) | (df[feature] > upper_bound)
            # Comparisons on nullable dtypes may yield <NA>; treat as "not an outlier".
            is_outlier = is_outlier.fillna(False).astype(bool)
            outlier_count += int(is_outlier.sum())
            df = df[~is_outlier]
        return df, outlier_count

    def _manual_outliers(self, df):
        """Drop rows whose value is listed in ``manual_dict`` for its column.

        Columns named in ``manual_dict`` that do not exist in ``df`` are
        skipped. Returns ``(filtered_df, outlier_count)``.
        """
        outlier_count = 0
        for feature, flagged_values in (self.manual_dict or {}).items():
            if feature in df.columns:
                is_outlier = df[feature].isin(flagged_values)
                outlier_count += int(is_outlier.sum())
                df = df[~is_outlier]
        return df, outlier_count


class Adjust:
    """Cleaning stage for datetime extraction, rounding and feature scaling.

    ``fit`` and ``transform`` are pass-through stubs; the work is done by
    :meth:`handle`.

    Parameters
    ----------
    scaler : str or None
        ``'minmax'``, ``'standard'``, ``'robust'``, ``'maxabs'``,
        ``'quantile'`` or ``'power'``. A falsy value disables scaling.
    extract_datetime : str or bool
        Falsy disables datetime handling. ``'M'``, ``'Y'``, ``'h'``, ``'m'``,
        ``'s'`` extract day plus every component up to month, year, hour,
        minute or second respectively; ``'auto'`` extracts all of them. Any
        other truthy value extracts the day only.
    round_values : bool
        When truthy, whole-number columns are cast to ``Int64`` and the other
        numeric columns are rounded to the largest number of decimals found
        in the column.
    """

    # Option name -> class name inside sklearn.preprocessing (resolved lazily).
    _SCALERS = {
        'minmax': 'MinMaxScaler',
        'standard': 'StandardScaler',
        'robust': 'RobustScaler',
        'maxabs': 'MaxAbsScaler',
        'quantile': 'QuantileTransformer',
        'power': 'PowerTransformer',
    }
    # (output column, ``.dt`` attribute) in extraction order.
    _DATETIME_PARTS = (
        ('Day', 'day'),
        ('Month', 'month'),
        ('Year', 'year'),
        ('Hour', 'hour'),
        ('Minute', 'minute'),
        ('Sec', 'second'),
    )
    # How many leading entries of _DATETIME_PARTS each option extracts.
    _GRANULARITY_DEPTH = {'M': 2, 'Y': 3, 'h': 4, 'm': 5, 's': 6, 'auto': 6}

    def __init__(self, scaler=None, extract_datetime=False, round_values=False):
        self.scaler = scaler
        self.extract_datetime = extract_datetime
        self.round_values = round_values

    def fit(self, X, y=None):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def handle(self, df):
        """Return an adjusted copy of ``df``.

        Steps run in this order: datetime extraction, rounding, scaling.
        Scaling is applied to the numeric columns only.

        Raises
        ------
        ValueError
            If ``scaler`` is set to an unknown name.
        """
        scaler = self._build_scaler()
        df = df.copy()  # never modify the caller's frame
        if self.extract_datetime:
            df = self._convert_datetime(df)
        if self.round_values:
            df = self._round_values(df)
        if scaler is not None:
            cols_num = df.select_dtypes(include=np.number).columns
            if len(cols_num) > 0:
                scaled = scaler.fit_transform(df[cols_num].astype(float))
                df[cols_num] = pd.DataFrame(np.asarray(scaled), index=df.index, columns=cols_num)
        return df

    def _build_scaler(self):
        """Instantiate the configured scaler, or return ``None`` if disabled."""
        if not self.scaler:
            return None
        class_name = self._SCALERS.get(self.scaler) if isinstance(self.scaler, str) else None
        if class_name is None:
            raise ValueError(
                f'Unknown scaler "{self.scaler}"; expected one of {sorted(self._SCALERS)}.'
            )
        return getattr(preprocessing, class_name)()

    def _convert_datetime(self, df):
        """Parse non-numeric columns as datetimes and extract their components.

        Columns that cannot be parsed are left untouched. Extracted components
        are written to the fixed column names ``Day``, ``Month``, ``Year``,
        ``Hour``, ``Minute`` and ``Sec``, so with several datetime columns the
        last one processed determines their content.
        """
        cols_num = df.select_dtypes(include=np.number).columns
        granularity = self.extract_datetime
        depth = self._GRANULARITY_DEPTH.get(granularity, 1) if isinstance(granularity, str) else 1
        # Iterate in column order so the result is deterministic.
        for feature in [col for col in df.columns if col not in cols_num]:
            try:
                with warnings.catch_warnings():
                    # Format-inference warnings are expected for free-text columns.
                    warnings.simplefilter('ignore')
                    parsed = pd.to_datetime(df[feature])
                parts = {name: getattr(parsed.dt, attr) for name, attr in self._DATETIME_PARTS[:depth]}
            except Exception:
                # Not a datetime column; leave it as it is.
                continue
            df[feature] = parsed
            for name, values in parts.items():
                df[name] = values
        return df

    @staticmethod
    def _max_decimals(values):
        """Return the largest number of decimals among ``values``.

        Returns ``None`` when it cannot be determined: no finite value, or a
        value whose text form uses exponent notation.
        """
        decimals = None
        for value in values:
            if not np.isfinite(value):
                continue
            text = str(value)
            if 'e' in text or 'E' in text:
                return None
            position = text[::-1].find('.')
            if decimals is None or position > decimals:
                decimals = position
        return decimals

    def _round_values(self, df):
        """Normalise numeric dtypes.

        Columns holding only whole numbers are cast to the nullable ``Int64``
        dtype. Other numeric columns are cast to ``float`` and rounded to the
        largest number of decimals present; columns for which that number
        cannot be determined are only cast. Failures leave the column
        untouched and emit a warning.
        """
        for feature in df.select_dtypes(include=np.number).columns:
            column = df[feature]
            try:
                if (column.fillna(-9999) % 1 == 0).all():
                    df[feature] = column.astype('Int64')
                    continue
                as_float = column.astype(float)
                decimals = self._max_decimals(as_float)
                if decimals is not None:
                    as_float = as_float.round(decimals=decimals)
                df[feature] = as_float
            except Exception as exc:
                warnings.warn(f'Could not round feature "{feature}": {exc}')
        return df


class EncodeCateg:
    """Cleaning stage that adds encoded versions of categorical columns.

    ``fit`` and ``transform`` are pass-through stubs; the work is done by
    :meth:`handle`. Source columns are kept; encodings are added as new
    columns.

    Parameters
    ----------
    encode_categ : list, str, bool or None
        Falsy disables the stage. A list is read as ``[method]`` or
        ``[method, targets]`` where ``method`` is ``'auto'``, ``'onehot'`` or
        ``'label'`` and ``targets`` is a list of column names or positional
        column indexes. One of the three method names given as a plain string
        is treated like ``[method]``. Any other truthy value means
        ``['auto']``.
    """

    _METHODS = ('auto', 'onehot', 'label')

    def __init__(self, encode_categ=None):
        self.encode_categ = encode_categ

    def fit(self, X, y=None):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def handle(self, df):
        """Return a copy of ``df`` with encoded columns appended.

        Without explicit targets every non-numeric column is considered, in
        column order. Non-numeric columns that parse as datetimes are skipped.
        With ``'auto'``, columns with at most 10 distinct values are one-hot
        encoded, columns with at most 20 are label encoded and the rest are
        left alone. A column whose encoding fails is skipped with a warning.
        """
        if not self.encode_categ:
            return df
        method, targets = self._parse_config()
        df = df.copy()  # never modify the caller's frame
        cols_num = df.select_dtypes(include=np.number).columns
        cols_categ = [col for col in df.columns if col not in cols_num]
        if targets is None:
            targets = cols_categ
        for target in targets:
            feature = self._resolve_feature(df, target, cols_categ)
            if feature is None:
                continue
            if feature in cols_categ and self._looks_like_datetime(df[feature]):
                continue
            try:
                if method == 'auto':
                    n_unique = df[feature].nunique()
                    if n_unique <= 10:
                        df = self._to_onehot(df, feature)
                    elif n_unique <= 20:
                        df = self._to_label(df, feature)
                elif method == 'onehot':
                    df = self._to_onehot(df, feature)
                elif method == 'label':
                    df = self._to_label(df, feature)
            except Exception as exc:
                warnings.warn(f'Encoding failed for feature "{feature}": {exc}')
        return df

    def _parse_config(self):
        """Split ``encode_categ`` into ``(method, targets)``; targets may be ``None``."""
        config = self.encode_categ
        if isinstance(config, str) and config in self._METHODS:
            config = [config]
        elif not isinstance(config, (list, tuple)):
            config = ['auto']
        method = config[0]
        targets = config[1] if len(config) > 1 else None
        return method, targets

    @staticmethod
    def _resolve_feature(df, target, cols_categ):
        """Translate a target (column name or position) into a column name.

        Returns ``None`` and warns when the target cannot be resolved.
        """
        if target in cols_categ:
            return target
        if isinstance(target, (int, np.integer)) and not isinstance(target, bool):
            try:
                return df.columns[target]
            except IndexError:
                warnings.warn(f'Column index {target} is out of range; skipped.')
                return None
        if target in df.columns:
            return target
        warnings.warn(f'Column "{target}" not found; skipped.')
        return None

    @staticmethod
    def _looks_like_datetime(column):
        """Tell whether ``pd.to_datetime`` accepts the whole column."""
        try:
            with warnings.catch_warnings():
                # Format-inference warnings are expected for free-text columns.
                warnings.simplefilter('ignore')
                pd.to_datetime(column)
        except Exception:
            return False
        return True

    def _to_onehot(self, df, feature, limit=10):
        """Append one indicator column per distinct value of ``feature``.

        Prints a hint when more than ``limit`` columns are created.
        """
        one_hot = pd.get_dummies(df[feature], prefix=feature)
        if one_hot.shape[1] > limit:
            print('ONEHOT encoding for feature "{}" creates {} new features. Consider LABEL encoding instead.'.format(feature, one_hot.shape[1]))
        df = df.join(one_hot)
        return df

    def _to_label(self, df, feature):
        """Append ``<feature>_lab`` holding integer codes for ``feature``.

        If the encoder assigned a code to missing values, that code is turned
        back into a missing value in the new column.
        """
        le = LabelEncoder()
        label_column = f'{feature}_lab'
        df[label_column] = le.fit_transform(df[feature].values)
        for code, label in enumerate(le.classes_):
            if isinstance(label, (float, np.floating)) and np.isnan(label):
                df[label_column] = df[label_column].mask(df[label_column] == code)
        return df

In [None]:
# Load the raw data from a CSV file that is expected in the working directory.
dataset = pd.read_csv('dp_data1.csv')
# Print the column names, non-null counts and dtypes of the loaded frame.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 25 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Feature_1   1000 non-null   float64
 1   Feature_2   1000 non-null   float64
 2   Feature_3   1000 non-null   float64
 3   Feature_4   1000 non-null   int64  
 4   Feature_5   1000 non-null   int64  
 5   Feature_6   1000 non-null   object 
 6   Feature_7   1000 non-null   int64  
 7   Feature_8   1000 non-null   float64
 8   Feature_9   1000 non-null   float64
 9   Feature_10  1000 non-null   float64
 10  Feature_11  1000 non-null   int64  
 11  Feature_12  1000 non-null   float64
 12  Feature_13  1000 non-null   float64
 13  Feature_14  1000 non-null   float64
 14  Feature_15  1000 non-null   int64  
 15  Feature_16  1000 non-null   float64
 16  Feature_17  782 non-null    float64
 17  Feature_18  1000 non-null   float64
 18  Feature_19  746 non-null    object 
 19  Feature_20  1000 non-null   

In [None]:
# List every column with its position so the target can be picked by number.
print("Column indexes:")
for i, column in enumerate(dataset.columns):
    print(f"{i} : {column}")

n_columns = len(dataset.columns)
if n_columns == 0:
    raise ValueError("The dataset has no columns to choose a target from.")

# Keep asking until the answer is an integer that addresses an existing
# column (negative positions count from the end, as in normal indexing).
# NOTE: input() blocks a non-interactive "Run All".
while True:
    answer = input("Enter the index of the target variable: ")
    try:
        target_index = int(answer)
    except ValueError:
        print(f"'{answer}' is not an integer, please try again.")
        continue
    if -n_columns <= target_index < n_columns:
        break
    print(f"Index must be between {-n_columns} and {n_columns - 1}, please try again.")

target_variable = dataset.columns[target_index]

Column indexes:
0 : Feature_1
1 : Feature_2
2 : Feature_3
3 : Feature_4
4 : Feature_5
5 : Feature_6
6 : Feature_7
7 : Feature_8
8 : Feature_9
9 : Feature_10
10 : Feature_11
11 : Feature_12
12 : Feature_13
13 : Feature_14
14 : Feature_15
15 : Feature_16
16 : Feature_17
17 : Feature_18
18 : Feature_19
19 : Feature_20
20 : Feature_21
21 : Feature_22
22 : Feature_23
23 : Feature_24
24 : Feature_25
Enter the index of the target variable: 20


In [None]:
from sklearn.pipeline import Pipeline
# Declarative list of the cleaning stages and their options.
# NOTE(review): every stage's transform() returns its input unchanged (the
# cleaning logic lives in the handle() methods), so running this Pipeline
# through fit/transform would not modify the data.
preprocessing__pipeline = Pipeline([
    ('missing_values', MissingValues(missing_num='knn', missing_categ='logreg')),
    ('outliers', Outliers(method='zscore')),
    ('adjust', Adjust(scaler='standard', extract_datetime=True, round_values=True)),
    ('encode_categ', EncodeCateg(encode_categ=['auto'])),
    ('duplicates', Duplicates())
])

# NOTE(review): `preprocessing_pipeline` (single underscore) is not defined in
# the visible notebook and is a different name from the `preprocessing__pipeline`
# object built above; on a fresh kernel this line raises NameError. The recorded
# output shows PC1..PC20 columns, so the missing function presumably applied a
# PCA as well -- TODO restore its definition.
cleaned_dataset = preprocessing_pipeline(dataset, target_variable)
cleaned_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 21 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   PC1         1000 non-null   float64
 1   PC2         1000 non-null   float64
 2   PC3         1000 non-null   float64
 3   PC4         1000 non-null   float64
 4   PC5         1000 non-null   float64
 5   PC6         1000 non-null   float64
 6   PC7         1000 non-null   float64
 7   PC8         1000 non-null   float64
 8   PC9         1000 non-null   float64
 9   PC10        1000 non-null   float64
 10  PC11        1000 non-null   float64
 11  PC12        1000 non-null   float64
 12  PC13        1000 non-null   float64
 13  PC14        1000 non-null   float64
 14  PC15        1000 non-null   float64
 15  PC16        1000 non-null   float64
 16  PC17        1000 non-null   float64
 17  PC18        1000 non-null   float64
 18  PC19        1000 non-null   float64
 19  PC20        1000 non-null   

<h1>MODEL SELECTION</h1>

In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from scipy.stats import randint, uniform

# dtype of the target column, read from the raw `dataset` (not from `cleaned_dataset`).
target_dtype = dataset[target_variable].dtype

def split_data(dataset, target_variable, test_size=0.2, random_state=42):
    """Split a frame into train/test features and targets.

    Parameters
    ----------
    dataset : DataFrame
        Frame containing the features and the target column.
    target_variable : str
        Name of the target column.
    test_size : float, default 0.2
        Share of rows placed in the test split.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` for a reproducible split.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    X = dataset.drop(columns=[target_variable])
    y = dataset[target_variable]
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

def tune_hyperparameters(model, param_distributions, X_train, y_train,
                         scoring=None, n_iter=10, cv=5, random_state=42):
    """Tune ``model`` with a randomized search and return the best estimator.

    Parameters
    ----------
    model : estimator
        Estimator to tune.
    param_distributions : dict
        Search space for ``RandomizedSearchCV``. When empty there is nothing
        to search, so ``model`` is simply fitted and returned.
    X_train, y_train
        Training data.
    scoring : str, callable or None, default None
        Metric used to rank candidates. ``None`` uses the estimator's own
        ``score`` method, which works for classifiers and regressors alike;
        a fixed ``'accuracy'`` would fail on continuous targets.
    n_iter, cv, random_state
        Forwarded to ``RandomizedSearchCV``.

    Returns
    -------
    estimator
        The fitted best estimator.
    """
    if not param_distributions:
        return model.fit(X_train, y_train)
    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_distributions,
        n_iter=n_iter,
        scoring=scoring,
        cv=cv,
        random_state=random_state,
    )
    search.fit(X_train, y_train)
    return search.best_estimator_

def select_models(target_dtype):
    """Return fresh candidate models matching the dtype of the target.

    Boolean, object, string and categorical targets yield classifiers; any
    other numeric dtype yields regressors.

    Raises
    ------
    ValueError
        If the dtype is neither numeric nor categorical.
    """
    dtype_checks = pd.api.types
    is_categorical = (
        dtype_checks.is_bool_dtype(target_dtype)
        or dtype_checks.is_object_dtype(target_dtype)
        or dtype_checks.is_string_dtype(target_dtype)
        or isinstance(target_dtype, pd.CategoricalDtype)
    )
    # Booleans also count as numeric, so the categorical check comes first.
    if is_categorical:
        return [LogisticRegression(), DecisionTreeClassifier(), RandomForestClassifier()]
    if dtype_checks.is_numeric_dtype(target_dtype):
        return [LinearRegression(), DecisionTreeRegressor(), RandomForestRegressor()]
    raise ValueError("Unsupported target variable type. Please ensure the target variable is numeric or categorical.")

def evaluate_model(model, X_test, y_test, problem_type):
    """Score a fitted model on held-out data.

    Returns the accuracy (higher is better) for ``'classification'`` and the
    mean squared error (lower is better) for ``'regression'``.

    Raises
    ------
    ValueError
        If ``problem_type`` is neither ``'classification'`` nor
        ``'regression'``.
    """
    if problem_type not in ('classification', 'regression'):
        raise ValueError(
            f"Unknown problem_type {problem_type!r}; expected 'classification' or 'regression'."
        )
    y_pred = model.predict(X_test)
    if problem_type == 'classification':
        return accuracy_score(y_test, y_pred)
    return mean_squared_error(y_test, y_pred)

from sklearn.base import is_regressor

X_train, X_test, y_train, y_test = split_data(cleaned_dataset, target_variable)

models = select_models(dataset[target_variable].dtype)

# evaluate_model returns the MSE for regression (lower is better) and the
# accuracy for classification (higher is better).
problem_type = 'regression' if all(is_regressor(m) for m in models) else 'classification'
higher_is_better = problem_type == 'classification'

best_model = None
best_score = None  # no score seen yet; 0 would be unbeatable for an MSE

# Search spaces for the randomized search, keyed by estimator class name.
param_grid = {
    'LinearRegression': {},
    'DecisionTreeRegressor': {
        'max_depth': randint(1, 20),
        'min_samples_split': randint(2, 20),
        'min_samples_leaf': randint(1, 20)
    },
    'RandomForestRegressor': {
        'n_estimators': randint(10, 100),
        'max_depth': randint(1, 20),
        'min_samples_split': randint(2, 20),
        'min_samples_leaf': randint(1, 20),
        'max_features': randint(2, 10)
    },
    'LogisticRegression': {
        'C': uniform(0, 10),
        'penalty': ['l1', 'l2'],
        # The default lbfgs solver rejects the l1 penalty; saga supports both.
        'solver': ['saga']
    },
    'DecisionTreeClassifier': {
        'max_depth': randint(1, 20),
        'min_samples_split': randint(2, 20),
        'min_samples_leaf': randint(1, 20)
    },
    'RandomForestClassifier': {
        'n_estimators': randint(10, 100),
        'max_depth': randint(1, 20),
        'min_samples_split': randint(2, 20),
        'min_samples_leaf': randint(1, 20),
        'max_features': uniform(0, 1)
    }
}

for model in models:
    distributions = param_grid.get(model.__class__.__name__)
    # Tune when a search space exists, otherwise fit with default settings.
    if distributions:
        candidate = tune_hyperparameters(model, distributions, X_train, y_train)
    else:
        candidate = model.fit(X_train, y_train)

    score = evaluate_model(candidate, X_test, y_test, problem_type)
    print(f'Model: {candidate} -- score: {score}')

    # Keep the candidate only if it beats the best held-out score so far.
    if best_score is None:
        improved = True
    elif higher_is_better:
        improved = score > best_score
    else:
        improved = score < best_score
    if improved:
        best_score = score
        best_model = candidate

# Refit the winner on all available rows.
best_model.fit(cleaned_dataset.drop(columns=[target_variable]), cleaned_dataset[target_variable])


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 192, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py", line 221, in accuracy_score
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py", line 106, in _check_targets
    raise ValueError("{0} is not supported".format(y_type))
ValueError: continuous is not s

Model: LinearRegression() -- score: 0.3601819237281984


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 192, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py", line 221, in accuracy_score
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py", line 106, in _check_targets
    raise ValueError("{0} is not supported".format(y_type))
ValueError: continuous is not s

Model: RandomForestRegressor() -- score: 0.3641750182046754


In [None]:
# Bayesian optimization of MLP hyperparameters (scikit-optimize BayesSearchCV)
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

def objective_function(params):
    """Train an MLP with ``params`` and return its validation score.

    Reads the module-level ``problem_type``, ``X_train``, ``y_train``,
    ``X_val`` and ``y_val``. ``params`` must provide ``hidden_layer_sizes``,
    ``activation``, ``solver`` and ``alpha``.

    Returns the accuracy for ``'classification'`` and the negated mean
    squared error for ``'regression'``, so that larger is better in both
    cases.

    Raises
    ------
    ValueError
        If ``problem_type`` is neither ``'classification'`` nor
        ``'regression'``.
    """
    if problem_type not in ('classification', 'regression'):
        raise ValueError(
            f"Unknown problem_type {problem_type!r}; expected 'classification' or 'regression'."
        )
    is_classification = problem_type == 'classification'

    # A classifier cannot be trained on a continuous target, so the model
    # class has to follow the problem type.
    estimator_class = MLPClassifier if is_classification else MLPRegressor
    model = estimator_class(hidden_layer_sizes=params['hidden_layer_sizes'],
                            activation=params['activation'],
                            solver=params['solver'],
                            alpha=params['alpha'],
                            random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    if is_classification:
        return accuracy_score(y_val, y_pred)
    return -mean_squared_error(y_val, y_pred)

# Hyperparameter ranges explored by the Bayesian search.
search_space = {
    'hidden_layer_sizes': Integer(50, 200, prior='uniform'),
    'activation': Categorical(['relu', 'tanh']),
    'solver': Categorical(['adam', 'sgd']),
    'alpha': Real(1e-5, 1e-2, prior='log-uniform')
}

# Features and target come from the cleaned frame, as in the model-selection
# cell above.
X = cleaned_dataset.drop(columns=[target_variable])
y = cleaned_dataset[target_variable]

# Same dtype rule as the model-selection cell: float64/int64 targets are
# regression problems, everything else is classification.
problem_type = 'regression' if dataset[target_variable].dtype in [np.float64, np.int64] else 'classification'
mlp_class = MLPRegressor if problem_type == 'regression' else MLPClassifier

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# BayesSearchCV needs a real estimator to clone; scoring=None uses the
# estimator's own score method.
bo = BayesSearchCV(estimator=mlp_class(random_state=42), search_spaces=[search_space],
                   n_iter=50, cv=5, scoring=None, random_state=42)

bo.fit(X_train, y_train)

best_params = bo.best_params_
print("Best Hyperparameters:", best_params)

final_model = mlp_class(**best_params, random_state=42)
final_model.fit(X_train, y_train)