In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
from sklearn.metrics import classification_report, roc_curve, precision_recall_curve, roc_auc_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import xgboost as xgb
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
%matplotlib inline
from sklearn.model_selection import train_test_split


In [2]:
data = pd.read_csv('/content/BankCustomerData.csv')

In [3]:
data

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,term_deposit
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42634,21,student,single,secondary,no,2488,no,no,telephone,12,jan,661,2,92,1,success,yes
42635,87,retired,married,primary,no,2190,no,no,telephone,12,jan,512,2,-1,0,unknown,yes
42636,34,blue-collar,married,primary,no,6718,no,no,cellular,13,jan,278,4,97,1,other,no
42637,22,student,single,secondary,no,254,no,no,cellular,13,jan,143,2,-1,0,unknown,yes


In [4]:
data.isna().sum()

age             0
job             0
marital         0
education       0
default         0
balance         0
housing         0
loan            0
contact         0
day             0
month           0
duration        0
campaign        0
pdays           0
previous        0
poutcome        0
term_deposit    0
dtype: int64

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV

X = data.drop('term_deposit', axis =1)
y = data['term_deposit']

rfc = RandomForestClassifier(random_state=1)
rfecv = RFECV(estimator=rfc, step =1, cv =10, scoring = 'accuracy')

rfecv.fit(X.select_dtypes(exclude = "object"), y)

In [7]:
print('Optimal Number of features is : {}'.format(rfecv.n_features_))


Optimal Number of features is : 1


In [8]:
X.drop(X.columns[np.where(rfecv.support_ == False)[0]], axis =1 ,inplace=True)


In [10]:
### It will zero variance features
from sklearn.feature_selection import VarianceThreshold
var_thres=VarianceThreshold(threshold=0)
var_thres.fit(X.select_dtypes(exclude = "object"))

In [12]:
constant_columns = [column for column in X.select_dtypes(exclude = "object").columns
                    if column not in X.select_dtypes(exclude = "object").columns[var_thres.get_support()]]

print(len(constant_columns))

0


In [13]:
# with the following function we can select highly correlated features
# it will remove the first feature that is correlated with anything other feature

def correlation(dataset, threshold):
    col_corr = set()  # Set of all the names of correlated columns
    corr_matrix = dataset.corr()
    for i in range(len(corr_matrix.columns)):
        for j in range(i):
            if abs(corr_matrix.iloc[i, j]) > threshold: # we are interested in absolute coeff value
                colname = corr_matrix.columns[i]  # getting the name of column
                col_corr.add(colname)
    return col_corr

In [14]:
corr_features = correlation(X, 0.8)
len(set(corr_features))

  corr_matrix = dataset.corr()


0

In [15]:
X

Unnamed: 0,education,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,tertiary,no,unknown,5,may,261,1,-1,0,unknown
1,secondary,no,unknown,5,may,151,1,-1,0,unknown
2,secondary,yes,unknown,5,may,76,1,-1,0,unknown
3,unknown,no,unknown,5,may,92,1,-1,0,unknown
4,unknown,no,unknown,5,may,198,1,-1,0,unknown
...,...,...,...,...,...,...,...,...,...,...
42634,secondary,no,telephone,12,jan,661,2,92,1,success
42635,primary,no,telephone,12,jan,512,2,-1,0,unknown
42636,primary,no,cellular,13,jan,278,4,97,1,other
42637,secondary,no,cellular,13,jan,143,2,-1,0,unknown


In [17]:
y.value_counts()

no     38678
yes     3961
Name: term_deposit, dtype: int64

In [18]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state = 20)


In [19]:
x_train.shape, x_test.shape


((34111, 10), (8528, 10))

In [21]:
from sklearn.feature_selection import mutual_info_classif
# determine the mutual information
mutual_info = mutual_info_classif(x_train.select_dtypes(exclude="object"), y_train)
mutual_info

array([0.00398915, 0.06229328, 0.0024155 , 0.00342634, 0.        ])

In [23]:
mutual_info = pd.Series(mutual_info)
mutual_info.index = x_train.select_dtypes(exclude="object").columns
mutual_info.sort_values(ascending=False)

duration    0.062293
day         0.003989
pdays       0.003426
campaign    0.002415
previous    0.000000
dtype: float64

In [24]:
num_features = x_train.select_dtypes(exclude = "object")
cat_features = x_train.select_dtypes(include = "object")

In [25]:
num_features

Unnamed: 0,day,duration,campaign,pdays,previous
2977,14,161,2,-1,0
3805,16,241,1,-1,0
28838,30,81,1,-1,0
30558,5,60,2,272,2
3699,16,178,1,-1,0
...,...,...,...,...,...
31962,13,1091,2,-1,0
23452,28,91,5,-1,0
23775,28,908,1,-1,0
37135,13,11,8,299,3


In [26]:
cat_features

Unnamed: 0,education,loan,contact,month,poutcome
2977,tertiary,yes,unknown,may,unknown
3805,secondary,yes,unknown,may,unknown
28838,secondary,no,cellular,jan,unknown
30558,secondary,no,cellular,feb,failure
3699,primary,no,unknown,may,unknown
...,...,...,...,...,...
31962,secondary,yes,cellular,apr,unknown
23452,tertiary,no,telephone,aug,unknown
23775,tertiary,yes,cellular,aug,unknown
37135,secondary,no,cellular,may,other


In [28]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.linear_model import (
    LogisticRegression,
    RidgeClassifier,
    SGDClassifier,
    PassiveAggressiveClassifier,
    Perceptron,
)
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.neighbors import (
    KNeighborsClassifier,
    RadiusNeighborsClassifier,
    NearestCentroid,
)
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    BaggingClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
    HistGradientBoostingClassifier,
    VotingClassifier,
)
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import *
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.preprocessing import OneHotEncoder

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import (
    RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier,
    BaggingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
)
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import warnings
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from tabulate import tabulate  # Import the tabulate library
from sklearn.neural_network import MLPClassifier



step1 = ColumnTransformer(
    transformers=[
        ('col_tnf', OneHotEncoder( sparse = False,drop='first', handle_unknown='ignore'),cat_features.columns),
        ('num_tnf', StandardScaler(),num_features.columns )
    ],
    remainder='passthrough'
)

In [29]:
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingClassifier
from tqdm import tqdm
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, GRU, Embedding, SimpleRNN, Dropout


# Ignore all warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import *

def evaluate_model(model, x_train, y_train, x_test, y_test):
    # Preprocess 'previous_year_rating' column


    pipe = Pipeline([
        ('Transform', step1),  # You need to define the 'step1' transformer
        ('model', model),
    ])

    pipe.fit(x_train, y_train)
    y_pred = pipe.predict(x_test)

    # Use appropriate classification metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="macro")
    recall = recall_score(y_test, y_pred,  average="macro")
    f1 = f1_score(y_test, y_pred,  average="macro")

    # Calculate the confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    return accuracy, precision, recall, f1, cm


# Initialize different classification models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest Classifier': RandomForestClassifier(),
    'Gradient Boosting Classifier': GradientBoostingClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'Bagging Classifier': BaggingClassifier(),
    'Extra Trees Classifier': ExtraTreesClassifier(),
    'Support Vector Classifier': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree Classifier': DecisionTreeClassifier(),
    'Naive Bayes': GaussianNB(),
    'HistGradientBoosting Classifier': HistGradientBoostingClassifier(),
    'SGD Classifier': SGDClassifier(),
    'Passive Aggressive Classifier': PassiveAggressiveClassifier(),
    'Ridge Classifier': RidgeClassifier(),
    'MLP Neural Network': MLPClassifier(hidden_layer_sizes=(10,), max_iter=100),

}
# Create a list to store the results
results = []

# Evaluate and print classification metrics for different models
for model_name, model in tqdm(models.items()):
    accuracy, precision, recall, f1, cm = evaluate_model(model, x_train, y_train, x_test, y_test)
    results.append([model_name, accuracy, precision, recall, f1, cm])

# Print the results in a tabular form
headers = ["Model", "Accuracy", "Precision", "Recall", "F1 Score", "Confusion Matrix"]
print(tabulate(results, headers, tablefmt="grid"))

max_accuracy_index = np.argmax([result[1] for result in results])
best_model_name = results[max_accuracy_index][0]
print(f"\nThe model with the highest accuracy is: {best_model_name} (Accuracy: {results[max_accuracy_index][1]:.4f})")

100%|██████████| 15/15 [00:58<00:00,  3.87s/it]

+---------------------------------+------------+-------------+----------+------------+--------------------+
| Model                           |   Accuracy |   Precision |   Recall |   F1 Score | Confusion Matrix   |
| Logistic Regression             |   0.922139 |    0.799149 | 0.636648 |   0.680081 | [[7641  112]       |
|                                 |            |             |          |            |  [ 552  223]]      |
+---------------------------------+------------+-------------+----------+------------+--------------------+
| Random Forest Classifier        |   0.919911 |    0.763365 | 0.692328 |   0.720555 | [[7524  229]       |
|                                 |            |             |          |            |  [ 454  321]]      |
+---------------------------------+------------+-------------+----------+------------+--------------------+
| Gradient Boosting Classifier    |   0.92378  |    0.789143 | 0.675294 |   0.71438  | [[7590  163]       |
|                           




In [30]:
!pip install SMOTE


Collecting SMOTE
  Downloading smote-0.1-py2.py3-none-any.whl (3.3 kB)
Installing collected packages: SMOTE
Successfully installed SMOTE-0.1


In [31]:
!pip install imbalanced-learn




In [32]:
from imblearn.over_sampling import SMOTE


In [34]:


X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=25

)

In [38]:
X_test1 = pd.get_dummies(X_test1, drop_first = True)

In [39]:
X_train1

Unnamed: 0,day,duration,campaign,pdays,previous,education_secondary,education_tertiary,education_unknown,loan_yes,contact_telephone,...,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,poutcome_unknown
10318,12,278,2,-1,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,1
13125,8,705,1,-1,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,1
576,6,69,1,-1,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
14624,15,277,1,-1,0,1,0,0,0,1,...,1,0,0,0,0,0,0,0,0,1
26079,19,228,3,-1,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35702,8,265,1,290,2,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
26767,20,283,2,189,2,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
6618,28,283,3,-1,0,1,0,0,1,0,...,0,0,0,1,0,0,0,0,0,1
24894,18,132,2,-1,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1


In [40]:
sm = SMOTE(random_state=27)
X_train1, y_train1 = sm.fit_resample(X_train1, y_train1)

In [42]:
y_train1.value_counts()

no     28996
yes    28996
Name: term_deposit, dtype: int64

In [44]:
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingClassifier
from tqdm import tqdm
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, GRU, Embedding, SimpleRNN, Dropout


# Ignore all warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import *

def evaluate_model(model, X_train1, y_train1, X_test1, y_test1):
    # Preprocess 'previous_year_rating' column


    pipe = Pipeline([
        ('Transform', step1),  # You need to define the 'step1' transformer
        ('model', model),
    ])

    pipe.fit(x_train, y_train)
    y_pred = pipe.predict(x_test)

    # Use appropriate classification metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="macro")
    recall = recall_score(y_test, y_pred,  average="macro")
    f1 = f1_score(y_test, y_pred,  average="macro")

    # Calculate the confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    return accuracy, precision, recall, f1, cm


# Initialize different classification models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest Classifier': RandomForestClassifier(),
    'Gradient Boosting Classifier': GradientBoostingClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'Bagging Classifier': BaggingClassifier(),
    'Extra Trees Classifier': ExtraTreesClassifier(),
    'Support Vector Classifier': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree Classifier': DecisionTreeClassifier(),
    'Naive Bayes': GaussianNB(),
    'HistGradientBoosting Classifier': HistGradientBoostingClassifier(),
    'SGD Classifier': SGDClassifier(),
    'Passive Aggressive Classifier': PassiveAggressiveClassifier(),
    'Ridge Classifier': RidgeClassifier(),
    'MLP Neural Network': MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000),

}
# Create a list to store the results
results = []

# Evaluate and print classification metrics for different models
for model_name, model in tqdm(models.items()):
    accuracy, precision, recall, f1, cm = evaluate_model(model, X_train1, y_train1, X_test1, y_test1)
    results.append([model_name, accuracy, precision, recall, f1, cm])

# Print the results in a tabular form
headers = ["Model", "Accuracy", "Precision", "Recall", "F1 Score", "Confusion Matrix"]
print(tabulate(results, headers, tablefmt="grid"))

max_accuracy_index = np.argmax([result[1] for result in results])
best_model_name = results[max_accuracy_index][0]
print(f"\nThe model with the highest accuracy is: {best_model_name} (Accuracy: {results[max_accuracy_index][1]:.4f})")

100%|██████████| 15/15 [02:36<00:00, 10.41s/it]

+---------------------------------+------------+-------------+----------+------------+--------------------+
| Model                           |   Accuracy |   Precision |   Recall |   F1 Score | Confusion Matrix   |
| Logistic Regression             |   0.922139 |    0.799149 | 0.636648 |   0.680081 | [[7641  112]       |
|                                 |            |             |          |            |  [ 552  223]]      |
+---------------------------------+------------+-------------+----------+------------+--------------------+
| Random Forest Classifier        |   0.922139 |    0.773457 | 0.695296 |   0.725864 | [[7540  213]       |
|                                 |            |             |          |            |  [ 451  324]]      |
+---------------------------------+------------+-------------+----------+------------+--------------------+
| Gradient Boosting Classifier    |   0.92378  |    0.789143 | 0.675294 |   0.71438  | [[7590  163]       |
|                           




In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc, precision_recall_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Define your class labels (replace with your actual class labels)
class_labels = ['no', 'yes']

# Define your step1 transformer (replace with your actual transformer)

def evaluate_model(model, x_train, y_train, x_test, y_test):
    pipe = Pipeline([
        ('Transform', step1),  # You need to define the 'step1' transformer
        ('model', model),
    ])

    pipe.fit(x_train, y_train)
    y_pred = pipe.predict(x_test)

    # Convert string labels to binary labels
    label_mapping = {label: idx for idx, label in enumerate(class_labels)}
    y_test_binary = np.array([label_mapping[label] for label in y_test])
    y_pred_binary = np.array([label_mapping[label] for label in y_pred])

    # Use appropriate classification metrics
    accuracy = accuracy_score(y_test_binary, y_pred_binary)
    precision = precision_score(y_test_binary, y_pred_binary, average="macro")
    recall = recall_score(y_test_binary, y_pred_binary, average="macro")
    f1 = f1_score(y_test_binary, y_pred_binary, average="macro")

    # Calculate the confusion matrix with class labels
    cm = confusion_matrix(y_test_binary, y_pred_binary)

    # Calculate ROC curve and AUC
    fpr, tpr, thresholds = roc_curve(y_test_binary, y_pred_binary)
    roc_auc = auc(fpr, tpr)

    # Calculate Precision-Recall curve
    precision_curve, recall_curve, _ = precision_recall_curve(y_test_binary, y_pred_binary)

    return accuracy, precision, recall, f1, cm, fpr, tpr, roc_auc, precision_curve, recall_curve

# Initialize different classification models
models = {
    'MLP Neural Network': MLPClassifier(hidden_layer_sizes=(50,), max_iter=1000),
}

# Create a list to store the results
results = []

# Evaluate and print classification metrics for different models
for model_name, model in models.items():
    accuracy, precision, recall, f1, cm, fpr, tpr, roc_auc, precision_curve, recall_curve = evaluate_model(model, X_train1, y_train1, X_test1, y_test1)
    results.append([model_name, accuracy, precision, recall, f1, cm, roc_auc, precision_curve, recall_curve])


# Print the results in a tabular form
headers = ["Model", "Accuracy", "Precision", "Recall", "F1 Score"]
print(tabulate(results, headers, tablefmt="grid"))

max_accuracy_index = np.argmax([result[1] for result in results])
best_model_name = results[max_accuracy_index][0]
print(f"\nThe model with the highest accuracy is: {best_model_name} (Accuracy: {results[max_accuracy_index][1]:.4f})")

# Plot confusion matrix with class labels and show labels
plt.figure(figsize=(8, 6))

# Show diagonal elements in red color
sns.heatmap(results[max_accuracy_index][5], annot=True, fmt="d", cmap="Blues", cbar=False, vmin=0, vmax=results[max_accuracy_index][5].diagonal().max() + 10, annot_kws={"color": "red"})

plt.title(f"Confusion Matrix for {best_model_name}")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.xticks(np.arange(len(class_labels)) + 0.5, class_labels)
plt.yticks(np.arange(len(class_labels)) + 0.5, class_labels)
plt.show()

# Plot ROC curve
plt.figure(figsize=(8, 8))
plt.plot(results[max_accuracy_index][4], results[max_accuracy_index][3], color='darkorange', lw=2, label='ROC curve (AUC = {:.2f})'.format(results[max_accuracy_index][6]))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Plot Precision-Recall curve
plt.figure(figsize=(8, 8))
plt.plot(results[max_accuracy_index][7], results[max_accuracy_index][8], color='darkorange', lw=2, label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc='upper right')
plt.show()


In [52]:
model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=100)
model.fit(X_train1, y_train1)  # Training the model

# Calculate metrics on the training set
train_accuracy = accuracy_score(y_train1, model.predict(X_train1))
train_precision = precision_score(y_train1, model.predict(X_train1), average='macro')
train_recall = recall_score(y_train1, model.predict(X_train1), average='macro')
train_f1 = f1_score(y_train1, model.predict(X_train1), average='macro')

# Calculate metrics on the test set
test_accuracy = accuracy_score(y_test1, model.predict(X_test1))
test_precision = precision_score(y_test1, model.predict(X_test1), average='macro')
test_recall = recall_score(y_test1, model.predict(X_test1), average='macro')
test_f1 = f1_score(y_test1, model.predict(X_test1), average='macro')

# Create a DataFrame
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Train Set': [train_accuracy, train_precision, train_recall, train_f1],
    'Test Set': [test_accuracy, test_precision, test_recall, test_f1]
})

metrics_df


Unnamed: 0,Metric,Train Set,Test Set
0,Accuracy,0.91642,0.888555
1,Precision,0.916558,0.684123
2,Recall,0.91642,0.729988
3,F1 Score,0.916413,0.703221


In [48]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,)],  # Various sizes of hidden layers
    'activation': ['relu', 'tanh'],  # Activation functions to try
    'solver': ['adam', 'sgd'],  # Solvers to try
    'alpha': [0.0001, 0.001, 0.01],  # L2 penalty (regularization term) parameter
    'max_iter': [1000]  # Maximum number of iterations
}

# Initialize MLPClassifier
mlp = MLPClassifier()

# Initialize GridSearchCV with the classifier and parameter grid
grid_search = GridSearchCV(mlp, param_grid, scoring='accuracy', cv=3, verbose=1, n_jobs=-1)

# Perform Grid Search
grid_search.fit(X_train1, y_train1)

# Get the best parameters and best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Get the best model
best_model = grid_search.best_estimator_

# Evaluate the best model
accuracy, precision, recall, f1, cm, fpr, tpr, roc_auc, precision_curve, recall_curve = evaluate_model(best_model, X_train1, y_train1, X_test1, y_test1)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Fitting 3 folds for each of 36 candidates, totalling 108 fits


KeyboardInterrupt: 