In [1]:
import pandas as pd
import numpy as np
from MLP.utils import datasets,plot_table,train_model
from sklearn import metrics
from imblearn.under_sampling import RandomUnderSampler
from tabulate import tabulate
import matplotlib.pyplot as plt
from results.data_processing import DataProcessor
from results.utils import evaluate,null_check,results
from results.models import rus_boost, svm_model, xgb_model, logistic_regression_model,probit_regression_model,MLP,mlp_grid_search, random_forests
import json
from collections import defaultdict

In [2]:
# Read ./data/merged_compustat_and_labels.csv, map +/-inf to NaN and then
# fill every missing value with 0.
data = (
    pd.read_csv('./data/merged_compustat_and_labels.csv')
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

# Feature-set definitions: feature-set name -> list of column names.
with open('MLP/features.json') as json_file:
    data_items = json.load(json_file)

# NOTE(review): the three tuples look like (train, validation, test) year
# ranges and the trailing 5 like a window length — confirm against DataProcessor.
data_obj = DataProcessor(data, (1990, 2002), (2002, 2002), (2003, 2019), 5)

In [3]:
# Display name -> model callable imported from results.models; each callable
# is handed to results() by the batch and window loops.
models = {
    "MLP": MLP,
    "RUS BOOST": rus_boost,
    "Logit": logistic_regression_model,
    "Probit": probit_regression_model,
    "Xg Boost": xgb_model,
}

---

-----

### Batch Processing

In [4]:
# Test period(s) for the batch runs; each tuple is passed to results() as test_period.
test_periods = [(2003,2019)]

In [5]:
# Single training period shared by every batch run in this section.
train_period = (1990, 2002)
# res[test_period][feature_set][model_name] -> value returned by results()
# (the saved output shows it is the test AUC).
res = defaultdict(lambda: defaultdict(dict))
for test_period in test_periods:
    for model, model_fn in models.items():
        for data_item, item_columns in data_items.items():
            # NOTE(review): 'over' is the sixth positional argument here, while
            # the tuning cells pass a param_grid in that slot and 'over' after
            # it — confirm results() accepts both call shapes.
            auc = results(data_obj, train_period, test_period, item_columns, model_fn, 'over')
            res[test_period][data_item][model] = auc

Optimization terminated successfully.
         Current function value: 0.579011
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.598600
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.624152
         Iterations 8
Test AUC Score: 0.6701160290397631
Test AUC Score: 0.6575388747299562
Test AUC Score: 0.6679662459356145


In [6]:
# Inspect the batch results: res[test_period][feature_set][model_name] -> test AUC.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': {'MLP': 0.6138772530364529,
                           'RUS BOOST': 0.6981484449811184,
                           'Logit': 0.6299340912072732,
                           'Probit': 0.6401794666980123,
                           'Xg Boost': 0.6701160290397631},
                          'raw_financial_items_28': {'MLP': 0.6099790495289557,
                           'RUS BOOST': 0.6935936098391818,
                           'Logit': 0.6217356536172955,
                           'Probit': 0.6248934369636598,
                           'Xg Boost': 0.6575388747299562},
                          'financial_ratios_14': {'MLP': 0.6017435226593385,
                           'RUS BOOST': 0.6781667031771219,
                           'Logit': 0.6319123493795301,
                           'Probit': 0.6327112753191103,
                           'Xg Boos

## Batch Processing - MLP Hyperparameter Tuning

In [7]:
# Grid-search the MLP once per feature set for each batch test period.
# train_period is the value set in the batch-processing cell above.
# res[test_period][feature_set] -> value returned by results().
res = defaultdict(lambda: defaultdict(dict))
for test_period in test_periods:
    for data_item, item_columns in data_items.items():
        n_features = len(item_columns)
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            # Every candidate starts with an entry equal to the feature count,
            # followed by the widths listed below.
            'hidden_layer_sizes': [
                (n_features, *widths)
                for widths in (
                    (40,),
                    (50,),
                    (40, 50),
                    (30, 40),
                    (40, 30, 50),
                    (30, 40, 30),
                    (40, 50, 60, 40),
                    (50, 50, 50, 50),
                    (30, 30, 30, 30),
                )
            ],
            'learning_rate_init': [0.001, 0.01, 0.1],
        }
        auc = results(data_obj, train_period, test_period, item_columns, mlp_grid_search, param_grid, 'over')
        res[test_period][data_item] = auc

Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (41, 50), 'learning_rate_init': 0.001}
Best AUC Score: 0.6750913762546036
Test AUC Score: 0.6493749884977225
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (28, 30, 30, 30, 30), 'learning_rate_init': 0.001}
Best AUC Score: 0.6916270449375599
Test AUC Score: 0.6498537355538816
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (14, 30, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.6887016357762049
Test AUC Score: 0.6433061938948582


In [8]:
# Inspect the tuned-MLP results: res[test_period][feature_set] -> test AUC.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.6493749884977225,
                          'raw_financial_items_28': 0.6498537355538816,
                          'financial_ratios_14': 0.6433061938948582})})

In [9]:
# Second MLP grid search over a different set of layer shapes and a wider
# learning-rate range; train_period is the value set in the batch-processing cell.
# res[test_period][feature_set] -> value returned by results().
res = defaultdict(lambda: defaultdict(dict))
for test_period in test_periods:
    for data_item, item_columns in data_items.items():
        n_features = len(item_columns)
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            # Every candidate starts with an entry equal to the feature count,
            # followed by the widths listed below.
            'hidden_layer_sizes': [
                (n_features, *widths)
                for widths in (
                    (20,),
                    (30,),
                    (40, 50, 10),
                    (30, 40, 10),
                    (40, 30, 50, 20),
                    (30, 40),
                    (40, 50, 40, 40, 20),
                    (30, 50, 50, 40, 45),
                    (20, 30, 40, 50, 20),
                )
            ],
            'learning_rate_init': [0.001, 0.01, 0.05, 0.09, 0.1, 0.5],
        }
        auc = results(data_obj, train_period, test_period, item_columns, mlp_grid_search, param_grid, 'over')
        res[test_period][data_item] = auc

Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (41, 40, 30, 50, 20), 'learning_rate_init': 0.001}
Best AUC Score: 0.6865363988237778
Test AUC Score: 0.5991588050454482
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (28, 20), 'learning_rate_init': 0.001}
Best AUC Score: 0.7010594212938617
Test AUC Score: 0.6536590897082667
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (14, 30, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.6887016357762049
Test AUC Score: 0.6433061938948582


In [10]:
# Inspect the results of the second MLP grid: res[test_period][feature_set] -> test AUC.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.5991588050454482,
                          'raw_financial_items_28': 0.6536590897082667,
                          'financial_ratios_14': 0.6433061938948582})})

In [None]:
# Tabulate the tuned-MLP test AUC per feature set (one row per feature set).
# Index res with the configured batch test period: no `key` variable exists at
# this point when the notebook is run top to bottom, and an unknown key would
# only make the defaultdict return an empty entry.
pd.DataFrame.from_dict(res[test_periods[0]], orient='index')

------------
_____________

## XgBoost Tuning

In [11]:
# Grid-search XGBoost once per feature set for each batch test period.
# res[test_period][feature_set] -> value returned by results().
res = defaultdict(lambda: defaultdict(dict))
print("XgBoost Results:))")
for test_period in test_periods:
    for data_item, item_columns in data_items.items():
        # Built inside the loop so every results() call receives its own dict.
        param_grid = dict(
            max_depth=[3, 4, 5],
            learning_rate=[0.001, 0.01, 0.05, 0.09, 0.1, 0.5],
        )
        auc = results(data_obj, train_period, test_period, item_columns, xgb_model, param_grid, 'over')
        res[test_period][data_item] = auc


XgBoost Results:))
Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 4}
Best AUC Score: 0.7373946402448406
Test AUC Score: 0.655070916364936
Best Hyperparameters: {'learning_rate': 0.05, 'max_depth': 4}
Best AUC Score: 0.7245860362306307
Test AUC Score: 0.6473188042499356
Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 4}
Best AUC Score: 0.7334656351399393
Test AUC Score: 0.6770338995125409


In [12]:
# Inspect the XGBoost tuning results: res[test_period][feature_set] -> test AUC.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.655070916364936,
                          'raw_financial_items_28': 0.6473188042499356,
                          'financial_ratios_14': 0.6770338995125409})})

-----------
----

In [None]:
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import GridSearchCV
# def random_forests(data_obj,train_period,test_period,item):
#     # Define the Random Forest classifier
#     rf_model = RandomForestClassifier(random_state=42)

#     # Define the parameter grid to search
#     param_grid = {
#         'n_estimators': [100, 200, 300],  # Number of trees in the forest
#         'max_depth': [None, 10, 20, 30],  # Maximum depth of the tree
#         'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
#         'min_samples_leaf': [1, 2, 4]  # Minimum number of samples required to be at a leaf node
#     }

#     # Perform grid search using cross-validation
#     grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, scoring='roc_auc', cv=5)
#     grid_search.fit(X_train, y_train)

#     # Get the best hyperparameters and the corresponding AUC score
#     best_params = grid_search.best_params_
#     best_auc = grid_search.best_score_
#     test_auc = grid_search.score(X_test, y_test)

#     # Print the best hyperparameters, the corresponding AUC score, and test AUC Score
#     print("Best Hyperparameters:", best_params)
#     print("Best AUC Score:", best_auc)
#     print("Test AUC Score:", test_auc)

#     # Return the test AUC score
#     return test_auc

In [None]:
# random_forests

## Random Forest

In [13]:
# Grid-search the random forest once per feature set for each batch test period.
# res[test_period][feature_set] -> value returned by results().
# NOTE(review): the saved output of this cell reports min_samples_leaf values
# (4 and 1) that are not in the grid below, so either the output predates this
# grid or random_forests() does not use the grid passed in — confirm.
res = defaultdict(lambda: defaultdict(dict))
print("Random Forest Results:))")
for test_period in test_periods:
    for data_item, item_columns in data_items.items():
        param_grid = dict(
            n_estimators=[100, 200, 300, 400, 500],  # number of trees in the forest
            max_depth=[10, 20],                      # maximum depth of each tree
            min_samples_split=[5, 10],               # minimum samples needed to split an internal node
            min_samples_leaf=[5, 6],                 # minimum samples required at a leaf node
        )
        auc = results(data_obj, train_period, test_period, item_columns, random_forests, param_grid, 'over')
        res[test_period][data_item] = auc


Random Forest Results:))
Best Hyperparameters: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 300}
Best AUC Score: 0.7529631548341842
Test AUC Score: 0.7125599231558808
Best Hyperparameters: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
Best AUC Score: 0.7515501150406402
Test AUC Score: 0.700872533095392
Best Hyperparameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300}
Best AUC Score: 0.7422057467297949
Test AUC Score: 0.6946625573110257


In [14]:
# Inspect the random-forest tuning results: res[test_period][feature_set] -> test AUC.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.7125599231558808,
                          'raw_financial_items_28': 0.700872533095392,
                          'financial_ratios_14': 0.6946625573110257})})

----

------

## Window Processing

In [9]:
# Build the train/test period lists used by the window runs; the saved output
# below shows the entries are (start_year, end_year) tuples.
train_batches,test_batches = data_obj.create_batches()

In [10]:
# Preview the (train, test) period pairs; the final pair of each list is excluded by [:-1].
# Note: this rebinds the notebook-level train_period / test_period names.
for train_period,test_period in zip(train_batches[:-1],test_batches[:-1]):
    print(train_period,test_period)

(1990, 1995) (1996, 2001)
(1996, 2001) (2002, 2006)
(2002, 2006) (2007, 2011)
(2007, 2011) (2012, 2016)
(2012, 2016) (2017, 2021)


In [11]:
# NOTE(review): features_comp is not defined or imported in any visible cell,
# so this cell raises NameError on a fresh kernel — define it (feature-set
# name -> column list) before running. It also replaces the data_items loaded
# from MLP/features.json.
data_items = features_comp
# res["<train period>-<test period>"][feature_set][model_name] -> value returned by results()
res = defaultdict(lambda: defaultdict(dict))
for train_period, test_period in zip(train_batches[:-1], test_batches[:-1]):
    window_key = f"{train_period}-{test_period}"
    for model, model_fn in models.items():
        for data_item, item_columns in data_items.items():
            auc = results(data_obj, train_period, test_period, item_columns, model_fn, 'over')
            res[window_key][data_item][model] = auc

Optimization terminated successfully.
         Current function value: 0.498506
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.580061
         Iterations 9
Optimization terminated successfully.
         Current function value: 0.608704
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.669697
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.536646
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.585333
         Iterations 9
Optimization terminated successfully.
         Current function value: 0.603220
         Iterations 9
Optimization terminated successfully.
         Current function value: 0.678238
         Iterations 13
Optimization terminated successfully.
         Current function value: 0.511275
         Iterations 12
Optimization terminated successfully.
         Current function value: 0.5700

In [12]:
# Print one AUC table per train/test window: rows are feature sets, columns are models.
for key in res.keys():
    index_label = "Train - Test : " + str(key)
    # Take the column names from the model names stored in res[key] instead of
    # a fixed header, so the table stays valid (and correctly labelled)
    # whichever models were run.
    df = pd.DataFrame.from_dict(res[key], orient='index')
    # DataFrame.round only touches numeric columns, so no blanket try/except
    # is needed to guard the rounding.
    df = df.round(3)
    # tabulate(headers='keys') prints the index name as the first header cell.
    df.index.name = index_label
    print(tabulate(df, headers='keys', tablefmt='fancy_grid'))

╒══════════════════════════════════════════════╤═══════╤═════════════╤═════════╤══════════╕
│ Train - Test : (1990, 1995)-(1996, 2001)     │   MLP │   RUS BOOST │   Logit │   Probit │
╞══════════════════════════════════════════════╪═══════╪═════════════╪═════════╪══════════╡
│ 28 Raw Financial Items + 14 Financial Ratios │   0.5 │       0.698 │   0.561 │    0.577 │
├──────────────────────────────────────────────┼───────┼─────────────┼─────────┼──────────┤
│ 28 Raw Financial Items                       │   0.5 │       0.697 │   0.548 │    0.55  │
├──────────────────────────────────────────────┼───────┼─────────────┼─────────┼──────────┤
│ 14 Financial Ratios                          │   0.5 │       0.651 │   0.635 │    0.641 │
├──────────────────────────────────────────────┼───────┼─────────────┼─────────┼──────────┤
│ Calculated M score Feat                      │   0.5 │       0.516 │   0.537 │    0.539 │
╘══════════════════════════════════════════════╧═══════╧═════════════╧═════════╧

## MLP Param Tuning

In [5]:
# Rebuild the train/test period lists for the MLP tuning runs.
train_batches,test_batches = data_obj.create_batches()

# Preview the (train, test) period pairs; the final pair of each list is excluded by [:-1].
for train_period,test_period in zip(train_batches[:-1],test_batches[:-1]):
    print(train_period,test_period)

(1990, 1995) (1996, 2001)
(1996, 2001) (2002, 2006)
(2002, 2006) (2007, 2011)
(2007, 2011) (2012, 2016)
(2012, 2016) (2017, 2021)


In [6]:
# NOTE(review): features_comp is not defined or imported in any visible cell,
# so this raises NameError on a fresh kernel; it also replaces the data_items
# mapping loaded from MLP/features.json.
data_items = features_comp

In [7]:
from results.models import mlp_grid_search

# Grid-search the MLP for every (train period, test period) pair and feature set.
model = mlp_grid_search
# res["<train period>-<test period>"][feature_set] -> value returned by results()
res = defaultdict(lambda: defaultdict(dict))

for train_period, test_period in zip(train_batches[:-1], test_batches[:-1]):
    window_key = f"{train_period}-{test_period}"
    for data_item, item_columns in data_items.items():
        n_features = len(item_columns)
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            # Every candidate starts with an entry equal to the feature count,
            # followed by the widths listed below.
            'hidden_layer_sizes': [
                (n_features, *widths)
                for widths in (
                    (40,),
                    (50,),
                    (40, 50),
                    (30, 40),
                    (40, 30, 50),
                    (30, 40, 30),
                    (40, 50, 60, 40),
                    (50, 50, 50, 50),
                    (30, 30, 30, 30),
                )
            ],
            'learning_rate_init': [0.001, 0.01, 0.1],
        }
        auc = results(data_obj, train_period, test_period, item_columns, model, param_grid, 'over')
        res[window_key][data_item] = auc

Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (42, 40, 50, 60, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.6932138610497707
Test AUC Score: 0.6094481700488392
Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (28, 30, 30, 30, 30), 'learning_rate_init': 0.001}
Best AUC Score: 0.6785586886359776
Test AUC Score: 0.6249807849696124
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (14, 40), 'learning_rate_init': 0.01}
Best AUC Score: 0.6883302191268899
Test AUC Score: 0.5937202403938217
Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (14, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.6509248327430145
Test AUC Score: 0.4641166715014932
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (42, 30, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.7779707815924033
Test AUC Score: 0.7358577767638388
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (28, 40), 'lea



Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (14, 40, 50), 'learning_rate_init': 0.001}
Best AUC Score: 0.7115028000973946
Test AUC Score: 0.6765969719962719
Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (14, 40), 'learning_rate_init': 0.01}
Best AUC Score: 0.6072459952611434
Test AUC Score: 0.6031902992528666
Best Hyperparameters: {'activation': 'logistic', 'hidden_layer_sizes': (42, 30, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.7450904045828857
Test AUC Score: 0.6893268063087458
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (28, 40, 50), 'learning_rate_init': 0.01}
Best AUC Score: 0.7618863005728608
Test AUC Score: 0.648777987109461
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (14, 40, 50), 'learning_rate_init': 0.001}
Best AUC Score: 0.6519949427139278
Test AUC Score: 0.6287276805820385
Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (14, 30, 40), 'learning_rate_ini



Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (14, 40, 30, 50), 'learning_rate_init': 0.001}
Best AUC Score: 0.6264799999999999
Test AUC Score: 0.5940969885203554




Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (14, 40, 30, 50), 'learning_rate_init': 0.1}
Best AUC Score: 0.6059259259259259
Test AUC Score: 0.39256489021375596


In [9]:
# Inspect the windowed MLP tuning results: res["<train>-<test>"][feature_set] -> test AUC.
res

defaultdict(<function __main__.<lambda>()>,
            {'(1990, 1995)-(1996, 2001)': defaultdict(dict,
                         {'28 Raw Financial Items + 14 Financial Ratios': 0.6094481700488392,
                          '28 Raw Financial Items': 0.6249807849696124,
                          '14 Financial Ratios': 0.5937202403938217,
                          'Calculated M score Feat': 0.4641166715014932}),
             '(1996, 2001)-(2002, 2006)': defaultdict(dict,
                         {'28 Raw Financial Items + 14 Financial Ratios': 0.7358577767638388,
                          '28 Raw Financial Items': 0.7378827268830929,
                          '14 Financial Ratios': 0.6765969719962719,
                          'Calculated M score Feat': 0.6031902992528666}),
             '(2002, 2006)-(2007, 2011)': defaultdict(dict,
                         {'28 Raw Financial Items + 14 Financial Ratios': 0.6893268063087458,
                          '28 Raw Financial Items': 0.64877798

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, train_test_split
from imblearn.over_sampling import BorderlineSMOTE
from collections import defaultdict

In [6]:
# Show the feature-set name -> column list mapping currently bound to data_items.
print(data_items)

{'features': ['act', 'ap', 'at', 'ceq', 'che', 'cogs', 'csho', 'dlc', 'dltis', 'dltt', 'dp', 'ib', 'invt', 'ivao', 'ivst', 'lct', 'lt', 'ni', 'ppegt', 'pstk', 're', 'rect', 'sale', 'sstk', 'txp', 'txt', 'xint', 'prcc_f', 'dch_wc', 'ch_rsst', 'dch_rec', 'dch_inv', 'soft_assets', 'ch_cs', 'ch_cm', 'ch_roa', 'bm', 'dpi', 'reoa', 'EBIT', 'ch_fcf'], 'raw_financial_items_28': ['act', 'ap', 'at', 'ceq', 'che', 'cogs', 'csho', 'dlc', 'dltis', 'dltt', 'dp', 'ib', 'invt', 'ivao', 'ivst', 'lct', 'lt', 'ni', 'ppegt', 'pstk', 're', 'rect', 'sale', 'sstk', 'txp', 'txt', 'xint', 'prcc_f'], 'financial_ratios_14': ['dch_wc', 'ch_rsst', 'dch_rec', 'dch_inv', 'soft_assets', 'ch_cs', 'ch_cm', 'ch_roa', 'bm', 'dpi', 'reoa', 'EBIT', 'ch_fcf', 'issue']}


In [None]:
# Batch MLP grid search written out inline: over-sample the training set with
# BorderlineSMOTE, tune with 5-fold CV on ROC AUC, then score on the test period.
test_periods = [(2003,2019)]

train_period = (1990,2002)
# res[test_period][feature_set] -> test AUC of the best estimator
res = defaultdict(lambda: defaultdict(dict))
for test_period in test_periods:
    for item in data_items.keys():
        feature_cols = data_items[item]
        n_features = len(feature_cols)
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            # scikit-learn treats every entry as a hidden layer, so the leading
            # n_features entry is a hidden layer of that width, not the input layer.
            'hidden_layer_sizes': [
                (n_features, 40),
                (n_features, 40, 50),
                (n_features, 30, 40, 30),
                (n_features, 30, 30, 30, 30),
            ],
            'learning_rate_init': [0.001, 0.01, 0.1],
        }

        train_data, validation_data, test_data = data_obj.split_data_periods(train_period, test_period)
        train_data, validation_data, test_data = null_check(feature_cols, train_data, validation_data, test_data)
        X_train, y_train = train_data[feature_cols], train_data['misstate']
        X_test, y_test = test_data[feature_cols], test_data['misstate']

        # Seed the sampler so the synthetic samples (and the scores) are reproducible.
        smote = BorderlineSMOTE(random_state=42)
        X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
        print(X_train_resampled.shape, y_train_resampled.shape)
        mlp_model = MLPClassifier(max_iter=4000, random_state=42, verbose=False)

        # Perform grid search using cross-validation on the over-sampled data;
        # fitting on X_train / y_train would discard the resampling step above.
        # NOTE(review): resampling before CV lets synthetic samples reach the
        # validation folds, so best_score_ is optimistic; an imblearn Pipeline
        # would resample inside each fold.
        grid_search = GridSearchCV(estimator=mlp_model, param_grid=param_grid, scoring='roc_auc', cv=5)
        grid_search.fit(X_train_resampled, y_train_resampled)

        best_params = grid_search.best_params_
        best_auc = grid_search.best_score_
        # Scored on the untouched (not resampled) test period.
        test_auc = grid_search.score(X_test, y_test)
        print("Best Hyperparameters:", best_params)
        print("Best AUC Score:", best_auc)
        print("Test AUC Score:", test_auc)
        # Record the score so res is populated like in the other tuning cells.
        res[test_period][item] = test_auc


(239976, 41) (239976,)


In [None]:
# Windowed MLP grid search written out inline: under-sample the training set,
# tune with 5-fold CV on ROC AUC, then score on the window's test period.
for train_period,test_period in zip(train_batches[:-1],test_batches[:-1]):
    for item in data_items.keys():
        feature_cols = data_items[item]
        n_features = len(feature_cols)
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            # scikit-learn treats every entry as a hidden layer, so the leading
            # n_features entry is a hidden layer of that width, not the input layer.
            'hidden_layer_sizes': [
                (n_features, 40),
                (n_features, 50),
                (n_features, 40, 50),
                (n_features, 30, 40),
                (n_features, 40, 30, 50),
                (n_features, 30, 40, 30),
                (n_features, 40, 50, 60, 40),
                (n_features, 50, 50, 50, 50),
                (n_features, 30, 30, 30, 30),
            ],
            'learning_rate_init': [0.001, 0.01, 0.1],
        }

        # Split per feature set (as the batch grid-search cell does) so that
        # null_check always starts from the untouched period split instead of
        # the frames it returned for the previous feature set.
        train_data, validation_data, test_data = data_obj.split_data_periods(train_period, test_period)
        train_data, validation_data, test_data = null_check(feature_cols, train_data, validation_data, test_data)

        X_train, y_train = train_data[feature_cols], train_data['misstate']
        X_test, y_test = test_data[feature_cols], test_data['misstate']
        rus = RandomUnderSampler(random_state=42)
        X_train_resampled, y_train_resampled = rus.fit_resample(X_train, y_train)
        mlp_model = MLPClassifier(max_iter=10000, random_state=42, verbose=False,early_stopping=True)
        grid_search = GridSearchCV(estimator=mlp_model, param_grid=param_grid, scoring='roc_auc', cv=5)
        grid_search.fit(X_train_resampled, y_train_resampled)

        best_params = grid_search.best_params_
        best_auc = grid_search.best_score_
        # Scored on the untouched (not resampled) test period.
        test_auc = grid_search.score(X_test, y_test)
        print("Best Hyperparameters:", best_params)
        print("Best AUC Score:", best_auc)
        print("Test AUC Score:", test_auc)

In [None]:
# Inspect the resampled training features left bound by the most recently run grid-search loop.
X_train_resampled