In [None]:
# !pip install pandas==1.3.4
# !pip install numpy==1.21.2
# !pip install matplotlib==3.5.0
# !pip install scikit-learn==1.0.1
# !pip install tensorflow==2.4.1
# !pip install optuna==2.10.0

# Init

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import tensorflow as tf
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import Dense, Dropout 
from tensorflow.keras import backend as K

import optuna

In [2]:
import warnings
warnings.filterwarnings("ignore")

# Function Definition

Create model architecture function

In [3]:
def create_model(trial: optuna.trial.Trial, input_size: int): 
    model = Sequential()
    model.add(Dense(input_size,input_shape=(input_size,),activation='relu')) 

    num_layers = trial.suggest_int('num_layers',low=0,high=3)  
    for layer_i in range(num_layers):  
        n_units = trial.suggest_int(f'n_units_layer_{layer_i}',low=10,high=50,step=5)  
        dropout_rate = trial.suggest_float(f'dropout_rate_layer_{layer_i}',low=0,high=0.5)  
        actv_func = trial.suggest_categorical(f'actv_func_layer_{layer_i}',['relu','tanh','elu'])  

        model.add(Dropout(dropout_rate))  
        model.add(Dense(n_units,activation=actv_func)) 

    model.add(Dense(1,activation='sigmoid')) 
    return model 

Create optimizers function

In [4]:
def create_optimizer(trial: optuna.trial.Trial): 
	opt_kwargs = {} 
	opt_selected = trial.suggest_categorical('optimizer', ['Adam','SGD']) 
	if opt_selected == 'SGD': 
		opt_kwargs['lr'] = trial.suggest_float('sgd_lr',1e-5,1e-1,log=True) 
		opt_kwargs['momentum'] = trial.suggest_float('sgd_momentum',1e-5,1e-1,log=True) 
	else: #’Adam’ 
		opt_kwargs['lr'] = trial.suggest_float('adam_lr',1e-5,1e-1,log=True) 

	optimizer = getattr(tf.optimizers,opt_selected)(**opt_kwargs) 
	return optimizer

Create custom metric

In [5]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

Create the preprocessing function

In [6]:
def preprocessing(X: pd.DataFrame, 
                  numeric_preprocessor, categorical_preprocessor,
                  is_train = True
                 ):
    if is_train:
        X[numerical_feats] = numeric_preprocessor.fit_transform(X[numerical_feats])
        X_cat = categorical_preprocessor.fit_transform(X[categorical_feats]).toarray()
        X_cat = pd.DataFrame(X_cat,columns=categorical_preprocessor.get_feature_names_out())
        X = X.drop(columns=categorical_feats).reset_index(drop=True)
        X = pd.concat([X,X_cat],axis=1)
    else:
        X[numerical_feats] = numeric_preprocessor.transform(X[numerical_feats])
        X_cat = categorical_preprocessor.transform(X[categorical_feats]).toarray()
        X_cat = pd.DataFrame(X_cat,columns=categorical_preprocessor.get_feature_names_out())
        X = X.drop(columns=categorical_feats).reset_index(drop=True)
        X = pd.concat([X,X_cat],axis=1)
    
    return X, numeric_preprocessor, categorical_preprocessor

Create the train function

In [7]:
def train(trial, df_train: pd.DataFrame, df_val: pd.DataFrame = None, use_pruner: bool = False):
    X_train,y_train = df_train.drop(columns=['y']), df_train['y']
    
    if df_val is not None:
        X_val,y_val = df_val.drop(columns=['y']), df_val['y'] 

    #Preprocessing
    numeric_preprocessor = StandardScaler()
    categorical_preprocessor = OneHotEncoder(handle_unknown="ignore")
    
    X_train,numeric_preprocessor,categorical_preprocessor = preprocessing(X_train,
                                                                          numeric_preprocessor,
                                                                          categorical_preprocessor,
                                                                          is_train=True)
    if df_val is not None:
        X_val,_,_ = preprocessing(X_val,
                                  numeric_preprocessor,categorical_preprocessor,
                                  is_train=False)

    #Build model & optimizer
    model = create_model(trial,X_train.shape[1])
    optimizer = create_optimizer(trial)
    
    callbacks = []
    if use_pruner:
        callbacks.append(optuna.integration.TFKerasPruningCallback(trial,'val_f1_m'))

    model.compile(loss='binary_crossentropy',optimizer=optimizer,
                  metrics=[f1_m],
                 )
    history = model.fit(X_train,y_train,
                        epochs=trial.suggest_int('epoch',15,50),
                        batch_size=64,
                        validation_data=(X_val,y_val) if df_val is not None else None,
                        callbacks=callbacks,
                        verbose=False
                       )
    if df_val is not None:
        return np.mean(history.history['val_f1_m'])
    else:
        return model

Create the objective function

In [8]:
def objective(trial: optuna.trial.Trial, df_train: pd.DataFrame, use_pruner: bool = False): 
    #Split into Train and Validation data
    df_train_hp, df_val = train_test_split(df_train, test_size=0.1, random_state=0)
    
    # Train and Validate Model
    val_f1_score = train(trial, df_train_hp, df_val, use_pruner)
        
    return val_f1_score

Create the final train and evaluation function to test the best set of hyperparmeters

In [9]:
def train_and_evaluate_final(df_train: pd.DataFrame, df_test: pd.DataFrame, **kwargs):
    X_train,y_train = df_train.drop(columns=['y']), df_train['y']
    X_test,y_test = df_test.drop(columns=['y']), df_test['y'] 
    
    # Preprocessing
    numeric_preprocessor = StandardScaler()
    categorical_preprocessor = OneHotEncoder(handle_unknown="ignore")
    X_train,numeric_preprocessor,categorical_preprocessor = preprocessing(X_train,
                                                                          numeric_preprocessor,
                                                                          categorical_preprocessor,
                                                                          is_train=True)
    X_test,_,_ = preprocessing(X_test,numeric_preprocessor,categorical_preprocessor,
                              is_train=False)

    #Build model
    input_size = X_train.shape[1]
    model = Sequential()
    model.add(Dense(input_size,input_shape=(input_size,),activation='relu')) 

    num_layers = kwargs.get('num_layers',0)  
    for layer_i in range(num_layers):  
        n_units = kwargs.get(f'n_units_layer_{layer_i}',0)  
        dropout_rate = kwargs.get(f'dropout_rate_layer_{layer_i}',0)  
        actv_func = kwargs.get(f'actv_func_layer_{layer_i}','relu')  

        model.add(Dropout(dropout_rate))  
        model.add(Dense(n_units,activation=actv_func)) 

    model.add(Dense(1,activation='sigmoid'))
    
    #Build Optimizer
    opt_kwargs = {} 
    opt_selected = kwargs.get('optimizer', 'Adam')
    if opt_selected == 'SGD': 
        opt_kwargs['lr'] = kwargs.get('sgd_lr',1e-5) 
        opt_kwargs['momentum'] = kwargs.get('sgd_momentum',1e-5) 
    else: #’Adam’ 
        opt_kwargs['lr'] = kwargs.get('adam_lr',1e-5) 

    optimizer = getattr(tf.optimizers,opt_selected)(**opt_kwargs) 
    
    #Training process
    model.compile(loss='binary_crossentropy',optimizer=optimizer,
                  metrics=[f1_m],
                 )
    print(model.summary())
    history = model.fit(X_train,y_train,
                        epochs=kwargs.get('epoch',15),
                        batch_size=64,
                        validation_data=None,
                        verbose=True
                       )
    
    # Evaluation Process
    y_test_pred_probas = model.predict(X_test)
    y_test_pred = [1 if x[0]>0.5 else 0 for x in y_test_pred_probas]
    
    print("="*100)
    print("F1-Score on Test Data: ",f1_score(y_test, y_test_pred))

# TPE

In [10]:
df = pd.read_csv("train.csv",sep=";")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


Convert the target variable to integer

In [11]:
df['y'] = df['y'].map({'yes':1,'no':0})

Split full data into train and test data

In [12]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=0) 

Get list of numerical features

In [13]:
numerical_feats = list(df_train.drop(columns='y').select_dtypes(include=np.number).columns)

Get list of categorical features

In [14]:
categorical_feats = list(df_train.drop(columns='y').select_dtypes(exclude=np.number).columns)

Perform Hyperparameter Tuning with TPE

In [15]:
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.TPESampler(seed=0),
                           )
study.optimize(lambda trial: objective(trial, df_train),
               n_trials=50, n_jobs=-1,
              )

[32m[I 2022-04-20 10:20:20,413][0m A new study created in memory with name: no-name-aa8e5236-b2a3-42ef-ad82-cb510e4cefe0[0m
[32m[I 2022-04-20 10:24:36,844][0m Trial 12 finished with value: 0.04922506125534282 and parameters: {'num_layers': 2, 'n_units_layer_0': 20, 'dropout_rate_layer_0': 0.0772495838596658, 'actv_func_layer_0': 'tanh', 'n_units_layer_1': 50, 'dropout_rate_layer_1': 0.04434725249154464, 'actv_func_layer_1': 'relu', 'optimizer': 'SGD', 'sgd_lr': 1.6236443271599312e-05, 'sgd_momentum': 5.369509899402855e-05, 'epoch': 17}. Best is trial 12 with value: 0.04922506125534282.[0m
[32m[I 2022-04-20 10:24:45,014][0m Trial 15 finished with value: 0.06477986092472242 and parameters: {'num_layers': 1, 'n_units_layer_0': 10, 'dropout_rate_layer_0': 0.27879212624167066, 'actv_func_layer_0': 'tanh', 'optimizer': 'Adam', 'adam_lr': 1.0151548489720613e-05, 'epoch': 18}. Best is trial 15 with value: 0.06477986092472242.[0m
[32m[I 2022-04-20 10:24:51,577][0m Trial 8 finished wi

In [16]:
print("Best Trial:")
best_trial = study.best_trial

print("    Value: ", best_trial.value)

print("    Hyperparameters: ")
for key, value in best_trial.params.items():
    print(f"        {key}: {value}")

Best Trial:
    Value:  0.5627583599672085
    Hyperparameters: 
        num_layers: 2
        n_units_layer_0: 30
        dropout_rate_layer_0: 0.14068484717257745
        actv_func_layer_0: relu
        n_units_layer_1: 20
        dropout_rate_layer_1: 0.34708586671782293
        actv_func_layer_1: relu
        optimizer: Adam
        adam_lr: 0.0018287924415952158
        epoch: 41


In [17]:
best_trial.params

{'num_layers': 2,
 'n_units_layer_0': 30,
 'dropout_rate_layer_0': 0.14068484717257745,
 'actv_func_layer_0': 'relu',
 'n_units_layer_1': 20,
 'dropout_rate_layer_1': 0.34708586671782293,
 'actv_func_layer_1': 'relu',
 'optimizer': 'Adam',
 'adam_lr': 0.0018287924415952158,
 'epoch': 41}

In [18]:
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 51)                2652      
_________________________________________________________________
dropout (Dropout)            (None, 51)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 30)                1560      
_________________________________________________________________
dropout_1 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 21        
Total params: 4,853
Trainable params: 4,853
Non-trainable params: 0
______________________________________________________

# Random Search

In [19]:
df = pd.read_csv("train.csv",sep=";")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


Convert the target variable to integer

In [20]:
df['y'] = df['y'].map({'yes':1,'no':0})

Split full data into train and test data

In [21]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=0) 

Get list of numerical features

In [22]:
numerical_feats = list(df_train.drop(columns='y').select_dtypes(include=np.number).columns)

Get list of categorical features

In [23]:
categorical_feats = list(df_train.drop(columns='y').select_dtypes(exclude=np.number).columns)

Perform Hyperparameter Tuning with Random Search

In [24]:
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.RandomSampler(seed=0),
                           )
study.optimize(lambda trial: objective(trial, df_train),
               n_trials=50, n_jobs=-1,
              )

[32m[I 2022-04-20 11:55:00,925][0m A new study created in memory with name: no-name-f607aa08-015d-4b74-9305-0136b55ae733[0m




[32m[I 2022-04-20 11:58:39,086][0m Trial 0 finished with value: 0.3546561966339747 and parameters: {'num_layers': 0, 'optimizer': 'SGD', 'sgd_lr': 0.007784659100631173, 'sgd_momentum': 0.005869930756080532, 'epoch': 15}. Best is trial 0 with value: 0.3546561966339747.[0m
[32m[I 2022-04-20 12:00:20,006][0m Trial 14 finished with value: 0.013436927814923581 and parameters: {'num_layers': 2, 'n_units_layer_0': 45, 'dropout_rate_layer_0': 0.1669875457623211, 'actv_func_layer_0': 'elu', 'n_units_layer_1': 30, 'dropout_rate_layer_1': 0.028360326323867258, 'actv_func_layer_1': 'relu', 'optimizer': 'SGD', 'sgd_lr': 2.7947921344554796e-05, 'sgd_momentum': 1.0250528538923333e-05, 'epoch': 21}. Best is trial 0 with value: 0.3546561966339747.[0m
[32m[I 2022-04-20 12:00:35,224][0m Trial 12 finished with value: 0.016200784255157818 and parameters: {'num_layers': 2, 'n_units_layer_0': 35, 'dropout_rate_layer_0': 0.0999912463651953, 'actv_func_layer_0': 'relu', 'n_units_layer_1': 30, 'dropout_

In [25]:
print("Best Trial:")
best_trial = study.best_trial

print("    Value: ", best_trial.value)

print("    Hyperparameters: ")
for key, value in best_trial.params.items():
    print(f"        {key}: {value}")

Best Trial:
    Value:  0.5478656393289566
    Hyperparameters: 
        num_layers: 0
        optimizer: Adam
        adam_lr: 0.05075826567070766
        epoch: 50


In [26]:
best_trial.params

{'num_layers': 0,
 'optimizer': 'Adam',
 'adam_lr': 0.05075826567070766,
 'epoch': 50}

In [27]:
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 51)                2652      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 52        
Total params: 2,704
Trainable params: 2,704
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Ep

# Grid Search

In [28]:
df = pd.read_csv("train.csv",sep=";")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


Convert the target variable to integer

In [29]:
df['y'] = df['y'].map({'yes':1,'no':0})

Split full data into train and test data

In [30]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=0) 

Get list of numerical features

In [31]:
numerical_feats = list(df_train.drop(columns='y').select_dtypes(include=np.number).columns)

Get list of categorical features

In [32]:
categorical_feats = list(df_train.drop(columns='y').select_dtypes(exclude=np.number).columns)

Define Search Space

In [33]:
search_space = {'num_layers': [0,1],
                'n_units_layer_0': list(range(10,50,5)),
                'dropout_rate_layer_0': np.linspace(0,0.5,5),
                'actv_func_layer_0': ['relu','elu'],
                'optimizer': ['Adam','SGD'],
                'sgd_lr': np.linspace(1e-5,1e-1,5),
                'sgd_momentum': np.linspace(1e-5,1e-1,5),
                'adam_lr': np.linspace(1e-5,1e-1,5),
                'epoch': list(range(15,50,5))
               }

Perform Hyperparameter Tuning with Grid Search

In [34]:
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.GridSampler(search_space),
                           )
study.optimize(lambda trial: objective(trial, df_train),
               n_trials=50, n_jobs=-1
              )

[32m[I 2022-04-20 12:23:31,788][0m A new study created in memory with name: no-name-1f7d17a9-eddf-497a-804b-eb75cb7a6948[0m




[32m[I 2022-04-20 12:25:44,804][0m Trial 2 finished with value: 0.2509408483902613 and parameters: {'num_layers': 0, 'optimizer': 'SGD', 'sgd_lr': 1e-05, 'sgd_momentum': 1e-05, 'epoch': 15}. Best is trial 2 with value: 0.2509408483902613.[0m
[32m[I 2022-04-20 12:25:53,374][0m Trial 6 finished with value: 0.45009910464286806 and parameters: {'num_layers': 1, 'n_units_layer_0': 10, 'dropout_rate_layer_0': 0.25, 'actv_func_layer_0': 'elu', 'optimizer': 'SGD', 'sgd_lr': 0.05000500000000001, 'sgd_momentum': 1e-05, 'epoch': 15}. Best is trial 6 with value: 0.45009910464286806.[0m
[32m[I 2022-04-20 12:25:54,994][0m Trial 8 finished with value: 0.3451046049594879 and parameters: {'num_layers': 1, 'n_units_layer_0': 35, 'dropout_rate_layer_0': 0.5, 'actv_func_layer_0': 'relu', 'optimizer': 'SGD', 'sgd_lr': 0.025007500000000002, 'sgd_momentum': 0.1, 'epoch': 15}. Best is trial 6 with value: 0.45009910464286806.[0m
[32m[I 2022-04-20 12:26:35,864][0m Trial 13 finished with value: 0.4755

In [35]:
print("Best Trial:")
best_trial = study.best_trial

print("    Value: ", best_trial.value)

print("    Hyperparameters: ")
for key, value in best_trial.params.items():
    print(f"        {key}: {value}")

Best Trial:
    Value:  0.5741229319572448
    Hyperparameters: 
        num_layers: 0
        optimizer: Adam
        adam_lr: 0.05000500000000001
        epoch: 25


In [36]:
best_trial.params

{'num_layers': 0,
 'optimizer': 'Adam',
 'adam_lr': 0.05000500000000001,
 'epoch': 25}

In [37]:
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 51)                2652      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 52        
Total params: 2,704
Trainable params: 2,704
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
F1-Score on Test Data:  0.6102941176470589


# Simulated Annealing

In [38]:
df = pd.read_csv("train.csv",sep=";")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


Convert the target variable to integer

In [39]:
df['y'] = df['y'].map({'yes':1,'no':0})

Split full data into train and test data

In [40]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=0) 

Get list of numerical features

In [41]:
numerical_feats = list(df_train.drop(columns='y').select_dtypes(include=np.number).columns)

Get list of categorical features

In [42]:
categorical_feats = list(df_train.drop(columns='y').select_dtypes(exclude=np.number).columns)

Perform Hyperparameter Tuning with Simulated Annealing

In [43]:
class SimulatedAnnealingSampler(optuna.samplers.BaseSampler):
    '''Reference: https://github.com/optuna/optuna-examples/blob/main/samplers/simulated_annealing_sampler.py
    '''
    def __init__(self, temperature=100, cooldown_factor=0.9, neighbor_range_factor=0.1, seed=None):
        self._rng = np.random.RandomState(seed)
        self._independent_sampler = optuna.samplers.RandomSampler(seed=seed)
        self._temperature = temperature
        self.cooldown_factor = cooldown_factor
        self.neighbor_range_factor = neighbor_range_factor
        self._current_trial = None

    def infer_relative_search_space(self, study, trial):
        return optuna.samplers.intersection_search_space(study)

    def sample_relative(self, study, trial, search_space):
        if search_space == {}:
            # The relative search space is empty (it means this is the first trial of a study).
            return {}

        # The rest of this method is an implementation of Simulated Annealing (SA) algorithm.
        prev_trial = self._get_last_complete_trial(study)

        # Update the current state of SA if the transition is accepted.
        if self._rng.uniform(0, 1) <= self._transition_probability(study, prev_trial):
            self._current_trial = prev_trial

        # Pick a new neighbor (i.e., parameters).
        params = self._sample_neighbor_params(search_space)

        # Decrease the temperature via geometric cooling annealing schedule.
        self._temperature *= self.cooldown_factor

        return params

    def _sample_neighbor_params(self, search_space):
        # Generate a sufficiently near neighbor (i.e., parameters).
        #
        # In this example, we define a sufficiently near neighbor as
        # `self.neighbor_range_factor * 100` percent region of the entire
        # search space centered on the current point.

        params = {}
        for param_name, param_distribution in search_space.items():
            if isinstance(param_distribution, optuna.distributions.CategoricalDistribution):
                params[param_name] = self._rng.choice(param_distribution.choices)
            else:
                current_value = self._current_trial.params[param_name]
                width = (
                    param_distribution.high - param_distribution.low
                ) * self.neighbor_range_factor
                neighbor_low = max(current_value - width, param_distribution.low)
                neighbor_high = min(current_value + width, param_distribution.high)
                
                if isinstance(param_distribution, optuna.distributions.UniformDistribution):
                    params[param_name] = self._rng.uniform(neighbor_low, neighbor_high)
                elif isinstance(param_distribution, optuna.distributions.LogUniformDistribution):
                    params[param_name] = self._rng.uniform(np.log(max(1e-6,neighbor_low)), np.log(neighbor_high))
                elif isinstance(param_distribution, optuna.distributions.DiscreteUniformDistribution):
                    params[param_name] = self._rng.choice(np.linspace(neighbor_low, neighbor_high, param_distribution.q))
                elif isinstance(param_distribution, optuna.distributions.IntUniformDistribution):
                    params[param_name] = self._rng.choice(range(max(int(neighbor_low)-1,param_distribution.low), 
                                                                min(int(neighbor_high)+1,param_distribution.high), 
                                                                param_distribution.step))
                elif isinstance(param_distribution, optuna.distributions.IntLogUniformDistribution):
                    params[param_name] = self._rng.choice(range(max(int(np.log(max(1e-6,neighbor_low)))-1,param_distribution.low), 
                                                                min(int(np.log(neighbor_high))+1,param_distribution.high), 
                                                                param_distribution.step))
                else:
                    raise NotImplementedError(
                        "Unsupported distribution {}.".format(param_distribution)
                    )

        return params

    def _transition_probability(self, study, prev_trial):
        if self._current_trial is None:
            return 1.0

        prev_value = prev_trial.value
        current_value = self._current_trial.value

        # `prev_trial` is always accepted if it has a better value than the current trial.
        if study.direction == optuna.study.StudyDirection.MINIMIZE and prev_value <= current_value:
            return 1.0
        elif study.direction == optuna.study.StudyDirection.MAXIMIZE and prev_value >= current_value:
            return 1.0

        # Calculate the probability of accepting `prev_trial` that has a worse value than
        # the current trial.
        return np.exp(-abs(current_value - prev_value) / self._temperature)

    @staticmethod
    def _get_last_complete_trial(study):
        complete_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])
        return complete_trials[-1]

    def sample_independent(self, study, trial, param_name, param_distribution):
        # In this example, this method is invoked only in the first trial of a study.
        # The parameters of the trial are sampled by using `RandomSampler` as follows.
        return self._independent_sampler.sample_independent(
            study, trial, param_name, param_distribution
        )

In [44]:
study = optuna.create_study(direction='maximize',
                            sampler=SimulatedAnnealingSampler(seed=0),
                           )
study.optimize(lambda trial: objective(trial, df_train),
               n_trials=50, n_jobs=-1
              )

[32m[I 2022-04-20 12:50:25,703][0m A new study created in memory with name: no-name-7c5345e3-0d4d-4897-9931-ed0e4736711b[0m




[32m[I 2022-04-20 12:53:22,100][0m Trial 4 finished with value: 0.21750324269135793 and parameters: {'num_layers': 2, 'n_units_layer_0': 40, 'dropout_rate_layer_0': 0.30138168803582194, 'actv_func_layer_0': 'elu', 'n_units_layer_1': 10, 'dropout_rate_layer_1': 0.3281647947326367, 'actv_func_layer_1': 'elu', 'optimizer': 'SGD', 'sgd_lr': 1.6494986078992885e-05, 'sgd_momentum': 6.30937149822963e-05, 'epoch': 15}. Best is trial 4 with value: 0.21750324269135793.[0m
[32m[I 2022-04-20 12:53:42,698][0m Trial 5 finished with value: 0.19734012045794064 and parameters: {'num_layers': 0, 'optimizer': 'SGD', 'sgd_lr': 6.94272949243014e-05, 'sgd_momentum': 3.2787264983352775e-05, 'epoch': 18}. Best is trial 4 with value: 0.21750324269135793.[0m
[32m[I 2022-04-20 12:53:47,306][0m Trial 14 finished with value: 0.07093937810072128 and parameters: {'num_layers': 2, 'n_units_layer_0': 35, 'dropout_rate_layer_0': 0.1326947454697227, 'actv_func_layer_0': 'relu', 'n_units_layer_1': 25, 'dropout_ra

In [45]:
print("Best Trial:")
best_trial = study.best_trial

print("    Value: ", best_trial.value)

print("    Hyperparameters: ")
for key, value in best_trial.params.items():
    print(f"        {key}: {value}")

Best Trial:
    Value:  0.5558179441170815
    Hyperparameters: 
        num_layers: 3
        n_units_layer_0: 30
        dropout_rate_layer_0: 0.28421697443432425
        actv_func_layer_0: tanh
        n_units_layer_1: 20
        dropout_rate_layer_1: 0.05936385947712203
        actv_func_layer_1: tanh
        n_units_layer_2: 25
        dropout_rate_layer_2: 0.2179324626328134
        actv_func_layer_2: relu
        optimizer: Adam
        adam_lr: 0.006100619734336806
        epoch: 39


In [46]:
best_trial.params

{'num_layers': 3,
 'n_units_layer_0': 30,
 'dropout_rate_layer_0': 0.28421697443432425,
 'actv_func_layer_0': 'tanh',
 'n_units_layer_1': 20,
 'dropout_rate_layer_1': 0.05936385947712203,
 'actv_func_layer_1': 'tanh',
 'n_units_layer_2': 25,
 'dropout_rate_layer_2': 0.2179324626328134,
 'actv_func_layer_2': 'relu',
 'optimizer': 'Adam',
 'adam_lr': 0.006100619734336806,
 'epoch': 39}

In [47]:
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 51)                2652      
_________________________________________________________________
dropout_2 (Dropout)          (None, 51)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 30)                1560      
_________________________________________________________________
dropout_3 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 20)                620       
_________________________________________________________________
dropout_4 (Dropout)          (None, 20)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 25)               

# Successive Halving + Random Search

In [48]:
df = pd.read_csv("train.csv",sep=";")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


Convert the target variable to integer

In [49]:
df['y'] = df['y'].map({'yes':1,'no':0})

Split full data into train and test data

In [50]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=0) 

Get list of numerical features

In [51]:
numerical_feats = list(df_train.drop(columns='y').select_dtypes(include=np.number).columns)

Get list of categorical features

In [52]:
categorical_feats = list(df_train.drop(columns='y').select_dtypes(exclude=np.number).columns)

Perform Hyperparameter Tuning with Random Search + SH

In [53]:
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.RandomSampler(seed=0),
                            pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=3,
                                                                          min_resource=5)
                           )
study.optimize(lambda trial: objective(trial, df_train, use_pruner=True),
               n_trials=100, n_jobs=-1,
              )

[32m[I 2022-04-20 13:17:12,505][0m A new study created in memory with name: no-name-34380eaf-1d10-432e-82fc-2656edf6dcce[0m




[32m[I 2022-04-20 13:18:42,179][0m Trial 14 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:18:44,613][0m Trial 4 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:18:45,359][0m Trial 12 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:18:45,951][0m Trial 7 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:18:53,501][0m Trial 5 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:19:03,659][0m Trial 0 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:19:05,044][0m Trial 13 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:19:06,968][0m Trial 15 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:19:10,325][0m Trial 11 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:20:22,768][0m Trial 16 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:20:24,156][0m Trial 18 pruned. Trial was pruned at epoch 5.[0m
[32m[I 2022-04-20 13:20:27,816][0m Trial 20 p

In [54]:
pruned_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  87
  Number of complete trials:  13


In [55]:
print("Best Trial:")
best_trial = study.best_trial

print("    Value: ", best_trial.value)

print("    Hyperparameters: ")
for key, value in best_trial.params.items():
    print(f"        {key}: {value}")

Best Trial:
    Value:  0.5818294137716293
    Hyperparameters: 
        num_layers: 3
        n_units_layer_0: 10
        dropout_rate_layer_0: 0.03540368984067649
        actv_func_layer_0: elu
        n_units_layer_1: 15
        dropout_rate_layer_1: 0.008554081181978979
        actv_func_layer_1: elu
        n_units_layer_2: 15
        dropout_rate_layer_2: 0.4887044768096681
        actv_func_layer_2: relu
        optimizer: Adam
        adam_lr: 0.02763126523504823
        epoch: 28


In [56]:
best_trial.params

{'num_layers': 3,
 'n_units_layer_0': 10,
 'dropout_rate_layer_0': 0.03540368984067649,
 'actv_func_layer_0': 'elu',
 'n_units_layer_1': 15,
 'dropout_rate_layer_1': 0.008554081181978979,
 'actv_func_layer_1': 'elu',
 'n_units_layer_2': 15,
 'dropout_rate_layer_2': 0.4887044768096681,
 'actv_func_layer_2': 'relu',
 'optimizer': 'Adam',
 'adam_lr': 0.02763126523504823,
 'epoch': 28}

In [57]:
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 51)                2652      
_________________________________________________________________
dropout_5 (Dropout)          (None, 51)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 10)                520       
_________________________________________________________________
dropout_6 (Dropout)          (None, 10)                0         
_________________________________________________________________
dense_15 (Dense)             (None, 15)                165       
_________________________________________________________________
dropout_7 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_16 (Dense)             (None, 15)               

# Hyper Band + Random Search

In [10]:
df = pd.read_csv("train.csv",sep=";")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


Convert the target variable to integer

In [11]:
df['y'] = df['y'].map({'yes':1,'no':0})

Split full data into train and test data

In [12]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=0) 

Get list of numerical features

In [13]:
numerical_feats = list(df_train.drop(columns='y').select_dtypes(include=np.number).columns)

Get list of categorical features

In [14]:
categorical_feats = list(df_train.drop(columns='y').select_dtypes(exclude=np.number).columns)

Perform Hyperparameter Tuning with Random Search + SH

In [15]:
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.RandomSampler(seed=0),
                            pruner=optuna.pruners.HyperbandPruner(reduction_factor=3,
                                                                  min_resource=5
                                                                 )
                           )
study.optimize(lambda trial: objective(trial, df_train, use_pruner=True),
               n_trials=100, n_jobs=-1,
              )

[32m[I 2022-04-20 15:49:25,285][0m A new study created in memory with name: no-name-49cd4609-d60f-42de-b1bd-6a4e4f0cdfe5[0m
[32m[I 2022-04-20 15:51:47,970][0m Trial 13 finished with value: 0.0002876983645061652 and parameters: {'num_layers': 3, 'n_units_layer_0': 50, 'dropout_rate_layer_0': 0.07507687468347152, 'actv_func_layer_0': 'relu', 'n_units_layer_1': 40, 'dropout_rate_layer_1': 0.38640343196581817, 'actv_func_layer_1': 'relu', 'n_units_layer_2': 10, 'dropout_rate_layer_2': 0.05532443342711424, 'actv_func_layer_2': 'relu', 'optimizer': 'SGD', 'sgd_lr': 0.0005288384693846171, 'sgd_momentum': 6.91464937139131e-05, 'epoch': 15}. Best is trial 13 with value: 0.0002876983645061652.[0m
[32m[I 2022-04-20 15:51:49,329][0m Trial 8 pruned. Trial was pruned at epoch 15.[0m
[32m[I 2022-04-20 15:51:52,474][0m Trial 7 pruned. Trial was pruned at epoch 15.[0m
[32m[I 2022-04-20 15:51:54,504][0m Trial 10 pruned. Trial was pruned at epoch 15.[0m
[32m[I 2022-04-20 15:51:55,230][0m

In [16]:
pruned_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  79
  Number of complete trials:  21


In [17]:
print("Best Trial:")
best_trial = study.best_trial

print("    Value: ", best_trial.value)

print("    Hyperparameters: ")
for key, value in best_trial.params.items():
    print(f"        {key}: {value}")

Best Trial:
    Value:  0.5800432627265518
    Hyperparameters: 
        num_layers: 0
        optimizer: Adam
        adam_lr: 0.05584201313189952
        epoch: 37


In [18]:
best_trial.params

{'num_layers': 0,
 'optimizer': 'Adam',
 'adam_lr': 0.05584201313189952,
 'epoch': 37}

In [19]:
train_and_evaluate_final(df_train, df_test, **best_trial.params)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 51)                2652      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 52        
Total params: 2,704
Trainable params: 2,704
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/37
Epoch 2/37
Epoch 3/37
Epoch 4/37
Epoch 5/37
Epoch 6/37
Epoch 7/37
Epoch 8/37
Epoch 9/37
Epoch 10/37
Epoch 11/37
Epoch 12/37
Epoch 13/37
Epoch 14/37
Epoch 15/37
Epoch 16/37
Epoch 17/37
Epoch 18/37
Epoch 19/37
Epoch 20/37
Epoch 21/37
Epoch 22/37
Epoch 23/37
Epoch 24/37
Epoch 25/37
Epoch 26/37
Epoch 27/37
Epoch 28/37
Epoch 29/37
Epoch 30/37
Epoch 31/37
Epoch 32/37
Epoch 33/37
Epoch 34/37
Epoch 35/37
Epoch 36/37
Epoch 37/37
F1-Score on Test Data:  0.609841827768014
