In [18]:
# data management
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from Utils import downcast, pd_col_to_dummy
from datetime import datetime

from pathlib import Path
from multilayer_perceptron import MultilayerPerceptron

# Bayesian Optimization
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

# model selection
from sklearn.model_selection import train_test_split

# Metrics
from sklearn.metrics import (
    recall_score, 
    precision_score,
    f1_score,
    confusion_matrix
)

# Plot
from matplotlib.pyplot import hist 

In [19]:
# from google.colab import drive
# drive.mount('/content/drive')

In [20]:
# ruta = ("/content/drive/MyDrive/Proyecto_IDI2/Reporte de Anticipos.xlsx")

### 1. Load data

In [21]:
# Load the report from a CSV file; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv('Reporte de Anticipo Propuesta.csv')

In [22]:
# Run the frame through the project helper `downcast` (imported from Utils).
# Presumably it shrinks numeric dtypes to save memory — TODO confirm in Utils.
# NOTE: this rebinds `df`, so re-running this cell alone re-applies the helper
# to its own output.
df = downcast(df)

### 2.0 Standardize data

In [23]:
# Build the feature frame: everything except the four listed columns
# ("Paid" is the prediction target, so it must not appear among the features).
df_continuous = df.drop(columns=["Gender", "BirthState", "State_Id", "Paid"])

### 4.0 Fit a model

In [24]:
# The following hyperparameters are candidates for tuning via Bayesian optimization:
# - Neurons in the hidden layer
# - Learning rate
# - Threshold


#### 4.1 Split the data into train and test sets

In [25]:
# Target vector: the "Paid" column of the loaded frame.
y = df["Paid"]

In [31]:
# Hold out 30% of the rows for testing. The split is stratified on the target
# and seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_continuous,
    y,
    test_size=0.3,
    random_state=4,
    stratify=y,
)

# Turn both target Series into column vectors of shape (n_samples, 1).
y_train, y_test = (
    target.values.reshape(-1, 1) for target in (y_train, y_test)
)

In [27]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [28]:
def FPR(y_true, y_pred):
    """Return the false positive rate, FP / (FP + TN).

    The counts are read from the flattened confusion matrix of ``y_true``
    versus ``y_pred``. Unpacking it into four values requires a 2x2 matrix,
    i.e. a binary classification problem.
    """
    cells = confusion_matrix(y_true, y_pred).ravel()
    true_neg, false_pos, _false_neg, _true_pos = cells
    actual_negatives = false_pos + true_neg
    return false_pos / actual_negatives

In [29]:
def opt_bas(neurons, learning_rate=1.25, threshold=0.6, test_size=0.3, random_state=4):
    """Objective function for the Bayesian optimizer: score one MLP setup.

    Fits a ``MultilayerPerceptron`` on the module-level ``X_train``/``y_train``
    and evaluates it on the module-level ``X_test``/``y_test``. The result is
    the negative of a weighted penalty, so a larger value is better::

        -(10 * FPR + 100 * mean(sin(pi * p)) + 2 * (1 - recall))

    where ``p`` is the output of ``model.predict(..., prob=True)``
    (presumably probabilities in [0, 1] — TODO confirm in multilayer_perceptron).

    Parameters
    ----------
    neurons : float
        Number of neurons in the hidden layer. Truncated with ``int()``
        because the optimizer proposes continuous values.
    learning_rate : float
        Forwarded to the model as ``eta``.
    threshold : float
        Forwarded to the model as ``threshold``.
    test_size : float
        Currently unused; kept so the signature stays backward-compatible.
    random_state : int
        Seed forwarded to the model as ``random_state``.

    Returns
    -------
    float
        The negated penalty (higher is better).

    Notes
    -----
    NOTE(review): the score is computed on ``X_test``/``y_test``, so the
    hyperparameters chosen by the optimizer are tuned against the test split.
    Consider scoring on a separate validation split instead.
    """
    # Work on copies so the shared module-level arrays are never modified.
    X_train_ = X_train.copy()
    X_test_ = X_test.copy()

    ###############################
    # fit and predict
    ###############################

    hyperparameters = {
        "n_neurons": int(neurons),
        "eta": learning_rate,
        "threshold": threshold,
        # Honor the caller's seed instead of a hard-coded value (default is 4).
        "random_state": random_state,
    }

    model = MultilayerPerceptron(**hyperparameters).fit(X_train_, y_train)

    y_pred_proba = model.predict(X_test_, prob=True)
    y_pred = model.predict(X_test_, prob=False)

    # calculate metrics
    fpr = FPR(y_test, y_pred)
    miss_rate = 1 - recall_score(y_test, y_pred)

    # Indecision penalty: sin(pi * p) is 0 at p = 0 or p = 1 and peaks at
    # p = 0.5, so the mean is large when the outputs sit close to 0.5.
    uncertainty = np.mean(np.sin(y_pred_proba * np.pi))

    return -(fpr * 10 + uncertainty * 100 + miss_rate * 2)

In [30]:
class newJSONLogger(JSONLogger):
    """``JSONLogger`` variant whose constructor only records the output path.

    ``JSONLogger.__init__`` is bypassed on purpose: ``super(JSONLogger, self)``
    starts the method lookup *after* ``JSONLogger`` in the MRO, so only the
    initializer of its base class runs. Presumably this keeps an existing log
    file from being reset — TODO confirm against the installed bayes_opt version.
    """

    def __init__(self, path):
        """Store ``path`` as the log file, appending ".json" when it is missing.

        Parameters
        ----------
        path : str or pathlib.Path
            Destination of the JSON log.
        """
        self._path = None
        # Deliberately skip JSONLogger.__init__; run only its parent's __init__.
        super(JSONLogger, self).__init__()
        path = str(path)  # accept Path objects as well as plain strings
        self._path = path if path.endswith(".json") else path + ".json"

In [17]:
# Bounded region of parameter space.
# Only `neurons` and `learning_rate` are searched; `threshold` is not listed,
# so `opt_bas` falls back to its default value for it.
pbounds = {
    'neurons': (5, 70), 'learning_rate': (0.1, 10)
}

# Bayes optimizer instantiation
optimizer = BayesianOptimization(f=opt_bas, 
                                 pbounds=pbounds, 
                                 random_state=10, verbose=2,
                                )

# Keep a JSON record of every optimization step.
log_path = Path().resolve() / "Logs_smpl" / "no_features_neurons_1_7.json"
# Make sure the target directory exists before the logger first writes to it.
log_path.parent.mkdir(parents=True, exist_ok=True)
logger = newJSONLogger(path = str(log_path))
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [None]:
# Run the search with 25 initial points followed by 500 iterations (see the
# bayes_opt `maximize` documentation for the exact semantics). Every evaluation
# calls `opt_bas`, which fits a new model, so this cell can take a long time.
optimizer.maximize(init_points=25, n_iter=500)