In [1]:
import pandas as pd
import os

data_path = '../../data/modulo2/examen/'
df = pd.read_csv(data_path+'churn-bigml-80 - churn-bigml-80.csv')
df.head()

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,KS,128,415,No,Yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,No,Yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,NJ,137,415,No,No,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,Yes,No,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,Yes,No,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


## Nota:

* Entrada $ X = (x_1, ... x_j)$
* Capa de Entrada, que se encarga de asignar esta información de X a las capas ocultas.
* 1ra Capa oculta:
    * $Z = \sum^{j}_{i=1} w_ix_i + w_0$
    * Función de activación, sea f, una función de activación:
        Obteníamos $f(Z)$
* Capa de Salida:
    * $\hat{y} = \sum^{m}_{i=1} v_iz_i + v_0$
    
    
Tenemos la predicción estimada en el formato que la conocíamos como $\hat{y}$.

## Propagación hacia atrás. 

* 1. Partimos de la función de costo. Seleccionar $MAE=\frac{1}{n}\sum^{n}_{h=0}(y_h - \hat{y}_h)$
* 2. Entonces esto se convierte en:
    $\frac{1}{n}\sum^{n}_{h=0}(y_h - \sum^{m}_{i=1} v_iz_i + v_0)$
* 3. Volvemos a sustituir, ahora el valor de $z_i$:
    $\frac{1}{n}\sum^{n}_{h=0}(y_h - \sum^{m}_{i=1} v_i f(\sum^{j}_{i=1} w_ix_i + w_0) + v_0)$
    
### Nota: la ecuación en 3, sería nuestra función de costo, sobre la cuál derivaríamos respecto a sus parámetros, w, v. 

## Pasos:
* 1. Damos valores para w y v aleatorios. 
* 2. Calculamos el error (en este caso el MAE).
* 3. Calculamos el gradiente, es decir, el jacobiano de la función de costo, respecto a w y a v.
* 4. Damos $\alpha$ pasos en la diriección que más decrese la función de costo, dado el paso 3. 
* 5. Actualizamos los valores de w, v dado el tamaño de paso y la dirección. 
* 6. Repetimos los pasos 3 a 5. 

In [2]:
df.head(2)

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,KS,128,415,No,Yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,No,Yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False


In [3]:
tar = 'Churn'
predictors = [c for c in df.columns if 'Total' in c] + ['Number vmail messages', 'Customer service calls']
X = df[predictors].copy()
y = df[tar].copy()

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, classification_report

In [5]:
Xt, Xv, yt, yv = train_test_split(X, y, test_size=.2)

# Red Neuronal

In [6]:
from sklearn.neural_network import MLPClassifier

In [7]:
help(MLPClassifier)

Help on class MLPClassifier in module sklearn.neural_network._multilayer_perceptron:

class MLPClassifier(sklearn.base.ClassifierMixin, BaseMultilayerPerceptron)
 |  MLPClassifier(hidden_layer_sizes=(100,), activation='relu', *, solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000)
 |  
 |  Multi-layer Perceptron classifier.
 |  
 |  This model optimizes the log-loss function using LBFGS or stochastic
 |  gradient descent.
 |  
 |  .. versionadded:: 0.18
 |  
 |  Parameters
 |  ----------
 |  hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)
 |      The ith element represents the number of neurons in the ith
 |      hidden layer.
 |  
 |  activation : {'identity

In [8]:
from sklearn.naive_bayes import GaussianNB

In [9]:
bayes = GaussianNB()
bayes.fit(Xt, yt)

print(classification_report(yt, bayes.predict(Xt)))

              precision    recall  f1-score   support

       False       0.90      0.96      0.93      1821
        True       0.60      0.35      0.44       311

    accuracy                           0.87      2132
   macro avg       0.75      0.66      0.69      2132
weighted avg       0.85      0.87      0.86      2132



In [10]:
print(classification_report(yv, bayes.predict(Xv)))

              precision    recall  f1-score   support

       False       0.91      0.97      0.94       457
        True       0.71      0.42      0.52        77

    accuracy                           0.89       534
   macro avg       0.81      0.69      0.73       534
weighted avg       0.88      0.89      0.88       534



In [62]:
nn = MLPClassifier(max_iter=300, verbose=2, activation='tanh', solver='sgd',
                   hidden_layer_sizes=(200,500, 100, 400),
                   learning_rate='adaptive', shuffle=True)
nn.fit(Xt, yt)

Iteration 1, loss = 0.61605574
Iteration 2, loss = 0.43695993
Iteration 3, loss = 0.42915528
Iteration 4, loss = 0.41066253
Iteration 5, loss = 0.40823684
Iteration 6, loss = 0.40576782
Iteration 7, loss = 0.40509165
Iteration 8, loss = 0.40355250
Iteration 9, loss = 0.40232420
Iteration 10, loss = 0.40115571
Iteration 11, loss = 0.39997810
Iteration 12, loss = 0.39845604
Iteration 13, loss = 0.39713487
Iteration 14, loss = 0.39622716
Iteration 15, loss = 0.39497308
Iteration 16, loss = 0.39372097
Iteration 17, loss = 0.39264408
Iteration 18, loss = 0.39281495
Iteration 19, loss = 0.39127063
Iteration 20, loss = 0.39028399
Iteration 21, loss = 0.38964817
Iteration 22, loss = 0.38932045
Iteration 23, loss = 0.38761344
Iteration 24, loss = 0.38728877
Iteration 25, loss = 0.38648969
Iteration 26, loss = 0.38560916
Iteration 27, loss = 0.38490818
Iteration 28, loss = 0.38422748
Iteration 29, loss = 0.38368389
Iteration 30, loss = 0.38322538
Iteration 31, loss = 0.38215872
Iteration 32, los

Iteration 250, loss = 0.31280549
Iteration 251, loss = 0.31250623
Iteration 252, loss = 0.31284438
Iteration 253, loss = 0.31075424
Iteration 254, loss = 0.31214141
Iteration 255, loss = 0.31141579
Iteration 256, loss = 0.31184506
Iteration 257, loss = 0.31040664
Iteration 258, loss = 0.31080557
Iteration 259, loss = 0.31257345
Iteration 260, loss = 0.31097280
Iteration 261, loss = 0.31054747
Iteration 262, loss = 0.30978842
Iteration 263, loss = 0.31019910
Iteration 264, loss = 0.31104165
Iteration 265, loss = 0.31028653
Iteration 266, loss = 0.30990950
Iteration 267, loss = 0.30961047
Iteration 268, loss = 0.30936062
Iteration 269, loss = 0.31071236
Iteration 270, loss = 0.30823107
Iteration 271, loss = 0.30958731
Iteration 272, loss = 0.30757902
Iteration 273, loss = 0.30947927
Iteration 274, loss = 0.30890062
Iteration 275, loss = 0.30885329
Iteration 276, loss = 0.30888148
Iteration 277, loss = 0.30815806
Iteration 278, loss = 0.30743662
Iteration 279, loss = 0.30819233
Iteration 



MLPClassifier(activation='tanh', hidden_layer_sizes=(200, 500, 100, 400),
              learning_rate='adaptive', max_iter=300, solver='sgd', verbose=2)

In [60]:
print(classification_report(yt, nn.predict(Xt)))
print(classification_report(yv, nn.predict(Xv)))

              precision    recall  f1-score   support

       False       0.88      0.99      0.93      1821
        True       0.84      0.23      0.36       311

    accuracy                           0.88      2132
   macro avg       0.86      0.61      0.64      2132
weighted avg       0.88      0.88      0.85      2132

              precision    recall  f1-score   support

       False       0.87      0.98      0.92       457
        True       0.53      0.13      0.21        77

    accuracy                           0.86       534
   macro avg       0.70      0.56      0.57       534
weighted avg       0.82      0.86      0.82       534

