In [263]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow import keras
from sklearn.metrics import accuracy_score,confusion_matrix

In [232]:
data = pd.read_csv('Churn_Modelling.csv')

In [233]:
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [234]:
X = data.iloc[:,3:13].values
y = data.iloc[:,13].values

In [235]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [236]:
# variables categoricas del pais
lb1 = LabelEncoder()
X[:,1] = lb1.fit_transform(X[:,1])

In [237]:
# tipo de variables de sexo de la persona
lb2 = LabelEncoder()
X[:,2] = lb2.fit_transform(X[:,2])

In [238]:
X

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

In [239]:
transformer = ColumnTransformer(
    transformers=[
        ("Churn_Modelling",        # Un nombre de la transformación
         OneHotEncoder(categories='auto'), # La clase a la que transformar
         [1]            # Las columnas a transformar.
         )
    ], remainder='passthrough'
)
X = transformer.fit_transform(X)
X = X[:,1:]

In [240]:
X.shape

(10000, 11)

In [241]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=0)

In [242]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

### inicializacion de la red neuronal artificial

In [214]:
# especificacion de la arquietctura:
# inicializacion de la red neuronal artificial
rna = Sequential()

In [215]:
# añadir capa de red neuronal
rna.add(Dense(units=6,kernel_initializer='uniform',activation='relu',input_dim=11))

In [216]:
# Añadir una segunda capa
rna.add(Dense(units=6,kernel_initializer='uniform',activation='relu'))
rna.add(Dense(units=3,kernel_initializer='uniform',activation='relu'))

In [217]:
# Añadir la capa de salida:
rna.add(Dense(units=1,kernel_initializer='uniform',activation='sigmoid'))

In [218]:
# Compilar la red neuronal artificial:
rna.compile(optimizer='adam',loss='binary_crossentropy',metrics=["accuracy"])

In [219]:
# Ajustamos el modelo para entrenamiento
rna.fit(X_train,y_train,batch_size=10,epochs=50)

Train on 7500 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f8be05f5750>

In [220]:
predict = rna.predict(X_test)

In [221]:
predict = (predict>0.5)

In [222]:
y_test

array([0, 1, 0, ..., 0, 0, 0])

In [223]:
accuracy_score(y_test,predict)

0.8432

In [224]:
confusion_matrix(y_test,predict)

array([[1930,   61],
       [ 331,  178]])

## Mejorar el modelo de red neuronal artificial
* Usando nuestro modelo que logro una precision de 0.864 como podemos mejorar el modelo? una forma de mejorar el modelo es potenciar lo que es la precision del modeoo y disminuir la variabilidad de este , podemos usar la tecnica llamada cross-validation en donde logramos una distribucion de los datos

In [273]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score,GridSearchCV

In [268]:
def build_classifier():
    rna = Sequential()
    rna.add(Dense(units=6,kernel_initializer='uniform',activation='relu',input_dim=11))
    rna.add(Dense(units=6,kernel_initializer='uniform',activation='relu'))
    rna.add(Dense(units=3,kernel_initializer='uniform',activation='relu'))
    rna.add(Dense(units=1,kernel_initializer='uniform',activation='sigmoid'))
    rna.compile(optimizer='adam',loss='binary_crossentropy',metrics=["accuracy"])
    
    return rna

In [290]:
classifier = KerasClassifier(build_fn=build_classifier,batch_size=10,nb_epoch=100)
accuracies = cross_val_score(estimator=classifier,X = X_train,y = y_train)

Train on 6000 samples


Train on 6000 samples


Train on 6000 samples


Train on 6000 samples


Train on 6000 samples


In [270]:
accuracies

array([0.78733331, 0.79533333, 0.81266665, 0.78933334, 0.79666668])

In [271]:
mean = accuracies.mean()
mean

0.7962666630744935

In [272]:
variance = accuracies.std()
variance

0.008920387643462467

## Mejorar la RNA
* Regularizacion del Dropout para evitar le overfitting

In [301]:
classifier = KerasClassifier(build_fn=build_classifier)

In [302]:
parameters = {
    'batch_size': [25,32],
    'nb_epoch'  : [100,500],
    'optimizer' : ['adam','rmsprop'],
}

In [303]:
grid_search = GridSearchCV(estimator=classifier,param_grid=parameters,scoring='accuracy',cv=10)
grid_search = grid_search.fit(X_train,y_train)

ValueError: optimizer is not a legal parameter

In [286]:
#best_parameters = grid_search.best_params_
#best_accuracy = grid_search.best_score_