# Redes Neuronales Artificiales - Artificial Neural Networks

## Instalamos las dependencias necesarias

In [None]:
# pip install --upgrade pip

# Instalar git repository para poder utilizar git
# conda install git

# Instalar Theano
# pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git

# Instalar Tensorflow y Keras
# conda install -c -conda-forge keras
# pip install keras <- en caso de no usar conda
# pip install scipy six

## Parte 1: Pre procesado de datos

In [1]:
# Importamos librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [78]:
# Importamos el dataset
dataset = pd.read_csv('Churn_Modelling.csv')
print(dataset)

      RowNumber  CustomerId    Surname  CreditScore Geography  Gender  Age  \
0             1    15634602   Hargrave          619    France  Female   42   
1             2    15647311       Hill          608     Spain  Female   41   
2             3    15619304       Onio          502    France  Female   42   
3             4    15701354       Boni          699    France  Female   39   
4             5    15737888   Mitchell          850     Spain  Female   43   
...         ...         ...        ...          ...       ...     ...  ...   
9995       9996    15606229   Obijiaku          771    France    Male   39   
9996       9997    15569892  Johnstone          516    France    Male   35   
9997       9998    15584532        Liu          709    France  Female   36   
9998       9999    15682355  Sabbatini          772   Germany    Male   42   
9999      10000    15628319     Walker          792    France  Female   28   

      Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMemb

In [31]:
print(dataset.columns)

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')


In [6]:
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [7]:
print(X.shape)
print(y.shape)

(10000, 10)
(10000,)


In [8]:
# Codificar datos Categóricos
# pip install scikit-learn
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

le_X_1 = LabelEncoder() 
X[:, 1] = le_X_1.fit_transform(X[:, 1])
le_X_2 = LabelEncoder() 
X[:, 2] = le_X_2.fit_transform(X[:, 2])
X.shape

(10000, 10)

In [61]:
# Eliminamos una columna de las variables dummies para evitar colinealidad
ct = ColumnTransformer( [('one_hot_encoder', OneHotEncoder(categories='auto'), [1])], 
                       remainder='passthrough')
X = np.array(ct.fit_transform(X), dtype=int)
X.shape

# No hace falta relaizar para género, ya que solo tienen dos valores

(10000, 13)

In [10]:
print(X.shape)
print(y.shape)

(10000, 12)
(10000,)


In [11]:
## Dividir el data set en conjunto de entrenamiento y conjunto de testing
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [12]:
# Escalado de  variables

from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.fit_transform(X_test)

print(X_train.shape)

(8000, 12)


## Parte 2: Construir la RNA

In [13]:
# Importar Keras y librerías adicionales
import keras
from keras.models import Sequential
from keras.layers import Dense

In [15]:
# Inicializar la RNA
classifier = Sequential()

In [None]:
# Añadir las capas de entrada y primera capa oculta
classifier.add(Dense(units= 6, kernel_initializer= 'uniform', 
                     activation= 'relu', input_dim= 11))

In [17]:
# Añadir la segunda capa oculta
classifier.add(Dense(units= 6, kernel_initializer= 'uniform', 
                     activation= 'relu'))

In [18]:
# Añadir la capa de salida
classifier.add(Dense(units= 1, kernel_initializer= 'uniform', 
                     activation= 'sigmoid'))

In [19]:
# Compilar la RNA
classifier.compile(optimizer= 'adam', loss= 'binary_crossentropy', metrics= ['accuracy'])

In [20]:
# Ajustamos la RNA al Conjunto de Entrenamiento
classifier.fit(X_train, y_train, batch_size= 10, epochs= 100)

Epoch 1/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.7957 - loss: 0.5689
Epoch 2/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8019 - loss: 0.4254
Epoch 3/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8005 - loss: 0.4316
Epoch 4/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8288 - loss: 0.4161
Epoch 5/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8263 - loss: 0.4126
Epoch 6/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8396 - loss: 0.3984
Epoch 7/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8375 - loss: 0.4007
Epoch 8/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8368 - loss: 0.4000
Epoch 9/100
[1m800/800[0m [32

<keras.src.callbacks.history.History at 0x1d4383f8550>

## Parte 3: Evaluar el modelo y calcular predicciones finales

In [21]:
# Predecir el resultado con el conjunto de test
y_pred = classifier.predict(X_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [27]:
# Convertir los valores a binarios
y_pred = (y_pred > 0.5)
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [29]:
# Elaborar una matriz de confusión (para evaluar las predicciones)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1498,   97],
       [ 186,  219]])

In [30]:
(1498 + 219)/2000

0.8585

## Predicción de nuevos resultados

In [79]:
# Supongamos que tienes nuevos datos en un DataFrame llamado new_data
new_data = pd.DataFrame({
    'CreditScore': [600],
    'Geography': ['France'],
    'Gender': ['Male'],
    'Age': [40],
    'Tenure': [3],
    'Balance': [60000],
    'NumOfProducts': [2],
    'HasCrCard': [1],
    'IsActiveMember': [1],
    'EstimatedSalary': [50000]
})

In [80]:
# Aplicar las mismas transformaciones a los nuevos datos
new_data['Geography'] = le_X_1.transform(new_data['Geography'])
new_data['Gender'] = le_X_2.transform(new_data['Gender'])

new_data = np.array(ct.transform(new_data), dtype=int)

# Escalar los nuevos datos
new_data = sc_X.transform(new_data)

# Predecir con el modelo entrenado
new_pred = classifier.predict(new_data)

# Convertir los valores a binarios
new_pred = (new_pred > 0.5)

print(new_pred)

ValueError: X has 10 features, but ColumnTransformer is expecting 12 features as input.