# Artificial Neural Net for Predicting Customer Churn

## Importing Libraries

In [119]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

## Working with Data

### Data Preprocessing

In [120]:
# Read the churn dataset from its CSV file into a DataFrame.
csv_path = './Datasets/Part 1 - Artificial Neural Networks (ANN)/Churn_Modelling.csv'
data = pd.read_csv(csv_path)

# Features: every column from position 3 up to (but not including) the last.
# Target: the last column.
feature_frame = data.iloc[:, 3:-1]
target_column = data.iloc[:, -1]
X = feature_frame.to_numpy()
y = target_column.to_numpy()

### Encoding Categorical Features

#### Encoding Gender Category

In [121]:
# Swap the labels in feature column 2 (the gender column, per the heading
# above) for integer codes, writing the codes back into X in place.
GENDER_COL = 2
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, GENDER_COL])
X[:, GENDER_COL] = gender_codes

X

array([[619, 'France', 0, ..., 1, 1, 101348.88],
       [608, 'Spain', 0, ..., 0, 1, 112542.58],
       [502, 'France', 0, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 0, ..., 0, 1, 42085.58],
       [772, 'Germany', 1, ..., 1, 0, 92888.52],
       [792, 'France', 0, ..., 1, 0, 38190.78]], dtype=object)

#### One Hot Encoding of Countries

In [122]:
# One-hot encode feature column 1 (the country strings) and pass every other
# column through unchanged.
# NOTE: this cell overwrites X with its own output, so re-running it without
# first re-running the cells above would feed the already-encoded array back
# into the encoder.
country_encoder = OneHotEncoder()
ct = ColumnTransformer(
    transformers=[('encoder', country_encoder, [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))
X

array([[1.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 0.0, 1.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

### Splitting into Train-Test Set

In [123]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

### Feature Scaling

In [124]:
# Fit the scaler on the training rows only, then apply that same fitted
# scaler to both splits so the test rows never influence the statistics.
sc = StandardScaler().fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Building the ANN

### Initializing the ANN

In [125]:
# Seed the Python, NumPy and TensorFlow random number generators before any
# layer is created, so that weight initialisation and batch shuffling are
# repeatable under Restart Kernel -> Run All. The value 0 matches the
# random_state already used for the train/test split.
tf.keras.utils.set_random_seed(0)

# Start from an empty Sequential model; layers are appended in later cells.
ann = tf.keras.models.Sequential()

### Adding the Input Layer and the Hidden Layers

In [126]:
# Append four identical fully connected hidden layers: 10 units each with
# ReLU activation. No input shape is declared on the first layer.
for hidden_idx in range(4):
    ann.add(tf.keras.layers.Dense(units=10, activation='relu'))


### Adding Output Layer

In [127]:
# Single sigmoid unit: the network emits one value in (0, 1) per row.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

## Training the ANN

### Compiling the ANN

In [128]:
# Binary cross-entropy pairs with the single sigmoid output unit; Adam is the
# optimizer and accuracy is reported alongside the loss during training.
ann.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

### Training ANN on Training Set

In [129]:
# Train on the scaled training rows for 100 epochs in mini-batches of 32.
# The History object returned by fit() is kept in `history` rather than
# discarded: its per-epoch loss/accuracy values stay available for
# inspection, and the cell no longer ends by echoing an object repr with a
# memory address.
history = ann.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6982 - loss: 0.5876
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8007 - loss: 0.4486
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8208 - loss: 0.4037
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8348 - loss: 0.3806
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8461 - loss: 0.3652
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8536 - loss: 0.3544
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8548 - loss: 0.3485
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8608 - loss: 0.3408
Epoch 9/100
[1m250/250[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x15068f17d40>

## Making Predictions and Evaluating the ANN Model

### Predicting the Result of a Single Observation

In [130]:
# One hand-written observation, laid out in the same column order as the
# encoded feature matrix: the first three values are the one-hot country
# columns, followed by the remaining pass-through features.
# NOTE(review): the meaning of each remaining position is assumed to follow
# the CSV column order -- confirm against the dataset header.
single_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]

# Apply the scaler fitted on the training data, run the network, then
# threshold the output at 0.5 to get a 0/1 label.
single_customer_scaled = sc.transform(single_customer)
single_customer_output = ann.predict(single_customer_scaled)
print((single_customer_output > 0.5).astype(int))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[[0]]


### Predicting Test Set

In [131]:
# Raw network outputs for the held-out rows, kept under their own name so the
# continuous values are not lost when the hard labels are derived.
y_prob = ann.predict(X_test)

# Threshold at 0.5 to obtain 0/1 labels. y_pred keeps the column shape that
# predict() returned; y_test is normalised the same way so both sides of the
# comparison are integer labels.
y_pred = (y_prob > 0.5).astype(int)
y_test = (y_test > 0.5).astype(int)

# Side-by-side view of true vs. predicted labels. The predictions column is
# labelled 'Predicted' (it holds model output, not test data). A freshly built
# DataFrame already has a default 0..n-1 index, so no index reset is needed.
pd.DataFrame({'Actual': y_test, 'Predicted': y_pred.flatten()})

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  


Unnamed: 0,Actual,Test
0,0,0
1,1,0
2,0,0
3,0,0
4,0,0
...,...,...
1995,0,0
1996,0,0
1997,0,0
1998,0,0


### Confusion Matrix

In [132]:
# Score the thresholded predictions against the true test labels.
test_accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Accuracy first, then the confusion matrix, each on its own line.
print(test_accuracy, cm, sep='\n')

0.863
[[1506   89]
 [ 185  220]]
