In [59]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [60]:
# Display the installed TensorFlow version string.
tf.__version__

# Data Preprocessing

## Importing the Dataset

In [61]:
# Walk the Kaggle input directory and print the full path of every file found,
# so the dataset location can be confirmed before loading it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Read the churn CSV into a DataFrame and preview its first rows.
data = pd.read_csv('/kaggle/input/churn-modeling-dataset/Churn_Modelling.csv')
data.head()

In [62]:
# Features: every column from index 3 up to (but not including) the last one.
# The first three *columns* (not rows) are skipped because they are not
# considered useful predictors.
x = data.iloc[:, 3:-1].values

# Target: the last column.
y = data.iloc[:, -1].values

In [63]:
# Display the feature matrix `x`.
x

In [64]:
# Display the target vector `y`.
y

## Encoding Categorical Data

### Label Encoding the Gender Column

In [65]:
from sklearn.preprocessing import LabelEncoder

# Column 2 of `x` is the gender column; replace its labels in place with the
# integer codes learned by the encoder.
le = LabelEncoder()
le.fit(x[:, 2])
x[:, 2] = le.transform(x[:, 2])

### One Hot Encoding for the Geography Column

In [66]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 1 (geography) and pass every other column through
# unchanged.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [1])],
    remainder="passthrough",
)

# NOTE: `x` is overwritten here, so re-running this cell on its own would
# encode column 1 of the already-transformed array. Re-run from the cell that
# builds `x` instead.
x = np.array(ct.fit_transform(x))

In [67]:
# Print `x` to inspect the matrix produced by the one-hot encoding step.
print(x)

## Splitting the dataset into the Training and Test sets

In [68]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=9,
)

## Feature Scaling
Feature scaling is fundamental for deep learning, so we apply it to every feature.

In [69]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply those same fitted
# statistics to both splits, so nothing from the test set influences the
# scaling.
ss = StandardScaler()
ss.fit(x_train)
x_train = ss.transform(x_train)
x_test = ss.transform(x_test)

# Building the ANN

## Initializing the ANN


First, we create a variable that is the artificial neural network itself. This variable is created as an object of the Sequential class, which lets us build an artificial neural network as a sequence of layers.

In [70]:
# Fix TensorFlow's global random seed before any layer is created, so that
# weight initialisation and training can be repeated on a re-run as far as
# TensorFlow allows. The value matches the random_state used for the
# train/test split.
tf.random.set_seed(9)

# `ann` is the neural network itself: an instance of the Keras Sequential
# class, to which layers are added one after another.
ann = tf.keras.models.Sequential()

## Adding the input layer and the first hidden layer


In [71]:
# First hidden layer: a fully connected layer with 6 neurons and ReLU
# activation. `units` sets how many hidden neurons the layer has; there is no
# rule for the right number, it is found by experimentation.
# No input shape is given here.
ann.add(
    tf.keras.layers.Dense(units=6, activation="relu")
)

## Adding the second hidden layer


In [72]:
# Second hidden layer: same configuration as the first (6 neurons, ReLU).
ann.add(
    tf.keras.layers.Dense(units=6, activation="relu")
)

## Adding the output layer


In [73]:
 ann.add(tf.keras.layers.Dense(units=1,activation="sigmoid")) ## to know what is the number, just needs an experimentation
# having a sigmoid activation function allows to get not only ultimately the predictions,
#but the probabilities that the binary outcome is one.

# Part 3 - Training the ANN

## Compiling the ANN


In [74]:
# Configure training:
# - optimizer: "adam", a stochastic-gradient-descent method; it updates the
#   weights to reduce the loss between the predictions and the real results.
# - loss: binary cross-entropy, matching the single sigmoid output.
# - metrics: accuracy is reported during training.
ann.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

-- optimizer -- When we train the ANN on the training set, at each iteration we compare the predictions in a batch to the real results in the same batch.

The optimizer then updates the weights to reduce the loss between those predictions and the real results. Here we use "adam", a variant of stochastic gradient descent.

-- loss-- 
When you are doing binary classification, i.e. predicting a binary outcome, the loss function should be "binary_crossentropy".

If you were doing non-binary classification (for example, predicting three different categories), you would use a "categorical_crossentropy" loss instead.


-- metrics--
we can actually choose several metrics at the same time.

## Training the ANN on the Training Set

In [75]:
# Train on the scaled training split: the weights are updated after each batch
# of 32 samples, and the whole training set is passed through 100 times.
ann.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=100,
)

-- batch_size
The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters. (https://machinelearningmastery.com/difference-between-a-batch-and-an-epoch/)
We train in batches because batch learning is generally more efficient and performs better when training an artificial neural network.



The batch size parameter gives exactly the number of predictions you want to have in the batch to be compared to that same number of real results. Classic value is 32

-- epochs

A neural network has to be trained over a certain number of epochs so as to improve its accuracy over time.
The number of epochs is a hyperparameter that defines the number of times that the learning algorithm will work through the entire training dataset.

One epoch means that each sample in the training dataset has had an opportunity to update the internal model parameters. An epoch is comprised of one or more batches - The number of epochs is traditionally large, often hundreds or thousands, allowing the learning algorithm to run until the error from the model has been sufficiently minimized.(https://machinelearningmastery.com/difference-between-a-batch-and-an-epoch/)

## Homework
Use our ANN model to predict whether the customer with the following information will leave the bank:

Geography: France

Credit Score: 600

Gender: Male

Age: 40 years old

Tenure: 3 years

Balance: $ 60000

Number of Products: 2

Does this customer have a credit card? Yes

Is this customer an Active Member: Yes

Estimated Salary: $ 50000

So, should we say goodbye to that customer?

In [81]:
# Predict for one customer. The input must be a 2D array (one row per
# customer), hence the double brackets, and it goes through the same fitted
# scaler as the training data.
# The network outputs the probability that the customer leaves the bank;
# comparing it with 0.5 turns that into a boolean (True = predicted to leave,
# False = predicted to stay). Drop `> 0.5` to see the raw probability.
ann.predict(
    ss.transform([[
        1, 0, 0,  # Geography: France (one-hot)
        600,      # Credit score
        1,        # Gender: Male
        40,       # Age
        3,        # Tenure (years)
        60000,    # Balance
        2,        # Number of products
        1,        # Has a credit card: yes
        1,        # Is an active member: yes
        50000,    # Estimated salary
    ]])
) > 0.5

This means the customer is predicted not to leave the bank.

## Predicting the test set results


In [82]:
# Threshold the predicted probabilities at 0.5 to get boolean class predictions.
y_pred = ann.predict(x_test) > 0.5

# Print predictions (first column) next to the true labels (second column).
print(
    np.concatenate(
        (y_pred.reshape(-1, 1), y_test.reshape(-1, 1)),
        axis=1,
    )
)

In [83]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix of the true test labels against the thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [84]:
# Fraction of test samples whose predicted class matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)