# Data Preprocessing

In [63]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Importing the libraries

In [64]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [65]:
tf.__version__

'2.17.0'

## Importing Dataset

In [66]:
data_set = pd.read_csv("/content/American Express - Sheet1.csv")
X = data_set.iloc[:, :-1].values
y = data_set.iloc[:, -1].values

In [67]:
X

array([[553, 'Delhi', 'Female', ..., 4, 1, 274150],
       [447, 'Bengaluru', 'Male', ..., 4, 1, 519360],
       [501, 'Delhi', 'Female', ..., 4, 1, 545501],
       ...,
       [627, 'Mumbai', 'Female', ..., 4, 0, 494067],
       [600, 'Bengaluru', 'Female', ..., 2, 1, 109375],
       [553, 'Delhi', 'Male', ..., 4, 1, 180031]], dtype=object)

In [68]:
y

array([0, 0, 0, ..., 0, 1, 0])




## Handling Missing Data

## Encoding Categorical Data

In [69]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
X[:, 2] = label_encoder.fit_transform(X[:, 2])
X

array([[553, 'Delhi', 0, ..., 4, 1, 274150],
       [447, 'Bengaluru', 1, ..., 4, 1, 519360],
       [501, 'Delhi', 0, ..., 4, 1, 545501],
       ...,
       [627, 'Mumbai', 0, ..., 4, 0, 494067],
       [600, 'Bengaluru', 0, ..., 2, 1, 109375],
       [553, 'Delhi', 1, ..., 4, 1, 180031]], dtype=object)

In [70]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
X

array([[0.0, 1.0, 0.0, ..., 4, 1, 274150],
       [1.0, 0.0, 0.0, ..., 4, 1, 519360],
       [0.0, 1.0, 0.0, ..., 4, 1, 545501],
       ...,
       [0.0, 0.0, 1.0, ..., 4, 0, 494067],
       [1.0, 0.0, 0.0, ..., 2, 1, 109375],
       [0.0, 1.0, 0.0, ..., 4, 1, 180031]], dtype=object)

## Splitting data into Test set & Training Set


In [71]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Feature Scaling

In [72]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

## **ANN**

## Initialization

In [86]:
ann = tf.keras.models.Sequential()
ann

<Sequential name=sequential_5, built=False>

## Adding input layer as first hidden layer

In [109]:
ann.add(tf.keras.layers.Dense(units=5, activation='relu'))

<Sequential name=sequential_5, built=True>

## Adding second hidden layer

In [110]:
ann.add(tf.keras.layers.Dense(units=5, activation='relu'))

## Adding output layer

In [111]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

## **ANN TRAINING**

ANN Compiling

In [112]:
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'] )

ANN Training

In [114]:
ann.fit(X_train, y_train, batch_size=32, epochs=120)

Epoch 1/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7927 - loss: 0.6251
Epoch 2/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8034 - loss: 0.4942
Epoch 3/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8074 - loss: 0.4665
Epoch 4/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7996 - loss: 0.4554
Epoch 5/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7931 - loss: 0.4536
Epoch 6/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8004 - loss: 0.4430
Epoch 7/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7973 - loss: 0.4441
Epoch 8/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7955 - loss: 0.4453
Epoch 9/120
[1m249/249[0m [32

<keras.src.callbacks.history.History at 0x7c04e0456bf0>

In [115]:
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)
print(np.concatenate((y_pred.reshape(len(y_test), 1), y_test.reshape(len(y_pred), 1)), 1))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 1]]


## Confusion Matrix

In [116]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
cm

array([[1467,   98],
       [ 202,  219]])

In [117]:
accuracy_score(y_test, y_pred)

0.8489425981873112