In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle 

In [91]:
# Load the churn dataset from 'Churn_Modelling.csv' (path relative to the
# working directory) and preview the first rows.
data = pd.read_csv('Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Preprocess the data: drop the columns treated as irrelevant for modelling
# (row number, customer id, surname).
# errors='ignore' keeps the cell re-runnable: a second run on the already
# trimmed frame is a no-op instead of raising KeyError.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
# Preview only the first rows rather than rendering the whole frame.
data.head()

In [None]:
# Encode the categorical 'Gender' column as integer codes.
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels; it is kept here because the fitted encoder is pickled later in this
# notebook - confirm with the consumers of that pickle before swapping it.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])
# Preview only the first rows rather than rendering the whole frame.
data.head()

In [93]:
# One-hot encode the 'Geography' column.
from sklearn.preprocessing import OneHotEncoder

# Double brackets select a one-column DataFrame (2-D input) for the encoder.
geography_frame = data[['Geography']]
onehot_encoder = OneHotEncoder().fit(geography_frame)
geo_encoder = onehot_encoder.transform(geography_frame)
geo_encoder

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [92]:
# Column names produced by the fitted one-hot encoder, prefixed with 'Geography'.
onehot_encoder.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [90]:
# Turn the sparse one-hot matrix into a dense DataFrame with readable column names.
# index=data.index keeps the encoded rows aligned with `data`, so the later
# column-wise concat matches rows by label even if `data` does not carry a
# plain 0..n-1 index.
geo_encoded_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=onehot_encoder.get_feature_names_out(['Geography']),
    index=data.index,
)
# Preview only the first rows rather than rendering the whole frame.
geo_encoded_df.head()

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [None]:
# Replace the raw 'Geography' column with its one-hot encoded columns.
data_without_geography = data.drop(columns='Geography')
data = pd.concat([data_without_geography, geo_encoded_df], axis=1)
data.head()

In [None]:
# Persist the fitted encoders to pickle files in the working directory.
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder.pkl', onehot_encoder),
):
    with open(pickle_path, 'wb') as encoder_file:
        pickle.dump(fitted_encoder, encoder_file)

In [None]:
# Divide the dataset into independent features (X) and the dependent target (y).
X = data.drop('Exited', axis=1)
y = data['Exited']

# Split the data into training and testing sets: 20% of the rows are held out,
# and random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training rows only and the
# same fitted scaler is then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [None]:
# Preview the first few scaled training rows instead of rendering the whole array.
X_train[:5]

In [None]:
# Persist the fitted scaler to 'scaler.pkl' in the working directory.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [88]:
#ANN IMPLEMENTATION
#Building blocks used to implement the ANN:
#Sequential: the Sequential API is a linear stack of layers. We can create a Sequential model by passing a list of layer instances to the constructor.
#Dense: used for the hidden layers and the output layer
#Activation: the activation function (sigmoid, tanh, relu)
#Optimizer: the optimization algorithm (adam, sgd, rmsprop), responsible for updating the weights of the network based on the loss function and the data it sees during training
#Loss function: calculates the error between predicted and actual values (binary_crossentropy for binary classification, categorical_crossentropy for multi-class classification, mean_squared_error for regression)
#Metrics: evaluate the performance of the model (accuracy, precision, recall, f1-score)
#Training -> logs -> folders -> TensorBoard -> visualise the training process




import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import TensorBoard,EarlyStopping
import datetime 




In [None]:
##build our ANN model
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created so that initialisation and training are repeatable across runs.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input shape with an explicit Input object rather than the
    # `input_shape=` argument on the first Dense layer; one entry per feature column.
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),    # hidden layer 1, fed by the input features
    Dense(32, activation='relu'),    # hidden layer 2, fed by hidden layer 1
    Dense(1, activation='sigmoid'),  # output layer, fed by hidden layer 2
])

In [None]:
# Print a summary of the model built above.
model.summary()

In [None]:
# Optimizer and loss objects handed to model.compile().
# The `tf` alias imported with the other TensorFlow imports is reused here
# instead of importing the same package a second time under another name.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
loss = tf.keras.losses.BinaryCrossentropy()

In [None]:
#compile the model with the optimizer and loss objects created earlier, tracking accuracy as the metric
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
#set up the TensorBoard callback
# TensorBoard is already imported together with the other Keras imports, so it
# is not imported again here.
# Each run logs into a sub-directory of logs/fit named after the current timestamp.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
#set up early stopping to prevent overfitting
# The callback monitors val_loss with a patience of 5 and is configured to
# restore the best weights.
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [89]:
#train the model for up to 100 epochs with the TensorBoard and early-stopping callbacks
# NOTE(review): the held-out test split (X_test, y_test) is passed as
# validation_data, and early stopping monitors val_loss with
# restore_best_weights=True, so the kept weights are selected using test data.
# Consider carving a separate validation split out of the training data.
history = model.fit(X_train, y_train, validation_data=(X_test,y_test) ,epochs=100, callbacks=[tensorboard_callback, early_stopping_callback])

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8687 - loss: 0.3146 - val_accuracy: 0.8575 - val_loss: 0.3492
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8700 - loss: 0.3127 - val_accuracy: 0.8595 - val_loss: 0.3431
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8695 - loss: 0.3122 - val_accuracy: 0.8665 - val_loss: 0.3390
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8721 - loss: 0.3104 - val_accuracy: 0.8615 - val_loss: 0.3435
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8719 - loss: 0.3096 - val_accuracy: 0.8655 - val_loss: 0.3411


In [None]:
# Save the trained model to 'model.keras' in the working directory.
model.save('model.keras')

In [None]:
#Load the TensorBoard notebook extension (needed before the %tensorboard magic can be used)
%load_ext tensorboard

In [None]:
%pip install --upgrade setuptools
import pkg_resources; print(pkg_resources.__version__)

In [None]:
# Launch TensorBoard inline, pointed at the logs/fit directory.
%tensorboard --logdir logs/fit