In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime
import os


In [2]:
# Load and preprocess the data
data = pd.read_csv(r"C:\Users\saksh\Desktop\Code\Udemy\Deep learning\Churn_Modelling.csv")
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)
data.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Label encoding for 'Gender'
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])

In [4]:
# One-hot encoding for 'Geography'
onehot_encoder_geo = OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoded_df = pd.DataFrame(geo_encoder, columns=onehot_encoder_geo.get_feature_names_out(['Geography']))
data = pd.concat([data.drop('Geography', axis=1), geo_encoded_df], axis=1)

In [5]:
# Save the encoders
with open('label_encoder_gender.pkl', 'wb') as file:
    pickle.dump(label_encoder_gender, file)
with open('onehot_encoder_geo.pkl', 'wb') as file:
    pickle.dump(onehot_encoder_geo, file)

In [6]:
# Splitting the data into training and testing sets
X = data.drop('Exited', axis=1)
y = data['Exited']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Standardizing the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:

# Save the scaler
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [9]:
# Building the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # hidden layer 1
    Dense(32, activation='relu'),  # hidden layer 2
    Dense(1, activation='sigmoid')  # output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
model.summary()

In [11]:

# Compiling the model
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt, loss="binary_crossentropy", metrics=['accuracy'])

In [12]:

# TensorBoard and EarlyStopping callbacks
log_dir = "logs/fit" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
print(f"Logging to directory: {log_dir}")

Logging to directory: logs/fit20240812-151751


In [13]:
# Ensure the log directory exists
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

In [14]:
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [15]:
# Training the model
history = model.fit(
    X_train, y_train, validation_data=(X_test, y_test), epochs=100,
    callbacks=[tensorflow_callback, early_stopping_callback]
)


Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8186 - loss: 0.4368 - val_accuracy: 0.8565 - val_loss: 0.3531
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8576 - loss: 0.3513 - val_accuracy: 0.8535 - val_loss: 0.3519
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8610 - loss: 0.3409 - val_accuracy: 0.8540 - val_loss: 0.3541
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8590 - loss: 0.3437 - val_accuracy: 0.8515 - val_loss: 0.3460
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8579 - loss: 0.3451 - val_accuracy: 0.8570 - val_loss: 0.3470
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8591 - loss: 0.3407 - val_accuracy: 0.8590 - val_loss: 0.3458
Epoch 7/100
[1m250/25

In [16]:

# Saving the model
model.save('model.h5')



In [17]:
# Starting TensorBoard (run this in a Jupyter notebook)
# Ensure no other TensorBoard instances are running on the same port
%load_ext tensorboard



In [20]:

%tensorboard --logdir logs/fit20240812-151751


Reusing TensorBoard on port 6006 (pid 17964), started 0:01:24 ago. (Use '!kill 17964' to kill it.)