In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder,OneHotEncoder
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import datetime


In [2]:
data=pd.read_csv(r"customer_churn_dataset-training-master.csv")
data.head()

Unnamed: 0,CustomerID,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,2.0,30.0,Female,39.0,14.0,5.0,18.0,Standard,Annual,932.0,17.0,1.0
1,3.0,65.0,Female,49.0,1.0,10.0,8.0,Basic,Monthly,557.0,6.0,1.0
2,4.0,55.0,Female,14.0,4.0,6.0,18.0,Basic,Quarterly,185.0,3.0,1.0
3,5.0,58.0,Male,38.0,21.0,7.0,7.0,Standard,Monthly,396.0,29.0,1.0
4,6.0,23.0,Male,32.0,20.0,5.0,8.0,Basic,Monthly,617.0,20.0,1.0


In [3]:
data=data.drop(['CustomerID','Subscription Type'],axis=1)
data.head()

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Contract Length,Total Spend,Last Interaction,Churn
0,30.0,Female,39.0,14.0,5.0,18.0,Annual,932.0,17.0,1.0
1,65.0,Female,49.0,1.0,10.0,8.0,Monthly,557.0,6.0,1.0
2,55.0,Female,14.0,4.0,6.0,18.0,Quarterly,185.0,3.0,1.0
3,58.0,Male,38.0,21.0,7.0,7.0,Monthly,396.0,29.0,1.0
4,23.0,Male,32.0,20.0,5.0,8.0,Monthly,617.0,20.0,1.0


In [4]:
# Initialize label encoder
label_encoder = LabelEncoder()


binary_columns = ['Gender', 'Churn']

# Apply label encoding to the binary columns
for column in binary_columns:
    data[column] = label_encoder.fit_transform(data[column])

# Display the updated data
data.head()

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Contract Length,Total Spend,Last Interaction,Churn
0,30.0,0,39.0,14.0,5.0,18.0,Annual,932.0,17.0,1
1,65.0,0,49.0,1.0,10.0,8.0,Monthly,557.0,6.0,1
2,55.0,0,14.0,4.0,6.0,18.0,Quarterly,185.0,3.0,1
3,58.0,1,38.0,21.0,7.0,7.0,Monthly,396.0,29.0,1
4,23.0,1,32.0,20.0,5.0,8.0,Monthly,617.0,20.0,1


In [5]:
onehot_encoder = OneHotEncoder()

# Perform OneHotEncoding on 'Contract Length'
contract_length_encoded = onehot_encoder.fit_transform(data[['Contract Length']])

# Convert the result to a DataFrame for better readability
contract_length_df = pd.DataFrame(contract_length_encoded.toarray(), columns=onehot_encoder.get_feature_names_out(['Contract Length']))

# Display the resulting encoded columns
contract_length_df.head()


Unnamed: 0,Contract Length_Annual,Contract Length_Monthly,Contract Length_Quarterly,Contract Length_nan
0,1.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0
2,0.0,0.0,1.0,0.0
3,0.0,1.0,0.0,0.0
4,0.0,1.0,0.0,0.0


In [6]:
data_updated = pd.concat([data, contract_length_df], axis=1)

# Drop the original 'Contract Length' column, as it's now represented in the one-hot encoded format
data_updated = data_updated.drop('Contract Length', axis=1)

# Display the updated dataset with the new one-hot encoded columns
data_updated.head()

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Total Spend,Last Interaction,Churn,Contract Length_Annual,Contract Length_Monthly,Contract Length_Quarterly,Contract Length_nan
0,30.0,0,39.0,14.0,5.0,18.0,932.0,17.0,1,1.0,0.0,0.0,0.0
1,65.0,0,49.0,1.0,10.0,8.0,557.0,6.0,1,0.0,1.0,0.0,0.0
2,55.0,0,14.0,4.0,6.0,18.0,185.0,3.0,1,0.0,0.0,1.0,0.0
3,58.0,1,38.0,21.0,7.0,7.0,396.0,29.0,1,0.0,1.0,0.0,0.0
4,23.0,1,32.0,20.0,5.0,8.0,617.0,20.0,1,0.0,1.0,0.0,0.0


In [7]:
with open("label_encoder.pkl","wb" )as file:
    pickle.dump(label_encoder,file)
with open("onehot_encoder.pkl","wb") as file:
    pickle.dump(onehot_encoder,file)


In [8]:
data_updated.head()

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Total Spend,Last Interaction,Churn,Contract Length_Annual,Contract Length_Monthly,Contract Length_Quarterly,Contract Length_nan
0,30.0,0,39.0,14.0,5.0,18.0,932.0,17.0,1,1.0,0.0,0.0,0.0
1,65.0,0,49.0,1.0,10.0,8.0,557.0,6.0,1,0.0,1.0,0.0,0.0
2,55.0,0,14.0,4.0,6.0,18.0,185.0,3.0,1,0.0,0.0,1.0,0.0
3,58.0,1,38.0,21.0,7.0,7.0,396.0,29.0,1,0.0,1.0,0.0,0.0
4,23.0,1,32.0,20.0,5.0,8.0,617.0,20.0,1,0.0,1.0,0.0,0.0


In [9]:
X= data_updated.drop(['Churn'], axis=1)
y=data_updated['Churn']


In [10]:
y=pd.DataFrame(y)
y

Unnamed: 0,Churn
0,1
1,1
2,1
3,1
4,1
...,...
440828,0
440829,0
440830,0
440831,0


In [11]:
X = pd.DataFrame(X)

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
print(X_train.shape)  # Should be (n_samples, n_features)
print(y_train.shape)  # Should be (n_samples,)



(352666, 12)
(352666, 1)


In [14]:
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

In [15]:
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and testing sets
X_train = scaler.fit_transform(X_train)
X_test= scaler.transform(X_test)

In [16]:
with open("scaler.pkl","wb") as file:
    pickle.dump(scaler,file)

In [17]:
model=Sequential([ 
    Dense(64,activation="relu",input_shape=(X_train.shape[1],)),
    Dense(32,activation="sigmoid"),
    Dense(1,activation="sigmoid")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
model.compile(optimizer='Adam',loss="binary_crossentropy",metrics=['accuracy'])

In [19]:

from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [20]:
## Set up Early Stopping
early_stopping_callback=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [21]:

history=model.fit(
    X_train,y_train,validation_data=(X_test,y_test),epochs=100,
    callbacks=[tensorflow_callback,early_stopping_callback]
)

Epoch 1/100
[1m11021/11021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step - accuracy: 0.9511 - loss: 0.1247 - val_accuracy: 0.5674 - val_loss: 0.6840
Epoch 2/100
[1m11021/11021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.5665 - loss: 0.6848 - val_accuracy: 0.5674 - val_loss: 0.6852
Epoch 3/100
[1m11021/11021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.5677 - loss: 0.6845 - val_accuracy: 0.5674 - val_loss: 0.6853
Epoch 4/100
[1m11021/11021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.5683 - loss: 0.6843 - val_accuracy: 0.5674 - val_loss: 0.6843
Epoch 5/100
[1m11021/11021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.5675 - loss: 0.6846 - val_accuracy: 0.5674 - val_loss: 0.6841
Epoch 6/100
[1m11021/11021[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.5667 - loss: 0.6847 - val_accuracy: 0.5674 - val_loss:

In [22]:
model.save('model.h5')



In [33]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [34]:
%tensorboard --logdir logs/fit/20240923-224607


Reusing TensorBoard on port 6011 (pid 20940), started 22:44:32 ago. (Use '!kill 20940' to kill it.)