In [1]:
from tensorflow.keras.models import Sequential

In [2]:
from tensorflow.keras.layers import Dense
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import ( StandardScaler,
                                    OneHotEncoder, OrdinalEncoder
                                    )
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import(   
                                Pipeline,
                                make_pipeline
                            )
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import Pipeline as ImbPipeline

In [3]:
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from scikeras.wrappers import KerasClassifier

In [4]:
# Stack the training and testing CSV exports into a single frame with a
# fresh 0..n-1 index, then discard every row that has a missing value.
# (The index is not renumbered after dropna, so it may contain gaps.)
df = pd.concat(
    [
        pd.read_csv(csv_name)
        for csv_name in (
            'customer_churn_dataset-training-master.csv',
            'customer_churn_dataset-testing-master.csv',
        )
    ],
    axis=0,
    ignore_index=True,
).dropna()

In [5]:
# Integer code substituted for each subscription tier label.
subscription_map = dict(Basic=3, Premium=1, Standard=2)

# Integer code substituted for each contract duration label.
contract_map = dict(Annual=2, Monthly=3, Quarterly=1)

# Overwrite both categorical columns with their integer codes. Series.map
# yields NaN for any label that is absent from the mapping.
df['Subscription Type'] = df['Subscription Type'].map(subscription_map)
df['Contract Length'] = df['Contract Length'].map(contract_map)

# Combined feature: subscription code + (contract code - 1) squared.
contract_term = df['Contract Length'].sub(1).pow(2)
df['Subscription_Contract'] = df['Subscription Type'].add(contract_term)

In [6]:
# Columns excluded from modelling: the customer identifier and the two
# encoded categoricals that were combined into 'Subscription_Contract'.
Irr_cols = [
    'CustomerID',
    'Subscription Type',
    'Contract Length',
]
df = df.drop(columns=Irr_cols)


In [7]:
# Print column dtypes and non-null counts of the frame after the column drop.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 505206 entries, 0 to 505206
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Age                    505206 non-null  float64
 1   Gender                 505206 non-null  object 
 2   Tenure                 505206 non-null  float64
 3   Usage Frequency        505206 non-null  float64
 4   Support Calls          505206 non-null  float64
 5   Payment Delay          505206 non-null  float64
 6   Total Spend            505206 non-null  float64
 7   Last Interaction       505206 non-null  float64
 8   Churn                  505206 non-null  float64
 9   Subscription_Contract  505206 non-null  int64  
dtypes: float64(8), int64(1), object(1)
memory usage: 42.4+ MB


In [8]:
# 'Churn' is the prediction target; every other column is a predictor.
y = df['Churn']
X = df.drop(columns='Churn')

In [9]:
# Hold out 20% of the rows for evaluation. stratify=y keeps the class
# proportions of the target the same in both parts; the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [10]:
# Columns that are standard-scaled.
numerical_features = [
    'Age',
    'Tenure',
    'Usage Frequency',
    'Support Calls',
    'Payment Delay',
    'Total Spend',
    'Last Interaction',
    'Subscription_Contract',
]

# Columns that are one-hot encoded.
categorical_features_ohe = ['Gender']

# Single-step pipelines, one per column group.
num_pipeline = Pipeline([('scale', StandardScaler())])
ohe_pipeline = Pipeline([
    # Dense output; categories unseen during fit encode as all zeros.
    ('one-hot', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
])

# Route each column group to its pipeline; columns listed in neither group
# are dropped. n_jobs=-1 lets the transformers run on all available cores.
column_transformer = ColumnTransformer(
    [
        ('num', num_pipeline, numerical_features),
        ('cat', ohe_pipeline, categorical_features_ohe),
    ],
    n_jobs=-1,
    remainder='drop',
)


def build_nn(input_shape):
    """Build and compile the feed-forward binary classifier.

    Architecture: Dense(54, relu) -> Dense(24, relu) -> Dense(1, sigmoid),
    compiled with the Adam optimizer, binary cross-entropy loss and an
    accuracy metric.

    Args:
        input_shape: Number of input features per sample. It is wrapped
            into the 1-tuple ``(input_shape,)`` for the input spec.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # tf.keras.Input(shape=...) is used instead of
        # InputLayer(input_shape=...): the `input_shape` argument of
        # InputLayer is deprecated in Keras 3, while Input(shape=...) is
        # accepted by both Keras 2 and Keras 3.
        tf.keras.Input(shape=(input_shape,)),
        tf.keras.layers.Dense(54, activation='relu'),
        tf.keras.layers.Dense(24, activation='relu'),
        # Single sigmoid unit: output is a probability for the positive class.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Fit the preprocessor on the training split to learn how many columns it
# emits (scaled numerics plus one-hot columns); that width is the network's
# input size.
X_transformed = column_transformer.fit_transform(X_train)
input_shape = X_transformed.shape[1]

# build_nn is passed directly and the width is routed to it through the
# `model__input_shape` parameter. Compared with a lambda closing over the
# global `input_shape`, the value is bound when the wrapper is created (a
# later rebinding of the global cannot silently change the model) and no
# unpicklable lambda is stored on the estimator.
# random_state seeds the training so repeated runs are reproducible; it uses
# the same seed as the train/test split.
nn_classifier = KerasClassifier(
    model=build_nn,
    model__input_shape=input_shape,
    epochs=20,
    batch_size=32,
    verbose=1,
    random_state=42,
)

# End-to-end estimator: the preprocessor is fitted again inside
# pipeline.fit(), followed by the neural-network classifier.
pipeline = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('classifier', nn_classifier),
])



In [11]:

# Train the full pipeline on the training split, then predict labels for the
# held-out split (fit returns the fitted pipeline, so the calls chain).
y_pred = pipeline.fit(X_train, y_train).predict(X_test)


Epoch 1/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 543us/step - accuracy: 0.8983 - loss: 0.2835
Epoch 2/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 745us/step - accuracy: 0.9293 - loss: 0.2123
Epoch 3/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 588us/step - accuracy: 0.9315 - loss: 0.2023
Epoch 4/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 620us/step - accuracy: 0.9328 - loss: 0.1977
Epoch 5/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 569us/step - accuracy: 0.9329 - loss: 0.1952
Epoch 6/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 597us/step - accuracy: 0.9343 - loss: 0.1906
Epoch 7/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 584us/step - accuracy: 0.9339 - loss: 0.1910
Epoch 8/20
[1m12631/12631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 532us/step - accuracy: 0.9342 - loss:

In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Accuracy of the held-out predictions against the true test labels.
accuracy_score(y_test, y_pred)



0.9352942340808773

In [13]:
# Precision of the held-out predictions against the true test labels.
precision_score(y_test, y_pred)

0.8984067267962508

In [14]:
# Recall of the held-out predictions against the true test labels.
recall_score(y_test, y_pred)

0.9960961870978092