In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import keras
from keras.models import Sequential
from keras.layers import Dense

In [2]:
data = pd.read_csv("Churn_Modelling.csv")
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [3]:
data.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [4]:
X = data.drop(columns=['Exited'])
y = data['Exited']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [5]:
numeric_features = X_train.select_dtypes(include=['float64','int64']).columns
non_numeric_features = X_train.select_dtypes(include=['object']).columns

In [6]:
numeric_transformer = StandardScaler()
non_numeric_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocesser = ColumnTransformer(
    transformers=[
        ('num',numeric_transformer,numeric_features),
        ('non_num',non_numeric_transformer,non_numeric_features)
    ])

X_train_preprocessed = preprocesser.fit_transform(X_train)
X_test_preprocessed = preprocesser.transform(X_test)

X_test_preprocessed = X_test_preprocessed.toarray()
X_train_preprocessed = X_train_preprocessed.toarray()

In [8]:
model = Sequential()

model.add(Dense(units=64, activation='relu', input_dim=X_train_preprocessed.shape[1]))

model.add(Dense(units=32,activation='relu'))

model.add(Dense(units=1, activation='sigmoid'))

model.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train_preprocessed,y_train,epochs=5,batch_size=32,validation_split=0.3)

Epoch 1/5
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7868 - loss: 0.5210 - val_accuracy: 0.8160 - val_loss: 0.4272
Epoch 2/5
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8351 - loss: 0.3831 - val_accuracy: 0.8351 - val_loss: 0.4019
Epoch 3/5
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8836 - loss: 0.2936 - val_accuracy: 0.8240 - val_loss: 0.4056
Epoch 4/5
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9129 - loss: 0.2259 - val_accuracy: 0.8164 - val_loss: 0.4333
Epoch 5/5
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9375 - loss: 0.1793 - val_accuracy: 0.8053 - val_loss: 0.4856


<keras.src.callbacks.history.History at 0x1fbfbc86cb0>

In [9]:
loss,accuracy = model.evaluate(X_test_preprocessed,y_test)
print(f'Test loss: {loss}, Test acc: {accuracy}')

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8230 - loss: 0.4693
Test loss: 0.4660561978816986, Test acc: 0.8176000118255615
