In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, recall_score, confusion_matrix

from keras.models import Sequential
from keras.layers import Dense, Dropout

In [15]:
data = pd.read_csv("data/Churn_treino.csv", sep=";")

data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0,1,1,1,10134888,1
1,608,Spain,Female,41,1,8380786,1,0,1,11254258,0
2,502,France,Female,42,8,1596608,3,1,0,11393157,1
3,699,France,Female,39,1,0,2,0,0,9382663,0
4,850,Spain,Female,43,2,12551082,1,1,1,790841,0


In [16]:
X = data.drop(columns=["Exited"], axis=1)
y = data["Exited"]

In [17]:
standard_scaler = StandardScaler()

numerical_features = X.select_dtypes(include=[np.number]).columns

X[numerical_features] = standard_scaler.fit_transform(X[numerical_features])

X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,-0.326221,France,Female,0.293517,-1.04176,-1.110553,-0.911583,0.646092,0.970243,0.170614
1,-0.440036,Spain,Female,0.198164,-1.387538,0.222782,-0.911583,-1.547768,0.970243,0.353281
2,-1.536794,France,Female,0.293517,1.032908,-0.856542,2.527057,0.646092,-1.03067,0.375948
3,0.501521,France,Female,0.007457,-1.387538,-1.110553,0.807737,-1.547768,-1.03067,0.047859
4,2.063884,Spain,Female,0.388871,-1.04176,0.886252,-0.911583,0.646092,0.970243,-1.354223


In [18]:
label_encoder = LabelEncoder()

category_features = X.select_dtypes(include="object").columns

for column in category_features:
    X[column] = label_encoder.fit_transform(X[column])

X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,-0.326221,0,0,0.293517,-1.04176,-1.110553,-0.911583,0.646092,0.970243,0.170614
1,-0.440036,2,0,0.198164,-1.387538,0.222782,-0.911583,-1.547768,0.970243,0.353281
2,-1.536794,0,0,0.293517,1.032908,-0.856542,2.527057,0.646092,-1.03067,0.375948
3,0.501521,0,0,0.007457,-1.387538,-1.110553,0.807737,-1.547768,-1.03067,0.047859
4,2.063884,2,0,0.388871,-1.04176,0.886252,-0.911583,0.646092,0.970243,-1.354223


In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [21]:
model = Sequential()

model.add(Dense(units=64, activation="relu", input_dim=X_train.shape[1]))
model.add(Dropout(0.4))
model.add(Dense(units=32, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(units=64, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(units=1, activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model.fit(X_train, y_train, epochs=50, batch_size=32)

Epoch 1/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7856 - loss: 0.5454
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8046 - loss: 0.4729
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7996 - loss: 0.4618
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8144 - loss: 0.4377
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8094 - loss: 0.4304
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8160 - loss: 0.4308
Epoch 7/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8172 - loss: 0.4381
Epoch 8/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8233 - loss: 0.4314
Epoch 9/50
[1m219/219[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7f56382c4860>

In [24]:
predict = model.predict(X_test)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [25]:
y_pred = (predict > 0.5).astype(int)

y_pred

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [1]])

In [26]:
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))
print("Recall Score: ", recall_score(y_test, y_pred))
print("Confusion Matrix: \n", confusion_matrix(y_test, y_pred))

Accuracy:  0.859
F1 Score:  0.590513068731849
Recall Score:  0.49114331723027377
Confusion Matrix: 
 [[2272  107]
 [ 316  305]]
