In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
df = pd.read_csv('../Churn_Modelling.csv')

In [3]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [None]:
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname',])

In [None]:
X, y = df.drop(columns=['EstimatedSalary']), df['EstimatedSalary']

In [6]:
X.columns

Index(['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
       'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'Exited'],
      dtype='object')

In [25]:
y

0       101348.88
1       112542.58
2       113931.57
3        93826.63
4        79084.10
          ...    
9995     96270.64
9996    101699.77
9997     42085.58
9998     92888.52
9999     38190.78
Name: EstimatedSalary, Length: 10000, dtype: float64

In [8]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
ctf = ColumnTransformer(
    [
        ('ohe_geo', OneHotEncoder(drop='first'), ['Geography']),
        ('ord_gender', OrdinalEncoder(), ['Gender']),
        ('scale_remaining', StandardScaler(), ['CreditScore', 'Age', 'Tenure', 'Balance',
                                               'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'Exited'])
    ]
)

In [9]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)

In [10]:
X_train_transformed=ctf.fit_transform(X_train)

In [13]:
pd.DataFrame(X_train_transformed,columns=ctf.get_feature_names_out())

Unnamed: 0,ohe_geo__Geography_Germany,ohe_geo__Geography_Spain,ord_gender__Gender,scale_remaining__CreditScore,scale_remaining__Age,scale_remaining__Tenure,scale_remaining__Balance,scale_remaining__NumOfProducts,scale_remaining__HasCrCard,scale_remaining__IsActiveMember,scale_remaining__Exited
0,0.0,0.0,1.0,0.242678,-0.466495,-1.388428,0.767473,0.829232,-1.549099,0.964073,-0.504015
1,0.0,0.0,1.0,-0.530402,-0.182648,-1.388428,1.056934,-0.906081,-1.549099,0.964073,-0.504015
2,0.0,0.0,0.0,1.345605,-0.371879,-0.352877,-1.231649,-0.906081,0.645536,0.964073,-0.504015
3,0.0,1.0,1.0,-0.386094,-0.277264,-0.007693,-1.231649,0.829232,0.645536,-1.037265,-0.504015
4,1.0,0.0,0.0,0.046831,-1.507264,-0.352877,1.391986,-0.906081,-1.549099,0.964073,-0.504015
...,...,...,...,...,...,...,...,...,...,...,...
6995,1.0,0.0,1.0,0.510679,-0.655726,-0.007693,0.412644,-0.906081,-1.549099,-1.037265,-0.504015
6996,1.0,0.0,1.0,0.139601,-0.655726,-1.388428,0.888738,-0.906081,-1.549099,-1.037265,-0.504015
6997,0.0,1.0,1.0,-2.427024,-0.371879,1.027859,-1.231649,-0.906081,-1.549099,-1.037265,-0.504015
6998,0.0,0.0,0.0,-0.344863,0.101198,-1.733612,-1.231649,-0.906081,0.645536,-1.037265,-0.504015


#### Ann Regression

In [15]:
import tensorflow as tf #For different attributes like losses, optimizers
from tensorflow.keras.models import Sequential #ann
from tensorflow.keras.layers import Dense  #hidden layers
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard   #logging

In [16]:
# 1. Create the model and pass the layers
# Dense(no of nuerons,activation function,input shape in first hl)
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_transformed.shape[1],)),  # hl1
    Dense(32, activation='relu'),  # hl2
    Dense(1)  # output layer- no or by default linear activation fn
])

In [17]:
model.summary()

In [53]:
opt=tf.optimizers.Adam(learning_rate=0.001)
loss=tf.losses.MeanSquaredError()

In [54]:
# 3.compile model- we can use different metrics parameter also
model.compile(loss=loss,optimizer=opt,metrics=['mae'])

In [55]:
# 4. Add callbacks(here 2 : tensorboard,earlystopping)
import datetime
log_dir = 'reg_logs/dir'+datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tf_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
# histogram_freq=1 means on every epoch we save logs

In [56]:
early_stopping = EarlyStopping(
    patience=10, monitor='val_loss', restore_best_weights=True)

In [57]:
X_test_transformed=ctf.transform(X_test)

In [58]:
history = model.fit(X_train_transformed, y_train, validation_data=(
    X_test_transformed, y_test), callbacks=[early_stopping, tf_callback],epochs=100)

Epoch 1/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3311056896.0000 - mae: 49730.9609 - val_loss: 3286807552.0000 - val_mae: 49249.6836
Epoch 2/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3256875520.0000 - mae: 49362.2383 - val_loss: 3286748160.0000 - val_mae: 49251.0898
Epoch 3/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3296357888.0000 - mae: 49778.3125 - val_loss: 3287453184.0000 - val_mae: 49251.6797
Epoch 4/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3269038848.0000 - mae: 49466.1211 - val_loss: 3288270848.0000 - val_mae: 49254.3164
Epoch 5/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3248034560.0000 - mae: 49141.4375 - val_loss: 3287329792.0000 - val_mae: 49253.3125
Epoch 6/100
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3240016640

In [59]:
from sklearn.metrics import r2_score as sklearn_r2

y_pred_test = model.predict(X_test_transformed)
print("Test R2 Score:", sklearn_r2(y_test, y_pred_test))


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 878us/step
Test R2 Score: -0.0037721881729799733
