In [113]:
#Import Packages
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
import pickle

In [114]:
# Load the churn dataset from a CSV path relative to the working directory.
csv_path = "Churn_Modelling.csv"
data = pd.read_csv(csv_path)
# Echo the frame; pandas elides the middle rows when printing a large frame.
print(data)


      RowNumber  CustomerId    Surname  CreditScore Geography  Gender  Age  \
0             1    15634602   Hargrave          619    France  Female   42   
1             2    15647311       Hill          608     Spain  Female   41   
2             3    15619304       Onio          502    France  Female   42   
3             4    15701354       Boni          699    France  Female   39   
4             5    15737888   Mitchell          850     Spain  Female   43   
...         ...         ...        ...          ...       ...     ...  ...   
9995       9996    15606229   Obijiaku          771    France    Male   39   
9996       9997    15569892  Johnstone          516    France    Male   35   
9997       9998    15584532        Liu          709    France  Female   36   
9998       9999    15682355  Sabbatini          772   Germany    Male   42   
9999      10000    15628319     Walker          792    France  Female   28   

      Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMemb

In [115]:
# Drop the columns that are not wanted as model inputs.
# NOTE: `data` is rebound here, so running this cell a second time raises
# KeyError because the columns are already gone.
unwanted_columns = ['CustomerId', 'Surname', 'RowNumber']
data = data.drop(columns=unwanted_columns)
print(data.head())

   CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0          619    France  Female   42       2       0.00              1   
1          608     Spain  Female   41       1   83807.86              1   
2          502    France  Female   42       8  159660.80              3   
3          699    France  Female   39       1       0.00              2   
4          850     Spain  Female   43       2  125510.82              1   

   HasCrCard  IsActiveMember  EstimatedSalary  Exited  
0          1               1        101348.88       1  
1          0               1        112542.58       0  
2          1               0        113931.57       1  
3          0               0         93826.63       0  
4          1               1         79084.10       0  


In [116]:
# Label-encode the Gender column as integers.
# The fitted encoder is kept in `label_encoder_gender` because it is pickled
# in a later cell.
# NOTE: the Gender column is overwritten in place, so re-running this cell
# refits the encoder on the already-encoded integers instead of the labels.
label_encoder_gender = LabelEncoder()
gender_codes = label_encoder_gender.fit_transform(data['Gender'])
data['Gender'] = gender_codes
print(data)

      CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0             619    France       0   42       2       0.00              1   
1             608     Spain       0   41       1   83807.86              1   
2             502    France       0   42       8  159660.80              3   
3             699    France       0   39       1       0.00              2   
4             850     Spain       0   43       2  125510.82              1   
...           ...       ...     ...  ...     ...        ...            ...   
9995          771    France       1   39       5       0.00              2   
9996          516    France       1   35      10   57369.61              1   
9997          709    France       0   36       7       0.00              1   
9998          772   Germany       1   42       3   75075.31              2   
9999          792    France       0   28       4  130142.79              1   

      HasCrCard  IsActiveMember  EstimatedSalary  Exited  
0   

In [117]:
# One-hot encode the Geography column.
# A one-column DataFrame (double brackets) is passed because the encoder
# expects 2-D input.
one_hot_encoding_geo = OneHotEncoder()
geography_frame = data[['Geography']]
encoded_geo = one_hot_encoding_geo.fit_transform(geography_frame)
# The encoder output is converted with .toarray() so the dense 0/1 matrix prints.
print(encoded_geo.toarray())

[[1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 ...
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]


In [118]:
# Wrap the one-hot Geography matrix in a DataFrame with readable column names.
#
# The matrix is re-derived from the already-fitted encoder instead of being
# read from the previous value of `encoded_geo`. This cell rebinds that name
# from a sparse matrix to a DataFrame, so reading it back would fail with
# AttributeError (.toarray) if the cell were executed a second time.
#
# index=data.index keeps the rows aligned with `data`, so the axis=1 concat
# that follows matches rows by label even if `data` no longer has a default
# 0..n-1 index.
geo_matrix = one_hot_encoding_geo.transform(data[['Geography']]).toarray()
encoded_geo = pd.DataFrame(
    geo_matrix,
    columns=one_hot_encoding_geo.get_feature_names_out(['Geography']),
    index=data.index,
)
print(encoded_geo)

      Geography_France  Geography_Germany  Geography_Spain
0                  1.0                0.0              0.0
1                  0.0                0.0              1.0
2                  1.0                0.0              0.0
3                  1.0                0.0              0.0
4                  0.0                0.0              1.0
...                ...                ...              ...
9995               1.0                0.0              0.0
9996               1.0                0.0              0.0
9997               1.0                0.0              0.0
9998               0.0                1.0              0.0
9999               1.0                0.0              0.0

[10000 rows x 3 columns]


In [119]:
# Concatenate the one-hot Geography columns with the remaining features.
# The raw Geography text column is removed first; `data` itself is left untouched.
features_without_geo = data.drop(columns=['Geography'])
encoded_data = pd.concat([features_without_geo, encoded_geo], axis=1)
print(encoded_data)

      CreditScore  Gender  Age  Tenure    Balance  NumOfProducts  HasCrCard  \
0             619       0   42       2       0.00              1          1   
1             608       0   41       1   83807.86              1          0   
2             502       0   42       8  159660.80              3          1   
3             699       0   39       1       0.00              2          0   
4             850       0   43       2  125510.82              1          1   
...           ...     ...  ...     ...        ...            ...        ...   
9995          771       1   39       5       0.00              2          1   
9996          516       1   35      10   57369.61              1          1   
9997          709       0   36       7       0.00              1          0   
9998          772       1   42       3   75075.31              2          1   
9999          792       0   28       4  130142.79              1          1   

      IsActiveMember  EstimatedSalary  Exited  Geog

In [120]:
# Save the fitted label encoder and one-hot encoder as pickle files.
fitted_encoders = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'one_hot_encoding_geo.pkl': one_hot_encoding_geo,
}
for pickle_name, encoder in fitted_encoders.items():
    # Binary write mode is required by pickle.
    with open(pickle_name, 'wb') as handle:
        pickle.dump(encoder, handle)

In [121]:
# Split the data into train and test sets.
# EstimatedSalary is the regression target; every other column is a feature.
target_column = 'EstimatedSalary'
y = encoded_data[target_column]
X = encoded_data.drop(columns=[target_column])

# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

for label, part in (("X_Train", X_train), ("y_train", y_train)):
    print(label)
    print(part)

X_Train
      CreditScore  Gender  Age  Tenure    Balance  NumOfProducts  HasCrCard  \
9254          686       1   32       6       0.00              2          1   
1561          632       1   42       4  119624.60              2          1   
1670          559       1   24       3  114739.92              1          1   
6087          561       0   27       9  135637.00              1          1   
6669          517       1   56       9  142147.32              1          0   
...           ...     ...  ...     ...        ...            ...        ...   
5734          768       1   54       8   69712.74              1          1   
5191          682       0   58       1       0.00              1          1   
5390          735       0   38       1       0.00              3          0   
860           667       1   43       8  190227.46              1          1   
7270          697       1   51       1  147910.30              1          1   

      IsActiveMember  Exited  Geography_Fra

In [122]:
# Standardise the features.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows do not influence the fitted statistics.
# NOTE: X_train / X_test are rebound from DataFrames to NumPy arrays here.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Save the fitted scaler as a pickle file.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

for label, part in (("X_Train", X_train), ("y_train", y_train)):
    print(label)
    print(part)

X_Train
[[ 0.35649971  0.91324755 -0.6557859  ...  1.00150113 -0.57946723
  -0.57638802]
 [-0.20389777  0.91324755  0.29493847 ... -0.99850112  1.72572313
  -0.57638802]
 [-0.96147213  0.91324755 -1.41636539 ... -0.99850112 -0.57946723
   1.73494238]
 ...
 [ 0.86500853 -1.09499335 -0.08535128 ...  1.00150113 -0.57946723
  -0.57638802]
 [ 0.15932282  0.91324755  0.3900109  ...  1.00150113 -0.57946723
  -0.57638802]
 [ 0.47065475  0.91324755  1.15059039 ... -0.99850112  1.72572313
  -0.57638802]]
y_train
9254    179093.26
1561    195978.86
1670     85891.02
6087    153080.40
6669     39488.04
          ...    
5734     69381.05
5191       706.50
5390     92220.12
860      97508.04
7270     53581.14
Name: EstimatedSalary, Length: 8000, dtype: float64


ANN REGRESSION

In [123]:
# Import necessary packages
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [124]:
# Build the model: a feed-forward regressor with two ReLU hidden layers and a
# single linear output unit.
#
# The input width is declared with an explicit Input object as the first entry
# instead of `input_shape=` on the first Dense layer. Keras emits a UserWarning
# for the `input_shape=` form inside Sequential models; the resulting
# architecture is the same.
model = Sequential([
    tensorflow.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),                       # Hidden Layer 1
    Dense(34, activation='relu'),                       # Hidden Layer 2
    Dense(1),                                           # Output Layer (no activation)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [125]:
# Compile the model.
# Mean absolute error is used both as the training loss and as the reported
# metric, so `loss` and `mae` show the same value in the training log.
model.compile(
    loss='mean_absolute_error',
    optimizer='adam',
    metrics=['mae'],
)
model.summary()

In [126]:
# Set up TensorBoard logging.
# Each run gets its own timestamped sub-directory under logs/fit/. The trailing
# slash matters: without it the timestamp is glued onto the directory name
# (logs/fit20241127-230616), producing one sibling directory per run instead of
# grouping the runs under logs/fit.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/fit/' + run_stamp
# histogram_freq=1 asks the callback to record histograms every epoch.
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)


In [127]:
# Set up early stopping on the validation loss.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,  # keep the weights from the best epoch
    patience=10,                # epochs without improvement before stopping
)


In [128]:
# Train the model for up to 100 epochs.
# The held-out split is passed as validation data, so it supplies the
# `val_loss` that the early-stopping callback monitors.
training_callbacks = [early_stopping_callback, tensorboard_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=training_callbacks,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 100411.6328 - mae: 100411.6328 - val_loss: 98521.4766 - val_mae: 98521.4766
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 100183.6250 - mae: 100183.6250 - val_loss: 96959.4375 - val_mae: 96959.4375
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 97644.8516 - mae: 97644.8516 - val_loss: 92951.8984 - val_mae: 92951.8984
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 94246.1719 - mae: 94246.1719 - val_loss: 86326.6562 - val_mae: 86326.6562
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 86993.0859 - mae: 86993.0859 - val_loss: 77800.8281 - val_mae: 77800.8281
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 77197.3281 - mae: 77197.3281 - val_loss: 69017.6797 - va

In [129]:
# TensorBoard: load the notebook extension that provides the %tensorboard magic.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [130]:
%tensorboard --logdir logs/fit20241127-230616

Reusing TensorBoard on port 6006 (pid 6752), started 0:05:01 ago. (Use '!kill 6752' to kill it.)

In [131]:
# Save the trained model to an HDF5 (.h5) file in the working directory.
model.save(filepath='regression_model.h5')

