In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder
import pickle

In [2]:
data = pd.read_csv('Churn_Modelling.csv')

In [3]:
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

In [4]:
## Encoding categorical variables
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])


In [5]:
onehot_encoder_geography = OneHotEncoder(handle_unknown='ignore')
geography_encoded = onehot_encoder_geography.fit_transform(data[['Geography']]).toarray()
geography_encoded_df = pd.DataFrame(geography_encoded, columns=onehot_encoder_geography.get_feature_names_out(['Geography']))

In [6]:
## combining the encoded geography with the original data
data = pd.concat([data, geography_encoded_df], axis=1)
data = data.drop(['Geography'], axis=1)

In [7]:
## splitting the data into features and target variable
X = data.drop('EstimatedSalary', axis=1)
y = data['EstimatedSalary']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

array([ 0.35649971,  0.91324755, -0.6557859 ,  0.34567966, -1.21847056,
        0.80843615,  0.64920267,  0.97481699, -0.50857963,  1.00150113,
       -0.57946723, -0.57638802])

In [14]:
# save the encoders ans scalers for future use
with open('label_encoder_gender.pkl', 'wb') as f:
    pickle.dump(label_encoder_gender, f)
with open('onehot_encoder_geography.pkl', 'wb') as f:
    pickle.dump(onehot_encoder_geography, f)
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

In [15]:
## ANN Regression model
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [16]:
# Building the ANN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'] )

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
model.summary()

In [18]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

log_dir = "regression/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [19]:
## Set up early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [20]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[early_stopping, tensorboard_callback],
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 13320153088.0000 - mae: 100165.0078 - val_loss: 13012390912.0000 - val_mae: 98545.2500
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 13106048000.0000 - mae: 99179.3203 - val_loss: 12753170432.0000 - val_mae: 97234.1094
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12947879936.0000 - mae: 97987.3516 - val_loss: 12085833728.0000 - val_mae: 93856.1172
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12128760832.0000 - mae: 94365.1250 - val_loss: 10942882816.0000 - val_mae: 88081.4688
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 10905615360.0000 - mae: 88117.6953 - val_loss: 9413498880.0000 - val_mae: 80409.9453
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir regression/fit --host localhost --port 6006

In [21]:
## Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test MAE: {mae}")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3428677632.0000 - mae: 50902.8203  
Test Loss: 3358517248.0, Test MAE: 50112.671875


In [22]:
model.save('salary_regression_model.h5')

