# Regression
### Target is "EstimatedSalary"

In [6]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

In [2]:
df = pd.read_csv("Churn_Modelling.csv")
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
cols_to_drop = ['RowNumber', 'CustomerId', 'Surname']
df = df.drop(cols_to_drop, axis=1)

### Train Test Split

In [5]:
X = df.drop(['EstimatedSalary'],axis=1)
y = df['EstimatedSalary'].astype('float32')
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,y, train_size=0.75, random_state=42)

### Preprocessing

In [19]:
def preprocessor():
  # Column Transformer
  ct_steps = [('ohe', OneHotEncoder(sparse_output=False),['Geography']),
          ('oe', OrdinalEncoder(),['Gender'])]
  ct = ColumnTransformer(transformers=ct_steps, remainder='passthrough')

  # Pipeline
  pipeline_steps = [('column_transformer',ct),
                    ('scaler',StandardScaler())]
  pipeline = Pipeline(steps=pipeline_steps)
#   pipeline.set_output(transform='pandas')

  return pipeline

In [21]:
pipeline = preprocessor()
Xtrain_preprocessed = pipeline.fit_transform(Xtrain)
Xtest_preprocessed = pipeline.transform(Xtest)

In [36]:
joblib.dump(pipeline, 'preprocessor_regressor.joblib')

['preprocessor_regressor.joblib']

### ANN Model

In [27]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [24]:
# Build the ANN model

model = Sequential([
    InputLayer(input_shape=(Xtrain_preprocessed.shape[1],)),
    Dense(64, activation='relu'), # 1st hidden layer
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)
])
model.summary()



In [25]:
model.compile(optimizer='adam',
              loss='mean_absolute_error',
              metrics=['mae'])

### Setup Tensorboard

In [29]:
log_dir = 'logs/regression/fit/' + datetime.datetime.now().strftime('%Y%m%d - %H%M%S')
tensorboard_callback = TensorBoard(log_dir = log_dir,
                                   histogram_freq = 1)

### Setup Early Stopping

In [30]:
early_stopping_callback = EarlyStopping(monitor='val_loss',
                                        restore_best_weights=True,
                                        patience=10)

### Train the model

In [31]:
history = model.fit(Xtrain_preprocessed,Ytrain,
                    validation_data=(Xtest_preprocessed,Ytest),
                    epochs=100,
                    callbacks=[tensorboard_callback,early_stopping_callback])

Epoch 1/100
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 100414.7969 - mae: 100414.7969 - val_loss: 97000.5000 - val_mae: 97000.5000
Epoch 2/100
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - loss: 97247.3906 - mae: 97247.3906 - val_loss: 78449.3203 - val_mae: 78449.3203
Epoch 3/100
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 72032.5938 - mae: 72032.5938 - val_loss: 52388.4219 - val_mae: 52388.4219
Epoch 4/100
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 51891.3164 - mae: 51891.3164 - val_loss: 50352.6484 - val_mae: 50352.6484
Epoch 5/100
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 49959.3242 - mae: 49959.3242 - val_loss: 50338.9805 - val_mae: 50338.9805
Epoch 6/100
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 50428.3086 - mae: 50428.3086 - val_loss: 50260.0430 - v

### Tensorboard

In [32]:
%load_ext tensorboard

In [33]:
%tensorboard --logdir logs/regression/fit

### Evaluate on test data

In [34]:
test_loss, test_mae = model.evaluate(Xtest_preprocessed,Ytest)
print(f"Test MAE: {test_mae}")

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 50720.1211 - mae: 50720.1211
Test MAE: 49982.265625


In [35]:
model.save("regression_model.keras")