In [32]:
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [34]:

data = pd.read_csv("saving_model/dataset/Churn_Modelling.csv")


data.drop(labels=["RowNumber", "CustomerId", "Surname", "Exited"],axis=1,inplace=True)

X = data.drop("EstimatedSalary", axis=1)
y = data["EstimatedSalary"]


In [35]:
categorical_cols = X.select_dtypes(include='object').columns
numerical_cols = X.select_dtypes(exclude='object').columns

print("categorical column:", categorical_cols)
print("numerical column:", numerical_cols)



categorical column: Index(['Geography', 'Gender'], dtype='object')
numerical column: Index(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember'],
      dtype='object')


In [36]:
# Defining the categories for ordinal encoding
gender_cat = ['Female', 'Male']
geo_cat = ['France', 'Spain', 'Germany']

In [37]:
# Creating pipelines for numerical and categorical features
# Pipeline for numerical features
num_pipeline = Pipeline(steps=[('scaler', StandardScaler())])
            

# Pipeline for categorical features
cat_pipeline = Pipeline(steps=[('ordinalencoder', OrdinalEncoder(categories=[geo_cat, gender_cat]))])
               # Convert categorical values into numbers

# Combine numerical and categorical pipelines

preprocessor = ColumnTransformer(
                    [("num_pipeline", num_pipeline, numerical_cols),      # Apply numerical pipeline to numerical columns
                     ("cat_pipeline", cat_pipeline, categorical_cols)]    # Apply categorical pipeline to categorical columns
                )

In [38]:
# Preprocessing
X_processed = preprocessor.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42
)


In [39]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [40]:
import tensorflow
opt = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)


In [41]:
# ANN REGRESSION MODEL
model = Sequential([
    Dense(64, activation='relu', input_shape = (X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(
    optimizer=opt,
    loss="mse"
)


In [42]:
# Train
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x278031bded0>

In [43]:
# Evaluation
y_pred = model.predict(X_test).flatten()

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("MSE:", mse)
print("RMSE:", rmse)
print("MAE:", mae)
print("R2 Score:", r2)


MSE: 3336098950.817789
RMSE: 57758.97290307186
MAE: 49916.51210296874
R2 Score: -0.010555287158572568


In [44]:
# Save model & preprocessor
model.save("model.h5")

with open("preprocessor.pkl", "wb") as f:
    pickle.dump(preprocessor, f)


  saving_api.save_model(
