In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
from tensorflow.keras.models import load_model

In [4]:
#We use pickle in Python when we want to save (serialize) Python objects into a file or load (deserialize) them back later
#Load the trained model, scaler, pickle, onehot
model = load_model('model.h5')

#load the enocder and scaler
with open('one_hot_encode.pk1', 'rb') as file:
    one_hot_encoder = pickle.load(file)
    
with open('label_encoder_gender.pk1', 'rb') as file:
    label_encoder_gender  = pickle.load(file)
    
with open('Scaler.pk1', 'rb') as file:
    scaler = pickle.load(file)



In [57]:
#Example of input data
input_data = {
    "Credit Score": 600,
    "Geography": "France",
    "Gender" : "Male",
    "Age": 40,
    "Tenure" : 3,
    "Balance" : 60000,
    "NumOfProducts": 2,
    "HascaCard" : 1,
    "IsActiveMember" : 1,
    "EstimateSalary" : 50000
}

In [82]:
#One hot eocode Geography

# We apply the trained OneHotEncoder on the 'Geography' column value from input_data.
# This converts the categorical text (e.g., "France", "Spain", "Germany") into a 
# numerical one-hot encoded array (binary vector). 
# .transform() ensures we use the same encoding mapping learned during training, 
# and .toarray() converts the sparse matrix result into a dense NumPy array 
# so it can be used in model prediction
geo_encoded = one_hot_encoder.transform([input_data['Geography']])


In [83]:
# We create a DataFrame from the one-hot encoded array for easier handling.  
# 'geo_encoded' is converted into a DataFrame with proper column names.  
# The column names are fetched using one_hot_encoder.get_feature_names_out(['Geography']),  
# which gives names like 'Geography_France', 'Geography_Germany', etc.  
# This makes the encoded data human-readable and aligned with the model’s input features.
geo_encoded_df = pd.DataFrame(geo_encoded, columns=one_hot_encoder.get_feature_names_out(['Geograohy']))

In [84]:
geo_encoded_df

Unnamed: 0,Geograohy_France
0,1.0


In [86]:
input_data = pd.DataFrame([[input_data]])

In [61]:
#combined one hot encoded column with input_data
input_df = pd.concat([input_data.reset_index(drop=True),geo_encoded_df], axis=1)
input_df

Unnamed: 0,Credit Score,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HascaCard,IsActiveMember,EstimateSalary,Geograohy_France
0,600,France,Male,40,3,60000,2,1,1,50000,1.0


In [62]:
#Encode Categorical Variable
input_df['Gender'] = label_encoder_gender.transform(input_df['Gender'])
input_df

Unnamed: 0,Credit Score,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HascaCard,IsActiveMember,EstimateSalary,Geograohy_France
0,600,France,1,40,3,60000,2,1,1,50000,1.0


In [49]:
#concetenation one hot encoded
input_df = pd.concat([input_df.drop('Geography', axis=1), geo_encoded_df],axis=1)
input_df

Unnamed: 0,Credit Score,Gender,Age,Tenure,Balance,NumOfProducts,HascaCard,IsActiveMember,EstimateSalary,Geograohy_France,Geograohy_France.1
0,600,1,40,3,60000,2,1,1,50000,1.0,1.0


In [88]:
# Example: list of feature names used in training after preprocessing
training_columns = scaler.feature_names_in_   # available in sklearn >= 1.0

# Align input_df with training columns (add missing, reorder, fill with 0)
input_df = input_df.reindex(columns=training_columns, fill_value=0)

# Now safe to scale
Scaler_df = scaler.transform(input_df)

In [89]:
Scaler_df

array([[-6.76262379,  0.91324755,  0.10479359, -0.69539349, -0.25781119,
         0.80843615, -1.54035103,  0.97481699, -1.74616572, -0.99850112,
        -0.57946723, -0.57638802]])

In [90]:
#predict churn
prediction = model.predict(Scaler_df)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 642ms/step


array([[0.11219859]], dtype=float32)

In [91]:
prediction_proba = prediction[0][0]
prediction_proba

np.float32(0.11219859)

In [92]:
if prediction_proba > 0.5:
    print("The customer is likely to churn")
else:
    print("The customer is not likely to churn")

The customer is not likely to churn
