In [70]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler,LabelEncoder

In [71]:
# Restore the persisted inference artifacts: the Keras model plus the three
# preprocessing objects that were pickled alongside it.
# NOTE: pickle.load executes arbitrary code from the file, so these .pkl files
# must come from a trusted source.

model = load_model('model.h5')

# One-hot encoder for the 'Geography' column
with open('onehot_encoder_geo.pkl', mode='rb') as geo_encoder_fh:
    onehot_encoder_geo = pickle.load(geo_encoder_fh)

# Label encoder for the 'Gender' column
with open('label_encoder_gender.pkl', mode='rb') as gender_encoder_fh:
    label_encoder_gender = pickle.load(gender_encoder_fh)

# Feature scaler applied to the fully encoded frame
with open('scaler.pkl', mode='rb') as scaler_fh:
    scaler = pickle.load(scaler_fh)



In [53]:
# Raw attributes of the single customer to score. Key order is significant:
# it becomes the column order of the DataFrame built from this record.
input_data = dict(
    CreditScore=600,
    Geography='France',
    Gender='Male',
    Age=40,
    Tenure=3,
    Balance=60000,
    NumberOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=50000,
)

In [72]:
# Wrap the single record in a list so it becomes one row of a DataFrame
input_df = pd.DataFrame(data=[input_data])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumberOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,Male,40,3,60000,2,1,1,50000


In [73]:
# One-hot encode 'Geography' (double brackets keep the selection 2-D,
# which is the shape the encoder's transform expects)
geo_encoded_array = onehot_encoder_geo.transform(input_df[['Geography']])

# Encode 'Gender' with the fitted label encoder, keeping the column in place
input_df = input_df.assign(
    Gender=label_encoder_gender.transform(input_df['Gender'])
)

In [74]:
# Densify the encoder's sparse output and label every column with the
# feature name the encoder generates for the 'Geography' input
geo_feature_names = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(data=geo_encoded_array.toarray(), columns=geo_feature_names)

In [75]:
# Place the one-hot columns next to the original features; the left frame's
# index is reset so both frames line up row-for-row
frames_side_by_side = [input_df.reset_index(drop=True), geo_encoded_df]
input_processed_df = pd.concat(frames_side_by_side, axis="columns")

In [76]:
# The raw string 'Geography' column is now redundant with its one-hot columns
input_processed_df = input_processed_df.drop(columns=['Geography'])

In [77]:
# Display the encoded frame (raw 'Geography' replaced by its one-hot columns)
input_processed_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumberOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,60000,2,1,1,50000,1.0,0.0,0.0


In [79]:
# Align the encoded frame with the columns the scaler was fitted on, then scale.
#
# Errors are no longer swallowed: if alignment or scaling fails, the exception
# propagates so that `input_scaled` is never left undefined (or stale from an
# earlier run) for the prediction step that consumes it.

# 1. The input record uses 'NumberOfProducts'; rename it to 'NumOfProducts',
#    the name used when selecting the scaler's expected columns below.
#    Rebinding (rather than inplace=True) avoids mutating a frame that has
#    already been displayed.
if 'NumberOfProducts' in input_processed_df.columns:
    input_processed_df = input_processed_df.rename(
        columns={'NumberOfProducts': 'NumOfProducts'}
    )
    print("Fixed column name: 'NumberOfProducts' -> 'NumOfProducts'")
elif 'NumOfProducts' in input_processed_df.columns:
    # Only claim the name is correct when the column is actually present
    print("Column 'NumOfProducts' already correct.")

# 2. Exact feature names, in order, recorded by the scaler at fit time
expected_features = scaler.feature_names_in_

# 3. Re-order the frame to match; a missing column surfaces here as KeyError
try:
    final_input_df = input_processed_df[expected_features]
except KeyError as e:
    print("\n--- ERROR ---")
    print(f"A column is still missing: {e}")
    print("Expected columns:", list(expected_features))
    print("Your columns:", list(input_processed_df.columns))
    raise  # stop here instead of continuing without scaled input

# 4. Scale the correctly named and ordered data
input_scaled = scaler.transform(final_input_df)

print("\n--- Scaling Successful ---")
print(input_scaled)

Fixed column name: 'NumberOfProducts' -> 'NumOfProducts'

--- Scaling Successful ---
[[-0.53598516  0.91324755  0.10479359 -0.69539349 -0.25781119  0.80843615
   0.64920267  0.97481699 -0.87683221  1.00150113 -0.57946723 -0.57638802]]


In [80]:
# Run the scaled feature row through the loaded model to obtain the churn score

prediction = model.predict(x=input_scaled)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step


array([[0.03601537]], dtype=float32)

In [83]:
# Turn the model output into a yes/no churn verdict.
# `prediction` is a 2-D array, so pull out the single score explicitly rather
# than branching on the truth value of an array comparison, which raises
# ValueError as soon as the array holds more than one element.
CHURN_THRESHOLD = 0.5  # scores strictly above this are reported as churn
churn_probability = float(prediction[0][0])  # first (and only) scored record

if churn_probability > CHURN_THRESHOLD:
    print("The customer is likely to churn.")
else:
    print("The customer is unlikely to churn.")

The customer is unlikely to churn.
