In [2]:
import numpy as np
import pandas as pd
import pickle
from tensorflow.keras.models import load_model


In [3]:
# Load model and preprocessing tools
# Directory that holds the saved model and the pickled preprocessing objects.
ARTIFACTS_DIR = "artifacts"


def _load_pickle(path):
    """Return the object deserialized from the pickle file at `path`.

    SECURITY: pickle.load can execute arbitrary code during deserialization.
    Only point this at artifact files produced by a trusted training run.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = load_model(f"{ARTIFACTS_DIR}/model.h5")

# Preprocessing objects; judging by the file names these are a gender label
# encoder, a geography one-hot encoder and a feature scaler (NOTE(review):
# confirm against the training notebook that produced them).
le_gender = _load_pickle(f"{ARTIFACTS_DIR}/label_encoder_gender.pkl")
ohe_geo = _load_pickle(f"{ARTIFACTS_DIR}/onehot_encoder_geo.pkl")
scaler = _load_pickle(f"{ARTIFACTS_DIR}/scaler.pkl")




In [4]:
def preprocess_input(data, le_gender, ohe_geo, scaler):
    """
    Preprocess a single input dictionary for prediction.

    The caller's dictionary is left unmodified, so the same record can be
    passed in repeatedly.

    Parameters
    ----------
    data : dict
        One record. Must contain 'Gender' and 'Geography' (raw values) plus
        'CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
        'HasCrCard', 'IsActiveMember' and 'EstimatedSalary'. Any other keys
        are ignored.
    le_gender : object
        Fitted encoder whose ``transform(sequence)`` returns an indexable
        result (presumably a scikit-learn LabelEncoder - confirm).
    ohe_geo : object
        Fitted encoder exposing ``transform(2-D input)`` and ``categories_``
        (presumably a scikit-learn OneHotEncoder - confirm). Its output may
        be either a sparse matrix or a dense array.
    scaler : object
        Fitted scaler exposing ``transform``.

    Returns
    -------
    The result of ``scaler.transform`` applied to the one-row feature frame.

    Raises
    ------
    KeyError
        If a required key is missing from ``data``.
    """
    # Work on a shallow copy: writing the encoded gender back into the
    # caller's dict would make a second call with the same dict fail.
    record = dict(data)

    # Encode Gender
    record['Gender'] = le_gender.transform([record['Gender']])[0]

    # One-hot encode Geography; the raw column is removed from the record
    geography = record.pop('Geography')
    geo_encoded = ohe_geo.transform([[geography]])
    # Densify only when the encoder returned something with toarray();
    # an encoder configured for dense output has no such method.
    if hasattr(geo_encoded, "toarray"):
        geo_encoded = geo_encoded.toarray()
    geo_columns = list(ohe_geo.categories_[0])
    geo_df = pd.DataFrame(geo_encoded, columns=geo_columns)

    # Create DataFrame from the input and append the one-hot columns
    df_input = pd.concat([pd.DataFrame([record]), geo_df], axis=1)

    # Ensure column order matches training
    feature_order = ['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance',
                     'NumOfProducts', 'HasCrCard', 'IsActiveMember',
                     'EstimatedSalary'] + geo_columns
    df_input = df_input[feature_order]

    # Scale
    return scaler.transform(df_input)


In [6]:
# Sample customer record used to exercise the prediction pipeline.
# Geography and Gender are given as raw strings; the remaining fields are numeric.
new_data = dict(
    CreditScore=600,
    Geography="France",
    Gender="Female",
    Age=45,
    Tenure=3,
    Balance=60000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=50000,
)


In [7]:
# Score the single customer record in `new_data` with the loaded model.
# pandas is already imported in the notebook's first cell, so it is not
# imported again here.

# Step 1: Label encode Gender (transform takes a sequence; [0] unwraps the single result)
gender_encoded = le_gender.transform([new_data["Gender"]])[0]

# Step 2: One-hot encode Geography (one row, one column as input)
geo_encoded = ohe_geo.transform([[new_data["Geography"]]])
# Depending on how the encoder was configured the result is either a dense
# array or a sparse matrix; densify only when needed so both cases work.
if hasattr(geo_encoded, "toarray"):
    geo_encoded = geo_encoded.toarray()

# Step 3: Create dataframe for easy concatenation, one column per category
geo_df = pd.DataFrame(geo_encoded, columns=ohe_geo.categories_[0])

# Step 4: Build final feature dataframe for scaling and prediction.
# The key order here fixes the column order handed to the scaler.
input_df = pd.DataFrame([{
    "CreditScore": new_data["CreditScore"],
    "Gender": gender_encoded,
    "Age": new_data["Age"],
    "Tenure": new_data["Tenure"],
    "Balance": new_data["Balance"],
    "NumOfProducts": new_data["NumOfProducts"],
    "HasCrCard": new_data["HasCrCard"],
    "IsActiveMember": new_data["IsActiveMember"],
    "EstimatedSalary": new_data["EstimatedSalary"],
}])

input_df = pd.concat([input_df, geo_df], axis=1)

# Step 5: Scale input features
input_scaled = scaler.transform(input_df)

# Step 6: Predict using your loaded model
prediction = model.predict(input_scaled)

# prediction is indexed [row][output]; there is one row and the first output is reported
print(f"Prediction (probability of churn): {prediction[0][0]:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Prediction (probability of churn): 0.0604


