In [1]:
# Import the necessary libraries

import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [2]:
# Load the trained model
model = joblib.load('best_dt.pkl')
print("Model loaded successfully")

Model loaded successfully


In [3]:
# Create a new customer DataFrame with the correct structure
new_customer = pd.DataFrame({
    'age': [35, 42, 28],  
    'job': ['admin.', 'technician', 'retired'],  
    'marital': ['married', 'single', 'divorced'],
    'education': ['high.school', 'basic.6y', 'university.degree'],
    'default': ['no', 'no', 'yes'],
    'housing': ['yes', 'no', 'yes'],
    'loan': ['no', 'yes', 'no'],
    'contact': ['cellular', 'telephone', 'cellular'],
    'month': ['may', 'jul', 'jun'],
    'day_of_week': ['mon', 'wed', 'fri'],  # Fixed column name
    'duration': [250, 180, 500],
    'campaign': [1, 3, 2],
    'pdays': [-1, 200, -1],
    'previous': [0, 2, 1],
    'poutcome': ['unknown', 'failure', 'success'],
    'emp.var.rate': [-1.8, 1.1, 0.5],  
    'cons.price.idx': [92.89, 93.44, 93.20],  
    'cons.conf.idx': [-46.2, -36.4, -40.0],  
    'euribor3m': [1.25, 4.85, 2.10],  
    'nr.employed': [5099.1, 5228.1, 5195.8]
})

print("New customer DataFrame created successfully")

New customer DataFrame created successfully


In [4]:
# Label encode Education

# Define the order of education levels
education_order = ['basic.6y', 'high.school', 'university.degree']

# Apply label encoding based on this order
new_customer['education'] = new_customer['education'].apply(lambda x: education_order.index(x))

In [5]:
# One-Hot Encoding for other Categorical Columns
new_customer = pd.get_dummies(new_customer, columns = ['job', 'marital', 'contact', 'month', 'day_of_week', 'poutcome'], drop_first=True)


In [6]:
# Convert 'yes'/'no' columns to numeric
new_customer["housing"] = new_customer["housing"].map({"yes": 1, "no": 0})
new_customer["loan"] = new_customer["loan"].map({"yes": 1, "no": 0})


In [7]:
new_customer.shape

(3, 25)

In [9]:
original_features = joblib.load('features.pkl')  
new_customer = new_customer.reindex(columns=original_features, fill_value=0)  # Ensure alignment with the original features

In [11]:
# Make predictions
best_dt = joblib.load('best_dt.pkl')  
predictions = best_dt.predict(new_customer)  
probabilities = best_dt.predict_proba(new_customer)  
print(predictions)  
print(probabilities)


[0 0 0]
[[1. 0.]
 [1. 0.]
 [1. 0.]]


**Interpretation of Results**

The model predicts 0 for all three new customer entries, indicating that it classifies them as not subscribing (no) to the bank’s term deposit.

Observations:
- The probability scores suggest that the model is highly confident in predicting class 0 (no subscription) for all cases.
- If the dataset is imbalanced, the model may have learned to favor the majority class, leading to biased predictions.
- Despite applying SMOTE (to balance the dataset) and hyperparameter tuning (to optimize model performance), the predictions remain the same, suggesting that the model may still be influenced by class distribution.
- Further investigation into feature importance is necessary to determine whether certain features are disproportionately affecting the predictions.

This analysis highlights the need for additional model evaluation, potentially exploring alternative algorithms or further adjustments to improve predictive performance.