In [20]:
import joblib
import pandas as pd
import numpy as np

# Load the saved models.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
random_forest_pipeline = joblib.load("random_forest.pkl")
decision_tree_pipeline = joblib.load("decision_tree.pkl")
svm_pipeline = joblib.load("svm.pkl")
lr_pipeline = joblib.load("lr.pkl")

# Loaded models keyed by a short name, for easy iteration
models = {
    "random_forest": random_forest_pipeline,
    "decision_tree": decision_tree_pipeline,
    "svm": svm_pipeline,
    "lr": lr_pipeline,
}

# Define the columns used during training.
# These columns must match the columns used in the training data.
training_columns = [
    'age', 'job', 'marital', 'education', 'default', 'housing', 'loan',
    'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays',
    'previous', 'poutcome',
]

# Example input for prediction: one row, every value wrapped in a
# one-element list. Missing values are written as np.nan (the np.NaN alias
# was removed in NumPy 2.0).
sample_input = {
    'age': [np.nan],                      # Numerical feature (left missing in this example)
    'duration': [180],                    # Numerical feature
    'campaign': [2],                      # Numerical feature
    'pdays': [10],                        # Numerical feature
    'previous': [0],                      # Numerical feature
    'job': ['admin.'],                    # Categorical feature
    'marital': ['married'],               # Categorical feature
    'education': ['university.degree'],   # Categorical feature
    'default': ['no'],                    # Categorical feature
    'housing': ['no'],                    # Categorical feature
    'loan': [np.nan],                     # Categorical feature (left missing in this example)
    'contact': ['cellular'],              # Categorical feature
    'month': ['may'],                     # Categorical feature
    'day_of_week': ['mon'],               # Categorical feature
    'poutcome': ['success'],              # Categorical feature
}

# Convert the sample input into a DataFrame
sample_df = pd.DataFrame(sample_input)

# Ensure all columns from training data are present in sample_df
missing_columns = set(training_columns) - set(sample_df.columns)
for col in missing_columns:
    sample_df[col] = 0  # Default value for missing columns (adjust as needed)

# Reorder columns to match the training data
sample_df = sample_df[training_columns]

print(f"Number of features in sample_df: {sample_df.shape[1]}")
print(f"Columns in sample_df: {sample_df.columns.tolist()}")

# Make predictions using all models. Each model is run exactly once; the
# result is both printed and collected for the CSV export below.
predictions = []
for name, model in models.items():
    prediction = model.predict(sample_df)
    print(f"Model: {name}")
    print(f"Prediction: {prediction}")
    if hasattr(model, "predict_proba"):  # Check if the model supports probability estimates
        prediction_proba = model.predict_proba(sample_df)
        print(f"Prediction Probabilities: {prediction_proba}")
    print('-' * 50)
    # sample_df holds a single row, so keep only its prediction
    predictions.append({
        "Model": name,
        "Prediction": prediction[0],
    })

# Convert predictions to a DataFrame (one row per model)
predictions_df = pd.DataFrame(predictions)

# Save predictions to a CSV file (optional)
predictions_df.to_csv("predictions.csv", index=False)

Number of features in sample_df: 15
Columns in sample_df: ['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays', 'previous', 'poutcome']
Model: random_forest
Prediction: [0]
Prediction Probabilities: [[0.7 0.3]]
--------------------------------------------------
Model: decision_tree
Prediction: [0]
Prediction Probabilities: [[1. 0.]]
--------------------------------------------------
Model: svm
Prediction: [0]
--------------------------------------------------
Model: lr
Prediction: [1]
Prediction Probabilities: [[0.1892058 0.8107942]]
--------------------------------------------------
