In [19]:
import pickle
import pandas as pd

# Load the saved pipeline
with open('order_cancellation_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Example input for prediction (dictionary or DataFrame with correct column names)
sample_input = pd.DataFrame([{
    'Age Group': 'Older',
    'Rating Category': 'Neutral',
    'Quantity': 2,
    'Total Price': 4999,
    'Add-on Total': 250,
    'Gender': 'Male',
    'Loyalty Member': 'No',
    'Product Type': 'Smartphone',
    'Shipping Type': 'Standard',
    'Payment Method': 'Credit Card',
    'Purchased Month': 12,
    'Purchased Weekday': 4,
    'Purchased Year': 2024
}])

# Predict cancellation (1 = Cancelled, 0 = Completed)
prediction = loaded_model.predict(sample_input)
prediction_proba = loaded_model.predict_proba(sample_input)

print("Prediction:", "Cancelled" if prediction[0] == 1 else "Completed")
print("Cancellation Probability:", prediction_proba[0][1])


Prediction: Cancelled
Cancellation Probability: 0.72


In [21]:
sample_input = pd.DataFrame([{
    'Age Group': 'Young',
    'Rating Category': 'Happy',
    'Quantity': 2,
    'Total Price': 30000,
    'Add-on Total': 1000,
    'Gender': 'Female',
    'Loyalty Member': 'Yes',
    'Product Type': 'Laptop',
    'Shipping Type': 'Express',
    'Payment Method': 'Debit Card',
    'Purchased Month': 6,
    'Purchased Weekday': 2,
    'Purchased Year': 2023
}])
# Prediction
prediction = loaded_model.predict(sample_input)[0]
probability = loaded_model.predict_proba(sample_input)[0][1]  # Prob. for Completed (if 1 = Completed)

print("Prediction:", "Completed" if prediction == 1 else "Cancelled")
print("Completion Probability:", round(probability, 2))


Prediction: Completed
Completion Probability: 0.64


## Model Details
Model Used: RandomForestClassifier (class_weight='balanced')

Preprocessing: Includes ordinal encoding, one-hot encoding, and custom weekday mapping.

Training Accuracy: ~64.5%

Precision-Recall Tradeoff: Focused on identifying cancelled orders due to business impact.

## Key Patterns Identified for Cancellation
Based on your exploratory data analysis:

High Cancellation Is Likely When…
* 🚚 Shipping Type is Standard
* 📱 Product Type is Mobile
* 🧓 Age Group is Older
* 💳 Payment Method is Credit Card
* 😐 Rating Category is Low