In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [7]:
# Load orders table
df1 = pd.read_csv('orders_table.csv')

# Convert Purchase_Date to datetime if it exists
if 'Purchase_Date' in df1.columns:
    df1['Purchase_Date'] = pd.to_datetime(df1['Purchase_Date'], errors='coerce')


In [15]:
# Target
y = df1['order_status']

# Features: drop order_status, order_id, transaction_id, Purchase_Date, User_ID
X = df1.drop(columns=[
    'order_status',
    'order_id',
    'transaction_id',
    'Purchase_Date',  # optional: dates not used directly
    'User_ID'         # remove user identifier
], errors='ignore')


In [16]:
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()


In [17]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [18]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=200, random_state=42))
])


In [19]:
pipeline.fit(X_train, y_train)


In [20]:
y_pred = pipeline.predict(X_test)

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

   Cancelled       0.14      0.02      0.04        43
   Delivered       0.84      0.98      0.91       359
    Returned       0.00      0.00      0.00        23

    accuracy                           0.83       425
   macro avg       0.33      0.33      0.32       425
weighted avg       0.73      0.83      0.77       425

Confusion Matrix:
 [[  1  42   0]
 [  6 351   2]
 [  0  23   0]]


In [37]:
# ================================
# Step 8: Interactive Prediction for a New Order
# ================================

import pandas as pd


print("\n--- Predict Order Status for a New Order ---")

# Ask user for specific inputs
new_order_data = {
    'Payment_Method': [input("Payment Method (e.g., UPI, Credit Card, Debit Card, Net Banking): ")],
    'shipping_cost': [float(input("Shipping Cost (numeric): "))],
    'total_items': [int(input("Total Items (numeric): "))],
    'order_total': [float(input("Order Total (numeric): "))]
}

# Convert to DataFrame
new_order_df = pd.DataFrame(new_order_data)

# Fill missing numeric/categorical columns used in training
for col in X.select_dtypes(include=['int64', 'float64']).columns:
    if col not in new_order_df.columns:
        new_order_df[col] = 0
for col in X.select_dtypes(include=['object']).columns:
    if col not in new_order_df.columns:
        new_order_df[col] = 'Unknown'

# Reorder columns to match training data
new_order_prepared = new_order_df[X.columns.tolist()]

# Predict
predicted_status = pipeline.predict(new_order_prepared)
new_order_prepared['predicted_order_status'] = predicted_status

# Show result
print("\nPredicted Order Status:")
print(new_order_prepared[['predicted_order_status']])



--- Predict Order Status for a New Order ---

Predicted Order Status:
  predicted_order_status
0              Delivered
