In [2]:
import pandas as pd
import numpy as np
import joblib

# Load processed data and trained model
df = pd.read_csv("../Data/processed_wallet_data.csv")
model = joblib.load("../artifacts/RandomForest_wallet_model.pkl")

print("Model and dataset loaded successfully.")
df.head()


Model and dataset loaded successfully.


Unnamed: 0,User_ID,Month,Year,total_kwh,avg_kwh,total_cost,avg_cost,avg_wallet_balance,avg_session_duration,avg_cost_efficiency,peak_hour_ratio,City,Vehicle_Type,Subscription_Type,Payment_Mode,Charger_Type
0,U00001,12,2024,6.12,6.12,72.46,72.46,336.08,60.0,11.839869,0.0,Hyderabad,2W,Basic,UPI,Superfast
1,U00002,6,2025,39.2,39.2,236.38,236.38,778.06,60.0,6.030102,0.0,Ahmedabad,4W,Basic,Credit Card,Slow
2,U00003,5,2025,27.11,27.11,184.35,184.35,1593.61,60.0,6.800074,0.0,Delhi,4W,Premium,UPI,Slow
3,U00004,2,2025,9.41,9.41,57.59,57.59,433.03,60.0,6.120085,0.0,Chennai,4W,Basic,Credit Card,Slow
4,U00005,1,2025,38.95,38.95,309.26,309.26,1788.99,60.0,7.939923,0.0,Delhi,2W,Pay-as-you-go,Credit Card,Superfast


In [3]:
# Recreate enriched features (same logic as training)
df['sessions_per_user_month'] = df.groupby(['User_ID', 'Month', 'Year'])['Month'].transform('count')
df['cost_per_kwh_est'] = df['avg_cost_efficiency'] * df['peak_hour_ratio'].fillna(0)
df['wallet_to_cost_ratio'] = df['avg_wallet_balance'] / (df['avg_cost'] + 1e-6)
df['vehicle_encoded'] = df['Vehicle_Type']
df['subscription_encoded'] = df['Subscription_Type']

# Select features used in model
feature_cols = [
    'avg_wallet_balance', 'avg_session_duration', 'peak_hour_ratio',
    'charging_duration_min', 'cost_per_kwh_est', 'wallet_to_cost_ratio',
    'sessions_per_user_month', 'Month', 'Year'
]
X = df[feature_cols]

print("Feature matrix ready:", X.shape)


KeyError: "['charging_duration_min'] not in index"

In [None]:
# Predict total monthly wallet usage
df['predicted_wallet_usage'] = model.predict(X)

# Confidence range (±10%)
df['lower_bound'] = (df['predicted_wallet_usage'] * 0.9).round(2)
df['upper_bound'] = (df['predicted_wallet_usage'] * 1.1).round(2)

# Suggested top-up
df['recommended_topup'] = (df['predicted_wallet_usage'] * 1.1).round(2)

df[['User_ID', 'Month', 'Year', 'predicted_wallet_usage', 'recommended_topup', 'lower_bound', 'upper_bound']].head(10)


In [None]:
# Categorize user patterns based on predicted usage
conditions = [
    (df['predicted_wallet_usage'] < df['predicted_wallet_usage'].quantile(0.33)),
    (df['predicted_wallet_usage'] < df['predicted_wallet_usage'].quantile(0.66)),
    (df['predicted_wallet_usage'] >= df['predicted_wallet_usage'].quantile(0.66))
]
choices = ['Low Usage', 'Moderate Usage', 'High Usage']
df['usage_category'] = np.select(conditions, choices)

# Human-readable recommendation text
df['recommendation_text'] = (
    "Recommended recharge: ₹" + df['recommended_topup'].astype(str) +
    " | Expected usage range: ₹" + df['lower_bound'].astype(str) +
    " – ₹" + df['upper_bound'].astype(str) +
    " | Category: " + df['usage_category']
)

df[['User_ID', 'Month', 'Year', 'recommendation_text']].head(10)


In [None]:
# Save prediction results
df[['User_ID', 'Month', 'Year', 'predicted_wallet_usage', 'recommended_topup',
    'lower_bound', 'upper_bound', 'usage_category']].to_csv(
    "../artifacts/predicted_wallet_recommendations.csv", index=False
)

print("Predictions and recommendations saved to /artifacts/predicted_wallet_recommendations.csv")


In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(8,5))
df.groupby('usage_category')['predicted_wallet_usage'].mean().plot(kind='bar')
plt.title("Average Predicted Wallet Usage by Category")
plt.ylabel("Predicted Wallet Usage (₹)")
plt.show()
