# Smart Mandi: Local Vegetable Price Forecasting

**Problem:** Farmers often lose money because they don't know future market prices. A **24-hour price forecast** helps them decide when to sell.

**Solution:** Predict **Price per KG** of vegetables (Potato, Onion, Brinjal, etc.) based on:
- **Month** & **Season**
- **Historical price trends** (lags, rolling averages)

**Data:** Vegetable and Fruit Prices in India (Kaggle), read from `Vegetable and Fruits Prices in India.csv` in the project root.

In [None]:
import sys
from pathlib import Path

# ROOT is the parent of the notebook's working directory, resolved to an
# absolute path. It is put at the front of sys.path so the `smart_mandi`
# imports below can resolve (presumably the package lives there — confirm).
ROOT = Path("..").resolve()
sys.path.insert(0, str(ROOT))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from smart_mandi.data_loader import prepare_for_modeling
from smart_mandi.features import build_features, get_feature_columns
from smart_mandi.models import train_and_evaluate, save_model, get_models, evaluate_model

## 1. Load & Explore Data

In [None]:
# Load the raw price CSV through the project loader, then summarise it.
CSV_PATH = ROOT / "Vegetable and Fruits Prices in India.csv"
df = prepare_for_modeling(CSV_PATH, min_samples_per_vegetable=200)

# Distinct values of the `vegetable` column, in order of first appearance.
vegetable_names = df["vegetable"].unique().tolist()

print(f"Shape: {df.shape}")
print(f"\nVegetables: {vegetable_names}")

# Last expression of the cell: the notebook renders the first ten rows.
df.head(10)

In [None]:
# Box plot of price_per_kg for each vegetable, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=df, x="vegetable", y="price_per_kg", ax=ax)

# Tilt the category labels so long vegetable names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title("Price per KG by Vegetable")

fig.tight_layout()
plt.show()

## 2. Feature Engineering

In [None]:
# Derive the model features and look up which columns feed the model.
df_feat = build_features(df)
feature_cols = get_feature_columns()

# Keep only rows where every feature and the target are present.
required_cols = feature_cols + ["price_per_kg"]
df_feat = df_feat.dropna(subset=required_cols)

print(f"Features: {feature_cols}")

# Preview the features next to the target and the vegetable label.
preview_cols = required_cols + ["vegetable"]
df_feat[preview_cols].head()

## 3. Train-Test Split (Time-based)

In [None]:
# Time-based hold-out: the most recent ~20% of observations form the test set,
# which simulates forecasting prices that have not been observed yet.
# A stable sort keeps the relative order of same-date rows reproducible.
df_feat = df_feat.sort_values("date", kind="mergesort")
split_idx = int(len(df_feat) * 0.8)
if split_idx >= len(df_feat):
    # Only possible for an empty frame; fail here with a clear message rather
    # than later inside the scaler.
    raise ValueError("No rows available for a train/test split.")

# Split on a date boundary instead of a raw row position, so that all rows
# sharing a date end up on the same side and no date appears in both train
# and test. The test share can therefore be slightly above 20%.
cutoff_date = df_feat["date"].iloc[split_idx]
in_test = df_feat["date"] >= cutoff_date
train_df = df_feat[~in_test]
test_df = df_feat[in_test]
if train_df.empty or test_df.empty:
    raise ValueError(
        "Time-based split produced an empty train or test set; "
        "check the 'date' column for missing values or too few distinct dates."
    )

# Impute with medians learned from the training rows only, so that no
# test-set statistics leak into the features.
train_medians = train_df[feature_cols].median()
X_train = train_df[feature_cols].fillna(train_medians)
y_train = train_df["price_per_kg"]
X_test = test_df[feature_cols].fillna(train_medians)
y_test = test_df["price_per_kg"]

# Standardise using statistics fitted on the training features only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Train: {len(train_df)}, Test: {len(test_df)}")

## 4. Model Training & Evaluation

In [None]:
# Hand the (unscaled) feature frames to the project trainer. It returns the
# per-model results together with a scaler, which rebinds `scaler`.
train_features = pd.DataFrame(X_train, columns=feature_cols)
test_features = pd.DataFrame(X_test, columns=feature_cols)
results, scaler = train_and_evaluate(
    train_features,
    y_train,
    test_features,
    y_test,
    feature_cols,
)

# Report metrics for every entry that carries a fitted model; an entry keyed
# "model" is skipped without being inspected.
for name, res in results.items():
    if name != "model" and res.get("model") is not None:
        print(f"{name}: MAE={res['MAE']:.2f} ₹/kg, RMSE={res['RMSE']:.2f}, R2={res['R2']:.3f}")

In [None]:
# Keep only the result entries that are dicts carrying an "MAE" score, then
# choose the name with the lowest MAE (the first one wins on ties).
scored = {
    key: entry
    for key, entry in results.items()
    if isinstance(entry, dict) and "MAE" in entry
}
best_name = min(scored, key=lambda key: scored[key]["MAE"])
best_model = results[best_name]["model"]

# Save the winning model together with the scaler under ./models, which is
# relative to the current working directory.
MODEL_DIR = Path("models")
save_model(best_model, scaler, MODEL_DIR)
print(f"Saved best model: {best_name}")

## 5. Prediction Example (24-hour forecast)

In [None]:
from smart_mandi.predict import predict_price

# Example 1: Potato, March 2024, with a week of recent prices supplied.
recent_prices = [25, 27, 26, 28, 29, 30, 31]  # sample values for the last 7 days
pred = predict_price(
    "Potato",
    month=3,
    year=2024,
    recent_prices=recent_prices,
    model_path=MODEL_DIR,
)
print(f"Predicted Potato price (Mar 2024): ₹{pred}/kg")

# Example 2: Onion, June 2024, with no price history passed in
# (presumably the predictor then relies on season and month only — confirm).
pred2 = predict_price(
    "Onion",
    month=6,
    year=2024,
    model_path=MODEL_DIR,
)
print(f"Predicted Onion price (Jun 2024, no history): ₹{pred2}/kg")