In [1]:
##         Main Model

import os
import joblib
import requests
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import xgboost as xgb

# -----------------------------
# Config / paths
# -----------------------------
# Dataset location. The original machine-specific path remains the default so
# existing runs keep working; set SHELF_LIFE_DATA_CSV to point somewhere else.
DATA_CSV = os.getenv(
    "SHELF_LIFE_DATA_CSV",
    r"C:\Users\ASUS\OneDrive\Desktop\MinorProject-1(MK3)\Shelf_Life Datasets\indian_dishes_shelf_life_359mb.csv",
)
# Artifacts written after training and read back when training is skipped.
MODEL_PATH = "xgb_shelf_life_model.pkl"
SCALER_PATH = "scaler.pkl"
ENCODERS_PATH = "encoders.pkl"

# SECURITY: the API key is read from the environment only. A literal key used
# to be embedded here as the fallback value; it has been removed so the secret
# is not stored (and shared) with the notebook. That key should be treated as
# exposed and rotated.
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")
if not OPENWEATHER_API_KEY:
    print("OPENWEATHER_API_KEY is not set; weather lookups will fail until it is exported.")

FORCE_RETRAIN = True  # retrain the corrected model

# -----------------------------
# Load dataset
# -----------------------------
df = pd.read_csv(DATA_CSV)

# -----------------------------
# Feature engineering helpers
# -----------------------------
# Ordered (category, keywords) rules; the first rule with a keyword found in
# the dish name wins, so the order below is significant.
_DISH_TYPE_RULES = (
    ("NonVegCurry", ("chicken", "fish", "mutton", "egg", "maach")),
    ("VegCurry", ("paneer", "dal", "curry", "korma", "butter")),
    ("Bread", ("roti", "naan", "chapati", "paratha", "puri")),
    ("Rice", ("biryani", "rice", "pulao", "khichdi")),
    ("Sweet", ("laddu", "halwa", "jamun", "rasgulla", "cake", "sweet")),
    ("Dairy", ("lassi", "rabri", "milk", "cream", "ice_cream", "cheese")),
    ("FriedSnack", ("samosa", "kachori", "pakoda", "tikki", "fried")),
)


def get_dish_type(dish):
    """Map a dish name to a coarse category label.

    The name is converted with ``str()`` and lower-cased, then tested for
    keyword substrings category by category. The first category with a
    matching keyword is returned; ``"Other"`` is returned when none match.
    """
    name = str(dish).lower()
    for category, keywords in _DISH_TYPE_RULES:
        for keyword in keywords:
            if keyword in name:
                return category
    return "Other"

def get_base_shelf_life(dish):
    """Return a rule-of-thumb baseline shelf life, in hours, for a dish name.

    The name is converted with ``str()`` and lower-cased, then checked against
    keyword substrings in a fixed order; the first matching rule decides the
    value. Names that match no rule get 12.

    Parameters
    ----------
    dish : object
        Dish name (any value; it is stringified first).

    Returns
    -------
    int
        Baseline shelf life in hours.
    """
    dish = str(dish).lower()
    if "paneer" in dish:
        return 6
    # Uses the same animal-product keywords as is_nonveg() and get_dish_type().
    # "egg" and "maach" were previously missing here, so a dish flagged as
    # non-veg (e.g. "maach_jhol") fell through to the generic 12 h default.
    # Matching is by substring, exactly as in those two helpers.
    if any(x in dish for x in ["chicken", "fish", "mutton", "egg", "maach"]):
        return 6
    if "rice" in dish:
        return 8
    if "aloo" in dish or "potato" in dish:
        return 10
    if any(x in dish for x in ["roti", "naan", "chapati", "bread"]):
        return 12
    if any(x in dish for x in ["laddu", "halwa", "jamun", "rasgulla", "cake", "sweet"]):
        return 72
    if "ice_cream" in dish:
        return 2
    if any(x in dish for x in ["fried", "samosa", "kachori", "pakoda", "tikki"]):
        return 24
    return 12

def is_nonveg(dish):
    """Return 1 if the lower-cased dish name contains a non-veg keyword, else 0."""
    name = str(dish).lower()
    for marker in ("chicken", "fish", "mutton", "egg", "maach"):
        if marker in name:
            return 1
    return 0

# Derive the rule-based columns from the dish name
for derived_col, derive in (
    ("DishType", get_dish_type),
    ("BaseShelfLife", get_base_shelf_life),
    ("Is_NonVeg", is_nonveg),
):
    df[derived_col] = df["Dish"].apply(derive)

# -----------------------------
# Encode categories
# -----------------------------
# One LabelEncoder per categorical column; each is kept under its own name
# because the prediction code and the saved artifacts use them individually.
le_dish, le_storage, le_type = LabelEncoder(), LabelEncoder(), LabelEncoder()
for source_col, encoder in (
    ("Dish", le_dish),
    ("Storage", le_storage),
    ("DishType", le_type),
):
    df[source_col + "_encoded"] = encoder.fit_transform(df[source_col].astype(str))

# -----------------------------
# Extra feature: Temp * Humidity
# -----------------------------
df["Temp_x_Hum"] = df["Temperature (¬∞C)"].mul(df["Humidity (%)"])

# -----------------------------
# Features & target (raw hours)
# -----------------------------
# Column order matters: the prediction code builds its input row in this order.
FEATURE_COLUMNS = [
    "Dish_encoded",
    "Storage_encoded",
    "Temperature (¬∞C)",
    "Humidity (%)",
    "DishType_encoded",
    "BaseShelfLife",
    "Is_NonVeg",
    "Temp_x_Hum",
]
X = df[FEATURE_COLUMNS]
y = df["Shelf Life (hours)"]

# Train/test split
# Split the raw features first so the held-out rows play no part in fitting
# the scaler. The same random_state and test_size give the same row assignment
# as splitting the scaled matrix did.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
# Previously the scaler was fitted on every row before the split, which let
# the test rows' min/max influence preprocessing. It is now fitted on the
# training rows only and then applied to both partitions.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole scaled matrix, kept under its original name for any later cell that
# still refers to it.
X_scaled = scaler.transform(X)

# -----------------------------
# Train or load model
# -----------------------------
# Training is skipped only when FORCE_RETRAIN is off and all three artifacts
# already exist on disk.
artifacts_present = all(
    os.path.exists(path) for path in (MODEL_PATH, SCALER_PATH, ENCODERS_PATH)
)
need_train = FORCE_RETRAIN or not artifacts_present

if not need_train:
    print("üîÑ Loading existing model and artifacts...")
    # NOTE(review): joblib.load unpickles the files; only load artifacts that
    # were produced by a trusted run of this notebook.
    xgb_model = joblib.load(MODEL_PATH)
    scaler = joblib.load(SCALER_PATH)
    encoders = joblib.load(ENCODERS_PATH)
    le_dish = encoders["dish"]
    le_storage = encoders["storage"]
    le_type = encoders["type"]
else:
    print("üöÄ Training corrected XGBoost model (no early stopping)...")
    xgb_params = {
        "objective": "reg:squarederror",
        "n_estimators": 300,
        "learning_rate": 0.05,
        "max_depth": 6,
        "subsample": 0.8,
        "colsample_bytree": 0.7,
        "reg_alpha": 1.0,
        "reg_lambda": 2.0,
        "random_state": 42,
        "verbosity": 0,
    }
    xgb_model = xgb.XGBRegressor(**xgb_params)
    xgb_model.fit(X_train, y_train)

    # Persist the model together with the preprocessing objects it depends on
    joblib.dump(xgb_model, MODEL_PATH)
    joblib.dump(scaler, SCALER_PATH)
    joblib.dump({"dish": le_dish, "storage": le_storage, "type": le_type}, ENCODERS_PATH)
    print("‚úÖ Saved model, scaler, and encoders.")

# -----------------------------
# Evaluation
# -----------------------------
# Score the model on the held-out split and report the three metrics.
y_pred = xgb_model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("\n‚úÖ Model Performance (on test set):")
print(f"R¬≤ Score : {r2:.3f}")
print(f"MAE      : {mae:.2f} hours")
print(f"MSE      : {mse:.2f} hours¬≤")

# -----------------------------
# Weather fetch
# -----------------------------
def fetch_weather(city):
    """Fetch the current temperature and humidity for a city from OpenWeatherMap.

    Parameters
    ----------
    city : str
        City name to look up; surrounding whitespace is ignored.

    Returns
    -------
    tuple[float, int]
        ``(temp, humidity)`` taken from the ``main`` section of the response.
        The request asks for ``units=metric``.

    Raises
    ------
    ValueError
        If the city name is empty.
    requests.HTTPError
        If the API answers with an error status (via ``raise_for_status``).
    """
    city = str(city).strip()
    if not city:
        raise ValueError("City name must not be empty.")

    # Pass the query through `params` so requests URL-encodes the user-supplied
    # city; interpolating it into the URL let characters such as '&' or '#'
    # change the query string.
    response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        params={"q": city, "appid": OPENWEATHER_API_KEY, "units": "metric"},
        timeout=10,
    )
    response.raise_for_status()
    main = response.json()["main"]
    return float(main["temp"]), int(main["humidity"])

# -----------------------------
# Improved sanity-adjusted prediction
# -----------------------------
def adjust_prediction(pred_hours, base_life, storage, temp_c, humidity, dish_type):
    """Apply heuristic multipliers and bounds to a raw model prediction.

    The raw value is floored at 0.01, then multiplied in turn by a dish-type
    factor (0.8 for unknown types), a storage/temperature factor (none when
    the storage text matches no known keyword) and a humidity factor (none
    below 60). The result is clamped to ``[0.3 * base_life, 3.5 * base_life]``
    and rounded to two decimals.

    Returns
    -------
    float
        The adjusted prediction.
    """
    type_factors = {
        "FriedSnack": 0.7,
        "NonVegCurry": 0.6,
        "VegCurry": 0.8,
        "Rice": 0.75,
        "Bread": 0.9,
        "Sweet": 1.0,
        "Dairy": 0.5,
        "Other": 0.85,
    }
    estimate = max(pred_hours, 0.01) * type_factors.get(dish_type, 0.8)

    # Storage factor depends on both the storage keyword and the temperature
    mode = storage.lower()
    storage_factor = None
    if "open" in mode:
        if temp_c >= 30:
            storage_factor = 0.3
        elif temp_c >= 25:
            storage_factor = 0.5
        else:
            storage_factor = 0.7
    elif "airtight" in mode or "sealed" in mode:
        storage_factor = 0.8 if temp_c >= 30 else 0.95
    elif any(tag in mode for tag in ("refrig", "fridge", "cold")):
        storage_factor = 1.5 if temp_c <= 4 else 1.2
    if storage_factor is not None:
        estimate *= storage_factor

    # Humidity penalty
    if humidity >= 80:
        estimate *= 0.75
    elif humidity >= 60:
        estimate *= 0.9

    # Clamp: lower bound first, then upper bound
    lower_bound = base_life * 0.3
    upper_bound = base_life * 3.5
    estimate = min(max(estimate, lower_bound), upper_bound)

    return float(np.round(estimate, 2))

# -----------------------------
# Format hours & minutes
# -----------------------------
def format_hours_minutes(pred_hours):
    """Split a duration in hours into ``(hours, minutes)``.

    The fractional part is converted to minutes and rounded; if that rounds
    up to 60 it is carried over into the hour count.
    """
    whole_hours = int(pred_hours)
    leftover_minutes = int(round((pred_hours - whole_hours) * 60))
    if leftover_minutes < 60:
        return whole_hours, leftover_minutes
    # Rounding reached a full hour: carry it over
    return whole_hours + 1, leftover_minutes - 60

# -----------------------------
# Interactive prediction
# -----------------------------
def _choose_option(header, options, prompt):
    """Print a numbered menu and return the entry the user picks.

    A reply that is not an integer, or is outside ``1..len(options)``, falls
    back to the first entry and says so, instead of silently selecting it.
    Only ``ValueError`` is caught, so interrupting the prompt is not swallowed.
    """
    print(header)
    for i, option in enumerate(options, start=1):
        print(f"{i}. {option}")
    try:
        choice = int(input(prompt).strip())
    except ValueError:
        choice = 0  # forces the out-of-range fallback below
    if not 1 <= choice <= len(options):
        print("Invalid choice; defaulting to option 1.")
        choice = 1
    return options[choice - 1]


def predict_shelf_life():
    """Interactively predict the shelf life of a dish and print the result.

    Prompts for a city (used to fetch temperature and humidity), a dish and a
    storage condition. For refrigerated storage the user is asked for the
    fridge temperature (4.0 if the reply is not a number) and that value is
    used instead of the fetched temperature; the fetched humidity is always
    used. The feature row is built in the training column order, scaled,
    passed to the model, and the raw prediction is post-processed with
    adjust_prediction. Any exception is reported with a message rather than
    raised.
    """
    try:
        city = input("\nüåç Enter city name: ").strip()
        temp_api, humidity_api = fetch_weather(city)
        print(f"üì° Weather in {city}: Temperature={temp_api:.2f}¬∞C, Humidity={humidity_api}%")

        matched_dish = _choose_option("\nüçΩ Choose a dish:", le_dish.classes_, "Enter dish number: ")
        print(f"‚úÖ Selected dish ‚Üí {matched_dish}")

        matched_storage = _choose_option(
            "\nüì¶ Choose storage condition:", le_storage.classes_, "Enter storage number: "
        )
        print(f"‚úÖ Selected storage ‚Üí {matched_storage}")

        # Same keywords adjust_prediction() treats as cold storage, so both
        # places agree on when the fridge temperature applies.
        storage_lc = matched_storage.lower()
        if any(tag in storage_lc for tag in ("refrig", "fridge", "cold")):
            try:
                fridge_temp = float(input("‚ùÑÔ∏è Enter refrigerator temperature (¬∞C): ").strip())
            except ValueError:
                fridge_temp = 4.0
            temp_used, humidity_used = fridge_temp, humidity_api
        else:
            temp_used, humidity_used = temp_api, humidity_api

        dish_encoded = le_dish.transform([matched_dish])[0]
        storage_encoded = le_storage.transform([matched_storage])[0]
        dish_type = get_dish_type(matched_dish)
        # Fall back to code 0 if this dish type was not seen by the encoder
        dish_type_encoded = le_type.transform([dish_type])[0] if dish_type in le_type.classes_ else 0
        base_life = get_base_shelf_life(matched_dish)
        nonveg_flag = is_nonveg(matched_dish)
        temp_x_hum = temp_used * humidity_used

        # Order must match the feature columns used for training
        X_input = np.array([[dish_encoded, storage_encoded, temp_used, humidity_used,
                             dish_type_encoded, base_life, nonveg_flag, temp_x_hum]])
        X_input_scaled = scaler.transform(X_input)

        pred_raw = float(xgb_model.predict(X_input_scaled)[0])
        pred_adj = adjust_prediction(pred_raw, base_life, matched_storage, temp_used, humidity_used, dish_type)
        hours, minutes = format_hours_minutes(pred_adj)

        print("\nüîÆ Prediction:")
        print(f"Dish       : {matched_dish}")
        print(f"Storage    : {matched_storage}")
        print(f"Temp used  : {temp_used} ¬∞C")
        print(f"Humidity   : {humidity_used} %")
        print(f"Raw model prediction  : {pred_raw:.2f} hours")
        print(f"Adjusted prediction   : {hours} hours {minutes} minutes")

    except Exception as e:
        print("An error occurred during prediction:", e)

# -----------------------------
# Run interactive prediction
# -----------------------------
# Start the interactive prompt only when this code runs as the main module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    predict_shelf_life()


üöÄ Training corrected XGBoost model (no early stopping)...
‚úÖ Saved model, scaler, and encoders.

‚úÖ Model Performance (on test set):
R¬≤ Score : 0.923
MAE      : 5.26 hours
MSE      : 173.85 hours¬≤



üåç Enter city name:  Delhi


üì° Weather in Delhi: Temperature=31.05¬∞C, Humidity=45%

üçΩ Choose a dish:
1. aloo_gobi
2. aloo_matar
3. aloo_methi
4. aloo_shimla_mirch
5. aloo_tikki
6. bandar_laddu
7. bhatura
8. bhindi_masala
9. biryani
10. boondi
11. butter_chicken
12. chana_masala
13. chapati
14. cheesecake
15. chicken_curry
16. chicken_razala
17. chicken_tikka
18. chicken_tikka_masala
19. chicken_wings
20. chikki
21. chocolate_cake
22. cup_cakes
23. daal_baati_churma
24. daal_puri
25. dal_makhani
26. dal_tadka
27. dum_aloo
28. french_fries
29. fried_rice
30. gajar_ka_halwa
31. ghevar
32. grilled_cheese_sandwich
33. gulab_jamun
34. hamburger
35. ice_cream
36. imarti
37. jalebi
38. kachori
39. kadai_paneer
40. kadhi_pakoda
41. kajjikaya
42. kalakand
43. karela_bharta
44. kofta
45. lassi
46. ledikeni
47. litti_chokha
48. maach_jhol
49. macaroni_and_cheese
50. makki_di_roti_sarson_da_saag
51. malapua
52. misi_roti
53. misti_doi
54. modak
55. naan
56. navrattan_korma
57. omelette
58. palak_paneer
59. pancakes
60. 

Enter dish number:  18


‚úÖ Selected dish ‚Üí chicken_tikka_masala

üì¶ Choose storage condition:
1. Airtight
2. Open
3. Refrigerated


Enter storage number:  2


‚úÖ Selected storage ‚Üí Open

üîÆ Prediction:
Dish       : chicken_tikka_masala
Storage    : Open
Temp used  : 31.05 ¬∞C
Humidity   : 45 %
Raw model prediction  : 5.68 hours
Adjusted prediction   : 1 hours 48 minutes


