In [1]:
import json
import os

import google.generativeai as genai
import holidays
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

Importing plotly failed. Interactive plots will not work.


In [2]:
# Read the dataset, parse the date column, then derive the calendar year and
# the per-row waste (stock minus sales, floored at zero so it is never negative).
df = (
    pd.read_csv("realistic_dataset.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .assign(
        year=lambda frame: frame['date'].dt.year,
        waste_units=lambda frame: (frame['stock_level'] - frame['sale_units']).clip(lower=0),
    )
)

# Predictors (X) and regression target (y)
X = df[['sale_units', 'price', 'year']]
y = df['waste_units']

# Hold out 20% of the rows, with a fixed seed, for prediction
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression on the training rows and predict the held-out rows
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Pair each held-out row's dish name with its predicted waste
df_test = df.loc[X_test.index, ['item_name']].assign(predicted_waste=y_pred)

# Mean predicted waste per dish, highest first
high_risk = (
    df_test
    .groupby('item_name')['predicted_waste']
    .mean()
    .sort_values(ascending=False)
)

# Serialize the ranking as a JSON string and show it
high_risk_dish = json.dumps(high_risk.to_dict(), indent=4)
print(high_risk_dish)

{
    "Garden Vegetable Medley": 5.983708054173547,
    "Tropical Fruit Salad": 5.927597701730162,
    "Hearty Potato Curry": 5.5355362940719886,
    "Fruity Veggie Smoothie": 5.141572644151567,
    "Spicy Veggie Stir-Fry": 5.100161965016228
}


In [4]:
# Load dataset
df = pd.read_csv("realistic_dataset.csv")

# Convert date to datetime format and extract year
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year

# Ingredients to score; each one is read from a `stock_<name>` column of the dataset
ingredients = ["apple", "banana", "cucumber", "okra", "orange", "potato", "tomato"]

# Features for prediction. The feature matrix is the same for every ingredient,
# so it is built once instead of on every loop iteration.
features = ['sale_units', 'price', 'year']
X = df[features]

# ingredient name -> average predicted waste on the held-out rows
high_risk_ml = {}

for ing in ingredients:
    waste_col = f"waste_{ing}"
    stock_col = f"stock_{ing}"
    sale_col = "sale_units"  # Using total sale units (no individual sale per ingredient)

    # Waste for this ingredient: stock minus total sales, floored at zero
    df[waste_col] = (df[stock_col] - df[sale_col]).clip(lower=0)
    y = df[waste_col]

    # Same 80/20 split for every ingredient (fixed seed, same row count)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train Linear Regression model and predict waste for the held-out rows
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Store average predicted waste as risk factor
    high_risk_ml[ing] = y_pred.mean()

# Rank ingredients by risk, serialize as a JSON string and print the result
high_risk = pd.Series(high_risk_ml).sort_values(ascending=False)
print("\nHigh-Risk Ingredients (by average predicted waste in grams):")
high_risk_ingredients = json.dumps(high_risk.to_dict(), indent=4)
print(high_risk_ingredients)


High-Risk Ingredients (by average predicted waste in grams):
{
    "apple": 9349.23423464057,
    "potato": 7465.326938725418,
    "banana": 6209.388741448644,
    "tomato": 5267.435093491069,
    "cucumber": 4639.465994852684,
    "orange": 4325.481445533489,
    "okra": 1499.6205016607564
}


In [None]:
import google.generativeai as genai
import json
import pandas as pd
import numpy as np
import holidays
from prophet import Prophet

# Configure the Gemini client from the environment so that no secret is stored
# in the notebook. GEMINI_API_KEY is preferred; GOOGLE_API_KEY is a fallback.
gemini_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "Set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable before running this cell."
    )
genai.configure(api_key=gemini_api_key)

# Risk rankings computed earlier in the notebook (both are JSON-formatted strings)
high_risk_items_json = high_risk_dish
high_risk_ingredients_json = high_risk_ingredients

# Previously saved consumption forecast.
# NOTE(review): this value is not referenced again in this cell - confirm whether it is still needed.
with open("current_predicted_ingredients.json", "r") as file:
    predicted_consumption_json = json.load(file)

# Load updated dataset (with holiday & weekend spikes)
df = pd.read_csv("realistic_dataset.csv")
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Get Indian public holidays
india_holidays = holidays.country_holidays('IN')

# Get user input for target prediction date. The input is stripped and parsed
# with the advertised format so an ambiguous or mistyped date fails loudly
# instead of being silently interpreted as a different day.
custom_date = input("Enter the date (YYYY-MM-DD): ").strip()
target_date = pd.to_datetime(custom_date, format='%Y-%m-%d')

# Define recipe ingredient usage (grams per dish).
# Ingredient keys use the same spelling as the ingredient risk ranking
# ("orange", "potato") so that totals and risk scores refer to the same names.
recipes = {
    "Tropical Fruit Salad": {"apple": 130, "banana": 90, "orange": 30, "cucumber": 20, "okra": 0, "potato": 80, "tomato": 0},
    "Garden Vegetable Medley": {"cucumber": 55, "okra": 10, "tomato": 50, "apple": 50, "banana": 30, "orange": 10, "potato": 70},
    "Hearty Potato Curry": {"potato": 100, "tomato": 50, "okra": 5, "apple": 60, "banana": 80, "cucumber": 50, "orange": 40},
    "Fruity Veggie Smoothie": {"apple": 40, "banana": 60, "cucumber": 45, "orange": 40, "okra": 5, "potato": 80, "tomato": 0},
    "Spicy Veggie Stir-Fry": {"potato": 90, "tomato": 50, "okra": 5, "cucumber": 35, "apple": 50, "banana": 85, "orange": 50}
}

# Dictionary to store total predicted ingredient consumption
ingredient_totals = {}

# Function to check if the target date is a public holiday or weekend
def is_special_day(date, holiday_calendar=None):
    """Return True when `date` is a weekend day or is listed in the holiday calendar.

    Parameters
    ----------
    date : date-like
        The day to check. Objects exposing a ``.date()`` method (such as
        ``datetime.datetime`` or ``pandas.Timestamp``) are reduced to a plain
        date before the lookup.
    holiday_calendar : container of dates, optional
        Anything supporting ``in``. When omitted, the module-level
        ``india_holidays`` calendar is used.

    Returns
    -------
    bool
    """
    if holiday_calendar is None:
        holiday_calendar = india_holidays
    # Drop the time-of-day component so plain sets/lists of dates also match
    day = date.date() if hasattr(date, "date") else date
    return day in holiday_calendar or day.weekday() >= 5  # Saturday = 5, Sunday = 6

# Forecast per-dish sales for the target date and convert them into total
# ingredient consumption using the recipe table.

# Seeded generator so the random uplift and buffer factors are reproducible
rng = np.random.default_rng(42)

# Month-of-year demand multipliers. They do not depend on the dish, so the
# table and the factor for the target month are computed once.
seasonal_multiplier = {
    1: 0.9,  2: 0.92, 3: 1.05, 4: 1.1, 5: 1.15, 6: 1.2,
    7: 1.25, 8: 1.3, 9: 1.2, 10: 1.5, 11: 1.4, 12: 1.35
}
seasonal_factor = seasonal_multiplier.get(target_date.month, 1.0)

# Holiday/weekend status depends only on the target date
special_day = is_special_day(target_date)

for item in df['item_name'].unique():
    # A dish without a recipe contributes nothing to the totals, so skip the model fit
    if item not in recipes:
        continue

    df_item = df[df['item_name'] == item][['date', 'sale_units']].copy()
    df_item = df_item.rename(columns={'date': 'ds', 'sale_units': 'y'})

    # Train Prophet model on historical sales
    model = Prophet()
    model.fit(df_item)

    # Predict sales for the target date
    future_df = pd.DataFrame({'ds': [target_date]})
    forecast = model.predict(future_df)

    # A negative forecast would subtract from the ingredient totals; clamp at zero
    predicted_sales = max(float(forecast['yhat'].iloc[0]), 0.0)

    # If holiday or weekend, increase the forecast by 20-30%
    if special_day:
        predicted_sales *= rng.uniform(1.2, 1.3)

    # Apply seasonality-based adjustment
    predicted_sales *= seasonal_factor

    # Calculate ingredient consumption based on recipe
    for ingredient, grams_per_dish in recipes[item].items():
        consumption = predicted_sales * grams_per_dish
        ingredient_totals[ingredient] = ingredient_totals.get(ingredient, 0) + consumption

# Apply buffer stock (5-15%) and round each total to a whole number
for ingredient in ingredient_totals:
    buffer_multiplier = rng.uniform(1.05, 1.15)
    ingredient_totals[ingredient] = int(np.round(ingredient_totals[ingredient] * buffer_multiplier))

def _as_json_text(payload):
    """Return `payload` as JSON text, serializing it only if it is not already a string.

    Calling ``json.dumps`` on a string that already holds JSON produces a quoted,
    escaped string literal rather than the JSON object, so strings pass through as-is.
    """
    return payload if isinstance(payload, str) else json.dumps(payload, indent=4)


# Create JSON object
predicted_ingredient_consumption_json = json.dumps({
    "target_date": custom_date,
    "predicted_ingredient_consumption": ingredient_totals
}, indent=4)

# Format prompt for Gemini. Each data section is embedded as plain JSON text
# (encoded exactly once) so the model receives objects, not escaped strings.
prompt = f"""
Based on the following updated data:
- High-risk items with lower predicted sales: {_as_json_text(high_risk_items_json)}
- High-risk ingredients prone to wastage: {_as_json_text(high_risk_ingredients_json)}
- Updated predicted ingredient consumption (accounting for holidays, weekends, and seasonality): {predicted_ingredient_consumption_json}

Generate a JSON object containing the optimal stock levels for each ingredient. The stock levels should:
- Ensure sufficient availability while preventing over-purchasing.
- Adapt dynamically to holiday spikes and seasonal changes.
- Minimize wastage using a buffer stock mechanism.

Strictly return only the JSON object with optimal stock levels, without any additional text or explanations.
"""

# Generate response from Gemini
response = genai.GenerativeModel("gemini-2.0-flash").generate_content(prompt)

# Print Gemini's response
print(response.text)


10:14:01 - cmdstanpy - INFO - Chain [1] start processing
10:14:02 - cmdstanpy - INFO - Chain [1] done processing
10:14:03 - cmdstanpy - INFO - Chain [1] start processing
10:14:03 - cmdstanpy - INFO - Chain [1] done processing
10:14:04 - cmdstanpy - INFO - Chain [1] start processing
10:14:04 - cmdstanpy - INFO - Chain [1] done processing
10:14:05 - cmdstanpy - INFO - Chain [1] start processing
10:14:05 - cmdstanpy - INFO - Chain [1] done processing
10:14:06 - cmdstanpy - INFO - Chain [1] start processing
10:14:06 - cmdstanpy - INFO - Chain [1] done processing


```json
{
  "optimal_stock_levels": {
    "apple": {
      "target": 35959,
      "buffer": 9350,
      "total": 45309
    },
    "banana": {
      "target": 38001,
      "buffer": 6210,
      "total": 44211
    },
    "oranges": {
      "target": 18773,
      "buffer": 4326,
      "total": 23099
    },
    "cucumber": {
      "target": 21155,
      "buffer": 4640,
      "total": 25795
    },
    "okra": {
      "target": 2656,
      "buffer": 1500,
      "total": 4156
    },
    "potato": {
      "target": 47167,
      "buffer": 7466,
      "total": 54633
    },
    "tomato": {
      "target": 15862,
      "buffer": 5268,
      "total": 21130
    }
  },
  "date": "2025-03-30"
}
```
