In [2]:
import requests
import pandas as pd

In [3]:
def get_weather_forecast(start_date, end_date):
    response = requests.get(f"https://api.open-meteo.com/v1/forecast?latitude=52.52&longitude=13.41&daily=rain_sum&daily=temperature_2m_mean&start_date={start_date}&end_date={end_date}")
    # print("Returned", response.status_code)
    weather_data = response.json()
    # print(weather_data)
    return weather_data

In [20]:
from datetime import timedelta
from flask import jsonify
import joblib

model = joblib.load("ML/freshcast_xgb_model.joblib")
label_encoder = joblib.load("ML/product_label_encoder.joblib")

# Load dataset
df = pd.read_csv("ML/freshcast_dataset.csv")
df['date'] = pd.to_datetime(df['date'])
df.sort_values(['product', 'date'], inplace=True)

def forecast(product):
    if product not in df['product'].unique():
        return jsonify({"error": "Product not found"}), 404

    # Get recent 10 days of data for selected product
    product_df = df[df['product'] == product].copy()
    product_df = product_df.sort_values('date').tail(10).copy()
    product_df['date'] = pd.to_datetime(product_df['date'])

    forecast_dates = [
    (product_df['date'].max() + timedelta(days=i+1)).strftime("%Y-%m-%d")
    for i in range(7)
]

    # Create features
    # product_df['day_of_week'] = product_df['date'].dt.dayofweek
    # product_df['month'] = product_df['date'].dt.month
    # product_df['product_id'] = label_encoder.transform([product])[0]

    weather = get_weather_forecast(str(forecast_dates[0]), str(forecast_dates[-1]))
    input_data_list = []
    
    for i in range(7):
        input_data = {}
        input_data["temperature"] = weather["daily"]["temperature_2m_mean"][i]
        input_data["rainfall_mm"] = weather["daily"]["rain_sum"][i]
        date_i = pd.to_datetime(weather["daily"]["time"][i])
        input_data["day_of_week"] = date_i.dayofweek
        input_data["month"] = date_i.month
        input_data["product_id"] = label_encoder.transform([product])[0]
        input_data["sales_lag_1"] = product_df.iloc[-1]['sales']
        input_data["sales_lag_2"] = product_df.iloc[-2]['sales']
        input_data["sales_lag_3"] = product_df.iloc[-3]['sales']
        input_data["is_holiday"] = 0

        input_data_list.append(input_data)

    # print(input_data_list)

    # Simulate next 7 days
    predictions = []

    for i in range(7):
        # input_df = pd.DataFrame([input_data_list[i]])
        input_df = pd.DataFrame([input_data_list[i]])[['product_id', 'temperature', 'rainfall_mm', 'is_holiday', 'day_of_week', 'month', 'sales_lag_1', 'sales_lag_2', 'sales_lag_3']]
        pred = model.predict(input_df)[0]
        predictions.append(round(pred))

        # Update lag features for next prediction
        input_data['sales_lag_3'] = input_data['sales_lag_2']
        input_data['sales_lag_2'] = input_data['sales_lag_1']
        input_data['sales_lag_1'] = pred

        input_data['day_of_week'] = (input_data['day_of_week'] + 1) % 7

    return {
        "product": product,
        "dates": [pd.to_datetime(d).strftime("%Y-%m-%d") for d in forecast_dates],
        "predicted": predictions,
        "total": sum(predictions)
    }

In [21]:
forecast("Chicken (1kg)")

{'product': 'Chicken (1kg)',
 'dates': ['2025-07-01',
  '2025-07-02',
  '2025-07-03',
  '2025-07-04',
  '2025-07-05',
  '2025-07-06',
  '2025-07-07'],
 'predicted': [74, 73, 80, 76, 76, 76, 78],
 'total': 533}

In [22]:
df = pd.read_csv("ML/freshcast_dataset.csv")
total_sales = df.groupby("product")["sales"].sum()
products = total_sales.keys()
for i in products:
    print(i)

Bananas (1kg)
Bread Loaf
Chicken (1kg)
Lettuce (1 head)
Milk (1L)
Tomatoes (1kg)


In [28]:
df = pd.read_csv("ML/freshcast_dataset.csv")

df["date"] = pd.to_datetime(df["date"])

end_date = df["date"].max()
start_date = end_date - timedelta(days=6)

recent_df = df[(df["date"] >= start_date) & (df["date"] <= end_date)]

recent_sales = recent_df.groupby("product")["sales"].sum()
forecasts = []
for i in recent_sales.keys():
    fore = forecast(i)
    fore["total_sales"] = recent_sales[i]
    forecasts.append(fore)
    
forecasts

[{'product': 'Bananas (1kg)',
  'dates': ['2025-07-01',
   '2025-07-02',
   '2025-07-03',
   '2025-07-04',
   '2025-07-05',
   '2025-07-06',
   '2025-07-07'],
  'predicted': [92, 94, 93, 92, 95, 95, 94],
  'total': 655,
  'total_sales': np.int64(661)},
 {'product': 'Bread Loaf',
  'dates': ['2025-07-01',
   '2025-07-02',
   '2025-07-03',
   '2025-07-04',
   '2025-07-05',
   '2025-07-06',
   '2025-07-07'],
  'predicted': [88, 88, 88, 88, 88, 88, 80],
  'total': 608,
  'total_sales': np.int64(572)},
 {'product': 'Chicken (1kg)',
  'dates': ['2025-07-01',
   '2025-07-02',
   '2025-07-03',
   '2025-07-04',
   '2025-07-05',
   '2025-07-06',
   '2025-07-07'],
  'predicted': [74, 73, 80, 76, 76, 76, 78],
  'total': 533,
  'total_sales': np.int64(532)},
 {'product': 'Lettuce (1 head)',
  'dates': ['2025-07-01',
   '2025-07-02',
   '2025-07-03',
   '2025-07-04',
   '2025-07-05',
   '2025-07-06',
   '2025-07-07'],
  'predicted': [67, 74, 74, 73, 73, 73, 67],
  'total': 501,
  'total_sales': np.i

In [40]:
def forecast_past(product):
    if product not in df['product'].unique():
        return jsonify({"error": "Product not found"}), 404

    product_df = df[df['product'] == product].copy()
    product_df['date'] = pd.to_datetime(product_df['date'])
    product_df = product_df.sort_values('date')

    # Need at least 10 days of data to generate 7 lagged predictions
    if len(product_df) < 10:
        return jsonify({"error": "Not enough data to generate past forecast"}), 400

    past_df = product_df.tail(10).reset_index(drop=True)

    predicted = []
    actual = []
    dates = []

    for i in range(3, 10):
        row = past_df.iloc[i]
        date_i = pd.to_datetime(row['date'])
        dates.append(date_i.strftime("%Y-%m-%d"))
        actual.append(int(row['sales']))

        input_data = {
            'product_id': label_encoder.transform([product])[0],
            'temperature': row['temperature'],
            'rainfall_mm': row['rainfall_mm'],
            'is_holiday': row['is_holiday'],
            'day_of_week': date_i.dayofweek,
            'month': date_i.month,
            'sales_lag_1': past_df.iloc[i-1]['sales'],
            'sales_lag_2': past_df.iloc[i-2]['sales'],
            'sales_lag_3': past_df.iloc[i-3]['sales']
        }

        input_df = pd.DataFrame([input_data])[[
            'product_id', 'temperature', 'rainfall_mm', 'is_holiday',
            'day_of_week', 'month', 'sales_lag_1', 'sales_lag_2', 'sales_lag_3'
        ]]

        pred = model.predict(input_df)[0]
        predicted.append(round(pred))

    return {
        "product": product,
        "dates": dates,
        "actual": actual,
        "predicted": predicted,
        "total_actual": sum(actual),
        "total_predicted": sum(predicted),
        "accuracy": 100 - abs(((sum(actual) - sum(predicted)) / sum(actual)) * 100)
    }

In [41]:
forecast_past("Chicken (1kg)")

{'product': 'Chicken (1kg)',
 'dates': ['2025-06-24',
  '2025-06-25',
  '2025-06-26',
  '2025-06-27',
  '2025-06-28',
  '2025-06-29',
  '2025-06-30'],
 'actual': [84, 88, 77, 77, 62, 74, 70],
 'predicted': [82, 85, 78, 82, 72, 73, 72],
 'total_actual': 532,
 'total_predicted': 544,
 'accuracy': 97.74436090225564}

In [32]:
df = pd.read_csv("ML/freshcast_dataset.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         180 non-null    object 
 1   product      180 non-null    object 
 2   sales        180 non-null    int64  
 3   temperature  180 non-null    float64
 4   rainfall_mm  180 non-null    int64  
 5   is_holiday   180 non-null    int64  
 6   event        6 non-null      object 
 7   quantity     180 non-null    int64  
dtypes: float64(1), int64(4), object(3)
memory usage: 11.4+ KB


In [33]:
df["product"]

0             Milk (1L)
1            Bread Loaf
2         Bananas (1kg)
3             Milk (1L)
4            Bread Loaf
             ...       
175    Lettuce (1 head)
176       Chicken (1kg)
177      Tomatoes (1kg)
178    Lettuce (1 head)
179       Chicken (1kg)
Name: product, Length: 180, dtype: object

In [37]:
past_forecasts = []

for i in df["product"].unique():
    past_forecasts.append(forecast_past(i))
    
past_forecasts

[{'product': 'Milk (1L)',
  'dates': ['2025-06-24',
   '2025-06-25',
   '2025-06-26',
   '2025-06-27',
   '2025-06-28',
   '2025-06-29',
   '2025-06-30'],
  'actual': [111, 101, 86, 97, 102, 110, 92],
  'predicted': [108, 109, 92, 99, 96, 101, 103],
  'total_actual': 699,
  'total_predicted': 708},
 {'product': 'Bread Loaf',
  'dates': ['2025-06-24',
   '2025-06-25',
   '2025-06-26',
   '2025-06-27',
   '2025-06-28',
   '2025-06-29',
   '2025-06-30'],
  'actual': [71, 82, 73, 91, 90, 90, 75],
  'predicted': [73, 83, 77, 89, 87, 90, 76],
  'total_actual': 572,
  'total_predicted': 575},
 {'product': 'Bananas (1kg)',
  'dates': ['2025-06-24',
   '2025-06-25',
   '2025-06-26',
   '2025-06-27',
   '2025-06-28',
   '2025-06-29',
   '2025-06-30'],
  'actual': [87, 104, 75, 96, 113, 104, 82],
  'predicted': [87, 100, 77, 93, 106, 101, 83],
  'total_actual': 661,
  'total_predicted': 647},
 {'product': 'Tomatoes (1kg)',
  'dates': ['2025-06-24',
   '2025-06-25',
   '2025-06-26',
   '2025-06-27

In [44]:
df = pd.read_csv("ML/freshcast_dataset.csv")
max_product = df.groupby("product")["sales"].sum().idxmax()
max_sales = df.groupby("product")["sales"].sum().max()

Top-selling product: Milk (1L) (3052 units)


In [45]:
forecast(max_product)

{'product': 'Milk (1L)',
 'dates': ['2025-07-01',
  '2025-07-02',
  '2025-07-03',
  '2025-07-04',
  '2025-07-05',
  '2025-07-06',
  '2025-07-07'],
 'predicted': [92, 94, 91, 92, 92, 92, 96],
 'total': 649}

In [46]:
forecast_past(max_product)

{'product': 'Milk (1L)',
 'dates': ['2025-06-24',
  '2025-06-25',
  '2025-06-26',
  '2025-06-27',
  '2025-06-28',
  '2025-06-29',
  '2025-06-30'],
 'actual': [111, 101, 86, 97, 102, 110, 92],
 'predicted': [108, 109, 92, 99, 96, 101, 103],
 'total_actual': 699,
 'total_predicted': 708,
 'accuracy': 98.71244635193133}