In [1]:
import pandas as pd # type: ignore
import numpy as np # type: ignore

In [3]:
# Import the FoodDemandForecaster class
from forecaster import FoodDemandForecaster

# Create an instance of the forecaster
forecaster = FoodDemandForecaster()

# Load the data (this will automatically call preprocess_data() which includes region mapping and meal filtering)
forecaster.load_data()

# If you want to view the regional mapping after it's been created
print("\nRegional Mapping:")
for center_id, region in forecaster.regional_mapping.items():
    print(f"Center ID {center_id} -> {region}")

# Check the dataset before and after filtering
# The original dataset should be in forecaster.train_data
print("\nData shape after filtering:")
print(f"Train data shape: {forecaster.train_data.shape}")
print(f"Unique meal IDs remaining: {len(forecaster.train_data['meal_id'].unique())}")

# You can also access the filtered meal IDs that were removed
removed_meals = [1571, 2104, 2956, 2490, 2569, 2664]
print(f"\nMeals filtered out: {removed_meals}")

# To see data availability analysis results
print("\nData Availability Analysis:")
for key, info in list(forecaster.data_availability.items())[:5]:  # Show first 5 items
    print(f"{key}: {info['records']} records, {info['weeks']} weeks, Sufficient: {info['sufficient']}")

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


🍽️ Loaded 45 meal names from meal_database.json
   • 1885: Basic Vegetable Pasta Bake
   • 1993: Simple Spinach and Chickpea Curry
   • 2539: Basic Tofu Stir Fry
   • ... and 42 more
✅ Loaded train.csv: 456548 records
📋 Train columns: ['id', 'week', 'center_id', 'meal_id', 'checkout_price', 'base_price', 'emailer_for_promotion', 'homepage_featured', 'num_orders']
ℹ️ test.csv not found - continuing without it (not required for forecasting)
🍽️ Filtering meals with insufficient coverage...
📊 Initial meals: 51 total
📊 Initial records: 456,548
🗑️ Removing meals: [1571, 2104, 2956, 2490, 2569, 2664]
✅ Filtering complete:
   • Meals removed: 6
   • Records removed: 38,746
   • Remaining meals: 45
   • Remaining records: 417,802
   • Data reduction: 8.5%
📍 Loading regional mapping from fulfilment_center_info.csv
📋 Fulfilment center data loaded: 77 centers
📋 Columns: ['center_id', 'city_code', 'region_code', 'center_type', 'op_area']
🗺️ Regional mapping created from fulfilment center info:
   R

KeyError: 'records'

In [None]:
# remove Lyon
forecaster.train_data = forecaster.train_data.loc[forecaster.train_data['region'] != "Region-Lyon"]

In [30]:
forecaster.train_data.drop(columns=['emailer_for_promotion', 'homepage_featured', 'base_price'], inplace=True)

In [16]:
print("Data Points in Total by region:")
forecaster.train_data['region'].value_counts()

Data Points in Total by region:


region
Region-Kassel    176117
Region-Luzern    105864
Region-Wien       86302
Name: count, dtype: int64

In [12]:
# Average per Region
regions = ['Region-Kassel', "Region-Luzern", "Region-Wien"]
for region in regions:
    len(forecaster.train_data[forecaster.train_data['region'] == region]) / 145
    print(f"Average data points per week in {region}: {len(forecaster.train_data[forecaster.train_data['region'] == region]) / 145:.2f}")

Average data points per week in Region-Kassel: 1214.60
Average data points per week in Region-Luzern: 730.10
Average data points per week in Region-Wien: 595.19


In [15]:
for region in regions:
    print(f"Average number of meals per week in {region}: {forecaster.train_data['num_orders'][forecaster.train_data['region'] == region].sum() / 145:.2f}")

Average number of meals per week in Region-Kassel: 381516.63
Average number of meals per week in Region-Luzern: 146010.45
Average number of meals per week in Region-Wien: 124915.57


In [29]:
for region in regions:
    region_data = forecaster.train_data[forecaster.train_data['region'] == region]
    # Calculate revenue per record
    region_data['revenue'] = region_data['num_orders'] * region_data['checkout_price']
    
    # Group by week and sum revenue
    weekly_revenue = region_data.groupby('week')['revenue'].sum().reset_index()
    
    # Calculate statistics
    total_weeks = weekly_revenue['week'].nunique()
    total_revenue = weekly_revenue['revenue'].sum()
    mean_revenue = weekly_revenue['revenue'].mean()
    
    # Format numbers with thousands separator (European style)
    total_revenue_formatted = f"{total_revenue:,.2f}"
    mean_revenue_formatted = f"{mean_revenue:,.2f}"
    total_avg_formatted = f"{total_revenue/145:,.2f}"
    
    print(f"Region: {region}")
    print(f"  - Total weeks with data: {total_weeks}")
    print(f"  - Total revenue: {total_revenue_formatted} EUR")
    print(f"  - Average revenue per week: {mean_revenue_formatted} EUR")

Region: Region-Kassel
  - Total weeks with data: 145
  - Total revenue: 14,277,983,431.71 EUR
  - Average revenue per week: 98,468,851.25 EUR
Region: Region-Luzern
  - Total weeks with data: 145
  - Total revenue: 5,788,937,305.94 EUR
  - Average revenue per week: 39,923,705.56 EUR
Region: Region-Wien
  - Total weeks with data: 145
  - Total revenue: 4,847,332,717.01 EUR
  - Average revenue per week: 33,429,880.81 EUR


In [10]:
forecaster.train_data['week'].value_counts()

week
105    2633
106    2630
122    2627
123    2616
53     2614
       ... 
34     2447
38     2442
35     2442
103    2381
62     2349
Name: count, Length: 145, dtype: int64

In [31]:
forecaster.train_data

Unnamed: 0,id,week,center_id,meal_id,checkout_price,num_orders,region
0,1379560,1,55,1885,136.83,177,Region-Kassel
1,1466964,1,55,1993,136.83,270,Region-Kassel
2,1346989,1,55,2539,134.86,189,Region-Kassel
3,1338232,1,55,2139,339.50,54,Region-Kassel
4,1448490,1,55,2631,243.50,40,Region-Kassel
...,...,...,...,...,...,...,...
456540,1035758,145,61,1525,319.13,134,Region-Wien
456541,1010438,145,61,2704,321.13,67,Region-Wien
456542,1116711,145,61,2492,455.93,42,Region-Wien
456543,1271326,145,61,1543,484.09,68,Region-Wien


In [None]:
import pandas as pd
import json

# Load the meal database from JSON file
with open('meal_database.json', 'r') as file:
    meal_data = json.load(file)

# Create a list to store the meal information
meals_list = []

# Extract data from the JSON structure
for meal_id, details in meal_data.items():
    # Calculate average ingredient weight
    avg_ingredient_weight = sum(details['kg_per_10_persons']) / len(details['kg_per_10_persons'])
    
    # Calculate perishability ratio (percentage of perishable ingredients)
    perishability = sum(details['perishable']) / len(details['perishable']) * 100
    
    # Create a dictionary for this meal
    meal_info = {
        'meal_id': int(meal_id),
        'name': details['name'],
        'type': details['type'],
        'price_tier': details['price_tier'],
        'avg_ingredient_weight': avg_ingredient_weight,
        'perishability_ratio': perishability,
        'num_ingredients': len(details['ingredients'])
    }
    
    meals_list.append(meal_info)

# Create DataFrame
meals_df = pd.DataFrame(meals_list)

# Sort by meal_id for better readability
meals_df = meals_df.sort_values('meal_id').reset_index(drop=True)

# Display the DataFrame
print(f"Loaded {len(meals_df)} meals from the database")
meals_df