<a href="https://colab.research.google.com/github/SirwaniViren/MSc-Gousto-Project/blob/main/initial_framework_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
import numpy as np
import pandas as pd
import random
from collections import defaultdict

In [2]:
# Seed Python's built-in RNG so that any later use of `random` is reproducible.
random.seed(42)

In [72]:
# Toy order data: one row per (order, recipe) pair, tagged with the factory
# ('site') the order is assigned to at that lead day.
# Intended box size is between 1 and 5 recipes per order (this sample uses 2-3).
# Snapshot at lead day 5
orders_to_recipe_ld5_df = pd.DataFrame({'order_id': [1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
                                        'item_id': [10, 20, 50, 10, 30, 20, 50, 20, 30, 10, 40],
                                        'leadday': [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
                                        'site': ['F1', 'F1', 'F1', 'F2', 'F2', 'F3', 'F3', 'F2', 'F2', 'F3', 'F3']})

# Snapshot at lead day 0: same orders and recipes as above, but orders 1, 4
# and 5 are assigned to a different site than at lead day 5.
orders_to_recipe_ld0_df = pd.DataFrame({'order_id': [1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
                                        'item_id': [10, 20, 50, 10, 30, 20, 50, 20, 30, 10, 40],
                                        'leadday': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                        'site': ['F2', 'F2', 'F2', 'F2', 'F2', 'F3', 'F3', 'F3', 'F3', 'F1', 'F1']})

# Recipes (item_ids) that each factory is able to produce
eligibility_id = {'F1': [10, 20, 40, 50], 'F2': [10, 20, 30, 40], 'F3': [10, 20, 30, 40, 50]}

# Factories each order (box) is eligible for.
# Should be computed from the set of recipes per factory (eligibility_id) rather than hard-coded.
# Ideally, eligibility for 3 factories will go from F1: 30%, F2: 60% and F3: 100%
# NOTE(review): this hand-written table does not match eligibility_id above:
# order 1 contains item 50, which F2 cannot make, yet F2 is listed; order 3
# (items 20, 50) fits F1 and order 5 (items 10, 40) fits F2, but neither is
# listed. It is not referenced by any code visible in this notebook - confirm
# which of the two is the intended source of truth.
orders_eligibility_df = pd.DataFrame({'order_id': [1, 2, 3, 4, 5], 'eligibility': [['F1', 'F2', 'F3'], ['F2', 'F3'], ['F3'], ['F2', 'F3'], ['F1', 'F3']]})

# Caps: maximum number of orders each factory can take (one unit is consumed
# per allocated order); F3 is uncapped.
factory_caps = {'F1': 1, 'F2': 2, 'F3': np.float32('inf')}



In [73]:
# Helper: place a single order on the first candidate site that can take it
def _allocate_order(allocation, items, sites, factory_caps, eligibility_id):
    """Try to place one order and record its items in `allocation`.

    The candidate sites are tried in order. A site accepts the order when it
    can produce every item of the order and still has capacity left. On
    success each item's count for that site is incremented, one unit of the
    site's capacity is consumed, and True is returned. If no site accepts the
    order nothing is recorded and False is returned.
    """
    for site in sites:
        if all(item in eligibility_id[site] for item in items) and factory_caps[site] > 0:
            for item in items:
                allocation[item][site] += 1
            factory_caps[site] -= 1
            return True
    return False


# Function to allocate items
def allocate_items_greedy(orders_df, factory_caps, eligibility_id):
    """Greedily allocate every order to one of the sites named on its rows.

    Orders are processed in the order in which their `order_id` first appears
    in `orders_df`. All rows sharing an `order_id` are treated as one order,
    even when they are not adjacent in the frame. An order that fits none of
    its candidate sites (item not producible there, or cap exhausted) is left
    unallocated.

    Args:
        orders_df: DataFrame with `order_id`, `item_id` and `site` columns,
            one row per item of an order.
        factory_caps: dict mapping site -> number of orders it can still
            take. It is decremented IN PLACE; pass a copy to keep the
            original caps.
        eligibility_id: dict mapping site -> collection of item ids that the
            site can produce.

    Returns:
        Nested defaultdict where `allocation[item_id][site]` is the number of
        allocated orders at `site` that contain `item_id`.

    Raises:
        KeyError: if a site from `orders_df` is missing from `eligibility_id`
            or `factory_caps`.
    """
    allocation = defaultdict(lambda: defaultdict(int))

    # Collect items and candidate sites per order. A dict keeps first-appearance
    # order, so processing order matches the row order of the frame.
    orders = {}
    for row in orders_df.itertuples(index=False):
        items, sites = orders.setdefault(row.order_id, ([], []))
        items.append(row.item_id)
        # Retrying the same site cannot change the outcome, so keep each site once.
        if row.site not in sites:
            sites.append(row.site)

    for items, sites in orders.values():
        _allocate_order(allocation, items, sites, factory_caps, eligibility_id)

    return allocation


# Run the greedy allocation on both snapshots (lead day 5 and lead day 0).
# Each run gets its own copy of the caps because the allocator consumes
# capacity in place.
ld5_allocation = allocate_items_greedy(
    orders_df=orders_to_recipe_ld5_df,
    factory_caps=dict(factory_caps),
    eligibility_id=eligibility_id,
)
ld0_allocation = allocate_items_greedy(
    orders_df=orders_to_recipe_ld0_df,
    factory_caps=dict(factory_caps),
    eligibility_id=eligibility_id,
)

In [74]:
def convert_dict_df(allocation_dict, alloc_day):
    """Flatten a nested allocation dict into a long-format DataFrame.

    Args:
        allocation_dict: mapping item_id -> {site: count}.
        alloc_day: name of the column that receives the counts
            (e.g. 'lead_5').

    Returns:
        DataFrame with columns ['item_id', 'site', alloc_day], one row per
        (item_id, site) pair. An empty allocation yields an empty frame that
        still carries these three columns.
    """
    records = [
        {'item_id': item_id, 'site': site, alloc_day: count}
        for item_id, sites in allocation_dict.items()
        for site, count in sites.items()
    ]

    # Explicit columns keep the schema stable when nothing was allocated;
    # without them an empty list gives a frame with no columns at all.
    return pd.DataFrame(records, columns=['item_id', 'site', alloc_day])

# Tabulate each allocation; the count column is named after its lead day.
ld5_df = convert_dict_df(ld5_allocation, 'lead_5')
ld0_df = convert_dict_df(ld0_allocation, 'lead_0')

In [75]:
def merge_allocation(ld5_df, ld0_df):
    """Combine the lead-5 and lead-0 allocation tables into one frame.

    The frames are outer-joined on ('item_id', 'site'), so a pair present in
    only one of them is kept. Missing values produced by the join are
    replaced by 0 and the 'lead_5' / 'lead_0' columns are cast to int.
    """
    combined = ld5_df.merge(ld0_df, on=['item_id', 'site'], how='outer')

    # The outer join introduces NaN (and therefore floats) for pairs missing
    # on one side; zero-fill first, then restore integer counts.
    return combined.fillna(0).astype({'lead_5': int, 'lead_0': int})

# Combine both snapshots into one table, then take the total lead-5 volume,
# which is the denominator of the WMAPE metrics.
merged_df = merge_allocation(ld5_df=ld5_df, ld0_df=ld0_df)
total_forecast = merged_df['lead_5'].sum()

In [76]:
# Show the merged per-(item, site) counts for both lead days.
print(merged_df)

    item_id site  lead_5  lead_0
0        10   F1       1       1
1        10   F2       1       1
2        10   F3       1       0
3        20   F1       1       0
4        20   F3       1       2
5        20   F2       1       0
6        50   F1       1       0
7        50   F3       1       1
8        30   F2       2       1
9        40   F3       1       0
10       30   F3       0       1
11       40   F1       0       1


In [85]:
# Site-level error: compare the lead-5 and lead-0 counts of every (item, site) pair.
site_df = merged_df.assign(
    abs_error=lambda frame: (frame['lead_5'] - frame['lead_0']).abs()
)
print(site_df)

# WMAPE_site = sum of per-(item, site) absolute errors / total lead-5 volume
wmape_site = site_df['abs_error'].sum() / total_forecast

# Display WMAPE_site
print(f"\nWeighted Mean Absolute Percentage Error (WMAPE) Site: {wmape_site:.4f}")

    item_id site  lead_5  lead_0  abs_error
0        10   F1       1       1          0
1        10   F2       1       1          0
2        10   F3       1       0          1
3        20   F1       1       0          1
4        20   F3       1       2          1
5        20   F2       1       0          1
6        50   F1       1       0          1
7        50   F3       1       1          0
8        30   F2       2       1          1
9        40   F3       1       0          1
10       30   F3       0       1          1
11       40   F1       0       1          1

Weighted Mean Absolute Percentage Error (WMAPE) Site: 0.8182


In [84]:
# Global error: sum the counts over all sites first, so only per-item totals
# are compared. Only the two count columns are aggregated; summing the string
# 'site' column as well would concatenate the site names.
global_df = merged_df.groupby('item_id', as_index=False)[['lead_5', 'lead_0']].sum()
global_df['abs_error'] = (global_df['lead_5'] - global_df['lead_0']).abs()
print(global_df)


# Calculate WMAPE_global = sum of per-item absolute errors / total lead-5 volume
wmape_global = global_df["abs_error"].sum() / total_forecast

# Display WMAPE_global
print(f"\nWeighted Mean Absolute Percentage Error (WMAPE) Global: {wmape_global:.4f}")

   item_id  lead_5  lead_0  abs_error
0       10       3       2          1
1       20       3       2          1
2       30       2       2          0
3       40       1       1          0
4       50       2       1          1

Weighted Mean Absolute Percentage Error (WMAPE) Global: 0.2727
