In [21]:
import pandas as pd


# Daily pizza orders (each row = time period). Only the columns whose names
# match a sized pizza id from the recipe matrix are used below.
orders = pd.read_csv("modelling data/daily_sales.csv")

# Ingredient recipe matrix: one row per base pizza, keyed by "pizza_type_id".
recipes = pd.read_csv("modelling data/pizza_ingredient_matrix.csv")


# Size suffixes appended to every base pizza_type_id to form the sized ids
# that are matched against the order columns.
sizes = ["_l", "_m", "_s", "_xl", "_xxl"]

# Expand each recipe row once per size in a single vectorised step.
# Repeating the row labels keeps the rows in pizza-major order (all sizes of
# the first pizza, then all sizes of the second, ...) and preserves the
# column dtypes, instead of copying one object-dtype Series per row/size
# through iterrows().
recipes_expanded = recipes.loc[recipes.index.repeat(len(sizes))].copy()
recipes_expanded["pizza_type_id"] = [
    base_id + size
    for base_id in recipes["pizza_type_id"]
    for size in sizes
]

# Set index to the sized pizza_type_id so rows align with the order columns
recipes_expanded = recipes_expanded.set_index("pizza_type_id")


# Keep only pizzas that appear both in the demand CSV and in the recipes
common_pizzas = orders.columns.intersection(recipes_expanded.index)
if common_pizzas.empty:
    # Without this guard the dot product below silently yields all zeros.
    raise ValueError(
        "No column of the daily sales file matches a sized pizza_type_id "
        "from the recipe matrix; check the id/suffix naming."
    )

orders = orders[common_pizzas]
recipes_expanded = recipes_expanded.loc[common_pizzas]

# Ingredient names (all columns except metadata); note that
# Index.difference returns them sorted.
ingredient_cols = recipes_expanded.columns.difference(["pizza_name", "category"])

# Multiply orders (T×P) with ingredient matrix (P×I) -> demand (T×I)
ingredient_demand = orders.dot(recipes_expanded[ingredient_cols])

print("Ingredient demand per time period:")
print(ingredient_demand.head())

# The row index is dropped on export; rows stay in the order of the sales file.
ingredient_demand.to_csv("raw_data/ingredient_demand.csv", index=False)



Ingredient demand per time period:
   Alfredo Sauce  Anchovies  Artichoke  Artichokes  Arugula  Asiago Cheese  \
0           0.58       0.00       0.00        0.30     0.00           0.36   
1           0.00       0.00       0.00        1.31     0.26           0.49   
2           0.00       0.14       0.56        1.24     0.26           0.34   
3           0.00       0.21       0.14        0.42     0.13           0.23   
4           0.29       0.14       0.42        1.68     0.39           0.82   

   Bacon  Barbecue Sauce  Barbecued Chicken  Beef Chuck Roast  ...  \
0   0.70            0.66               0.63              0.29  ...   
1   1.27            0.88               0.84              0.29  ...   
2   0.93            0.88               0.84              0.87  ...   
3   0.35            0.88               0.84              0.58  ...   
4   0.94            0.88               0.84              1.16  ...   

   Sliced Ham  Smoked Gouda Cheese  Soppressata Salami  Spinach  \
0       

In [22]:
# Build the modelling table: contextual and lagged feature columns from the
# daily sales file, with the ingredient demand appended as extra columns.
pizza_orders = pd.read_csv("modelling data/daily_sales.csv")

# Basic contextual features
base_features = ['time_bucket', 'oil_price', 'is_holiday']

# Encode time_bucket as integer category codes (missing values become -1)
pizza_orders['time_bucket'] = pizza_orders['time_bucket'].astype('category').cat.codes

# All columns whose name contains "_yesterday"
yesterday_features = [col for col in pizza_orders.columns if "_yesterday" in col]

# All columns whose name contains "_avg3"
avg3_features = [col for col in pizza_orders.columns if "_avg3" in col]

# Targets are columns without "yesterday" and without "avg3"
# (not used in this cell; kept in case later cells read it)
targets = [
    col for col in pizza_orders.columns
    if col not in base_features
    and "_yesterday" not in col
    and "_avg3" not in col
    and col not in ['date']  # date is not a target
]

feature_cols = base_features + yesterday_features + avg3_features

# Keep only the feature columns; everything else (targets, date) is dropped
pizza_orders = pizza_orders[feature_cols]

# Add ingredient demand columns.
# The prefix is added only where it is missing and the result is rebound to a
# new frame, so re-running this cell does not produce
# "ingredient_ingredient_*" columns.
ingredient_prefix = "ingredient_"
ingredient_demand = ingredient_demand.rename(
    columns=lambda col: (
        col if str(col).startswith(ingredient_prefix) else f"ingredient_{col}"
    )
)
# Column-wise concat aligns on the row index of both frames.
pizza_orders = pd.concat([pizza_orders, ingredient_demand], axis=1)

pizza_orders.to_csv("modelling data/daily_sales_ingredients.csv", index=False)