In [290]:
import os
from pathlib import Path

import pandas as pd

In [291]:
# Directory holding the cleaned CSV files. Defaults to the original local
# location; set the PIZZA_DATA_DIR environment variable to point the notebook
# at another copy of the data without editing this cell.
DATA_DIR = Path(os.environ.get(
    'PIZZA_DATA_DIR',
    r'F:\GUVI\Project\Dominos - Predictive Purchase Order System\Cleaned_DATA',
))

# Load the sales data (the first CSV column is used as the row index)
sales_data = pd.read_csv(DATA_DIR / 'Pizza_sales.csv', index_col=0)
# Load the ingredients data (the first CSV column is used as the row index)
ingredients_data = pd.read_csv(DATA_DIR / 'ingredients_data.csv', index_col=0)

In [292]:
# Count missing values per column of the sales table
sales_data.isna().sum()

pizza_id             0
order_id             0
pizza_name_id        0
quantity             0
order_date           0
order_time           0
unit_price           0
total_price          0
pizza_size           0
pizza_category       0
pizza_ingredients    0
pizza_name           0
dtype: int64

In [293]:
# Convert order_date to datetime.
# format='mixed' makes pandas infer the format of each value individually.
# NOTE(review): per-element inference can read ambiguous day/month strings
# inconsistently — confirm the cleaned CSV stores dates in one unambiguous format.
sales_data['order_date'] = pd.to_datetime(sales_data['order_date'], format='mixed')

In [294]:
# Show the first five rows to confirm order_date was parsed
sales_data.head()

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name
0,1,1,hawaiian_m,1,2015-01-01,11:38:36,13.25,13.25,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza
1,2,2,classic_dlx_m,1,2015-01-01,11:57:40,16.0,16.0,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza
2,3,2,five_cheese_l,1,2015-01-01,11:57:40,18.5,18.5,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza
3,4,2,ital_supr_l,1,2015-01-01,11:57:40,20.75,20.75,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza
4,5,2,mexicana_m,1,2015-01-01,11:57:40,16.0,16.0,M,Veggie,"Tomatoes, Red Peppers, Jalapeno Peppers, Red O...",The Mexicana Pizza


In [295]:
# Derive calendar features from order_date: weekday name, month number,
# and a boolean weekend flag.
order_dt = sales_data['order_date'].dt
sales_data['day_of_week'] = order_dt.day_name()
sales_data['month'] = order_dt.month
# dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday
sales_data['is_weekend'] = order_dt.dayofweek >= 5

In [296]:
# Preview data: first rows, now including the derived day_of_week, month and is_weekend columns
sales_data.head()

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name,day_of_week,month,is_weekend
0,1,1,hawaiian_m,1,2015-01-01,11:38:36,13.25,13.25,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza,Thursday,1,False
1,2,2,classic_dlx_m,1,2015-01-01,11:57:40,16.0,16.0,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza,Thursday,1,False
2,3,2,five_cheese_l,1,2015-01-01,11:57:40,18.5,18.5,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza,Thursday,1,False
3,4,2,ital_supr_l,1,2015-01-01,11:57:40,20.75,20.75,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza,Thursday,1,False
4,5,2,mexicana_m,1,2015-01-01,11:57:40,16.0,16.0,M,Veggie,"Tomatoes, Red Peppers, Jalapeno Peppers, Red O...",The Mexicana Pizza,Thursday,1,False


In [297]:
# Preview the ingredients table (first rows)
ingredients_data.head()

Unnamed: 0,pizza_name_id,pizza_name,pizza_ingredients,Items_Qty_In_Grams
0,bbq_ckn_l,The Barbecue Chicken Pizza,Barbecued Chicken,40.0
1,bbq_ckn_l,The Barbecue Chicken Pizza,Red Peppers,15.0
2,bbq_ckn_l,The Barbecue Chicken Pizza,Green Peppers,20.0
3,bbq_ckn_l,The Barbecue Chicken Pizza,Tomatoes,30.0
4,bbq_ckn_l,The Barbecue Chicken Pizza,Red Onions,60.0


In [298]:
# Total quantity sold per date for every pizza_name_id / pizza_size pair
group_keys = ['order_date', 'pizza_name_id', 'pizza_size']
aggregated_sales = (
    sales_data
    .groupby(group_keys)['quantity']
    .sum()
    .reset_index()
)

In [299]:
# Preview the aggregated sales data: one row per (order_date, pizza_name_id, pizza_size) with the summed quantity
aggregated_sales.head()

Unnamed: 0,order_date,pizza_name_id,pizza_size,quantity
0,2015-01-01,bbq_ckn_l,L,6
1,2015-01-01,bbq_ckn_m,M,4
2,2015-01-01,bbq_ckn_s,S,1
3,2015-01-01,big_meat_s,S,5
4,2015-01-01,calabrese_m,M,1


In [300]:
# Chronological split: rows dated before the cutoff form the training set
# (9 months); rows on or after it form the test set (last 3 months).
split_date = '2015-10-01'
order_dates = aggregated_sales['order_date']
train_data = aggregated_sales[order_dates < split_date]
test_data = aggregated_sales[order_dates >= split_date]


In [301]:
# Inspect the test split (rows with order_date on or after 2015-10-01)
test_data

Unnamed: 0,order_date,pizza_name_id,pizza_size,quantity
17273,2015-10-01,bbq_ckn_l,L,3
17274,2015-10-01,bbq_ckn_m,M,1
17275,2015-10-01,bbq_ckn_s,S,2
17276,2015-10-01,big_meat_s,S,4
17277,2015-10-01,brie_carre_s,S,1
...,...,...,...,...
22959,2015-12-31,the_greek_l,L,1
22960,2015-12-31,the_greek_xl,XL,1
22961,2015-12-31,veggie_veg_l,L,4
22962,2015-12-31,veggie_veg_m,M,2


In [302]:
# Inspect the training split (rows with order_date before 2015-10-01)
train_data

Unnamed: 0,order_date,pizza_name_id,pizza_size,quantity
0,2015-01-01,bbq_ckn_l,L,6
1,2015-01-01,bbq_ckn_m,M,4
2,2015-01-01,bbq_ckn_s,S,1
3,2015-01-01,big_meat_s,S,5
4,2015-01-01,calabrese_m,M,1
...,...,...,...,...
17268,2015-09-30,the_greek_s,S,1
17269,2015-09-30,the_greek_xl,XL,2
17270,2015-09-30,veggie_veg_l,L,1
17271,2015-09-30,veggie_veg_m,M,2


In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error