# Loading the Data

***Importing the Required Modules***

---





In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [4]:
pizza_sales_data=pd.read_excel('/content/Pizza_Sales_Cleaned.xlsx')
pizza_sales_data.drop(['pizza_ingredients','pizza_name','total_price','unit_price','order_time','order_id','pizza_id','order_date_date','order_date_month','order_date_week'],axis=1,inplace=True)
pizza_sales_data.head()

Unnamed: 0,pizza_name_id,quantity,order_date,pizza_size,pizza_category
0,hawaiian_m,1,2015-01-01,M,Classic
1,classic_dlx_m,1,2015-01-01,M,Classic
2,five_cheese_l,1,2015-01-01,L,Veggie
3,ital_supr_l,1,2015-01-01,L,Supreme
4,mexicana_m,1,2015-01-01,M,Veggie


# Project Approch

1.   Try Four Different Time Series Forecasting Model:
        
<div align="center">

|   **Model**     |
|-----------------|
| 1. **ARIMA**    |
| 2. **SARIMA**   |
| 3. **Prophet**  |
| 4. **XGBoost**  |

</div>

2. Finding out Which model is performance is Good in ForeCasting.

3. If All the Model overrall Performance is not so good Means Combine all Four Models and Take a pizza_name_id forcasted Quantity based on the individual pizaa_name_id Mape Scores.

In [5]:
!pip install pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pmdarima
Successfully installed pmdarima-2.0.4


**Extracting the first Date of the every week and Making the Data ready for the Model**

In [6]:
# Step 1: Data Preparation
def prepare_data(df):
    df['order_date'] = pd.to_datetime(df['order_date'])
    df['week'] = df['order_date'].dt.to_period('W').apply(lambda r: r.start_time)
    weekly_sales_by_pizza = df.groupby(['week', 'pizza_name_id'])['quantity'].sum().reset_index()
    weekly_sales_by_pizza.set_index('week', inplace=True)
    return weekly_sales_by_pizza

**Splitting the Data for Testing and Training by 30:70 Ratio**

In [7]:
# Step 2: Split data into training and test sets
def split_data(pizza_type_data, train_size=0.7):
    train_size = int(train_size * len(pizza_type_data))
    train_data = pizza_type_data[:train_size]['quantity']
    test_data = pizza_type_data[train_size:]['quantity']
    return train_data, test_data

# ARIMA MODEL

**Importing the required Modules for the ARIMA MODEL**

In [8]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_percentage_error
from pmdarima import auto_arima

**Calculating and optimizing the Model for Better Mape scores**

In [9]:
# Step 3: Calculate MAPE
def calculate_mape(actual, predicted):
    return mean_absolute_percentage_error(actual, predicted)

# Step 4: Optimize ARIMA Model and Forecast
def auto_tune_arima(train_data, test_data):
    model = auto_arima(train_data, start_p=1, start_q=1, max_p=5, max_q=5,
                       seasonal=False, trace=False, error_action='ignore',
                       suppress_warnings=True, stepwise=True)
    predictions = model.predict(n_periods=len(test_data))
    mape = calculate_mape(test_data, predictions)
    return predictions, mape, model

**ForeCasting the Next Week sales**

In [10]:
# Step 5: Forecast the next week for each pizza_name_id
def forecast_next_week(pizza_sales_weekly_by_pizza, periods=1):
    forecasts = []
    overall_mape = []
    last_week = pizza_sales_weekly_by_pizza.index.max()
    next_week_start = last_week + pd.Timedelta(weeks=1)

    # List to store data for the DataFrame
    forecast_data = []

    # Forecast for each pizza type
    for pizza_id in pizza_sales_weekly_by_pizza['pizza_name_id'].unique():
        pizza_type_data = pizza_sales_weekly_by_pizza[pizza_sales_weekly_by_pizza['pizza_name_id'] == pizza_id]

        # Split data into train and test
        train_data, test_data = split_data(pizza_type_data)

        # Optimize ARIMA model and forecast
        if len(train_data) > 5:  # Ensure there is enough data
            predictions, mape, model = auto_tune_arima(train_data, test_data)
            overall_mape.append(mape)

            # Forecast the next period (next week)
            forecast = model.predict(n_periods=periods)
            forecast_value = forecast.iloc[0]  # Access the first element using iloc

            # Append data to the list for DataFrame
            forecast_data.append({
                'pizza_name_id': pizza_id,
                'predicted_quantity': int(forecast_value),
                'mape': mape
            })
        else:
            print(f"Not enough data to train ARIMA model for pizza_id {pizza_id}")

    # Convert the list of dictionaries into a DataFrame
    forecast_df = pd.DataFrame(forecast_data)

    # Calculate the overall MAPE score
    avg_mape = sum(overall_mape) / len(overall_mape) if overall_mape else 0  # Handle empty list
    return forecast_df, avg_mape, next_week_start

**Diplaying the Forcasted Quantity for Next Week**

In [11]:
# Step 6: Display Forecast Results
def display_forecasts(forecast_df, next_week_start):
    print(f"Forecasted sales for the week starting {next_week_start.date()}:")
    for index, row in forecast_df.iterrows():
        print(f"Pizza ID {row['pizza_name_id']}: {row['predicted_quantity']} pizzas, MAPE: {row['mape']:.2%}")

    print("\nOverall Forecast DataFrame:")
    print(forecast_df)

**Calling All the Function to Executing**

In [12]:

# Main Execution
if __name__ == "__main__":

    # Prepare the data
    pizza_sales_weekly_by_pizza = prepare_data(pizza_sales_data)

    # Forecast next week sales and store results in a DataFrame
    forecast_df, avg_mape, next_week_start = forecast_next_week(pizza_sales_weekly_by_pizza)

    # Display the forecasted results along with MAPE
    display_forecasts(forecast_df, next_week_start)

    forecast_df.to_csv('ARIMA_Forecasted_result.csv', index=False)

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(


Forecasted sales for the week starting 2016-01-04:
Pizza ID bbq_ckn_l: 19 pizzas, MAPE: 32.81%
Pizza ID bbq_ckn_m: 18 pizzas, MAPE: 42.20%
Pizza ID bbq_ckn_s: 8 pizzas, MAPE: 28.50%
Pizza ID big_meat_s: 37 pizzas, MAPE: 21.10%
Pizza ID brie_carre_s: 9 pizzas, MAPE: 38.00%
Pizza ID calabrese_l: 0 pizzas, MAPE: 57.39%
Pizza ID calabrese_m: 10 pizzas, MAPE: 46.39%
Pizza ID cali_ckn_l: 17 pizzas, MAPE: 36.30%
Pizza ID cali_ckn_m: 18 pizzas, MAPE: 36.90%
Pizza ID cali_ckn_s: 9 pizzas, MAPE: 46.21%
Pizza ID ckn_alfredo_l: 3 pizzas, MAPE: 53.72%
Pizza ID ckn_alfredo_m: 13 pizzas, MAPE: 26.11%
Pizza ID ckn_alfredo_s: 2 pizzas, MAPE: 64.99%
Pizza ID ckn_pesto_l: 8 pizzas, MAPE: 72.61%
Pizza ID ckn_pesto_m: 5 pizzas, MAPE: 48.23%
Pizza ID ckn_pesto_s: 5 pizzas, MAPE: 54.08%
Pizza ID classic_dlx_l: 8 pizzas, MAPE: 18.75%
Pizza ID classic_dlx_m: 23 pizzas, MAPE: 18.46%
Pizza ID classic_dlx_s: 14 pizzas, MAPE: 23.61%
Pizza ID five_cheese_l: 27 pizzas, MAPE: 37.26%
Pizza ID four_cheese_l: 25 pizzas,

  return get_prediction_index(
  return get_prediction_index(


# SARIMA MODEL

**Import Statements**

In [21]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_percentage_error
import warnings

# Import ConvergenceWarning from statsmodels.tools.sm_exceptions
from statsmodels.tools.sm_exceptions import ConvergenceWarning

# Instead of importing ValueWarning directly, catch it using its parent class
warnings.filterwarnings("ignore", category=UserWarning) # Changed this line
warnings.filterwarnings("ignore", category=ConvergenceWarning)

**Traing and Forecasting the Quantity sold for each Pizza Types**

In [14]:
def forecast_sales_per_pizza_type(pizza_type, train, test, periods=1):
    # Train the SARIMA model
    model = SARIMAX(train, order=(1, 0, 1), seasonal_order=(1, 0, 1, 7))
    model_fit = model.fit(disp=False)

    # Forecast for the next week
    forecast = model_fit.forecast(steps=periods)

    # Calculate MAPE on test data (actual vs forecasted)
    if len(test) > 0:
        predictions = model_fit.forecast(steps=len(test))
        mape = calculate_mape(test, predictions)
    else:
        mape = np.nan  # Not enough data to calculate MAPE

    # Return the pizza type, forecasted quantity, and MAPE
    return pizza_type, forecast.tolist(), mape

**Forcasting the Quantity for Next Week**

In [15]:
def forecast_next_week_sales_by_pizza_type(pizza_sales_weekly_by_pizza, periods=1):
    pizza_type_forecasts = {}
    mape_scores = {}

    # Get the last week's date range (for prediction display purposes)
    last_week = pizza_sales_weekly_by_pizza.index.max()
    next_week_start = last_week + pd.Timedelta(weeks=1)
    next_week_end = next_week_start + pd.Timedelta(days=6)

    # Loop through each pizza type
    for pizza_type in pizza_sales_weekly_by_pizza['pizza_name_id'].unique():
        # Filter the data for this pizza type
        pizza_type_data = pizza_sales_weekly_by_pizza[pizza_sales_weekly_by_pizza['pizza_name_id'] == pizza_type]

        # Split into training and testing data (80% training, 20% testing)
        train_size = int(0.7 * len(pizza_type_data))
        train = pizza_type_data[:train_size]['quantity']
        test = pizza_type_data[train_size:]['quantity']

        # Forecast sales for the next week for this pizza type
        pizza_type, forecast, mape = forecast_sales_per_pizza_type(pizza_type, train, test, periods)

        # Store the forecast and MAPE in dictionaries
        if forecast:
            pizza_type_forecasts[pizza_type] = forecast[0]  # Access the first value of the forecast list
            mape_scores[pizza_type] = mape

    return pizza_type_forecasts, mape_scores, next_week_start, next_week_end

**Final Calling and creating the Dataframe**

In [22]:

# Step 4: Forecast pizza sales for the next week and calculate MAPE
next_week_pizza_sales_forecasts, mape_scores, next_week_start, next_week_end = forecast_next_week_sales_by_pizza_type(pizza_sales_weekly_by_pizza, periods=1)

# Step 5: Display the forecasted sales and MAPE for each pizza type
print(f"Forecasted sales for the week starting {next_week_start.date()}")

total_forecasted_quantity = 0  # To accumulate the forecasted quantities

for pizza_type, forecast in next_week_pizza_sales_forecasts.items():
    forecast = int(forecast)
    print(f'{pizza_type}: {forecast} pizzas')
    total_forecasted_quantity += forecast

# Display total quantity of pizzas forecasted
print(f"Total quantity of units forecasted to be sold in the next week: {total_forecasted_quantity} pizzas")

# Display MAPE for each pizza type
print("\nMAPE scores:")
for pizza_type, mape in mape_scores.items():
    print(f"{pizza_type}: {mape:.2f}%")

# Step 6: Create a DataFrame with pizza_name_id, next week quantity, and MAPE
forecast_df = pd.DataFrame({
    'pizza_name_id': list(next_week_pizza_sales_forecasts.keys()),
    'next_week_quantity': list(next_week_pizza_sales_forecasts.values()),
    'MAPE': list(mape_scores.values())
})

# Step 7: Export the DataFrame to a CSV file
forecast_df.to_csv("SARIMA_Forecasted_Result.csv", index=False)

Forecasted sales for the week starting 2016-01-04
bbq_ckn_l: 17 pizzas
bbq_ckn_m: 18 pizzas
bbq_ckn_s: 9 pizzas
big_meat_s: 39 pizzas
brie_carre_s: 8 pizzas
calabrese_l: 2 pizzas
calabrese_m: 9 pizzas
cali_ckn_l: 18 pizzas
cali_ckn_m: 18 pizzas
cali_ckn_s: 8 pizzas
ckn_alfredo_l: 4 pizzas
ckn_alfredo_m: 13 pizzas
ckn_alfredo_s: 2 pizzas
ckn_pesto_l: 8 pizzas
ckn_pesto_m: 4 pizzas
ckn_pesto_s: 5 pizzas
classic_dlx_l: 7 pizzas
classic_dlx_m: 22 pizzas
classic_dlx_s: 14 pizzas
five_cheese_l: 27 pizzas
four_cheese_l: 25 pizzas
four_cheese_m: 10 pizzas
green_garden_l: 1 pizzas
green_garden_m: 6 pizzas
green_garden_s: 11 pizzas
hawaiian_l: 14 pizzas
hawaiian_m: 9 pizzas
hawaiian_s: 19 pizzas
ital_cpcllo_l: 14 pizzas
ital_cpcllo_m: 8 pizzas
ital_cpcllo_s: 7 pizzas
ital_supr_l: 14 pizzas
ital_supr_m: 19 pizzas
ital_supr_s: 3 pizzas
ital_veggie_m: 9 pizzas
ital_veggie_s: 5 pizzas
mediterraneo_l: 6 pizzas
mediterraneo_m: 3 pizzas
mediterraneo_s: 4 pizzas
mexicana_l: 16 pizzas
mexicana_m: 9 pizza

**PROPHET MODEL**

**Import Statements**

In [23]:
import pandas as pd
import numpy as np
from prophet import Prophet
import warnings

**Training the Prophet model and Forcasting the Sales**

In [29]:
def train_and_forecast_sales_prophet(pizza_type, train_data, periods=1):
    """Train Prophet model and forecast sales for the next period."""
    # Prepare the data for Prophet
    train_data = train_data.reset_index()
    train_data.columns = ['ds', 'y']  # Rename columns for Prophet

    # Train the Prophet model
    model = Prophet()
    model.fit(train_data)

    # Create future dataframe for the next week
    future = model.make_future_dataframe(periods=periods, freq='W-SUN')  # Forecast weekly

    # Forecast the sales
    forecast = model.predict(future)

    # Get the forecasted sales for the next week
    next_week_forecast = forecast[['ds', 'yhat']].tail(periods)['yhat'].tolist()

    # Calculate MAPE on training data (excluding future predictions)
    train_forecast = forecast[['ds', 'yhat']].head(len(train_data))
    mape = calculate_mape(train_data['y'], train_forecast['yhat'])

    return pizza_type, next_week_forecast, mape

**Forecasting for the Next Week**

In [30]:
def forecast_next_week(pizza_sales_weekly_by_pizza, periods=1):
    """Forecast sales for the next week for all pizza types using Prophet."""
    pizza_type_forecasts = {}
    mape_scores = {}

    # Calculate total sales per pizza type
    total_units_sold = pizza_sales_weekly_by_pizza.groupby('pizza_name_id')['quantity'].sum()

    # Get the last week's date range
    last_week = pizza_sales_weekly_by_pizza.index.max()
    next_week_start = last_week + pd.Timedelta(weeks=1)
    next_week_end = next_week_start + pd.Timedelta(days=6)

    # Loop through each pizza type
    for pizza_type in pizza_sales_weekly_by_pizza['pizza_name_id'].unique():
        # Filter the data for this pizza type
        pizza_type_data = pizza_sales_weekly_by_pizza[pizza_sales_weekly_by_pizza['pizza_name_id'] == pizza_type]

        # Split into training data
        train_data = split_data(pizza_type_data)

        # Forecast sales for the next week for this pizza type
        pizza_type, forecast, mape = train_and_forecast_sales_prophet(pizza_type, train_data, periods)

        # Store the forecast in the dictionary
        if forecast:
            pizza_type_forecasts[pizza_type] = forecast[0]  # Access the first value of the forecast list
            mape_scores[pizza_type] = mape  # Store MAPE score

    return pizza_type_forecasts, mape_scores, total_units_sold, next_week_start, next_week_end


**Displaying the Results**

In [31]:
def display_forecasts(forecasts, mape_scores, next_week_start, next_week_end):
    """Display forecasted sales results and MAPE scores."""
    print(f"Forecasted sales for the week starting {next_week_start.date()}")
    total_forecasted_quantity = 0  # To accumulate the forecasted quantities

    # Loop through each forecast and display
    for pizza_type, forecast in forecasts.items():
        forecast = int(forecast)
        forecasts[pizza_type] = forecast
        print(f'{pizza_type}: {forecast} pizzas')
        total_forecasted_quantity += forecast

    # Display total quantity of pizzas forecasted
    print(f"Total quantity of units forecasted to be sold in the next week: {total_forecasted_quantity} pizzas")

    # Display MAPE for each pizza type
    print("\nMAPE scores:")
    for pizza_type, mape in mape_scores.items():
        print(f"{pizza_type}: {mape:.2f}%")

**Finally Calling all the Functions to Executing the code**

In [34]:
if __name__ == "__main__":
    # Prepare the data
    pizza_sales_weekly_by_pizza = prepare_data(pizza_sales_data)

    # Forecast pizza sales for the next week using Prophet
    forecasts, mape_scores, total_units_sold, next_week_start, next_week_end = forecast_next_week(pizza_sales_weekly_by_pizza, periods=1)

    # Create a DataFrame with pizza_name_id, next week quantity, and MAPE
    forecast_df = pd.DataFrame({
        'pizza_name_id': list(forecasts.keys()),
        'next_week_quantity': list(forecasts.values()),
        'MAPE': list(mape_scores.values())
    })

    # Display the forecasted results
    display_forecasts(forecasts, mape_scores, next_week_start, next_week_end)

    # Export the DataFrame to a CSV file
    forecast_df.to_csv("Prophet_Forecasted_Result.csv", index=False)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpx6qsk0bj/a3k_z9mh.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpx6qsk0bj/d7cq31nr.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=29745', 'data', 'file=/tmp/tmpx6qsk0bj/a3k_z9mh.json', 'init=/tmp/tmpx6qsk0bj/d7cq31nr.json', 'output', 'file=/tmp/tmpx6qsk0bj/prophet_modelbe18j1lg/prophet_model-20241004051838.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
05:18:38 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:18:38 - cmdstanpy - INFO - Chai

Forecasted sales for the week 2016-01-04 to 2016-01-10:
bbq_ckn_l: 16 pizzas
bbq_ckn_m: 18 pizzas
bbq_ckn_s: 9 pizzas
big_meat_s: 40 pizzas
brie_carre_s: 9 pizzas
calabrese_l: 5 pizzas
calabrese_m: 8 pizzas
cali_ckn_l: 18 pizzas
cali_ckn_m: 18 pizzas
cali_ckn_s: 9 pizzas
ckn_alfredo_l: 3 pizzas
ckn_alfredo_m: 14 pizzas
ckn_alfredo_s: 2 pizzas
ckn_pesto_l: 6 pizzas
ckn_pesto_m: 4 pizzas
ckn_pesto_s: 6 pizzas
classic_dlx_l: 8 pizzas
classic_dlx_m: 23 pizzas
classic_dlx_s: 15 pizzas
five_cheese_l: 27 pizzas
four_cheese_l: 24 pizzas
four_cheese_m: 10 pizzas
green_garden_l: 2 pizzas
green_garden_m: 6 pizzas
green_garden_s: 11 pizzas
hawaiian_l: 17 pizzas
hawaiian_m: 10 pizzas
hawaiian_s: 17 pizzas
ital_cpcllo_l: 15 pizzas
ital_cpcllo_m: 7 pizzas
ital_cpcllo_s: 4 pizzas
ital_supr_l: 14 pizzas
ital_supr_m: 19 pizzas
ital_supr_s: 3 pizzas
ital_veggie_m: 8 pizzas
ital_veggie_s: 5 pizzas
mediterraneo_l: 6 pizzas
mediterraneo_m: 5 pizzas
mediterraneo_s: 5 pizzas
mexicana_l: 17 pizzas
mexicana_m: 

XGBOOST Model

**Importing the Modules Needed**

In [42]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
import warnings

**Creating the Lag Features**

In [43]:
def create_lag_features(df, lag=3):
    """Create lag features for time series forecasting."""
    lag_features = pd.DataFrame()

    for i in range(1, lag + 1):
        lag_features[f'lag_{i}'] = df.shift(i)

    lag_features['quantity'] = df
    lag_features.dropna(inplace=True)  # Remove rows with missing values due to lagging
    return lag_features

**Training the XGBoost and Forcasting the Quantity**

In [57]:
def forecast_sales_per_pizza_type_xgboost(pizza_type, data, periods=1, lag=3):
    """Forecast sales for a specific pizza type using XGBoost."""
    # Create lag features
    lagged_data = create_lag_features(data, lag=lag)

    # Split into features (X) and target (y)
    X = lagged_data.drop('quantity', axis=1)
    y = lagged_data['quantity']

    # Split data into train and test sets (80% train, 30% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

    # Train the XGBoost model
    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=200, max_depth=4, learning_rate=0.01, random_state=42)
    model.fit(X_train, y_train)

    # Forecast the next period (next week)
    last_known_data = lagged_data.tail(1).drop('quantity', axis=1)  # Use the last known values as features
    forecast = model.predict(last_known_data)

    # Calculate MAPE on the test data
    y_pred = model.predict(X_test)
    mape = calculate_mape(y_test, y_pred)

    return pizza_type, forecast.tolist(), mape

**Forcasting the Quantity needed for each Pizza_name_id for Next Week**

In [45]:
def forecast_next_week_sales_by_pizza_type_xgboost(pizza_sales_weekly_by_pizza, periods=1, lag=3):
    """Forecast sales for the next week for all pizza types using XGBoost."""
    pizza_type_forecasts = {}
    mape_scores = {}

    # Get the last week's date range (for prediction display purposes)
    last_week = pizza_sales_weekly_by_pizza.index.max()
    next_week_start = last_week + pd.Timedelta(weeks=1)
    next_week_end = next_week_start + pd.Timedelta(days=6)

    # Loop through each pizza type
    for pizza_type in pizza_sales_weekly_by_pizza['pizza_name_id'].unique():
        # Filter the data for this pizza type
        pizza_type_data = pizza_sales_weekly_by_pizza[pizza_sales_weekly_by_pizza['pizza_name_id'] == pizza_type]

        # Forecast sales for the next week for this pizza type
        pizza_type, forecast, mape = forecast_sales_per_pizza_type_xgboost(pizza_type, pizza_type_data['quantity'], periods, lag)

        # Store the forecast and MAPE in dictionaries
        if forecast:
            pizza_type_forecasts[pizza_type] = forecast[0]  # Access the first value of the forecast list
            mape_scores[pizza_type] = mape

    return pizza_type_forecasts, mape_scores, next_week_start, next_week_end

**Displaying the Forecasted Results**

In [46]:
def display_forecast_results(forecasts, mape_scores, next_week_start, next_week_end):
    """Display the forecasted sales and MAPE for each pizza type."""
    print(f"Forecasted sales for the week {next_week_start.date()} to {next_week_end.date()}:")
    total_forecasted_quantity = 0  # To accumulate the forecasted quantities

    for pizza_type, forecast in forecasts.items():
        forecast = int(forecast)
        print(f'{pizza_type}: {forecast} pizzas')
        total_forecasted_quantity += forecast

    # Display total quantity of pizzas forecasted
    print(f"Total quantity of units forecasted to be sold in the next week: {total_forecasted_quantity} pizzas")

    # Display MAPE for each pizza type
    print("\nMAPE scores:")
    for pizza_type, mape in mape_scores.items():
        print(f"{pizza_type}: {mape:.2f}%")

**Main Function**

In [59]:
if __name__ == "__main__":
    # Prepare the data
    pizza_sales_weekly_by_pizza = prepare_data(pizza_sales_data)

    # Forecast pizza sales for the next week and calculate MAPE
    next_week_pizza_sales_forecasts, mape_scores, next_week_start, next_week_end = forecast_next_week_sales_by_pizza_type_xgboost(pizza_sales_weekly_by_pizza, periods=1, lag=3)

    # Display the forecasted results
    display_forecast_results(next_week_pizza_sales_forecasts, mape_scores, next_week_start, next_week_end)

    # Step 8: Create a DataFrame with pizza_name_id, next week quantity, and MAPE
    forecast_df = pd.DataFrame({
        'pizza_name_id': list(next_week_pizza_sales_forecasts.keys()),
        'next_week_quantity': list(next_week_pizza_sales_forecasts.values()),
        'MAPE': list(mape_scores.values())
    })

    forecast_df.to_csv("XGBOOST_Forcasted_Results.csv", index=False)


Forecasted sales for the week 2016-01-04 to 2016-01-10:
bbq_ckn_l: 12 pizzas
bbq_ckn_m: 14 pizzas
bbq_ckn_s: 9 pizzas
big_meat_s: 25 pizzas
brie_carre_s: 9 pizzas
calabrese_l: 3 pizzas
calabrese_m: 4 pizzas
cali_ckn_l: 19 pizzas
cali_ckn_m: 12 pizzas
cali_ckn_s: 9 pizzas
ckn_alfredo_l: 2 pizzas
ckn_alfredo_m: 11 pizzas
ckn_alfredo_s: 2 pizzas
ckn_pesto_l: 4 pizzas
ckn_pesto_m: 4 pizzas
ckn_pesto_s: 4 pizzas
classic_dlx_l: 8 pizzas
classic_dlx_m: 26 pizzas
classic_dlx_s: 10 pizzas
five_cheese_l: 15 pizzas
four_cheese_l: 20 pizzas
four_cheese_m: 6 pizzas
green_garden_l: 2 pizzas
green_garden_m: 4 pizzas
green_garden_s: 16 pizzas
hawaiian_l: 10 pizzas
hawaiian_m: 7 pizzas
hawaiian_s: 14 pizzas
ital_cpcllo_l: 8 pizzas
ital_cpcllo_m: 9 pizzas
ital_cpcllo_s: 5 pizzas
ital_supr_l: 9 pizzas
ital_supr_m: 11 pizzas
ital_supr_s: 4 pizzas
ital_veggie_m: 5 pizzas
ital_veggie_s: 5 pizzas
mediterraneo_l: 3 pizzas
mediterraneo_m: 4 pizzas
mediterraneo_s: 3 pizzas
mexicana_l: 10 pizzas
mexicana_m: 4 pi

**Now the Forcasted Quantity for all the Model is done Let rename the Columns in all the Dataset for Clear Understanding**

In [60]:
Arima_Model=pd.read_csv('/content/ARIMA_Forecasted_result.csv')
Arima_Model.head()

Unnamed: 0,pizza_name_id,predicted_quantity,mape
0,bbq_ckn_l,19,0.328097
1,bbq_ckn_m,18,0.421951
2,bbq_ckn_s,8,0.284963
3,big_meat_s,37,0.211025
4,brie_carre_s,9,0.380018


**Lets_ rename It**

In [61]:
Arima_Model.rename(columns={'predicted_quantity':'Arima_Predicted_Quantity','mape':'Arima_mape'},inplace=True)
Arima_Model.head()

Unnamed: 0,pizza_name_id,Arima_Predicted_Quantity,Arima_mape
0,bbq_ckn_l,19,0.328097
1,bbq_ckn_m,18,0.421951
2,bbq_ckn_s,8,0.284963
3,big_meat_s,37,0.211025
4,brie_carre_s,9,0.380018


**Sarima Model**

In [62]:
Sarima_Model=pd.read_csv('/content/SARIMA_Forecasted_Result.csv')
Sarima_Model.head()

Unnamed: 0,pizza_name_id,next_week_quantity,MAPE
0,bbq_ckn_l,17.670201,0.330073
1,bbq_ckn_m,18.621751,0.42501
2,bbq_ckn_s,9.67249,0.373132
3,big_meat_s,39.036142,0.241102
4,brie_carre_s,8.414301,0.390179


In [64]:
Sarima_Model.rename(columns={'next_week_quantity':'Sarima_Predicted_Quantity','MAPE':'Sarima_mape'},inplace=True)
Sarima_Model.head()

Unnamed: 0,pizza_name_id,Sarima_Predicted_Quantity,Sarima_mape
0,bbq_ckn_l,17.670201,0.330073
1,bbq_ckn_m,18.621751,0.42501
2,bbq_ckn_s,9.67249,0.373132
3,big_meat_s,39.036142,0.241102
4,brie_carre_s,8.414301,0.390179


Prophet Model

In [65]:
Prophet_Model=pd.read_csv('/content/Prophet_Forecasted_Result.csv')
Prophet_Model.head()

Unnamed: 0,pizza_name_id,next_week_quantity,MAPE
0,bbq_ckn_l,16.617704,0.209953
1,bbq_ckn_m,18.71274,0.169154
2,bbq_ckn_s,9.960162,0.449412
3,big_meat_s,40.726617,0.158759
4,brie_carre_s,9.825305,0.293974


In [66]:
Prophet_Model.rename(columns={'next_week_quantity':'Prophet_Predicted_Quantity','MAPE':'Prophet_mape'},inplace=True)
Prophet_Model.head()

Unnamed: 0,pizza_name_id,Prophet_Predicted_Quantity,Prophet_mape
0,bbq_ckn_l,16.617704,0.209953
1,bbq_ckn_m,18.71274,0.169154
2,bbq_ckn_s,9.960162,0.449412
3,big_meat_s,40.726617,0.158759
4,brie_carre_s,9.825305,0.293974


In [67]:
XGBoost_Model=pd.read_csv('/content/XGBOOST_Forcasted_Results.csv')
XGBoost_Model.head()

Unnamed: 0,pizza_name_id,next_week_quantity,MAPE
0,bbq_ckn_l,12.182958,0.248165
1,bbq_ckn_m,14.152276,0.305642
2,bbq_ckn_s,9.234591,0.408613
3,big_meat_s,25.036869,0.195002
4,brie_carre_s,9.471176,0.473958


In [68]:
XGBoost_Model.rename(columns={'next_week_quantity':'XGBoost_Predicted_Quantity','MAPE':'XGBoost_mape'},inplace=True)
XGBoost_Model.head()

Unnamed: 0,pizza_name_id,XGBoost_Predicted_Quantity,XGBoost_mape
0,bbq_ckn_l,12.182958,0.248165
1,bbq_ckn_m,14.152276,0.305642
2,bbq_ckn_s,9.234591,0.408613
3,big_meat_s,25.036869,0.195002
4,brie_carre_s,9.471176,0.473958


**Now Lets Merge the Four Dataframe with respect to pizza_name_id since it was common in all four DataFrame**

In [70]:

import pandas as pd
merged_df = pd.merge(Arima_Model, Sarima_Model, on='pizza_name_id', how='outer')
merged_df = pd.merge(merged_df, Prophet_Model, on='pizza_name_id', how='outer')
merged_df = pd.merge(merged_df, XGBoost_Model, on='pizza_name_id', how='outer')
merged_df.head()

Unnamed: 0,pizza_name_id,Arima_Predicted_Quantity,Arima_mape,Sarima_Predicted_Quantity,Sarima_mape,Prophet_Predicted_Quantity,Prophet_mape,XGBoost_Predicted_Quantity,XGBoost_mape
0,bbq_ckn_l,19,0.328097,17.670201,0.330073,16.617704,0.209953,12.182958,0.248165
1,bbq_ckn_m,18,0.421951,18.621751,0.42501,18.71274,0.169154,14.152276,0.305642
2,bbq_ckn_s,8,0.284963,9.67249,0.373132,9.960162,0.449412,9.234591,0.408613
3,big_meat_s,37,0.211025,39.036142,0.241102,40.726617,0.158759,25.036869,0.195002
4,brie_carre_s,9,0.380018,8.414301,0.390179,9.825305,0.293974,9.471176,0.473958


**Now Export This Dataframe If incase it might Usefull for Future**

In [71]:
merged_df.to_csv('Combined_forecasted_result.csv', index=False)

**Now Find out a Final quantity for each model based on the Models performances**

In [72]:
import pandas as pd

# Load the CSV file
file_path = '/content/Combined_forecasted_result.csv'
data = pd.read_csv(file_path)

# Function to find the best model based on the lowest MAPE score
def find_best_model(row):
    # Dictionary to map model names to their MAPE scores and quantities
    models = {
        'Arima': (row['Arima_mape'], row['Arima_Predicted_Quantity']),
        'Sarima': (row['Sarima_mape'], row['Sarima_Predicted_Quantity']),
        'Prophet': (row['Prophet_mape'], row['Prophet_Predicted_Quantity']),
        'XGBoost': (row['XGBoost_mape'], row['XGBoost_Predicted_Quantity'])
    }

    # Find the model with the lowest MAPE score
    best_model = min(models, key=lambda k: models[k][0])

    # Return the best model's quantity, lowest MAPE score, and model name
    return pd.Series({
        'quantity': models[best_model][1],
        'lowest_mape_score': models[best_model][0],
        'best_model': best_model
    })

# Apply the function to each row to create new columns
data[['quantity', 'lowest_mape_score', 'best_model']] = data.apply(find_best_model, axis=1)

data.to_csv('Final_ForeCasted_Data.csv', index=False)

# Display the result
print(data.head())


  pizza_name_id  Arima_Predicted_Quantity  Arima_mape  \
0     bbq_ckn_l                        19    0.328097   
1     bbq_ckn_m                        18    0.421951   
2     bbq_ckn_s                         8    0.284963   
3    big_meat_s                        37    0.211025   
4  brie_carre_s                         9    0.380018   

   Sarima_Predicted_Quantity  Sarima_mape  Prophet_Predicted_Quantity  \
0                  17.670201     0.330073                   16.617704   
1                  18.621751     0.425010                   18.712740   
2                   9.672490     0.373132                    9.960162   
3                  39.036142     0.241102                   40.726617   
4                   8.414301     0.390179                    9.825305   

   Prophet_mape  XGBoost_Predicted_Quantity  XGBoost_mape   quantity  \
0      0.209953                   12.182958      0.248165  16.617704   
1      0.169154                   14.152276      0.305642  18.712740   
2 

**Let's load and See**

In [73]:
Final_Quantity=pd.read_csv('/content/Final_ForeCasted_Data.csv')
Final_Quantity.head()

Unnamed: 0,pizza_name_id,Arima_Predicted_Quantity,Arima_mape,Sarima_Predicted_Quantity,Sarima_mape,Prophet_Predicted_Quantity,Prophet_mape,XGBoost_Predicted_Quantity,XGBoost_mape,quantity,lowest_mape_score,best_model
0,bbq_ckn_l,19,0.328097,17.670201,0.330073,16.617704,0.209953,12.182958,0.248165,16.617704,0.209953,Prophet
1,bbq_ckn_m,18,0.421951,18.621751,0.42501,18.71274,0.169154,14.152276,0.305642,18.71274,0.169154,Prophet
2,bbq_ckn_s,8,0.284963,9.67249,0.373132,9.960162,0.449412,9.234591,0.408613,8.0,0.284963,Arima
3,big_meat_s,37,0.211025,39.036142,0.241102,40.726617,0.158759,25.036869,0.195002,40.726617,0.158759,Prophet
4,brie_carre_s,9,0.380018,8.414301,0.390179,9.825305,0.293974,9.471176,0.473958,9.825305,0.293974,Prophet


In [79]:


# Round the 'quantity' column and update it in place
#Final_Quantity['quantity'] = Final_Quantity['quantity'].round()
print(Final_Quantity.quantity.sum())
print((Final_Quantity.lowest_mape_score.median()))
Final_Quantity.head()



902.0
0.3045989720277652


Unnamed: 0,pizza_name_id,Arima_Predicted_Quantity,Arima_mape,Sarima_Predicted_Quantity,Sarima_mape,Prophet_Predicted_Quantity,Prophet_mape,XGBoost_Predicted_Quantity,XGBoost_mape,quantity,lowest_mape_score,best_model
0,bbq_ckn_l,19,0.328097,17.670201,0.330073,16.617704,0.209953,12.182958,0.248165,17.0,0.209953,Prophet
1,bbq_ckn_m,18,0.421951,18.621751,0.42501,18.71274,0.169154,14.152276,0.305642,19.0,0.169154,Prophet
2,bbq_ckn_s,8,0.284963,9.67249,0.373132,9.960162,0.449412,9.234591,0.408613,8.0,0.284963,Arima
3,big_meat_s,37,0.211025,39.036142,0.241102,40.726617,0.158759,25.036869,0.195002,41.0,0.158759,Prophet
4,brie_carre_s,9,0.380018,8.414301,0.390179,9.825305,0.293974,9.471176,0.473958,10.0,0.293974,Prophet




> So the total Quantity Needed for the Next Week is **902** and the overall Mape score is **0.30**



In [80]:
# now take only needed Columns
Final_Quantity=Final_Quantity[['pizza_name_id','quantity','lowest_mape_score','best_model']]
Final_Quantity.head()

Unnamed: 0,pizza_name_id,quantity,lowest_mape_score,best_model
0,bbq_ckn_l,17.0,0.209953,Prophet
1,bbq_ckn_m,19.0,0.169154,Prophet
2,bbq_ckn_s,8.0,0.284963,Arima
3,big_meat_s,41.0,0.158759,Prophet
4,brie_carre_s,10.0,0.293974,Prophet


# Merging with the Ingredient Dataset

In [83]:
import pandas as pd

# Use pd.read_excel to read .xlsx files and specify the encoding
Pizza_ingredient = pd.read_excel(r'/content/Pizza_ingredients.xlsx', engine='openpyxl')
Pizza_ingredient.head()

Unnamed: 0,pizza_name_id,pizza_name,pizza_ingredients,Items_Qty_In_Grams
0,bbq_ckn_l,The Barbecue Chicken Pizza,Barbecued Chicken,40.0
1,bbq_ckn_l,The Barbecue Chicken Pizza,Red Peppers,15.0
2,bbq_ckn_l,The Barbecue Chicken Pizza,Green Peppers,20.0
3,bbq_ckn_l,The Barbecue Chicken Pizza,Tomatoes,30.0
4,bbq_ckn_l,The Barbecue Chicken Pizza,Red Onions,60.0


In [84]:

import pandas as pd
# Merge Final_Quantity and Pizza_ingredient datasets on 'pizza_name_id'
merged_df = pd.merge(Final_Quantity, Pizza_ingredient, on='pizza_name_id', how='left')

# Display the merged DataFrame
merged_df.head()


Unnamed: 0,pizza_name_id,quantity,lowest_mape_score,best_model,pizza_name,pizza_ingredients,Items_Qty_In_Grams
0,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Barbecued Chicken,40.0
1,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Red Peppers,15.0
2,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Green Peppers,20.0
3,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Tomatoes,30.0
4,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Red Onions,60.0


**Lets Find out the Actuall Quantity Needed for Each Ingredient**

In [85]:
merged_df['Inc_Quantity']=merged_df['quantity']*merged_df['Items_Qty_In_Grams']
merged_df.head()

Unnamed: 0,pizza_name_id,quantity,lowest_mape_score,best_model,pizza_name,pizza_ingredients,Items_Qty_In_Grams,Inc_Quantity
0,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Barbecued Chicken,40.0,680.0
1,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Red Peppers,15.0,255.0
2,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Green Peppers,20.0,340.0
3,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Tomatoes,30.0,510.0
4,bbq_ckn_l,17.0,0.209953,Prophet,The Barbecue Chicken Pizza,Red Onions,60.0,1020.0


**Exporting the Finale Forecated Quantity for each ingredient**

In [90]:
#Final=merged_df[['pizza_name_id','pizza_name','pizza_ingredients','Inc_Quantity']]
Final.head(20)

Unnamed: 0,pizza_name_id,pizza_name,pizza_ingredients,Inc_Quantity
0,bbq_ckn_l,The Barbecue Chicken Pizza,Barbecued Chicken,680.0
1,bbq_ckn_l,The Barbecue Chicken Pizza,Red Peppers,255.0
2,bbq_ckn_l,The Barbecue Chicken Pizza,Green Peppers,340.0
3,bbq_ckn_l,The Barbecue Chicken Pizza,Tomatoes,510.0
4,bbq_ckn_l,The Barbecue Chicken Pizza,Red Onions,1020.0
5,bbq_ckn_l,The Barbecue Chicken Pizza,Barbecue Sauce,255.0
6,bbq_ckn_m,The Barbecue Chicken Pizza,Barbecued Chicken,570.0
7,bbq_ckn_m,The Barbecue Chicken Pizza,Red Peppers,190.0
8,bbq_ckn_m,The Barbecue Chicken Pizza,Green Peppers,285.0
9,bbq_ckn_m,The Barbecue Chicken Pizza,Tomatoes,380.0


In [88]:


import pandas as pd
# Group by 'pizza_ingredients' and sum the 'Inc_Quantity'
ingredient_quantities = Final.groupby('pizza_ingredients')['Inc_Quantity'].sum()

# Create a DataFrame from the grouped data
purchase_order_df = pd.DataFrame({'Ingredient': ingredient_quantities.index, 'Total_Quantity_Needed': ingredient_quantities.values})

# Export the DataFrame to a CSV file named 'Purchace_order.csv'
purchase_order_df.to_csv('Purchace_order.csv', index=False)

# Print a message to indicate that the file has been exported
print("Purchase order exported to Purchace_order.csv")


Purchase order exported to Purchace_order.csv


In [87]:
Final.to_csv('Final_Inc_Quantity.csv', index=False)