### <b><span style='color:#F1C40F'>|</span> Home task</b>

- Choose any store from the initial dataset
- Check the presence of nans and fill them
- Make a forecast for 30, 180, 270, 365 days ahead
- Perform model evaluation

In [81]:

import plotly.express as px
import pandas as pd
import numpy as np

# model evaluation
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import warnings
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

warnings.filterwarnings('ignore')

## Loading data

In [82]:
def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    df.date = pd.to_datetime(df.date)
    df['day_of_week'] = df['date'].dt.day_name()
    
    return df


# load train dataset | convert datatype of "date" column from "object" to "datetime"
stores_df = pd.read_csv("train.csv")
stores_df = preprocess_data(stores_df)
stores_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,day_of_week
0,0,2013-01-01,1,AUTOMOTIVE,0.000,0,Tuesday
1,1,2013-01-01,1,BABY CARE,0.000,0,Tuesday
2,2,2013-01-01,1,BEAUTY,0.000,0,Tuesday
3,3,2013-01-01,1,BEVERAGES,0.000,0,Tuesday
4,4,2013-01-01,1,BOOKS,0.000,0,Tuesday
...,...,...,...,...,...,...,...
3000883,3000883,2017-08-15,9,POULTRY,438.133,0,Tuesday
3000884,3000884,2017-08-15,9,PREPARED FOODS,154.553,1,Tuesday
3000885,3000885,2017-08-15,9,PRODUCE,2419.729,148,Tuesday
3000886,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8,Tuesday


## Choos the shope

In [83]:
stores_df = stores_df.set_index('id')
stores_df = stores_df[stores_df['store_nbr'] == 12]

# Sorting by data
stores_df = stores_df.sort_values(by='date', ascending=True)

stores_df

Unnamed: 0_level_0,date,store_nbr,family,sales,onpromotion,day_of_week
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
99,2013-01-01,12,AUTOMOTIVE,0.0,0,Tuesday
131,2013-01-01,12,SEAFOOD,0.0,0,Tuesday
130,2013-01-01,12,SCHOOL AND OFFICE SUPPLIES,0.0,0,Tuesday
129,2013-01-01,12,PRODUCE,0.0,0,Tuesday
128,2013-01-01,12,PREPARED FOODS,0.0,0,Tuesday
...,...,...,...,...,...,...
2999216,2017-08-15,12,FROZEN FOODS,23.0,0,Tuesday
2999217,2017-08-15,12,GROCERY I,2461.0,37,Tuesday
2999218,2017-08-15,12,GROCERY II,8.0,0,Tuesday
2999211,2017-08-15,12,CELEBRATION,10.0,0,Tuesday


## Check the presence of nans and fill them

In [84]:
Check_NaN = stores_df.isnull().sum()

if any(Check_NaN > 0):
    print(f'Yes NaN, NaN values {Check_NaN}')
    stores_df.fillna(0.0, inplace=True)
else:
    print("No NaN")

No NaN


# Make a forecast for 30, 180, 270, 365 days ahead

### Preprocess data to needed format

In [85]:

fbp_set = stores_df[['date', 'sales']]
fbp_set.rename(columns={"date": "ds", "sales": "y"}, inplace=True)
fbp_set.fillna(0, inplace=True)
fbp_set[:10]

Unnamed: 0_level_0,ds,y
id,Unnamed: 1_level_1,Unnamed: 2_level_1
99,2013-01-01,0.0
131,2013-01-01,0.0
130,2013-01-01,0.0
129,2013-01-01,0.0
128,2013-01-01,0.0
127,2013-01-01,0.0
126,2013-01-01,0.0
125,2013-01-01,0.0
124,2013-01-01,0.0
122,2013-01-01,0.0


### Creating the necessary functions

In [86]:
def mean_absolute_percentage_error(y_true, y_pred):
    non_zero_mask = y_true != 0
    y_true = y_true[non_zero_mask]
    y_pred = y_pred[non_zero_mask]

    if len(y_true) == 0:
        return np.inf

    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    return mape


def evaluate_forecasting_model(actual_values, predicted_values, round_nbr=2):
    mape = mean_absolute_percentage_error(actual_values, predicted_values)
    mae = mean_absolute_error(actual_values, predicted_values)
    mse = mean_squared_error(actual_values, predicted_values)

    print(f"MAE - {round(mae, round_nbr)}")
    print(f"MSE - {round(mse, round_nbr)}")
    print(f"MAPE - {round(mape, round_nbr)}")


def forecast_and_evaluate_prophet(dataset, window):
    train, test = dataset[:-window], dataset[-window:]

    train.rename(columns={'date': 'ds', 'sales': 'y'}, inplace=True)
    test.rename(columns={'date': 'ds', 'sales': 'y'}, inplace=True)

    model = Prophet()
    model.fit(train)

    future_sales = model.make_future_dataframe(periods=window)

    forecast = model.predict(future_sales)

    actual_values = test['y'].values
    predicted_values = forecast[-window:]['yhat'].values

    print(f'Make a forecast for {window} days ahead: ')
    evaluate_forecasting_model(
        actual_values=actual_values,
        predicted_values=predicted_values,
        round_nbr=3
    )

### Make a forecast for 30, 180, 270, 365 days ahead

In [87]:
window = [30,180,270,365]

for i in window:
    forecast_and_evaluate_prophet(stores_df,i)
    print('\n')

09:12:56 - cmdstanpy - INFO - Chain [1] start processing
09:13:16 - cmdstanpy - INFO - Chain [1] done processing


Make a forecast for 30 days ahead: 
MAE - 302.953
MSE - 273230.953
MAPE - 3217.115




09:13:21 - cmdstanpy - INFO - Chain [1] start processing
09:13:38 - cmdstanpy - INFO - Chain [1] done processing


Make a forecast for 180 days ahead: 
MAE - 314.972
MSE - 268710.867
MAPE - 3359.169




09:13:42 - cmdstanpy - INFO - Chain [1] start processing
09:13:54 - cmdstanpy - INFO - Chain [1] done processing


Make a forecast for 270 days ahead: 
MAE - 316.644
MSE - 288734.025
MAPE - 3444.71




09:13:58 - cmdstanpy - INFO - Chain [1] start processing
09:14:04 - cmdstanpy - INFO - Chain [1] done processing


Make a forecast for 365 days ahead: 
MAE - 323.697
MSE - 298501.724
MAPE - 3832.569


