### Importação das bibliotecas

In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import os

from datetime import datetime
from tqdm import tqdm
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from functools import reduce
from bizdays import Calendar

from functions import forecast_comparisson, extract_date_features

CAL = Calendar.load('ANBIMA')

In [2]:
jan = pd.read_csv(r"C:\Users\Yamac\OneDrive\Documentos\Programação\Trampo\Forecasting\Forecasting-techniques\Datasets\Sales_January_2019.csv")

In [3]:
jan['Quantity Ordered'][0]

'1'

### Leitura e tratamento dos dados

* Os dados estão separados por meses em planilhas diferentes
* Como as planilhas seguem a mesma estrutura, vou passar um loop e concatenar todos eles

In [4]:
folderpath = r"C:\Users\Yamac\OneDrive\Documentos\Programação\Trampo\Forecasting\Forecasting-techniques\Datasets"
datasets = []

for file in os.listdir(folderpath):
    dataset = pd.read_csv(os.path.join(folderpath, file))
    dataset = dataset[dataset['Order ID'] != 'Order ID']
    datasets.append(dataset)

In [99]:
df = pd.concat(datasets, axis=0, ignore_index=True)
df.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
0,176558.0,USB-C Charging Cable,2.0,11.95,04/19/19 08:46,"917 1st St, Dallas, TX 75001"
1,,,,,,
2,176559.0,Bose SoundSport Headphones,1.0,99.99,04/07/19 22:30,"682 Chestnut St, Boston, MA 02215"
3,176560.0,Google Phone,1.0,600.0,04/12/19 14:38,"669 Spruce St, Los Angeles, CA 90001"
4,176560.0,Wired Headphones,1.0,11.99,04/12/19 14:38,"669 Spruce St, Los Angeles, CA 90001"


In [6]:
df.shape

(186495, 6)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186495 entries, 0 to 186494
Data columns (total 6 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   Order ID          185950 non-null  object
 1   Product           185950 non-null  object
 2   Quantity Ordered  185950 non-null  object
 3   Price Each        185950 non-null  object
 4   Order Date        185950 non-null  object
 5   Purchase Address  185950 non-null  object
dtypes: object(6)
memory usage: 8.5+ MB


* Para identificar o mês a que as vendas se referem, é preciso alterar o formato da coluna "Order Date"
* Também vou separar a informação de horário em outra coluna

In [100]:
# Convert Order Data to datetime
df['Order Date'] = pd.to_datetime(df['Order Date'])

In [101]:
# Drop null values
df.dropna(inplace=True)

In [102]:
df['Hour'] = df['Order Date'].dt.hour
# Remove hour from Ordeer Date
df['Order Date'] = df['Order Date'].dt.date
df.head()

Unnamed: 0,Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address,Hour
0,176558,USB-C Charging Cable,2,11.95,2019-04-19,"917 1st St, Dallas, TX 75001",8
2,176559,Bose SoundSport Headphones,1,99.99,2019-04-07,"682 Chestnut St, Boston, MA 02215",22
3,176560,Google Phone,1,600.0,2019-04-12,"669 Spruce St, Los Angeles, CA 90001",14
4,176560,Wired Headphones,1,11.99,2019-04-12,"669 Spruce St, Los Angeles, CA 90001",14
5,176561,Wired Headphones,1,11.99,2019-04-30,"333 8th St, Los Angeles, CA 90001",9


In [103]:
# Sort dataframe cronologically
df.sort_values(by='Order Date', inplace=True)

In [104]:
df.rename(columns={'Quantity Ordered':'Amount', 'Price Each':'Price', 'Order Date':'Date', 'Purchase Address':'Address'}, inplace=True)

In [105]:
df['Hour'] = pd.to_datetime(df['Hour'], format='%H').dt.time
df['Date'] = pd.to_datetime(df['Date'])

In [106]:
# Create columns for city and state
df['City'] = df['Address'].apply(lambda x: x.split(',')[1])
df['State'] = df['Address'].apply(lambda x: x.split(',')[2].split(' ')[1])

#### Visualização dos dados

In [15]:
df.head()

Unnamed: 0,Order ID,Product,Amount,Price,Date,Address,Hour,City,State
68289,142066,27in 4K Gaming Monitor,1,389.99,2019-01-01,"110 Dogwood St, Seattle, WA 98101",22:00:00,Seattle,WA
76162,149579,Macbook Pro Laptop,1,1700.0,2019-01-01,"61 Lakeview St, Dallas, TX 75001",10:00:00,Dallas,TX
69491,143202,ThinkPad Laptop,1,999.99,2019-01-01,"129 Walnut St, Los Angeles, CA 90001",18:00:00,Los Angeles,CA
74481,147963,Flatscreen TV,1,300.0,2019-01-01,"655 Meadow St, Austin, TX 73301",11:00:00,Austin,TX
73301,146844,Flatscreen TV,1,300.0,2019-01-01,"593 Church St, New York City, NY 10001",11:00:00,New York City,NY


In [107]:
df['Amount'] = df['Amount'].astype('int')
df['Price'] = df['Price'].astype('float')

In [65]:
daily_sales = pd.DataFrame(df.groupby('Date')['Amount'].sum())
px.line(daily_sales, x=daily_sales.index, y='Amount', title='Daily Sales')

In [66]:
daily_revenue = pd.DataFrame(df.groupby('Date')['Price'].sum())
px.line(daily_revenue, x=daily_revenue.index, y='Price', title='Daily Revenue')

In [67]:
hyped_hours = pd.DataFrame(df.groupby('Hour')['Amount'].sum())
px.bar(hyped_hours, x=hyped_hours.index, y='Amount', title='Periods of the day with more sales')

In [68]:
# Number of unique products
df['Product'].unique()

array(['27in 4K Gaming Monitor', 'Macbook Pro Laptop', 'ThinkPad Laptop',
       'Flatscreen TV', 'USB-C Charging Cable', '34in Ultrawide Monitor',
       'Lightning Charging Cable', 'Bose SoundSport Headphones',
       'LG Dryer', 'AA Batteries (4-pack)', 'Apple Airpods Headphones',
       'Wired Headphones', 'AAA Batteries (4-pack)', '27in FHD Monitor',
       'iPhone', '20in Monitor', 'Google Phone', 'Vareebadd Phone',
       'LG Washing Machine'], dtype=object)

* É um pouco estranho, mas de fato, são apenas 19 produtos diferentes

In [69]:
product_sales_amount = pd.DataFrame(df.groupby('Product')['Amount'].sum()).sort_values('Amount', ascending=True)
px.bar(product_sales_amount, x='Amount', y=product_sales_amount.index, orientation='h', title='Amount sold by each product', text='Amount')

In [70]:
product_sales_rev = pd.DataFrame(df.groupby('Product')['Price'].sum()).sort_values('Price', ascending=True)
px.bar(product_sales_rev, x='Price', y=product_sales_rev.index, orientation='h', title='Revenue by each product', text='Price')	

In [71]:
sales_city = pd.DataFrame(df.groupby('City')[['Amount', 'Price']].sum()).sort_values('Amount', ascending=True)
px.bar(sales_city, x='Amount', y=sales_city.index, orientation='h', title='Amount sold by each city', text='Amount')

In [72]:
sales_state = pd.DataFrame(df.groupby('State')[['Amount', 'Price']].sum()).sort_values('Amount', ascending=True)
px.bar(sales_state, x='Amount', y=sales_state.index, orientation='h', title='Amount sold by each state', text='Amount')

##### Insights
* Na comparação entre quantidade vendida e receita, vemos que os produtos mais vendidos são os que geram a menor quantidade de receita
* É relevante ter noção da diferença do preço dos produtos, pois mais para a frente queremos calular uma função de erro baseada em prejuízo
* Existe uma clara preferência nos horários de compra, que são mais altos no horário de almoço e no começo da noite e mais baixos de madrugada, começo da manhã e meio da tarde, que são horários em que as pessoas estão dormindo ou trabalhando. Saber horários de maior consumo pode ajudar a direcionar melhor notificações e promoções

## Modelos de previsão

* Aqui começam os trabalhos de previsão das vendas
* A ideia é tentar prever como será a venda de cada produto nos próximos 30 dias

#### Modelos ingênuos

* Modelos ingênuos vão servir como formas simples de previsão
* A ideia é servir como benchmark para os modelos mais sofisticados

#### 1. Previsão baseada no dia anterior

In [73]:
df.head()

Unnamed: 0,Order ID,Product,Amount,Price,Date,Address,Hour,City,State
68289,142066,27in 4K Gaming Monitor,1,389.99,2019-01-01,"110 Dogwood St, Seattle, WA 98101",22:00:00,Seattle,WA
76162,149579,Macbook Pro Laptop,1,1700.0,2019-01-01,"61 Lakeview St, Dallas, TX 75001",10:00:00,Dallas,TX
69491,143202,ThinkPad Laptop,1,999.99,2019-01-01,"129 Walnut St, Los Angeles, CA 90001",18:00:00,Los Angeles,CA
74481,147963,Flatscreen TV,1,300.0,2019-01-01,"655 Meadow St, Austin, TX 73301",11:00:00,Austin,TX
73301,146844,Flatscreen TV,1,300.0,2019-01-01,"593 Church St, New York City, NY 10001",11:00:00,New York City,NY


In [None]:
forecasts_last_day = []
eval_last_day = []

# We're using a loop to forecast each product individually
for product in df['Product'].unique():
    sales_filtered_last = df[df['Product'] == product].sort_values(by='Date', ascending=True)
    sales_filtered_last = sales_filtered_last[['Product', 'Date', 'Amount']]
    sales_filtered_last = sales_filtered_last.groupby('Date').sum()
    sales_filtered_last['Product'] = product
    sales_filtered_last = sales_filtered_last.resample('D').ffill().fillna(0)  # Fill missing dates with 0

    train_last = sales_filtered_last.iloc[:-30] # Split in train and test by date -> in this case we want to use the last 30 days to test
    test_last = sales_filtered_last.iloc[-30:]
    
    train_last['Forecast'] = train_last['Amount'].shift(1)  # Shift the amount of the previous day to the next day
    test_last['Forecast'] = train_last['Forecast'].iloc[-1]  # As the test won't have the real last day sales, we're filling it with the last 
                                                             # predicted value
    forecasts_last_day.append(train_last)
    forecasts_last_day.append(test_last)

    predicted = test_last['Forecast'].values
    real = (test_last['Amount'] + 0.1).values  # Here we're adding 0.1 to avoid division by 0

    mape = np.mean(np.abs(predicted - real)/np.abs(real))
    mae = mean_absolute_error(real, predicted)
    mpe = np.abs(np.mean((real - predicted)/real)*100)
    rmse = np.sqrt(mean_squared_error(real, predicted))

    eval_last_day.append({'Product': product, 'MAPE':mape, 'MAE':mae, 'MPE':mpe, 'RMSE':rmse,
                        'Total Sold':real.sum(), 'Total Forecasted':predicted.sum()}) 

last_day = pd.concat(forecasts_last_day)
last_day['Forecast'].fillna(0, inplace=True)
eval_last_df = pd.DataFrame(eval_last_day)

In [75]:
# Forecasts for the next 30 days
last_day.tail()

Unnamed: 0_level_0,Amount,Product,Forecast
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-12-27,1,LG Washing Machine,4.0
2019-12-28,3,LG Washing Machine,4.0
2019-12-29,4,LG Washing Machine,4.0
2019-12-30,3,LG Washing Machine,4.0
2019-12-31,5,LG Washing Machine,4.0


In [76]:
# Evaluation metrics for the last 30 days for each product
eval_last_df.head(19)

Unnamed: 0,Product,MAPE,MAE,MPE,RMSE,Total Sold,Total Forecasted
0,27in 4K Gaming Monitor,1.262231,7.386667,125.596333,9.682803,805.0,1020.0
1,Macbook Pro Laptop,0.695026,6.58,18.679945,7.611176,606.0,450.0
2,ThinkPad Laptop,0.865251,8.34,86.244096,9.297132,502.0,750.0
3,Flatscreen TV,0.83206,3.993333,77.77564,5.826091,618.0,690.0
4,USB-C Charging Cable,0.869383,10.233333,78.886364,20.401225,3063.0,3090.0
5,34in Ultrawide Monitor,0.275578,8.02,24.812274,9.009439,827.0,600.0
6,Lightning Charging Cable,0.598181,12.226667,51.257634,20.6245,2902.0,2970.0
7,Bose SoundSport Headphones,0.661043,10.646667,38.658157,13.641481,1730.0,1530.0
8,LG Dryer,0.614777,2.3,61.477732,2.655184,99.0,30.0
9,AA Batteries (4-pack),0.306475,37.766667,30.647532,40.024284,3593.0,2460.0


In [77]:
forecast_comparisson(last_day, product_col='Product', real_sales_col='Amount', forecast_col='Forecast')

#### 2. Previsão baseada em médias móveis (rolling average)

In [None]:
eval_roll_avg = []
pred = []

for product in df['Product'].unique():
    sales_filtered = df[df['Product'] == product].sort_values(by='Date', ascending=True)
    sales_filtered = sales_filtered[['Product', 'Date', 'Amount']]
    sales_filtered = sales_filtered.groupby('Date').sum()
    sales_filtered['Product'] = product
    sales_filtered = sales_filtered.resample('D').ffill().fillna(0)

    train = sales_filtered.iloc[:-30] 
    window_size = 7
    windows = train['Amount'].rolling(window_size)  # Every group of 7 days
    moving_averages = windows.mean().tolist()  # Series with the mean of every window
    train = train.assign(Forecast=moving_averages)   
    train['Forecast'] = train['Forecast'].shift(1).fillna(0)

    test = pd.concat([train.tail(7), sales_filtered.iloc[-30:]])  # Test is the last 30 days plus the last 7 days of train to get the mean. 
                                                                  # After that, the mean will be calculated from the forecasted values

    for index, value in enumerate(test['Forecast']):
        if pd.isna(value):
            test['Forecast'].iloc[index] = test['Forecast'].iloc[max(0, index-7):index].mean()
    
    sales_pred = pd.concat([train[:-7], test])
    pred.append(sales_pred)
    
    # Evaluation of the test df
    
    sales_test = test[7:]
    sales_test['Product'] = product
    
    real = (sales_test['Amount'] + 0.1).values
    predicted = sales_test['Forecast'].values
    
    mape = np.mean(np.abs(predicted - real)/np.abs(real))
    mae = mean_absolute_error(real, predicted)
    mpe = np.abs(np.mean((real - predicted)/real)*100)
    rmse = np.sqrt(mean_squared_error(real, predicted))

    eval_roll_avg.append(pd.DataFrame({'Product': product, 'MAPE':mape, 'MAE':mae, 'MPE':mpe, 'RMSE':rmse,
                        'Total Sold':real.sum(), 'Total Forecasted':predicted.sum()}, index=[0]))

mov_avg = pd.concat(pred)
eval_mov_avg = pd.concat(eval_roll_avg)

In [79]:
mov_avg.head(10)

Unnamed: 0_level_0,Amount,Product,Forecast
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01,7,27in 4K Gaming Monitor,0.0
2019-01-02,14,27in 4K Gaming Monitor,0.0
2019-01-03,14,27in 4K Gaming Monitor,0.0
2019-01-04,8,27in 4K Gaming Monitor,0.0
2019-01-05,13,27in 4K Gaming Monitor,0.0
2019-01-06,8,27in 4K Gaming Monitor,0.0
2019-01-07,12,27in 4K Gaming Monitor,0.0
2019-01-08,12,27in 4K Gaming Monitor,10.857143
2019-01-09,14,27in 4K Gaming Monitor,11.571429
2019-01-10,12,27in 4K Gaming Monitor,11.571429


In [80]:
eval_mov_avg.head(19)

Unnamed: 0,Product,MAPE,MAE,MPE,RMSE,Total Sold,Total Forecasted
0,27in 4K Gaming Monitor,0.847137,8.320005,34.08889,9.299361,805.0,605.897888
0,Macbook Pro Laptop,0.692176,6.01262,24.662242,7.109568,606.0,472.826559
0,ThinkPad Laptop,0.425365,4.91509,3.15776,5.662601,502.0,389.959186
0,Flatscreen TV,0.691157,6.59788,16.447505,7.660901,618.0,452.127528
0,USB-C Charging Cable,0.847339,29.315447,34.947396,31.822133,3063.0,2330.744572
0,34in Ultrawide Monitor,0.328977,9.58339,31.584022,10.564319,827.0,545.974461
0,Lightning Charging Cable,0.613561,27.690616,12.366453,30.955357,2902.0,2206.193762
0,Bose SoundSport Headphones,0.705107,20.005405,9.096134,21.202229,1730.0,1203.872231
0,LG Dryer,0.468946,1.647788,28.666562,1.961402,99.0,55.581613
0,AA Batteries (4-pack),0.238456,29.677253,23.678753,32.382004,3593.0,2707.091781


In [81]:
forecast_comparisson(mov_avg, product_col='Product', real_sales_col='Amount', forecast_col='Forecast')

In [82]:
# Comparing the naive models
print('Mean MAE last day forecast: ', eval_last_df['MAE'].mean())
print('Mean MPE last day forecast: ', eval_last_df['MPE'].mean(), end='\n\n')
print('Mean MAE moving average forecast: ', eval_mov_avg['MAE'].mean())
print('Mean MPE moving average forecast: ', eval_mov_avg['MPE'].mean())

Mean MAE last day forecast:  10.810526315789474
Mean MPE last day forecast:  55.67251334220112

Mean MAE moving average forecast:  13.914342819210315
Mean MPE moving average forecast:  18.97433142265193


### Econometrics and ML models

#### 3. ARIMA (and variants)

In [83]:
df.head()

Unnamed: 0,Order ID,Product,Amount,Price,Date,Address,Hour,City,State
68289,142066,27in 4K Gaming Monitor,1,389.99,2019-01-01,"110 Dogwood St, Seattle, WA 98101",22:00:00,Seattle,WA
76162,149579,Macbook Pro Laptop,1,1700.0,2019-01-01,"61 Lakeview St, Dallas, TX 75001",10:00:00,Dallas,TX
69491,143202,ThinkPad Laptop,1,999.99,2019-01-01,"129 Walnut St, Los Angeles, CA 90001",18:00:00,Los Angeles,CA
74481,147963,Flatscreen TV,1,300.0,2019-01-01,"655 Meadow St, Austin, TX 73301",11:00:00,Austin,TX
73301,146844,Flatscreen TV,1,300.0,2019-01-01,"593 Church St, New York City, NY 10001",11:00:00,New York City,NY


In [115]:
df.head()

Unnamed: 0,Order ID,Product,Amount,Price,Date,Address,Hour,City,State
68289,142066,27in 4K Gaming Monitor,1,389.99,2019-01-01,"110 Dogwood St, Seattle, WA 98101",22:00:00,Seattle,WA
76162,149579,Macbook Pro Laptop,1,1700.0,2019-01-01,"61 Lakeview St, Dallas, TX 75001",10:00:00,Dallas,TX
69491,143202,ThinkPad Laptop,1,999.99,2019-01-01,"129 Walnut St, Los Angeles, CA 90001",18:00:00,Los Angeles,CA
74481,147963,Flatscreen TV,1,300.0,2019-01-01,"655 Meadow St, Austin, TX 73301",11:00:00,Austin,TX
73301,146844,Flatscreen TV,1,300.0,2019-01-01,"593 Church St, New York City, NY 10001",11:00:00,New York City,NY


In [None]:
forecast_arima = []
evaluations = []

for product in tqdm(df['Product'].unique()):
    df_filtered = df[df['Product'] == product]
    df_filtered = pd.DataFrame(df_filtered.groupby('Date').agg({'Amount': 'sum', 'Price': 'mean'})).reset_index()
    df_filtered = extract_date_features(df_filtered, date_column='Date')  # Extracting date features
    df_filtered.set_index('Date', inplace=True)
    df_filtered['Product'] = product
    train_df = df_filtered.iloc[:-30]
    test_df = df_filtered.iloc[-30:]

    # Here we're using the auto_arima function to find the best parameters for the SARIMAX model
    arima_model = auto_arima(train_df['Amount'], exogenous=train_df[['Price', 'day_of_week', 'is_month_start', 'is_month_end']],         
                            max_order=None, max_p=5, max_q=5, max_d=1, m=1, max_P=4, max_Q=4, max_D=1, njobs=-1, stepwise=True, 
                            out_of_sample_size=30, scoring='mae', test='adf')
    arima_setup = SARIMAX(train_df['Amount'], order=arima_model.order)
    arima_fit = arima_setup.fit()
    predictions = arima_fit.forecast(steps=len(test_df), exogenous=test_df[['Price', 'day_of_week', 'is_month_start', 'is_month_end']])
    
    arima_preds = pd.DataFrame(predictions)
    arima_preds.columns = ['Forecast']
    arima_preds['Actual_values'] = test_df['Amount'].values
    arima_preds['Product'] = product
    forecast_arima.append(arima_preds)

    predicted = arima_preds['Forecast'].values
    real = (arima_preds['Actual_values']+0.1).values

    mape = np.mean(np.abs(predicted - real)/np.abs(real))
    mae = mean_absolute_error(real, predicted)
    mpe = np.abs(np.mean((real - predicted)/real)*100)
    rmse = np.sqrt(mean_squared_error(real, predicted))

    eval_arima = pd.DataFrame({'Product':product, 'MAPE':mape, 'MAE':mae, 'MPE':mpe, 'RMSE':rmse, 'Parametros':arima_model,
                            'Total Vendido':real.sum(), 'Total Previsto':predicted.sum()}, index=[0])
    evaluations.append(eval_arima)

forecast_arima = pd.concat(forecast_arima)
evaluations = pd.concat(evaluations)

In [113]:
forecast_arima.head()

Unnamed: 0,Forecast,Actual_values,Product
2019-12-03 00:00:00,22.179003,23,27in 4K Gaming Monitor
2019-12-04 00:00:00,22.768013,28,27in 4K Gaming Monitor
2019-12-05 00:00:00,25.072327,32,27in 4K Gaming Monitor
2019-12-06 00:00:00,26.633113,29,27in 4K Gaming Monitor
2019-12-07 00:00:00,27.169538,29,27in 4K Gaming Monitor


In [116]:
evaluations.head(19)

Unnamed: 0,Product,MAPE,MAE,MPE,RMSE,Parametros,Total Vendido,Total Previsto
0,27in 4K Gaming Monitor,0.914186,4.311831,76.82585,6.600899,"ARIMA(2,1,2)(0,0,0)[0]",805.0,795.273481
0,Macbook Pro Laptop,0.699379,5.357152,33.292436,6.58617,"ARIMA(1,1,2)(0,0,0)[0]",606.0,503.52382
0,ThinkPad Laptop,0.411516,4.509485,1.07974,5.301262,"ARIMA(1,0,2)(0,0,0)[0] intercept",502.0,407.594011
0,Flatscreen TV,0.672882,4.972533,31.820265,6.347691,"ARIMA(1,0,1)(0,0,0)[0] intercept",618.0,513.172664
0,USB-C Charging Cable,0.83791,21.279367,49.910445,25.7785,"ARIMA(0,1,1)(0,0,0)[0]",3063.0,2589.483436
0,34in Ultrawide Monitor,0.2798,8.141584,25.330648,9.122819,"ARIMA(1,1,2)(0,0,0)[0]",827.0,595.873335
0,Lightning Charging Cable,0.5695,17.743839,29.156055,23.85477,"ARIMA(0,1,1)(0,0,0)[0]",2902.0,2536.027259
0,Bose SoundSport Headphones,0.680135,15.759792,21.59319,17.583397,"ARIMA(0,1,1)(0,0,0)[0]",1730.0,1341.6995
0,LG Dryer,0.449256,1.418588,18.556709,1.714037,"ARIMA(1,0,1)(0,0,0)[0] intercept",93.0,59.144104
0,AA Batteries (4-pack),0.170661,21.458979,16.238449,24.603846,"ARIMA(0,1,1)(0,0,0)[0]",3593.0,2971.104262


In [117]:
print('Mean MAE last day forecast: ', eval_last_df['MAE'].mean())
print('Mean MPE last day forecast: ', eval_last_df['MPE'].mean(), end='\n\n')
print('Mean MAE moving average forecast: ', eval_mov_avg['MAE'].mean())
print('Mean MPE moving average forecast: ', eval_mov_avg['MPE'].mean(), end='\n\n')
print('Mean MAE ARIMA forecast: ', evaluations['MAE'].mean())
print('Mean MPE ARIMA forecast: ', evaluations['MPE'].mean())

Mean MAE last day forecast:  10.810526315789474
Mean MPE last day forecast:  55.67251334220112

Mean MAE moving average forecast:  13.914342819210315
Mean MPE moving average forecast:  18.97433142265193

Mean MAE ARIMA forecast:  10.572223000386526
Mean MPE ARIMA forecast:  26.38637384367984
