In [158]:
import pandas as pd
import numpy as np

In [159]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

In [160]:
# Location of the processed sales data CSV that every later cell works from.
MAIN_DATASET_CSV = "drive/MyDrive/data_processed_plant_item_encoded_13_03_22.csv"
main_dataset = pd.read_csv(MAIN_DATASET_CSV)

In [162]:
# Read the saved NumPy arrays used in the evaluation below: the actual sales,
# the baseline (naive) predictions and the TFT predictions.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can run
# arbitrary code if a .npy file comes from an untrusted source — confirm it is needed.
actual = np.load(
    'drive/MyDrive/Tensors/actual_sales.npy',
    allow_pickle=True,
)
naive_predictions = np.load(
    'drive/MyDrive/Tensors/baseline_predictions.npy',
    allow_pickle=True,
)
TFT_predictions = np.load(
    'drive/MyDrive/Tensors/TFT_predictions.npy',
    allow_pickle=True,
)

# Baseline Error Performance

In order to compare model performance we need an estimate of the Bayes limit for the problem. In this case we do not have a human error reference, so we use the lowest of the following:
- APO recorded forecast. This is the collection of models used by the sales team (when we receive it, that is).
- Persistence 1 month. Using the observed value from the previous month as the prediction of the next month. This is the Naive Forecast.
- Persistence 3 month mean. Using the mean of the observations from the previous 3 months as the prediction of the next month.

By establishing a baseline error we have a reference against which to compare our training and validation set performance. This guides us in understanding where and how a model is performing. For example, if our Bayes error is an MAE of 5% and our model's training and validation MAEs are 6% and 9% respectively, then the relevant observation is that our model performs with high variance with respect to the baseline. The contrary is true if we consider baseline, train, and validation MAEs of 5%, 8%, and 8.5% respectively. In the latter case we should work on the bias of the training set before considering the validation performance (low variance).

In [164]:
# Persistence baselines: lagged 'Volume' values used as naive forecasts.
# The lags are taken within each series and in time order, so a row never receives
# the volume of a different series and the result does not depend on the CSV's row order.
# NOTE(review): assumes 'timeseries' identifies a single series and 'time_idx' orders
# its observations — confirm against how the dataset was built.
volume_by_series = (
    main_dataset
    .sort_values(['timeseries', 'time_idx'])
    .groupby('timeseries')['Volume']
)

# Column assignment aligns on the row index, so main_dataset keeps its original row
# order even though the lags were computed on a sorted copy (requires a unique index,
# which the default index from pd.read_csv provides).
main_dataset['perisitence_1'] = volume_by_series.shift(1)
main_dataset['perisitence_2'] = volume_by_series.shift(2)
main_dataset['perisitence_3'] = volume_by_series.shift(3)

# Mean of the three previous observations; NaN wherever any of the three lags is missing.
main_dataset['perisitence_3_mean'] = (
    main_dataset['perisitence_1']
    + main_dataset['perisitence_2']
    + main_dataset['perisitence_3']
) / 3

In [165]:
# Keep only the columns needed for the evaluation, restricted to the time steps
# for which predictions have been made.
evaluation_columns = ['ParentItemID', 'PlantID', 'time_idx', 'timeseries', 'Volume', 'perisitence_1', 'perisitence_3_mean']
prediction_time_idx = [29, 30, 31, 32, 33, 34]
subset_of_main_data = main_dataset.loc[
    main_dataset['time_idx'].isin(prediction_time_idx), evaluation_columns
]
# reset_index() without drop=True keeps the previous row labels in an 'index' column.
subset_of_main_data = subset_of_main_data.reset_index()

# Add in the datasets from Tensors. Each array is flattened to 1-D and attached
# positionally, one value per row.
# NOTE(review): this assumes the flattened arrays are in the same row order as the
# filtered frame — the check below only verifies that for the actuals.
subset_of_main_data['actuals_output'] = np.array(actual).flatten()
subset_of_main_data['naive_predictions'] = np.array(naive_predictions).flatten()
subset_of_main_data['TFT_predictions'] = np.array(TFT_predictions).flatten()

# Check that Actual is done properly. Absolute differences are summed so that
# positive and negative mismatches cannot cancel out and hide a misalignment.
actual_vs_volume_gap = (
    subset_of_main_data['actuals_output'] - subset_of_main_data['Volume']
).abs().sum()
print(f"The difference between Actual Output and Original is:  {actual_vs_volume_gap}")

# Check for NaN's in Dataset. If there were NaNs, we would fill them with linear
# interpolation because this is how we will fill when using the data in the models.
if main_dataset.isna().sum().sum() > 0:
    print('\033[93m You have NaN\'s in your main_dataset dataset')

if subset_of_main_data.isna().sum().sum() > 0:
    print('\033[93m You have NaN\'s in your subset_of_main_data dataset')


The difference between Actual Output and Original is:  0.0
[93m You have NaN's in your main_dataset dataset


In [207]:
# Item-level view: sum every remaining column per (ParentItemID, time_idx), then
# discard the summed 'index' and 'timeseries' columns.
# NOTE(review): 'PlantID' is neither a grouping key nor dropped, so it stays in the
# result as a summed column — confirm that is intended.
aggregate = (
    subset_of_main_data
    .groupby(['ParentItemID', 'time_idx'])
    .sum()
    .drop(columns=['index', 'timeseries'])
)

In [168]:
subset_of_main_data.columns

Index(['index', 'ParentItemID', 'PlantID', 'time_idx', 'timeseries', 'Volume',
       'perisitence_1', 'perisitence_3_mean', 'actuals_output',
       'naive_predictions', 'TFT_predictions'],
      dtype='object')

In [208]:
def MAE(dataset, forecast):
    """Mean absolute error between the ``"Volume"`` column and a forecast column.

    Computed directly with numpy rather than through a TensorFlow tensor, so the
    metric does not depend on TensorFlow being importable.

    Args:
        dataset: DataFrame containing a ``"Volume"`` column and the column named
            by ``forecast``.
        forecast: Name of the column holding the predictions to score.

    Returns:
        The mean of ``|Volume - forecast|`` as a numpy float. NaN if either
        column contains NaN, because NaNs are not skipped.
    """
    observed = np.asarray(dataset["Volume"], dtype=float)
    predicted = np.asarray(dataset[forecast], dtype=float)
    return np.abs(observed - predicted).mean()

In [209]:
def WMAPE(dataset, forecast):
    """Weighted mean absolute percentage error of a forecast column.

    Defined here as ``sum(|forecast - actuals_output|) / sum(actuals_output) * 100``.

    Args:
        dataset: DataFrame containing an ``'actuals_output'`` column and the
            column named by ``forecast``.
        forecast: Name of the column holding the predictions to score.

    Returns:
        The error as a percentage of the total actual volume. The denominator is
        not guarded, so a zero total of actuals is not handled specially.
    """
    actuals = dataset['actuals_output']
    total_abs_error = (dataset[forecast] - actuals).abs().sum()
    return (total_abs_error / actuals.sum()) * 100

In [335]:
# Forecast columns to score. 'actuals_output' is included as well; it scores 0 on
# the WMAPE columns because those compare each column against 'actuals_output'.
list_of_forecasts = [
    #  'APO_forecast', 'SalesHead_Corr_Forecast',
    'naive_predictions',
    'perisitence_1',
    'perisitence_3_mean',
    'actuals_output',
    'TFT_predictions',
]

# One row per forecast: MAE and WMAPE on the row-level frame, plus WMAPE on the
# frame aggregated per (ParentItemID, time_idx).
ref_error = pd.DataFrame(
    {
        "Method": list_of_forecasts,
        "MAE": [MAE(subset_of_main_data, name) for name in list_of_forecasts],
        "WMAPE": [WMAPE(subset_of_main_data, name) for name in list_of_forecasts],
        "Aggregated WMAPE": [WMAPE(aggregate, name) for name in list_of_forecasts],
    },
    index=list(range(len(list_of_forecasts))),
)

In [354]:
# CSS rule dictionaries (a 'selector' plus its 'props') for the summary table.

# Hover effect on a single cell; for row hover use <tr> instead of <td>.
cell_hover = dict(
    selector='td:hover',
    props=[('background-color', 'white'), ('color', 'black')],
)

# Appearance of the index-name cells.
index_names = dict(
    selector='.index_name',
    props='font-style: italic; color: darkgrey; font-weight:normal;',
)

# Appearance of every header cell except the index-name cells.
headers = dict(
    selector='th:not(.index_name)',
    props='background-color: #8900f2; color: white;',
)

def highlight_max(s, props=''):
    """Return ``props`` where ``s`` equals its NaN-ignoring maximum, ``''`` elsewhere.

    Args:
        s: Values to inspect (anything exposing ``.values`` and supporting ``==``).
        props: CSS string to return for the maximal entries.

    Returns:
        A numpy array of strings with the same shape as ``s``.
    """
    largest = np.nanmax(s.values)
    is_largest = s == largest
    return np.where(is_largest, props, '')

def highlight_compare(s, props=''):
    """Return ``props`` where a value is greater than the first entry of ``s``.

    The reference value is taken by position (``s.iloc[0]``), so it is always the
    first row regardless of how the Series is indexed.

    Args:
        s: pandas Series of values; its first entry is the reference.
        props: CSS string to return for entries larger than the reference.

    Returns:
        A numpy array of strings with one element per entry of ``s``.
    """
    reference = s.iloc[0]
    return np.where(s > reference, props, '')

def highlight_compare2(s, props=''):
    """Return ``props`` where a value is smaller than the first entry of ``s``.

    The reference value is taken by position (``s.iloc[0]``), so it is always the
    first row regardless of how the Series is indexed.

    Args:
        s: pandas Series of values; its first entry is the reference.
        props: CSS string to return for entries smaller than the reference.

    Returns:
        A numpy array of strings with one element per entry of ``s``.
    """
    reference = s.iloc[0]
    return np.where(s < reference, props, '')


# Styled view of the error summary table.
s = ref_error.style
s = s.set_table_styles([cell_hover, index_names, headers])

# The three metric columns that all of the highlighting rules below apply to.
metric_columns = pd.IndexSlice[:, ['MAE', 'WMAPE', 'Aggregated WMAPE']]

# DataFrame maximum in red, and column maximums in orange.
# Values above the first row's value in a column get dark-red text, values below it green.
# The chain's result is not re-assigned: the styles are registered on `s` itself.
(
    s.apply(highlight_max, props='color:white;background-color:orange;', subset=metric_columns, axis=0)
    .apply(highlight_max, props='color:white;background-color:red', subset=metric_columns, axis=None)
    .apply(highlight_compare, props='color:#961c00;', subset=metric_columns, axis=0)
    .apply(highlight_compare2, props='color:green;', subset=metric_columns, axis=0)
    .set_properties(
        **{'background-color': '#8900f2', 'font-weight': 'bold'},
        subset=pd.IndexSlice[:, ['Method']],
    )
)

print('DataFrame maximum in red, and column maximums in orange. If metric is larger than Naive i.e.[0] for that metric, text is dark red. Otherwise, it is green.')
print('\033[93m Summary of errors:')
s

DataFrame maximum in red, and column maximums in orange. If metric is larger than Naive i.e.[0] for that metric, text is dark red
[93m Summary of errors:


Unnamed: 0,Method,MAE,WMAPE,Aggregated WMAPE
0,naive_predictions,40.568352,84.304947,62.370169
1,perisitence_1,34.615874,71.935126,51.168809
2,perisitence_3_mean,31.739148,65.957011,49.521603
3,actuals_output,0.0,0.0,0.0
4,TFT_predictions,48.12112,100.000334,99.997282
