# MuchLearningSuchWow - Prediction Analysis

This notebook contains the code we used to analyze the predictions made by our LSTM and LightGBM models and to compare them against the true sales.

### Imports

In [None]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

### Data Paths

In [None]:
# Folder the submission CSVs are read from.
# Both paths end in "/" and the loading cells prepend another "/" to the
# file name, so the final paths contain a harmless "//".
submissionPath = "submissions/"
# Folder the true-sales CSV is read from.
inputPath = "input/m5-forecasting-accuracy/"

### Loading Data

In [None]:
# Read the LSTM submission and keep only the rows whose id contains "validation".
predicted_sales_lstm = (
    pd.read_csv(f"{submissionPath}/lstm_submission.csv")
    .loc[lambda frame: frame.id.str.contains("validation")]
)

In [None]:
# Read the LightGBM submission and keep only the rows whose id contains "validation".
predicted_sales_lightgbm = (
    pd.read_csv(f"{submissionPath}/lightgbm_submission.csv")
    .loc[lambda frame: frame.id.str.contains("validation")]
)

In [None]:
# Read the true sales; the d_1914 ... d_1941 columns of this frame are what
# the predictions are compared against further down.
df_sales = pd.read_csv(f"{inputPath}/sales_train_evaluation.csv")

### Plotting Predictions vs True Sales

In [None]:
def plot_pred(predicted_sales_lstm, predicted_sales_lightgbm, true_sales, id_, thing):
    """Show LSTM predictions, LightGBM predictions and true sales on one line chart.

    Each series is drawn against the day numbers 1..len(series), so the three
    inputs may have different lengths.

    Parameters
    ----------
    predicted_sales_lstm : sized sequence of numbers
        Drawn in blue, legend entry 'LSTM predictions'.
    predicted_sales_lightgbm : sized sequence of numbers
        Drawn in green, legend entry 'LightGBM predictions'.
    true_sales : sized sequence of numbers
        Drawn in red, legend entry 'true sales'.
    id_ : str
        Identifier placed at the end of the plot title.
    thing : str
        Word placed in the title just before ``id_`` (the notebook passes
        "item", "department", "store" and "state").

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    NOTE(review): the y-axis is labelled 'Predicted Sales' although the true
    sales share the same axis.
    """
    fig, ax = plt.subplots()
    ax.grid()

    # (values, legend label, line colour) for every curve, in drawing order.
    curves = (
        (predicted_sales_lstm, 'LSTM predictions', 'blue'),
        (predicted_sales_lightgbm, 'LightGBM predictions', 'green'),
        (true_sales, 'true sales', 'red'),
    )
    for values, legend_label, line_color in curves:
        days = np.arange(1, len(values) + 1)  # x-axis starts at day 1
        ax.plot(days, values, label=legend_label, color=line_color)

    ax.title.set_text(" ".join(("Predicted vs True sales of", thing, id_)))
    ax.set(xlabel='Day', ylabel='Predicted Sales')
    # Anchor the legend just outside the right edge of the axes.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.9))
    plt.show()

In [None]:
# Substring used to select rows by id (alternative: "FOODS_3_367_TX_3").
item_id = "HOBBIES_1_001_CA_1"

# Column-wise mean of every column after the first, over the matching prediction rows.
predicted_sales_lstm_item = (
    predicted_sales_lstm
    .loc[lambda frame: frame.id.str.contains(item_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)
predicted_sales_lightgbm_item = (
    predicted_sales_lightgbm
    .loc[lambda frame: frame.id.str.contains(item_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)

# Column-wise mean of the true sales over columns d_1914 ... d_1941 for the matching rows.
true_sales_item = (
    df_sales
    .loc[
        lambda frame: frame.id.str.contains(item_id),
        [f'd_{day}' for day in range(1914, 1942)],
    ]
    .values
    .mean(axis=0)
)

In [None]:
# Compare both models against the true sales for the selected item.
plot_pred(
    predicted_sales_lstm=predicted_sales_lstm_item,
    predicted_sales_lightgbm=predicted_sales_lightgbm_item,
    true_sales=true_sales_item,
    id_=item_id,
    thing="item",
)

In [None]:
# Substring used to select rows by id (alternative: "FOODS_3").
dept_id = "HOBBIES_1"

# Column-wise mean of every column after the first, over the matching prediction rows.
predicted_sales_lstm_dept = (
    predicted_sales_lstm
    .loc[lambda frame: frame.id.str.contains(dept_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)
predicted_sales_lightgbm_dept = (
    predicted_sales_lightgbm
    .loc[lambda frame: frame.id.str.contains(dept_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)

# Column-wise mean of the true sales over columns d_1914 ... d_1941 for the matching rows.
true_sales_dept = (
    df_sales
    .loc[
        lambda frame: frame.id.str.contains(dept_id),
        [f'd_{day}' for day in range(1914, 1942)],
    ]
    .values
    .mean(axis=0)
)

In [None]:
# Compare both models against the true sales for the selected department.
plot_pred(
    predicted_sales_lstm=predicted_sales_lstm_dept,
    predicted_sales_lightgbm=predicted_sales_lightgbm_dept,
    true_sales=true_sales_dept,
    id_=dept_id,
    thing="department",
)

In [None]:
# Substring used to select rows (alternative: "TX_3").
store_id = "CA_1"

# Predictions are matched on the id column; column-wise mean of every column after the first.
predicted_sales_lstm_store = (
    predicted_sales_lstm
    .loc[lambda frame: frame.id.str.contains(store_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)
predicted_sales_lightgbm_store = (
    predicted_sales_lightgbm
    .loc[lambda frame: frame.id.str.contains(store_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)

# True sales are matched on the store_id column instead of id;
# column-wise mean over columns d_1914 ... d_1941.
true_sales_store = (
    df_sales
    .loc[
        lambda frame: frame.store_id.str.contains(store_id),
        [f'd_{day}' for day in range(1914, 1942)],
    ]
    .values
    .mean(axis=0)
)

In [None]:
# Compare both models against the true sales for the selected store.
plot_pred(
    predicted_sales_lstm=predicted_sales_lstm_store,
    predicted_sales_lightgbm=predicted_sales_lightgbm_store,
    true_sales=true_sales_store,
    id_=store_id,
    thing="store",
)

In [None]:
# Substring used to select rows (alternative: "TX").
state_id = "CA"

# Predictions are matched on the id column; column-wise mean of every column after the first.
predicted_sales_lstm_state = (
    predicted_sales_lstm
    .loc[lambda frame: frame.id.str.contains(state_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)
predicted_sales_lightgbm_state = (
    predicted_sales_lightgbm
    .loc[lambda frame: frame.id.str.contains(state_id)]
    .iloc[:, 1:]
    .mean(axis=0)
    .values
)

# True sales are matched on the state_id column instead of id;
# column-wise mean over columns d_1914 ... d_1941.
true_sales_state = (
    df_sales
    .loc[
        lambda frame: frame.state_id.str.contains(state_id),
        [f'd_{day}' for day in range(1914, 1942)],
    ]
    .values
    .mean(axis=0)
)

In [None]:
# Compare both models against the true sales for the selected state.
plot_pred(
    predicted_sales_lstm=predicted_sales_lstm_state,
    predicted_sales_lightgbm=predicted_sales_lightgbm_state,
    true_sales=true_sales_state,
    id_=state_id,
    thing="state",
)