In [1]:
import random
import pandas as pd
import datetime
import numpy as np
from numpy import savetxt, save, load
import matplotlib.pylab as plt
from matplotlib.pylab import rcParams
%matplotlib inline
import seaborn as sns
from tabulate import tabulate
from scipy.special import boxcox1p
from scipy.stats import norm, skew
from sklearn.externals import joblib

# Notebook-wide display settings.
# NumPy: print floats with 4 decimals and suppress scientific notation for small values.
np.set_printoptions(suppress=True, precision=4)
# pandas: show up to 200 rows and 999 columns before the output is truncated.
pd.options.display.max_rows = 200
pd.options.display.max_columns = 999
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

#code=utf-8



In [3]:
from sklearn import metrics

def mean_absolute_percent_error(y_true, y_pred, eps=0.1):
    """Return the mean absolute percentage error, rounded to two decimals.

    Each observation contributes ``|y_true - y_pred| / (y_true + eps)``.
    The offset ``eps`` is applied to the denominator only: actual values of
    zero no longer divide by zero, and a perfect forecast still scores
    exactly 0.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, element-wise comparable with ``y_true``.
    eps : float, default 0.1
        Offset added to the denominator to avoid division by zero.
        Note that an actual value equal to ``-eps`` still yields a zero
        denominator.

    Returns
    -------
    float
        Mean relative error as a fraction (not multiplied by 100), rounded
        to two decimals.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # avoid divide by 0: shift the denominator only, never the error itself
    relative_error = np.abs((y_true - y_pred) / (y_true + eps))
    return round(np.mean(relative_error), 2)

def eval_metrics(y_test, y_pred):
    """Score predictions against actual values.

    Returns a ``(rmse, mape)`` tuple: the root mean squared error rounded to
    two decimals, and the result of ``mean_absolute_percent_error`` for the
    same inputs.
    """
    squared_error = metrics.mean_squared_error(y_test, y_pred)
    root_error = round(np.sqrt(squared_error), 2)
    return root_error, mean_absolute_percent_error(y_test, y_pred)

In [4]:
def moving_average(qty, window=28, lead_time=7):
    """Return the trailing moving average of ``qty`` as a lagged forecast.

    A rolling mean over ``window`` observations is computed; positions whose
    window is incomplete (or otherwise evaluates to NaN) are dropped. The
    last ``lead_time`` averages are then removed, so that when the result is
    aligned with the end of the actual series each average is paired with
    the observation ``lead_time`` steps later.

    Parameters
    ----------
    qty : pandas.Series
        Quantities in chronological order.
    window : int, default 28
        Number of observations in each average.
    lead_time : int, default 7
        Number of trailing averages to drop (one-week replenishment by
        default). ``0`` keeps every average.

    Returns
    -------
    pandas.Series
        The retained rolling means, keeping the index labels of ``qty``.

    Raises
    ------
    ValueError
        If ``lead_time`` is negative.
    """
    if lead_time < 0:
        raise ValueError("lead_time must be non-negative")
    data = qty.rolling(window=window).mean().dropna()
    # consider one-week replenishment; iloc[:-0] would wrongly return an
    # empty series, so only trim when there is something to trim
    if lead_time > 0:
        data = data.iloc[:-lead_time]
    return data

In [16]:
timestep = 1

import json

MIN_OBSERVATIONS = 60   # products with a shorter sales history are not evaluated
TEST_FRACTION = 0.3     # compare prediction accuracy, train: test = 70: 30

with open("../data/all/group_ids.json") as json_file:
    group_ids = json.load(json_file)

# group_product_ids is only read in this cell; the input file is not rewritten.
with open("../data/all/group_product_ids.json") as json_file:
    group_product_ids = json.load(json_file)

rmse, mape, X_train, X_test, Y_train, Y_test = {}, {}, {}, {}, {}, {}

for group_id in group_ids:
    Y_train[group_id] = pd.read_json('../data/all/boosting_input_data/group%d_Y_train.json' % (group_id), orient='split')
    Y_test[group_id] = pd.read_json('../data/all/boosting_input_data/group%d_Y_test.json' % (group_id), orient='split')

    # Normalise the id dtype once per group rather than once per product.
    group_train, group_test = Y_train[group_id], Y_test[group_id]
    group_train['product_id'] = group_train['product_id'].astype(np.int64)
    group_test['product_id'] = group_test['product_id'].astype(np.int64)

    # Every product of the group is considered, in file order, so the result
    # does not depend on an unseeded random shuffle.
    product_ids = group_product_ids[str(group_id)]
    n = len(product_ids)
    evaluated = 0           # number of products that actually contributed a score
    _rmse, _mape = 0, 0

    for product_id in product_ids:
        y_train = group_train.loc[group_train['product_id'] == product_id, 'qty']
        y_test = group_test.loc[group_test['product_id'] == product_id, 'qty']
        qty = pd.concat([y_train, y_test], axis=0, ignore_index=True)
        if len(qty) < MIN_OBSERVATIONS:
            continue

        y_pred = moving_average(qty)
        test_size = int(len(y_pred) * TEST_FRACTION)
        if test_size == 0:
            # iloc[-0:] would select the whole series instead of nothing
            continue

        # Align the tails: the last test_size forecasts against the last
        # test_size actual quantities.
        y_pred = y_pred.iloc[-test_size:].reset_index(drop=True)
        qty = qty.iloc[-test_size:].reset_index(drop=True)

        product_rmse, product_mape = eval_metrics(qty, y_pred)
        _rmse += product_rmse
        _mape += product_mape
        evaluated += 1

    # Average over the products that were scored; a group with none gets NaN
    # instead of raising ZeroDivisionError.
    if evaluated:
        rmse[group_id], mape[group_id] = round(_rmse / evaluated, 2), round(_mape / evaluated, 2)
    else:
        rmse[group_id] = mape[group_id] = float('nan')

Model = 'Moving Average'
# One row per group found in group_ids.json rather than a fixed 0..3 list.
table = [[Model, int(group_id), len(group_product_ids[str(group_id)]), rmse[group_id], mape[group_id]]
         for group_id in group_ids]

print(tabulate(table, headers=["Model","Group", "Num_Products", "RMSE", "MAPE"]))

0.0 74236
0.0 61401
0.0 78702
0.0 71906
0.0 72741
0.0 64446
0.0 78657
0.0 19827
0.0 72801
0.0 14314
0.0 78127
0.0 20310
0.0 75026
0.0 71496
0.0 78882
0.0 60046
0.0 80442
0.0 78262
0.0 77837
0.0 74156
0.0 79112
0.0 2829
0.0 78412
0.0 74861
0.0 79802
0.0 78142
0.0 77897
0.0 77982
0.0 81387
0.0 78372
0.0 4236
0.0 80517
0.0 76371
0.0 78012
0.0 79347
0.0 78867
0.0 78192
0.0 76161
0.0 50401
0.0 70706
0.0 71946
0.0 76441
0.0 80207
0.0 76156
0.0 77867
0.0 77011
0.0 77291
0.0 74121
0.0 77521
0.0 76301
0.0 72786
0.0 79227
0.0 77036
0.0 77682
0.0 60041
0.0 76836
0.0 72011
0.0 10356
0.0 78042
0.0 16558
0.0 71891
0.0 72766
0.0 74246
0.0 71991
0.0 61396
0.0 73896
0.0 76826
0.0 79962
0.0 74966
0.0 74566
0.0 76181
0.0 72821
0.0 78932
0.0 78707
0.0 74706
0.0 76261
0.0 19843
0.0 74496
0.0 65031
0.0 78557
0.0 80357
0.0 77296
0.0 76536
0.0 74116
0.0 21937
0.0 78497
0.0 79692
0.0 74166
0.0 78182
0.0 23123
0.0 77241
0.0 9939
0.0 74681
0.0 74811
0.0 78752
0.0 72736
0.0 80212
0.0 72061
0.0 77086
0.0 78022
0.0