In [1]:
# %matplotlib inline
import pandas as pd
from fbprophet import Prophet
import numpy as np
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
plt.style.use('fivethirtyeight')

In [2]:
raw_data = pd.read_csv("Online_Retail.csv")
raw_data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,01/12/10 08:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,01/12/10 08:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,01/12/10 08:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,01/12/10 08:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,01/12/10 08:26,3.39,17850.0,United Kingdom


In [3]:
sales_data = raw_data.drop(['InvoiceNo','Description','CustomerID','Country','UnitPrice'], axis = 1)
sales_data['InvoiceDate'] = pd.to_datetime(sales_data['InvoiceDate'])
print(sales_data.info())
sales_data.index

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 3 columns):
StockCode      541909 non-null object
Quantity       541909 non-null int64
InvoiceDate    541909 non-null datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 12.4+ MB
None


RangeIndex(start=0, stop=541909, step=1)

In [4]:
stock_codes = list(sales_data.StockCode.unique())

In [5]:
def extract_product(product_id,freq):
    #pull out all transactions of one product and drop StockCode
    product = sales_data.loc[sales_data['StockCode'] == product_id].drop('StockCode',axis=1)
    product=product.set_index(['InvoiceDate']) #Convert to timeseries 

    #DownSampling to a day
    day_summary = pd.DataFrame()
    day_summary['Quantity'] = product.Quantity.resample(freq).sum()
    day_summary = day_summary.fillna(0) #replacing NaN with 0
    day_summary = day_summary.clip(0) #replace -ve values with 0

    day_summary = day_summary['2010-12-12':]#trim as almost no data available before this date
    day_summary['Quantity']+=1#ti avoid -inf while taking log transformation 
#     print(day_summary.describe())
    #plot
#     fig, ax1 = plt.subplots()
#     ax1.plot(day_summary.index, day_summary['Quantity'])
#     ax1.set_xlabel('Date')
#     ax1.set_ylabel('Quantity')
#     # ax1.set_ylim(0,100)
#     plt.show()
    return day_summary

In [12]:
def result_analysis(errors):
    min_value = float('inf')
    max_value = 0
    None_count = 0
    error_sum = 0
    for pair in errors:
        if pair[1] != None:
            error_sum+=pair[1]
            if pair[1]<min_value:
                min_value=pair[1]
            if pair[1]>max_value:
                max_value=pair[1]
                print(pair[0])
        else:
            None_count+=1
    print(max_value)
    return ("min_value = "+str(min_value),"max_value = "+str(max_value),"avg value = "+str(error_sum/len(errors)),"number of none = "+str(None_count))

In [7]:
errors = list()
count =1 
for product in stock_codes:
    print('\r', count, end='')
    count+=1
    freq = 'D'
    try:
        day_summary = extract_product(product,freq)
        # day_summary.head()
        day_summary.reset_index(inplace=True)
        day_summary = day_summary.rename(columns={'Quantity': 'y','InvoiceDate': 'ds'})
        train,test = day_summary[:-10],day_summary[-10:]
        my_model = Prophet(daily_seasonality=True,yearly_seasonality=True,weekly_seasonality=True)
        my_model.fit(train)
        future_dates = my_model.make_future_dataframe(periods=10,freq=freq)
        forecast = my_model.predict(future_dates)
        predicted = forecast[['ds', 'yhat']].tail(10)
        errors.append((product,np.sqrt(mean_squared_error(test.y,predicted.yhat))))
    except:
        errors.append((product,None))
#     print(np.sqrt(mean_squared_error(test.y,predicted.yhat)))
#     my_model.plot(forecast,uncertainty=True);
#     my_model.plot_components(forecast);
    
result_analysis(errors)
with open("result.txt", 'w') as file:
    for item in errors:
        file.write("{}\n".format(item))


 552

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 9.0.


 1756

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 18.0.


 2024

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 0.0.


 26902299

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 24.0.


 2817

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 19.0.


 2846

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 16.0.


 2848

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 19.0.


 2885

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 17.0.
INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 17.0.


 30493022

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 15.0.


 31363073

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 23.0.


 3226

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 11.0.


 3251

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 23.0.


 3317

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 11.0.


 3635

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 14.0.


 3679

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 4.0.


 36983688

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 15.0.


 3699

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 15.0.


 37553719

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 10.0.


 3958

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 15.0.


 4002

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 8.0.


 403340134020

INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 3.0.


 4070

In [8]:
errors

[('85123A', 256.81976334088239),
 ('71053', 4.7541562530894552),
 ('84406B', 7.2352255047984251),
 ('84029G', 39.027536287031353),
 ('84029E', 76.592038463725103),
 ('22752', 13.199987748813552),
 ('21730', 2.9353927626677287),
 ('22633', 36.546748059712286),
 ('22632', 15.249131905778276),
 ('84879', 126.76104007324825),
 ('22745', 17.128394791960183),
 ('22748', 16.990835475507449),
 ('22749', 16.190349387406446),
 ('22310', 21.669872130158968),
 ('84969', 6.4609028928806431),
 ('22623', 8.687219451855368),
 ('22622', 11.964174574492684),
 ('21754', 13.637473384190923),
 ('21755', 5.4398930157555796),
 ('21777', None),
 ('48187', 29.127480238348301),
 ('22960', 60.112500734538692),
 ('22913', 0.8429987884183463),
 ('22912', 1.3105097348552865),
 ('22914', 1.0621548127940355),
 ('21756', 3.0888927735110108),
 ('22728', 69.707650046243387),
 ('22727', 62.097384378383495),
 ('22726', 60.408230165178644),
 ('21724', 6.8750155082847506),
 ('21883', 17.150304299790122),
 ('10002', 7.005951

In [13]:
result_analysis(errors)

85123A
21212
22197
22920
84826
3957.24395685


('min_value = 0.0',
 'max_value = 3957.24395685',
 'avg value = 14.375189116',
 'number of none = 288')