In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from warnings import filterwarnings
filterwarnings('ignore')
from scipy.special import boxcox1p, inv_boxcox, inv_boxcox1p
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import scipy.stats as scs
from itertools import product

In [1]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error between two series.

    The error of each observation is taken relative to ``y_true`` and the
    average of the absolute relative errors is scaled to a percentage.

    Parameters
    ----------
    y_true : array-like supporting element-wise arithmetic
        Observed values. A zero entry leads to a division by zero.
    y_pred : array-like supporting element-wise arithmetic
        Predicted values, element-wise comparable with ``y_true``.

    Returns
    -------
    float
        Mean absolute percentage error, in percent.
    """
    relative_error = (y_true - y_pred) / y_true
    absolute_relative_error = np.abs(relative_error)
    return np.mean(absolute_relative_error) * 100

In [2]:
def tsplot(y, lags=None, figsize=(10,8), system= "bmh"):
    """Draw a three-panel diagnostic figure for a time series.

    The top row shows the series itself; the bottom row shows a 25-bin
    histogram of its values (left) and its autocorrelation function (right).
    The figure is created but not shown; the caller decides when to call
    ``plt.show()``.

    Parameters
    ----------
    y : pandas.Series or array-like
        Series to plot. Anything that is not a ``pd.Series`` is wrapped in one.
    lags : int or None
        Number of lags forwarded to ``plot_acf``.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    system : str
        Name of the matplotlib style used while drawing.

    Returns
    -------
    None
    """
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    with plt.style.context(system):
        plt.figure(figsize=figsize)
        layout = (2, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        hist_ax = plt.subplot2grid(layout, (1, 0))
        acf_ax = plt.subplot2grid(layout, (1, 1))

        y.plot(ax=ts_ax)
        ts_ax.set_title('Time Series Analysis Plots')

        y.plot(ax=hist_ax, kind='hist', bins=25)
        # The original line ended with a stray "s", which was a SyntaxError.
        hist_ax.set_title('Histogram')

        # NOTE(review): alpha=0.5 requests a 50% confidence band; 0.05 is the
        # usual choice. Kept as-is -- confirm the intent.
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
        plt.tight_layout()

In [3]:
def display_menu():
    """Prompt the user for a menu choice and return it.

    Returns
    -------
    str
        The text typed by the user with surrounding whitespace removed, so
        that it can be compared directly against ``"1"`` or ``"2"``.
    """
    # Prompt typo fixed ("forcast" -> "forecast").
    option = input("Enter your operation\n1. forecast my sales\n2. Exit\n ")
    return option.strip()

In [4]:
def datafiltering(df):
    """Reduce the raw export to a clean two-column ``Date`` / ``Sales`` frame.

    Keeps only the ``'Created at'`` and ``'Paid Price'`` columns, renames them
    to ``'Date'`` and ``'Sales'`` and drops every row in which either value is
    missing. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing at least ``'Created at'`` and ``'Paid Price'``.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``['Date', 'Sales']`` and no missing values.

    Raises
    ------
    KeyError
        If one of the two required columns is absent.
    """
    column_names = {'Created at': 'Date', 'Paid Price': 'Sales'}
    # rename() without inplace avoids mutating a slice of the caller's frame.
    # The former fillna(0) after dropna() could never fill anything and is gone.
    selected = df[list(column_names)].rename(columns=column_names)
    return selected.dropna()

In [5]:
def forecast_sales(df):
    """Turn the filtered sales frame into a monthly series and plot it.

    Steps, in order: print the per-column count of missing values, index the
    frame by the parsed ``'Date'`` column, sort chronologically, plot the raw
    values, resample to month-start (``'MS'``) means with empty months set to
    0, print the monthly frame and draw the ``tsplot`` diagnostics for
    ``'Sales'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Date'`` column and a ``'Sales'`` column.

    Returns
    -------
    pandas.DataFrame
        Monthly frame indexed by month-start timestamps.
    """
    print(df.isnull().sum())

    # Build a chronologically ordered, datetime-indexed copy.
    indexed = df.set_index('Date')
    indexed.index = pd.to_datetime(indexed.index)
    indexed = indexed.sort_index()

    # Raw observations before aggregation.
    plt.plot(indexed)
    plt.title('Sales')
    plt.xlabel('Date')
    plt.ylabel('Sales')
    plt.show()

    # Monthly mean; months without any observation become 0.
    monthly = indexed.resample('MS').mean().fillna(0)
    print(monthly)

    tsplot(monthly['Sales'], lags=30)
    plt.show()
    return monthly

In [6]:
def decompose(df):
    """Decompose the sales series and add a Box-Cox transformed column.

    Runs an additive seasonal decomposition and an augmented Dickey-Fuller
    test on ``df['Sales']``, stores ``boxcox1p(Sales, 0.15)`` in
    ``df['Sales_box']``, then repeats the test, the ``tsplot`` diagnostics
    and the decomposition on the transformed series.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Sales'`` column. It is modified in place: the
        ``'Sales_box'`` column is added or overwritten.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now containing ``'Sales_box'``.
    """
    sales = df['Sales']

    # Decomposition of the untransformed series.
    decomposition = sm.tsa.seasonal_decompose(sales, model='additive')
    print(decomposition.trend)
    decomposition.plot()
    # plt.show() renders every open figure, so a separate fig.show() is not needed.
    plt.show()

    # Stationarity test on the raw series.
    print("Dickey-Fuller test: p=%f" % sm.tsa.stattools.adfuller(sales)[1])

    print(sales.values)
    # Apply the transform to the column directly. The previous
    # df.apply(lambda ...) produced a DataFrame and only assigned cleanly
    # while df had exactly one column, so re-running this function failed.
    # The lambda 0.15 must match the one used when inverting the transform.
    df['Sales_box'] = boxcox1p(sales, 0.15)

    print(df['Sales_box'].values)
    print("Dickey-Fuller test: p=%f" % sm.tsa.stattools.adfuller(df['Sales_box'])[1])

    # Diagnostics and decomposition of the transformed series.
    tsplot(df['Sales_box'], lags=30)

    decomposition = sm.tsa.seasonal_decompose(df['Sales_box'], model='additive')
    decomposition.plot()
    plt.show()
    return df


In [7]:
def arima(df):
    """Grid-search a seasonal ARIMA model on ``df['Sales_box']`` by AIC.

    Plots the ACF (25 lags) and PACF (20 lags) of the transformed series,
    then fits ``SARIMAX`` for every combination of p, q, P, Q in {0, 1} with
    d = D = 1 and a seasonal period of 12, keeping the fit with the lowest
    AIC. Combinations that fail to fit are reported and skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``'Sales_box'`` column.

    Returns
    -------
    The fitted results object with the lowest AIC; its summary is printed.

    Raises
    ------
    RuntimeError
        If no parameter combination could be fitted. Previously this case
        surfaced as an ``UnboundLocalError`` on ``best_model``.
    """
    series = df['Sales_box']

    # ACF and PACF of the transformed series.
    plt.figure(figsize=(10,8))
    ax = plt.subplot(211)
    sm.graphics.tsa.plot_acf(series.values.squeeze(), lags=25, ax=ax)
    ax = plt.subplot(212)
    sm.graphics.tsa.plot_pacf(series, lags=20, ax=ax)
    plt.tight_layout()
    plt.show()

    # Search space: (p, q, P, Q), each in {0, 1}; differencing orders fixed.
    ps = qs = Ps = Qs = range(0, 2)
    d = D = 1
    parameters_list = list(product(ps, qs, Ps, Qs))
    print(len(parameters_list))

    best_model = None
    best_param = None
    best_aic = float("inf")
    for param in parameters_list:
        try:
            model = sm.tsa.statespace.SARIMAX(
                series,
                order=(param[0], d, param[1]),
                seasonal_order=(param[2], D, param[3], 12),
            ).fit(disp=-1)
        except Exception as err:
            # Best effort: some orders do not fit; report and move on.
            print(err)
            continue

        if model.aic < best_aic:
            best_model, best_aic, best_param = model, model.aic, param
            print(best_param, best_aic)

    if best_model is None:
        raise RuntimeError(
            "No SARIMAX model could be fitted for any parameter combination"
        )

    print(best_model.summary())
    return best_model

In [8]:
def residuals(df, best_model):
    """Plot residual diagnostics and the back-transformed forecast.

    Shows the residual series and its kernel density, prints the
    Dickey-Fuller p-value of the residuals, stores the back-transformed
    in-sample prediction in ``df['forecast']`` and plots it against
    ``df['Sales']``. A second plot extends the forecast 12 months past the
    end of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Sales'`` column; modified in place (``'forecast'`` is
        added or overwritten). Assumed to be the frame the model was fitted
        on, so that row positions match the model's observations -- confirm.
    best_model
        Fitted results object exposing ``resid`` and ``predict``.

    Returns
    -------
    None
    """
    # The first 13 residuals are skipped; presumably they are distorted by the
    # d=1 plus seasonal D=1 (period 12) differencing start-up -- confirm.
    resid = best_model.resid[13:]
    resid.plot()
    plt.show()
    resid.plot(kind='kde')
    plt.show()
    print("Dickey-Fuller test: p=%f" % sm.tsa.stattools.adfuller(resid)[1])

    # Predict over the whole sample plus 12 further steps. The end position is
    # derived from len(df) instead of the former hard-coded 100, which would
    # truncate the forecast for series longer than 101 rows.
    horizon = 12
    prediction = best_model.predict(start=0, end=len(df) - 1 + horizon)
    # 'Sales_box' is produced with boxcox1p, so its inverse is inv_boxcox1p.
    # inv_boxcox returned values that were too high by exactly 1.
    forecast = inv_boxcox1p(prediction, 0.15)

    # In-sample fit: assignment aligns on the index, so only dates present in
    # df are kept in the column.
    df['forecast'] = forecast
    df[['Sales', 'forecast']].plot(figsize=(15, 6))
    plt.show()

    # Forecast including the next 12 months. The full prediction series is
    # plotted directly so the future part is not lost to index alignment.
    ax = df['Sales'].plot(figsize=(15, 6), label='Sales')
    forecast.plot(ax=ax, label='forecast')
    ax.legend()
    plt.show()

In [9]:
def plot(df):
    """Show a line plot of the ``'Sales'`` column of ``df``.

    The x axis is labelled 'Date', the y axis 'Sales', and the figure is
    titled 'Sales'. The figure is displayed immediately.
    """
    sales = df['Sales']
    plt.plot(sales)
    plt.title('Sales')
    plt.ylabel('Sales')
    plt.xlabel('Date')
    plt.show()

In [10]:
def monthwise_sales_prediction(best_model, month, df=None):
    """Plot sales against the forecast up to and including ``month``.

    Parameters
    ----------
    best_model
        Fitted results object exposing ``predict``.
    month : str or datetime-like
        Last date to predict; parsed with ``pd.to_datetime``.
    df : pandas.DataFrame, optional
        Frame with a ``'Sales'`` column; modified in place (``'forecast'`` is
        added or overwritten). When omitted, the notebook-level ``df`` is
        used, which is what the function previously always did.

    Returns
    -------
    None

    Raises
    ------
    NameError
        If ``df`` is not passed and no notebook-level ``df`` exists.
    """
    if df is None:
        # Backward compatibility with callers that relied on the global frame.
        df = globals().get('df')
        if df is None:
            raise NameError("name 'df' is not defined; pass the sales frame as df=")

    month = pd.to_datetime(month)
    # 'Sales_box' is produced with boxcox1p, so its inverse is inv_boxcox1p.
    # inv_boxcox returned values that were too high by exactly 1.
    forecast = inv_boxcox1p(best_model.predict(start=0, end=month), 0.15)

    # Assignment aligns on df's index, so the column holds the in-sample part.
    df['forecast'] = forecast

    # Plot the full prediction so that months beyond the end of df are visible.
    ax = df['Sales'].plot(figsize=(15, 6), label='Sales')
    forecast.plot(ax=ax, label='forecast')
    ax.legend()
    plt.show()

In [11]:
def futuresales(best_model, df, months=23):
    """Print and plot the forecast extended ``months`` months past the data.

    A copy of ``df`` is extended with one empty row per future month, the
    back-transformed model prediction is stored in its ``'forecast'`` column,
    and the extended frame is printed and plotted. ``df`` itself is left
    unchanged.

    Parameters
    ----------
    best_model
        Fitted results object exposing ``predict``.
    df : pandas.DataFrame
        Frame with a datetime index and a ``'Sales'`` column. Assumed to be
        the frame the model was fitted on, so that row positions match the
        model's observations -- confirm.
    months : int, default 23
        Number of future months to append (23 reproduces the former
        hard-coded horizon).

    Returns
    -------
    None
    """
    last_date = df.index[-1]
    future_dates = pd.DatetimeIndex(
        [last_date + pd.DateOffset(months=step) for step in range(1, months + 1)]
    )
    # reindex adds the future rows as NaN while keeping numeric column dtypes.
    future_df = df.reindex(df.index.append(future_dates))

    # Predict exactly as many steps as the extended frame has rows; the former
    # hard-coded end=100 left the tail empty for longer series.
    prediction = best_model.predict(start=0, end=len(future_df) - 1)
    # 'Sales_box' is produced with boxcox1p, so its inverse is inv_boxcox1p.
    # inv_boxcox returned values that were too high by exactly 1.
    future_df['forecast'] = inv_boxcox1p(prediction, 0.15)

    print(future_df)
    future_df[['Sales', 'forecast']].plot(figsize=(15, 6))
    plt.show()

In [12]:
def futuremonth(best_model, df):
    """Print and plot the forecast for the month following the data.

    A copy of ``df`` is extended by one empty row for the next month, the
    back-transformed model prediction is stored in its ``'forecast'`` column,
    and the frame, the next-month value and a plot are shown. ``df`` itself
    is left unchanged.

    Parameters
    ----------
    best_model
        Fitted results object exposing ``predict``.
    df : pandas.DataFrame
        Frame with a datetime index and a ``'Sales'`` column. Assumed to be
        the frame the model was fitted on, so that row positions match the
        model's observations -- confirm.

    Returns
    -------
    None
    """
    next_month = pd.DatetimeIndex([df.index[-1] + pd.DateOffset(months=1)])
    # reindex adds the future row as NaN while keeping numeric column dtypes.
    future_df = df.reindex(df.index.append(next_month))

    # Predict exactly as many steps as the extended frame has rows; the former
    # hard-coded end=100 left the tail empty for longer series.
    prediction = best_model.predict(start=0, end=len(future_df) - 1)
    # 'Sales_box' is produced with boxcox1p, so its inverse is inv_boxcox1p.
    # inv_boxcox returned values that were too high by exactly 1.
    future_df['forecast'] = inv_boxcox1p(prediction, 0.15)

    print(future_df)
    print("_"*50)
    print("Sales forecast for next month is: ", future_df['forecast'].iloc[-1])
    future_df[['Sales', 'forecast']].plot(figsize=(15, 6))
    plt.show()

In [13]:
def futurethreemonths(best_model, df):
    """Print and plot the forecast for the three months following the data.

    A copy of ``df`` is extended by three empty monthly rows, the
    back-transformed model prediction is stored in its ``'forecast'`` column,
    and the frame, the three future values and a plot are shown. ``df``
    itself is left unchanged.

    Parameters
    ----------
    best_model
        Fitted results object exposing ``predict``.
    df : pandas.DataFrame
        Frame with a datetime index and a ``'Sales'`` column. Assumed to be
        the frame the model was fitted on, so that row positions match the
        model's observations -- confirm.

    Returns
    -------
    None
    """
    horizon = 3
    future_dates = pd.DatetimeIndex(
        [df.index[-1] + pd.DateOffset(months=step) for step in range(1, horizon + 1)]
    )
    # reindex adds the future rows as NaN while keeping numeric column dtypes.
    future_df = df.reindex(df.index.append(future_dates))

    # Predict exactly as many steps as the extended frame has rows; the former
    # hard-coded end=100 left the tail empty for longer series.
    prediction = best_model.predict(start=0, end=len(future_df) - 1)
    # 'Sales_box' is produced with boxcox1p, so its inverse is inv_boxcox1p.
    # inv_boxcox returned values that were too high by exactly 1.
    future_df['forecast'] = inv_boxcox1p(prediction, 0.15)

    print(future_df)
    print("_"*50)
    # Show all three future months; previously only the third was printed
    # although the message announced the next 3 months.
    print("Sales forecast for next 3 months is: ")
    print(future_df['forecast'].iloc[-horizon:])
    future_df[['Sales', 'forecast']].plot(figsize=(15, 6))
    plt.show()

In [16]:
# import pandas as pd
def readfile():
    """Ask for a file path and load it into a DataFrame.

    ``.csv`` files are read with ``;`` as the separator and the first row as
    header; ``.xlsx`` files are read with ``pd.read_excel``. The extension
    check ignores case, and surrounding whitespace or quotes in the entered
    path (as produced by copy-pasting a path) are removed first.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected file.

    Raises
    ------
    ValueError
        If the path ends in neither ``.csv`` nor ``.xlsx``.
    """
    filename = input("enter a file path: ").strip().strip('"\'')
    extension = filename.lower()

    if extension.endswith('.csv'):
        df = pd.read_csv(filename, sep=';', header=0)
        print("you have entered a CSV file")
    elif extension.endswith('.xlsx'):
        df = pd.read_excel(filename)
        print("you have entered a XLSX file")
    else:
        raise ValueError('Unknown file type')
    return df



In [None]:

# End-to-end run of the pipeline. `df` is deliberately kept as the single
# notebook-level name: monthwise_sales_prediction reads it implicitly.
df = readfile()                  # prompt for a CSV/XLSX path and load it
df = datafiltering(df)           # keep 'Created at'/'Paid Price' as Date/Sales, drop missing rows
plot(df)                         # quick look at Sales; 'Date' is not yet the index at this point
df = forecast_sales(df)          # index by date, resample to monthly means, diagnostic plots
df = decompose(df)               # seasonal decomposition, ADF tests, adds 'Sales_box'
best_model = arima(df)           # SARIMAX grid search, lowest AIC wins
residuals(df, best_model)        # residual diagnostics, adds 'forecast' to df
# Optional interactive forecast up to a user-supplied month (disabled):
# month = input("enter a month to forecast: ")
# monthwise_sales_prediction(best_model, month)
futuresales(best_model, df)      # forecast plot extended by 23 future months
futuremonth(best_model, df)      # forecast for the next month
futurethreemonths(best_model, df)  # forecast extended by three future months