In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
import xlwt
from scipy.stats import norm
from xlwt import Workbook 
import math

In [2]:
J2_2019 = pd.read_excel('sales_movements_J2.xlsx', sheet_name = '2019')
J2_2018 = pd.read_excel('sales_movements_J2.xlsx', sheet_name = '2018')

In [3]:
lost_sales = pd.read_excel("Lost Sales.xlsx", sheet_name = "LostSalesAll")

In [5]:
def lossFunctionSingle(sales, loss_sales, wh, windowSize, sc):
    lost_sales_dict = {}
    """"
    creates a dictionary mapping a sales code to an array where index 0 is the 
    number of times a lost sale occurred and index 1 is the total of lost quantity
    windowSize: how many rows at once 
    
    Size of the moving window. This is the number of observations used for calculating the statistic.
   
    """
    sales.loc[:, 'TrnQty'] = sales['TrnQty'].abs()
    sales = sales[sales["StockCode"] == sc]
    loss_sales = loss_sales[loss_sales["StockCode"] == sc]
    loss_sales = loss_sales[loss_sales["Warehouse"] == wh]
    for index,row in loss_sales.iterrows():
        code = row["StockCode"]
        if code in lost_sales_dict.keys(): 
            lost_sales_dict[code][0] += 1
            lost_sales_dict[code][1] += row["QuantityLost"] 
        else:
            lost_sales_dict[code] = []
            lost_sales_dict[code].append(1)
            lost_sales_dict[code].append(row["QuantityLost"])
    TrnQty_withLoss = []
    sales = sales.groupby(["StockCode","EntryDate"])['TrnQty'].sum().reset_index()
    for index, row in sales.iterrows():
        code = row["StockCode"]
        TrnQty = row['TrnQty']
        if code in lost_sales_dict.keys(): 
            lostSalesAvg = lost_sales_dict[code][1] / lost_sales_dict[code][0]
            TrnQty_withLoss.append(TrnQty + lostSalesAvg)
        else:
            TrnQty_withLoss.append(TrnQty)
            
    sales.loc[:, "TrnQty_withLoss"] = TrnQty_withLoss
    sales.loc[:, "count"] = sales.groupby('StockCode')['StockCode'].transform('count')
    sales.loc[:, "EntryDate"] = pd.to_datetime(sales['EntryDate'])
    sales.loc[:, "mean"] = sales.TrnQty_withLoss.rolling(windowSize, min_periods = 1).mean()
    sales.loc[:, "std"] = sales.TrnQty_withLoss.rolling(windowSize, min_periods = 1).std()
    return sales

In [6]:
def calculateError(dwrw, nextYear, sd):
    error = 0
    count = 0
    nNy = nextYear[nextYear["StockCode"] == dwrw.StockCode.unique()[0]]
    nNy.loc[:, "EntryDate"] = pd.to_datetime(nNy['EntryDate'])
    nNy = nNy.groupby(["StockCode","EntryDate"])['TrnQty'].sum().reset_index()
    nNy['month'] = [row['EntryDate'].month for index, row in nNy.iterrows()]
    nNy['day'] = [row['EntryDate'].day for index, row in nNy.iterrows()]
    for index, row in dwrw.iterrows():
        upperBound = row["mean"] + sd * row["std"]
        q = nNy[nNy["month"] == row["EntryDate"].month]
        q = q[q["day"]== row["EntryDate"].day]
        q = q["TrnQty"]
        if len(q) > 0 and not np.isnan(upperBound):
            idx = list(q.index.values)[0]
            qtyatIndex = q[idx]
            if qtyatIndex >= 0:
                error += (q[idx] - upperBound)**2
                count += 1
    if count > 0:
        return error/count
    else:
        return -1
        

In [49]:
def minError(salesdf, nextYear, lossSales, wh):
    salesdf = salesdf.groupby(["StockCode","EntryDate"])['TrnQty'].sum().reset_index()
    salesdf.loc[:, "count"] = salesdf.groupby('StockCode')['StockCode'].transform('count')
    salesdf = salesdf[salesdf["count"] > 9]
    unqStockCodes = salesdf.StockCode.unique()
    minerrordict = {}
    for x in tqdm(range(0, len(unqStockCodes))):
        sc = unqStockCodes[x]
        base = salesdf[salesdf["StockCode"] == sc].shape[0]
        rate = int(round(base * .7))
        dwrw = lossFunctionSingle(salesdf, lossSales, wh, base, sc)
        priorWindowSize = base
        ct = base + 1
        priorError = calculateError(dwrw, nextYear, 0)
        attemptedWindowSizes = []
        errorWindowSizes = []
        minerrordict[sc] = base
        while True:
            if ct == 0:
                break
            if rate < 1:
                break
            currWindowSize = priorWindowSize - rate
            if currWindowSize in attemptedWindowSizes or currWindowSize < 1:
                break
            if len(errorWindowSizes) > 0:
                if currWindowSize < min(errorWindowSizes):
                    break
            attemptedWindowSizes.append(currWindowSize)
            if currWindowSize < 1:
                break
            dwrw = lossFunctionSingle(salesdf, lossSales, wh, currWindowSize, sc)
            currError = calculateError(dwrw, nextYear, 0)
            if currError < 0:
                break
            if currError < priorError:
                rate = 2
                minerrordict[sc] = currWindowSize
                priorError = currError
                priorWindowSize = currWindowSize
            elif currError == priorError:
                rate = rate + int(round(.1 * base))
            else:
                errorWindowSizes.append(currWindowSize)
                rate = rate - int(round(.05 * base))
            ct-=1
    return minerrordict
        
        

In [74]:
a = minError(J2_2018, J2_2019, lost_sales, "J2", 0)






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s




 20%|██        | 1/5 [01:22<05:30, 82.56s/it][A[A[A[A



 40%|████      | 2/5 [02:29<03:53, 77.97s/it][A[A[A[A



 60%|██████    | 3/5 [03:50<02:37, 78.91s/it][A[A[A[A



 80%|████████  | 4/5 [05:20<01:22, 82.05s/it][A[A[A[A



100%|██████████| 5/5 [07:13<00:00, 91.40s/it][A[A[A[A



[A[A[A[A

In [4]:
norm.ppf(.84)

0.994457883209753

In [109]:
def generateTrnQty(salesdf, nextYear, lossSales, wh, confidentPercent):
    
    """
    salesdf -
    nextYear -
    lossSales -
    wh -
    confidentPercent - confidence level 
    
    This function returns a dataframe that shows the yearly and quarterly turnover quantity prediction for each stock code, 
    with mean value and a confidence interval upper bound value for each time period.
    
    """
    
    slidingWindowDict = minError(salesdf, nextYear, lossSales, wh)
    sd = norm.ppf((100 + confidentPercent)/200)
    df = pd.DataFrame([], columns = [])
    for key in slidingWindowDict.keys():
        lfs = lossFunctionSingle(salesdf, lossSales, wh, slidingWindowDict[key], key)
        a
        quartile = []
        standard = []
        for x in range(1, 5):
            quartile.append(sum([int(row['mean']) for index, row in lfs.iterrows() if int(row[
                "EntryDate"].month) < x*3 + 1 and int(row["EntryDate"].month) > (x-1)*3]))
            standard.append(math.ceil(quartile[x-1] + sum([sd * int(row['std']) for index, row in lfs.iterrows(
            ) if int(row["EntryDate"].month) < x*3 + 1 and int(row["EntryDate"].month) > (x-1)*3 and index > 1]))) 
        lfs = lfs.groupby("StockCode")['mean'].sum().round().reset_index()
        lfs[str(confidentPercent) + "% TrnQty"] = lfs['mean'][0] + standardDeviation
        for x in range(1, 5):
            lfs['q' + str(x)] = quartile[x - 1]
            lfs['q' + str(x) + " " + str(confidentPercent) + '% confidence'] = standard[x-1]          
        df = df.append(lfs)
    return df
        
    

In [112]:
generateTrnQty(J2_2018, J2_2019, lost_sales, "J2", 95)

Unnamed: 0,StockCode,mean,95% TrnQty,q1,q1 95% confidence,q2,q2 95% confidence,q3,q3 95% confidence,q4,q4 95% confidence
0,1419,273.0,773.0,104,291,28,87,60,166,56,149
0,1431,453.0,1176.0,152,403,59,152,102,230,93,246
0,159439,940.0,1695.0,350,662,142,244,176,300,257,448
0,159440,15829.0,46390.0,4640,13758,3056,8262,3935,11385,4099,12684
0,174257,7245.0,15645.0,2106,4264,1568,3297,1589,3436,1970,4605


In [60]:
a

{'1419': 64, '1431': 19, '159439': 34, '159440': 39, '174257': 33}