In [1]:
from ckpackages import azsql 
import pyodbc
import numpy as np
import pandas as pd
from fbprophet import Prophet

In [3]:
# Ignore every warning of category RuntimeWarning for the rest of the session.
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 
# Only let ERROR-and-above records through on the 'fbprophet' logger.
import logging
logging.getLogger('fbprophet').setLevel(logging.ERROR)

In [4]:
# Turn off pandas' chained-assignment (SettingWithCopy) check globally.
# NOTE(review): this hides the warning rather than removing its cause; taking an
# explicit .copy() of derived frames would make this setting unnecessary.
pd.options.mode.chained_assignment = None

In [2]:
# Select every column of V_AN_PC_UNDERCUTTING_PRI for region 'East', limited to the
# two product codes listed below. 'Code' is the first 8 characters of Product_Hierarchy.
Query = """select *,substring(Product_Hierarchy,1,8) as 'Code'
            from V_AN_PC_UNDERCUTTING_PRI
            where region = 'East'
            and substring(Product_Hierarchy,1,8) in ('06131253','06131254') ;"""
# NOTE(review): azsql is a project helper; presumably it executes the statement and
# returns a pandas DataFrame (later cells select columns from pri_data) - confirm.
pri_data = azsql.callstatement(Query)

## Data Cleaning

In [6]:
# Load the reference sheet of dates and store Actdte as 'YYYY-MM-DD' text, the same
# representation the billing data uses when the two are merged on Actdte later.
Actual_data = pd.read_excel(r"E:\0001_Studies\Undercutting\Default Actdte.xlsx").assign(
    Actdte=lambda sheet: sheet['Actdte'].dt.strftime('%Y-%m-%d')
)

In [7]:
# Keep only the four columns used downstream. The explicit .copy() makes
# Distributor an independent frame, so later column assignments on it never act
# on a slice of pri_data (the pattern pandas reports as SettingWithCopyWarning).
Distributor = pri_data[['Distributor_Code','Code','Actdte','Billing_Qty']].copy()

In [8]:
# Render Actdte as 'YYYY-MM-DD' text, then total Billing_Qty for every
# distributor / product code / date combination (one row per combination).
Distributor = (
    Distributor
    .assign(Actdte=Distributor['Actdte'].dt.strftime('%Y-%m-%d'))
    .groupby(['Distributor_Code','Code','Actdte'])["Billing_Qty"]
    .sum()
    .reset_index()
)

## Removing Distributor / Product Code pairs that have only a single entry

In [9]:
# Drop every (Distributor_Code, Code) pair that has only one dated entry.
# The number of entries per pair is broadcast back onto each row with transform(),
# so rows can be filtered directly - no helper merge and no sentinel fill value.
pair_keys = ['Distributor_Code', 'Code']
entries_per_pair = Distributor.groupby(pair_keys)['Actdte'].transform('count')
Distributor = Distributor.loc[
    entries_per_pair > 1,
    ['Distributor_Code','Code','Actdte','Billing_Qty'],
].copy()

## FB Prophet Model

In [10]:
def fit_predict_model(dataframe,interval_width = 0.99, changepoint_range = 0.8):
    """Fit a Prophet model on ``dataframe`` and return its in-sample forecast.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        History with a date column ``ds`` and an observed-value column ``y``.
        It is not modified.
    interval_width : float, default 0.99
        Passed to ``Prophet(interval_width=...)``.
    changepoint_range : float, default 0.8
        Passed to ``Prophet(changepoint_range=...)``.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``Prophet.predict`` for the same dates the model
        was fitted on, with the observed values added as column ``y``.
    """
    # Prophet orders its history and forecast by ``ds``. Put the input into the
    # same (chronological) order first, so that ``y`` is attached to the forecast
    # row of the same date even when the caller's rows are not sorted.
    # A stable sort keeps the caller's relative order for repeated dates.
    chronological = np.argsort(pd.to_datetime(dataframe['ds']).to_numpy(), kind='mergesort')
    history = dataframe.iloc[chronological].reset_index(drop = True)

    # Daily, weekly and yearly seasonality are all switched off.
    m = Prophet(daily_seasonality = False, 
                yearly_seasonality = False, 
                weekly_seasonality = False,
                seasonality_mode = 'multiplicative', 
                interval_width = interval_width,
                changepoint_range = changepoint_range)

    m = m.fit(history)
    forecast = m.predict(history)
    # Both frames carry a fresh 0..n-1 index at this point, so this lines up row by row.
    forecast['y'] = history['y']
    return forecast

In [11]:
def detect_anomalies(forecast):
    """Flag observations that fall outside the forecast's uncertainty interval.

    Parameters
    ----------
    forecast : pandas.DataFrame
        Must contain the columns ``ds``, ``trend``, ``yhat``, ``yhat_lower``,
        ``yhat_upper`` and ``y``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of those six columns plus:

        * ``anomaly`` - 1 where ``y > yhat_upper``, -1 where ``y < yhat_lower``,
          0 otherwise.
        * ``importance`` - for flagged rows, how far ``y`` lies outside the
          violated bound, divided by ``y``; 0.0 for all other rows.
    """
    forecasted = forecast[['ds','trend', 'yhat', 'yhat_lower', 'yhat_upper', 'y']].copy()

    above_band = forecasted['y'] > forecasted['yhat_upper']
    below_band = forecasted['y'] < forecasted['yhat_lower']

    forecasted['anomaly'] = 0
    forecasted.loc[above_band, 'anomaly'] = 1
    forecasted.loc[below_band, 'anomaly'] = -1

    # Start from a float column: the values written below are ratios, and
    # writing floats into an integer column relies on a deprecated dtype upcast.
    forecasted['importance'] = 0.0
    # NOTE: a flagged row with y == 0 divides by zero and yields a non-finite
    # value (inf); such rows are left as they are here.
    forecasted.loc[above_band, 'importance'] = \
        (forecasted['y'] - forecasted['yhat_upper'])/forecasted['y']
    forecasted.loc[below_band, 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['y'])/forecasted['y']

    return forecasted

## Creating a dictionary that maps each Distributor Code to its Product Codes

In [16]:
%%time
# Count the dated entries per (Distributor_Code, Code) pair, then collapse the
# result into {Distributor_Code: [Code, ...]} for the modelling loop.
rmv_single_rows = (
    Distributor
    .groupby(["Distributor_Code","Code"])
    .agg(Count = ("Actdte","count"))
    .reset_index()
)
test1 = (
    rmv_single_rows
    .groupby(["Distributor_Code"])["Code"]
    .apply(lambda codes: codes.tolist())
    .to_dict()
)

Wall time: 83 ms


## Matching each Distributor Code / Product Code pair with the original DataFrame and detecting anomalies

In [None]:
%%time

# Fit one model per (Distributor_Code, Code) pair and collect the anomaly tables.
# The per-pair frames are gathered in a list and concatenated once at the end:
# growing a DataFrame with .append() inside the loop copies all accumulated rows
# on every iteration, and DataFrame.append no longer exists in pandas 2.x.
anomaly_frames = []

for key, items in test1.items():
    for item in items:
        temp = Distributor.loc[
            (Distributor['Distributor_Code'] == key) & (Distributor['Code'] == item),
            ['Actdte', 'Billing_Qty'],
        ]

        # Left-join onto the reference dates so every date in Actual_data is present;
        # dates without a billing row get a quantity of 0.
        temp_new = (
            pd.merge(Actual_data, temp, how ='left', on=['Actdte'])
            .fillna(0)
            .rename(columns = {"Actdte" : "ds","Billing_Qty":"y"})
        )

        pred = fit_predict_model(temp_new)
        anomaly = detect_anomalies(pred)

        # Tag every row with the pair it belongs to (scalars broadcast to all rows).
        anomaly['Code'] = str(item)
        anomaly['Distributor_Code'] = str(key)

        anomaly_frames.append(anomaly)

# pd.concat raises on an empty list, so fall back to an empty frame when no pair was processed.
final_dataset = pd.concat(anomaly_frames) if anomaly_frames else pd.DataFrame()

#### New Wall Time 19 min 42 sec

In [None]:
# Display the combined anomaly table built by the loop above.
final_dataset

In [19]:
# Write the combined results to disk. No index argument is passed, so the
# DataFrame index is written as the first CSV column.
final_dataset.to_csv(r'E:\0001_Studies\Undercutting\output\Indica\FaceBookProphet-v2_output.csv')