# Anomaly Detection

In [1]:
import  pandas as pd
import numpy as np
import scipy.stats as ss

In [2]:
pd.options.mode.chained_assignment = None

## Reading dataset and cleaning

In [3]:
data = pd.read_excel(r"E:\0001_Studies\Undercutting\AnomalyDetection_CorrectDataset.xlsx")

In [4]:
data.head(2)

Unnamed: 0,Distribution Channel,Division,Sold-To Party,Customer Group 3,Price List Type,Region Order,Plant,Material,Date,Sales,...,Gross,Gross.1,Sales cost,Sales cost.1,Net,Net.1,RtnsQty,RtnsQty.1,Rtns (cst),Rtns (cst).1
0,GT General Trade,PC Personal Care,2000001 N.CT. AGENCIES,006 GT,02 Super Stockist,TN TN,PYC1 CAVINKARE PVT LTD,KH05D8HWP02R KARTHIKA HERBAL POWDER 5.8 ...,10.07.2019,573752.23,...,5040.0,KG,272536.0,INR,4640.0,KG,0.0,CV,0.0,INR
1,GT General Trade,PC Personal Care,2000001 N.CT. AGENCIES,006 GT,02 Super Stockist,TN TN,PYC1 CAVINKARE PVT LTD,KH05D8HWP02R KARTHIKA HERBAL POWDER 5.8 ...,16.07.2019,215157.01,...,1890.0,KG,102201.0,INR,1740.0,KG,0.0,CV,0.0,INR


In [5]:
data.columns

Index(['Distribution Channel', 'Division', 'Sold-To Party', 'Customer Group 3',
       'Price List Type', 'Region Order', 'Plant', 'Material', 'Date', 'Sales',
       'Sales.1', 'Returns', 'Cred.Memos', 'Cred.Memos.1', 'CM: net 1',
       'CM: net 1.1', 'Net Value', 'Net Value.1', 'Tax', 'Tax.1', 'Bill. Qty',
       'Bill. Qty.1', 'CredMemQty', 'CredMemQty.1', 'Gross', 'Gross.1',
       'Sales cost', 'Sales cost.1', 'Net', 'Net.1', 'RtnsQty', 'RtnsQty.1',
       'Rtns (cst)', 'Rtns (cst).1'],
      dtype='object')

In [6]:
temp                   =  data["Material"].str.split("       ",expand=True)
data["Prod_Code"]      = temp[0]
data["Prod_Name"]      = temp[1]
data["Date"]           = data["Date"].astype("datetime64[ns]")
data["Month"]          = data.Date.dt.strftime('%B')

In [7]:
data = data.applymap(lambda s:s.strip() if type(s) == str else s)

### Line items in dataset = 13353 

In [8]:
len(data)

13353

In [26]:
data[["Sold-To Party","Prod_Name","Date","Month"]]

Unnamed: 0,Sold-To Party,Prod_Name,Date,Month
0,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,2019-10-07,October
1,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,2019-07-16,July
2,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,2019-07-23,July
3,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,2019-07-29,July
4,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,2019-07-08,July
...,...,...,...,...
13348,2007340 SVP AGENCIES,MEERA ADVANCE HW POWDER 80GM REFILL 50 P,2019-09-18,September
13349,2007340 SVP AGENCIES,MEERA ADVANCE HW POWDER 5.6 GM 1350 PCS,2019-09-18,September
13350,2007345 MAHESH AGENCIES,KARTHIKA HERBAL POWDER CONT 180G 80PCS,2019-09-23,September
13351,2007345 MAHESH AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,2019-09-23,September


### Filter only the required columns in dataframe

In [9]:
dist_data = data[["Sold-To Party","Month","Prod_Name","Bill. Qty"]]

In [10]:
dist_data.sort_values(by="Bill. Qty")

Unnamed: 0,Sold-To Party,Month,Prod_Name,Bill. Qty
9532,2000440 KRISHNA MARKETING,August,MEERA HERBAL POWDER 80GM REFILL 50 PCS,-11.0
5202,2000119 PRAMESH TRADERS,August,MEERA HERBAL POWDER 80GM REFILL 50 PCS,0.0
7666,2000163 R.N.AGENCIES,July,KARTHIKA HERBAL POWDER CONT 180G 80PCS,0.0
1630,2000019 MD TRADERS,September,MEERA HERBAL POWDER 40GM REFILL 100PCS,0.0
1629,2000019 MD TRADERS,August,MEERA HERBAL POWDER 40GM REFILL 100PCS,0.0
...,...,...,...,...
77,2000001 N.CT. AGENCIES,September,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,439.0
587,2000007 ASHIKKA AGENCIES,April,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,500.0
8660,2000178 VISALAKSHI MARKETINGS,September,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,600.0
565,2000007 ASHIKKA AGENCIES,September,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,800.0


### Grouping by 'Number of Items' bought by Distributors across the month 
#### They purchase pattern changes for every distributor, Mean and SD is caluculated for every distributor group

In [11]:
dist_data_groupby = dist_data.groupby(["Sold-To Party","Month","Prod_Name"]).agg(Sum_Of_Bill_Qty = ("Bill. Qty","sum")).reset_index()

In [12]:
len(dist_data_groupby)

8165

In [13]:
dist_data_groupby.sort_values(by = 'Sum_Of_Bill_Qty')

Unnamed: 0,Sold-To Party,Month,Prod_Name,Sum_Of_Bill_Qty
5485,2000440 KRISHNA MARKETING,August,MEERA HERBAL POWDER 80GM REFILL 50 PCS,-11.0
4397,2000164 SELVAA AGENCIES,September,MEERA HERBAL POWDER 5.6 GM 1350 PCS,0.0
2646,2000113 KVB TRADERS,September,MEERA HERBAL POWDER 80GM REFILL 50 PCS,0.0
2645,2000113 KVB TRADERS,September,MEERA HERBAL POWDER 5.6 GM 1350 PCS,0.0
1572,2000089 SRI MAHESWARI AGENCIES,September,MEERA HERBAL POWDER 120GM CONT 60 PCS,0.0
...,...,...,...,...
361,2000007 ASHIKKA AGENCIES,September,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1169.0
324,2000007 ASHIKKA AGENCIES,April,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1250.0
4884,2000178 VISALAKSHI MARKETINGS,July,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1334.0
4914,2000178 VISALAKSHI MARKETINGS,September,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1400.0


### Eliminated products with Number of item = 0

In [14]:
dist_data_groupby = dist_data_groupby[~dist_data_groupby['Sum_Of_Bill_Qty'].isin([0.0])]
len(dist_data_groupby)

8027

In [15]:
dist_data_groupby.to_csv(r"E:\0001_Studies\Undercutting\output\grouped_by_prod_dist.csv",index = False)

### Finding the mean and SD to Group

In [16]:
dist_data_groupby1 = dist_data_groupby.groupby(["Sold-To Party","Prod_Name"]).agg(Mean = ("Sum_Of_Bill_Qty","mean"),stddev = ("Sum_Of_Bill_Qty","std")).reset_index()
dist_data_groupby1

Unnamed: 0,Sold-To Party,Prod_Name,Mean,stddev
0,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,800.222222,517.889172
1,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER 50GM 180 PCS,8.714286,14.032276
2,2000001 N.CT. AGENCIES,KARTHIKA HERBAL POWDER CONT 180G 80PCS,10.285714,7.696629
3,2000001 N.CT. AGENCIES,MEERA ADVANCE HW POWDER 120GM CONT 60 PC,36.666667,35.381728
4,2000001 N.CT. AGENCIES,MEERA ADVANCE HW POWDER 5.6 GM 1350 PCS,135.500000,62.173145
...,...,...,...,...
2210,2007340 SVP AGENCIES,MEERA ADVANCE HW POWDER 5.6 GM 1350 PCS,30.000000,
2211,2007340 SVP AGENCIES,MEERA ADVANCE HW POWDER 80GM REFILL 50 P,8.000000,
2212,2007345 MAHESH AGENCIES,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1.000000,
2213,2007345 MAHESH AGENCIES,KARTHIKA HERBAL POWDER CONT 180G 80PCS,1.000000,


In [17]:
zscore_data = pd.merge(dist_data_groupby, dist_data_groupby1, on = ["Sold-To Party","Prod_Name"], how = 'left' )
len(zscore_data)
zscore_data

Unnamed: 0,Sold-To Party,Month,Prod_Name,Sum_Of_Bill_Qty,Mean,stddev
0,2000001 N.CT. AGENCIES,April,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1108.0,800.222222,517.889172
1,2000001 N.CT. AGENCIES,April,KARTHIKA HERBAL POWDER 50GM 180 PCS,2.0,8.714286,14.032276
2,2000001 N.CT. AGENCIES,April,KARTHIKA HERBAL POWDER CONT 180G 80PCS,16.0,10.285714,7.696629
3,2000001 N.CT. AGENCIES,April,MEERA HERBAL POWDER 120GM CONT 60 PCS,55.0,24.400000,19.654516
4,2000001 N.CT. AGENCIES,April,MEERA HERBAL POWDER 40GM REFILL 100PCS,11.0,5.600000,3.130495
...,...,...,...,...,...,...
8022,2007340 SVP AGENCIES,September,MEERA ADVANCE HW POWDER 5.6 GM 1350 PCS,30.0,30.000000,
8023,2007340 SVP AGENCIES,September,MEERA ADVANCE HW POWDER 80GM REFILL 50 P,8.0,8.000000,
8024,2007345 MAHESH AGENCIES,September,KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS,1.0,1.000000,
8025,2007345 MAHESH AGENCIES,September,KARTHIKA HERBAL POWDER CONT 180G 80PCS,1.0,1.000000,


In [18]:
zscore_data.dropna(axis = 0, inplace= True)
len(zscore_data)

7570

### compute z score

In [19]:
zscore_data['z-score']= (zscore_data['Sum_Of_Bill_Qty'] - zscore_data['Mean'])/zscore_data['stddev']

In [20]:
zscore_data.sort_values(by='z-score', ascending=False,inplace=True)
len(zscore_data)

7570

In [21]:
zscore_data.fillna(value='NaN',inplace=True)

In [22]:
print(zscore_data)

                          Sold-To Party      Month  \
1622  2000090    SRI SRINIVASA AGENCIES  September   
2106            2000100    JAI AGENCIES       July   
787       2000017    AKILADEVI AGENCIES  September   
4812   2000178    VISALAKSHI MARKETINGS  September   
2004             2000097    VMR TRADERS  September   
...                                 ...        ...   
7991       2007155    KANTI ENTERPRISES     August   
7992       2007155    KANTI ENTERPRISES       July   
8005    2007226    ADITHYAA ENTERPRISES     August   
8010    2007226    ADITHYAA ENTERPRISES    October   
8016    2007226    ADITHYAA ENTERPRISES  September   

                                     Prod_Name  Sum_Of_Bill_Qty        Mean  \
1622    KARTHIKA HERBAL POWDER CONT 180G 80PCS             15.0    3.272727   
2106    KARTHIKA HERBAL POWDER 5.8 GM 2000 PCS            341.0   93.666667   
787     KARTHIKA HERBAL POWDER CONT 180G 80PCS             19.0    5.555556   
4812    KARTHIKA HERBAL POWDER CONT

In [23]:
zscore_data.to_csv(r"E:\0001_Studies\Undercutting\output\ZScores_NanValues.csv",index = False)

### plot