## Demand Prediction Model

In [1]:
import pandas as pd
from datetime import datetime, timedelta
import calendar
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
df_initial = pd.read_pickle('./DemandDataFile', compression='infer')
df_region = pd.read_pickle('./RegionDataFile', compression='infer')
df_initial = pd.merge(df_initial, df_region, how='inner', right_on=['CITY_NAME'], left_on=['CITY'])
df_initial = df_initial.drop(['CITY_NAME'], axis=1)
df_initial = df_initial[~df_initial['PRODUCT_NAME'].str.contains("Small Flyers|Large Flyers|Meter Bubble Wrap|Bundle of 50 Boxes", na=False)]

In [3]:
df_initial.shape

(2350508, 21)

In [4]:
df_weights = pd.read_csv('./ProductWeights.csv')
df_weights.drop_duplicates(subset=['COD_SKU_CONFIG'],inplace =True)

df_productReviews = pd.read_csv('./ProductReviews.csv')
df_productReviews.drop_duplicates(subset=['COD_SKU_CONFIG'],inplace =True)

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
df_weights.columns = ['SKU','PRODUCT_NAME','WEIGHT']
df_initial = pd.merge(df_initial, df_weights[['SKU','WEIGHT']], how='left')

In [6]:
df_productReviews.columns = ['SKU','AVG_RATING']
df_initial = pd.merge(df_initial, df_productReviews, how='left')

In [7]:
df_initial.Voucher[df_initial.Voucher > 0] = True
df_initial.Voucher[df_initial.Voucher != True] = False

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [8]:
df_initial['WEEKDAY'] = df_initial['DATE'].apply(lambda x:calendar.day_name[x.weekday()])
df_initial['MONTH'] = df_initial['DATE'].apply(lambda x:calendar.month_abbr[x.month])


In [9]:
tab_info = pd.DataFrame(df_initial.dtypes).T.rename(index={0:'column type'})
tab_info=tab_info.append(pd.DataFrame(df_initial.isnull().sum()).T.rename(index={0:'null values (nb)'}))
tab_info=tab_info.append(pd.DataFrame(df_initial.isnull().sum()/df_initial.shape[0]*100).T.rename(index={0:'null values (%)'}))
display(tab_info)

Unnamed: 0,COD_ORDER_NR,SKU,DATE,CATEGORY_LEVEL_1,CATEGORY_LEVEL_2,CATEGORY_LEVEL_3,CATEGORY_LEVEL_4,PRODUCT_NAME,BRAND_NAME,UNIT_PRICE,...,Gender,CITY,Voucher,MV,Quantity,REGION_NAME,WEIGHT,AVG_RATING,WEEKDAY,MONTH
column type,object,object,object,object,object,object,object,object,object,float64,...,object,object,object,float64,int64,object,object,float64,object,object
null values (nb),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,356819,770955,0,0
null values (%),0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,15.1805,32.7995,0,0


### WEIGHT Column Cleaning 

In [10]:
import re
df_initial['WEIGHT'] = df_initial['WEIGHT'].apply(lambda x : re.sub(r'#', r'0', str(x)))
df_initial['WEIGHT'] = df_initial['WEIGHT'].apply(lambda x : re.sub(r'[a-zA-Z]', '', str(x)))
df_initial['WEIGHT'] = df_initial['WEIGHT'].apply(lambda x : re.sub(r'([.])\1+', r'\1', str(x)))
df_initial['WEIGHT'] = df_initial['WEIGHT'].apply(lambda x : re.sub(r'/', r'', str(x)))
df_initial['WEIGHT'] = df_initial['WEIGHT'].apply(lambda x : re.sub(r'[!]', '0', str(x)))
df_initial['WEIGHT'] = pd.to_numeric(df_initial['WEIGHT'])
weights_temp = df_initial['WEIGHT']

df_initial.WEIGHT[df_initial['WEIGHT'].isnull()] = -1
df_initial.WEIGHT[df_initial['WEIGHT'] >= 100] = df_initial.WEIGHT[df_initial['WEIGHT'] >= 100] / 1000
len(df_initial.WEIGHT[df_initial['WEIGHT'] >= 100] / 1000)
bins = [-2 , -1 ,0 , 10, 20, 30 ,40 ,50 ,60 ,70 ,df_initial['WEIGHT'].max()]
labels = ['Unknown', 'Low < 0','Low (>0 <10)','Low (>10 <20)','Med (>20 <30)', 
          'Med (>30 <40)','Med (>40 <50)','Hi (>50 <60)','Hi (>60 <70)','Highest (>70)']
df_initial['WEIGHT_BINNED'] = pd.cut(df_initial['WEIGHT'], bins=bins, labels=labels)

### UNIT PRICE Column Binning

In [82]:
df_initial['UNIT_PRICE'].describe()

count    2.350508e+06
mean     1.672094e+03
std      7.327753e+03
min      1.000000e-02
25%      2.500000e+02
50%      4.990000e+02
75%      9.990000e+02
max      3.498300e+06
Name: UNIT_PRICE, dtype: float64

In [129]:
df_initial['PRICE_BINNED'] = pd.cut(df_initial['UNIT_PRICE'], 4, labels=["low","lower medium", "medium","expensive ++"])

In [130]:
df_initial[['UNIT_PRICE','PRICE_BINNED']]['PRICE_BINNED'].value_counts()

low             2350501
lower medium          4
medium                2
expensive ++          1
Name: PRICE_BINNED, dtype: int64

In [134]:
df_initial[df_initial.PRICE_BINNED == 'lower medium']

Unnamed: 0,COD_ORDER_NR,SKU,DATE,CATEGORY_LEVEL_1,CATEGORY_LEVEL_2,CATEGORY_LEVEL_3,CATEGORY_LEVEL_4,PRODUCT_NAME,BRAND_NAME,UNIT_PRICE,...,MV,Quantity,REGION_NAME,WEIGHT,AVG_RATING,WEEKDAY,MONTH,WEIGHT_BINNED,PRICE_BINNED,_BINNED
534984,304877131,GE812HB071TMQNAFAMZ,2018-01-16,Beauty & Health,Personal Care,Hair Removal Tools,Clippers,Gm-722 - Hair Trimmer - White,Gemei,1012625.9,...,3037877.7,3,Sindh,-1.0,,Tuesday,Jan,Unknown,lower medium,expensive
588127,305869523,GE812HB1EGBMQNAFAMZ,2018-01-26,Beauty & Health,Personal Care,Hair Removal Tools,Clippers,Gm-722 - Hair Trimmer - White,Gemei,1012625.9,...,1012625.9,1,Sindh,-1.0,,Friday,Jan,Unknown,lower medium,expensive
716441,301452991,CR695HL0W3ZY6NAFAMZ,2017-12-11,Home & Living,Kitchen & Dining,Kitchen Tools & Accessories,Kitchen Accessories,Bundle of 5000 Pcs of Steel Fruits Grater,Crockery King,945000.0,...,945000.0,1,Sindh,-1.0,5.0,Monday,Dec,Unknown,lower medium,expensive
1817096,325564642,GE812HB00HZQMNAFAMZ,2018-03-20,Beauty & Health,Personal Care,Hair Removal Tools,Clippers,Gm-722 - Hair Trimmer - White,Gemei,1065922.0,...,1065727.46,1,Punjab,0.2,,Tuesday,Mar,Low (>0 <10),lower medium,expensive


In [171]:
df_initial[(df_initial.UNIT_PRICE > 200000) & (df_initial.CATEGORY_LEVEL_2.isin(['Kitchen & Dining','Women\'s Clothing']))]

Unnamed: 0,COD_ORDER_NR,SKU,DATE,CATEGORY_LEVEL_1,CATEGORY_LEVEL_2,CATEGORY_LEVEL_3,CATEGORY_LEVEL_4,PRODUCT_NAME,BRAND_NAME,UNIT_PRICE,...,MV,Quantity,REGION_NAME,WEIGHT,AVG_RATING,WEEKDAY,MONTH,WEIGHT_BINNED,PRICE_BINNED,_BINNED
716441,301452991,CR695HL0W3ZY6NAFAMZ,2017-12-11,Home & Living,Kitchen & Dining,Kitchen Tools & Accessories,Kitchen Accessories,Bundle of 5000 Pcs of Steel Fruits Grater,Crockery King,945000.0,...,945000.0,1,Sindh,-1.0,5.0,Monday,Dec,Unknown,lower medium,expensive
786142,304166991,GI257HLFZNJMNAFAMZ,2017-12-11,Home & Living,Kitchen & Dining,Kitchen Tools & Accessories,Kitchen Accessories,Bundle of 5000 Pcs of Fruits Planner With Card...,Daraz.pk,1842750.0,...,1842750.0,1,Sindh,10.0,,Monday,Dec,Low (>0 <10),medium,expensive
2289116,326148442,NI569FA1EH56MNAFAMZ,2018-03-12,Women's Fashion,Women's Clothing,Lingerie & Sleepwear,Night Suits,3Pc Short Semi Transparent Gown Set - Green,Nighty4u,1899009.0,...,18990090.0,10,Balochistan,0.5,,Monday,Mar,Low (>0 <10),medium,expensive
