# Importing required libraries
- pandas for segregating and cleaning the data
- numpy for array handling

### Issues faced
- datetime in pandas and tensorflow (*which has to be added*)

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import xlrd
import csv

In [3]:
file_location = 'BUSYv1.csv'
# file_location = 'BUSY7246fb6.csv'
df = pd.read_csv(file_location, sep='|', names=['POS_Application_Name','STOREID','MACID','BILLNO','BARCODE','GUID','CREATED_STAMP','CAPTURED_WINDOW','UPDATE_STAMP'])
barcode_size_corrected_df = df[(df['BARCODE'].str.len() == 12) | (df['BARCODE'].str.len() == 13) | (df['BARCODE'].str.len() == 8)]
no_capture_window = barcode_size_corrected_df[['POS_Application_Name','STOREID','BARCODE','GUID','CREATED_STAMP']]
barcode_pattern = '^[0-9]*$'
barcode_rectified = no_capture_window[no_capture_window['BARCODE'].str.contains(barcode_pattern)]
len(barcode_rectified.index)

56513

In [4]:
items_sold = barcode_rectified['BARCODE'].value_counts()
print(items_sold[:5])

8901030564185    668
8901030534898    425
8901058842722    363
8901030341663    300
8901030627378    295
Name: BARCODE, dtype: int64


#### Datatype issue

Here BARCODE and CREATED_STAMP have the datatype of **object** which is analogous to **string** in native python

In [5]:
print('barcode_rectified.BARCODE.dtype:\t' + str(barcode_rectified.BARCODE.dtype))
print('barcode_rectified.CREATED_STAMP.dtype:\t' + str(barcode_rectified.CREATED_STAMP.dtype))

barcode_rectified.BARCODE.dtype:	object
barcode_rectified.CREATED_STAMP.dtype:	object


#### Barcode Rectified
Using
```python
pd.to_datetime(<df>, format='<shell format>')
```
we have changed the CREATED_STAMP attribute from object to datetime Dtype

In [6]:
barcode_rectified['CREATED_STAMP'] = pd.to_datetime(barcode_rectified['CREATED_STAMP'], format='%Y-%m-%d %H:%M:%S')

In [7]:
print('barcode_rectified.CREATED_STAMP.dtype:\t' + str(barcode_rectified.CREATED_STAMP.dtype))

barcode_rectified.CREATED_STAMP.dtype:	datetime64[ns]


In [9]:
# making a COPY for joins and other operations (just to change tha name)
busy_df = barcode_rectified.copy(deep=True)

# Loading the product master DataFrame
pm_df = pd.read_excel('ProductMaster404b8b3.xlsx', header = 0)

pm_df

Unnamed: 0,COMPANY_CODE,CATEGORY_DESC,SUBCATEGORY_DESC,BRAND_DESC,BASEPACK,BASEPACK_DESC,BARCODE
0,HUL,PFW,Washing Powder,Rin,16040,RIN ADVANCED POWDER NM 4 Kg,8901030648229
1,HUL,PFW,Washing Powder,Rin,16207,RIN ADVANCED POWDER MOGRA NM 4 Kg,8901030648250
2,HUL,CPD,Instant Noodles,Knorr,80759,KNR DRY NOODLES - CHINESE HOT N SPICY,8901030649868
3,HUL,CPD,Instant Noodles,Knorr,80758,KNR DRY NOODLES - CHINESE SCHEZWAN,8901030657627
4,HUL,MPW,Personal Wash Bars,Lifebuoy,15194,LIFEBUOY TOTA10 125g MP,8901030653742
5,HUL,PPW,Personal Wash Bars,PEARS,17336,PEARS PURE AND GENTLE SOAP 3*75G AMBER,8901030652080
6,HUL,CPD,Instant Noodles,Knorr,80758,KNR DRY NOODLES - CHINESE SCHEZWAN,8901030649837
7,HUL,Skin,Fairness,Ponds,12682,Ponds WB Daily Lightening Cream 35 gms,8901030638084
8,HUL,CPD,Ketchup-Sauces,Kissan,80456,Kissan Ketchup -Sweet & Spicy 500Gms,8901030650574
9,HUL,Oral,Toothpaste,Pepsodent,10075,Pepsodent G TP Tube 150g,8901030651144


In [10]:
# Making the BARCODE as object (String)
busy_df.BARCODE = busy_df.BARCODE.astype('int')
# busy_df

# Joining the two DataFrames based on the barcodes and extracting only category_desc, subcategory_desc, brand_desc and basepack_desc
joined_df = busy_df.join(pm_df.set_index('BARCODE'), on='BARCODE', how='inner', sort=False)

# print(joined_df.BARCODE.dtype)
# print(busy_df.BARCODE.dtype)
# print(pm_df.BARCODE.dtype)
joined_df

Unnamed: 0,POS_Application_Name,STOREID,BARCODE,GUID,CREATED_STAMP,COMPANY_CODE,CATEGORY_DESC,SUBCATEGORY_DESC,BRAND_DESC,BASEPACK,BASEPACK_DESC
0,BUSY,DEL0000001446,8901399058561,a71a95e9-a4a5-4677-9d56-19102598a8ba,2017-01-01 11:55:38.916,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
11497,BUSY,DEL0000001446,8901399058561,94f3cde4-8fe1-4a18-abf3-b3d2a0c2f4c2,2017-01-02 18:05:45.437,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
20234,BUSY,DEL0000001446,8901399058561,d08dfb5f-0801-4fa1-a648-c0dd95d037a6,2017-01-25 17:03:22.006,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
36244,BUSY,DEL0000001446,8901399058561,ef95e2dc-0b28-4858-ab57-a32527b402cc,2017-01-04 21:16:46.908,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
59593,BUSY,DEL0000001446,8901399058561,92e0bc79-599a-4bb5-8065-1d1094d81086,2017-01-08 19:28:35.832,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
96263,BUSY,DEL0000001446,8901399058561,77adc976-bcee-414a-bef2-aa2eed8ba423,2017-01-13 16:56:00.282,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
101977,BUSY,DEL0000001446,8901399058561,41c9b7ce-26f2-4ba5-9fbf-01b34d300cad,2017-01-14 14:36:54.543,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
114688,BUSY,DEL0000001446,8901399058561,9961e86e-1cb9-4dd7-ab72-5d38a3fce68f,2017-01-16 15:29:32.431,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
295103,BUSY,DEL0000001446,8901399058561,7a391c7c-2455-4b20-906e-984f461398da,2017-03-12 19:21:36.353,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML
295226,BUSY,DEL0000001446,8901399058561,c4084a76-657d-4bd3-968a-4f7d2535ad6f,2017-03-12 21:14:16.344,,SOAP,HAND WASH,SANTOOR,SOAP,SANTOOR HAND WASH - GLYCERIN & APRICOT 180ML


In [11]:
# Extracting only required columns
joined_df = joined_df[['BARCODE', 'CREATED_STAMP', 'CATEGORY_DESC', 'SUBCATEGORY_DESC', 'BRAND_DESC', 'BASEPACK']]

joined_df

Unnamed: 0,BARCODE,CREATED_STAMP,CATEGORY_DESC,SUBCATEGORY_DESC,BRAND_DESC,BASEPACK
0,8901399058561,2017-01-01 11:55:38.916,SOAP,HAND WASH,SANTOOR,SOAP
11497,8901399058561,2017-01-02 18:05:45.437,SOAP,HAND WASH,SANTOOR,SOAP
20234,8901399058561,2017-01-25 17:03:22.006,SOAP,HAND WASH,SANTOOR,SOAP
36244,8901399058561,2017-01-04 21:16:46.908,SOAP,HAND WASH,SANTOOR,SOAP
59593,8901399058561,2017-01-08 19:28:35.832,SOAP,HAND WASH,SANTOOR,SOAP
96263,8901399058561,2017-01-13 16:56:00.282,SOAP,HAND WASH,SANTOOR,SOAP
101977,8901399058561,2017-01-14 14:36:54.543,SOAP,HAND WASH,SANTOOR,SOAP
114688,8901399058561,2017-01-16 15:29:32.431,SOAP,HAND WASH,SANTOOR,SOAP
295103,8901399058561,2017-03-12 19:21:36.353,SOAP,HAND WASH,SANTOOR,SOAP
295226,8901399058561,2017-03-12 21:14:16.344,SOAP,HAND WASH,SANTOOR,SOAP


#### Making data for week wise
- This is done by adding a column for the week number

In [12]:
joined_df['CREATED_STAMP'] = joined_df['CREATED_STAMP'].apply(pd.datetools.normalize_date)

joined_df

  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,BARCODE,CREATED_STAMP,CATEGORY_DESC,SUBCATEGORY_DESC,BRAND_DESC,BASEPACK
0,8901399058561,2017-01-01,SOAP,HAND WASH,SANTOOR,SOAP
11497,8901399058561,2017-01-02,SOAP,HAND WASH,SANTOOR,SOAP
20234,8901399058561,2017-01-25,SOAP,HAND WASH,SANTOOR,SOAP
36244,8901399058561,2017-01-04,SOAP,HAND WASH,SANTOOR,SOAP
59593,8901399058561,2017-01-08,SOAP,HAND WASH,SANTOOR,SOAP
96263,8901399058561,2017-01-13,SOAP,HAND WASH,SANTOOR,SOAP
101977,8901399058561,2017-01-14,SOAP,HAND WASH,SANTOOR,SOAP
114688,8901399058561,2017-01-16,SOAP,HAND WASH,SANTOOR,SOAP
295103,8901399058561,2017-03-12,SOAP,HAND WASH,SANTOOR,SOAP
295226,8901399058561,2017-03-12,SOAP,HAND WASH,SANTOOR,SOAP


In [13]:
# Adding week number column
joined_df['WEEK_NUM'] = joined_df['CREATED_STAMP'].dt.week
joined_df['DAY_NUM'] = joined_df['CREATED_STAMP'].dt.dayofyear

joined_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,BARCODE,CREATED_STAMP,CATEGORY_DESC,SUBCATEGORY_DESC,BRAND_DESC,BASEPACK,WEEK_NUM,DAY_NUM
0,8901399058561,2017-01-01,SOAP,HAND WASH,SANTOOR,SOAP,52,1
11497,8901399058561,2017-01-02,SOAP,HAND WASH,SANTOOR,SOAP,1,2
20234,8901399058561,2017-01-25,SOAP,HAND WASH,SANTOOR,SOAP,4,25
36244,8901399058561,2017-01-04,SOAP,HAND WASH,SANTOOR,SOAP,1,4
59593,8901399058561,2017-01-08,SOAP,HAND WASH,SANTOOR,SOAP,1,8
96263,8901399058561,2017-01-13,SOAP,HAND WASH,SANTOOR,SOAP,2,13
101977,8901399058561,2017-01-14,SOAP,HAND WASH,SANTOOR,SOAP,2,14
114688,8901399058561,2017-01-16,SOAP,HAND WASH,SANTOOR,SOAP,3,16
295103,8901399058561,2017-03-12,SOAP,HAND WASH,SANTOOR,SOAP,10,71
295226,8901399058561,2017-03-12,SOAP,HAND WASH,SANTOOR,SOAP,10,71


In [40]:
# Removing the category "Others" so that association rule mining using Apriori

removed_nan = joined_df.dropna(how='any')

removed_nan.to_csv("data_for_apriori.csv", header=False, index=False)

In [41]:
!cat data_for_apriori.csv

8901399058561,2017-01-01,SOAP,HAND WASH,SANTOOR,SOAP,52,1
8901399058561,2017-01-02,SOAP,HAND WASH,SANTOOR,SOAP,1,2
8901399058561,2017-01-25,SOAP,HAND WASH,SANTOOR,SOAP,4,25
8901399058561,2017-01-04,SOAP,HAND WASH,SANTOOR,SOAP,1,4
8901399058561,2017-01-08,SOAP,HAND WASH,SANTOOR,SOAP,1,8
8901399058561,2017-01-13,SOAP,HAND WASH,SANTOOR,SOAP,2,13
8901399058561,2017-01-14,SOAP,HAND WASH,SANTOOR,SOAP,2,14
8901399058561,2017-01-16,SOAP,HAND WASH,SANTOOR,SOAP,3,16
8901399058561,2017-03-12,SOAP,HAND WASH,SANTOOR,SOAP,10,71
8901399058561,2017-03-12,SOAP,HAND WASH,SANTOOR,SOAP,10,71
8901399058561,2017-03-23,SOAP,HAND WASH,SANTOOR,SOAP,12,82
8901399058561,2017-03-24,SOAP,HAND WASH,SANTOOR,SOAP,12,83
8901399058561,2017-03-28,SOAP,HAND WASH,SANTOOR,SOAP,13,87
8901399058561,2017-03-28,SOAP,HAND WASH,SANTOOR,SOAP,13,87
8901030349751,2017-01-01,PFW,Detergent Bars,Surf Excel,16020,52,1
8901030349751,2017-01-01,PFW,Detergent Bars,Surf Excel,16020,52,1
8901030349751,2017-01-01,PFW,Detergen

8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,2017-01-11,CPD,Core Soups,Knorr,75298,2,11
8901030516306,

8901242178200,2017-03-22,PACKED FOOD,REGULAR PASTA,BAMBINO,PACKED FOOD,12,81
8901242178200,2017-03-22,PACKED FOOD,REGULAR PASTA,BAMBINO,PACKED FOOD,12,81
8901242178200,2017-03-24,PACKED FOOD,REGULAR PASTA,BAMBINO,PACKED FOOD,12,83
8906001052708,2017-01-08,PACKED FOOD,OTHER PICKLE,MOTHERS RECIPE,PACKED FOOD,1,8
8906001052708,2017-02-21,PACKED FOOD,OTHER PICKLE,MOTHERS RECIPE,PACKED FOOD,8,52
8906001052708,2017-02-22,PACKED FOOD,OTHER PICKLE,MOTHERS RECIPE,PACKED FOOD,8,53
8901571005659,2017-01-08,ORAL,TOOTHPASTE,SENSODYNE,ORAL,1,8
8901571005659,2017-01-06,ORAL,TOOTHPASTE,SENSODYNE,ORAL,1,6
8901571005659,2017-01-08,ORAL,TOOTHPASTE,SENSODYNE,ORAL,1,8
8901571005659,2017-01-13,ORAL,TOOTHPASTE,SENSODYNE,ORAL,2,13
8901571005659,2017-02-28,ORAL,TOOTHPASTE,SENSODYNE,ORAL,9,59
8901571005659,2017-02-10,ORAL,TOOTHPASTE,SENSODYNE,ORAL,6,41
8901571005659,2017-02-21,ORAL,TOOTHPASTE,SENSODYNE,ORAL,8,52
8901571005659,2017-02-24,ORAL,TOOTHPASTE,SENSODYNE,ORAL,8,55
8901571005659,2017-03-22,

8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901030293566,2017-03-30,Oral,Toothpaste,Pepsodent,10059,13,89
8901725132194,2017-01-02,BISCUITS,"GLUCOSE, MARIE & MILK BISCUITS",SUNFEAST,BISCUITS,1,2
8901725132194,2017-01-02,BISCUITS,"GLUCOSE, MARIE & MILK BISCUITS",SUNFEAST,BISCUITS,1,2
8901725132194,2017-01-19,BISCUITS,"GLUCOSE, MARIE & MILK BISCUITS",SUNFEAST,BISCUITS,3,19
8901725132194,2017-01-19,BISCUITS,"GLUCOSE, MARIE & MILK BISCUITS",SUNFEAST,BISCUITS,3,19
8901725132194,2017-01-19,BISCUITS,"GLUCOSE, MARIE & MILK BISCUITS",SUNFEAST,BISCUITS,3,19
8901725132194,2017-01-31,BISCUITS,"

8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-23,PFW,Washing Powder,Surf Excel,16118,4,23
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4,26
8901030619458,2017-01-26,PFW,Washing Powder,Surf Excel,16118,4

8901030444906,2017-01-26,Skin,Fairness,Fair & Lovely ,12659,4,26
8901030444906,2017-01-14,Skin,Fairness,Fair & Lovely ,12659,2,14
8901030444906,2017-01-14,Skin,Fairness,Fair & Lovely ,12659,2,14
8901030444906,2017-01-23,Skin,Fairness,Fair & Lovely ,12659,4,23
8901030444906,2017-01-23,Skin,Fairness,Fair & Lovely ,12659,4,23
8901030444906,2017-01-25,Skin,Fairness,Fair & Lovely ,12659,4,25
8901030444906,2017-01-25,Skin,Fairness,Fair & Lovely ,12659,4,25
8901030444906,2017-01-25,Skin,Fairness,Fair & Lovely ,12659,4,25
8901030444906,2017-01-26,Skin,Fairness,Fair & Lovely ,12659,4,26
8901030444906,2017-01-26,Skin,Fairness,Fair & Lovely ,12659,4,26
8901030444906,2017-01-31,Skin,Fairness,Fair & Lovely ,12659,5,31
8901030444906,2017-01-31,Skin,Fairness,Fair & Lovely ,12659,5,31
8901030444906,2017-02-10,Skin,Fairness,Fair & Lovely ,12659,6,41
8901030444906,2017-02-10,Skin,Fairness,Fair & Lovely ,12659,6,41
8901030444906,2017-03-12,Skin,Fairness,Fair & Lovely ,12659,10,71
890103044

8901764042706,2017-01-14,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,2,14
8901764042706,2017-01-24,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,4,24
8901764042706,2017-01-26,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,4,26
8901764042706,2017-01-30,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,5,30
8901764042706,2017-02-14,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,7,45
8901764042706,2017-02-03,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,5,34
8901764042706,2017-02-25,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,8,56
8901764042706,2017-02-17,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,7,48
8901764042706,2017-02-28,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,9,59
8901764042706,2017-02-26,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,8,57
8901764042706,2017-02-17,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,7,48
8901764042706,2017-02-25,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,8,56
8901764042706,2017-02-10,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGES,6,41
8901764042706,2017-02-21,BEVERAGES,COLA DRINKS,THUMS UP,BEVERAGE

8901542006227,2017-01-05,BEVERAGES,ENERGY POWDERS,GLUCON-D,BEVERAGES,1,5
8901542006227,2017-03-02,BEVERAGES,ENERGY POWDERS,GLUCON-D,BEVERAGES,9,61
8901542006227,2017-01-09,BEVERAGES,ENERGY POWDERS,GLUCON-D,BEVERAGES,2,9
8901542006227,2017-03-29,BEVERAGES,ENERGY POWDERS,GLUCON-D,BEVERAGES,13,88
8901207018251,2017-01-05,ORAL,TOOTHPASTE,MESWAK,ORAL,1,5
8901207018251,2017-01-13,ORAL,TOOTHPASTE,MESWAK,ORAL,2,13
8901207018251,2017-01-20,ORAL,TOOTHPASTE,MESWAK,ORAL,3,20
8901207018251,2017-02-24,ORAL,TOOTHPASTE,MESWAK,ORAL,8,55
8901207018251,2017-02-17,ORAL,TOOTHPASTE,MESWAK,ORAL,7,48
8902080514045,2017-02-21,BEVERAGES,ENERGY DRINKS,GATORADE,BEVERAGES,8,52
8902080514045,2017-01-06,BEVERAGES,ENERGY DRINKS,GATORADE,BEVERAGES,1,6
8902080514045,2017-01-06,BEVERAGES,ENERGY DRINKS,GATORADE,BEVERAGES,1,6
8902080514045,2017-01-06,BEVERAGES,ENERGY DRINKS,GATORADE,BEVERAGES,1,6
8902080514045,2017-01-06,BEVERAGES,ENERGY DRINKS,GATORADE,BEVERAGES,1,6
8902080514045,2017-01-30,BEVERAGES,ENERGY

8906002340033,2017-01-25,PACKED FOOD,WHEAT ATTA,RAJDHANI,PACKED FOOD,4,25
8901262173490,2017-01-07,OTHERS,ICE CREAMS & DESSERTS,AMUL,OTHERS,1,7
8901262173490,2017-02-21,OTHERS,ICE CREAMS & DESSERTS,AMUL,OTHERS,8,52
8901262173490,2017-03-21,OTHERS,ICE CREAMS & DESSERTS,AMUL,OTHERS,12,80
8902080013869,2017-01-07,BEVERAGES,OTHER JUICES,TROPICANA,BEVERAGES,1,7
8902080013869,2017-01-07,BEVERAGES,OTHER JUICES,TROPICANA,BEVERAGES,1,7
8902080013869,2017-01-22,BEVERAGES,OTHER JUICES,TROPICANA,BEVERAGES,3,22
8902080013869,2017-01-27,BEVERAGES,OTHER JUICES,TROPICANA,BEVERAGES,4,27
8902080013869,2017-01-28,BEVERAGES,OTHER JUICES,TROPICANA,BEVERAGES,4,28
8902080013869,2017-03-23,BEVERAGES,OTHER JUICES,TROPICANA,BEVERAGES,12,82
8901030591280,2017-01-07,Tea,Tea,Taj Mahal,80668,1,7
8901030591280,2017-01-16,Tea,Tea,Taj Mahal,80668,3,16
8901030591280,2017-01-21,Tea,Tea,Taj Mahal,80668,3,21
8901030591280,2017-01-27,Tea,Tea,Taj Mahal,80668,4,27
8901030591280,2017-02-08,Tea,Tea,Taj Mahal,8066

8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030600333,2017-03-30,Oral,Toothpaste,Pepsodent,10092,13,89
8901030235696,2017-03-26,Oral,Toothpaste,Pepsodent,10059,12,85
8906033740758,2017-01-10,BISCUITS,COOKIES,MCVITIES,BISCUITS,2,10
8906033740758,2017-02-16,BISCUITS,COOKIES,MCVITIES,BISCUITS,7,47
8906033740758,2017-02-21,BISCUITS,COOKIES,MCVITIES,BISCUITS,8,52
8906033740758,2017-02-23,BISCUITS,COOKIES,MCVITIES,BISCUITS,8,54
8906033740758,2017-02-23,BISCUIT

8908001017121,2017-02-14,PACKED FOOD,CORN SNACKS,CORNITOS,PACKED FOOD,7,45
8908001017121,2017-02-22,PACKED FOOD,CORN SNACKS,CORNITOS,PACKED FOOD,8,53
8908001017121,2017-03-16,PACKED FOOD,CORN SNACKS,CORNITOS,PACKED FOOD,11,75
8901571000098,2017-01-25,BEVERAGES,HEALTH DRINKS,HORLICKS,BEVERAGES,4,25
8901571000098,2017-03-12,BEVERAGES,HEALTH DRINKS,HORLICKS,BEVERAGES,10,71
8901571000098,2017-03-29,BEVERAGES,HEALTH DRINKS,HORLICKS,BEVERAGES,13,88
8901233013077,2017-01-25,BISCUITS,CREAM BISCUITS,OREO,BISCUITS,4,25
8901233013077,2017-01-28,BISCUITS,CREAM BISCUITS,OREO,BISCUITS,4,28
8901030571084,2017-01-25,Hair,Shampoo,Clinic Plus,11036,4,25
8901030571084,2017-01-25,Hair,Shampoo,Clinic Plus,11036,4,25
8901138511449,2017-01-25,BABY PRODUCTS,BABY OIL & SHAMPOOS,HIMALAYA,BABY PRODUCTS,4,25
8901396523604,2017-01-26,OTHERS,MOSQUITO REPELLENT,MORTEIN,OTHERS,4,26
8901396523604,2017-02-21,OTHERS,MOSQUITO REPELLENT,MORTEIN,OTHERS,8,52
8901396523604,2017-02-01,OTHERS,MOSQUITO REPELLENT,MO

8901030609923,2017-03-28,PPW,Personal Wash Bars,Lux,17021,13,87
8901030609923,2017-03-28,PPW,Personal Wash Bars,Lux,17021,13,87
8901526200399,2017-03-28,HAIR,HAIR OIL,GARNIER,HAIR,13,87
8901120143726,2017-03-28,PACKED FOOD,SUGAR FREE,SUGAR FREE,PACKED FOOD,13,87
8901571005864,2017-03-28,BEVERAGES,HEALTH DRINKS,BOOST,BEVERAGES,13,87
8901138818845,2017-03-29,OTHERS,BABY CARE ACCESSORIES,HIMALAYA,OTHERS,13,88
4902430602761,2017-03-29,OTHERS,"SHAVING CREAM, FOAM & GELS",GILLETTE,OTHERS,13,88
4902430602761,2017-03-01,OTHERS,"SHAVING CREAM, FOAM & GELS",GILLETTE,OTHERS,9,60
8901277012203,2017-03-29,SKIN,TALC,PARK AVENUE,SKIN,13,88
690225104197,2017-03-01,PACKED RICE / ATTA,BASMATI RICE,INDIA GATE,PACKED RICE / ATTA,9,60
8901088050562,2017-03-29,PACKED FOOD,OATS,SAFFOLA,PACKED FOOD,13,88
8904006302781,2017-03-30,DEODORANT,SHAVING BRUSH,WILD STONE,DEODORANT,13,89
8901030303180,2017-03-30,HHC,Floor Cleaner,Domex,13000,13,89
8901030303180,2017-03-30,HHC,Floor Cleaner,Domex,13000,13,

In [13]:
no_of_weeks = 53;
joined_df_copy = joined_df.copy(deep=True)

with open('weekly_data.csv', 'w') as weekly_file:
    weekly_csv = csv.writer(weekly_file)
    for x in range(1,no_of_weeks):
        weekly_df = joined_df_copy.where(joined_df_copy.WEEK_NUM == x)
        weekly_data = weekly_df['CATEGORY_DESC'].tolist()
        weekly_data = [x for x in weekly_data if not((str(x)=='nan' or str(x)=='OTHERS'))]
        weekly_csv.writerow(list(set(weekly_data)))
    

In [12]:
no_of_days = 31+28+31;
joined_df_copy = joined_df.copy(deep=True)

with open('daily_data.csv', 'w') as daily_file:
    daily_csv = csv.writer(daily_file)
    for x in range(1,no_of_days):
        daily_df = joined_df_copy.where(joined_df_copy.DAY_NUM == x)
        daily_data = daily_df['CATEGORY_DESC'].tolist()
        daily_data = [x for x in daily_data if not((str(x)=='nan' or str(x)=='OTHERS'))]
        daily_csv.writerow(list(set(daily_data)))

In [39]:
datas = ["", 'Divyaksh','Shukla','']
list(filter(None, datas))

['Divyaksh', 'Shukla']

# TensorFlow training

- open source tool to provide regression models
- Here, we used python to analyse the data

#### xlrd
- to extract data from xls

In [None]:
import tensorflow as tf
import xlrd

rng = np.random

data_file = "new3.xls"
epochs = 100
display_step = 50

dfile = xlrd.open_workbook(data_file, encoding_override="utf-8")
sheet = dfile.sheet_by_index(0)
data = np.asarray([sheet.row_values(i)for i in range(1, sheet.nrows)])
new_data = np.hsplit(data , 2)
# print(new_data[0])

**barcodes** are in new_data[0]
**created_stamps** are in new_data[1]

In [None]:
barcode = new_data[0]
created_stamps = new_data[1]

In [None]:
####creating placeholders

BARCODE =tf.placeholder(tf.float32,name="BARCODE")
CREATED_STAMP =tf.placeholder(tf.float32,name="CREATED_STAMP")

####creating weight and bias, initialized to 0

w =tf.Variable(rng.randn(),name="BARCODE")
b =tf.Variable(rng.randn(),name="CREATED_STAMP")

Y_pred = BARCODE * w + b

#### Mean squared error
cost = tf.reduce_sum(tf.pow(Y_pred-CREATED_STAMP, 2))/(2*n_samples)

#loss = tf.square(CREATED_STAMP-Y_pred, name="loss")

opt = tf.train.GradientDescentOptimizer(learning_rate = 0.001).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(epochs):
        for x, y in data:
            sess.run(opt, feed_dict={BARCODE: x, CREATED_STAMP: y})

        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={BARCODE: x, CREATED_STAMP: y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \
                "w=", sess.run(w), "b=", sess.run(b))

    w_value, b_value = sess.run([w, b])
    print("Optimization finished!!")
    training_cost = sess.run(cost, feed_dict={BARCODE: x, CREATED_STAMP: y})
    print("Training cost=", training_cost, "w=", sess.run(w), "b=", sess.run(b), '\n')

    # Graphic display
    plt.plot(x, y, 'ro', label='barcode vs timestamp')
    plt.plot(x, sess.run(w) * x + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()

### Graphic Data

The data is plotted on a matplotlib library. 