In [61]:
# # Market Basket Analysis: Apriori Algorithm
# Dataset: Order1.csv
# The dataset has 38765 rows of purchase orders made by customers at grocery stores.
# These orders can be analysed with Market Basket Analysis, generating association rules with algorithms such as the Apriori algorithm.
# Follow these steps:

# Data Pre-processing
# Generate the list of transactions from the dataset
# Train Apriori on the dataset
# Visualize the list of datasets

In [62]:
import   numpy as np
import pandas as pd

In [63]:
# Read the raw grocery orders from Order1.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Order1.csv')

In [64]:
# Show the loaded orders table (columns: Member_number, Date, itemDescription).
df

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,fruit/vegetable juice


In [65]:
# Collapse the item-level rows into baskets: each (Member_number, Date) pair
# becomes one row whose 'Transaction' cell holds the list of items for that pair.
transactions = (
    df
    .groupby(['Member_number', 'Date'])['itemDescription']
    .apply(list)
    .reset_index(name='Transaction')
)


In [66]:
# Inspect the grouped table: one row per (Member_number, Date) with its item list.
transactions

Unnamed: 0,Member_number,Date,Transaction
0,1000,15-03-2015,"[sausage, whole milk, semi-finished bread, yog..."
1,1000,24-06-2014,"[whole milk, pastry, salty snack]"
2,1000,24-07-2015,"[canned beer, misc. beverages]"
3,1000,25-11-2015,"[sausage, hygiene articles]"
4,1000,27-05-2015,"[soda, pickled vegetables]"
...,...,...,...
14958,4999,24-01-2015,"[tropical fruit, berries, other vegetables, yo..."
14959,4999,26-12-2015,"[bottled water, herbs]"
14960,5000,09-03-2014,"[fruit/vegetable juice, onions]"
14961,5000,10-02-2015,"[soda, root vegetables, semi-finished bread]"


In [67]:
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the baskets into a boolean array: one row per transaction,
# one column per distinct item (column labels end up in te.columns_).
te = TransactionEncoder()
te.fit(transactions['Transaction'])
one_hot_encoded = te.transform(transactions['Transaction'])
one_hot_encoded

array([[False, False, False, ...,  True,  True, False],
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [68]:
# Wrap the encoded array in a DataFrame labelled with the encoder's item names.
ndf = pd.DataFrame(data=one_hot_encoded, columns=te.columns_)

In [69]:
# Inspect the one-hot basket table: boolean columns named after the items.
ndf

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14958,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,False
14959,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14960,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14961,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [70]:
from mlxtend.frequent_patterns import apriori, association_rules

# Mine the itemsets whose support is at least 0.001 (0.1% of the transactions);
# use_colnames=True reports item names in the result instead of column indices.
frequent_itemsets = apriori(
    ndf,
    min_support=0.001,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.004010,(Instant food products)
1,0.021386,(UHT-milk)
2,0.001470,(abrasive cleaner)
3,0.001938,(artif. sweetener)
4,0.008087,(baking powder)
...,...,...
745,0.001136,"(whole milk, sausage, rolls/buns)"
746,0.001002,"(soda, whole milk, rolls/buns)"
747,0.001337,"(yogurt, whole milk, rolls/buns)"
748,0.001069,"(soda, whole milk, sausage)"


In [82]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose lift is at least 1.
rules = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1,
)

In [85]:
# Inspect the generated rules together with their support, confidence and lift.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(tropical fruit),(UHT-milk),0.067767,0.021386,0.001537,0.022682,1.060617,8.785064e-05,1.001326,0.061307
1,(UHT-milk),(tropical fruit),0.021386,0.067767,0.001537,0.071875,1.060617,8.785064e-05,1.004426,0.058402
2,(brown bread),(beef),0.037626,0.033950,0.001537,0.040853,1.203301,2.597018e-04,1.007196,0.175559
3,(beef),(brown bread),0.033950,0.037626,0.001537,0.045276,1.203301,2.597018e-04,1.008012,0.174891
4,(citrus fruit),(beef),0.053131,0.033950,0.001804,0.033962,1.000349,6.297697e-07,1.000012,0.000369
...,...,...,...,...,...,...,...,...,...,...
235,"(yogurt, sausage)",(whole milk),0.005748,0.157923,0.001470,0.255814,1.619866,5.626300e-04,1.131541,0.384877
236,"(whole milk, sausage)",(yogurt),0.008955,0.085879,0.001470,0.164179,1.911760,7.012151e-04,1.093681,0.481231
237,(yogurt),"(whole milk, sausage)",0.085879,0.008955,0.001470,0.017121,1.911760,7.012151e-04,1.008307,0.521727
238,(whole milk),"(yogurt, sausage)",0.157923,0.005748,0.001470,0.009310,1.619866,5.626300e-04,1.003596,0.454430
