In [53]:
all_transactions = []

#open the file
with open('groceries.csv') as f:
    #read each line
    content = f.readlines()
    #remove white space from the beginning and end of the line
    transactions = [x.strip() for x in content]
    #iterate through each line and create a list of transaction
    for each_transaction in transactions:
        all_transactions.append(each_transaction.split(','))

In [54]:
all_transactions[0:5]

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product']]

In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [56]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [57]:
#initialize the onehottransactions
one_hot_encoding = TransactionEncoder()
#transform the data into one-hot-encoding format
one_hot_transactions = one_hot_encoding.fit(all_transactions).transform(all_transactions)
#convert the matrix into the dataframe
one_hot_transactions_data = pd.DataFrame(one_hot_transactions, columns=one_hot_encoding.columns_)

In [58]:
one_hot_transactions_data.iloc[5:10, 10:20]

Unnamed: 0,berries,beverages,bottled beer,bottled water,brandy,brown bread,butter,butter milk,cake bar,candles
5,False,False,False,False,False,False,True,False,False,False
6,False,False,False,False,False,False,False,False,False,False
7,False,False,True,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False


In [59]:
one_hot_transactions_data.shape

(9835, 171)

In [60]:
#generate association rules
len(one_hot_transactions_data.columns)

171

In [61]:
frequent_itemsets = apriori(one_hot_transactions_data, min_support = 0.02, use_colnames=True)

In [62]:
frequent_itemsets.sample(10, )

Unnamed: 0,support,itemsets
53,0.104931,(tropical fruit)
105,0.021047,"(tropical fruit, root vegetables)"
31,0.037417,(long life bakery product)
92,0.043416,"(yogurt, other vegetables)"
120,0.023183,"(whole milk, root vegetables, other vegetables)"
30,0.025013,(ice cream)
0,0.033452,(UHT-milk)
41,0.075648,(pip fruit)
83,0.021657,"(pork, other vegetables)"
77,0.021047,"(fruit/vegetable juice, other vegetables)"


In [63]:
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

In [64]:
rules.sample(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
73,(pip fruit),(whole milk),0.075648,0.255516,0.030097,0.397849,1.557043,0.010767,1.236375
74,(whole milk),(pork),0.255516,0.057651,0.022166,0.086749,1.504719,0.007435,1.031862
55,(other vegetables),(shopping bags),0.193493,0.098526,0.023183,0.119811,1.216037,0.004119,1.024182
70,(pip fruit),(tropical fruit),0.075648,0.104931,0.020437,0.270161,2.574648,0.012499,1.226392
122,"(yogurt, other vegetables)",(whole milk),0.043416,0.255516,0.022267,0.512881,2.007235,0.011174,1.52834


In [65]:
rules.sort_values('confidence', ascending=False) [0:10]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
122,"(yogurt, other vegetables)",(whole milk),0.043416,0.255516,0.022267,0.512881,2.007235,0.011174,1.52834
17,(butter),(whole milk),0.055414,0.255516,0.027555,0.497248,1.946053,0.013395,1.480817
25,(curd),(whole milk),0.053279,0.255516,0.026131,0.490458,1.919481,0.012517,1.461085
116,"(root vegetables, other vegetables)",(whole milk),0.047382,0.255516,0.023183,0.48927,1.914833,0.011076,1.457687
114,"(whole milk, root vegetables)",(other vegetables),0.048907,0.193493,0.023183,0.474012,2.44977,0.013719,1.53332
29,(domestic eggs),(whole milk),0.063447,0.255516,0.029995,0.472756,1.850203,0.013783,1.41203
109,(whipped/sour cream),(whole milk),0.071683,0.255516,0.032232,0.449645,1.759754,0.013916,1.352735
91,(root vegetables),(whole milk),0.108998,0.255516,0.048907,0.448694,1.756031,0.021056,1.350401
50,(root vegetables),(other vegetables),0.108998,0.193493,0.047382,0.434701,2.246605,0.026291,1.426693
33,(frozen vegetables),(whole milk),0.048094,0.255516,0.020437,0.424947,1.663094,0.008149,1.294636
