In [1]:
from collections import defaultdict, OrderedDict
from csv import reader
from itertools import chain, combinations
from optparse import OptionParser
import pandas as pd

# if not installed yet: pip install mlxtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [2]:
## Data Set
# Five market-basket transactions; each inner list holds the items of one transaction.
dataset = [
    ['Corn', 'Light Cream', 'Chicken', 'Beef', 'Wine', 'Ice Cream'],
    ['Dill', 'Onion', 'Carrot', 'Beef', 'Wine', 'Ice Cream'],
    ['Milk', 'Wine', 'Beef', 'Ice Cream'],
    ['Light Cream', 'Chicken', 'Corn', 'Kidney Beans', 'Yogurt', 'Wine'],
    ['Corn', 'Onion', 'Light Cream', 'Kidney Beans', 'Chicken', 'Yogurt'],
]


In [3]:
## Mine the frequent itemsets with FP-Growth
# Encode the transactions as a table whose columns are the item names (te.columns_).
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Minimum support of 60%; use_colnames=True reports itemsets by item name.
frequent_itemsets = fpgrowth(
    df,
    min_support=0.6,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.8,(Wine)
1,0.6,(Light Cream)
2,0.6,(Ice Cream)
3,0.6,(Corn)
4,0.6,(Chicken)
5,0.6,(Beef)
6,0.6,"(Ice Cream, Wine)"
7,0.6,"(Light Cream, Corn)"
8,0.6,"(Chicken, Corn)"
9,0.6,"(Chicken, Light Cream)"


In [4]:
## Association rules derived from the frequent itemsets (minimum support 60%), keeping those with confidence of at least 70%
association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Ice Cream),(Wine),0.6,0.8,0.6,1.0,1.25,0.12,inf
1,(Wine),(Ice Cream),0.8,0.6,0.6,0.75,1.25,0.12,1.6
2,(Light Cream),(Corn),0.6,0.6,0.6,1.0,1.666667,0.24,inf
3,(Corn),(Light Cream),0.6,0.6,0.6,1.0,1.666667,0.24,inf
4,(Chicken),(Corn),0.6,0.6,0.6,1.0,1.666667,0.24,inf
5,(Corn),(Chicken),0.6,0.6,0.6,1.0,1.666667,0.24,inf
6,(Chicken),(Light Cream),0.6,0.6,0.6,1.0,1.666667,0.24,inf
7,(Light Cream),(Chicken),0.6,0.6,0.6,1.0,1.666667,0.24,inf
8,"(Chicken, Light Cream)",(Corn),0.6,0.6,0.6,1.0,1.666667,0.24,inf
9,"(Chicken, Corn)",(Light Cream),0.6,0.6,0.6,1.0,1.666667,0.24,inf


In [5]:
## Association rules derived from the frequent itemsets (minimum support 60%), keeping those with a lift of at least 1.2
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.2,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Ice Cream),(Wine),0.6,0.8,0.6,1.0,1.25,0.12,inf
1,(Wine),(Ice Cream),0.8,0.6,0.6,0.75,1.25,0.12,1.6
2,(Light Cream),(Corn),0.6,0.6,0.6,1.0,1.666667,0.24,inf
3,(Corn),(Light Cream),0.6,0.6,0.6,1.0,1.666667,0.24,inf
4,(Chicken),(Corn),0.6,0.6,0.6,1.0,1.666667,0.24,inf
5,(Corn),(Chicken),0.6,0.6,0.6,1.0,1.666667,0.24,inf
6,(Chicken),(Light Cream),0.6,0.6,0.6,1.0,1.666667,0.24,inf
7,(Light Cream),(Chicken),0.6,0.6,0.6,1.0,1.666667,0.24,inf
8,"(Chicken, Light Cream)",(Corn),0.6,0.6,0.6,1.0,1.666667,0.24,inf
9,"(Chicken, Corn)",(Light Cream),0.6,0.6,0.6,1.0,1.666667,0.24,inf


## Report
The difference between the two sets of association rules above is most prominent in the rule wine -> beef: the confidence of the rule (0.75) barely reaches the threshold of 1(a) (0.7), but the high lift (1.25) suggests a positive correlation between the two items. We can therefore assume that wine is a complement to dishes made of beef, so they are often bought together.

In general, we can notice that (wine) is the most popular item, with (light cream, chicken, corn) and (wine, beef, ice cream) being the maximal frequent itemsets, so I would suggest a bundle of wine, beef and ice cream to combine these 2 aspects. Next, we look at the rules generated from the itemsets. From the rules with high confidence, patterns behind consumer behaviour can be deduced, e.g. (beef, wine) -> (ice cream) may suggest a romantic night in for couples: ice cream is always a delight after wining and dining. From the rules with high lift, phenomena can be observed, e.g. chicken -> corn, light cream and vice versa may suggest a recipe that is popular at the moment.