In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### I. Importing the Dataset

In [2]:
# Read the snack table from disk; the preview below shows one 0/1 column per snack.
snacks = pd.read_csv(filepath_or_buffer='snacks.csv')

# Show the first five rows as a quick sanity check of the columns.
snacks.head(n=5)

Unnamed: 0,Bier,Chips,Coca Cola,Eiscreme,Erdnussflips,Erdnüsse,Flips,Gummibärchen,Ich mag keine Snacks,Karotten,...,Pistazien,Popcorn (ohne Zusatz),Popcorn (salzig),Popcorn (süß),Salzstangen,Saure Gummibaecht,Schokolade,Snacktomaten,Softdrinks,Toffiffee
0,0,1,0,0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,1,1,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


### II. Applying the Apriori Algorithm

In [3]:
# Mine every itemset that occurs in at least 1% of the rows of `snacks`.
# apriori expects a one-hot encoded frame. mlxtend emits a DeprecationWarning for
# 0/1 integer input and recommends booleans, so validate the encoding and convert.
assert snacks.isin([0, 1]).all().all(), "snacks must be one-hot encoded (only 0/1 values)"
frequent_itemsets = apriori(snacks.astype(bool), min_support=0.01, use_colnames=True)

# Number of items in each itemset, so itemsets can be filtered by size.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)

frequent_itemsets



Unnamed: 0,support,itemsets,length
0,0.010526,(Bier),1
1,0.647368,(Chips),1
2,0.163158,(Eiscreme),1
3,0.210526,(Erdnüsse),1
4,0.247368,(Gummibärchen),1
...,...,...,...
575,0.010526,"(Nachos, Kekse, Popcorn (süß), Erdnüsse, Chips...",8
576,0.010526,"(Nachos, Kekse, Popcorn (süß), Gummibärchen, C...",8
577,0.010526,"(Nachos, Kekse, Popcorn (süß), Erdnüsse, Gummi...",8
578,0.010526,"(Nachos, Kekse, Popcorn (süß), Erdnüsse, Gummi...",8


$$
support(snack) = \frac {\text {occurrences of snack}}{\text {number of orders}}
$$

In [7]:
# Preview the first ten frequent itemsets that contain exactly four items.
frequent_itemsets.loc[frequent_itemsets['length'].eq(4)].head(10)

Unnamed: 0,support,itemsets,length
166,0.015789,"(Erdnüsse, Eiscreme, Chips, Gummibärchen)",4
167,0.021053,"(Erdnüsse, Eiscreme, Chips, Kekse)",4
168,0.015789,"(Erdnüsse, Eiscreme, Chips, M&Ms)",4
169,0.015789,"(Erdnüsse, Nachos, Eiscreme, Chips)",4
170,0.015789,"(Erdnüsse, Eiscreme, Chips, Popcorn (süß))",4
171,0.021053,"(Erdnüsse, Eiscreme, Chips, Schokolade)",4
172,0.021053,"(Gummibärchen, Eiscreme, Chips, Kekse)",4
173,0.010526,"(Gummibärchen, Eiscreme, Chips, M&Ms)",4
174,0.021053,"(Nachos, Eiscreme, Chips, Gummibärchen)",4
175,0.021053,"(Gummibärchen, Eiscreme, Chips, Popcorn (süß))",4


### III. Mining Association Rules

In [12]:
# Derive association rules from the frequent itemsets, using lift as the
# selection metric with a minimum threshold of 1.2, then record how many
# items sit on each side of every rule.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.2,
).assign(
    antecedents_length=lambda r: r["antecedents"].map(len),
    consequents_length=lambda r: r["consequents"].map(len),
)

Confidence describes how often the rule holds: of all transactions that contain A, it is the share that also contain B.

$$
confidence(A \rightarrow B) = \frac {\text {number of transactions containing both A and B}}{\text {number of transactions containing A}}
$$

In [18]:
# Leave out rules whose confidence is exactly 1, then show the ten rules
# with the highest confidence among the rest.
(
    rules
    .loc[rules["confidence"].ne(1)]
    .sort_values(by="confidence", ascending=False)
    .head(10)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedents_length,consequents_length
86,"(Erdnüsse, Nachos)",(Chips),0.078947,0.647368,0.073684,0.933333,1.441734,0.022576,5.289474,0.332653,2,1
1206,"(Nachos, Gummibärchen, Schokolade)",(Chips),0.057895,0.647368,0.052632,0.909091,1.404287,0.015152,3.878947,0.305587,3,1
73,"(Erdnüsse, Gummibärchen)",(Chips),0.057895,0.647368,0.052632,0.909091,1.404287,0.015152,3.878947,0.305587,2,1
1116,"(Gummibärchen, Popcorn (süß), Kekse)",(Chips),0.047368,0.647368,0.042105,0.888889,1.37308,0.01144,3.173684,0.285221,3,1
908,"(Erdnüsse, Gummibärchen, Schokolade)",(Chips),0.042105,0.647368,0.036842,0.875,1.351626,0.009584,2.821053,0.271586,3,1
2303,"(Nachos, Gummibärchen, Kekse)",(Schokolade),0.042105,0.268421,0.036842,0.875,3.259804,0.02554,5.852632,0.723705,3,1
2414,"(Nachos, M&Ms, Schokolade)",(Kekse),0.042105,0.194737,0.036842,0.875,4.493243,0.028643,6.442105,0.811617,3,1
82,"(Erdnüsse, M&Ms)",(Chips),0.042105,0.647368,0.036842,0.875,1.351626,0.009584,2.821053,0.271586,2,1
1088,"(Nachos, Gummibärchen, Kekse)",(Chips),0.042105,0.647368,0.036842,0.875,1.351626,0.009584,2.821053,0.271586,3,1
3600,"(Erdnüsse, Chips, Schokolade, Kekse)",(Gummibärchen),0.036842,0.247368,0.031579,0.857143,3.465046,0.022465,5.268421,0.738616,4,1


Lift expresses how strong the association between two items or itemsets is. A lift above 1 means they are bought together more often than would be expected if they were independent; the higher the lift, the stronger the association.

$$
lift(A \rightarrow B) = \frac {\text{confidence}(A \rightarrow B)}{\text{support}(B)}
$$

In [17]:
# Keep only rules with lift below 90 and list them from highest to lowest lift.
# NOTE(review): 90 looks like a hand-picked cutoff to hide the most extreme lifts — confirm.
(
    rules
    .loc[rules["lift"].lt(90)]
    .sort_values(by="lift", ascending=False)
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedents_length,consequents_length
17559,"(Erdnüsse, M&Ms, Gummibärchen)","(Nachos, Popcorn (süß), Chips, Schokolade, Eis...",0.015789,0.010526,0.010526,0.666667,63.333333,0.010360,2.968421,1.000000,3,5
17529,"(Nachos, Eiscreme, Erdnüsse)","(Popcorn (süß), Gummibärchen, Chips, Schokolad...",0.015789,0.010526,0.010526,0.666667,63.333333,0.010360,2.968421,1.000000,3,5
18841,"(Kekse, Popcorn (süß), Gummibärchen, Schokolad...","(Nachos, Eiscreme, Chips, Erdnüsse)",0.010526,0.015789,0.010526,1.000000,63.333333,0.010360,inf,0.994681,5,4
15365,"(Nachos, M&Ms, Popcorn (süß), Kekse)","(Erdnüsse, Eiscreme, Gummibärchen)",0.010526,0.015789,0.010526,1.000000,63.333333,0.010360,inf,0.994681,4,3
16551,"(Erdnüsse, Eiscreme, Gummibärchen)","(Nachos, Kekse, Popcorn (süß), Chips, M&Ms)",0.015789,0.010526,0.010526,0.666667,63.333333,0.010360,2.968421,1.000000,3,5
...,...,...,...,...,...,...,...,...,...,...,...,...
37,(M&Ms),(Nachos),0.194737,0.378947,0.089474,0.459459,1.212462,0.015679,1.148947,0.217609,1,1
1294,"(Nachos, Chips)","(Popcorn (süß), Kekse)",0.257895,0.084211,0.026316,0.102041,1.211735,0.004598,1.019856,0.235461,2,2
1299,"(Popcorn (süß), Kekse)","(Nachos, Chips)",0.084211,0.257895,0.026316,0.312500,1.211735,0.004598,1.079426,0.190805,2,2
1331,"(Kekse, Popcorn (süß), Schokolade)",(Chips),0.047368,0.647368,0.036842,0.777778,1.201445,0.006177,1.586842,0.176006,3,1
