# Apriori

Hint 1: Use mlxtend

Hint 2: See http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/ for a working example

In [None]:
#################################
#
# Example adapted from http://intelligentonlinetools.com/blog/2018/02/10/how-to-create-data-visualization-for-association-rules-in-data-mining/
#
#################################

In [1]:
# Import necessary python libraries
import pandas as pd
import csv
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [7]:
# Read in CSV file into an array of arrays
dataset = []
with open('aprioriData1.csv') as f:
	reader = csv.reader(f)
	for row in reader:
		dataset.append(row)
for row in dataset: 
	print(row)



['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt']
['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt']
['Milk', 'Apple', 'Kidney Beans', 'Eggs', '', '']
['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt', '']
['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice Cream', 'Eggs']


In [8]:
# Transform your data for the apriori algorithm
oht = TransactionEncoder()
oht_ary = oht.fit(dataset).transform(dataset)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
print(df)

          Apple   Corn   Dill   Eggs  Ice Cream  Kidney Beans   Milk  Nutmeg  \
0  False  False  False  False   True      False          True   True    True   
1  False  False  False   True   True      False          True  False    True   
2   True   True  False  False   True      False          True   True   False   
3   True  False   True  False  False      False          True   True   False   
4  False  False   True  False   True       True          True  False   False   

   Onion  Unicorn  Yogurt  
0   True    False    True  
1   True    False    True  
2  False    False   False  
3  False     True    True  
4   True    False   False  


In [5]:
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
print(frequent_itemsets)

    support                     itemsets
0       0.8                       (Eggs)
1       1.0               (Kidney Beans)
2       0.6                       (Milk)
3       0.6                      (Onion)
4       0.6                     (Yogurt)
5       0.8         (Eggs, Kidney Beans)
6       0.6                (Onion, Eggs)
7       0.6         (Milk, Kidney Beans)
8       0.6        (Onion, Kidney Beans)
9       0.6       (Yogurt, Kidney Beans)
10      0.6  (Onion, Eggs, Kidney Beans)


In [6]:
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])

              antecedents            consequents  support  confidence
0                  (Eggs)         (Kidney Beans)      0.8        1.00
1          (Kidney Beans)                 (Eggs)      0.8        0.80
2                 (Onion)                 (Eggs)      0.6        1.00
3                  (Eggs)                (Onion)      0.6        0.75
4                  (Milk)         (Kidney Beans)      0.6        1.00
5                 (Onion)         (Kidney Beans)      0.6        1.00
6                (Yogurt)         (Kidney Beans)      0.6        1.00
7           (Onion, Eggs)         (Kidney Beans)      0.6        1.00
8   (Onion, Kidney Beans)                 (Eggs)      0.6        1.00
9    (Eggs, Kidney Beans)                (Onion)      0.6        0.75
10                (Onion)   (Eggs, Kidney Beans)      0.6        1.00
11                 (Eggs)  (Onion, Kidney Beans)      0.6        0.75
