# Market Basket Analysis
# Using Association Rule Mining
# and Apriori Algorithm

In [1]:
# Toy transaction data: one inner list per basket, one string per purchased item.
# NOTE: the last basket lists 'Onion' twice; the True/False encoding applied
# further down only records whether an item is present in a basket.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [5]:
# Transaction encoding
# The raw baskets are lists of item names; they are converted into an array
# of True/False values (one column per item) before being handed to the model.
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
te.fit(dataset)                 # fit the encoder on the baskets
te_ary = te.transform(dataset)  # encode the same baskets as True/False values

In [6]:
# The dataframe after encoding: one row per transaction, one column per item
import pandas as pd

df = pd.DataFrame(data=te_ary, columns=te.columns_)
print(df)

   Apple   Corn   Dill   Eggs  Ice cream  Kidney Beans   Milk  Nutmeg  Onion  \
0  False  False  False   True      False          True   True    True   True   
1  False  False   True   True      False          True  False    True   True   
2   True  False  False   True      False          True   True   False  False   
3  False   True  False  False      False          True   True   False  False   
4  False   True  False   True       True          True  False   False   True   

   Unicorn  Yogurt  
0    False    True  
1    False    True  
2    False   False  
3     True    True  
4    False   False  


In [9]:
# Importing the Apriori algorithm
from mlxtend.frequent_patterns import apriori

# Mine the frequent itemsets from the encoded dataframe with a minimum
# support of 0.6; use_colnames=True reports item names in the itemsets.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

# Add the number of items in each itemset as a column
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

In [10]:
# Heading line followed by the frequent-itemsets table (support, itemsets, length)
print("Itemsets and the Probabilty of them being bought", frequent_itemsets, sep="\n")

Itemsets and the Probabilty of them being bought
    support                     itemsets  length
0       0.8                       (Eggs)       1
1       1.0               (Kidney Beans)       1
2       0.6                       (Milk)       1
3       0.6                      (Onion)       1
4       0.6                     (Yogurt)       1
5       0.8         (Kidney Beans, Eggs)       2
6       0.6                (Eggs, Onion)       2
7       0.6         (Kidney Beans, Milk)       2
8       0.6        (Kidney Beans, Onion)       2
9       0.6       (Kidney Beans, Yogurt)       2
10      0.6  (Kidney Beans, Eggs, Onion)       3


In [12]:
# Show only the itemsets with more than two items and a support of at least 0.6
has_more_than_two_items = frequent_itemsets['length'] > 2
meets_support = frequent_itemsets['support'] >= 0.6
print(frequent_itemsets[has_more_than_two_items & meets_support])

    support                     itemsets  length
10      0.6  (Kidney Beans, Eggs, Onion)       3


In [13]:
# Importing association rule mining
from mlxtend.frequent_patterns import association_rules

# Derive rules from the frequent itemsets, thresholding the "confidence"
# metric at 0.7.
result = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

In [16]:
# The association rules: one row per rule, listing its antecedents and
# consequents alongside the computed metric columns
print(result)

              antecedents            consequents  antecedent support  \
0          (Kidney Beans)                 (Eggs)                 1.0   
1                  (Eggs)         (Kidney Beans)                 0.8   
2                  (Eggs)                (Onion)                 0.8   
3                 (Onion)                 (Eggs)                 0.6   
4                  (Milk)         (Kidney Beans)                 0.6   
5                 (Onion)         (Kidney Beans)                 0.6   
6                (Yogurt)         (Kidney Beans)                 0.6   
7    (Kidney Beans, Eggs)                (Onion)                 0.8   
8   (Kidney Beans, Onion)                 (Eggs)                 0.6   
9           (Onion, Eggs)         (Kidney Beans)                 0.6   
10                 (Eggs)  (Kidney Beans, Onion)                 0.8   
11                (Onion)   (Kidney Beans, Eggs)                 0.6   

    consequent support  support  confidence  lift  leverage  co