In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy market-basket data: each inner list is one transaction (the items
# bought together in a single basket).
dataset = [
    ["Milk", "Tea", "Ghee", "Biscuits", "Chocolates", "Soap"],
    ["Tea", "Waffers", "Cookies"],
    ["Pencil", "Rubber", "Ballpen"],
    ["Milk", "Ballpen", "Rubber", "Biscuits", "Soap"],
    ["Tea", "Ghee", "Cookies"],
    ["Waffers", "Biscuits", "Ballpen"],
]

In [3]:
# Echo the raw transactions: one inner list of item names per basket.
dataset

[['Milk', 'Tea', 'Ghee', 'Biscuits', 'Chocolates', 'Soap'],
 ['Tea', 'Waffers', 'Cookies'],
 ['Pencil', 'Rubber', 'Ballpen'],
 ['Milk', 'Ballpen', 'Rubber', 'Biscuits', 'Soap'],
 ['Tea', 'Ghee', 'Cookies'],
 ['Waffers', 'Biscuits', 'Ballpen']]

In [5]:
# Prerequisite: mlxtend must be installed in the kernel's environment.
# If it is missing, run once:  %pip install mlxtend

In [6]:
from mlxtend.preprocessing import TransactionEncoder

In [7]:
# Encoder used below to turn the list-of-lists transactions into a
# boolean transaction-by-item matrix.
te = TransactionEncoder()

In [8]:
# Learn the item vocabulary from the transactions, then encode every
# transaction as one boolean row (the two-step form of fit_transform).
data = te.fit(dataset).transform(dataset)
# Item names in the same order as the columns of `data`.
unique_items = te.columns_

In [13]:
# Item name -> column index of that item in the encoded matrix.
te.columns_mapping_

{'Ballpen': 0,
 'Biscuits': 1,
 'Chocolates': 2,
 'Cookies': 3,
 'Ghee': 4,
 'Milk': 5,
 'Pencil': 6,
 'Rubber': 7,
 'Soap': 8,
 'Tea': 9,
 'Waffers': 10}

In [9]:
# Encoded matrix: one row per transaction, one boolean column per item.
print(data)

[[False  True  True False  True  True False False  True  True False]
 [False False False  True False False False False False  True  True]
 [ True False False False False False  True  True False False False]
 [ True  True False False False  True False  True  True False False]
 [False False False  True  True False False False False  True False]
 [ True  True False False False False False False False False  True]]


In [10]:
# Item names, in the same order as the columns of `data`.
print(unique_items)

['Ballpen', 'Biscuits', 'Chocolates', 'Cookies', 'Ghee', 'Milk', 'Pencil', 'Rubber', 'Soap', 'Tea', 'Waffers']


In [11]:
# Wrap the encoded matrix in a DataFrame with one named column per item;
# this is the input passed to apriori further down.
df = pd.DataFrame(data=data, columns=unique_items)

In [12]:
# Show the one-hot encoded transactions (True = item is in the basket).
df

Unnamed: 0,Ballpen,Biscuits,Chocolates,Cookies,Ghee,Milk,Pencil,Rubber,Soap,Tea,Waffers
0,False,True,True,False,True,True,False,False,True,True,False
1,False,False,False,True,False,False,False,False,False,True,True
2,True,False,False,False,False,False,True,True,False,False,False
3,True,True,False,False,False,True,False,True,True,False,False
4,False,False,False,True,True,False,False,False,False,True,False
5,True,True,False,False,False,False,False,False,False,False,True


In [14]:
# Check column dtypes and non-null counts of the encoded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Ballpen     6 non-null      bool 
 1   Biscuits    6 non-null      bool 
 2   Chocolates  6 non-null      bool 
 3   Cookies     6 non-null      bool 
 4   Ghee        6 non-null      bool 
 5   Milk        6 non-null      bool 
 6   Pencil      6 non-null      bool 
 7   Rubber      6 non-null      bool 
 8   Soap        6 non-null      bool 
 9   Tea         6 non-null      bool 
 10  Waffers     6 non-null      bool 
dtypes: bool(11)
memory usage: 194.0 bytes


## Mining frequent itemsets with Apriori

In [15]:
from mlxtend.frequent_patterns import apriori

In [32]:
# Mine itemsets whose support is at least 0.2.  With use_colnames=False
# the itemsets are reported as column indices of `df`, not item names.
# NOTE: despite its name, `rules` holds frequent itemsets (support +
# itemsets columns), not association rules.
rules = apriori(df, min_support=0.2, use_colnames=False)

In [33]:
# Itemsets appear here as tuples of column indices (see te.columns_mapping_
# for the index -> item name correspondence).
print(rules)

     support   itemsets
0   0.500000        (0)
1   0.500000        (1)
2   0.333333        (3)
3   0.333333        (4)
4   0.333333        (5)
5   0.333333        (7)
6   0.333333        (8)
7   0.500000        (9)
8   0.333333       (10)
9   0.333333     (0, 1)
10  0.333333     (0, 7)
11  0.333333     (1, 5)
12  0.333333     (8, 1)
13  0.333333     (9, 3)
14  0.333333     (9, 4)
15  0.333333     (8, 5)
16  0.333333  (8, 1, 5)


In [34]:
# Same mining run as before, but with itemsets labelled by item name
# (the column names of `df`) instead of column index.
frequent_itemset = apriori(
    df,
    use_colnames=True,
    min_support=0.2,
)

In [35]:
# Frequent itemsets, labelled with item names, and their support.
frequent_itemset

Unnamed: 0,support,itemsets
0,0.5,(Ballpen)
1,0.5,(Biscuits)
2,0.333333,(Cookies)
3,0.333333,(Ghee)
4,0.333333,(Milk)
5,0.333333,(Rubber)
6,0.333333,(Soap)
7,0.5,(Tea)
8,0.333333,(Waffers)
9,0.333333,"(Ballpen, Biscuits)"


In [36]:
# Record how many items each itemset contains so rows can be filtered
# by itemset size.
frequent_itemset['length'] = frequent_itemset['itemsets'].map(len)

In [37]:
# Same table, now with the itemset size in the `length` column.
frequent_itemset

Unnamed: 0,support,itemsets,length
0,0.5,(Ballpen),1
1,0.5,(Biscuits),1
2,0.333333,(Cookies),1
3,0.333333,(Ghee),1
4,0.333333,(Milk),1
5,0.333333,(Rubber),1
6,0.333333,(Soap),1
7,0.5,(Tea),1
8,0.333333,(Waffers),1
9,0.333333,"(Ballpen, Biscuits)",2


In [41]:
# Boolean mask over the rows: True for two-item itemsets whose support
# is at least 0.2.
frequent_itemset['length'].eq(2) & frequent_itemset['support'].ge(0.2)

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9      True
10     True
11     True
12     True
13     True
14     True
15     True
16    False
dtype: bool

In [45]:
# Keep only the two-item itemsets whose support is at least 0.3.
frequent_itemset.loc[
    frequent_itemset['length'].eq(2) & frequent_itemset['support'].ge(0.3)
]

Unnamed: 0,support,itemsets,length
9,0.333333,"(Ballpen, Biscuits)",2
10,0.333333,"(Ballpen, Rubber)",2
11,0.333333,"(Milk, Biscuits)",2
12,0.333333,"(Soap, Biscuits)",2
13,0.333333,"(Tea, Cookies)",2
14,0.333333,"(Ghee, Tea)",2
15,0.333333,"(Milk, Soap)",2


In [48]:
# Itemsets of any size whose support is at least 0.4.
frequent_itemset.query("support >= 0.4")

Unnamed: 0,support,itemsets,length
0,0.5,(Ballpen),1
1,0.5,(Biscuits),1
7,0.5,(Tea),1


In [52]:
# Look up a single itemset by its members, using a plain set literal.
frequent_itemset.loc[frequent_itemset['itemsets'] == {'Milk', 'Biscuits'}]

Unnamed: 0,support,itemsets,length
11,0.333333,"(Milk, Biscuits)",2


In [53]:
# Set lookup with the members written in the opposite order; sets are
# unordered, so the written order of the members does not matter.
frequent_itemset.loc[frequent_itemset['itemsets'] == {'Biscuits', 'Milk'}]

Unnamed: 0,support,itemsets,length
11,0.333333,"(Milk, Biscuits)",2


In [54]:
# Look up a single itemset by comparing against an explicit frozenset.
frequent_itemset.loc[
    frequent_itemset['itemsets'] == frozenset(['Biscuits', 'Milk'])
]

Unnamed: 0,support,itemsets,length
11,0.333333,"(Milk, Biscuits)",2


In [55]:
# Frozenset lookup with the members written in the opposite order;
# frozensets are unordered, so the written order does not matter.
frequent_itemset.loc[
    frequent_itemset['itemsets'] == frozenset(['Milk', 'Biscuits'])
]

Unnamed: 0,support,itemsets,length
11,0.333333,"(Milk, Biscuits)",2
