In [None]:
# Reference: http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/
# Toy transaction data: each inner list is one transaction (a basket of item names).
# Note that 'Onion' appears twice in the last transaction.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [None]:
#The apriori function expects data in a one-hot encoded pandas DataFrame.
#We can transform it into the right format via the TransactionEncoder

In [None]:
#import packages
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# One-hot encode the transactions: first let the encoder learn the set of
# distinct items, then encode every transaction against that set.
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)

In [None]:
# Show the encoded boolean array: one row per transaction, one column per distinct item.
te_ary

array([[False, False, False,  True, False,  True,  True,  True,  True,
        False,  True],
       [False, False,  True,  True, False,  True, False,  True,  True,
        False,  True],
       [ True, False, False,  True, False,  True,  True, False, False,
        False, False],
       [False,  True, False, False, False,  True,  True, False, False,
         True,  True],
       [False,  True, False,  True,  True,  True, False, False,  True,
        False, False]])

In [None]:
# Wrap the encoded array in a DataFrame, labelling each column with the
# item name stored on the encoder (te.columns_).
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [None]:
#import apriori class from mlxtend.frequent_patterns library
#https://github.com/rasbt/mlxtend/blob/master/mlxtend/frequent_patterns/apriori.py
from mlxtend.frequent_patterns import apriori

In [None]:
# Frequent items and itemsets with at least 60% support.
# use_colnames is left at its default (off), so items are reported by column index.
apriori(df, min_support=0.6, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.8,(3)
1,1.0,(5)
2,0.6,(6)
3,0.6,(8)
4,0.6,(10)
5,0.8,"(3, 5)"
6,0.6,"(8, 3)"
7,0.6,"(5, 6)"
8,0.6,"(8, 5)"
9,0.6,"(10, 5)"


In [None]:
# By default, apriori reports items by their column index, which can be handy
# for downstream steps such as association rule mining.
# Passing use_colnames=True replaces those indices with the item names,
# which is easier to read.
apriori(
    df,
    min_support=0.6,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Milk, Kidney Beans)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Yogurt, Kidney Beans)"


In [None]:
#Let's assume we are only interested in itemsets of length 2 that have a support of at least 80 percent. 
#First, we create the frequent itemsets via apriori and add a new column that stores the length of each itemset:

In [None]:
# Keep the frequent itemsets (support >= 0.6, items shown by name) in a
# variable so they can be extended and filtered, then display them.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.6)
frequent_itemsets


Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Milk, Kidney Beans)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Yogurt, Kidney Beans)"


In [None]:
# Add a `length` column holding the number of items in each itemset.
# Series.apply calls the given function on every element of the column, so
# the built-in `len` can be passed directly; wrapping it in a lambda adds nothing.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

In [None]:
# Show the table again; it now includes the `length` column.
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Eggs, Kidney Beans)",2
6,0.6,"(Eggs, Onion)",2
7,0.6,"(Milk, Kidney Beans)",2
8,0.6,"(Onion, Kidney Beans)",2
9,0.6,"(Yogurt, Kidney Beans)",2


In [None]:
# Select the rows that meet both criteria: exactly two items in the itemset
# and a support of at least 0.8.
has_two_items = frequent_itemsets['length'] == 2
has_high_support = frequent_itemsets['support'] >= 0.8
frequent_itemsets[has_two_items & has_high_support]

Unnamed: 0,support,itemsets,length
5,0.8,"(Eggs, Kidney Beans)",2


In [None]:
#Task: create a list of itemsets of length 3 and minimum support 0.6

In [None]:
# Rows can also be selected by the contents of the "itemsets" column:
# compare each itemset against the set of items we are looking for.
wanted_items = {'Onion', 'Eggs'}
frequent_itemsets[frequent_itemsets['itemsets'] == wanted_items]

Unnamed: 0,support,itemsets,length
6,0.6,"(Eggs, Onion)",2


In [None]:
#http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/fpgrowth/
#Task: Use the link above to practice the FP-Growth algorithm


In [None]:
#Association Rules
#https://ashishpandita12.files.wordpress.com/2019/10/apriori-algorithm-in-python.pdf


In [None]:
pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5976 sha256=c0d8b9f4282be29e930a31cad04849cb81b52136b914bcf3165ac7eacce3e8a5
  Stored in directory: /root/.cache/pip/wheels/32/2a/54/10c595515f385f3726642b10c60bf788029e8f3a1323e3913a
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [None]:
import apyori
from apyori import apriori


In [None]:
#For this library the data must be a list of lists (one inner list per transaction); `dataset` defined above is already in that form

In [None]:
# Mine association rules with apyori.
# The function is called through the module because the bare name `apriori`
# is imported from both mlxtend.frequent_patterns and apyori in this notebook,
# so what it refers to depends on which import cell ran last.
# apyori.apriori only documents min_support, min_confidence, min_lift and
# max_length; a `min_length` keyword is silently ignored. The "at least two
# items" requirement is therefore applied explicitly to the yielded records.
MIN_ITEMS = 2
association_rules = (
    record
    for record in apyori.apriori(
        dataset,
        min_support=0.50,
        min_confidence=0.7,
        min_lift=1.2,
    )
    if len(record.items) >= MIN_ITEMS
)

In [None]:
# Collect every rule record into a list so the results can be counted and printed.
association_results = [record for record in association_rules]

In [None]:
# Number of records in the collected results.
print(len(association_results))

2


In [None]:
# Print the raw records. Each RelationRecord holds the itemset, its support,
# and a list of OrderedStatistic entries (items_base, items_add, confidence, lift).
print(association_results)

[RelationRecord(items=frozenset({'Eggs', 'Onion'}), support=0.6, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Eggs'}), items_add=frozenset({'Onion'}), confidence=0.7499999999999999, lift=1.2499999999999998), OrderedStatistic(items_base=frozenset({'Onion'}), items_add=frozenset({'Eggs'}), confidence=1.0, lift=1.25)]), RelationRecord(items=frozenset({'Eggs', 'Onion', 'Kidney Beans'}), support=0.6, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Eggs'}), items_add=frozenset({'Onion', 'Kidney Beans'}), confidence=0.7499999999999999, lift=1.2499999999999998), OrderedStatistic(items_base=frozenset({'Onion'}), items_add=frozenset({'Eggs', 'Kidney Beans'}), confidence=1.0, lift=1.25), OrderedStatistic(items_base=frozenset({'Eggs', 'Kidney Beans'}), items_add=frozenset({'Onion'}), confidence=0.7499999999999999, lift=1.2499999999999998), OrderedStatistic(items_base=frozenset({'Onion', 'Kidney Beans'}), items_add=frozenset({'Eggs'}), confidence=1.0, lift=1.25)])]
