In [1]:
import pandas as pd
import numpy as np
import time
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

In [2]:
# Toy market-basket data: each inner list is one transaction (the items in one basket).
# The last transaction lists "Onion" twice on purpose; the encoder still yields a
# single "Onion" column.
dataset = [
    ["Milk", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Dill", "Onion", "Nutmeg", "Kidney Beans", "Eggs", "Yogurt"],
    ["Milk", "Apple", "Kidney Beans", "Eggs"],
    ["Milk", "Unicorn", "Corn", "Kidney Beans", "Yogurt"],
    ["Corn", "Onion", "Onion", "Kidney Beans", "Ice cream", "Eggs"],
]

In [3]:
# Learn the item vocabulary from the transactions first, then one-hot encode them
# into a boolean array (one row per transaction, one column per distinct item).
enc = TransactionEncoder().fit(dataset)
encoded_dataset = enc.transform(dataset)
encoded_dataset

array([[False, False, False,  True, False,  True,  True,  True,  True,
        False,  True],
       [False, False,  True,  True, False,  True, False,  True,  True,
        False,  True],
       [ True, False, False,  True, False,  True,  True, False, False,
        False, False],
       [False,  True, False, False, False,  True,  True, False, False,
         True,  True],
       [False,  True, False,  True,  True,  True, False, False,  True,
        False, False]])

In [4]:
# Item names learned by the encoder; their order matches the columns of encoded_dataset.
colnames = enc.columns_
colnames

['Apple',
 'Corn',
 'Dill',
 'Eggs',
 'Ice cream',
 'Kidney Beans',
 'Milk',
 'Nutmeg',
 'Onion',
 'Unicorn',
 'Yogurt']

In [5]:
# Wrap the boolean matrix in a labelled frame so each column carries its item name.
df = pd.DataFrame(encoded_dataset, columns=colnames)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [6]:
def mine_by_apriori(df: pd.DataFrame, min_support: float = 0.6) -> "tuple[pd.DataFrame, float]":
    """Mine frequent itemsets with Apriori and time the mining call.

    Args:
        df: One-hot encoded transactions (one row per transaction, one boolean
            column per item); passed straight through to ``apriori``.
        min_support: Minimum support threshold forwarded to ``apriori``.
            Defaults to 0.6, the value this notebook originally hard-coded.

    Returns:
        A ``(frequent_itemsets, time_cost)`` tuple: the itemsets sorted by
        ``support`` in descending order, and the elapsed seconds spent inside
        the ``apriori`` call.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the wall
    # clock and can jump (even backwards) if the system clock is adjusted.
    start_time = time.perf_counter()
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    time_cost = time.perf_counter() - start_time

    return frequent_itemsets.sort_values(by=['support'], ascending=False), time_cost

In [7]:
# Run Apriori on the one-hot frame: `result` holds the frequent itemsets sorted by
# support (descending); `time_cost` is the elapsed seconds of the mining call.
result, time_cost = mine_by_apriori(df)
result

Unnamed: 0,support,itemsets
1,1.0,(Kidney Beans)
0,0.8,(Eggs)
5,0.8,"(Eggs, Kidney Beans)"
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
6,0.6,"(Eggs, Onion)"
7,0.6,"(Milk, Kidney Beans)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Kidney Beans, Yogurt)"


In [8]:
# Derive association rules from the frequent itemsets. The keyword arguments are
# mlxtend's defaults, spelled out so the rule filter is visible to the reader.
rules = association_rules(result, metric="confidence", min_threshold=0.8)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf
1,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
2,(Onion),(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
3,(Milk),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
4,(Onion),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
5,(Yogurt),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
6,"(Eggs, Onion)",(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
7,"(Onion, Kidney Beans)",(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
8,(Onion),"(Eggs, Kidney Beans)",0.6,0.8,0.6,1.0,1.25,0.12,inf


In [None]:
# Number of items on the left-hand side of each rule.
rules['antecedent_count'] = rules['antecedents'].apply(len)
rules

In [None]:
# Number of items on the right-hand side of each rule.
rules['consequent_count'] = rules['consequents'].apply(len)
rules