In [1]:
import csv

import pandas as pd
from apyori import apriori as apyori_apriori
from efficient_apriori import apriori

from apriori import Apriori


In [2]:
# Evaluation test dataset (taken from the apriori-python repo).
# Each column is one item; a non-empty cell marks that item as present in the
# row's transaction.
dataset_path = 'test_dataset/test2.csv'
data = pd.read_csv(dataset_path)
data.head(n=10)

Unnamed: 0,milk,bread,biscuit,cornflakes,bournvita,jam,maggi,tea,coffee,cock,sugar
0,t,t,t,,,,,,,,
1,t,t,t,t,,,,,,,
2,,t,,,t,,,t,,,
3,t,t,,,,t,t,,,,
4,,,t,,,,t,t,,,
5,,t,,,t,,,t,,,
6,,,,t,,,t,t,,,
7,,t,t,,,,t,t,,,
8,,t,,,,t,t,t,,,
9,t,t,,,,,,,,,


# Own Algorithm

In [3]:
# Reshape the frame for our own implementation: map every row label to the
# list of column names (items) whose cell in that row is not NaN.
data_dict = {
    row_label: row.dropna().index.tolist()
    for row_label, row in data.iterrows()
}
print(data_dict)


{0: ['milk', 'bread', 'biscuit'], 1: ['milk', 'bread', 'biscuit', 'cornflakes'], 2: ['bread', 'bournvita', 'tea'], 3: ['milk', 'bread', 'jam', 'maggi'], 4: ['biscuit', 'maggi', 'tea'], 5: ['bread', 'bournvita', 'tea'], 6: ['cornflakes', 'maggi', 'tea'], 7: ['bread', 'biscuit', 'maggi', 'tea'], 8: ['bread', 'jam', 'maggi', 'tea'], 9: ['milk', 'bread'], 10: ['biscuit', 'cornflakes', 'coffee', 'cock'], 11: ['biscuit', 'cornflakes', 'coffee', 'cock'], 12: ['bournvita', 'coffee', 'sugar'], 13: ['bread', 'coffee', 'cock'], 14: ['bread', 'biscuit', 'sugar'], 15: ['cornflakes', 'coffee', 'sugar'], 16: ['bread', 'bournvita', 'sugar'], 17: ['bread', 'coffee', 'sugar'], 18: ['bread', 'coffee', 'sugar'], 19: ['milk', 'cornflakes', 'tea', 'coffee']}


In [4]:
# Build our own Apriori miner on the transaction dict, then display the
# per-item counts it reports (keys are 1-tuples of item names).
apriori_own = Apriori(
    data_dict,
    min_support=0.001,
    min_confidence=0.3,
)
item_freq = apriori_own.item_frequency()
item_freq

{('milk',): 5,
 ('bread',): 13,
 ('biscuit',): 7,
 ('cornflakes',): 6,
 ('bournvita',): 4,
 ('tea',): 7,
 ('jam',): 2,
 ('maggi',): 5,
 ('coffee',): 8,
 ('cock',): 3,
 ('sugar',): 6}

In [5]:

# Run our own Apriori implementation and print every frequent itemset it
# found, grouped by the outer key (the itemset size in the recorded output).
frequent_item_set = apriori_own.run_apriori()
for itemset_size, counts_by_itemset in frequent_item_set.items():
    print(f"Itemsets Count {itemset_size}: ")
    for itemset, count in counts_by_itemset.items():
        print(f"Itemset {itemset}: {count}")

100%|██████████| 55/55 [00:00<00:00, 55122.27it/s]
100%|██████████| 53/53 [00:00<?, ?it/s]
100%|██████████| 6/6 [00:00<00:00, 6016.21it/s]

No more frequent itemsets are further found
Itemsets Count 1: 
Itemset ('milk',): 5
Itemset ('bread',): 13
Itemset ('biscuit',): 7
Itemset ('cornflakes',): 6
Itemset ('bournvita',): 4
Itemset ('tea',): 7
Itemset ('jam',): 2
Itemset ('maggi',): 5
Itemset ('coffee',): 8
Itemset ('cock',): 3
Itemset ('sugar',): 6
Itemsets Count 2: 
Itemset ('milk', 'bread'): 4
Itemset ('milk', 'biscuit'): 2
Itemset ('milk', 'cornflakes'): 2
Itemset ('milk', 'tea'): 1
Itemset ('milk', 'jam'): 1
Itemset ('milk', 'maggi'): 1
Itemset ('milk', 'coffee'): 1
Itemset ('bread', 'biscuit'): 4
Itemset ('bread', 'cornflakes'): 1
Itemset ('bread', 'bournvita'): 3
Itemset ('bread', 'tea'): 4
Itemset ('bread', 'jam'): 2
Itemset ('bread', 'maggi'): 3
Itemset ('bread', 'coffee'): 3
Itemset ('bread', 'cock'): 1
Itemset ('bread', 'sugar'): 4
Itemset ('biscuit', 'cornflakes'): 3
Itemset ('biscuit', 'tea'): 2
Itemset ('biscuit', 'maggi'): 2
Itemset ('biscuit', 'coffee'): 2
Itemset ('biscuit', 'cock'): 2
Itemset ('biscuit', 's




# Efficient Apriori

In [6]:
# Data processing: the library runs below take a plain list of transactions,
# so collect the non-NaN column names of every row, in row order.
data_list = [row.dropna().index.tolist() for _row_label, row in data.iterrows()]
data_list

[['milk', 'bread', 'biscuit'],
 ['milk', 'bread', 'biscuit', 'cornflakes'],
 ['bread', 'bournvita', 'tea'],
 ['milk', 'bread', 'jam', 'maggi'],
 ['biscuit', 'maggi', 'tea'],
 ['bread', 'bournvita', 'tea'],
 ['cornflakes', 'maggi', 'tea'],
 ['bread', 'biscuit', 'maggi', 'tea'],
 ['bread', 'jam', 'maggi', 'tea'],
 ['milk', 'bread'],
 ['biscuit', 'cornflakes', 'coffee', 'cock'],
 ['biscuit', 'cornflakes', 'coffee', 'cock'],
 ['bournvita', 'coffee', 'sugar'],
 ['bread', 'coffee', 'cock'],
 ['bread', 'biscuit', 'sugar'],
 ['cornflakes', 'coffee', 'sugar'],
 ['bread', 'bournvita', 'sugar'],
 ['bread', 'coffee', 'sugar'],
 ['bread', 'coffee', 'sugar'],
 ['milk', 'cornflakes', 'tea', 'coffee']]

In [7]:
# Mine frequent itemsets with efficient_apriori. The call returns a pair
# (itemsets first, association rules second); only the itemsets are needed
# here, so index the result instead of unpacking the rules into `_`.
# In IPython/Jupyter `_` is the "last output" variable, and assigning to it
# stops the kernel from updating it for the rest of the session.
itemsets = apriori(data_list, min_support=0.001, min_confidence=1)[0]
itemsets

{1: {('milk',): 5,
  ('bread',): 13,
  ('biscuit',): 7,
  ('cornflakes',): 6,
  ('bournvita',): 4,
  ('tea',): 7,
  ('jam',): 2,
  ('maggi',): 5,
  ('coffee',): 8,
  ('cock',): 3,
  ('sugar',): 6},
 2: {('biscuit', 'bread'): 4,
  ('biscuit', 'cock'): 2,
  ('biscuit', 'coffee'): 2,
  ('biscuit', 'cornflakes'): 3,
  ('biscuit', 'maggi'): 2,
  ('biscuit', 'milk'): 2,
  ('biscuit', 'sugar'): 1,
  ('biscuit', 'tea'): 2,
  ('bournvita', 'bread'): 3,
  ('bournvita', 'coffee'): 1,
  ('bournvita', 'sugar'): 2,
  ('bournvita', 'tea'): 2,
  ('bread', 'cock'): 1,
  ('bread', 'coffee'): 3,
  ('bread', 'cornflakes'): 1,
  ('bread', 'jam'): 2,
  ('bread', 'maggi'): 3,
  ('bread', 'milk'): 4,
  ('bread', 'sugar'): 4,
  ('bread', 'tea'): 4,
  ('cock', 'coffee'): 3,
  ('cock', 'cornflakes'): 2,
  ('coffee', 'cornflakes'): 4,
  ('coffee', 'milk'): 1,
  ('coffee', 'sugar'): 4,
  ('coffee', 'tea'): 1,
  ('cornflakes', 'maggi'): 1,
  ('cornflakes', 'milk'): 2,
  ('cornflakes', 'sugar'): 1,
  ('cornflakes', 

# Apyori

In [8]:
# Run the apyori implementation on the same transaction list and print the
# raw RelationRecord objects it yields. `apyori_apriori` is imported in the
# notebook's import cell together with the other dependencies.
# NOTE(review): min_support is 0.3 here but 0.001 in the runs above, so this
# output is not directly comparable with them -- confirm that is intended.
results = list(apyori_apriori(data_list, min_support=0.3))
print(results)

[RelationRecord(items=frozenset({'biscuit'}), support=0.35, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'biscuit'}), confidence=0.35, lift=1.0)]), RelationRecord(items=frozenset({'bread'}), support=0.65, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'bread'}), confidence=0.65, lift=1.0)]), RelationRecord(items=frozenset({'coffee'}), support=0.4, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'coffee'}), confidence=0.4, lift=1.0)]), RelationRecord(items=frozenset({'cornflakes'}), support=0.3, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'cornflakes'}), confidence=0.3, lift=1.0)]), RelationRecord(items=frozenset({'sugar'}), support=0.3, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'sugar'}), confidence=0.3, lift=1.0)]), RelationRecord(items=frozenset({'tea'}), support=0.35, ordered_statistics=[OrderedStatist