In [1]:
from apriori import Apriori
from improved_apriori import Improved_Apriori
import pandas as pd
import csv
from efficient_apriori import apriori


In [2]:
# Test Dataset
# Took this test dataset from apriori-python repo
data = pd.read_csv('test_dataset/test1.csv')
data.head(10)

Unnamed: 0,handphone,laptop,charger,powerbank,tablet
0,t,t,,,
1,t,t,t,,
2,t,t,t,t,
3,t,t,,,t
4,t,,t,,t
5,,,,t,t
6,t,t,t,,t
7,t,,t,,
8,t,,,t,
9,,t,t,t,


# Own Algorithm

In [3]:
# Data Processing where we structure the data to fit our own implementation
# Create a dictionary where the key is the index and the value is a tuple of items
data_dict = data.apply(lambda row: (row.dropna().index.tolist()), axis=1).to_dict()
print(data_dict)


{0: ['handphone', 'laptop'], 1: ['handphone', 'laptop', 'charger'], 2: ['handphone', 'laptop', 'charger', 'powerbank'], 3: ['handphone', 'laptop', 'tablet'], 4: ['handphone', 'charger', 'tablet'], 5: ['powerbank', 'tablet'], 6: ['handphone', 'laptop', 'charger', 'tablet'], 7: ['handphone', 'charger'], 8: ['handphone', 'powerbank'], 9: ['laptop', 'charger', 'powerbank']}


In [4]:
improved_apriori = Improved_Apriori(data_dict, min_support=0.001, min_confidence=1)
itemset = improved_apriori.apriori()


10it [00:00, 81920.00it/s]


[('handphone', 'laptop'), ('handphone', 'charger'), ('handphone', 'powerbank'), ('handphone', 'tablet'), ('laptop', 'charger'), ('laptop', 'powerbank'), ('laptop', 'tablet'), ('charger', 'powerbank'), ('charger', 'tablet'), ('powerbank', 'tablet')]
Candidate Sets: [('handphone', 'laptop', 'charger'), ('handphone', 'laptop', 'powerbank'), ('handphone', 'laptop', 'tablet'), ('handphone', 'charger', 'powerbank'), ('handphone', 'charger', 'tablet'), ('handphone', 'powerbank', 'tablet'), ('laptop', 'charger', 'powerbank'), ('laptop', 'charger', 'tablet'), ('laptop', 'powerbank', 'tablet'), ('charger', 'powerbank', 'tablet')]


10it [00:00, 55188.21it/s]


[('handphone', 'laptop', 'charger'), ('handphone', 'laptop', 'powerbank'), ('handphone', 'laptop', 'tablet'), ('handphone', 'charger', 'powerbank'), ('handphone', 'charger', 'tablet'), ('laptop', 'charger', 'powerbank'), ('laptop', 'charger', 'tablet')]
Candidate Sets: [('handphone', 'laptop', 'charger', 'powerbank'), ('handphone', 'laptop', 'charger', 'tablet'), ('handphone', 'laptop', 'powerbank', 'tablet'), ('handphone', 'charger', 'powerbank', 'tablet'), ('laptop', 'charger', 'powerbank', 'tablet')]


5it [00:00, 104335.92it/s]


[('handphone', 'laptop', 'charger', 'powerbank'), ('handphone', 'laptop', 'charger', 'tablet')]
Candidate Sets: [('handphone', 'laptop', 'charger', 'powerbank', 'tablet')]


1it [00:00, 8128.50it/s]


In [5]:
L1 = improved_apriori.generate_L1_itemsets()
L1

{('handphone',): 8,
 ('laptop',): 6,
 ('charger',): 6,
 ('powerbank',): 4,
 ('tablet',): 4}

In [6]:
itemset[3]

{('handphone', 'laptop', 'charger', 'powerbank'): 1,
 ('handphone', 'laptop', 'charger', 'tablet'): 1}

In [7]:
apriori_own = Apriori(data_dict, min_support=0.001, min_confidence=1)
most_frequent_item_set = apriori_own.run_apriori()
for key in most_frequent_item_set:
    print(f"Itemsets Count {key}: {most_frequent_item_set[key]}")

100%|██████████| 10/10 [00:00<00:00, 100824.62it/s]
100%|██████████| 10/10 [00:00<00:00, 111254.75it/s]
100%|██████████| 2/2 [00:00<00:00, 41943.04it/s]

No more frequent itemsets are further found
Itemsets Count 1: {('handphone',): 8, ('laptop',): 6, ('charger',): 6, ('powerbank',): 4, ('tablet',): 4}
Itemsets Count 2: {('handphone', 'laptop'): 5, ('handphone', 'charger'): 5, ('handphone', 'powerbank'): 2, ('handphone', 'tablet'): 3, ('laptop', 'charger'): 4, ('laptop', 'powerbank'): 2, ('laptop', 'tablet'): 2, ('charger', 'powerbank'): 2, ('charger', 'tablet'): 2, ('powerbank', 'tablet'): 1}
Itemsets Count 3: {('handphone', 'laptop', 'charger'): 3, ('handphone', 'laptop', 'powerbank'): 1, ('handphone', 'laptop', 'tablet'): 2, ('handphone', 'charger', 'powerbank'): 1, ('handphone', 'charger', 'tablet'): 2, ('laptop', 'charger', 'powerbank'): 2, ('laptop', 'charger', 'tablet'): 1}
Itemsets Count 4: {('handphone', 'laptop', 'charger', 'powerbank'): 1, ('handphone', 'laptop', 'charger', 'tablet'): 1}





# Efficient Apriori

In [8]:
# data processing
data_list = data.apply(lambda row: row.dropna().index.tolist(), axis=1).tolist()
data_list

[['handphone', 'laptop'],
 ['handphone', 'laptop', 'charger'],
 ['handphone', 'laptop', 'charger', 'powerbank'],
 ['handphone', 'laptop', 'tablet'],
 ['handphone', 'charger', 'tablet'],
 ['powerbank', 'tablet'],
 ['handphone', 'laptop', 'charger', 'tablet'],
 ['handphone', 'charger'],
 ['handphone', 'powerbank'],
 ['laptop', 'charger', 'powerbank']]

In [9]:
itemsets, _ = apriori(data_list, min_support=0.001, min_confidence=1)
itemsets

{1: {('handphone',): 8,
  ('laptop',): 6,
  ('charger',): 6,
  ('powerbank',): 4,
  ('tablet',): 4},
 2: {('charger', 'handphone'): 5,
  ('charger', 'laptop'): 4,
  ('charger', 'powerbank'): 2,
  ('charger', 'tablet'): 2,
  ('handphone', 'laptop'): 5,
  ('handphone', 'powerbank'): 2,
  ('handphone', 'tablet'): 3,
  ('laptop', 'powerbank'): 2,
  ('laptop', 'tablet'): 2,
  ('powerbank', 'tablet'): 1},
 3: {('charger', 'handphone', 'laptop'): 3,
  ('charger', 'handphone', 'powerbank'): 1,
  ('charger', 'handphone', 'tablet'): 2,
  ('charger', 'laptop', 'powerbank'): 2,
  ('charger', 'laptop', 'tablet'): 1,
  ('handphone', 'laptop', 'powerbank'): 1,
  ('handphone', 'laptop', 'tablet'): 2},
 4: {('charger', 'handphone', 'laptop', 'powerbank'): 1,
  ('charger', 'handphone', 'laptop', 'tablet'): 1}}

In [10]:
from apyori import apriori as apyori_apriori
results = list(apyori_apriori(data_list, min_support = 0.5))
print(results)

ModuleNotFoundError: No module named 'apyori'