In [1]:
import csv

import pandas as pd
from apyori import apriori as apyori_apriori
from efficient_apriori import apriori

from apriori import Apriori


In [2]:
# Test dataset
# This test dataset was taken from the apriori-python repo.
# Judging by the preview below, each column is an item and a cell holds 't'
# when the item is present in that row, and is empty (NaN) otherwise.
data = pd.read_csv('test_dataset/test1.csv')
# Preview the first 10 rows to check that the file was parsed as expected.
data.head(10)

Unnamed: 0,handphone,laptop,charger,powerbank,tablet
0,t,t,,,
1,t,t,t,,
2,t,t,t,t,
3,t,t,,,t
4,t,,t,,t
5,,,,t,t
6,t,t,t,,t
7,t,,t,,
8,t,,,t,
9,,t,t,t,


# Own Algorithm

In [3]:
# Data processing: restructure the data to fit our own implementation.
# Build a dictionary keyed by the DataFrame row index; each value is the list
# (not a tuple) of column names whose cell in that row is not NaN, i.e. the
# items present in that row.
data_dict = data.apply(lambda row: (row.dropna().index.tolist()), axis=1).to_dict()
# Print the dictionary so the per-row item lists can be checked by eye.
print(data_dict)


{0: ['handphone', 'laptop'], 1: ['handphone', 'laptop', 'charger'], 2: ['handphone', 'laptop', 'charger', 'powerbank'], 3: ['handphone', 'laptop', 'tablet'], 4: ['handphone', 'charger', 'tablet'], 5: ['powerbank', 'tablet'], 6: ['handphone', 'laptop', 'charger', 'tablet'], 7: ['handphone', 'charger'], 8: ['handphone', 'powerbank'], 9: ['laptop', 'charger', 'powerbank']}


In [10]:
# Run our own Apriori implementation on the row-index -> items dictionary.
# NOTE(review): min_support=0.001 and min_confidence=1 are the same values that
# are passed to efficient_apriori further down, presumably so that the two
# results can be compared directly -- confirm.
apriori_own = Apriori(data_dict, min_support=0.001, min_confidence=1)
most_frequent_item_set = apriori_own.run_apriori()
# NOTE(review): the saved "Previous itemsets" output of this cell lists a count
# of 1 for every single item, whereas efficient_apriori reports 8/6/6/4/4 for the
# same items -- check whether that is only a debug-print artefact or a counting
# bug in the Apriori class.
# Print every entry of the result, one line per key.
for key in most_frequent_item_set:
    print(f"Itemsets Count {key}: {most_frequent_item_set[key]}")

Previous itemsets:  {('handphone',): 1, ('laptop',): 1, ('charger',): 1, ('powerbank',): 1, ('tablet',): 1}
Previous itemsets:  {frozenset({'laptop', 'handphone'}): 5, frozenset({'charger', 'handphone'}): 5, frozenset({'powerbank', 'handphone'}): 2, frozenset({'tablet', 'handphone'}): 3, frozenset({'laptop', 'charger'}): 4, frozenset({'laptop', 'powerbank'}): 2, frozenset({'laptop', 'tablet'}): 2, frozenset({'powerbank', 'charger'}): 2, frozenset({'tablet', 'charger'}): 2, frozenset({'powerbank', 'tablet'}): 1}
Previous itemsets:  {frozenset({'laptop', 'charger', 'handphone'}): 3, frozenset({'laptop', 'powerbank', 'handphone'}): 1, frozenset({'laptop', 'tablet', 'handphone'}): 2, frozenset({'powerbank', 'charger', 'handphone'}): 1, frozenset({'tablet', 'charger', 'handphone'}): 2, frozenset({'laptop', 'powerbank', 'charger'}): 2, frozenset({'laptop', 'tablet', 'charger'}): 1}
Previous itemsets:  {frozenset({'powerbank', 'laptop', 'charger', 'handphone'}): 1, frozenset({'tablet', 'lapto

# Efficient Apriori

In [5]:
# Data processing for the library implementations: build a plain list with one
# entry per DataFrame row, each entry being the list of column names (items)
# whose cell in that row is not NaN.
data_list = data.apply(lambda row: row.dropna().index.tolist(), axis=1).tolist()
# Display the resulting list of transactions.
data_list

[['handphone', 'laptop'],
 ['handphone', 'laptop', 'charger'],
 ['handphone', 'laptop', 'charger', 'powerbank'],
 ['handphone', 'laptop', 'tablet'],
 ['handphone', 'charger', 'tablet'],
 ['powerbank', 'tablet'],
 ['handphone', 'laptop', 'charger', 'tablet'],
 ['handphone', 'charger'],
 ['handphone', 'powerbank'],
 ['laptop', 'charger', 'powerbank']]

In [6]:
# Mine the itemsets with efficient_apriori, using the same thresholds as the
# run of our own implementation so that the two results can be compared.
# The second return value is bound to `rules` instead of `_`: in IPython `_`
# is the last-output cache variable, and overwriting it stops the kernel from
# maintaining `_`/`__`/`___` for the rest of the session.
itemsets, rules = apriori(data_list, min_support=0.001, min_confidence=1)
# Display the itemsets (the saved output is grouped by itemset size).
itemsets

{1: {('handphone',): 8,
  ('laptop',): 6,
  ('charger',): 6,
  ('powerbank',): 4,
  ('tablet',): 4},
 2: {('charger', 'handphone'): 5,
  ('charger', 'laptop'): 4,
  ('charger', 'powerbank'): 2,
  ('charger', 'tablet'): 2,
  ('handphone', 'laptop'): 5,
  ('handphone', 'powerbank'): 2,
  ('handphone', 'tablet'): 3,
  ('laptop', 'powerbank'): 2,
  ('laptop', 'tablet'): 2,
  ('powerbank', 'tablet'): 1},
 3: {('charger', 'handphone', 'laptop'): 3,
  ('charger', 'handphone', 'powerbank'): 1,
  ('charger', 'handphone', 'tablet'): 2,
  ('charger', 'laptop', 'powerbank'): 2,
  ('charger', 'laptop', 'tablet'): 1,
  ('handphone', 'laptop', 'powerbank'): 1,
  ('handphone', 'laptop', 'tablet'): 2},
 4: {('charger', 'handphone', 'laptop', 'powerbank'): 1,
  ('charger', 'handphone', 'laptop', 'tablet'): 1}}

In [7]:
# Cross-check with a third implementation, apyori. `apyori_apriori` is imported
# in the notebook's first (imports) cell so that every dependency is declared
# in one place and the notebook survives Restart & Run All.
# The result is wrapped in list() so all records are materialised before
# printing. With min_support=0.5 the saved output only contains itemsets whose
# support is 0.5 or higher.
results = list(apyori_apriori(data_list, min_support=0.5))
print(results)

[RelationRecord(items=frozenset({'charger'}), support=0.6, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'charger'}), confidence=0.6, lift=1.0)]), RelationRecord(items=frozenset({'handphone'}), support=0.8, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'handphone'}), confidence=0.8, lift=1.0)]), RelationRecord(items=frozenset({'laptop'}), support=0.6, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'laptop'}), confidence=0.6, lift=1.0)]), RelationRecord(items=frozenset({'charger', 'handphone'}), support=0.5, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'charger', 'handphone'}), confidence=0.5, lift=1.0), OrderedStatistic(items_base=frozenset({'charger'}), items_add=frozenset({'handphone'}), confidence=0.8333333333333334, lift=1.0416666666666667), OrderedStatistic(items_base=frozenset({'handphone'}), items_add=frozenset({'charger'}), confidence=0