In [2]:
import sys
sys.path.insert(1, '../../../..')

In [3]:
from algorithms.ml.associative_rules.AprioriModel import AprioriModel

### Correctness check

In [5]:
# Toy data set for the correctness check: ten baskets over the items a-h,
# small enough that supports and confidences can be verified by hand.
# Each string is one basket; splitting on whitespace yields its item list.
transactions = [
    basket.split()
    for basket in (
        'a b c d e',
        'a c d f',
        'a b c d e g',
        'c d e f',
        'c e f h',
        'd e f',
        'a f g',
        'd e g h',
        'a b c f',
        'c d e h',
    )
]

In [6]:
# Initialize and run Apriori on the toy `transactions` list.
# Both thresholds are passed as fractions in [0, 1]. The supports printed by the
# following cells include itemsets at exactly 0.40, so the support threshold
# appears to be inclusive -- TODO confirm against AprioriModel.
apriori = AprioriModel(min_support=0.4, min_confidence=0.6)
# Left as the last bare expression on purpose: the cell displays whatever
# fit() returns (the recorded output shows an AprioriModel instance).
apriori.fit(transactions)

<algorithms.ml.associative_rules.AprioriModel.AprioriModel at 0x1f56d704610>

In [7]:
# Print frequent itemsets, grouped by the keys of get_frequent_itemsets()
# (presumably the itemset size -- confirm against AprioriModel).
# Items are sorted before printing: iterating a set of strings follows hash
# order, which changes between kernel sessions under hash randomization, so
# printing `set(itemset)` directly makes the recorded output irreproducible.
print("Frequent Itemsets:")
for itemsets in apriori.get_frequent_itemsets().values():
    for itemset, support in itemsets.items():
        items = ", ".join(repr(item) for item in sorted(itemset))
        print(f"{{{items}}}: {support:.2f}")

Frequent Itemsets:
{'a'}: 0.50
{'c'}: 0.70
{'d'}: 0.70
{'e'}: 0.70
{'f'}: 0.60
{'a', 'c'}: 0.40
{'c', 'd'}: 0.50
{'e', 'c'}: 0.50
{'e', 'd'}: 0.60
{'c', 'f'}: 0.40
{'e', 'c', 'd'}: 0.40


In [8]:
# Print association rules with their support, confidence and lift.
# Antecedent and consequent are rendered with their items sorted: they print
# as sets, and a set of strings iterates in hash order, which varies between
# kernel sessions -- sorting keeps the output stable across runs.
print("\nAssociation Rules:")
for rule in apriori.get_association_rules():
    antecedent = ", ".join(repr(item) for item in sorted(rule['antecedent']))
    consequent = ", ".join(repr(item) for item in sorted(rule['consequent']))
    print(f"{{{antecedent}}} => {{{consequent}}}")
    print(f"Support: {rule['support']:.2f}, Confidence: {rule['confidence']:.2f}, Lift: {rule['lift']:.2f}")
    print()


Association Rules:
{'a'} => {'c'}
Support: 0.40, Confidence: 0.80, Lift: 1.14

{'c'} => {'d'}
Support: 0.50, Confidence: 0.71, Lift: 1.02

{'d'} => {'c'}
Support: 0.50, Confidence: 0.71, Lift: 1.02

{'e'} => {'c'}
Support: 0.50, Confidence: 0.71, Lift: 1.02

{'c'} => {'e'}
Support: 0.50, Confidence: 0.71, Lift: 1.02

{'e'} => {'d'}
Support: 0.60, Confidence: 0.86, Lift: 1.22

{'d'} => {'e'}
Support: 0.60, Confidence: 0.86, Lift: 1.22

{'f'} => {'c'}
Support: 0.40, Confidence: 0.67, Lift: 0.95

{'e', 'c'} => {'d'}
Support: 0.40, Confidence: 0.80, Lift: 1.14

{'e', 'd'} => {'c'}
Support: 0.40, Confidence: 0.67, Lift: 0.95

{'c', 'd'} => {'e'}
Support: 0.40, Confidence: 0.80, Lift: 1.14



### Execution time check

In [5]:
import time
import pandas as pd
from mlxtend.frequent_patterns import apriori as skl_apriori
from mlxtend.preprocessing import TransactionEncoder
import random

# Generate synthetic transaction data
def generate_transactions(num_transactions, num_items, max_items_per_transaction, seed=None):
    """Generate synthetic market-basket transactions.

    Each transaction is a list of distinct item names drawn without
    replacement from ``item_0`` ... ``item_{num_items - 1}``; its length is
    chosen uniformly between 1 and the largest feasible basket size.

    Args:
        num_transactions: Number of transactions to generate.
        num_items: Size of the item catalogue.
        max_items_per_transaction: Upper bound on the length of a transaction.
            Values larger than ``num_items`` are capped at ``num_items``,
            because a basket cannot contain more distinct items than exist.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is reproducible and the global RNG state is
            left untouched. When ``None`` the module-level ``random``
            functions are used.

    Returns:
        A list of ``num_transactions`` lists of item-name strings.

    Raises:
        ValueError: If transactions are requested but ``num_items`` or
            ``max_items_per_transaction`` is smaller than 1.
    """
    rng = random if seed is None else random.Random(seed)
    items = [f'item_{i}' for i in range(num_items)]

    # random.sample() raises ValueError when asked for more elements than the
    # population holds, so never draw a length above the catalogue size.
    longest = min(max_items_per_transaction, num_items)
    if num_transactions > 0 and longest < 1:
        raise ValueError(
            "num_items and max_items_per_transaction must both be at least 1"
        )

    transactions = []
    for _ in range(num_transactions):
        transaction_length = rng.randint(1, longest)
        transactions.append(rng.sample(items, transaction_length))
    return transactions

# Seed the global RNG so the synthetic workload (and therefore the amount of
# work each implementation has to do) is identical on every Restart & Run All.
random.seed(42)
large_transactions = generate_transactions(5000, 50, 10)

# Time custom AprioriModel.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can jump.
# NOTE(review): construction + fit() are timed together, and fit() may also
# derive association rules (min_confidence is passed) -- confirm this is a
# like-for-like comparison with the itemset-only mlxtend call below.
start = time.perf_counter()
apriori_large = AprioriModel(min_support=0.1, min_confidence=0.5)
apriori_large.fit(large_transactions)
my_apriori_time = time.perf_counter() - start

# Prepare data for mlxtend apriori: one-hot encode the transactions into a
# boolean DataFrame. This encoding step is intentionally outside the timed
# region, so only the mining itself is measured.
te = TransactionEncoder()
te_ary = te.fit_transform(large_transactions)
df_large = pd.DataFrame(te_ary, columns=te.columns_)

# Time mlxtend apriori (imported under the alias skl_apriori; mlxtend is a
# separate library from scikit-learn despite the variable names and labels).
start = time.perf_counter()
frequent_itemsets = skl_apriori(df_large, min_support=0.1, use_colnames=True)
sklearn_apriori_time = time.perf_counter() - start

print(f"Custom AprioriModel execution time: {my_apriori_time:.4f} seconds")
print(f"sklearn/mlxtend Apriori execution time: {sklearn_apriori_time:.4f} seconds")

Custom AprioriModel execution time: 0.7671 seconds
sklearn/mlxtend Apriori execution time: 0.0173 seconds


---
Comment
---

### Why is my implementation slower?

My implementation:
- Uses Python sets and frozensets which have overhead
- Uses defaultdict and combinations which add some computational cost
- Checks all subsets in candidate generation (all_subsets_frequent)

mlxtend implementation:
- Uses more efficient data structures (like pandas df, bitmaps, numpy arrays)
- Implements smarter candidate generation that avoids checking all subsets
- Uses optimized counting methods
- Performance-critical work runs in compiled code: rather than looping in Python, the library expresses support counting as vectorized NumPy operations on a boolean matrix, and NumPy executes those in C. This dramatically reduces the Python interpreter overhead and leads to much faster execution speeds for numerical computations and iterative processes.