In [None]:
from collections import defaultdict
from itertools import combinations

def pcy_algorithm(transactions, support_threshold, hash_table_size):
    """Find frequent item pairs with the two-pass PCY (Park-Chen-Yu) algorithm.

    The support of an item or pair is the number of transactions that
    contain it, so an item repeated inside one transaction counts once.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items. It is materialized once, so one-shot iterables work.
            Frequent items that share a transaction must be mutually
            orderable, because pairs are reported in sorted order.
        support_threshold: Minimum number of transactions an item or pair
            must appear in to be considered frequent.
        hash_table_size: Number of buckets used to pre-filter candidate
            pairs. It only affects how many pairs are counted exactly,
            never the result.

    Returns:
        A list of ``((item_a, item_b), count)`` tuples, one per frequent
        pair, in order of first appearance in ``transactions``.

    Raises:
        ValueError: If ``hash_table_size`` is not positive.
    """
    if hash_table_size <= 0:
        raise ValueError("hash_table_size must be a positive integer")

    # Deduplicate items per transaction (support is per transaction) and
    # keep the baskets around because the algorithm needs two passes.
    baskets = [set(transaction) for transaction in transactions]

    # Pass 1: count single items and hash every pair into a bucket.
    item_counts = defaultdict(int)
    bucket_counts = [0] * hash_table_size
    for basket in baskets:
        for item in basket:
            item_counts[item] += 1
        for pair in combinations(basket, 2):
            # frozenset makes the hash independent of the pair's order, so
            # the unsorted pairs here land in the same bucket as the sorted
            # pairs produced in pass 2.
            bucket_counts[hash(frozenset(pair)) % hash_table_size] += 1

    frequent_items = {
        item
        for item, count in item_counts.items()
        if count >= support_threshold
    }
    # Bitmap of buckets whose total count reaches the threshold. A frequent
    # pair always lands in a frequent bucket, so filtering on this bitmap
    # can never discard a true result.
    frequent_buckets = [count >= support_threshold for count in bucket_counts]

    # Pass 2: count exactly only the candidate pairs, i.e. both items are
    # frequent and the pair hashes to a frequent bucket.
    pair_counts = defaultdict(int)
    for basket in baskets:
        candidates = sorted(basket & frequent_items)
        for pair in combinations(candidates, 2):
            if frequent_buckets[hash(frozenset(pair)) % hash_table_size]:
                pair_counts[pair] += 1

    return [
        (pair, count)
        for pair, count in pair_counts.items()
        if count >= support_threshold
    ]


# Demo: mine frequent pairs from a small set of market baskets.
transactions = [[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4], [2, 3], [3, 4], [2, 4]]

# A pair must appear in at least this many baskets to be reported.
support_threshold = 3
# Number of buckets in the PCY hash table.
hash_table_size = 10

frequent_itemsets = pcy_algorithm(
    transactions=transactions,
    support_threshold=support_threshold,
    hash_table_size=hash_table_size,
)

# Report each frequent pair together with its count.
for itemset, count in frequent_itemsets:
    print(f"Itemset: {itemset}, Count: {count}")


Itemset: (1, 2), Count: 3
Itemset: (2, 3), Count: 3
Itemset: (2, 4), Count: 4
Itemset: (3, 4), Count: 3
