In [8]:
from itertools import combinations
from collections import Counter


In [9]:
def generate_candidates(prev_itemsets, k):
    candidates = set()
    for i in prev_itemsets:
        for j in prev_itemsets:
            if i != j:
                t = i.union(j)
                if len(t) == k:
                    candidates.add(t)
    return list(candidates)


In [10]:
def generate_itemsets(transactions, min_support):
    itemsets = set()
    for transaction in transactions:
        itemsets.update(transaction[1])
    itemsets = sorted(itemsets)

    support_count = Counter()
    for item in itemsets:
        for transaction in transactions:
            if item in transaction[1]:
                support_count[frozenset([item])] += 1

    print("C1:")
    for item in itemsets:
        print(f"[{item}]: {support_count[frozenset([item])]}")

    frequent_itemsets = set()
    for item in itemsets:
        if support_count[frozenset([item])] >= min_support:
            frequent_itemsets.add(frozenset([item]))

    print("\nL1:")
    for itemset in frequent_itemsets:
        print(f"{list(itemset)}: {support_count[itemset]}")

    k = 2
    while frequent_itemsets:
        candidates = generate_candidates(frequent_itemsets, k)
        support_count = Counter()

        for candidate in candidates:
            for transaction in transactions:
                if candidate.issubset(set(transaction[1])):
                    support_count[candidate] += 1

        print(f"\nC{k}:")
        for candidate in candidates:
            print(f"{list(candidate)}: {support_count[candidate]}")

        frequent_itemsets = set()
        for candidate in candidates:
            if support_count[candidate] >= min_support:
                frequent_itemsets.add(candidate)

        print(f"\nL{k}:")
        for itemset in frequent_itemsets:
            print(f"{list(itemset)}: {support_count[itemset]}")

        k += 1

In [12]:
if __name__ == "__main__":
    data = [
        ['T100', ['M', 'O', 'N', 'K', 'E', 'Y']],
        ['T200', ['D', 'O', 'N', 'K', 'E', 'Y']],
        ['T300', ['M', 'A', 'K', 'E']],
        ['T400', ['M', 'U', 'C', 'K', 'Y']],
        ['T500', ['C', 'O', 'O', 'K', 'I', 'E']]
    ]

    min_support = 3
    generate_itemsets(data,min_support)

C1:
[A]: 1
[C]: 2
[D]: 1
[E]: 4
[I]: 1
[K]: 5
[M]: 3
[N]: 2
[O]: 3
[U]: 1
[Y]: 3

L1:
['K']: 5
['M']: 3
['Y']: 3
['E']: 4
['O']: 3

C2:
['E', 'K']: 4
['O', 'K']: 3
['M', 'K']: 3
['Y', 'K']: 3
['Y', 'E']: 2
['M', 'E']: 2
['O', 'Y']: 2
['M', 'Y']: 2
['O', 'E']: 3
['M', 'O']: 1

L2:
['E', 'K']: 4
['O', 'K']: 3
['M', 'K']: 3
['Y', 'K']: 3
['O', 'E']: 3

C3:
['M', 'Y', 'K']: 2
['O', 'Y', 'K']: 2
['M', 'E', 'K']: 2
['M', 'O', 'K']: 1
['Y', 'E', 'K']: 2
['O', 'E', 'K']: 3

L3:
['O', 'E', 'K']: 3

C4:

L4:
