In [None]:
from collections import defaultdict

In [26]:
# Dữ liệu mẫu
transactions = {
    'T1': {'A', 'B'},
    'T2': {'A', 'B', 'C'},
    'T3': {'A'},
    'T4': {'B', 'C'}
}
minsup = 2

In [27]:
# Chuyển đổi dữ liệu sang định dạng dọc (tid-list)
def build_vertical_db(transactions):
    vertical_db = defaultdict(set)
    for tid, items in transactions.items():
        for item in items:
            vertical_db[item].add(tid)
    return vertical_db

# Tính Diffset
def compute_diffset(parent_tidlist, child_tidlist):
    return parent_tidlist - child_tidlist

# Thuật toán dECLAT
def dECLAT(vertical_db, minsup, prefix=None, parent_diffset=None, parent_support=None):
    if prefix is None:
        prefix = set()
    frequent_itemsets = []

    P = {item: tids for item, tids in vertical_db.items() if len(tids) >= minsup}

    for item in sorted(P.keys()):
        new_prefix = prefix | {item}
        tidlist = P[item]
        support = len(tidlist)
        diffset = set()  # Đảm bảo luôn có giá trị mặc định
        
        # Tính diffset nếu có thông tin cha
        if parent_diffset is not None and prefix:
            parent_item = list(prefix)[0]
            if parent_item in vertical_db:
                diffset = compute_diffset(vertical_db[parent_item], tidlist)
                support = parent_support - len(diffset)

        if support >= minsup:
            frequent_itemsets.append((new_prefix, support))
            P_next = {k: P[k] for k in P if k > item and len(P[k] & tidlist) >= minsup}
            sub_results = dECLAT(P_next, minsup, new_prefix, diffset, support)
            frequent_itemsets.extend(sub_results)

    return frequent_itemsets

In [28]:
# Chạy dECLAT
vertical_db = build_vertical_db(transactions)
results = dECLAT(vertical_db, minsup)

for itemset, sup in results:
    print(f"Itemset: {itemset}, Support: {sup}")

Itemset: {'A'}, Support: 3
Itemset: {'B', 'A'}, Support: 3
Itemset: {'B'}, Support: 3
Itemset: {'B', 'C'}, Support: 2
Itemset: {'C'}, Support: 2


In [29]:
# Kiểm tra tính đóng
def is_closed(itemset, support, vertical_db):
    for item in vertical_db:
        if item not in itemset:
            superset_support = len(vertical_db[item] & set.intersection(*[vertical_db[i] for i in itemset]))
            if superset_support == support:
                return False
    return True

# Thuật toán dCHARM
def dCHARM(vertical_db, minsup, prefix=None, parent_diffset=None, parent_support=None):
    if prefix is None:
        prefix = set()
    closed_itemsets = []

    P = {item: tids for item, tids in vertical_db.items() if len(tids) >= minsup}

    for item in sorted(P.keys()):
        new_prefix = prefix | {item}
        tidlist = P[item]
        support = len(tidlist)
        diffset = set()  # Đảm bảo diffset luôn có giá trị mặc định

        if parent_diffset is not None and prefix:
            parent_item = list(prefix)[0]
            if parent_item in vertical_db:
                diffset = compute_diffset(vertical_db[parent_item], tidlist)
                support = parent_support - len(diffset)

        if support >= minsup and is_closed(new_prefix, support, vertical_db):
            closed_itemsets.append((new_prefix, support))
            P_next = {k: P[k] for k in P if k > item and len(P[k] & tidlist) >= minsup}
            sub_results = dCHARM(P_next, minsup, new_prefix, diffset, support)
            closed_itemsets.extend(sub_results)

    return closed_itemsets



In [30]:
# Chạy dCHARM
vertical_db = build_vertical_db(transactions)
results = dCHARM(vertical_db, minsup)

for itemset, sup in results:
    print(f"Closed Itemset: {itemset}, Support: {sup}")

Closed Itemset: {'A'}, Support: 3
Closed Itemset: {'B', 'A'}, Support: 3
Closed Itemset: {'B'}, Support: 3
Closed Itemset: {'B', 'C'}, Support: 2


In [None]:
# Chuyển đổi dữ liệu sang định dạng dọc
def build_vertical_db(transactions):
    vertical_db = defaultdict(set)
    for tid, items in transactions.items():
        for item in items:
            vertical_db[item].add(tid)
    return vertical_db

# Kiểm tra tính tối đa
def is_maximal(itemset, support, vertical_db, maximal_sets, minsup):
    for m in maximal_sets:
        if itemset.issubset(m[0]):  # So sánh với tập itemset đã lưu
            return False
    for item in vertical_db:
        if item not in itemset:
            superset = itemset | {item}
            sup = len(set.intersection(*[vertical_db[i] for i in superset]))
            if sup >= minsup:
                return False
    return True

# Tính Diffset
def compute_diffset(parent_tidlist, child_tidlist):
    return parent_tidlist - child_tidlist

# Thuật toán dGenMax
def dGenMax(vertical_db, minsup, prefix=None, parent_diffset=None, parent_support=None):
    if prefix is None:
        prefix = set()
    maximal_itemsets = []
    
    P = {item: tids for item, tids in vertical_db.items() if len(tids) >= minsup}
    
    for item in sorted(P.keys()):
        new_prefix = prefix | {item}
        tidlist = P[item]
        support = len(tidlist)
        diffset = set()  # Đảm bảo diffset luôn có giá trị mặc định
        
        if parent_diffset is not None and prefix:
            parent_item = list(prefix)[0]
            if parent_item in vertical_db:
                diffset = compute_diffset(vertical_db[parent_item], tidlist)
                support = parent_support - len(diffset)

        if support >= minsup:
            P_next = {k: P[k] for k in P if k > item and len(P[k] & tidlist) >= minsup}
            sub_results = dGenMax(P_next, minsup, new_prefix, diffset, support)
            maximal_itemsets.extend(sub_results)
            
            if is_maximal(new_prefix, support, vertical_db, maximal_itemsets, minsup):
                maximal_itemsets.append((new_prefix, support))
    
    return maximal_itemsets

In [32]:
# Chạy dGenMax
vertical_db = build_vertical_db(transactions)
results = dGenMax(vertical_db, minsup)
for itemset, sup in results:
    print(f"Maximal Itemset: {itemset}, Support: {sup}")

Maximal Itemset: {'B', 'A'}, Support: 3
Maximal Itemset: {'B', 'C'}, Support: 2
