In [11]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

#Load dữ liệu
df = pd.read_csv('data.csv', header=None)
display(df)

Unnamed: 0,0,1,2,3,4,5
0,Wine,Chips,Bread,Butter,Milk,Apple
1,Wine,,Bread,Butter,Milk,
2,,,Bread,Butter,Milk,
3,,Chips,,,,Apple
4,Wine,Chips,Bread,Butter,Milk,Apple
5,Wine,Chips,,,Milk,
6,Wine,Chips,Bread,Butter,,Apple
7,Wine,Chips,,,Milk,
8,Wine,,Bread,,,Apple
9,Wine,,Bread,Butter,Milk,


In [12]:
records = []
for i in range(0, df.shape[0]):
    records.append([str(df.values[i, j]) for j in range(0, df.shape[1])])

In [13]:
#chuyển records thành transaction
te = TransactionEncoder()
te_ary = te.fit(records).transform(records)
df1 = pd.DataFrame(te_ary, columns=te.columns_)
display(df1)

Unnamed: 0,Apple,Bread,Butter,Chips,Milk,Wine,nan
0,True,True,True,True,True,True,False
1,False,True,True,False,True,True,True
2,False,True,True,False,True,False,True
3,True,False,False,True,False,False,True
4,True,True,True,True,True,True,False
5,False,False,False,True,True,True,True
6,True,True,True,True,False,True,True
7,False,False,False,True,True,True,True
8,True,True,False,False,False,True,True
9,False,True,True,False,True,True,True


In [14]:
frequent_itemsets = apriori(df1, min_support=0.6, use_colnames=True)
display(frequent_itemsets)

Unnamed: 0,support,itemsets
0,0.681818,(Apple)
1,0.727273,(Bread)
2,0.681818,(Butter)
3,0.636364,(Chips)
4,0.772727,(Milk)
5,0.727273,(Wine)
6,0.818182,(nan)
7,0.636364,"(Milk, Wine)"


In [15]:
# build association rules using support metric
rules = association_rules(frequent_itemsets, metric="support", support_only=True, 
                          min_threshold=0.1)

rules = rules[['antecedents', 'consequents', 'support']]
print(rules)

  antecedents consequents   support
0      (Milk)      (Wine)  0.636364
1      (Wine)      (Milk)  0.636364


In [16]:
from itertools import combinations

def get_frequent_itemsets(transactions, minsup):
    def count_support(candidates):
        counts = {}
        for transaction in transactions:
            for itemset in candidates:
                if itemset.issubset(transaction):
                    counts[itemset] = counts.get(itemset, 0) + 1
        return {itemset: count for itemset, count in counts.items() if count >= minsup}

    # Khởi tạo
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            itemset = frozenset([item])
            item_counts[itemset] = item_counts.get(itemset, 0) + 1

    F = []
    F1 = {itemset for itemset, count in item_counts.items() if count >= minsup}
    F.append(F1)
    k = 1

    while F[k - 1]:
        # Join step
        prev_frequent = list(F[k - 1])
        candidates = set()
        for i in range(len(prev_frequent)):
            for j in range(i + 1, len(prev_frequent)):
                union = prev_frequent[i] | prev_frequent[j]
                if len(union) == k + 1:
                    subsets = combinations(union, k)
                    if all(frozenset(s) in F[k - 1] for s in subsets):  # Prune step
                        candidates.add(union)

        # Count support
        frequent_itemsets = count_support(candidates)
        F.append(set(frequent_itemsets.keys()))
        k += 1

    # Trả về tất cả tập phổ biến
    return set().union(*F)

# Ví dụ sử dụng
transactions = [
    {'A', 'B', 'C'},
    {'A', 'B'},
    {'A', 'C'},
    {'B', 'C'},
    {'A'},
    {'B'},
    {'C'}
]
minsup = 2
frequent_itemsets = get_frequent_itemsets(np.array(df), minsup)
print("Frequent itemsets:", frequent_itemsets)

Frequent itemsets: {frozenset({nan, 'Chips', 'Wine', 'Butter'}), frozenset({nan, 'Chips', 'Butter', 'Bread'}), frozenset({'Milk', 'Butter', nan}), frozenset({'Milk', 'Butter', 'Bread', 'Apple', nan}), frozenset({'Bread', 'Apple'}), frozenset({nan, 'Wine', 'Butter', 'Apple'}), frozenset({'Milk', 'Chips', 'Apple'}), frozenset({'Butter', 'Bread', 'Chips'}), frozenset({nan, 'Bread', 'Chips'}), frozenset({'Wine', 'Butter', 'Bread', 'Apple'}), frozenset({'Wine', nan, 'Bread', 'Apple'}), frozenset({'Milk', 'Butter'}), frozenset({'Milk', nan}), frozenset({'Butter', 'Wine', 'Apple'}), frozenset({'Butter', 'Bread', 'Apple'}), frozenset({nan, 'Bread', 'Apple'}), frozenset({nan, 'Wine', 'Apple'}), frozenset({'Milk', 'Chips', 'Butter', 'Apple'}), frozenset({'Milk', 'Chips', nan, 'Apple'}), frozenset({'Chips', 'Butter', 'Bread', 'Apple', 'Wine'}), frozenset({nan, 'Chips', 'Bread', 'Apple', 'Wine'}), frozenset({'Milk', 'Apple'}), frozenset({'Butter', 'Bread', nan}), frozenset({'Milk', 'Chips'}), froz