In [31]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load the raw bakery sales records.
data = pd.read_csv('Bakery_sales.csv')

# Build one basket per ticket: the list of every article sold on that ticket.
transactions = (
    data.groupby('ticket_number')['article']
    .apply(list)
    .reset_index(name='items')
)

# Use TransactionEncoder to encode the baskets in binary form
# (one column per distinct item, named via te.columns_).
te = TransactionEncoder()
te.fit(transactions['items'])
te_ary = te.transform(transactions['items'])
df = pd.DataFrame(te_ary, columns=te.columns_)

# Find the frequent itemsets (support >= 0.01), then derive association rules
# filtered on lift.
# NOTE(review): a lift below 1 means the items co-occur less often than expected
# under independence; min_threshold=0.7 keeps such rules -- confirm this is intended.
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=0.7,
)

# Relabel the antecedent/consequent columns and keep only the reported metrics.
selected_columns = ['Mat hang', 'Mua kem', 'support', 'confidence', 'lift']
rules = rules.rename(columns={'antecedents': 'Mat hang', 'consequents': 'Mua kem'})
selected_rules = rules[selected_columns]

# Show the first 20 rules.
print(selected_rules.head(20))



              Mat hang             Mua kem   support  confidence      lift
0              (COUPE)          (BAGUETTE)  0.011271    0.079180  0.707408
1           (BAGUETTE)             (COUPE)  0.011271    0.100701  0.707408
2           (BAGUETTE)         (CROISSANT)  0.010788    0.096379  1.148964
3          (CROISSANT)          (BAGUETTE)  0.010788    0.128604  1.148964
4              (COUPE)        (BOULE 200G)  0.017479    0.122786  6.239965
5         (BOULE 200G)             (COUPE)  0.017479    0.888268  6.239965
6              (COUPE)        (BOULE 400G)  0.023759    0.166907  5.579279
7         (BOULE 400G)             (COUPE)  0.023759    0.794219  5.579279
8           (CAMPAGNE)             (COUPE)  0.022807    0.799589  5.617005
9              (COUPE)          (CAMPAGNE)  0.022807    0.160214  5.617005
10             (COUPE)           (COMPLET)  0.016958    0.119131  5.188490
11           (COMPLET)             (COUPE)  0.016958    0.738589  5.188490
12             (COUPE)   

In [32]:
# Save the results to a CSV file.
# The item columns hold the frozensets produced by mlxtend's association_rules;
# written as-is they land in the file as "frozenset({...})" repr strings.
# Flatten each set into a sorted, comma-separated string on a copy, so the CSV
# is readable and `selected_rules` itself is left untouched (cell stays re-runnable).
export_rules = selected_rules.copy()
for item_column in ('Mat hang', 'Mua kem'):
    export_rules[item_column] = export_rules[item_column].apply(
        lambda items: ', '.join(sorted(map(str, items)))
    )
export_rules.to_csv('association_rules.csv', index=False)
