In [11]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load the data from the Excel workbook and preview the first five rows.
# Original note: the data is assumed to be a list of transactions.
# NOTE(review): the preview looks like one row per product within an order
# rather than one row per transaction — confirm against the workbook.
df = pd.read_excel("data.xls")
df.head()

Unnamed: 0,OrderID,ProductName,CategoryName,CategoryName_Description,Per_Price,Quantity
0,10248,Queso Cabrales,Dairy Products,Cheeses,14.0,12
1,10248,Singaporean Hokkien Fried Mee,Grains/Cereals,"Breads, crackers, pasta, and cereal",9.8,10
2,10248,Mozzarella di Giovanni,Dairy Products,Cheeses,34.8,5
3,10249,Tofu,Produce,Dried fruit and bean curd,18.6,9
4,10249,Manjimup Dried Apples,Produce,Dried fruit and bean curd,42.4,40


In [12]:
# Build one transaction per order: gather every CategoryName that shares
# an OrderID into a single Python list (duplicates within an order are kept).
transactions = (
    df.groupby("OrderID", group_keys=False)["CategoryName"]
    .apply(list)
)
transactions.head(6)

OrderID
10248    [Dairy Products, Grains/Cereals, Dairy Products]
10249                                  [Produce, Produce]
10250                      [Seafood, Produce, Condiments]
10251        [Grains/Cereals, Grains/Cereals, Condiments]
10252       [Confections, Dairy Products, Dairy Products]
10253            [Dairy Products, Beverages, Confections]
Name: CategoryName, dtype: object

In [None]:
# Convert the per-order item lists into a boolean matrix (required step):
# one row per transaction, one column per distinct item.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

In [14]:
# Mine the frequent itemsets; this step only needs a minimum support.
min_supp = 0.1
frequent_itemsets = apriori(df_encoded, min_support=min_supp, use_colnames=True)

# Derive association rules and enforce BOTH the lift and confidence thresholds.
# association_rules() filters on a single metric per call, so the lift floor is
# passed to it and the confidence floor is applied to the resulting frame.
# (Previously min_conf was defined but never used, so no confidence filter ran.)
# NOTE(review): a lift floor of 0.1 keeps rules with lift < 1 as well; raise it
# above 1 if only positively associated rules are wanted — confirm intent.
min_conf = 0.1
min_lift = 0.1
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=min_lift)
    .loc[lambda candidates: candidates["confidence"] >= min_conf]
    # Renumber after filtering so the displayed index has no gaps.
    .reset_index(drop=True)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Beverages),(Confections),0.426506,0.355422,0.143373,0.336158,0.945801,1.0,-0.008216,0.970982,-0.090845,0.224528,-0.029885,0.369774
1,(Confections),(Beverages),0.355422,0.426506,0.143373,0.40339,0.945801,1.0,-0.008216,0.961254,-0.081644,0.224528,-0.040308,0.369774
2,(Dairy Products),(Beverages),0.36506,0.426506,0.13253,0.363036,0.851187,1.0,-0.02317,0.900356,-0.215901,0.201097,-0.110672,0.336885
3,(Beverages),(Dairy Products),0.426506,0.36506,0.13253,0.310734,0.851187,1.0,-0.02317,0.921183,-0.233629,0.201097,-0.085561,0.336885
4,(Beverages),(Seafood),0.426506,0.350602,0.126506,0.29661,0.846002,1.0,-0.023028,0.92324,-0.240933,0.194444,-0.083142,0.328717
5,(Seafood),(Beverages),0.350602,0.426506,0.126506,0.360825,0.846002,1.0,-0.023028,0.897241,-0.218938,0.194444,-0.114528,0.328717
6,(Dairy Products),(Confections),0.36506,0.355422,0.113253,0.310231,0.872853,1.0,-0.016497,0.934484,-0.186608,0.186508,-0.070109,0.314438
7,(Confections),(Dairy Products),0.355422,0.36506,0.113253,0.318644,0.872853,1.0,-0.016497,0.931877,-0.184332,0.186508,-0.073103,0.314438
8,(Confections),(Seafood),0.355422,0.350602,0.10241,0.288136,0.82183,1.0,-0.022202,0.912249,-0.251687,0.169661,-0.096192,0.290116
9,(Seafood),(Confections),0.350602,0.355422,0.10241,0.292096,0.82183,1.0,-0.022202,0.910545,-0.250286,0.169661,-0.098243,0.290116


In [18]:
# Summarise the rules: keep the itemset columns plus support, confidence and
# lift, and relabel antecedents/consequents as `base`/`add`.
resultshow = rules[
    ['antecedents', 'consequents', 'support', 'confidence', 'lift']
].rename(columns={'antecedents': 'base', 'consequents': 'add'})
print(resultshow.tail(8))

                base               add   support  confidence      lift
4        (Beverages)         (Seafood)  0.126506    0.296610  0.846002
5          (Seafood)       (Beverages)  0.126506    0.360825  0.846002
6   (Dairy Products)     (Confections)  0.113253    0.310231  0.872853
7      (Confections)  (Dairy Products)  0.113253    0.318644  0.872853
8      (Confections)         (Seafood)  0.102410    0.288136  0.821830
9          (Seafood)     (Confections)  0.102410    0.292096  0.821830
10  (Dairy Products)         (Seafood)  0.110843    0.303630  0.866025
11         (Seafood)  (Dairy Products)  0.110843    0.316151  0.866025
