# 15.1.3 Apriori算法的代码实现

# 1.apyori库代码实现关联规则

In [1]:
# Install apyori: uncomment the line below and run this cell.
# (%pip is preferred over !pip because it installs into the environment of the running kernel.)
# %pip install apyori

In [2]:
# Toy dataset: five transactions, each given as a list of item labels.
transactions = [
    ['A', 'B', 'C'],
    ['A', 'B'],
    ['B', 'C'],
    ['A', 'B', 'C', 'D'],
    ['B', 'C', 'D'],
]

In [3]:
from apyori import apriori

# Mine the transactions, keeping itemsets with support >= 0.4 and
# rules with confidence >= 0.8.
rules = apriori(
    transactions,
    min_support=0.4,
    min_confidence=0.8,
)
# Materialise the returned iterable into a list so the records can be
# indexed (results[0]) and looped over in later cells.
results = [record for record in rules]

In [4]:
# Display the mined records: each RelationRecord carries an itemset, its support,
# and the ordered_statistics (rules) derived from it.
results

[RelationRecord(items=frozenset({'B'}), support=1.0, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'B'}), confidence=1.0, lift=1.0)]),
 RelationRecord(items=frozenset({'C'}), support=0.8, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'C'}), confidence=0.8, lift=1.0)]),
 RelationRecord(items=frozenset({'B', 'A'}), support=0.6, ordered_statistics=[OrderedStatistic(items_base=frozenset({'A'}), items_add=frozenset({'B'}), confidence=1.0, lift=1.0)]),
 RelationRecord(items=frozenset({'B', 'C'}), support=0.8, ordered_statistics=[OrderedStatistic(items_base=frozenset({'B'}), items_add=frozenset({'C'}), confidence=0.8, lift=1.0), OrderedStatistic(items_base=frozenset({'C'}), items_add=frozenset({'B'}), confidence=1.0, lift=1.0)]),
 RelationRecord(items=frozenset({'B', 'D'}), support=0.4, ordered_statistics=[OrderedStatistic(items_base=frozenset({'D'}), items_add=frozenset({'B'}), confidence=1.0, lift=1.0)]),
 RelationRecor

In [5]:
# Check which container type holds the rules of a single record.
type(results[0].ordered_statistics)

list

In [6]:
for record in results:  # one record per frequent itemset
    for stat in record.ordered_statistics:  # one statistic per rule derived from that itemset
        lhs = ', '.join(stat.items_base)  # antecedent items joined into text
        rhs = ', '.join(stat.items_add)  # consequent items joined into text
        if lhs:  # skip rules whose antecedent text is empty
            print(lhs + ' → ' + rhs)  # show the rule as "antecedent → consequent"

A → B
B → C
C → B
D → B
D → C
C, A → B
B, D → C
C, D → B


In [7]:
# Append ? to a name to view its built-in documentation.
# apriori?

# 2.mlxtend库代码实现关联规则

In [8]:
# Install mlxtend: uncomment the line below and run this cell.
# (%pip is preferred over !pip because it installs into the environment of the running kernel.)
# %pip install mlxtend

In [9]:
# The same five-transaction toy dataset used in the apyori section, defined again here.
transactions = [
    ['A', 'B', 'C'],
    ['A', 'B'],
    ['B', 'C'],
    ['A', 'B', 'C', 'D'],
    ['B', 'C', 'D'],
]

In [10]:
from mlxtend.preprocessing import TransactionEncoder

TE = TransactionEncoder()  # encoder object for the transaction lists
# Learn the set of items, then encode every transaction as a row of booleans
# (True where the transaction contains the item).
data = TE.fit(transactions).transform(transactions)

In [11]:
# Display the encoded boolean array: one row per transaction, one column per item.
data

array([[ True,  True,  True, False],
       [ True,  True, False, False],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True,  True]])

In [12]:
# Append ? to a name to view its built-in documentation.
# TE.columns_?

In [13]:
import pandas as pd

# Wrap the boolean array in a DataFrame whose columns are labelled by the encoder's item names.
df = pd.DataFrame(
    data=data,
    columns=TE.columns_,
)

In [14]:
# Display the boolean transaction table.
df

Unnamed: 0,A,B,C,D
0,True,True,True,False
1,True,True,False,False
2,False,True,True,False
3,True,True,True,True
4,False,True,True,True


In [15]:
# NOTE: this import rebinds the name `apriori`, replacing the apyori function
# imported earlier in the notebook.
from mlxtend.frequent_patterns import apriori

# Find the frequent itemsets with support of at least 0.4.
# use_colnames=True reports itemsets by df's column names instead of column indices.
items = apriori(
    df,
    min_support=0.4,
    use_colnames=True,
)

In [16]:
# Append ? to a name to view its built-in documentation.
# apriori?

In [17]:
# Display the frequent itemsets together with their support.
items

Unnamed: 0,support,itemsets
0,0.6,(A)
1,1.0,(B)
2,0.8,(C)
3,0.4,(D)
4,0.6,"(B, A)"
5,0.4,"(C, A)"
6,0.8,"(B, C)"
7,0.4,"(B, D)"
8,0.4,"(C, D)"
9,0.4,"(B, C, A)"
10,0.4,"(B, C, D)"


In [18]:
# Number of items contained in each frequent itemset.
items['itemsets'].apply(len)

0     1
1     1
2     1
3     1
4     2
5     2
6     2
7     2
8     2
9     3
10    3
Name: itemsets, dtype: int64

In [19]:
# Keep only the frequent itemsets that contain two or more items.
items.loc[items['itemsets'].map(len) >= 2]

Unnamed: 0,support,itemsets
4,0.6,"(B, A)"
5,0.4,"(C, A)"
6,0.8,"(B, C)"
7,0.4,"(B, D)"
8,0.4,"(C, D)"
9,0.4,"(B, C, A)"
10,0.4,"(B, C, D)"


In [20]:
# Derive strong association rules from the frequent itemsets, keeping those
# whose confidence is at least 0.8.
from mlxtend.frequent_patterns import association_rules

rules = association_rules(
    items,
    metric='confidence',  # the metric that min_threshold is applied to
    min_threshold=0.8,
)

In [21]:
# Append ? to a name to view its built-in documentation.
# association_rules?

In [22]:
# Display the rules table (antecedents, consequents and their metrics).
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(A),(B),0.6,1.0,0.6,1.0,1.0,0.0,inf
1,(B),(C),1.0,0.8,0.8,0.8,1.0,0.0,1.0
2,(C),(B),0.8,1.0,0.8,1.0,1.0,0.0,inf
3,(D),(B),0.4,1.0,0.4,1.0,1.0,0.0,inf
4,(D),(C),0.4,0.8,0.4,1.0,1.25,0.08,inf
5,"(C, A)",(B),0.4,1.0,0.4,1.0,1.0,0.0,inf
6,"(B, D)",(C),0.4,0.8,0.4,1.0,1.25,0.08,inf
7,"(C, D)",(B),0.4,1.0,0.4,1.0,1.0,0.0,inf
8,(D),"(B, C)",0.4,0.8,0.4,1.0,1.25,0.08,inf


In [23]:
# Print every rule in a readable "antecedent → consequent" form.
for antecedent, consequent in zip(rules['antecedents'], rules['consequents']):  # walk the rules row by row
    lhs = ', '.join(antecedent)  # antecedent items joined into text
    rhs = ', '.join(consequent)  # consequent items joined into text
    print(lhs + ' → ' + rhs)

A → B
B → C
C → B
D → B
D → C
C, A → B
B, D → C
C, D → B
D → B, C
