In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import warnings

# 禁止警告输出
warnings.filterwarnings('ignore')

# 商品列表
items = ['西红柿', '排骨', '鸡蛋', '茄子', '袜子', '酸奶', '土豆', '鞋子']

# 读取数据
data = pd.read_excel('tr.xlsx', header=None)
data = data.iloc[:, 1:]

# 构建商品与出现位置的字典
item_dict = {item: np.where(data == item)[0] for item in items}

# 构建布尔值数据表
D = pd.DataFrame({item: np.zeros(len(data)) for item in items})

for item, pos in item_dict.items():
    D.loc[pos, item] = 1

# 获取字段名称并转化为列表
c = list(D.columns)

# 定义最小置信度和最小支持度
min_confidence = 0.5
min_support = 0.2

# 预定义列表，用于存放规则、支持度和置信度
rules = []
supports = []
confidences = []

for i, item1 in enumerate(c):
    for item2 in c[i+1:]:
        # 生成规则
        rule = item1 + '->' + item2
        # 计算支持度和置信度
        support = D[[item1, item2]].all(axis=1).mean()
        confidence = D[item2][D[item1] == 1].mean()
        # 如果满足最小置信度和最小支持度的要求，则加入规则列表
        if support >= min_support and confidence >= min_confidence:
            rules.append(rule)
            supports.append(support)
            confidences.append(confidence)

# 将结果存入数据框
result = pd.DataFrame({'rule': rules, 'support': supports, 'confidence': confidences})

# 将结果导出到Excel
result.to_excel('rule1.xlsx', index=False)

# 导入自行编写的apriori函数
import apriori

# 设定最小支持度和最小置信度
min_support = 0.2
min_confidence = 0.4

# 生成关联规则，并将结果存入Excel
output_file = 'apriori_rules.xls'
apriori.find_rule(D, min_support, min_confidence, ms='->').to_excel(output_file, index=False)


正在进行第1次搜索...
数目：21...

正在进行第2次搜索...
数目：4...

结果为：
          support  confidence
西红柿->排骨  0.444444    0.800000
排骨->西红柿  0.444444    0.800000
袜子->鸡蛋   0.222222    0.666667
茄子->西红柿  0.222222    0.500000
茄子->排骨   0.222222    0.500000
茄子->鸡蛋   0.222222    0.500000


In [10]:
import pandas as pd
import numpy as np
import warnings
from itertools import combinations
warnings.filterwarnings('ignore')

data=pd.read_excel('国际股票价格指数日交易数据表.xlsx')
code=data['Indexcd'].unique()

# 将每个股票代码的涨跌情况进行统计
def get_changes(df):
    p1=df['Clsidx'].values[:-1]
    p2=df['Clsidx'].values[1:]
    changes=np.zeros(len(p1))
    changes[(p2-p1)/p1<=-0.005]=1
    return pd.Series(changes, index=df['Trddt'].values[1:])

# 将每个股票代码的数据处理成时间为行、股票代码为列的数据框
data_processed = pd.concat([get_changes(data.loc[data['Indexcd'].values==c,['Trddt','Clsidx']].sort_values('Trddt')) for c in code], axis=1, keys=code)

# 选取沪深300指数的交易日期作为关联规则挖掘的时间点
d000300=data.loc[data['Indexcd'].values=='000300',['Trddt','Clsidx']].sort_values('Trddt')
tdate=list(d000300['Trddt'].values[1:])

# 获取所有股票代码的涨跌情况在关联规则挖掘时间点的数据
data_processed = data_processed.loc[tdate]

# 使用combinations函数获取所有可能的关联规则
rules = list(combinations(code, 2))

# 定义函数，用于计算每个关联规则的支持度和置信度
def get_support_confidence(rule, data):
    supp = ((data[rule[0]] == 1) & (data[rule[1]] == 1)).sum() / len(data)
    conf = supp / (data[rule[0]] == 1).sum()
    return supp, conf

# 计算所有关联规则的支持度和置信度
results = []
for rule in rules:
    supp, conf = get_support_confidence(rule, data_processed)
    if supp >= 0.08 and conf >= 0.9:
        results.append({'rule': f"{rule[0]}--{rule[1]}", 'support': supp, 'confidence': conf})

# 将结果导出到Excel
pd.DataFrame(results).to_excel('rule1.xlsx')

import apriori as ap
support = 0.08 #最小支持度
confidence = 0.9 #最小置信度
ms = '--' #连接符，
outputfile = 'apriori_rules.xls' #结果文件
ap.find_rule(Data, support, confidence, ms).to_excel(outputfile) #联动



正在进行第1次搜索...
数目：66...

正在进行第2次搜索...
数目：186...

正在进行第3次搜索...
数目：85...

正在进行第4次搜索...
数目：6...

结果为：
                            support  confidence
DJI--FTSE--GDAXI--FCHI     0.097514    0.968354
FTSE--GDAXI--SENSEX--FCHI  0.087317    0.958042
DJI--FTSE--FCHI            0.104525    0.937143
DJI--GDAXI--FCHI           0.113448    0.936842
DJI--FCHI--FTSE--GDAXI     0.097514    0.932927
FTSE--GDAXI--N225--FCHI    0.085405    0.930556
FTSE--GDAXI--MCIX--FCHI    0.109624    0.929730
FTSE--GDAXI--HSI--FCHI     0.095602    0.914634
FCHI--FTSE--HSI--GDAXI     0.095602    0.914634
FTSE--GDAXI--FCHI          0.168260    0.913495
FCHI--FTSE--N225--GDAXI    0.085405    0.911565
GDAXI--SENSEX--FCHI        0.105163    0.906593
DJI--FTSE--GDAXI           0.100701    0.902857


In [14]:
import pandas as pd
data=pd.read_excel('tr.xlsx')
data=data.iloc[:,1:]
data.fillna("",inplace=True)
data=data.apply(lambda a:",".join(list(a)),axis=1)
bool_df=data.str.get_dummies(",")
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
out1=apriori(bool_df,min_support=0.2,use_colnames=True)
out2=association_rules(out1,metric="confidence",min_threshold=0.6)



In [17]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
data = pd.read_excel('tr.xlsx')
data=data.iloc[:,1:]
data.fillna("",inplace=True)
data=data.apply(lambda a:",".join(list(a)),axis=1)
bool_df=data.str.get_dummies(",")
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
out_1=apriori(bool_df,min_support=0.2,use_colnames=True)
out_2=association_rules(out_1,metric='confidence',min_threshold=0.5)
out_1

Unnamed: 0,support,itemsets
0,0.25,(土豆)
1,0.5,(排骨)
2,0.5,(茄子)
3,0.375,(袜子)
4,0.5,(西红柿)
5,0.25,(酸奶)
6,0.5,(鸡蛋)
7,0.25,"(排骨, 茄子)"
8,0.375,"(排骨, 西红柿)"
9,0.25,"(西红柿, 茄子)"


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(排骨),(茄子),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
1,(茄子),(排骨),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
2,(排骨),(西红柿),0.5,0.5,0.375,0.75,1.5,0.125,2.0,0.666667
3,(西红柿),(排骨),0.5,0.5,0.375,0.75,1.5,0.125,2.0,0.666667
4,(西红柿),(茄子),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
5,(茄子),(西红柿),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
6,(鸡蛋),(茄子),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
7,(茄子),(鸡蛋),0.5,0.5,0.25,0.5,1.0,0.0,1.0,0.0
8,(鸡蛋),(袜子),0.5,0.375,0.25,0.5,1.333333,0.0625,1.25,0.5
9,(袜子),(鸡蛋),0.375,0.5,0.25,0.666667,1.333333,0.0625,1.5,0.4


In [19]:
bool_df

Unnamed: 0,土豆,排骨,茄子,袜子,西红柿,酸奶,鞋子,鸡蛋
0,0,0,1,0,1,0,0,0
1,0,0,0,1,0,0,0,1
2,0,1,1,0,1,0,0,0
3,0,1,0,1,1,1,0,0
4,0,0,1,0,0,1,0,1
5,0,1,1,0,0,0,0,1
6,1,0,0,1,0,0,0,1
7,1,1,0,0,1,0,1,0
