In [1]:
## Import the required packages
import fds  # financial database; the 1-minute stock bars are fetched from it below
import pandas as pd
import numpy as np
import warnings 

# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings (for example
# SettingWithCopyWarning) — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
## Load the data
# Trading day analysed here; it selects both the limit-price file and the bars.
trade_date = "2020-01-02"

#-- Read the limit-up / limit-down prices for the trading day
df_limit = pd.read_parquet(f"limit_{trade_date}.parq")
# Convert `date` from string to datetime64 before it is used as a merge key.
df_limit["date"] = pd.to_datetime(df_limit["date"])

#-- Fetch the 1-minute bars for the trading day from the database
df = fds.bars(trade_date, trade_date, period="1m")

#-- Left-join the limit prices onto every bar by date and symbol
df1 = pd.merge(df, df_limit, on=['date', 'symbol'], how='left')

## Flag, for every minute bar, whether the limit-up / limit-down price was touched
# A bar without a matching limit price has NaN limits, so both flags are False.
df1['up'] = df1["high"] >= df1['limit_up_price']      # high >= limit-up price -> limit-up
df1['down'] = df1["low"] <= df1['limit_down_price']   # low <= limit-down price -> limit-down

# Take an explicit copy so that adding `dt` writes to an independent frame
# instead of a slice of df1 (chained assignment).
df2 = df1[['date', 'time', 'symbol', 'up', 'down']].copy()
# Parse the time-of-day string from df2's own rows (not from the pre-merge `df`,
# which only lines up with df2 while the merge keeps row count and order).
# Time-only strings are parsed with the current calendar date (the stored
# output shows 2023-07-13), so `dt` is only meaningful as a time of day.
df2['dt'] = pd.to_datetime(df2['time'])

df2

Unnamed: 0,date,time,symbol,up,down,dt
0,2020-01-02,09:30:00,000001,False,False,2023-07-13 09:30:00
1,2020-01-02,09:31:00,000001,False,False,2023-07-13 09:31:00
2,2020-01-02,09:32:00,000001,False,False,2023-07-13 09:32:00
3,2020-01-02,09:33:00,000001,False,False,2023-07-13 09:33:00
4,2020-01-02,09:34:00,000001,False,False,2023-07-13 09:34:00
...,...,...,...,...,...,...
901335,2020-01-02,14:56:00,688399,False,False,2023-07-13 14:56:00
901336,2020-01-02,14:57:00,688399,False,False,2023-07-13 14:57:00
901337,2020-01-02,14:58:00,688399,False,False,2023-07-13 14:58:00
901338,2020-01-02,14:59:00,688399,False,False,2023-07-13 14:59:00


In [3]:
## Decide, for every 5-minute window, whether each stock is limit-up / limit-down
## Rule: a window counts as limit-up when at least 3 of its 5 minutes are limit-up,
## and as limit-down when at least 3 of its 5 minutes are limit-down
#-- Build the list of 5-minute boundaries covering both trading sessions
session_bounds = [('9:30', '11:30'), ('13:00', '15:00')]
# One 5-minute DatetimeIndex per session: `t` starts as the morning grid,
# `t1` is the afternoon grid.
t, t1 = (pd.date_range(start, end, freq='5min') for start, end in session_bounds)
# Morning boundaries followed by afternoon boundaries in a single index.
t = t.append(t1)

t

DatetimeIndex(['2023-07-13 09:30:00', '2023-07-13 09:35:00',
               '2023-07-13 09:40:00', '2023-07-13 09:45:00',
               '2023-07-13 09:50:00', '2023-07-13 09:55:00',
               '2023-07-13 10:00:00', '2023-07-13 10:05:00',
               '2023-07-13 10:10:00', '2023-07-13 10:15:00',
               '2023-07-13 10:20:00', '2023-07-13 10:25:00',
               '2023-07-13 10:30:00', '2023-07-13 10:35:00',
               '2023-07-13 10:40:00', '2023-07-13 10:45:00',
               '2023-07-13 10:50:00', '2023-07-13 10:55:00',
               '2023-07-13 11:00:00', '2023-07-13 11:05:00',
               '2023-07-13 11:10:00', '2023-07-13 11:15:00',
               '2023-07-13 11:20:00', '2023-07-13 11:25:00',
               '2023-07-13 11:30:00', '2023-07-13 13:00:00',
               '2023-07-13 13:05:00', '2023-07-13 13:10:00',
               '2023-07-13 13:15:00', '2023-07-13 13:20:00',
               '2023-07-13 13:25:00', '2023-07-13 13:30:00',
               '2023-07-

In [4]:
#-- Slice the minute bars window by window and count flagged minutes per symbol
# Each window is the half-open interval (start, end] between two consecutive
# boundaries of `t`, and its result rows are labelled with the window start.
window_counts = []
for window_start, window_end in zip(t[:-1], t[1:]):
    in_window = df2[(window_start < df2['dt']) & (df2['dt'] <= window_end)]
    # Summing the boolean flags gives the number of limit-up / limit-down minutes.
    counts = in_window.groupby('symbol')[['up', 'down']].sum()
    if counts.empty:
        # A window that contains no bars (e.g. the 11:30 -> 13:00 gap when no bar
        # falls inside it) contributes no rows, so it is skipped.
        continue
    counts['dt'] = window_start
    window_counts.append(counts)

# Concatenate once at the end instead of growing the frame inside the loop.
if window_counts:
    df4 = pd.concat(window_counts)
else:
    # No window had any bars: keep the expected columns and symbol index.
    df4 = pd.DataFrame(columns=['up', 'down', 'dt'], index=pd.Index([], name='symbol'))

df4

Unnamed: 0_level_0,up,down,dt
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
000001,0,0,2023-07-13 09:30:00
000002,0,0,2023-07-13 09:30:00
000004,0,0,2023-07-13 09:30:00
000005,0,0,2023-07-13 09:30:00
000006,0,0,2023-07-13 09:30:00
...,...,...,...
688368,0,0,2023-07-13 14:55:00
688369,0,0,2023-07-13 14:55:00
688388,0,0,2023-07-13 14:55:00
688389,0,0,2023-07-13 14:55:00


In [5]:
#-- Build the final result table
flags = df4.reset_index()  # move `symbol` from the index into a regular column
flags['date'] = '2020-01-02'  # the trading day being classified
flags['time'] = flags['dt'].dt.strftime('%H:%M:%S')  # window start as an HH:MM:SS string
# type: 1 = limit-up (>= 3 limit-up minutes), -1 = limit-down (>= 3 limit-down
# minutes), NaN = neither. The limit-down condition is listed first so that it
# takes precedence when both thresholds are met.
flags['type'] = np.select(
    [flags['down'] >= 3, flags['up'] >= 3],
    [-1.0, 1.0],
    default=np.nan,
)
df4 = flags[['date', 'time', 'symbol', 'type']]

df4

Unnamed: 0,date,time,symbol,type
0,2020-01-02,09:30:00,000001,
1,2020-01-02,09:30:00,000002,
2,2020-01-02,09:30:00,000004,
3,2020-01-02,09:30:00,000005,
4,2020-01-02,09:30:00,000006,
...,...,...,...,...
179515,2020-01-02,14:55:00,688368,
179516,2020-01-02,14:55:00,688369,
179517,2020-01-02,14:55:00,688388,
179518,2020-01-02,14:55:00,688389,


In [6]:
# Keep only the rows whose `type` is set, i.e. the windows flagged as
# limit-up or limit-down on this trading day.
has_type = df4['type'].notna()
df5 = df4.loc[has_type]

In [7]:
# Display the flagged windows (rows of df4 with a non-null `type`).
df5

Unnamed: 0,date,time,symbol,type
288,2020-01-02,09:30:00,000760,-1.0
307,2020-01-02,09:30:00,000792,1.0
431,2020-01-02,09:30:00,000971,1.0
836,2020-01-02,09:30:00,002385,1.0
1108,2020-01-02,09:30:00,002662,1.0
...,...,...,...,...
179319,2020-01-02,14:55:00,603738,1.0
179356,2020-01-02,14:55:00,603825,1.0
179381,2020-01-02,14:55:00,603880,1.0
179408,2020-01-02,14:55:00,603920,1.0
