In [9]:
# 导入必要的数据分析和可视化库
import pandas as pd                       # 用于数据处理和分析
import numpy as np                        # 用于数值计算
import statsmodels.api as sm              # 用于统计建模（回归分析）
from statsmodels.tools.sm_exceptions import MissingDataError  # 处理缺失数据异常

import matplotlib.pyplot as plt          # 用于数据可视化
import datetime as dt                     # 用于日期时间处理

# 设置matplotlib的中文字体显示 - 思源宋体无法显示时可以修改为SimSong或SimHei等
plt.rcParams["font.family"] = "Source Han Serif CN"
# 设置图形的默认大小为16x8英寸
plt.rcParams['figure.figsize']=[16,8]

In [None]:
from get_data import get_data
from resample import resample
from cal_return import cal_return

In [None]:
# 股票指数同步性分析类
# 只保留index的数据和计算结果，stock的数据每次调用计算函数的时候读取
# 目前只能使用一年内的计算
class IndexData:
    """
    股票指数数据处理和同步性计算类
    
    这个类用于处理股票指数的高频数据，计算指数成分股的同步性指标
    包括数据读取、重采样、收益率计算和回归分析等功能
    """
    
    def __init__(self, index_name, start, end):
        """
        初始化IndexData对象
        
        参数:
        index_name (str): 指数名称，如"SH000300"（沪深300）
        start (str): 分析开始日期，格式如"2024-01-01"
        end (str): 分析结束日期，格式如"2024-12-31"
        """
        # 类数据识别信息（基本信息）
        self.index_name = index_name      # 指数名称
        self.exg = index_name[:2]         # 交易所代码（SH/SZ）
        self.start = pd.to_datetime(start)  # 开始日期
        self.end = pd.to_datetime(end)      # 结束日期
        
        # 类数据成份信息和指数
        self.composites, self.composite_num = self.get_composites()
        # df_index一年的内存占用大约300MB
        # 以datetime.date 格式存储work_day
        self.df_index, self.workday_list = self.get_index_data()
    
        # 类数据计算信息（在调用set_freq方法后设置）
        self.freq = None           # 重采样频率
        self.t_range = None        # 有效交易时间范围
        self.index_return = None   # 指数收益率序列
        

    def get_composites(self):
        """
        获取指数成分股信息方法
        
        从Excel文件中读取指数的成分股信息，并进行数据清洗和格式化
        
        返回:
        tuple: (composites, composite_num)
            - composites (pd.DataFrame): 包含三列的DataFrame：full_code（完整代码）, exg（交易所）, code（股票代码）
            - composite_num (int): 成分股数量
        """
        index_name = self.index_name
        composites = pd.DataFrame()
        # 从Excel文件读取证券代码列
        composites['full_code'] = pd.read_excel(f'{index_name}.xlsx')['证券代码']

        # 删去同花顺标签（最后两行通常是数据来源信息）
        composites.drop(index=composites.index[-2:], inplace=True)

        # 解析股票代码格式（例如：000001.SZ -> SZ000001）
        composites['exg'] = composites['full_code'].str.split('.').str[1]      # 提取交易所代码
        composites['code'] = composites['full_code'].str.split('.').str[0]     # 提取股票代码
        composites['full_code'] = composites['exg'] + composites['code']       # 重新组合为统一格式
        
        composite_num = composites.shape[0]
        print(f"{index_name}成分股个数：{composite_num}")
        return composites, composite_num
    
    def get_industry(self):


def set_freq(self,freq:str,auction_am=True,first_exclude:int=0):
    """
    计算之前 先确定高频数据的计算频率、计算的时间范围
    如果是全天范围的计算，不得超过3分钟，因为下午从14：57-15：00属于集合竞价
    :param freq: 重采样频率
    :param auction_am: 是否包括早盘集合竞价
    :param first_exclude: 剔除开盘后连续交易期间开始多少个freq的数据 
    :return: 
    """
    self.freq=freq
    
    # 对于指数而言，连续交易之前提供两个价格
    # 9:10:00 昨日收盘价格
    # 9:25:00 集合竞价结束后，由个股开盘价格加权计算得出的指数开盘价格

    # 14:57：00-15:00:00 收盘前3分钟采取集合竞价
    
    # 允许时间范围有延迟 允许延迟为59秒 实际数据中一般延迟不超过10秒
    # 允许59秒的延迟同样意味着最大resample的时间限度为1分钟
    
    # t_last_close=pd.date_range(start="09:10:00",end="09:10:59",freq="S")
    
    t_continuous_am_exclude_5=pd.date_range(start="09:30:00",end="11:30:59",freq="S")
    t_continuous_pm=pd.date_range(start="13:00:00",end="14:57:59",freq="S")
    t_auction_pm=pd.date_range(start="15:00:00",end="15:00:59",freq="S")
    t_range = t_continuous_am_exclude_5.union(t_continuous_pm).union(t_auction_pm)
    
    if auction_am:
        t_auction_am=pd.date_range(start="09:25:00",end="09:25:59",freq="S")
        t_range=t_range.union(t_auction_am)
        
    self.t_range=t_range
    self.index_return=cal_return(self.df_index,self.t_range,self.freq)





    
    def cal_syn(self,start=None,end=None):
        # print(start,end)
        composites=self.composites.copy()
        R2_series=pd.Series(index=composites['full_code'].tolist())
        betas=pd.Series(index=composites['full_code'].tolist())
        vars=pd.Series(index=composites['full_code'].tolist())
        b2v2s=pd.Series(index=composites['full_code'].tolist())
        
        if start is not None and end is not None:
            in_day=False
            for date in pd.date_range(start=start,end=end,freq="D"):
                if date.date() in self.workday_list:
                    in_day=True
            if not in_day: 
                return np.nan,None,None,None,None
        
        for index,composite in composites.iterrows():
            full_code=composite['full_code']
            df_stock, halt_list=self.get_stock_data(full_code,start,end)
        
            stock_return=cal_return(df_stock,self.t_range,self.freq)
            
            #可能有股票选择区间内没有开盘一直处于停牌
            if stock_return.shape[0]>0:
                
                # 这一步的计算可以进一步优化，因为对于所有的个股，计算beta时使用的(XtX)-1Xt都是相同的
                
                # 将index_return对齐到个股上
                # 因为个股可能停牌，但是指数不会停牌
                # 对于期间内停牌的个股，只使用有价格变动区间的数据
                
                Y = stock_return
                X = self.index_return[stock_return.index].copy()
                
                # print(Y.index)
                # print(self.index_return[start:end].index)
                # break
                
                # if (len(X.index)!=len(self.index_return[start:end].index)): 
                    # print(f"{full_code}'s index doesn't fully match between {start} and {end}.")
                
                X = sm.add_constant(X)  # 添加常数项
                try:
                    model = sm.OLS(Y, X).fit()
                except MissingDataError as e:
                    print(f"{full_code} between {start} and {end} raised {e}.")
                    print(X.shape,Y.shape)
                    print(X.isnull().any())
                    print(Y.isnull().any())
                    print(X)
                R2_series.loc[full_code]=model.rsquared
                betas.loc[full_code]=model.params[1]  # 获取 β 值
                vars.loc[full_code]=stock_return.var(ddof=0)
                b2v2s.loc[full_code]=(model.params[1]**2)*(X.iloc[:,0].var(ddof=0))


            else:
                print(f"{full_code} doesn't have return data.")
                
        var_mkt=self.index_return.var(ddof=0)

        # syn2 = (var_mkt * np.square(betas)).sum() / np.sum(vars)
        syn2 = b2v2s.sum()/vars.sum()
        syn = (R2_series*vars).sum()/vars.sum()
        # print(syn,syn2)
        return syn,R2_series,betas,vars
        
    # 这里的period不支持月份运算
    def periodic_cal_syn(self,period,start=None,end=None):
        syns=pd.DataFrame(columns=["syn"])
        if start is None:
            start=self.start
        if end is None:
            end=self.end
        for period_start in pd.date_range(start=start,end=end,freq=period):
            # 对于pd.date_range(start,end) 包含start和end 因此在选择的时候需要不包含end中的日期
            period_end=dt.datetime.strptime(str(period_start),"%Y-%m-%d %H:%M:%S")+parse_timedelta(period)-dt.timedelta(seconds=1)
            period_end=dt.datetime.strftime(period_end,"%Y-%m-%d %H:%M:%S")
            # print(period_start,period_end)
            
            syns.loc[period_start,'syn']=self.cal_syn(period_start,period_end)[0]
            print(f'{period_start} to {period_end} done.')
        return syns

In [23]:
start="2024-09-01"
end="2024-09-04"
cn300=IndexData("SH000300",start,end)


SH000300成分股个数：300
From 2024-09-01 to 2024-09-04 (included), there are 3 workdays.
SH000300 done. 

In [24]:
cn300.set_freq('3min')

In [25]:
cn300.cal_syn()

SH600000 done. SH600009 done. SH600010 done. SH600011 done. SH600015 done. SH600016 done. SH600018 done. SH600019 done. SH600023 done. SH600025 done. SH600026 done. SH600027 done. SH600028 done. SH600029 done. SH600030 done. SH600031 done. SH600036 done. SH600039 done. SH600048 done. SH600050 done. SH600061 done. SH600066 done. SH600085 done. SH600089 done. SH600104 done. SH600111 done. SH600115 done. SH600150 on 2024-09-03 is not found.
SH600150 on 2024-09-04 is not found.
SH600150 done. SH600160 done. SH600161 done. SH600176 done. SH600183 done. SH600188 done. SH600196 done. SH600219 done. SH600233 done. SH600276 done. SH600309 done. SH600332 done. SH600346 done. SH600362 done. SH600372 done. SH600377 done. SH600406 done. SH600415 done. SH600426 done. SH600436 done. SH600438 done. SH600460 done. SH600482 done. SH600489 done. SH600515 done. SH600519 done. SH600547 done. SH600570 done. SH600584 done. SH600585 done. SH600588 done. SH600600 done. SH600660 done. SH600674 done. SH600690 do

(0.09649149598684789,
 SH600000    0.015758
 SH600009    0.061050
 SH600010    0.011147
 SH600011    0.066481
 SH600015    0.002194
               ...   
 SZ300832    0.022697
 SZ300896    0.154243
 SZ300979    0.001061
 SZ300999    0.100112
 SZ301269    0.109364
 Length: 300, dtype: float64,
 SH600000    0.403162
 SH600009    0.590613
 SH600010    0.758152
 SH600011    0.663434
 SH600015    0.180644
               ...   
 SZ300832    0.606739
 SZ300896    0.999351
 SZ300979    0.117476
 SZ300999    0.773638
 SZ301269    1.004478
 Length: 300, dtype: float64,
 SH600000    0.000004
 SH600009    0.000002
 SH600010    0.000021
 SH600011    0.000003
 SH600015    0.000006
               ...   
 SZ300832    0.000007
 SZ300896    0.000003
 SZ300979    0.000005
 SZ300999    0.000002
 SZ301269    0.000004
 Length: 300, dtype: float64,
 SH600000    0.0
 SH600009    0.0
 SH600010    0.0
 SH600011    0.0
 SH600015    0.0
            ... 
 SZ300832    0.0
 SZ300896    0.0
 SZ300979    0.0
 SZ300999

In [7]:
start="2024-01-01"
end="2024-12-31"

cn300=IndexData("SH000300",start,end)
cn300.set_freq("3min")
cn500=IndexData("SH000905",start,end)
cn500.set_freq("5min")
cn1000=IndexData("SH000852",start,end)
cn1000.set_freq("5min")
# syns=cn300.periodic_cal_syn("D",start,end)

SH000300成分股个数：300
From 2024-01-01 to 2024-12-31 (included), there are 241 workdays.
SH000300 done. SH000905成分股个数：500
From 2024-01-01 to 2024-12-31 (included), there are 241 workdays.
SH000905 done. SH000852成分股个数：1000
From 2024-01-01 to 2024-12-31 (included), there are 241 workdays.
SH000852 done. 

In [8]:
# for index_class in [cn300,cn500,cn1000]:
#     cn300.periodic_cal_syn()
result=pd.DataFrame()
for index_class in [cn300,cn500,cn1000]:
    s=index_class.periodic_cal_syn('D')
    result=pd.concat([result,s],axis=1)
    plt.plot(s,label=index_class.index_name)
    print(index_class.index_name)
plt.legend()
plt.show()
result.to_excel(f'high_freq.xlsx')

2024-01-01 00:00:00 to 2024-01-01 23:59:59 done.
2024-01-02 00:00:00 to 2024-01-02 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-01-03 09:25:00    0.0 2024-01-03 09:25:05  09:25:05
2024-01-03 00:00:00 to 2024-01-03 23:59:59 done.
2024-01-04 00:00:00 to 2024-01-04 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-01-05 09:25:00    0.0 2024-01-05 09:25:03  09:25:03
2024-01-05 00:00:00 to 2024-01-05 23:59:59 done.
2024-01-06 00:00:00 to 2024-01-06 23:59:59 done.
2024-01-07 00:00:00 to 2024-01-07 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-01-08 09:25:00    0.0 2024-01-08 09:25:02  09:25:02
2024-01-08 00:00:00 to 2024-01-08 23:59:59 done.
                     Price       original_time      5min
Time                                  

  return 1 - self.ssr/self.centered_tss


2024-03-05 00:00:00 to 2024-03-05 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-03-06 09:25:00    0.0 2024-03-06 09:25:04  09:25:04
2024-03-06 00:00:00 to 2024-03-06 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-03-07 09:25:00    0.0 2024-03-07 09:25:03  09:25:03
2024-03-07 00:00:00 to 2024-03-07 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-03-08 09:25:00    0.0 2024-03-08 09:25:02  09:25:02
2024-03-08 00:00:00 to 2024-03-08 23:59:59 done.
2024-03-09 00:00:00 to 2024-03-09 23:59:59 done.
2024-03-10 00:00:00 to 2024-03-10 23:59:59 done.
2024-03-11 00:00:00 to 2024-03-11 23:59:59 done.
2024-03-12 00:00:00 to 2024-03-12 23:59:59 done.
2024-03-13 00:00:00 to 2024-03-13 23:59:59 done.
2024-03-14 00:00:00 to 2024-03-14 23:59:59 don

  return 1 - self.ssr/self.centered_tss


2024-04-11 00:00:00 to 2024-04-11 23:59:59 done.
2024-04-12 00:00:00 to 2024-04-12 23:59:59 done.
2024-04-13 00:00:00 to 2024-04-13 23:59:59 done.
2024-04-14 00:00:00 to 2024-04-14 23:59:59 done.
2024-04-15 00:00:00 to 2024-04-15 23:59:59 done.
2024-04-16 00:00:00 to 2024-04-16 23:59:59 done.
                     Price       original_time      5min
Time                                                    
2024-04-17 09:25:00    0.0 2024-04-17 09:25:03  09:25:03
2024-04-17 00:00:00 to 2024-04-17 23:59:59 done.
2024-04-18 00:00:00 to 2024-04-18 23:59:59 done.
2024-04-19 00:00:00 to 2024-04-19 23:59:59 done.
2024-04-20 00:00:00 to 2024-04-20 23:59:59 done.
2024-04-21 00:00:00 to 2024-04-21 23:59:59 done.
2024-04-22 00:00:00 to 2024-04-22 23:59:59 done.
2024-04-23 00:00:00 to 2024-04-23 23:59:59 done.


  return 1 - self.ssr/self.centered_tss


2024-04-24 00:00:00 to 2024-04-24 23:59:59 done.
2024-04-25 00:00:00 to 2024-04-25 23:59:59 done.
2024-04-26 00:00:00 to 2024-04-26 23:59:59 done.
2024-04-27 00:00:00 to 2024-04-27 23:59:59 done.
2024-04-28 00:00:00 to 2024-04-28 23:59:59 done.
2024-04-29 00:00:00 to 2024-04-29 23:59:59 done.
2024-04-30 00:00:00 to 2024-04-30 23:59:59 done.
2024-05-01 00:00:00 to 2024-05-01 23:59:59 done.
2024-05-02 00:00:00 to 2024-05-02 23:59:59 done.
2024-05-03 00:00:00 to 2024-05-03 23:59:59 done.
2024-05-04 00:00:00 to 2024-05-04 23:59:59 done.
2024-05-05 00:00:00 to 2024-05-05 23:59:59 done.
2024-05-06 00:00:00 to 2024-05-06 23:59:59 done.
2024-05-07 00:00:00 to 2024-05-07 23:59:59 done.
2024-05-08 00:00:00 to 2024-05-08 23:59:59 done.
2024-05-09 00:00:00 to 2024-05-09 23:59:59 done.
2024-05-10 00:00:00 to 2024-05-10 23:59:59 done.
2024-05-11 00:00:00 to 2024-05-11 23:59:59 done.
2024-05-12 00:00:00 to 2024-05-12 23:59:59 done.
2024-05-13 00:00:00 to 2024-05-13 23:59:59 done.
2024-05-14 00:00:00 

  return 1 - self.ssr/self.centered_tss


SH601211 on 2024-09-30 is not found.
SH601211 doesn't have return data.
2024-09-30 00:00:00 to 2024-09-30 23:59:59 done.
2024-10-01 00:00:00 to 2024-10-01 23:59:59 done.
2024-10-02 00:00:00 to 2024-10-02 23:59:59 done.
2024-10-03 00:00:00 to 2024-10-03 23:59:59 done.
2024-10-04 00:00:00 to 2024-10-04 23:59:59 done.
2024-10-05 00:00:00 to 2024-10-05 23:59:59 done.
2024-10-06 00:00:00 to 2024-10-06 23:59:59 done.
2024-10-07 00:00:00 to 2024-10-07 23:59:59 done.


  return 1 - self.ssr/self.centered_tss
  return 1 - self.ssr/self.centered_tss
  return 1 - self.ssr/self.centered_tss
  return 1 - self.ssr/self.centered_tss


SH601211 on 2024-10-08 is not found.
SH601211 doesn't have return data.


  return 1 - self.ssr/self.centered_tss


2024-10-08 00:00:00 to 2024-10-08 23:59:59 done.


  return 1 - self.ssr/self.centered_tss


SH601211 on 2024-10-09 is not found.
SH601211 doesn't have return data.
2024-10-09 00:00:00 to 2024-10-09 23:59:59 done.


  return 1 - self.ssr/self.centered_tss


2024-10-10 00:00:00 to 2024-10-10 23:59:59 done.


  return 1 - self.ssr/self.centered_tss


2024-10-11 00:00:00 to 2024-10-11 23:59:59 done.
2024-10-12 00:00:00 to 2024-10-12 23:59:59 done.
2024-10-13 00:00:00 to 2024-10-13 23:59:59 done.
2024-10-14 00:00:00 to 2024-10-14 23:59:59 done.
2024-10-15 00:00:00 to 2024-10-15 23:59:59 done.
2024-10-16 00:00:00 to 2024-10-16 23:59:59 done.
2024-10-17 00:00:00 to 2024-10-17 23:59:59 done.
2024-10-18 00:00:00 to 2024-10-18 23:59:59 done.
2024-10-19 00:00:00 to 2024-10-19 23:59:59 done.
2024-10-20 00:00:00 to 2024-10-20 23:59:59 done.
2024-10-21 00:00:00 to 2024-10-21 23:59:59 done.
2024-10-22 00:00:00 to 2024-10-22 23:59:59 done.


  return 1 - self.ssr/self.centered_tss


2024-10-23 00:00:00 to 2024-10-23 23:59:59 done.
2024-10-24 00:00:00 to 2024-10-24 23:59:59 done.
2024-10-25 00:00:00 to 2024-10-25 23:59:59 done.
2024-10-26 00:00:00 to 2024-10-26 23:59:59 done.
2024-10-27 00:00:00 to 2024-10-27 23:59:59 done.


KeyError: "[Timestamp('2024-10-28 15:00:00')] not in index"

In [None]:
pd.to_numeric(syns['syn']).dropna().plot()
plt.show()

In [None]:
start="2024-10-03"
end="2024-10-30"

cn1000=IndexData("SH000905",start,end)
cn1000.set_freq("10min")
# cn1000.index_return
syns=cn1000.periodic_cal_syn("D",start,end)

In [None]:
freq="10min"

In [None]:
cn1000.df_index.set_index('Time').resample(freq).last()

In [None]:
resample(cn1000.df_index,cn1000.t_range,cn1000.freq)

In [None]:
cn1000.t_range

In [None]:
cn1000.index_return

In [None]:
start="2024-08-13"
end="2024-09-23"

cn300_not=IndexData("SH000300",start,end)
cn300_not.set_freq("5min")
syn_not,R2_series_not,betas_not,vars_not=cn300_not.cal_syn()

In [None]:
start="2024-09-24"
end="2024-11-07"

cn300_is=IndexData("SH000300",start,end)
cn300_is.set_freq("5min")
syn_is,R2_series_is,betas_is,vars_is=cn300_is.cal_syn()

In [None]:
syn_not,syn_is

V.	同步性的意义不明确的问题

In [None]:
df_600=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH600519.csv")[['Time','Price']]
df_300=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH000300.csv")[['Time','Price']]

In [None]:
df=(pd.merge(df_300,df_600,on="Time",how="outer",suffixes=("_300","_600")).ffill().bfill().set_index('Time'))
df.index=pd.to_datetime(df.index)
df.sort_index(inplace=True)
(df.resample('30s').mean().pct_change()+1).cumprod().plot()

In [None]:
Y=df[['Price_600']].resample('2min').mean().pct_change().dropna()
X=df[['Price_300']].resample('2min').mean().pct_change().dropna()
X = sm.add_constant(X)  # 添加常数项
model = sm.OLS(Y, X).fit()

In [None]:
model.summary()

In [None]:
df.resample('5min').mean().pct_change().dropna().plot(x="Price_300",y="Price_600",kind="scatter")

报错的试验田

In [None]:
df_index=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH000905.csv")[['Time','Price']]
df_index.loc[0]

In [None]:
df_index=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH000905.csv")[['Time','Price']]
df_index['Time']=pd.to_datetime(df_index['Time'])
# df_test.index=pd.to_datetime(df_test.index)
X=cal_return(df_index,cn300.t_range,"5min")

In [None]:
df_test=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH600004.csv")[['Time','Price']]
df_test['Time']=pd.to_datetime(df_test['Time'])
# df_test.index=pd.to_datetime(df_test.index)
# Y=cal_return(df_test,cn300.t_range,"5min")

In [None]:
df_test=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH600004.csv")
df_test

In [None]:
df_test=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH600004.csv")
df_test.loc[df_test.shape[0]]=df_test.iloc[0]
df_test.loc[df_test.index[-1],'Time']="2024-01-02 09:15:00"
df_test['Time']=pd.to_datetime(df_test['Time'])
df_test.sort_values("Time",inplace=True,ascending=True)
df_test

In [None]:
df_test=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH600004.csv")

df_test.loc[df_test.index[-1]:'Time']

In [None]:
df_test


In [None]:
X=sm.add_constant(X)
model = sm.OLS(Y, X).fit()
model.summary()

In [None]:
cal_return(df_test,cn300.t_range,"5min").shape


In [None]:
df_test

In [None]:
start="2024-01-01 12:00:00"
end="2024-04-03 12:00:00"
for time in pd.date_range(start,end,freq="1MIN"):    print(time)

In [None]:
df_600=pd.read_csv("E:\\2024\\ws20240102fb\\SZ\\SZ000003.csv")[['Time','Price']]
df_300=pd.read_csv("E:\\2024\\ws20240102fb\\SH\\SH000300.csv")[['Time','Price']]

In [None]:
df=(pd.merge(df_300,df_600,on="Time",how="outer",suffixes=("_300","_600")).ffill().bfill().set_index('Time'))
df.index=pd.to_datetime(df.index)
df.sort_index(inplace=True)
(df.resample('30s').mean().pct_change()+1).cumprod().plot()

In [None]:
Y=df[['Price_600']].resample('2min').mean().pct_change().dropna()
X=df[['Price_300']].resample('2min').mean().pct_change().dropna()
X = sm.add_constant(X)  # 添加常数项
model = sm.OLS(Y, X).fit()

In [None]:
model.summary()

In [None]:
df.resample('5min').mean().pct_change().dropna().plot(x="Price_300",y="Price_600",kind="scatter")