In [74]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import scipy.optimize as optimize
from scipy.stats import norm

import warnings
warnings.filterwarnings("ignore")

In [3]:
import rqdatac as rq
rq.init()

In [4]:
index_weight = pd.read_json("index_weight.json")[0]
index_weight

PS    0.463813
V     0.141224
SF    0.153996
SA    0.065467
LC    0.135766
JM   -0.020120
MA    0.063913
AO   -0.005666
RU   -0.057773
SP    0.059381
Name: 0, dtype: float64

In [10]:
si_data = rq.futures.get_dominant_price("SI", frequency="1m").loc["SI"]
si_data

Unnamed: 0_level_0,trading_date,dominant_id,open,close,high,low,total_turnover,volume,open_interest
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-06-23 09:01:00,2025-06-23,SI2509,7415.0,7455.0,7460.0,7400.0,0,12150.0,306603.0
2025-06-23 09:02:00,2025-06-23,SI2509,7450.0,7425.0,7450.0,7420.0,0,5160.0,306403.0
2025-06-23 09:03:00,2025-06-23,SI2509,7425.0,7430.0,7435.0,7415.0,0,2793.0,306743.0
2025-06-23 09:04:00,2025-06-23,SI2509,7430.0,7440.0,7445.0,7430.0,0,6064.0,306858.0
2025-06-23 09:05:00,2025-06-23,SI2509,7440.0,7420.0,7440.0,7420.0,0,3458.0,306976.0
...,...,...,...,...,...,...,...,...,...
2025-09-22 14:56:00,2025-09-22,SI2511,8980.0,8970.0,8985.0,8970.0,0,1695.0,291268.0
2025-09-22 14:57:00,2025-09-22,SI2511,8975.0,8970.0,8975.0,8965.0,0,1638.0,290674.0
2025-09-22 14:58:00,2025-09-22,SI2511,8965.0,8965.0,8970.0,8965.0,0,1235.0,290032.0
2025-09-22 14:59:00,2025-09-22,SI2511,8965.0,8965.0,8970.0,8960.0,0,3108.0,289033.0


In [24]:
def preprocess(data: tuple):

    minute_price = data["close"]
    minute_price.loc[data["trading_date"].iloc[0] + pd.Timedelta(hours=9)] = data["open"].iloc[0]
    minute_price = minute_price.sort_index()
    trading_date = minute_price.index.strftime("%Y-%m-%d")
    mask = (minute_price.index >= pd.to_datetime(trading_date + " 09:00:00")) & \
           (minute_price.index <= pd.to_datetime(trading_date + " 15:00:00"))
    minute_price = minute_price[mask]

    return minute_price

In [59]:
si_minute = si_data.groupby("trading_date").apply(preprocess)
si_minute.name = "SI"
si_val = si_minute.groupby(level=0).apply(lambda x: (((x.diff() / x.shift()).fillna(0) + 1).cumprod())).droplevel(0)
si_val

trading_date  datetime           
2025-06-23    2025-06-23 09:00:00    1.000000
              2025-06-23 09:01:00    1.005394
              2025-06-23 09:02:00    1.001349
              2025-06-23 09:03:00    1.002023
              2025-06-23 09:04:00    1.003372
                                       ...   
2025-09-22    2025-09-22 14:56:00    0.966074
              2025-09-22 14:57:00    0.966074
              2025-09-22 14:58:00    0.965536
              2025-09-22 14:59:00    0.965536
              2025-09-22 15:00:00    0.963920
Name: SI, Length: 14916, dtype: float64

In [26]:
all_minute = pd.DataFrame()
for underlying in index_weight.index:
    data = rq.futures.get_dominant_price(underlying, frequency="1m").loc[underlying]
    minute = data.groupby("trading_date").apply(preprocess)
    minute.name = underlying
    all_minute = pd.concat([all_minute, minute], axis=1)

all_minute

Unnamed: 0,Unnamed: 1,PS,V,SF,SA,LC,JM,MA,AO,RU,SP
2025-06-23,2025-06-23 09:00:00,31190.0,5037.0,5488.0,1267.0,59020.0,908.5,2614.0,2940.0,14790.0,5254.0
2025-06-23,2025-06-23 09:01:00,31370.0,5089.0,5496.0,1269.0,59320.0,925.0,2659.0,2992.0,14920.0,5328.0
2025-06-23,2025-06-23 09:02:00,31265.0,5084.0,5500.0,1269.0,59300.0,921.0,2653.0,2997.0,14900.0,5330.0
2025-06-23,2025-06-23 09:03:00,31230.0,5081.0,5512.0,1266.0,59120.0,920.0,2649.0,2984.0,14885.0,5338.0
2025-06-23,2025-06-23 09:04:00,31180.0,5085.0,5514.0,1265.0,59140.0,920.0,2652.0,2984.0,14900.0,5334.0
...,...,...,...,...,...,...,...,...,...,...,...
2025-09-22,2025-09-22 14:56:00,51010.0,4943.0,5654.0,1294.0,73680.0,1219.5,2346.0,2935.0,15590.0,5010.0
2025-09-22,2025-09-22 14:57:00,50985.0,4941.0,5652.0,1295.0,73640.0,1218.5,2345.0,2936.0,15595.0,5010.0
2025-09-22,2025-09-22 14:58:00,51000.0,4940.0,5652.0,1294.0,73600.0,1219.0,2347.0,2936.0,15595.0,5010.0
2025-09-22,2025-09-22 14:59:00,51010.0,4940.0,5656.0,1293.0,73520.0,1217.5,2345.0,2935.0,15605.0,5010.0


In [60]:
index_val = all_minute.groupby(level=0).apply(lambda x: (((x.diff() / x.shift()).apply(lambda y: y.dot(index_weight), axis=1)).fillna(0) + 1).cumprod()).droplevel(0)
index_val

2025-06-23  2025-06-23 09:00:00    1.000000
            2025-06-23 09:01:00    1.006116
            2025-06-23 09:02:00    1.004514
            2025-06-23 09:03:00    1.003775
            2025-06-23 09:04:00    1.003161
                                     ...   
2025-09-22  2025-09-22 14:56:00    0.976504
            2025-09-22 14:57:00    0.976120
            2025-09-22 14:58:00    0.976150
            2025-09-22 14:59:00    0.976088
            2025-09-22 15:00:00    0.975484
Length: 14916, dtype: float64

In [None]:
val_diff = index_val - si_val
counts, bin_edges = np.histogram(val_diff, bins=100)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

def normal_func(x, mu, sigma, amplitude):
    return amplitude * norm.pdf(x, mu, sigma)

# 使用曲线拟合
params, params_covariance = optimize.curve_fit(
    normal_func, bin_centers, counts, p0=[val_diff.mean(), val_diff.std(), counts.max()]
)

print(f"拟合参数: μ={params[0]:.6f}, σ={params[1]:.6f}, 幅度={params[2]:.6f}")

拟合参数: μ=0.001220, σ=0.010095, 幅度=12.818321


In [78]:
confidence_levels = [0.10, 0.20, 0.30]  # 10%, 20%, 30%水平

print("标准正态分布的单边分位数:")
for level in confidence_levels:
    z_score = norm.ppf(1 - level)  # ppf是分位数函数
    print(f"{int(level*100)}%水平: μ + {z_score:.4f}σ")

标准正态分布的单边分位数:
10%水平: μ + 1.2816σ
20%水平: μ + 0.8416σ
30%水平: μ + 0.5244σ
