In [5]:
import numpy as np
import pandas as pd
import tushare as ts
import datetime
import time
import os

# The Tushare API token is a credential and must not be stored in the notebook.
# It is read from the TUSHARE_TOKEN environment variable instead; a token that
# was previously committed in this file should be treated as leaked and revoked.
_tushare_token = os.environ.get('TUSHARE_TOKEN')
if not _tushare_token:
    raise RuntimeError(
        "Tushare token not found: set the TUSHARE_TOKEN environment variable "
        "before running this notebook."
    )

# Shared Tushare Pro client used by the data-loading cells below.
pro = ts.pro_api(_tushare_token)

In [6]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import lr_scheduler, Adam, AdamW
from scipy.stats import norm, t

In [7]:
class GetOHLCV():
    """Download daily futures bars and derive soft three-class labels.

    The labels describe the price change over the next ``pred_len`` rows as
    probabilities of falling below ``-threshold``, staying inside
    ``[-threshold, threshold]`` or rising above ``threshold``.
    """

    # Length of the rolling window (in rows) used for all rolling statistics.
    ROLLING_WINDOW = 250

    def __init__(self, api=None):
        """Create the loader.

        Args:
            api: Optional object exposing ``fut_daily(ts_code=..., start_date=...,
                end_date=...)`` and returning a DataFrame. When omitted, the
                module-level ``pro`` client is used, as before.
        """
        self.api = api

    def get_data(self, assets_code, pred_len, threshold_ratio):
        """Fetch bars for ``assets_code`` and append the label columns.

        Args:
            assets_code: Value passed as ``ts_code`` to ``fut_daily``.
            pred_len: Number of rows to look ahead when computing ``label_return``
                and the forward mean of ``amount``.
            threshold_ratio: Quantile level; ``lower_bond`` is the rolling
                ``threshold_ratio`` quantile of ``label_return`` and ``upper_bond``
                the rolling ``1 - threshold_ratio`` quantile.

        Returns:
            The fetched DataFrame sorted by ``trade_date`` ascending (original
            row labels kept), with the columns ``oi_chg``, ``label_return``,
            ``ma_amount``, ``ma_return_std``, ``label_std``, ``upper_bond``,
            ``lower_bond``, ``threshold``, ``down_prob``, ``middle_prob`` and
            ``up_prob`` added, and every row containing a NaN removed.
        """
        api = self.api if self.api is not None else pro
        window = self.ROLLING_WINDOW

        # The history is requested in two pieces and stitched back together.
        data_1 = api.fut_daily(ts_code=assets_code, start_date='20110101', end_date='20180101')
        data_2 = api.fut_daily(ts_code=assets_code, start_date='20180101')
        data = pd.concat([data_1, data_2], ignore_index=True)

        # oi_chg is overwritten with a constant before dropna(), so missing values
        # in that column cannot cause rows to be dropped.
        data['oi_chg'] = 1

        # Both requests include 20180101; drop a possibly repeated bar so it does
        # not appear twice in the rolling windows. Rows are then ordered oldest first.
        data = (
            data.dropna()
                .drop_duplicates(subset=['ts_code', 'trade_date'])
                .sort_values(by='trade_date')
        )

        # Price change (in price units) over the next pred_len rows.
        data['label_return'] = data['close'].shift(-pred_len) - data['close']

        # Rolling mean of `amount` and rolling std of label_return over `window` rows.
        data['ma_amount'] = data['amount'].rolling(window=window).mean()
        data['ma_return_std'] = data['label_return'].rolling(window=window).std()

        # Implied std of the label: the rolling std scaled by the ratio of the mean
        # `amount` over the next pred_len rows to its rolling mean.
        # NOTE(review): label_return looks pred_len rows ahead, so the rolling
        # statistics built on it (ma_return_std, upper_bond, lower_bond) also
        # contain forward information, as does the forward `amount` mean. These
        # columns should be used as training targets only - confirm intent.
        forward_amount = data['amount'].rolling(window=pred_len).mean().shift(-pred_len)
        data['label_std'] = forward_amount / data['ma_amount'] * data['ma_return_std']

        # Rolling upper / lower quantiles of label_return and the symmetric
        # threshold obtained by averaging their absolute values.
        data['upper_bond'] = data['label_return'].rolling(window=window).quantile(1 - threshold_ratio)
        data['lower_bond'] = data['label_return'].rolling(window=window).quantile(threshold_ratio)
        data['threshold'] = (abs(data['upper_bond']) + abs(data['lower_bond'])) / 2

        # Class probabilities under N(label_return, label_std). norm.cdf
        # broadcasts over arrays, so each bound is evaluated once for the whole
        # frame instead of row by row; NaN inputs yield NaN and are removed below.
        threshold = data['threshold'].to_numpy(dtype=float)
        center = data['label_return'].to_numpy(dtype=float)
        spread = data['label_std'].to_numpy(dtype=float)
        cdf_lower = norm.cdf(-threshold, loc=center, scale=spread)
        cdf_upper = norm.cdf(threshold, loc=center, scale=spread)

        data['down_prob'] = cdf_lower
        data['middle_prob'] = cdf_upper - cdf_lower
        data['up_prob'] = 1 - cdf_upper

        # Remove warm-up rows of the rolling windows and the last pred_len rows,
        # which have no label.
        data.dropna(inplace=True)

        return data

In [12]:
# Build the labelled dataset for the IH.CFX contract code: 5-row look-ahead,
# with the 25% / 75% rolling quantiles defining the move threshold.
source = GetOHLCV()
data = source.get_data(
    assets_code='IH.CFX',
    pred_len=5,
    threshold_ratio=0.25,
)
# Last expression of the cell: render the resulting frame.
data

Unnamed: 0,ts_code,trade_date,pre_close,pre_settle,open,high,low,close,settle,change1,...,label_return,ma_amount,ma_return_std,label_std,upper_bond,lower_bond,threshold,down_prob,middle_prob,up_prob
414,IH.CFX,20160422,2136.4,2149.6,2123.4,2154.0,2121.2,2147.8,2146.2,-1.8,...,-23.2,1.096324e+07,140.985614,4.037794,52.75,-94.90,73.825,2.318827e-36,1.000000,0.000000
413,IH.CFX,20160425,2147.8,2146.2,2139.0,2143.0,2111.4,2136.8,2137.0,-9.4,...,18.2,1.085764e+07,141.008757,4.117978,52.75,-94.90,73.825,6.437913e-111,1.000000,0.000000
412,IH.CFX,20160426,2136.8,2137.0,2142.4,2164.6,2125.2,2141.4,2137.0,4.4,...,7.8,1.075481e+07,140.244926,3.977581,52.55,-94.90,73.725,1.165702e-93,1.000000,0.000000
411,IH.CFX,20160427,2141.4,2137.0,2147.2,2149.8,2122.8,2131.6,2133.0,-5.4,...,14.6,1.066426e+07,140.182956,3.898807,52.20,-94.90,73.550,1.748102e-113,1.000000,0.000000
410,IH.CFX,20160428,2131.6,2133.0,2140.8,2142.8,2119.6,2124.6,2128.2,-8.4,...,-33.4,1.057131e+07,140.182982,4.101822,52.20,-94.90,73.550,6.318130e-23,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
674,IH.CFX,20250915,2968.6,2969.8,2972.0,2984.0,2955.6,2962.4,2961.2,-7.4,...,-39.4,3.129743e+06,84.745401,72.806228,38.50,-26.55,32.525,5.376157e-01,0.300783,0.161602
673,IH.CFX,20250916,2962.4,2961.2,2968.2,2975.8,2942.4,2950.6,2950.4,-10.6,...,-25.6,3.132880e+06,84.688360,77.853630,38.50,-26.25,32.375,4.653269e-01,0.306436,0.228237
672,IH.CFX,20250917,2950.6,2950.4,2951.2,2962.6,2935.0,2956.2,2953.6,5.8,...,-16.4,3.136971e+06,84.575274,81.265749,38.50,-25.80,32.150,4.231629e-01,0.301725,0.275113
671,IH.CFX,20250918,2956.2,2953.6,2965.6,2976.0,2888.4,2910.8,2907.8,-42.8,...,42.8,3.140363e+06,84.475159,81.369293,38.75,-25.80,32.275,1.780960e-01,0.270445,0.551459
