In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import logging

import pandas as pd
import numpy as np
import os
from typing import Union, List
from datetime import datetime, time, timedelta
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt
import pickle
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm

from utils import calc_date2maturity, calc_time2maturity,plot_legend_outside
from backtest.preprocessors.preprocess import LobTimePreprocessor, LobFeatureEngineering
from backtest.predefined.macros import LobColTemplate, ColTemplate, CITICSF_ColTemplate
from backtest.statistic_tools.statistics import calc_r_squared
from backtest.utils import get_class_name

from py_vollib_vectorized.implied_volatility import vectorized_implied_volatility
from py_vollib_vectorized.greeks import delta, gamma, vega, theta, rho

# Project root directory. The default is the original author's local checkout; set the
# BID_ASK_SPREAD_ROOT environment variable to run the notebook elsewhere without editing
# this cell.
root = os.path.join(
    os.environ.get(
        'BID_ASK_SPREAD_ROOT',
        '/Users/hongyifan/Desktop/work/internship/citic_futures/20231226bid-ask-spread/',
    ),
    '',  # guarantees a trailing separator, which the string concatenations below rely on
)
# Sub-directories derived from the root (all kept as plain strings ending in '/').
data_root = root + 'data/'
option_data_root = root + 'data/Option/'
future_data_root = root + 'data/Future/'
etf_data_root = root + 'data/ETF/'
res_root = root + 'res/'
model_root = root + 'models/'

# Maps each ETF symbol to the future symbol associated with it.
etf_future_symbol_dict = dict(
    [
        ('510050.XSHG', 'IH'),
        ('510300.XSHG', 'IF'),
        ('510500.XSHG', 'IC'),
    ]
)

# Trading calendar: the 'trading_dates' column of the calendar CSV as a NumPy array of strings.
trading_dates = np.asarray(
    pd.read_csv(data_root + 'trading_dates_cn_2000-01-01_2024-12-31.csv')['trading_dates'].tolist(),
    dtype=str,
)

# Shared LobTimePreprocessor instance used by later cells.
ltp = LobTimePreprocessor()



# Loop over all contracts (no loop follows in this cell; the iteration lives in the main cell further down)


In [3]:
# ETF prices: one 'close' series per underlying, indexed by datetime and
# renamed to 'etf_price_<symbol>'.
etf_price_dict = {}
for udly_symbol in ('510050.XSHG', '510300.XSHG', '510500.XSHG'):
    etf_price_dict[udly_symbol] = (
        pd.read_csv(
            f"{data_root}ETF/data_etf_{udly_symbol}_2014-12-01_2023-11-28.csv",
            usecols=['datetime', 'close'],
            index_col='datetime',
            parse_dates=True,
        )['close']
        .rename(f'etf_price_{udly_symbol}')
    )

In [4]:
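# Risk-free rate from government bond yields (original note: "use the 1-year yield"; get_rfr below actually selects the tenor closest to days-to-maturity)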
def get_rfr(interest_rate_df, date_, days2maturity):
    """Return the rate for ``date_`` at the tenor closest to ``days2maturity``.

    Parameters
    ----------
    interest_rate_df : pandas.DataFrame
        Rates with one row per date label and numeric column labels (tenors
        expressed in the same unit as ``days2maturity``).
    date_ : hashable
        Row label to look up; a missing label raises ``KeyError``.
    days2maturity : number
        Target tenor. On a tie the first (left-most) closest column wins.

    Returns
    -------
    The single rate stored at the selected row/column position.
    """
    tenor_distance = np.abs(interest_rate_df.columns - days2maturity)
    nearest = np.argmin(tenor_distance)
    row = interest_rate_df.loc[date_]
    return row.iloc[nearest]


# Yield table: the first CSV column becomes the index, tenor labels are replaced by
# day counts (presumably trading days — TODO confirm), and every value is divided
# by 100 (presumably percent -> decimal — TODO confirm).
interest_rate = (
    pd.read_csv(data_root + "interest_rate.csv", index_col=0)
    .rename(
        columns={
            'ON': 1,  # overnight
            '1W': 5,
            '2W': 10,
            '1M': 20,
            '3M': 60,
            '6M': 120,
            '9M': 180,
            '1Y': 240,
        }
    )
    .div(100)
)

In [5]:
# Collect every date that has a folder under the option data directory.
# Hidden entries (e.g. macOS '.DS_Store') are filtered out rather than removed by name:
# list.remove raises ValueError when the entry is absent, which broke this cell on any
# directory without a '.DS_Store'. The main loop filters file names with the same
# startswith('.') check.
dates_list = sorted(
    entry for entry in os.listdir(option_data_root) if not entry.startswith('.')
)
# Parse the names as dates and convert them back to strings.
dates_list = pd.to_datetime(dates_list, format='%Y-%m-%d').astype(str).tolist()

In [None]:
# Date ranges for the train / validation / test splits.
# Only the training window is filled in; the other bounds are empty strings.
train_start, train_end = '2023-08-17', '2023-09-27'
valid_start, valid_end = '', ''
test_start, test_end = '', ''

In [206]:
from copy import deepcopy

# Main routine: iterate over the per-date option CSV files to compute implied volatility.
# NOTE(review): only the set-up is active in this cell. The loop variables are pinned to one
# date and one contract, the outer loop exits after a single pass, and the data-loading
# steps are commented out.

# Contract metadata, indexed by option code with the last five characters of 'code' dropped
# (presumably an exchange suffix — TODO confirm).
option_info_df = pd.read_csv(data_root + 'etf_options_basic_info.csv')
option_info_df['code'] = option_info_df['code'].apply(lambda x: x[:-5])
option_info_df = option_info_df.set_index('code').sort_index()
# The message printed below says: expire_date in option_info_df is inclusive, i.e. the
# contract expires at the close of expire_date.
print("option_info_df 中的expire_date是闭，即到期日为expire_date当天收盘")
# Fallback risk-free rate; replaced inside the loop whenever get_rfr runs.
rfr = 0.03
# File names skipped when collecting option files
# (presumably the underlying ETFs' own data files — TODO confirm).
exclude = ['510050.csv', '510300.csv', '510500.csv']
for date_ in dates_list:
    # NOTE(review): debugging override — the loop value is discarded and this fixed date is used.
    date_ = '2023-08-02'
    # Date with the dashes removed; not referenced again in the visible part of this cell.
    date_1 = date_.replace('-', '')
    files = []
    print(option_data_root + date_.replace('-', '') + '/')
    # os.walk also descends into sub-directories; only bare file names are collected.
    for r, _, f in os.walk(option_data_root + date_.replace('-', '') + '/'):
        for ff in f:
            # Skip hidden files, the excluded names, and 'MO'-prefixed files
            # (CSI 1000 index options).
            if not ff.startswith('.') and ff not in exclude and not ff.startswith('MO'):
                files.append(ff)
    # Drop the last four characters of each file name (presumably the '.csv' extension).
    option_symbol_list = [s[:-4] for s in files]
    # Iterate over a copy so the working list can be mutated inside the loop
    # (see the commented-out remove() call below).
    option_symbol_list_bkp = deepcopy(option_symbol_list)

    for option_symbol in option_symbol_list_bkp:
        # NOTE(review): debugging override — every iteration is pinned to this one contract.
        option_symbol = '10005684'
        # option_symbol = '10005679'

        # Empty frame; not used in the visible part of this cell.
        factors = pd.DataFrame()
        if option_symbol not in option_symbol_list: continue
        # Static attributes of the contract; .loc raises KeyError if the symbol is not in
        # option_info_df.
        option_trading_code, expire_date, option_type, option_strike, option_underlying_symbol = option_info_df.loc[
            option_symbol, ['trading_code', 'expire_date', 'contract_type', 'exercise_price',
                            'underlying_symbol']].values
        days2maturity = calc_date2maturity(current=date_, expiry=expire_date, trading_dates=trading_dates)  # number of days until expiry
        rfr = get_rfr(interest_rate_df=interest_rate, date_=date_, days2maturity=days2maturity)  # risk free rate
        # 
        # # Find the matching option with the same maturity and strike
        # if option_trading_code[6] == 'C':
        #     option_trading_code1 = option_trading_code.replace('C', 'P')
        # elif option_trading_code[6] == 'P':
        #     option_trading_code1 = option_trading_code.replace('P', 'C')
        # else:
        #     raise ValueError()
        # option_symbol1 = option_info_df.loc[option_info_df['trading_code'] == option_trading_code1].index.values[0]
        # assert option_symbol1 + '.csv' in files
        # option_symbol_list.remove(option_symbol)
        # # option_symbol_list.remove(option_symbol1) # No need to drop the opposite option; recomputing its iv and greeks costs a little extra time, which is acceptable

        # 
        # # First option
        # option_price_df = pd.read_csv(option_data_root + date_.replace('-', '') + '/' + f'{option_symbol}.csv',
        #                               parse_dates=True, index_col='serverTime')
        # option_price_df = option_price_df.loc[~option_price_df['auctionLabel']]
        # option_price_df = ltp.del_untrade_time(option_price_df, cut_tail=False)
        # 
        # # Matching (opposite) option
        # option_price_df1 = pd.read_csv(option_data_root + date_.replace('-', '') + '/' + f'{option_symbol1}.csv',
        #                                parse_dates=True, index_col='serverTime')
        # option_price_df1 = option_price_df1.loc[~option_price_df1['auctionLabel']]
        # option_price_df1 = ltp.del_untrade_time(option_price_df1, cut_tail=False)
        # 
        # option_price_dict = {}
        # if option_type == 'CO':
        #     option_price_dict['c'] = option_price_df
        #     option_price_dict['p'] = option_price_df1
        # else:
        #     option_price_dict['p'] = option_price_df
        #     option_price_dict['c'] = option_price_df1
        #     
    # NOTE(review): unconditional break — the outer loop stops after its first iteration.
    break


option_info_df 中的expire_date是闭，即到期日为expire_date当天收盘
/Users/hongyifan/Desktop/work/internship/citic_futures/20231226bid-ask-spread/data/Option/20230802/
