In [92]:
import os

import mysql.connector
import numpy as np
import pandas as pd
from mysql.connector import Error

# Basic Methods

In [95]:
# Keyword arguments passed to mysql.connector.connect(**config) by the write helpers.
# Each connection value can be overridden through an environment variable so that
# credentials do not have to be edited into (or shared with) the notebook.
# NOTE(review): the literal fallbacks keep existing runs working; remove them
# (and rotate the password) once the environment variables are set everywhere.
config = {
    'user': os.environ.get('MYSQL_USER', 'root'),
    'password': os.environ.get('MYSQL_PASSWORD', '123456789'),
    'host': os.environ.get('MYSQL_HOST', '34.124.177.28'),
    'database': os.environ.get('MYSQL_DATABASE', 'qf5214 test'),
    'raise_on_warnings': True
}
def fast_write_data_into_sql(table_name, data, create_table_query, insert_query, drop_table_query):
    """
    Bulk-load a DataFrame into a MySQL table with a single ``executemany`` call.

    Opens a connection with the module-level ``config``, optionally runs the
    drop and create statements (committing after each), then inserts every row
    of ``data`` and commits once.

    Args:
        table_name: Table name; only used in the progress messages.
        data: DataFrame whose column order matches the ``%s`` placeholders of
            ``insert_query`` (rows are bound positionally).
        create_table_query: CREATE TABLE statement to run before inserting, or
            None to skip.
        insert_query: Parameterised INSERT statement using ``%s`` placeholders.
        drop_table_query: DROP TABLE statement to run first, or None to skip.

    Returns:
        None. A ``mysql.connector.Error`` is caught and printed, not re-raised.
    """
    try:
        with mysql.connector.connect(**config) as connection:
            with connection.cursor() as cursor:
                if drop_table_query is not None:
                    cursor.execute(drop_table_query)
                    connection.commit()
                    print(f'Table {table_name} dropped because it has existed.')
                if create_table_query is not None:
                    cursor.execute(create_table_query)
                    connection.commit()
                    print(f'Successfully created table {table_name}')

                # Missing values (NaN / NaT / None) are sent as None so the
                # driver binds them as SQL NULL instead of a NaN float.
                # Casting to object first keeps None from being coerced back to NaN.
                cleaned = data.astype(object).where(data.notna(), None)
                data_to_insert = list(map(tuple, cleaned.to_numpy()))

                cursor.executemany(insert_query, data_to_insert)
                connection.commit()
                print(f'Successfully inserted {len(data_to_insert)} rows into {table_name}')

    except Error as e:
        print("Error during database operation:", e)



def slow_write_data_into_sql(table_name, data, create_table_query, insert_query, drop_table_query):
    """
    Load a DataFrame into a MySQL table one row at a time, printing progress.

    Opens a connection with the module-level ``config``, optionally runs the
    drop and create statements (committing after each), then executes
    ``insert_query`` once per row and commits after the last row.

    Args:
        table_name: Table name; only used in the progress messages.
        data: DataFrame whose column order matches the ``%s`` placeholders of
            ``insert_query`` (rows are bound positionally).
        create_table_query: CREATE TABLE statement to run before inserting, or
            None to skip.
        insert_query: Parameterised INSERT statement using ``%s`` placeholders.
        drop_table_query: DROP TABLE statement to run first, or None to skip.

    Returns:
        None. A ``mysql.connector.Error`` is caught and printed, not re-raised.
    """
    try:
        with mysql.connector.connect(**config) as connection:
            with connection.cursor() as cursor:
                if drop_table_query is not None:
                    cursor.execute(drop_table_query)
                    connection.commit()
                    print(f'Table {table_name} dropped because it has existed.')

                if create_table_query is not None:
                    cursor.execute(create_table_query)
                    connection.commit()
                    print(f'Successfully created table {table_name}')

                for row_number, (_, row) in enumerate(data.iterrows(), start=1):
                    cursor.execute(insert_query, tuple(row.values))
                    # '\r' rewrites the same console line with the running count.
                    print(f'\rNumber of rows imported: {row_number}', end="", flush=True)
                connection.commit()
                # Fixed: this message was missing the f-prefix and printed the
                # literal text '{table_name}'.
                print(f'\nSuccessfully inserted data into {table_name}')

    except Error as e:
        print("Error during database operation:", e)


def split_dataframe(df, batch_size):
    """
    Splits a dataframe into consecutive row batches of at most ``batch_size`` rows.

    Args:
        df: The dataframe to split.
        batch_size: Maximum number of rows per subset; must be positive.

    Returns:
        A list of subsets in the original row order. Every subset except
        possibly the last has exactly ``batch_size`` rows; an empty dataframe
        yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    # Zero used to raise ZeroDivisionError and a negative size silently
    # returned no batches at all; reject both explicitly.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")

    # .iloc keeps the slicing positional whatever the index type is.
    return [
        df.iloc[start_idx:start_idx + batch_size]
        for start_idx in range(0, len(df), batch_size)
    ]


# idmap

In [96]:
# Load the option code map and flatten it into a two-column frame:
# the former index is exposed as 'id', the remaining column as 'symbol_in_exchange'.
codemap = pd.DataFrame(
    pd.read_pickle('/Users/suguanting/Documents/5214/option_data/code_map')
).reset_index()
codemap.columns = ['id', 'symbol_in_exchange']
codemap

Unnamed: 0,id,symbol_in_exchange
0,10000001,10000001.SH
1,10000002,10000002.SH
2,10000003,10000003.SH
3,10000004,10000004.SH
4,10000005,10000005.SH
...,...,...
106925,ZN2408P24000,ZN2408P24000.SHF
106926,ZN2408P24200,ZN2408P24200.SHF
106927,ZN2408P24400,ZN2408P24400.SHF
106928,ZN2408P24600,ZN2408P24600.SHF


In [97]:
# DROP statement for id_map. It is defined here but not executed:
# the call at the bottom of this cell passes drop_table_query=None.
drop_table_query = "DROP TABLE IF EXISTS id_map;"

# id_map schema: one row per id, with id as the primary key.
create_table_query = """
CREATE TABLE IF NOT EXISTS id_map (
  id VARCHAR(255) NOT NULL,
  symbol_in_exchange VARCHAR(255),
  PRIMARY KEY (id)
);
"""

# Upsert: a row whose id already exists has its symbol_in_exchange overwritten
# with the incoming value instead of failing on the primary key.
# NOTE(review): the "VALUES (...) AS alias" row-alias form looks like newer
# MySQL 8.0 syntax — confirm the target server version supports it.
insert_query = """
INSERT INTO id_map (id, symbol_in_exchange)
VALUES (%s, %s) AS alias 
ON DUPLICATE KEY UPDATE symbol_in_exchange = alias.symbol_in_exchange;
"""

# Create the table (if missing) and load the code map built in the previous cell.
fast_write_data_into_sql(table_name='id_map', data=codemap, create_table_query=create_table_query, insert_query=insert_query, drop_table_query=None)

Successfully created table id_map
Successfully inserted 106930 rows into id_map


# option_info

In [98]:
# Load the option contract metadata and expose 'order_book_id' under the
# shared key name 'id'.
option_info = pd.read_pickle(
    '/Users/suguanting/Documents/5214/option_data/option_info'
).rename(columns={'order_book_id': 'id'})
option_info

Unnamed: 0,listed_date,exchange,underlying_symbol,symbol,underlying_order_book_id,round_lot,de_listed_date,maturity_date,option_type,exercise_type,type,contract_multiplier,strike_price,id,market_tplus,trading_hours,product_name,trading_code
0,2015-02-09,XSHG,510050.XSHG,50ETF购3月2200,510050.XSHG,1.0,2015-03-25,2015-03-25,C,E,Option,10000.0,2.20,10000001,0.0,"09:31-11:30,13:01-15:00",510050C1503M02200,10000001
1,2015-02-09,XSHG,510050.XSHG,50ETF购3月2250,510050.XSHG,1.0,2015-03-25,2015-03-25,C,E,Option,10000.0,2.25,10000002,0.0,"09:31-11:30,13:01-15:00",510050C1503M02250,10000002
2,2015-02-09,XSHG,510050.XSHG,50ETF购3月2300,510050.XSHG,1.0,2015-03-25,2015-03-25,C,E,Option,10000.0,2.30,10000003,0.0,"09:31-11:30,13:01-15:00",510050C1503M02300,10000003
3,2015-02-09,XSHG,510050.XSHG,50ETF购3月2350,510050.XSHG,1.0,2015-03-25,2015-03-25,C,E,Option,10000.0,2.35,10000004,0.0,"09:31-11:30,13:01-15:00",510050C1503M02350,10000004
4,2015-02-09,XSHG,510050.XSHG,50ETF购3月2400,510050.XSHG,1.0,2015-03-25,2015-03-25,C,E,Option,10000.0,2.40,10000005,0.0,"09:31-11:30,13:01-15:00",510050C1503M02400,10000005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106925,2024-04-11,SHFE,ZN,zn2408沽24000,ZN2408,1.0,2024-07-25,2024-07-25,P,A,Option,5.0,24000.00,ZN2408P24000,0.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",zn2408沽24000,zn2408P24000
106926,2024-04-11,SHFE,ZN,zn2408沽24200,ZN2408,1.0,2024-07-25,2024-07-25,P,A,Option,5.0,24200.00,ZN2408P24200,0.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",zn2408沽24200,zn2408P24200
106927,2024-04-11,SHFE,ZN,zn2408沽24400,ZN2408,1.0,2024-07-25,2024-07-25,P,A,Option,5.0,24400.00,ZN2408P24400,0.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",zn2408沽24400,zn2408P24400
106928,2024-04-12,SHFE,ZN,zn2408沽24600,ZN2408,1.0,2024-07-25,2024-07-25,P,A,Option,5.0,24600.00,ZN2408P24600,0.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",zn2408沽24600,zn2408P24600


In [100]:
# DROP statement for option_info. It is defined here but not executed:
# the call at the bottom of this cell passes drop_table_query=None.
drop_table_query = "DROP TABLE IF EXISTS option_info;"

# option_info schema, keyed by id. Columns are declared in the same order as
# the option_info DataFrame columns shown in the previous cell's output.
# NOTE(review): the table column is named de_list_date while the DataFrame
# column is de_listed_date; loading still works because values are bound by
# position, but the name differs from the source — confirm this is intended.
create_table_query = """
CREATE TABLE IF NOT EXISTS option_info (
  listed_date DATE,
  exchange VARCHAR(255),
  underlying_symbol VARCHAR(255),
  symbol VARCHAR(255),
  underlying_order_book_id VARCHAR(255),
  round_lot INT,
  de_list_date DATE,
  maturity_date DATE,
  option_type VARCHAR(255),
  exercise_type VARCHAR(255),
  type VARCHAR(255),
  contract_multiplier FLOAT,
  strike_price FLOAT,
  id VARCHAR(255),
  market_tplus INT,
  trading_hours VARCHAR(255),
  product_name VARCHAR(255),
  trading_code VARCHAR(255),
  PRIMARY KEY (id)
);
"""

# Plain INSERT with 18 placeholders, one per column listed above, in the same order.
insert_query = """
INSERT INTO option_info (
  listed_date, 
  exchange, 
  underlying_symbol, 
  symbol, 
  underlying_order_book_id, 
  round_lot, 
  de_list_date, 
  maturity_date, 
  option_type, 
  exercise_type, 
  type, 
  contract_multiplier, 
  strike_price, 
  id, 
  market_tplus, 
  trading_hours, 
  product_name, 
  trading_code
) VALUES (
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
  %s, %s, %s
);

"""

# Create the table (if missing) and load all option metadata rows in one batch.
fast_write_data_into_sql(table_name='option_info', data=option_info, create_table_query=create_table_query, insert_query=insert_query, drop_table_query=None)

Successfully created table option_info
Successfully inserted 106930 rows into option_info


# exchange

In [101]:
# Exchange code -> full exchange name.
exchange = {
    'XSHG': 'Shanghai Stock Exchange',
    'XSHE': 'Shenzhen Stock Exchange',
    'DCE': 'Dalian Commodity Exchange',
    'SHFE': 'Shanghai Futures Exchange',
    'CZCE': 'Zhengzhou Commodity Exchange',
    'CFFEX': 'China Financial Future Exchange',
    'GFEX': 'Guangzhou Future Exchange',
    'INE': 'Shanghai International Energy Exchange'
}

# Two-column frame (code, name) in the dictionary's insertion order.
exchange_df = pd.DataFrame({
    'exchange': list(exchange.keys()),
    'exchange_name': list(exchange.values()),
})
exchange_df

Unnamed: 0,exchange,exchange_name
0,XSHG,Shanghai Stock Exchange
1,XSHE,Shenzhen Stock Exchange
2,DCE,Dalian Commodity Exchange
3,SHFE,Shanghai Futures Exchange
4,CZCE,Zhengzhou Commodity Exchange
5,CFFEX,China Financial Future Exchange
6,GFEX,Guangzhou Future Exchange
7,INE,Shanghai International Energy Exchange


In [102]:
# DROP statement for exchange_map. It is defined here but not executed:
# the call at the bottom of this cell passes drop_table_query=None.
drop_table_query = "DROP TABLE IF EXISTS exchange_map;"

# exchange_map schema: exchange code (primary key) -> exchange name.
create_table_query = """
CREATE TABLE IF NOT EXISTS exchange_map (
  exchange VARCHAR(255) NOT NULL,
  exchange_name VARCHAR(255),
  PRIMARY KEY (exchange)
);
"""

# Upsert: an exchange code that already exists has its exchange_name
# overwritten with the incoming value.
insert_query = """
INSERT INTO exchange_map(exchange, exchange_name)
VALUES (%s, %s) AS alias 
ON DUPLICATE KEY UPDATE exchange_name = alias.exchange_name;
"""

# Create the table (if missing) and load the exchange lookup rows.
fast_write_data_into_sql(table_name='exchange_map', data=exchange_df, create_table_query=create_table_query, insert_query=insert_query, drop_table_query=None)

Successfully created table exchange_map
Successfully inserted 8 rows into exchange_map


# option_data

In [103]:
# Load the option end-of-day data, move the index levels into regular columns
# and rename 'order_book_id' to the shared key name 'id'.
option_data = (
    pd.read_pickle('/Users/suguanting/Documents/5214/option_data/option_eod')
    .reset_index()
    .rename(columns={'order_book_id': 'id'})
)
option_data

Unnamed: 0,id,date,open,high,low,close,total_turnover,volume,open_interest
0,10000001,2015-02-09,0.1820,0.2029,0.1699,0.1826,4712265.0,2501.0,674.0
1,10000001,2015-02-10,0.1856,0.2144,0.1800,0.2072,3744873.0,1842.0,1087.0
2,10000001,2015-02-11,0.2083,0.2195,0.2028,0.2107,4231181.0,1999.0,1628.0
3,10000001,2015-02-12,0.2141,0.2143,0.1915,0.2109,4804803.0,2334.0,1930.0
4,10000001,2015-02-13,0.2130,0.2459,0.2090,0.2090,4604396.0,2066.0,2215.0
...,...,...,...,...,...,...,...,...,...
11833077,ZN2408P23600,2024-04-11,1614.0000,1614.0000,1614.0000,1555.0000,0.0,0.0,0.0
11833078,ZN2408P23800,2024-04-11,1768.0000,1768.0000,1768.0000,1695.0000,0.0,0.0,0.0
11833079,ZN2408P24000,2024-04-11,1926.0000,1926.0000,1926.0000,1837.0000,0.0,0.0,0.0
11833080,ZN2408P24200,2024-04-11,2091.0000,2091.0000,2091.0000,1987.0000,0.0,0.0,0.0


In [104]:
# Defined for reference only: every call below passes drop_table_query=None.
drop_table_query = "DROP TABLE IF EXISTS option_data;"

# One row per (id, date).
# NOTE(review): FLOAT is single precision in MySQL; large total_turnover /
# volume values may be rounded on insert — consider DOUBLE. Confirm.
create_table_query = """
CREATE TABLE IF NOT EXISTS option_data (
  id VARCHAR(255) NOT NULL,
  date DATE NOT NULL,
  open FLOAT,
  high FLOAT,
  low FLOAT,
  close FLOAT,
  total_turnover FLOAT,
  volume FLOAT,
  open_interest FLOAT,
  PRIMARY KEY (id, date)
);
"""

insert_query = """
INSERT INTO option_data (
  id,
  date,
  open,
  high,
  low,
  close,
  total_turnover,
  volume,
  open_interest
) VALUES (
  %s, %s, %s, %s, %s, %s, %s, %s, %s
);
"""

# Upload in batches of 200000 rows; each batch uses its own connection.
subsets = split_dataframe(option_data, 200000)
create = True

for data in subsets:
    # Only the first batch carries the CREATE TABLE statement.
    fast_write_data_into_sql(
        table_name='option_data',
        data=data,
        create_table_query=create_table_query if create else None,
        insert_query=insert_query,
        drop_table_query=None,
    )
    create = False

Successfully created table option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into option_data
Successfully inserted 200000 rows into opti

# option_greeks

In [107]:
# Load the option greeks, move the index levels into regular columns and
# align the key names with the other tables ('id', 'date').
option_greeks = (
    pd.read_pickle('/Users/suguanting/Documents/5214/option_data/option_greeks')
    .reset_index()
    .rename(columns={'order_book_id': 'id', 'trading_date': 'date'})
)
option_greeks

Unnamed: 0,id,date,iv,delta,gamma,vega,theta,rho
0,10000001,2015-02-09,0.357860,0.686521,1.218722,0.285668,-0.366122,0.170898
1,10000001,2015-02-10,0.349854,0.738125,1.138464,0.263337,-0.333927,0.181592
2,10000001,2015-02-11,0.310493,0.785274,1.157185,0.234189,-0.306694,0.190813
3,10000001,2015-02-12,0.291468,0.808921,1.160751,0.215990,-0.289689,0.192932
4,10000001,2015-02-13,0.259664,0.844097,1.153831,0.187550,-0.235144,0.198180
...,...,...,...,...,...,...,...,...
11741023,ZN2408P23600,2024-04-11,0.157878,-0.719059,0.000175,4120.468454,-2225.587046,-5160.469012
11741024,ZN2408P23800,2024-04-11,0.153922,-0.757826,0.000166,3817.767421,-2178.729434,-5454.844960
11741025,ZN2408P24000,2024-04-11,0.148105,-0.798230,0.000155,3439.471717,-2107.610364,-5760.527313
11741026,ZN2408P24200,2024-04-11,0.141727,-0.837831,0.000142,2999.686193,-2025.371439,-6063.245573


In [108]:
# Defined for reference only: every call below passes drop_table_query=None.
drop_table_query = "DROP TABLE IF EXISTS option_greeks_data;"

# One row of greeks per (id, date).
create_table_query = """
CREATE TABLE IF NOT EXISTS option_greeks_data (
  id VARCHAR(255) NOT NULL,
  date DATE NOT NULL,
  iv FLOAT,
  delta FLOAT,
  gamma FLOAT,
  vega FLOAT,
  theta FLOAT,
  rho FLOAT,
  PRIMARY KEY (id, date)
);
"""

insert_query = """
INSERT INTO option_greeks_data (
  id,
  date,
  iv,
  delta,
  gamma,
  vega,
  theta,
  rho
) VALUES (
  %s, %s, %s, %s, %s, %s, %s, %s
);
"""

# Upload in batches of 200000 rows; each batch uses its own connection.
subsets = split_dataframe(option_greeks, 200000)
create = True

for data in subsets:
    # Only the first batch carries the CREATE TABLE statement.
    fast_write_data_into_sql(
        table_name='option_greeks_data',
        data=data,
        create_table_query=create_table_query if create else None,
        insert_query=insert_query,
        drop_table_query=None,
    )
    create = False

Successfully created table option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 200000 rows into option_greeks_data
Successfully inserted 2000

# option_underlying

In [110]:
# Load end-of-day data for the option underlyings, move the index levels into
# regular columns and rename 'order_book_id' to the shared key name 'id'.
option_underlying_data = (
    pd.read_pickle('/Users/suguanting/Documents/5214/option_data/underlying_eod')
    .reset_index()
    .rename(columns={'order_book_id': 'id'})
)
option_underlying_data

Unnamed: 0,id,date,open,close,low,high,volume
0,510050.XSHG,2015-01-05,2.580,2.622,2.555,2.655,1.596469e+09
1,510050.XSHG,2015-01-06,2.600,2.592,2.561,2.651,1.556187e+09
2,510050.XSHG,2015-01-07,2.575,2.588,2.558,2.622,8.887969e+08
3,510050.XSHG,2015-01-08,2.599,2.518,2.510,2.610,7.936625e+08
4,510050.XSHG,2015-01-09,2.510,2.524,2.496,2.644,1.156150e+09
...,...,...,...,...,...,...,...
306290,ZN2408,2024-04-03,21120.000,21200.000,21100.000,21270.000,3.873000e+03
306291,ZN2408,2024-04-08,21700.000,21675.000,21440.000,21775.000,6.483000e+03
306292,ZN2408,2024-04-09,21795.000,21800.000,21710.000,21940.000,6.959000e+03
306293,ZN2408,2024-04-10,21950.000,22595.000,21950.000,22595.000,1.175000e+04


In [111]:
# Defined for reference only: every call below passes drop_table_query=None.
drop_table_query = "DROP TABLE IF EXISTS option_underlying_data;"

# One row per (id, date); column order follows the DataFrame (open, close, low, high).
# NOTE(review): FLOAT is single precision in MySQL; volumes of the size shown
# in the loaded data may be rounded on insert — consider DOUBLE. Confirm.
create_table_query = """
CREATE TABLE IF NOT EXISTS option_underlying_data (
  id VARCHAR(255) NOT NULL,
  date DATE NOT NULL,
  open FLOAT,
  close FLOAT,
  low FLOAT,
  high FLOAT,
  volume FLOAT,
  PRIMARY KEY (id, date)
);
"""

insert_query = """
INSERT INTO option_underlying_data (
  id,
  date,
  open,
  close,
  low,
  high,
  volume
) VALUES (
  %s, %s, %s, %s, %s, %s, %s
);
"""

# Upload in batches of 200000 rows; each batch uses its own connection.
subsets = split_dataframe(option_underlying_data, 200000)
create = True

for data in subsets:
    # Only the first batch carries the CREATE TABLE statement.
    fast_write_data_into_sql(
        table_name='option_underlying_data',
        data=data,
        create_table_query=create_table_query if create else None,
        insert_query=insert_query,
        drop_table_query=None,
    )
    create = False

Successfully created table option_underlying_data
Successfully inserted 200000 rows into option_underlying_data
Successfully inserted 106295 rows into option_underlying_data


# macro

In [153]:
# Preview the raw macro workbook; the first sheet row supplies the column names.
macro_data = pd.read_excel(
    io='/Users/suguanting/Documents/5214/data.xlsx',
    header=0,
)
macro_data

Unnamed: 0,指标名称,10年期国债期货基差:本季,5年期国债期货基差:本季,2年期国债期货基差:本季,中债国债到期收益率:3个月,中债国债到期收益率:2年,中债国债到期收益率:5年,中债国债到期收益率:10年,中债国债即期收益率:3个月,中债国债即期收益率:2年,中债国债即期收益率:5年,中债国债即期收益率:10年,美国:国债收益率:3个月,美国:国债收益率:2年,美国:国债收益率:5年,美国:国债收益率:10年,美国:国债收益率利差:10年-2年
0,2019-01-02,-1.1459,-0.3808,-0.2626,2.5237,2.6167,2.9466,3.1734,2.5237,2.6357,2.9802,3.2240,2.42,2.50,2.49,2.66,0.16
1,2019-01-03,-1.1736,0.2636,-0.6650,2.4709,2.6233,2.8965,3.1410,2.4709,2.6427,2.9270,3.1873,2.41,2.39,2.37,2.56,0.17
2,2019-01-04,-0.5486,-0.0857,-0.0338,2.3251,2.6069,2.8850,3.1513,2.3251,2.6266,2.9147,3.1996,2.42,2.50,2.49,2.67,0.17
3,2019-01-07,0.1758,0.8080,0.0830,2.3409,2.5672,2.9303,3.1507,2.3409,2.5863,2.9639,3.1962,2.45,2.53,2.53,2.70,0.17
4,2019-01-08,-0.3176,0.0927,0.0806,2.3116,2.5222,2.8796,3.1211,2.3116,2.5410,2.9118,3.1679,2.46,2.58,2.58,2.73,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1406,2024-04-08,-0.0035,-0.0103,0.1003,1.5892,1.8630,2.1690,2.2822,1.5830,1.8651,2.1773,2.3045,5.43,4.78,4.43,4.42,-0.36
1407,2024-04-09,0.0604,0.0582,0.0714,1.5792,1.8562,2.1533,2.2802,1.5730,1.8583,2.1611,2.3030,5.43,4.74,4.37,4.36,-0.38
1408,2024-04-10,0.0920,0.0432,0.0855,1.5601,1.8560,2.1591,2.2972,1.5540,1.8579,2.1669,2.3212,5.45,4.97,4.61,4.55,-0.42
1409,2024-04-11,0.1885,-0.0143,0.0815,1.5501,1.8538,2.1378,2.2928,1.5442,1.8557,2.1446,2.3174,5.45,4.93,4.61,4.56,-0.37


In [154]:
# Re-read the macro workbook and replace its headers with ASCII column names,
# assigned by position (17 names for the 17 columns of the sheet).
macro_data = pd.read_excel(
    io='/Users/suguanting/Documents/5214/data.xlsx',
    header=0,
)
macro_data.columns = [
    'date',
    # treasury futures basis, current quarter
    '10Yr_T_Fut_Basis_Q', '5Yr_T_Fut_Basis_Q', '2Yr_T_Fut_Basis_Q',
    # ChinaBond government bond yield to maturity
    'CCDC_Yield_3M', 'CCDC_Yield_2Y', 'CCDC_Yield_5Y', 'CCDC_Yield_10Y',
    # ChinaBond government bond spot yield
    'CCDC_Spot_Yield_3M', 'CCDC_Spot_Yield_2Y', 'CCDC_Spot_Yield_5Y', 'CCDC_Spot_Yield_10Y',
    # US treasury yields and the 10Y-2Y spread
    'US_Yield_3M', 'US_Yield_2Y', 'US_Yield_5Y', 'US_Yield_10Y',
    'US_Yield_Spread_10Y-2Y',
]
macro_data

Unnamed: 0,date,10Yr_T_Fut_Basis_Q,5Yr_T_Fut_Basis_Q,2Yr_T_Fut_Basis_Q,CCDC_Yield_3M,CCDC_Yield_2Y,CCDC_Yield_5Y,CCDC_Yield_10Y,CCDC_Spot_Yield_3M,CCDC_Spot_Yield_2Y,CCDC_Spot_Yield_5Y,CCDC_Spot_Yield_10Y,US_Yield_3M,US_Yield_2Y,US_Yield_5Y,US_Yield_10Y,US_Yield_Spread_10Y-2Y
0,2019-01-02,-1.1459,-0.3808,-0.2626,2.5237,2.6167,2.9466,3.1734,2.5237,2.6357,2.9802,3.2240,2.42,2.50,2.49,2.66,0.16
1,2019-01-03,-1.1736,0.2636,-0.6650,2.4709,2.6233,2.8965,3.1410,2.4709,2.6427,2.9270,3.1873,2.41,2.39,2.37,2.56,0.17
2,2019-01-04,-0.5486,-0.0857,-0.0338,2.3251,2.6069,2.8850,3.1513,2.3251,2.6266,2.9147,3.1996,2.42,2.50,2.49,2.67,0.17
3,2019-01-07,0.1758,0.8080,0.0830,2.3409,2.5672,2.9303,3.1507,2.3409,2.5863,2.9639,3.1962,2.45,2.53,2.53,2.70,0.17
4,2019-01-08,-0.3176,0.0927,0.0806,2.3116,2.5222,2.8796,3.1211,2.3116,2.5410,2.9118,3.1679,2.46,2.58,2.58,2.73,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1406,2024-04-08,-0.0035,-0.0103,0.1003,1.5892,1.8630,2.1690,2.2822,1.5830,1.8651,2.1773,2.3045,5.43,4.78,4.43,4.42,-0.36
1407,2024-04-09,0.0604,0.0582,0.0714,1.5792,1.8562,2.1533,2.2802,1.5730,1.8583,2.1611,2.3030,5.43,4.74,4.37,4.36,-0.38
1408,2024-04-10,0.0920,0.0432,0.0855,1.5601,1.8560,2.1591,2.2972,1.5540,1.8579,2.1669,2.3212,5.45,4.97,4.61,4.55,-0.42
1409,2024-04-11,0.1885,-0.0143,0.0815,1.5501,1.8538,2.1378,2.2928,1.5442,1.8557,2.1446,2.3174,5.45,4.93,4.61,4.56,-0.37


In [157]:
# DROP statement for timeseries_daily_macro; it IS executed here, because the
# call at the bottom of this cell passes it as drop_table_query.
drop_table_query = "DROP TABLE IF EXISTS timeseries_daily_macro;"

# One row of macro indicators per date (date is the primary key).
# NOTE(review): the last table column is US_Yield_Spread_10Y_vs_2Y while the
# DataFrame column is 'US_Yield_Spread_10Y-2Y'; loading still works because
# values are bound by position — confirm the differing name is intended.
create_table_query = """
CREATE TABLE IF NOT EXISTS timeseries_daily_macro (
  date DATE,
  10Yr_T_Fut_Basis_Q FLOAT,
  5Yr_T_Fut_Basis_Q FLOAT,
  2Yr_T_Fut_Basis_Q FLOAT,
  CCDC_Yield_3M FLOAT,
  CCDC_Yield_2Y FLOAT,
  CCDC_Yield_5Y FLOAT,
  CCDC_Yield_10Y FLOAT,
  CCDC_Spot_Yield_3M FLOAT,
  CCDC_Spot_Yield_2Y FLOAT,
  CCDC_Spot_Yield_5Y FLOAT,
  CCDC_Spot_Yield_10Y FLOAT,
  US_Yield_3M FLOAT,
  US_Yield_2Y FLOAT,
  US_Yield_5Y FLOAT,
  US_Yield_10Y FLOAT,
  US_Yield_Spread_10Y_vs_2Y FLOAT,
  PRIMARY KEY (date)
);
"""

# Plain INSERT with 17 placeholders, one per column listed above, in the same order.
insert_query = """
INSERT INTO timeseries_daily_macro (
  date,
  10Yr_T_Fut_Basis_Q,
  5Yr_T_Fut_Basis_Q,
  2Yr_T_Fut_Basis_Q,
  CCDC_Yield_3M,
  CCDC_Yield_2Y,
  CCDC_Yield_5Y,
  CCDC_Yield_10Y,
  CCDC_Spot_Yield_3M,
  CCDC_Spot_Yield_2Y,
  CCDC_Spot_Yield_5Y,
  CCDC_Spot_Yield_10Y,
  US_Yield_3M,
  US_Yield_2Y,
  US_Yield_5Y,
  US_Yield_10Y,
  US_Yield_Spread_10Y_vs_2Y
) VALUES (
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
   %s, %s
);
"""

# Drop, recreate and reload the whole macro table in one batch.
fast_write_data_into_sql(table_name='timeseries_daily_macro', data=macro_data, create_table_query=create_table_query, insert_query=insert_query, drop_table_query=drop_table_query)

Table timeseries_daily_macro dropped because it has existed.
Successfully created table timeseries_daily_macro
Successfully inserted 1411 rows into timeseries_daily_macro


# index

In [112]:
# Load the index code map and flatten it into a two-column frame:
# the former index is exposed as 'id', the remaining column as 'symbol_in_exchange'.
codemap = pd.DataFrame(
    pd.read_pickle('/Users/suguanting/Documents/5214/index_data/code_map')
).reset_index()
codemap.columns = ['id', 'symbol_in_exchange']
codemap

Unnamed: 0,id,symbol_in_exchange
0,000001.XSHG,000001.SH
1,000002.XSHG,000002.SH
2,000003.XSHG,000003.SH
3,000004.XSHG,000004.SH
4,000005.XSHG,000005.SH
...,...,...
6339,NH0600.INDX,nh0600.SH
6340,NH0700.INDX,nh0700.SH
6341,NH0800.INDX,nh0800.SH
6342,SSE180.INDX,sse180.SH


In [113]:
# Upsert into the existing id_map table: an id that is already present has its
# symbol_in_exchange overwritten with the incoming value.
insert_query = """
INSERT INTO id_map (id, symbol_in_exchange)
VALUES (%s, %s) AS alias 
ON DUPLICATE KEY UPDATE symbol_in_exchange = alias.symbol_in_exchange;
"""

# Append the index codes; no DROP/CREATE is issued (both queries are None),
# so id_map must already exist.
fast_write_data_into_sql(table_name='id_map', data=codemap, create_table_query=None, insert_query=insert_query, drop_table_query=None)

Successfully inserted 6344 rows into id_map


In [121]:
# Load the index metadata, rename 'order_book_id' to 'id', then turn both the
# '0000-00-00' placeholder and any missing value into the string 'N/A'.
index_info = (
    pd.read_pickle('/Users/suguanting/Documents/5214/index_data/index_info')
    .rename(columns={'order_book_id': 'id'})
    .replace('0000-00-00', 'N/A')
    .fillna('N/A')
)
index_info

Unnamed: 0,id,trading_hours,market_tplus,symbol,exchange,abbrev_symbol,round_lot,type,de_listed_date,listed_date,status
0,000001.XSHG,"09:31-11:30,13:01-15:00",0.0,上证指数,XSHG,SZZS,1.0,INDX,,1991-07-15,Active
1,000002.XSHG,"09:31-11:30,13:01-15:00",0.0,A股指数,XSHG,AGZS,1.0,INDX,,1992-02-21,Active
2,000003.XSHG,"09:31-11:30,13:01-15:00",0.0,B股指数,XSHG,BGZS,1.0,INDX,,1992-08-17,Active
3,000004.XSHG,"09:31-11:30,13:01-15:00",0.0,工业指数,XSHG,GYZS,1.0,INDX,,1993-05-03,Active
4,000005.XSHG,"09:31-11:30,13:01-15:00",0.0,商业指数,XSHG,SYZS,1.0,INDX,,1993-05-03,Active
...,...,...,...,...,...,...,...,...,...,...,...
6339,NH0600.INDX,"09:01-10:15,10:31-11:30,13:31-15:00",0.0,南华贵金属指数,,,1.0,INDX,,2012-09-11,
6340,NH0700.INDX,"09:01-10:15,10:31-11:30,13:31-15:00",0.0,南华有色金属指数,,,1.0,INDX,,2018-06-29,
6341,NH0800.INDX,"09:01-10:15,10:31-11:30,13:31-15:00",0.0,南华黑色指数,,,1.0,INDX,,2018-06-29,
6342,SSE180.INDX,"09:31-11:30,13:01-15:00",0.0,上证180,XSHG,SZYBL,1.0,INDX,,2002-07-01,


In [123]:
# DROP statement for index_info; it IS executed here, because the call at the
# bottom of this cell passes it as drop_table_query.
drop_table_query = "DROP TABLE IF EXISTS index_info;"

# index_info schema, keyed by id. The two date columns are VARCHAR rather than
# DATE; the loading cell fills missing values with the string 'N/A'.
# NOTE(review): the table column is named de_list_date while the DataFrame
# column is de_listed_date; loading still works because values are bound by
# position — confirm the differing name is intended.
create_table_query = """
CREATE TABLE IF NOT EXISTS index_info (
  id VARCHAR(255),
  trading_hours VARCHAR(255),
  market_tplus INT,
  symbol VARCHAR(255),
  exchange VARCHAR(255),
  abbrev_symbol VARCHAR(255),
  round_lot INT,
  type VARCHAR(255),
  de_list_date VARCHAR(255),
  listed_date VARCHAR(255),
  status VARCHAR(255),
  PRIMARY KEY (id)
);
"""

# Plain INSERT with 11 placeholders, one per column listed above, in the same order.
insert_query = """
INSERT INTO index_info (
  id,
  trading_hours,
  market_tplus,
  symbol,
  exchange,
  abbrev_symbol,
  round_lot,
  type,
  de_list_date,
  listed_date,
  status
) VALUES (
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
  %s
);
"""

# Drop, recreate and reload the whole index metadata table in one batch.
fast_write_data_into_sql(table_name='index_info', data=index_info, create_table_query=create_table_query, insert_query=insert_query, drop_table_query=drop_table_query)

Table index_info dropped because it has existed.
Successfully created table index_info
Successfully inserted 6344 rows into index_info


In [127]:
# Load the index end-of-day data, move the index levels into regular columns,
# rename 'order_book_id' to 'id' and discard rows with any missing value.
index_data = (
    pd.read_pickle('/Users/suguanting/Documents/5214/index_data/index_eod')
    .reset_index()
    .rename(columns={'order_book_id': 'id'})
    .dropna()
)
index_data

Unnamed: 0,id,date,open,high,low,close,volume
0,000001.XSHG,2019-01-02,2497.8805,2500.2783,2456.4233,2465.2910,1.099320e+10
1,000001.XSHG,2019-01-03,2461.7829,2488.4790,2455.9256,2464.3628,1.243975e+10
2,000001.XSHG,2019-01-04,2446.0193,2515.3160,2440.9066,2514.8682,1.688777e+10
3,000001.XSHG,2019-01-07,2528.6987,2536.9775,2515.5083,2533.0887,1.773050e+10
4,000001.XSHG,2019-01-08,2530.3001,2531.3450,2520.1648,2526.4622,1.580992e+10
...,...,...,...,...,...,...,...
6502561,SSE50.INDX,2024-04-08,2419.0293,2431.4053,2407.3590,2414.0910,3.987360e+09
6502562,SSE50.INDX,2024-04-09,2411.7806,2418.5972,2399.2833,2404.6089,3.562287e+09
6502563,SSE50.INDX,2024-04-10,2403.7309,2412.5898,2388.6829,2396.7427,3.361271e+09
6502564,SSE50.INDX,2024-04-11,2383.7027,2402.7498,2380.5128,2394.8667,3.128804e+09


In [128]:
# Executed once, together with the first batch below.
drop_table_query = "DROP TABLE IF EXISTS index_data;"

# One row per (id, date).
# NOTE(review): FLOAT is single precision in MySQL; volumes of the size shown
# in the loaded data may be rounded on insert — consider DOUBLE. Confirm.
create_table_query = """
CREATE TABLE IF NOT EXISTS index_data (
  id VARCHAR(255) NOT NULL,
  date DATE NOT NULL,
  open FLOAT,
  high FLOAT,
  low FLOAT,
  close FLOAT,
  volume FLOAT,
  PRIMARY KEY (id, date)
);
"""

insert_query = """
INSERT INTO index_data (
  id,
  date,
  open,
  high,
  low,
  close,
  volume
) VALUES (
  %s, %s, %s, %s, %s, %s, %s
);
"""

# Upload in batches of 200000 rows; each batch uses its own connection.
subsets = split_dataframe(index_data, 200000)
create = True

for data in subsets:
    # Only the first batch drops and recreates the table; later batches append.
    fast_write_data_into_sql(
        table_name='index_data',
        data=data,
        create_table_query=create_table_query if create else None,
        insert_query=insert_query,
        drop_table_query=drop_table_query if create else None,
    )
    create = False

Table index_data dropped because it has existed.
Successfully created table index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully inserted 200000 rows into index_data
Successfully 

# future

In [129]:
# Load the futures code map and flatten it into a two-column frame:
# the former index is exposed as 'id', the remaining column as 'symbol_in_exchange'.
codemap = pd.DataFrame(
    pd.read_pickle('/Users/suguanting/Documents/5214/future_data/code_map')
).reset_index()
codemap.columns = ['id', 'symbol_in_exchange']
codemap

Unnamed: 0,id,symbol_in_exchange
0,A0303,A0303.DCE
1,A0305,A0305.DCE
2,A0307,A0307.DCE
3,A0309,A0309.DCE
4,A0311,A0311.DCE
...,...,...
9098,ZN88,ZN88.SHF
9099,ZN888,ZN888.SHF
9100,ZN889,ZN889.SHF
9101,ZN88A2,ZN88A2.SHF


In [130]:
# Upsert into the existing id_map table: an id that is already present has its
# symbol_in_exchange overwritten with the incoming value.
insert_query = """
INSERT INTO id_map (id, symbol_in_exchange)
VALUES (%s, %s) AS alias 
ON DUPLICATE KEY UPDATE symbol_in_exchange = alias.symbol_in_exchange;
"""

# Append the futures codes; no DROP/CREATE is issued (both queries are None),
# so id_map must already exist.
fast_write_data_into_sql(table_name='id_map', data=codemap, create_table_query=None, insert_query=insert_query, drop_table_query=None)

Successfully inserted 9103 rows into id_map


In [133]:
# Load the futures metadata, rename 'order_book_id' to 'id', drop the
# 'underlying_order_book_id' column, then turn both the '0000-00-00'
# placeholder and any missing value into the string 'N/A'.
future_info = (
    pd.read_pickle('/Users/suguanting/Documents/5214/future_data/future_info')
    .rename(columns={'order_book_id': 'id'})
    .drop(columns='underlying_order_book_id')
    .replace('0000-00-00', 'N/A')
    .fillna('N/A')
)
future_info

Unnamed: 0,id,underlying_symbol,market_tplus,symbol,margin_rate,maturity_date,type,trading_code,exchange,product,contract_multiplier,round_lot,trading_hours,listed_date,industry_name,de_listed_date,start_delivery_date,end_delivery_date
0,A0303,A,0.0,豆一0303,0.05,2003-03-14,Future,a0303,DCE,Commodity,10.0,1.0,"21:01-23:00,09:01-10:15,10:31-11:30,13:31-15:00",2002-03-15,油脂,2003-03-14,2003-03-17,2003-03-21
1,A0305,A,0.0,豆一0305,0.05,2003-05-23,Future,a0305,DCE,Commodity,10.0,1.0,"21:01-23:00,09:01-10:15,10:31-11:30,13:31-15:00",2002-03-15,油脂,2003-05-23,2003-05-26,2003-05-30
2,A0307,A,0.0,豆一0307,0.05,2003-07-14,Future,a0307,DCE,Commodity,10.0,1.0,"21:01-23:00,09:01-10:15,10:31-11:30,13:31-15:00",2002-03-15,油脂,2003-07-14,2003-07-15,2003-07-21
3,A0309,A,0.0,豆一0309,0.05,2003-09-12,Future,a0309,DCE,Commodity,10.0,1.0,"21:01-23:00,09:01-10:15,10:31-11:30,13:31-15:00",2002-05-22,油脂,2003-09-12,2003-09-15,2003-09-19
4,A0311,A,0.0,豆一0311,0.05,2003-11-14,Future,a0311,DCE,Commodity,10.0,1.0,"21:01-23:00,09:01-10:15,10:31-11:30,13:31-15:00",2002-05-22,油脂,2003-11-14,2003-11-17,2003-11-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9098,ZN88,ZN,0.0,zn主力连续,0.08,,Future,ZN2406,SHFE,Commodity,5.0,1.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",,有色,,,
9099,ZN888,ZN,0.0,zn主力连续价差平滑,0.08,,Future,ZN2406,SHFE,Commodity,5.0,1.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",,有色,,,
9100,ZN889,ZN,0.0,zn主力连续价差平滑（后复权）,0.08,,Future,ZN2406,SHFE,Commodity,5.0,1.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",,有色,,,
9101,ZN88A2,ZN,0.0,zn次主力连续,0.08,,Future,ZN2407,SHFE,Commodity,5.0,1.0,"21:01-01:00,09:01-10:15,10:31-11:30,13:31-15:00",,有色,,,


In [135]:
# DROP statement for future_info; it IS executed here, because the call at the
# bottom of this cell passes it as drop_table_query.
drop_table_query = "DROP TABLE IF EXISTS future_info;"

# future_info schema, keyed by id. All date columns are VARCHAR rather than
# DATE; the loading cell fills missing values with the string 'N/A'.
# NOTE(review): the table column is named de_list_date while the DataFrame
# column is de_listed_date; loading still works because values are bound by
# position — confirm the differing name is intended.
# NOTE(review): contract_multiplier is INT here but FLOAT in option_info —
# confirm no futures contract has a fractional multiplier.
create_table_query = """
CREATE TABLE IF NOT EXISTS future_info (
  id VARCHAR(255),
  underlying_symbol VARCHAR(255),
  market_tplus INT,
  symbol VARCHAR(255),
  margin_rate FLOAT,
  maturity_date VARCHAR(255),
  type VARCHAR(255),
  trading_code VARCHAR(255),
  exchange VARCHAR(255),
  product VARCHAR(255),
  contract_multiplier INT,
  round_lot INT,
  trading_hours VARCHAR(255),
  listed_date VARCHAR(255),
  industry_name VARCHAR(255),
  de_list_date VARCHAR(255),
  start_delivery_date VARCHAR(255),
  end_delivery_date VARCHAR(255),
  PRIMARY KEY (id)
);
"""

# Plain INSERT with 18 placeholders, one per column listed above, in the same order.
insert_query = """
INSERT INTO future_info (
  id,
  underlying_symbol,
  market_tplus,
  symbol,
  margin_rate,
  maturity_date,
  type,
  trading_code,
  exchange,
  product,
  contract_multiplier,
  round_lot,
  trading_hours,
  listed_date,
  industry_name,
  de_list_date,
  start_delivery_date,
  end_delivery_date
) VALUES (
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
  %s, %s, %s, %s, %s, 
  %s, %s, %s
);
"""

# Drop, recreate and reload the whole futures metadata table in one batch.
fast_write_data_into_sql(table_name='future_info', data=future_info, create_table_query=create_table_query, insert_query=insert_query, drop_table_query=drop_table_query)

Table future_info dropped because it has existed.
Successfully created table future_info
Successfully inserted 9103 rows into future_info


In [176]:
# Load the end-of-day futures pickle and shape it for upload:
#   1. move the index levels back into ordinary columns,
#   2. rename `order_book_id` to `id`,
#   3. discard every row that contains a missing value.
future_data = (
    pd.read_pickle('/Users/suguanting/Documents/5214/future_data/future_eod')
    .reset_index()
    .rename(columns={'order_book_id': 'id'})
    .dropna()
)
future_data

Unnamed: 0,id,date,open,high,low,close,volume
0,A1901,2019-01-02,3166.000000,3201.000000,3156.000000,3199.000000,4024.0
1,A1901,2019-01-03,3210.000000,3255.000000,3191.000000,3250.000000,1648.0
2,A1901,2019-01-04,3250.000000,3250.000000,3228.000000,3228.000000,84.0
3,A1901,2019-01-07,3250.000000,3253.000000,3216.000000,3216.000000,246.0
4,A1901,2019-01-08,3216.000000,3216.000000,3206.000000,3206.000000,58.0
...,...,...,...,...,...,...,...
1288652,ZN99,2024-04-08,21750.001759,21839.690196,21496.860655,21727.249756,269066.0
1288653,ZN99,2024-04-09,21951.914705,21984.570203,21744.799716,21845.013246,270971.0
1288654,ZN99,2024-04-10,22062.547707,22626.724465,22021.396970,22624.334497,366241.0
1288655,ZN99,2024-04-11,22602.394840,22951.850908,22235.218171,22833.114520,491146.0


In [178]:
# Rebuild `timeseries_daily_future` and load the daily futures bars into it
# in chunks, so that no single executemany call carries the whole frame.
drop_table_query = "DROP TABLE IF EXISTS timeseries_daily_future;"

# One row per (contract id, trading date).
# The numeric columns are DOUBLE rather than FLOAT: MySQL FLOAT is single
# precision (about 7 significant digits), which would truncate float64
# prices with fractional parts such as 21750.001759 when they are stored.
create_table_query = """
CREATE TABLE IF NOT EXISTS timeseries_daily_future (
  id VARCHAR(255) NOT NULL,
  date DATE NOT NULL,
  open DOUBLE,
  high DOUBLE,
  low DOUBLE,
  close DOUBLE,
  volume DOUBLE,
  PRIMARY KEY (id, date)
);
"""

# Seven placeholders, one per column; rows are passed positionally by the
# helper, so this order has to match the DataFrame's column order.
insert_query = """
INSERT INTO timeseries_daily_future (
  id,
  date,
  open,
  high,
  low,
  close,
  volume
) VALUES (
  %s, %s, %s, %s, %s, %s, %s
);
"""

# Upload in chunks of 200000 rows.
subsets = split_dataframe(future_data, 200000)

for chunk_number, chunk in enumerate(subsets):
    # Only the first chunk drops and recreates the table; every later chunk
    # passes None for both DDL queries so it appends to the table just built.
    is_first_chunk = chunk_number == 0
    fast_write_data_into_sql(
        table_name='timeseries_daily_future',
        data=chunk,
        create_table_query=create_table_query if is_first_chunk else None,
        insert_query=insert_query,
        drop_table_query=drop_table_query if is_first_chunk else None,
    )

Table timeseries_daily_future dropped because it has existed.
Successfully created table timeseries_daily_future
Successfully inserted 200000 rows into timeseries_daily_future
Successfully inserted 200000 rows into timeseries_daily_future
Successfully inserted 200000 rows into timeseries_daily_future
Successfully inserted 200000 rows into timeseries_daily_future
Successfully inserted 200000 rows into timeseries_daily_future
Successfully inserted 200000 rows into timeseries_daily_future
Successfully inserted 88657 rows into timeseries_daily_future


# News

In [143]:
# Columns kept for the upload, in the order the INSERT statement expects.
news_columns = ['datetime', 'channel', 'content']

# Load the news pickle, keep the three upload columns, order the rows by
# timestamp, renumber them, and keep only the first row for each
# (datetime, channel) pair.
news = (
    pd.read_pickle('/Users/suguanting/Documents/5214/news_demo')
    .reset_index()[news_columns]
    .sort_values(by=['datetime'], ascending=True)
    .reset_index(drop=True)
    .drop_duplicates(subset=['datetime', 'channel'])
)
news

Unnamed: 0,datetime,channel,content
0,2021-03-01 07:30:01,commodity,全球债市跌势暂歇，美股科技股上周五小幅反弹，道指则跌1.5%。美国众议院通过1.9万亿财政刺...
1,2021-03-01 07:33:11,commodity,周一到周五到期规模分别为200亿、100亿、100亿、200亿和200亿元。
2,2021-03-01 09:00:38,commodity,中国央行公开市场进行100亿元7天期逆回购操作，另有200亿元逆回购到期。
3,2021-03-01 09:00:59,commodity,乙二醇期货主力开盘涨超5%，硅铁涨超4%，锰硅、纯碱涨超3%。20号胶跌超3%，沪银、菜粕、...
4,2021-03-01 09:45:00,commodity,中国2月财新制造业PMI 50.9，预期 51.3，前值 51.5。
...,...,...,...
4233,2021-03-30 23:50:10,global,意大利新增16017例新冠肺炎病例，上日增加12916例。
4234,2021-03-30 23:51:29,a-stock,据悉，美国食品公司嘉吉（Cargill）同意出售股份给巴西Copersucar（全球第一大糖...
4235,2021-03-30 23:51:29,commodity,据悉，美国食品公司嘉吉（Cargill）同意出售股份给巴西Copersucar（全球第一大糖...
4236,2021-03-30 23:51:29,global,据悉，美国食品公司嘉吉（Cargill）同意出售股份给巴西Copersucar（全球第一大糖...


In [144]:
# Rebuild the `news_info` table and bulk-load the prepared `news` frame.
drop_table_query = "DROP TABLE IF EXISTS news_info;"

# Composite primary key (datetime, channel): the `news` frame was
# de-duplicated on exactly this pair before upload.
create_table_query = """
CREATE TABLE IF NOT EXISTS news_info (
  datetime DATETIME,
  channel VARCHAR(255),
  content TEXT,
  PRIMARY KEY (datetime, channel)
);
"""

# Three placeholders matching the three columns of `news`, in order.
insert_query = """
INSERT INTO news_info (
  datetime,
  channel,
  content
) VALUES (
  %s, %s, %s
);
"""

# Single-shot upload: drop, create and executemany in one helper call.
fast_write_data_into_sql(
    table_name='news_info',
    data=news,
    create_table_query=create_table_query,
    insert_query=insert_query,
    drop_table_query=drop_table_query,
)

Table news_info dropped because it has existed.
Successfully created table news_info
Successfully inserted 4126 rows into news_info


In [147]:
# Move every table to the `static_*` / `timeseries_*` naming scheme with a
# single multi-table RENAME statement.
# NOTE: this cell is one-shot. After it succeeds, the source tables no longer
# exist under their old names, so running it again prints an error instead.
# NOTE(review): `timeseries_daily_future` is also the name created by the
# chunked futures upload cell; confirm the intended execution order, since a
# rename onto an existing table name is expected to fail.
rename_tables_query = """
RENAME TABLE 
    exchange_map TO static_map_exchange,
    future_data TO timeseries_daily_future,
    future_info TO static_info_future,
    id_map TO static_map_id,
    index_data TO timeseries_daily_index,
    index_info TO static_info_index,
    news_info TO timeseries_minute_news,
    option_data TO timeseries_daily_option,
    option_greeks_data TO timeseries_daily_option_greeks,
    option_info TO static_info_option,
    option_underlying_data TO timeseries_daily_option_underlying
"""

# Context managers close the cursor and the connection even when an
# unexpected exception escapes, matching the pattern used by the write
# helpers defined at the top of the notebook.
with mysql.connector.connect(**config) as cnx:
    with cnx.cursor() as cursor:
        try:
            cursor.execute(rename_tables_query)
            cnx.commit()
            print("Tables renamed successfully.")
        except mysql.connector.Error as err:
            # Database errors are reported rather than raised, as before.
            print(f"Error: {err}")

Tables renamed successfully.
