In [12]:
import pandas as pd
import numpy as np
%pwd

'/Users/IvanTang/quant/IMC_Prosperity3_GraniteFlow'

In [16]:
# Load the three round-1 price files (days -2, -1 and 0); the files are semicolon-separated.
df1, df2, df3 = [
    pd.read_csv(csv_path, delimiter = ";")
    for csv_path in (
        './data/round-1-island-data-bottle/prices_round_1_day_-2.csv',
        './data/round-1-island-data-bottle/prices_round_1_day_-1.csv',
        './data/round-1-island-data-bottle/prices_round_1_day_0.csv',
    )
]

In [19]:
# Stack the three daily frames into a single frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# per-file row label appears three times, which makes label-based lookups
# (df.loc[...]) return several rows and breaks index-aligned assignments.
df = pd.concat([df1, df2, df3], ignore_index=True)

In [22]:
def extract_product_df(df, symbol: str):
    """Return the rows of ``df`` whose ``product`` column equals ``symbol``.

    The original index labels and all columns are kept as they are.
    """
    is_requested_product = df['product'].eq(symbol)
    return df.loc[is_requested_product]

In [36]:
def orderbook_preprocess(df):
    """
    Preprocess three-level order-book snapshots.

    Steps:
      1. Fill NaN with 0 in the twelve ``bid/ask_price_1..3`` and
         ``bid/ask_volume_1..3`` columns. After this step a price of 0 marks
         an empty book level.
      2. Add (or overwrite) the columns ``best_bid``, ``best_ask``, ``spread``,
         ``mid_price``, ``vwap`` and ``orderbook_imbalance``.

    Column definitions:
      best_bid   highest non-zero bid price; NaN when there is no bid.
      best_ask   lowest non-zero ask price; NaN when there is no ask.
      spread     best_ask - best_bid; NaN when either side is empty.
      mid_price  (best_ask + best_bid) / 2; NaN when either side is empty.
      vwap       sum(price * volume) / sum(volume) over all six levels;
                 NaN when the total volume is 0.
      orderbook_imbalance
                 (bid volume - ask volume) / (bid volume + ask volume), counting
                 only levels whose price is non-zero; 0 when the total is 0.

    Parameters:
        df: DataFrame holding the twelve price/volume columns listed above.

    Returns:
        The same DataFrame object. It is modified in place and returned, so
        both ``df = orderbook_preprocess(df)`` and a bare call work.
    """
    levels = (1, 2, 3)
    bid_price_cols = [f'bid_price_{level}' for level in levels]
    bid_volume_cols = [f'bid_volume_{level}' for level in levels]
    ask_price_cols = [f'ask_price_{level}' for level in levels]
    ask_volume_cols = [f'ask_volume_{level}' for level in levels]

    # Fill missing order-book entries with 0.
    for column in ask_price_cols + ask_volume_cols + bid_price_cols + bid_volume_cols:
        df[column] = df[column].fillna(0)

    # Best quotes: the best bid is the HIGHEST bid and the best ask is the
    # LOWEST ask. Zero prices are placeholders for empty levels, so they are
    # masked to NaN first; max/min skip NaN and give NaN for an empty side.
    bid_prices = df[bid_price_cols]
    ask_prices = df[ask_price_cols]
    df['best_bid'] = bid_prices.where(bid_prices != 0).max(axis=1)
    df['best_ask'] = ask_prices.where(ask_prices != 0).min(axis=1)

    # NaN propagates through the arithmetic, so a one-sided book yields NaN
    # instead of raising or averaging a real price with the 0 placeholder.
    df['spread'] = df['best_ask'] - df['best_bid']
    df['mid_price'] = (df['best_ask'] + df['best_bid']) / 2

    # Volume-weighted average price over every level on both sides.
    price_cols = ask_price_cols + bid_price_cols
    volume_cols = ask_volume_cols + bid_volume_cols
    notional = sum(df[price] * df[volume] for price, volume in zip(price_cols, volume_cols))
    total_volume = sum(df[volume] for volume in volume_cols)
    df['vwap'] = notional / total_volume

    # Order-book imbalance: only levels with a non-zero price contribute.
    buy_pressure = sum(
        df[volume].where(df[price] != 0, 0)
        for price, volume in zip(bid_price_cols, bid_volume_cols)
    )
    sell_pressure = sum(
        df[volume].where(df[price] != 0, 0)
        for price, volume in zip(ask_price_cols, ask_volume_cols)
    )
    total_pressure = buy_pressure + sell_pressure
    # Mask a zero denominator to NaN, then report 0 imbalance for those rows.
    df['orderbook_imbalance'] = (
        (buy_pressure - sell_pressure) / total_pressure.where(total_pressure != 0)
    ).fillna(0.0)

    return df

In [37]:
# Fill the order-book NaNs and add the derived columns (best_bid, best_ask, spread,
# mid_price, vwap, orderbook_imbalance). orderbook_preprocess writes into the frame
# it is given and returns that same frame.
df = orderbook_preprocess(df)

In [38]:
# Plain-text preview of the first rows of the preprocessed frame.
print(df.head())

   day  timestamp           product  bid_price_1  bid_volume_1  bid_price_2  \
0   -2          0  RAINFOREST_RESIN         9996             1       9995.0   
1   -2          0              KELP         1998            26          0.0   
2   -2          0         SQUID_INK         1998            26          0.0   
3   -2        100         SQUID_INK         1999             5       1998.0   
4   -2        100  RAINFOREST_RESIN        10000             5       9995.0   

   bid_volume_2  bid_price_3  bid_volume_3  ask_price_1  ...  ask_volume_2  \
0          25.0          0.0           0.0        10004  ...          25.0   
1           0.0          0.0           0.0         2002  ...           0.0   
2           0.0          0.0           0.0         2002  ...           0.0   
3          26.0          0.0           0.0         2001  ...           0.0   
4          20.0          0.0           0.0        10005  ...           0.0   

   ask_price_3  ask_volume_3  mid_price  profit_and_loss

In [None]:
import math
def fractional_derivative(ts, alpha, n_terms=10):
    """
    Compute a truncated fractional-order difference of a time series.

    For each index ``t >= n_terms`` the result is

        sum_{k=0}^{n_terms-1} (-1)**k * C(alpha, k) * ts[t - k]

    where ``C(alpha, k)`` is the generalized binomial coefficient.

    Parameters:
        ts: the time series (any 1-D array-like of numbers).
        alpha: order of the derivative; integer and fractional values both work.
        n_terms: number of history terms, i.e. how long the memory is.

    Returns:
        A float64 array with the same length as ``ts``. The first ``n_terms``
        entries are NaN.

    Raises:
        ValueError: if ``n_terms`` is negative.
    """
    if n_terms < 0:
        raise ValueError("n_terms must be non-negative")

    series = np.asarray(ts, dtype=np.float64)
    result = np.full(series.shape, np.nan, dtype=np.float64)

    # Signed binomial weights w_k = (-1)**k * C(alpha, k), built with the
    # recurrence w_0 = 1, w_k = w_{k-1} * (k - 1 - alpha) / k.
    # A gamma-function formula is undefined at non-positive integers (so it
    # fails for integer alpha) and overflows for large arguments; the
    # recurrence has neither problem. The weights do not depend on t, so they
    # are computed once.
    weights = np.ones(n_terms, dtype=np.float64)
    for k in range(1, n_terms):
        weights[k] = weights[k - 1] * (k - 1 - alpha) / k

    # Reverse once so the weights line up with a chronological slice
    # ts[t - n_terms + 1 : t + 1] (w_0 multiplies ts[t]).
    chronological_weights = weights[::-1]

    # Starts at n_terms, leaving the first n_terms entries as NaN.
    for t in range(n_terms, len(series)):
        window = series[t - n_terms + 1 : t + 1]
        result[t] = np.dot(chronological_weights, window)
    return result

def rolling_window(ts, window_length: int):
    """Compute rolling-window features of a series.

    Parameters:
        ts: the series to analyse; it must provide ``diff`` and ``rolling``.
        window_length: lag used for the difference and size of the rolling window.

    Returns:
        A tuple ``(diff, vol, mean, zscore)``:
          diff    ``ts`` minus its value ``window_length`` steps earlier,
          vol     rolling standard deviation,
          mean    rolling mean,
          zscore  ``(ts - mean) / vol``.
    """
    window = ts.rolling(window_length)
    rolling_mean = window.mean()
    rolling_std = window.std()
    lagged_change = ts.diff(window_length)
    zscore = ts.sub(rolling_mean).div(rolling_std)
    return lagged_change, rolling_std, rolling_mean, zscore


In [None]:
# One frame per traded product, unpacked in the order KELP, RAINFOREST_RESIN, SQUID_INK.
df_kelp, df_resin, df_ink = (
    extract_product_df(df, product_name)
    for product_name in ('KELP', 'RAINFOREST_RESIN', 'SQUID_INK')
)