In [1]:
import datetime
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

from tqdm.notebook import tqdm
tqdm.pandas()

In [2]:
# Input CSV locations, relative to the notebook's working directory.
# The target exists once; ticker/book/trades each come as a pair of files:
# the plain one (suffixed "_perp" later in the notebook) and a ".spot" one.
target_path = 'data/target.csv'

ticker_path = 'data/ticker.csv'
ticker_spot_path = 'data/ticker.spot.csv'

book_path = 'data/book.csv'
book_spot_path = 'data/book.spot.csv'

trades_path = 'data/trades.csv'
trades_spot_path = 'data/trades.spot.csv'

In [3]:
# Target frame: read in full.
df_target = pd.read_csv(target_path)

# Ticker: positional columns 0 and 4-7 only (the timestamp plus the four
# bid/ask price/amount columns used further down).
df_ticker = pd.read_csv(ticker_path, usecols=[0, 4, 5, 6, 7])

# Order book: positional columns 3..103, i.e. `local_timestamp` plus
# 25 levels x (price, amount) x (bids, asks) = 100 level columns.
df_book = pd.read_csv(book_path, usecols=list(range(3, 104)))

In [4]:
# Spot ticker: note the positional layout differs from the first ticker file
# (columns 0 and 2-5 here).
df_ticker_spot = pd.read_csv(ticker_spot_path, usecols=[0, 2, 3, 4, 5])

# Spot order book: same positional slice (3..103) as the first book file.
df_book_spot = pd.read_csv(book_spot_path, usecols=list(range(3, 104)))

# Preprocessing

In [5]:
# Parse the timestamp column of each frame to datetime64. The book frame
# still uses the name `local_timestamp` at this point.
for frame, ts_column in (
    (df_target, 'local_ts'),
    (df_ticker, 'local_ts'),
    (df_book, 'local_timestamp'),
):
    frame[ts_column] = pd.to_datetime(frame[ts_column])

In [6]:
# Same datetime parsing for the spot frames.
for frame, ts_column in (
    (df_ticker_spot, 'local_ts'),
    (df_book_spot, 'local_timestamp'),
):
    frame[ts_column] = pd.to_datetime(frame[ts_column])

Удалим дубликаты и установим в качестве индекса 'local_ts'

In [7]:
# Keep the last row per timestamp, then index by `local_ts` in time order.
df_target, df_ticker = (
    frame.drop_duplicates(subset='local_ts', keep='last')
    .set_index('local_ts')
    .sort_index()
    for frame in (df_target, df_ticker)
)

# The book file names its timestamp column differently; align it first.
# Renamed in place to avoid copying the wide book frame.
df_book.rename(columns={'local_timestamp': 'local_ts'}, inplace=True)
df_book = df_book.drop_duplicates(subset='local_ts', keep='last')
df_book = df_book.set_index('local_ts').sort_index()

In [8]:
# Spot ticker: last row per timestamp, indexed by `local_ts`, time-ordered.
df_ticker_spot = df_ticker_spot.drop_duplicates(subset='local_ts', keep='last')
df_ticker_spot = df_ticker_spot.set_index('local_ts').sort_index()

# Spot book: align the timestamp column name first, then the same treatment.
df_book_spot.rename(columns={'local_timestamp': 'local_ts'}, inplace=True)
df_book_spot = df_book_spot.drop_duplicates(subset='local_ts', keep='last')
df_book_spot = df_book_spot.set_index('local_ts').sort_index()

Преобразуем df_book и df_book_spot: вместо множества столбцов сделаем несколько со списками внутри

In [9]:
# Source column names for the 25 book levels, keyed by the list-valued column
# that replaces them. Insertion order: bid_prices, bid_amounts, ask_prices,
# ask_amounts.
level_columns = {
    f'{side}_{field}s': [f'{side}s[{level}].{field}' for level in range(25)]
    for side in ('bid', 'ask')
    for field in ('price', 'amount')
}

for df_ in tqdm([df_book, df_book_spot]):
    # Level 0 is stored as "best" and level 24 as "worst", per side.
    # This must happen before the per-level columns are dropped below.
    for edge, level in (('best', 0), ('worst', 24)):
        for side in ('bid', 'ask'):
            df_[f'{edge}_{side}'] = df_[f'{side}s[{level}].price']

    tqdm_filters = tqdm(list(level_columns.items()))
    for key, filt in tqdm_filters:
        tqdm_filters.set_description(f'{key} are processing')

        # One Python list per row; the frames are mutated in place so that
        # `df_book` / `df_book_spot` themselves see the change.
        df_[key] = df_[filt].to_numpy().tolist()
        df_.drop(columns=filt, inplace=True)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
## A more generic way to do what the previous cell does, though possibly slower.
## NOTE(review): the snippet refers to `df`, while the previous cell operates on
## `df_book` / `df_book_spot` — adapt the name before re-enabling. The patterns
## would also be better written as raw strings (r'...').

# filters = [('ask_prices', 'asks\[\d+\]\.price'), ('ask_amounts', 'asks\[\d+\]\.amount'),
#            ('bid_prices', 'bids\[\d+\]\.price'), ('bid_amounts', 'bids\[\d+\]\.amount')]
# for key, filt in filters:
#     df[key] = df.filter(regex=filt).values.tolist()
#     df.drop(columns=df.filter(regex=filt).columns, inplace=True)

In [11]:
# Preview the collapsed book layout (first three snapshots).
df_book.head(n=3)

Unnamed: 0_level_0,best_bid,best_ask,worst_bid,worst_ask,bid_prices,bid_amounts,ask_prices,ask_amounts
local_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-03-22 00:00:00.485131264,28091.1,28091.2,28087.6,28093.7,"[28091.1, 28091.0, 28090.8, 28090.7, 28090.6, ...","[0.001, 0.003, 0.002, 0.002, 0.002, 0.02, 0.00...","[28091.2, 28091.3, 28091.4, 28091.5, 28091.6, ...","[16.183, 7.874, 1.451, 5.937, 0.298, 0.018, 0...."
2023-03-22 00:00:02.065465600,28090.8,28090.9,28087.4,28093.5,"[28090.8, 28090.7, 28090.6, 28090.5, 28090.4, ...","[0.002, 0.002, 0.002, 0.02, 0.001, 0.001, 0.00...","[28090.9, 28091.0, 28091.2, 28091.3, 28091.4, ...","[6.995, 0.735, 15.633, 7.874, 1.451, 5.937, 0...."
2023-03-22 00:00:02.691394048,28090.8,28090.9,28087.4,28093.5,"[28090.8, 28090.7, 28090.6, 28090.5, 28090.4, ...","[0.002, 0.002, 0.002, 0.02, 0.001, 0.001, 0.00...","[28090.9, 28091.0, 28091.2, 28091.3, 28091.4, ...","[7.005, 0.735, 15.633, 7.874, 1.451, 5.937, 0...."


Соединим df_target, df_ticker и df_ticker_spot в один датафрейм, в который в дальнейшем будем добавлять сгенерированные фичи

In [12]:
# Tag every ticker column with its market so the two tickers can share one
# frame without name clashes.
perp_columns = [f'{name}_perp' for name in df_ticker.columns]
df_ticker.columns = perp_columns

spot_columns = [f'{name}_spot' for name in df_ticker_spot.columns]
df_ticker_spot.columns = spot_columns

In [13]:
# Attach to each target row the most recent quote at or before its timestamp,
# first from the perp ticker and then from the spot ticker.
df = df_target
for quotes in (df_ticker, df_ticker_spot):
    df = pd.merge_asof(df, quotes, on='local_ts', direction='backward')

In [14]:
# The merged frame `df` supersedes these; release them.
del df_target
del df_ticker
del df_ticker_spot

In [15]:
# merge_asof returns `local_ts` as a regular column; restore it as the
# time-ordered index.
df = df.set_index('local_ts').sort_index()

In [16]:
# First rows of the merged target + ticker frame.
df.head(n=3)

Unnamed: 0_level_0,target,bid_price_perp,bid_amount_perp,ask_price_perp,ask_amount_perp,bid_price_spot,bid_amount_spot,ask_price_spot,ask_amount_spot
local_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-03-22 00:00:00.002269696,0,28091.1,0.885,28091.2,17.243,,,,
2023-03-22 00:00:00.023076352,0,28091.1,0.685,28091.2,17.243,28105.8,0.00191,28106.89,0.00638
2023-03-22 00:00:00.025819392,0,28091.1,0.685,28091.2,17.223,28105.08,0.03912,28105.42,0.004


Как можно видеть, у нас могут появляться NaN значения в самом начале. Пока их не удаляю и никак не заполняю

In [17]:
# Optional NaN handling, intentionally left disabled (leading NaNs are kept):
# df.dropna(axis=0, inplace=True)
# df.interpolate(method='backfill', inplace=True)

# Features

## Orderbook Imbalances

В статье для расчёта данной фичи используется "the median liquidity within the top **five basis points** of the top of the book". Для начала посмотрим, насколько у нас глубокий стакан в bp.

In [18]:
# Distance between level 0 and level 24 on each side, expressed in basis
# points (x 10000) of the farther-from-mid price for bids and of the best
# price for asks.
for df_ in [df_book, df_book_spot]:
    df_['bid_diff'] = df_['best_bid'].div(df_['worst_bid']).sub(1).mul(10000)
    df_['ask_diff'] = df_['worst_ask'].div(df_['best_ask']).sub(1).mul(10000)

In [19]:
# Summary statistics of the book depth (in bp) per side.
tuple(df_book[column].describe() for column in ('bid_diff', 'ask_diff'))

(count    6.403490e+06
 mean     1.204156e+00
 std      3.698358e-01
 min      8.361728e-01
 25%      9.941840e-01
 50%      1.099453e+00
 75%      1.282563e+00
 max      4.290430e+01
 Name: bid_diff, dtype: float64,
 count    6.403490e+06
 mean     1.211033e+00
 std      3.749822e-01
 min      8.311400e-01
 25%      9.929078e-01
 50%      1.099611e+00
 75%      1.306922e+00
 max      4.422557e+01
 Name: ask_diff, dtype: float64)

In [20]:
# Same depth statistics for the spot book.
tuple(df_book_spot[column].describe() for column in ('bid_diff', 'ask_diff'))

(count    2.305858e+06
 mean     1.043676e+00
 std      6.328813e-01
 min      1.425134e-01
 25%      6.592752e-01
 50%      8.686287e-01
 75%      1.220260e+00
 max      2.707499e+01
 Name: bid_diff, dtype: float64,
 count    2.305858e+06
 mean     1.046253e+00
 std      6.609816e-01
 min      1.338028e-01
 25%      6.533608e-01
 50%      8.681165e-01
 75%      1.230606e+00
 max      4.174346e+01
 Name: ask_diff, dtype: float64)

Как мы видим, в среднем глубина у нас 1.2 bp как для бид стороны, так и для аска (для спота примерно 1 bp). Поэтому если брать 5bp, то мы, по сути, будем брать медиану по всему стакану (для бида и аска в отдельности). И тут непонятно, насколько нас такая ситуация устраивает. Если в какой-то момент стакан асимметричен (2 bp по аску и 0.5 bp по биду), то рассчитывать так медианы, кажется, некорректно. 

Какое именно количество bp брать — отдельный вопрос, на который сходу ответить тяжело. С одной стороны, чем больше bp брать, тем менее информативным становится признак (много ситуаций с нулевыми значениями как по биду, так и по аску). С другой стороны, если мы берём низкие значения bp, то мы как бы смотрим, насколько вероятно пробитие стакана на меньшее количество bp, а у нас целевая переменная, насколько я помню, завязана на 1 bp, в связи с чем небольшие пробития стакана (меньше чем на 1 bp) нас не интересуют.

В итоге я решил заменить предложенные в статье 5 bp на 1 bp.

In [21]:
for df_ in [df_book, df_book_spot]:
    df_['bid_bp'] = df_['best_bid'] * (1 - 0.0001)
    df_['ask_bp'] = df_['best_ask'] * (1 + 0.0001)
    
    df_.drop(columns=['best_bid', 'best_ask', 'worst_bid', 'worst_ask', 'bid_diff', 'ask_diff'], inplace=True)

In [22]:
# Book frame after adding the 1 bp bounds and dropping the helper columns.
df_book.head(n=3)

Unnamed: 0_level_0,bid_prices,bid_amounts,ask_prices,ask_amounts,bid_bp,ask_bp
local_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-22 00:00:00.485131264,"[28091.1, 28091.0, 28090.8, 28090.7, 28090.6, ...","[0.001, 0.003, 0.002, 0.002, 0.002, 0.02, 0.00...","[28091.2, 28091.3, 28091.4, 28091.5, 28091.6, ...","[16.183, 7.874, 1.451, 5.937, 0.298, 0.018, 0....",28088.29089,28094.00912
2023-03-22 00:00:02.065465600,"[28090.8, 28090.7, 28090.6, 28090.5, 28090.4, ...","[0.002, 0.002, 0.002, 0.02, 0.001, 0.001, 0.00...","[28090.9, 28091.0, 28091.2, 28091.3, 28091.4, ...","[6.995, 0.735, 15.633, 7.874, 1.451, 5.937, 0....",28087.99092,28093.70909
2023-03-22 00:00:02.691394048,"[28090.8, 28090.7, 28090.6, 28090.5, 28090.4, ...","[0.002, 0.002, 0.002, 0.02, 0.001, 0.001, 0.00...","[28090.9, 28091.0, 28091.2, 28091.3, 28091.4, ...","[7.005, 0.735, 15.633, 7.874, 1.451, 5.937, 0....",28087.99092,28093.70909


К сожалению, расчёт данной фичи в векторном виде мне придумать не удалось, поэтому воспользуемся решением через apply

In [23]:
def _first_level_reaching(prices, cum_amounts, size):
    """Return the price of the first level whose cumulative amount is >= ``size``.

    Falls back to the last listed price when no level reaches ``size``.
    """
    reached = cum_amounts >= size
    return prices[np.argmax(reached)] if reached.any() else prices[-1]


def get_imb(x):
    """Add the order-book imbalance features to one book row.

    Parameters
    ----------
    x : row (mapping-like, e.g. a ``pd.Series``) providing
        ``bid_prices``, ``bid_amounts``, ``ask_prices``, ``ask_amounts``
        (per-level sequences, best level first) and the scalar price bounds
        ``bid_bp`` / ``ask_bp``.

    Returns
    -------
    The same object ``x``, mutated to also hold ``imb_bid`` and ``imb_ask``:
    the distance in basis points from the best price to the level at which
    the cumulative amount on that side reaches the reference size.
    """
    bids = np.array(x['bid_prices'])
    bid_sizes = np.array(x['bid_amounts'])
    asks = np.array(x['ask_prices'])
    ask_sizes = np.array(x['ask_amounts'])

    # Reference size: mean of the per-side median amounts, where each median
    # only considers levels inside the [bid_bp, ask_bp] price bounds.
    bid_in_band = np.where(bids >= x['bid_bp'], bid_sizes, np.nan)
    ask_in_band = np.where(asks <= x['ask_bp'], ask_sizes, np.nan)
    n = (np.nanmedian(bid_in_band) + np.nanmedian(ask_in_band)) / 2

    # Price level at which each side has accumulated at least `n`.
    p_bid = _first_level_reaching(bids, np.cumsum(bid_sizes), n)
    p_ask = _first_level_reaching(asks, np.cumsum(ask_sizes), n)

    x['imb_bid'] = (bids[0] / p_bid - 1) * 10000
    x['imb_ask'] = (p_ask / asks[0] - 1) * 10000

    return x

In [24]:
# Compute the imbalance features row by row for each book, keep only the new
# feature columns (suffixed with the market), and as-of join them onto `df`.
for df_, postfix in tqdm([(df_book, 'perp'), (df_book_spot, 'spot')]):
    imb_features = (
        df_.progress_apply(get_imb, axis=1)
        .drop(columns=['bid_prices', 'bid_amounts', 'ask_prices',
                       'ask_amounts', 'bid_bp', 'ask_bp'])
        .add_suffix(f'_{postfix}')
    )

    df = pd.merge_asof(df, imb_features, on='local_ts', direction='backward')

    # merge_asof hands `local_ts` back as a column; restore the sorted index.
    df = df.set_index('local_ts').sort_index()

del df_book, df_book_spot

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/6403490 [00:00<?, ?it/s]

  0%|          | 0/2305858 [00:00<?, ?it/s]

Так как первый снапшот ордербука у нас за "00:00:00.485131264", то IMB_perp фичи до этого времени имеют NaN (для спота такая же ситуация). Опять же, пока оставим пропуски, никак их не заполняя.

In [25]:
# Optional NaN handling, intentionally left disabled (gaps are kept as NaN):
# df.dropna(axis=0, inplace=True)
# df.interpolate(method='backfill', inplace=True)

In [26]:
# Feature frame after adding the order-book imbalance columns.
df.head(n=3)

Unnamed: 0_level_0,target,bid_price_perp,bid_amount_perp,ask_price_perp,ask_amount_perp,bid_price_spot,bid_amount_spot,ask_price_spot,ask_amount_spot,imb_bid_perp,imb_ask_perp,imb_bid_spot,imb_ask_spot
local_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-03-22 00:00:00.002269696,0,28091.1,0.885,28091.2,17.243,,,,,,,,
2023-03-22 00:00:00.023076352,0,28091.1,0.685,28091.2,17.243,28105.8,0.00191,28106.89,0.00638,,,,
2023-03-22 00:00:00.025819392,0,28091.1,0.685,28091.2,17.223,28105.08,0.03912,28105.42,0.004,,,,


## Trade Imbalances

In [27]:
# Optional restart point (disabled): reload a previously saved feature frame.
# df = pd.read_csv('data/df.csv')
# df['local_ts'] = pd.to_datetime(df['local_ts'])

# Trades: positional columns 0, 4, 5 and 6 — the ones referenced below as
# `local_ts`, `price`, `amount` and `side`.
df_trades, df_trades_spot = (
    pd.read_csv(path, usecols=[0, 4, 5, 6])
    for path in (trades_path, trades_spot_path)
)

In [28]:
# Parse timestamps, keep the last trade per timestamp, index by time.
# NOTE(review): `keep='last'` discards every other trade that shares a
# `local_ts`; if several fills can arrive with the same timestamp, their
# amounts are lost to the rolling sums computed from this frame — confirm
# this is intended.
df_trades, df_trades_spot = (
    frame.assign(local_ts=pd.to_datetime(frame['local_ts']))
    .drop_duplicates(subset='local_ts', keep='last')
    .set_index('local_ts')
    .sort_index()
    for frame in (df_trades, df_trades_spot)
)

Опять же, NaN'ы оставляю

In [29]:
# Trade-flow imbalance: rolling sum of signed trade amounts over several
# trailing time windows, as-of joined onto the feature frame `df`.
for df_, postfix in tqdm([(df_trades, 'perp'), (df_trades_spot, 'spot')]):
    # Sign each trade: sells ('S') count negative, buys ('B') positive.
    # An explicit mapping is used instead of
    # `df_['side'].replace(..., inplace=True)`: that in-place call on a column
    # selection does not update the frame under pandas Copy-on-Write, and the
    # warning about it is silenced at the top of the notebook. Any side value
    # other than 'S'/'B' maps to NaN and is skipped by the rolling sums.
    side_sign = df_['side'].map({'S': -1, 'B': 1})
    signed_amount = df_['amount'] * side_sign

    # Collect only the new feature columns, on the trades' time index.
    tfi = pd.DataFrame(index=df_.index)

    tqdm_horizons = tqdm([100, 250, 500, 1000, 2000])
    for time_horizon in tqdm_horizons:
        tqdm_horizons.set_description(f'{time_horizon}ms is processing')

        # Time-based trailing window; a single trade is enough for a value.
        tfi[f'tfi_{time_horizon}_{postfix}'] = signed_amount.rolling(
            f'{time_horizon}ms', min_periods=1
        ).sum()

    # Latest trade-based value at or before each target timestamp.
    df = pd.merge_asof(df, tfi, on='local_ts', direction='backward')
    df = df.set_index('local_ts').sort_index()

del df_trades, df_trades_spot

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

In [30]:
# Feature frame after adding the trade-flow imbalance columns.
df.head(n=3)

Unnamed: 0_level_0,target,bid_price_perp,bid_amount_perp,ask_price_perp,ask_amount_perp,bid_price_spot,bid_amount_spot,ask_price_spot,ask_amount_spot,imb_bid_perp,...,tfi_100_perp,tfi_250_perp,tfi_500_perp,tfi_1000_perp,tfi_2000_perp,tfi_100_spot,tfi_250_spot,tfi_500_spot,tfi_1000_spot,tfi_2000_spot
local_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-22 00:00:00.002269696,0,28091.1,0.885,28091.2,17.243,,,,,,...,-0.004,-0.004,-0.004,-0.004,-0.004,-0.00147,-0.00147,-0.00147,-0.00147,-0.00147
2023-03-22 00:00:00.023076352,0,28091.1,0.685,28091.2,17.243,28105.8,0.00191,28106.89,0.00638,,...,-0.204,-0.204,-0.204,-0.204,-0.204,-0.72505,-0.72505,-0.72505,-0.72505,-0.72505
2023-03-22 00:00:00.025819392,0,28091.1,0.685,28091.2,17.223,28105.08,0.03912,28105.42,0.004,,...,-0.204,-0.204,-0.204,-0.204,-0.204,-0.72932,-0.72932,-0.72932,-0.72932,-0.72932


## Past Returns

In [31]:
# Re-read the trades (timestamp, price, amount only — positional columns
# 0, 4, 5), parse timestamps, keep the last row per timestamp and index by
# time. The same pipeline runs for both trade files.
df_trades, df_trades_spot = (
    pd.read_csv(path, usecols=[0, 4, 5])
    .assign(local_ts=lambda frame: pd.to_datetime(frame['local_ts']))
    .drop_duplicates(subset='local_ts', keep='last')
    .set_index('local_ts')
    .sort_index()
    for path in (trades_path, trades_spot_path)
)

In [32]:
# Disabled row-wise reference implementation of the past-return feature:
# amount-weighted price over the 50 ms before "now" versus the same quantity
# `time_horizon` ms earlier, expressed in basis points.
# NOTE(review): appears to be superseded by the rolling-window cell that
# follows — confirm before deleting.
# def get_pret(x, df_trades, time_horizon=100):
#     trades_now = df_trades.loc[(x['local_ts'] - datetime.timedelta(milliseconds=50)):x['local_ts']]
#     p_now = (trades_now['price'] * trades_now['amount']).sum() / trades_now['amount'].sum()
    
#     time_lagged = x['local_ts'] - datetime.timedelta(milliseconds=time_horizon)
#     trades_lagged = df_trades.loc[(time_lagged - datetime.timedelta(milliseconds=50)):time_lagged]
#     p_lagged = (trades_lagged['price'] * trades_lagged['amount']).sum() / trades_lagged['amount'].sum()
    
#     return (p_now / p_lagged - 1) * 10000

# for time_horizon in [100, 250, 500, 1000, 2000]: 
#     df[f'pret_{time_horizon}'] = df.progress_apply(
#         lambda x: get_pret(x, df_trades, time_horizon=time_horizon),
#         axis=1
#     )

In [33]:
# Past returns: 50 ms trailing amount-weighted trade price now versus the same
# quantity `time_horizon` ms ago, in basis points, as-of joined onto `df`.
for df_, postfix in tqdm([(df_trades, 'perp'), (df_trades_spot, 'spot')]):
    # Rolling 50 ms sums of amount and price*amount give the weighted price.
    notional = (df_['price'] * df_['amount']).rename('p_a')
    window_sums = (
        pd.concat([df_['amount'], notional], axis=1)
        .rolling('50ms', min_periods=1)
        .sum()
    )
    vwap = window_sums['p_a'] / window_sums['amount']

    returns = pd.DataFrame(index=df_.index)

    tqdm_horizons = tqdm([100, 250, 500, 1000, 2000])
    for time_horizon in tqdm_horizons:
        tqdm_horizons.set_description(f'{time_horizon}ms is processing')

        # Shift the series forward in time by the horizon, then forward-fill
        # onto the original timestamps: each row receives the latest value
        # observed at or before (t - horizon).
        lag = pd.Timedelta(f'{time_horizon}ms')
        vwap_lagged = pd.Series(
            vwap.to_numpy(), index=vwap.index + lag
        ).reindex(vwap.index, method='ffill')

        returns[f'pret_{time_horizon}_{postfix}'] = (vwap / vwap_lagged - 1) * 10000

    df = pd.merge_asof(df, returns, on='local_ts', direction='backward')
    df = df.set_index('local_ts').sort_index()

del df_trades, df_trades_spot

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

## Mean Divergence

In [35]:
# Mid price per market: arithmetic mean of best bid and best ask
# (NaN if either side is missing).
for market in ('perp', 'spot'):
    df[f'mid_price_{market}'] = (
        df[f'bid_price_{market}'] + df[f'ask_price_{market}']
    ) / 2

# Perp-vs-spot mid-price divergence in basis points.
df['div'] = df['mid_price_perp'].div(df['mid_price_spot']).sub(1).mul(10000)

In [36]:
# Mean divergence: current perp/spot divergence minus its trailing mean over
# several time windows (in seconds).
tqdm_horizons = tqdm([5, 9, 19, 38, 75, 150, 300, 600])
for horizon in tqdm_horizons:
    # Label the bar with this loop's own variable. The previous code used
    # `time_horizon`, a leftover from an earlier cell, so the label was always
    # stale and the cell failed with NameError when run without that cell.
    tqdm_horizons.set_description(f'{horizon}s is processing')

    # Time-based trailing mean; one observation is enough to produce a value.
    rolling_mean_div = df['div'].rolling(f'{horizon}s', min_periods=1).mean()
    df[f'mean_div_{horizon}s'] = df['div'] - rolling_mean_div

  0%|          | 0/8 [00:00<?, ?it/s]

In [37]:
# Final feature frame, including the mean-divergence columns.
df.head(n=3)

Unnamed: 0_level_0,target,bid_price_perp,bid_amount_perp,ask_price_perp,ask_amount_perp,bid_price_spot,bid_amount_spot,ask_price_spot,ask_amount_spot,imb_bid_perp,...,mid_price_spot,div,mean_div_5s,mean_div_9s,mean_div_19s,mean_div_38s,mean_div_75s,mean_div_150s,mean_div_300s,mean_div_600s
local_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-22 00:00:00.002269696,0,28091.1,0.885,28091.2,17.243,,,,,,...,,,,,,,,,,
2023-03-22 00:00:00.023076352,0,28091.1,0.685,28091.2,17.243,28105.8,0.00191,28106.89,0.00638,,...,28106.345,-5.406253,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-03-22 00:00:00.025819392,0,28091.1,0.685,28091.2,17.223,28105.08,0.03912,28105.42,0.004,,...,28105.25,-5.016856,0.194698,0.194698,0.194698,0.194698,0.194698,0.194698,0.194698,0.194698


In [38]:
# Persist the feature frame; the `local_ts` index is written as the first column.
df.to_csv('data/df.csv', index=True)