In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from copy import deepcopy

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
# Load the two input tables. Later cells read the 'datetime', 'bitmex_price' and
# 'aax_price' columns of bs_df and the 'datetime' and 'Labels 7' columns of i_df.
bs_df = pd.read_csv('IMBALANCES/BITMEX_SIGNALS.csv')
i_df = pd.read_csv('IMBALANCES/Imb 7.csv')

In [3]:
# Shared time window of the two tables: the later of the two first timestamps
# and the earlier of the two last timestamps.
# NOTE(review): this uses the first/last rows, so it assumes both frames are
# ordered by 'datetime'; the values are compared as read from the CSV (they are
# only parsed with pd.to_datetime in a later cell) -- confirm both hold.
start_date = max(bs_df['datetime'].iloc[0], i_df['datetime'].iloc[0])
finish_date = min(bs_df['datetime'].iloc[-1], i_df['datetime'].iloc[-1])

In [4]:
# Keep only the rows inside the shared [start_date, finish_date] window
# (both bounds inclusive).
# .copy() makes each result an independent frame: later cells add and overwrite
# columns on bs_df / i_df, which would otherwise operate on a slice of the
# original frame and trigger SettingWithCopyWarning.
bs_df = bs_df[bs_df['datetime'].between(start_date, finish_date)].copy()
i_df = i_df[i_df['datetime'].between(start_date, finish_date)].copy()

In [5]:
def get_signals(prices: tuple, threshold_pct: float = 0.05) -> str:
    """Classify the BitMEX price relative to the AAX price.

    Parameters
    ----------
    prices : tuple
        ``(bitmex_price, aax_price)`` pair.
    threshold_pct : float, default 0.05
        Half-width of the dead band, expressed in percent of ``aax_price``.

    Returns
    -------
    str
        'Unchanged' when the absolute gap between the two prices is within
        the dead band, 'Up' when BitMEX is above AAX, 'Down' otherwise.
    """
    bitmex_price, aax_price = prices
    gap = bitmex_price - aax_price
    price_thr = aax_price * threshold_pct / 100
    if abs(gap) <= price_thr:
        return 'Unchanged'
    return 'Up' if gap > 0 else 'Down'

In [6]:
# Pull both price columns out as plain arrays for the element-wise labelling below.
bitmex_prices = bs_df['bitmex_price'].values
aax_prices = bs_df['aax_price'].values

In [7]:
# Label every (bitmex, aax) price pair with get_signals, keeping row order.
signals = [get_signals(pair) for pair in zip(bitmex_prices, aax_prices)]

In [8]:
# Attach the per-row labels to the frame they were computed from.
bs_df['signals'] = signals

In [9]:
# Parse the 'datetime' columns with pandas so the later cells can compare them
# against interval bounds and subtract them.
bs_df['datetime'] = pd.to_datetime(bs_df['datetime'])
i_df['datetime'] = pd.to_datetime(i_df['datetime'])

In [10]:
# Renumber the rows 0..n-1 after the window filter removed some of them.
bs_df.index = range(len(bs_df))
i_df.index = range(len(i_df))

In [11]:
# Signed gap between the two prices: positive when BitMEX is above AAX.
bs_df['diff_price'] = bs_df['bitmex_price'] - bs_df['aax_price']

In [12]:
# Imbalance labels and their timestamps as parallel arrays (same row order).
imbs = i_df['Labels 7'].values 
imb_datetimes = i_df['datetime'].values

In [13]:
# Collect the change points of the label sequence: one (label, position) pair
# for every position whose label differs from the previous one. The sentinel
# 'Unknown' makes the first row count as a change.
values = []
value = 'Unknown'
for position, label in enumerate(imbs):
    if value == label:
        continue
    value = label
    values.append((label, position))

In [14]:
#values[:10]

In [15]:
# Inspect the filtered frame together with its derived 'signals' and 'diff_price' columns.
bs_df

Unnamed: 0,datetime,bitmex_price,aax_price,signals,diff_price
0,2020-08-10 21:42:00.307023,11832.75,11835.75,Unchanged,-3.00
1,2020-08-10 21:42:00.901199,11832.75,11836.00,Unchanged,-3.25
2,2020-08-10 21:42:01.204288,11832.75,11835.75,Unchanged,-3.00
3,2020-08-10 21:42:01.966371,11832.25,11835.75,Unchanged,-3.50
4,2020-08-10 21:42:06.332528,11832.25,11833.75,Unchanged,-1.50
...,...,...,...,...,...
1443807,2020-08-31 23:59:55.877350,11657.75,11661.25,Unchanged,-3.50
1443808,2020-08-31 23:59:57.707444,11657.75,11666.25,Down,-8.50
1443809,2020-08-31 23:59:58.012177,11657.75,11666.00,Down,-8.25
1443810,2020-08-31 23:59:58.318224,11657.75,11666.25,Down,-8.50


In [16]:
# Build one summary record per imbalance segment. A segment runs from one label
# change point to the next (end exclusive); the stretch after the final change
# point has no end bound and is therefore not emitted.
# NOTE(review): every iteration scans the whole of bs_df with a boolean mask;
# if bs_df is guaranteed to be sorted by 'datetime' a positional lookup would
# be much cheaper -- confirm before changing.
items = []
for seg in tqdm(range(len(values) - 1)):
    first_date = imb_datetimes[values[seg][1]]
    second_date = imb_datetimes[values[seg + 1][1]]

    in_segment = (bs_df['datetime'] >= first_date) & (bs_df['datetime'] < second_date)
    temp_df = bs_df[in_segment]

    # Segments without any price rows contribute nothing.
    if not len(temp_df):
        continue

    bitmex = temp_df['bitmex_price']
    aax = temp_df['aax_price']
    segment_signals = temp_df['signals']

    # Key order matters: a later cell renames the resulting columns by position.
    items.append({
        'first_date': first_date,
        'second_date': second_date,
        'first_bp': bitmex.iloc[0],
        'last_bp': bitmex.iloc[-1],
        'max_bp': bitmex.max(),
        'min_bp': bitmex.min(),
        'first_ap': aax.iloc[0],
        'last_ap': aax.iloc[-1],
        'max_ap': aax.max(),
        'min_ap': aax.min(),
        'first_signal': segment_signals.iloc[0],
        'last_signal': segment_signals.iloc[-1],
        'imbalance': values[seg][0],
    })

100%|██████████| 28069/28069 [01:33<00:00, 299.90it/s]


In [17]:
# One row per non-empty segment; columns follow the key order of the records.
df = pd.DataFrame(items)

In [18]:
# Make sure both segment bounds are datetime-typed columns before subtracting them.
df['first_date'] = pd.to_datetime(df['first_date'])
df['second_date'] = pd.to_datetime(df['second_date'])

In [19]:
# Segment length: end bound minus start bound.
df['date_diff'] = df['second_date'] - df['first_date']

In [20]:
# Peek at the first segments whose imbalance label is anything other than 'Unchanged'.
df[(df['imbalance'] != 'Unchanged')].head()

Unnamed: 0,first_date,second_date,first_bp,last_bp,max_bp,min_bp,first_ap,last_ap,max_ap,min_ap,first_signal,last_signal,imbalance,date_diff
0,2020-08-10 21:42:01,2020-08-10 21:42:09,11832.75,11832.25,11832.75,11832.25,11835.75,11833.75,11835.75,11833.75,Unchanged,Unchanged,Down,00:00:08
2,2020-08-10 21:44:38,2020-08-10 21:44:54,11833.75,11832.25,11833.75,11832.25,11831.5,11831.5,11831.5,11831.5,Unchanged,Unchanged,Down,00:00:16
4,2020-08-10 21:44:57,2020-08-10 21:45:03,11830.0,11823.75,11830.0,11823.5,11831.5,11831.5,11831.5,11831.5,Unchanged,Down,Down,00:00:06
6,2020-08-10 21:50:54,2020-08-10 21:51:07,11838.25,11838.75,11838.75,11838.25,11834.75,11837.75,11838.25,11834.75,Unchanged,Unchanged,Up,00:00:13
8,2020-08-10 21:51:10,2020-08-10 21:51:46,11839.75,11839.75,11839.75,11839.75,11839.5,11840.25,11840.25,11839.5,Unchanged,Unchanged,Up,00:00:36


In [21]:
# First minus last AAX price of the segment: positive when the AAX price fell.
df['price_diff'] = df['first_ap'] - df['last_ap']

In [22]:
def price_move(diff_price: float, threshold: float = 1) -> str:
    """Turn a first-minus-last price difference into a direction label.

    Parameters
    ----------
    diff_price : float
        First price minus last price, so a positive value means the price fell.
    threshold : float, default 1
        Differences whose absolute value is at most this are 'Unchanged'.

    Returns
    -------
    str
        'Unchanged', 'Down' (positive difference) or 'Up' (anything else).
        Note that a NaN difference fails both comparisons and is labelled 'Up'.
    """
    if abs(diff_price) <= threshold:
        return 'Unchanged'
    return 'Down' if diff_price > 0 else 'Up'

In [23]:
# Per-segment price differences as a plain array for element-wise labelling.
price_diffs = df['price_diff'].values

In [24]:
# Direction label for every segment, in row order.
df['price_move'] = [price_move(delta) for delta in price_diffs]

In [25]:
# Spot-check the first segments with their new 'price_diff' and 'price_move' columns.
df.head(3)

Unnamed: 0,first_date,second_date,first_bp,last_bp,max_bp,min_bp,first_ap,last_ap,max_ap,min_ap,first_signal,last_signal,imbalance,date_diff,price_diff,price_move
0,2020-08-10 21:42:01,2020-08-10 21:42:09,11832.75,11832.25,11832.75,11832.25,11835.75,11833.75,11835.75,11833.75,Unchanged,Unchanged,Down,00:00:08,2.0,Down
1,2020-08-10 21:42:09,2020-08-10 21:44:38,11832.25,11834.25,11834.25,11832.25,11831.75,11831.5,11831.75,11831.25,Unchanged,Unchanged,Unchanged,00:02:29,0.25,Unchanged
2,2020-08-10 21:44:38,2020-08-10 21:44:54,11833.75,11832.25,11833.75,11832.25,11831.5,11831.5,11831.5,11831.5,Unchanged,Unchanged,Down,00:00:16,0.0,Unchanged


In [73]:
# Independent copy of df with OHLC-style column names ('_bm' = BitMEX columns,
# '_ax' = AAX columns). 'imbalance', 'price_diff' and 'price_move' keep their names.
# Renaming by name instead of assigning a positional list to .columns keeps
# every label attached to the right data even if df's column order changes;
# errors='raise' fails loudly if an expected source column is missing.
df_exp = df.rename(
    columns={
        'first_date': 'start',
        'second_date': 'end',
        'first_bp': 'Open_bm',
        'last_bp': 'Close_bm',
        'max_bp': 'High_bm',
        'min_bp': 'Low_bm',
        'first_ap': 'Open_ax',
        'last_ap': 'Close_ax',
        'max_ap': 'High_ax',
        'min_ap': 'Low_ax',
        'first_signal': 'start_signal',
        'last_signal': 'end_signal',
        'date_diff': 'duration',
    },
    errors='raise',
)

In [74]:
#df[(df['first_signal']=="Unchanged")&(df['last_signal']=="Unchanged")].head(15)

In [75]:
# Keep the timing, BitMEX OHLC and label columns, in display order.
# .copy() detaches the selection from the wider frame so that the columns
# added in the following cells are written to an independent frame instead of
# a slice (which would trigger SettingWithCopyWarning).
df_exp = df_exp[['start','end','duration',
                 'Open_bm','Close_bm','High_bm','Low_bm','price_move',
                 'start_signal','end_signal','imbalance',]].copy()

In [76]:
# Spot-check the renamed and reordered columns.
df_exp.head(3)

Unnamed: 0,start,end,duration,Open_bm,Close_bm,High_bm,Low_bm,price_move,start_signal,end_signal,imbalance
0,2020-08-10 21:42:01,2020-08-10 21:42:09,00:00:08,11832.75,11832.25,11832.75,11832.25,Down,Unchanged,Unchanged,Down
1,2020-08-10 21:42:09,2020-08-10 21:44:38,00:02:29,11832.25,11834.25,11834.25,11832.25,Unchanged,Unchanged,Unchanged,Unchanged
2,2020-08-10 21:44:38,2020-08-10 21:44:54,00:00:16,11833.75,11832.25,11833.75,11832.25,Unchanged,Unchanged,Unchanged,Down


In [77]:
# Per-segment average of the four BitMEX OHLC prices; used as the price series
# for the volatility calculation.
df_exp['mean_price'] = (df_exp['Open_bm']+df_exp['Close_bm']+df_exp['High_bm']+df_exp['Low_bm'])/4

In [78]:
# Standard (rolling standard deviation) volatility
def calculation_volatility_STD(df_data_close_price: pd.Series,
                               win_size,
                               scale: float = 1000) -> pd.Series:
    """Rolling standard deviation of the relative price changes.

    Parameters
    ----------
    df_data_close_price : pd.Series
        Price series; relative changes are taken between consecutive rows.
    win_size
        Window passed to ``Series.rolling``.
    scale : float, default 1000
        Factor the rolling standard deviation is multiplied by.

    Returns
    -------
    pd.Series
        Scaled rolling volatility, aligned with the input index. Leading rows
        without a full window of changes are NaN.
    """
    returns = df_data_close_price.pct_change()
    volatility_STD = returns.rolling(win_size).std() * scale
    return volatility_STD

# Add the 5-row rolling volatility of the mean price, then discard the helper column.
df_exp = (
    df_exp
    .assign(volat=calculation_volatility_STD(df_exp['mean_price'], win_size=5))
    .drop(columns='mean_price')
)

In [79]:
# Discard rows containing any missing value and renumber the rest 0..n-1.
df_exp = df_exp.dropna().reset_index(drop=True)

In [80]:
# Spot-check the frame after adding 'volat' and dropping incomplete rows.
df_exp.head(3)

Unnamed: 0,start,end,duration,Open_bm,Close_bm,High_bm,Low_bm,price_move,start_signal,end_signal,imbalance,volat
0,2020-08-10 21:45:03,2020-08-10 21:50:54,00:05:51,11823.25,11837.25,11837.25,11817.25,Up,Down,Unchanged,Unchanged,0.20086
1,2020-08-10 21:50:54,2020-08-10 21:51:07,00:00:13,11838.25,11838.75,11838.75,11838.25,Up,Unchanged,Unchanged,Up,0.452059
2,2020-08-10 21:51:07,2020-08-10 21:51:08,00:00:01,11839.75,11839.75,11839.75,11839.75,Up,Unchanged,Unchanged,Unchanged,0.447888


In [59]:
# Summary statistics of the rolling volatility over all rows of df_exp.
df_exp['volat'].describe()

count    19300.000000
mean         0.313394
std          0.376171
min          0.000000
25%          0.141531
50%          0.235147
75%          0.372101
max         12.729723
Name: volat, dtype: float64

In [83]:
# Volatility statistics restricted to segments where the start signal, the end
# signal and the imbalance label are all 'Unchanged'.
all_unchanged = (
    (df_exp['start_signal'] == 'Unchanged')
    & (df_exp['end_signal'] == 'Unchanged')
    & (df_exp['imbalance'] == 'Unchanged')
)
df_exp.loc[all_unchanged, 'volat'].describe()

count    5269.000000
mean        0.283891
std         0.311216
min         0.000000
25%         0.135070
50%         0.225042
75%         0.342987
max        12.329809
Name: volat, dtype: float64