# Загрузка Order book из CSV

In [31]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import cross_val_predict
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib inline
from alex.database import Database
from pprint import pprint

### Скоростная загрузка с диска, из заархивированного CSV-файла

In [32]:
# результат выполнения запроса:
# select * from tvf_get_order_book_snapshots(2880) where exchange='binance' and pair='ETH/USDT' # запрос на 20 минут
#order = pd.read_csv('tvf_get_order_book_snapshots 15.09.2018-01.10.2018.zip', index_col='dt')

In [33]:
# Load the trade history for Binance ETH/USDT from the v_history view
# (the original note says this takes about 15 seconds).
db = Database()
# Plain string literal: the query contains no placeholders, so an f-string
# prefix is unnecessary.
sql_history = "select * from v_history with (snapshot) where exchange='binance' and pair='ETH/USDT' and dt>'2018-09-23'"
# Use the dt column as the index so rows can later be selected by time.
df_history = db.query(sql_history).set_index('dt')

In [34]:
# Number of history rows returned by the query.
len(df_history)

1131253

### Получение Order Book в другом виде, напрямую из исторической таблицы dbo.order_book

Это не снимки, а только новые записи, добавляемые в order book в момент срабатывания воркера. Поля ValidFrom и ValidTill показывают время жизни каждого ордера.

In [35]:
# Fetch the order-book rows together with their ValidFrom / ValidTill
# columns; the original note says this should take about 30 seconds.
# id_ex_pair=19 is Binance + ETH/USDT.
sql_orderbook = (
    "SELECT *, ValidFrom, ValidTill "
    "FROM mem.order_book "
    "for system_time from '2018-09-23 00:00:00' to '2018-10-01 00:00:00' "
    "where id_ex_pair=19"
)
df_orderbook = db.query(sql_orderbook)

In [36]:
# Parse the three timestamp columns into pandas datetimes.
for ts_column in ('dt', 'ValidFrom', 'ValidTill'):
    df_orderbook[ts_column] = pd.to_datetime(df_orderbook[ts_column])
# Store the record id as an integer.
df_orderbook['id'] = df_orderbook['id'].astype(int)
# Remove the id_ex_pair column from the frame.
df_orderbook.drop('id_ex_pair', axis=1, inplace=True)

In [37]:
# dt is the load time; id is the record's sequence number, a plain
# identity(1,1) increment. Order the rows by (dt, id) and make that pair
# the index.
order_keys = ['dt', 'id']
df_orderbook = df_orderbook.sort_values(order_keys).set_index(order_keys)

##### Скрипт загружает Order Book 1 раз в 3 секунды. Интервалы более 3 секунд между соседними записями могут означать, что Order Book не менялся либо по каким-то причинам увеличилось время срабатывания скрипта.

In [38]:
# Preview the first rows of the order book, now indexed by (dt, id).
df_orderbook.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,is_bid,price,amount,ValidFrom,ValidTill
dt,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-22 21:38:49,10674654,False,242.78,0.20449,2018-09-22 21:38:50,2018-09-23 01:06:12
2018-09-22 21:55:53,10707602,False,242.74,1.25381,2018-09-22 21:55:54,2018-09-23 01:06:27
2018-09-22 22:44:34,10795140,False,241.98,0.9954,2018-09-22 22:44:35,2018-09-23 00:02:06
2018-09-22 22:46:46,10798913,False,242.79,0.20728,2018-09-22 22:46:47,2018-09-23 01:06:08
2018-09-22 22:46:49,10799010,False,242.91,0.05014,2018-09-22 22:46:51,2018-09-23 01:05:54


In [39]:
#df_orderbook.ValidFrom.dt.tz_localize('Europe/Moscow')
# Build ValidFromMsc / ValidTillMsc by shifting both validity timestamps
# back by three hours.
# NOTE(review): the "Msc" suffix suggests Moscow time. Moscow is UTC+3, so
# if ValidFrom / ValidTill are stored in UTC, converting to Moscow time
# would add three hours rather than subtract them - confirm the direction.
three_hours = pd.Timedelta(hours=3)
for source_col in ('ValidFrom', 'ValidTill'):
    df_orderbook[source_col + 'Msc'] = df_orderbook[source_col] - three_hours

In [40]:
# (rows, columns) of the order book after adding the *Msc columns.
df_orderbook.shape

(2317073, 7)

In [41]:
# volume = price * amount for every order-book row.
df_orderbook['volume'] = df_orderbook['price'] * df_orderbook['amount']

In [42]:
# Preview the first rows again, now with the ValidFromMsc, ValidTillMsc
# and volume columns.
df_orderbook.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,is_bid,price,amount,ValidFrom,ValidTill,ValidFromMsc,ValidTillMsc,volume
dt,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-09-22 21:38:49,10674654,False,242.78,0.20449,2018-09-22 21:38:50,2018-09-23 01:06:12,2018-09-22 18:38:50,2018-09-22 22:06:12,49.646082
2018-09-22 21:55:53,10707602,False,242.74,1.25381,2018-09-22 21:55:54,2018-09-23 01:06:27,2018-09-22 18:55:54,2018-09-22 22:06:27,304.349839
2018-09-22 22:44:34,10795140,False,241.98,0.9954,2018-09-22 22:44:35,2018-09-23 00:02:06,2018-09-22 19:44:35,2018-09-22 21:02:06,240.866892
2018-09-22 22:46:46,10798913,False,242.79,0.20728,2018-09-22 22:46:47,2018-09-23 01:06:08,2018-09-22 19:46:47,2018-09-22 22:06:08,50.325511
2018-09-22 22:46:49,10799010,False,242.91,0.05014,2018-09-22 22:46:51,2018-09-23 01:05:54,2018-09-22 19:46:51,2018-09-22 22:05:54,12.179507


In [43]:
#df_grouped = pd.DataFrame(columns = ['dt', 'id', 'is_bid', 'price', 'amount', 'ValidFromMsc', 'ValidTillMsc', 'volume'])

In [44]:
# (rows, columns) of the trade history frame.
df_history.shape

(1131253, 11)

In [45]:
# (rows, columns) of the order-book frame, for comparison with the history.
df_orderbook.shape

(2317073, 8)

In [50]:
# Show the first history rows when ordered by dt (dt is the frame's index).
df_history.sort_values('dt').head()

Unnamed: 0_level_0,rownum,id_ex_pair,exchange,pair,price,amount,type,side,id,location,insert_date
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-09-26 02:59:58,5496343,19,binance,ETH/USDT,219.610001,0.84415,,buy,37299194.0,memory,2018-09-26 03:00:02.2166667
2018-09-26 02:59:58,5496342,19,binance,ETH/USDT,219.509995,0.00174,,buy,37299192.0,memory,2018-09-26 03:00:02.2166667
2018-09-26 02:59:58,5496345,19,binance,ETH/USDT,219.619995,4.25755,,buy,37299195.0,memory,2018-09-26 03:00:02.2166667
2018-09-26 02:59:58,5496344,19,binance,ETH/USDT,219.619995,0.89826,,buy,37299193.0,memory,2018-09-26 03:00:02.2166667
2018-09-26 03:00:00,5496346,19,binance,ETH/USDT,219.5,1e-05,,sell,37299196.0,memory,2018-09-26 03:00:02.2166667


In [56]:
# Select the history rows for the test window by label-slicing the dt index.
# The index is sorted first: label slicing on a non-monotonic DatetimeIndex
# with bounds that are not exact index values is deprecated in pandas 1.5
# and raises KeyError from pandas 2.0. A stable sort (mergesort) keeps rows
# that share a timestamp in their original relative order.
# NOTE(review): these string bounds have minute resolution, so per pandas
# partial-string slicing the end bound should cover the whole 00:00 minute
# of 2018-09-30, while the order-book filter in this notebook uses strict
# comparisons on the same strings - confirm the two windows are meant to
# differ at the edges.
df_history_test = df_history.sort_index(kind='mergesort')['2018-09-27 0:00':'2018-09-30 0:00']

In [57]:
# (rows, columns) of the history test window.
df_history_test.shape

(510576, 11)

In [64]:
# Turn the (dt, id) index back into ordinary columns so dt can be filtered
# and used as a merge key.
df_orderbook = df_orderbook.reset_index()
# Keep only rows strictly inside the test window (both bounds exclusive).
window_start = '2018-09-27 0:00'
window_end = '2018-09-30 0:00'
loaded_at = df_orderbook['dt']
in_test_window = (loaded_at > window_start) & (loaded_at < window_end)
df_orderbook_test = df_orderbook[in_test_window]

In [65]:
# (rows, columns) of the order-book test window.
df_orderbook_test.shape

(901855, 10)

In [67]:
# Outer-join the order-book test window with the history test window on dt,
# keeping rows from either side that have no partner.
# NOTE(review): dt is not unique on either side, so every order-book row is
# paired with every history row that shares its timestamp - confirm that
# this many-to-many expansion is intended.
df_merged = df_orderbook_test.merge(df_history_test, how='outer', on='dt')

In [69]:
# (rows, columns) of the merged frame.
df_merged.shape

(2532630, 21)