In [12]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [92]:
import torch
import numpy as np, pandas as pd

from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset
from pathlib import Path

import fastai.structured as structured
import fastai.column_data as column_data

#torch.set_default_tensor_type('torch.DoubleTensor')

## Load the price data

In [71]:
!ls data/NORM

btc_simple_norm.csv  eth_simple_norm.csv  ltc_simple_norm.csv
btc_stat_series.csv  eth_stat_series.csv  ltc_stat_series.csv


In [15]:
DATA = Path('./data')
NORM_DATA = DATA / 'NORM'

In [31]:
dfs = []
for csv_path in NORM_DATA.iterdir():
    if 'stat_series' in str(csv_path):
        df = pd.read_csv(csv_path)
        df = df.drop(columns=['Unnamed: 0'])
        dfs.append(df)
        
dfs[1].head()

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume From,Volume To
0,2018-08-01 10-PM,BTCUSD,-0.015135,0.000487,-0.017669,-0.002056,1.583925,1.588192
1,2018-08-01 09-PM,BTCUSD,0.000424,-0.015676,0.003926,-0.015135,-1.151363,-1.165019
2,2018-08-01 08-PM,BTCUSD,0.012335,0.007309,0.000791,0.000424,0.858586,0.865158
3,2018-08-01 07-PM,BTCUSD,-0.003837,0.0,0.008628,0.012335,-1.27983,-1.281979
4,2018-08-01 06-PM,BTCUSD,-0.00048,-0.001851,-0.000848,-0.003837,-0.265436,-0.270589


In [17]:
dfs[1].columns

Index(['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume From',
       'Volume To'],
      dtype='object')

In [18]:
numeric_columns = [ column for column in dfs[1].columns if np.issubdtype(dfs[1][column].dtype, np.number) ]
numeric_columns

['Open', 'High', 'Low', 'Close', 'Volume From', 'Volume To']

In [19]:
[len(df) for df in dfs]

[9511, 9511, 9511]

## Load the trades and merge 

In [25]:
TRADES_DATA = DATA / 'TRADES'
trades = []
for npfile in TRADES_DATA.iterdir():
    if 'binom_prob_v4' in str(npfile):
        print(npfile)
        trades.append(np.load(npfile))

data/TRADES/LTCUSD_binom_prob_v4.npy
data/TRADES/BTCUSD_binom_prob_v4.npy
data/TRADES/ETHUSD_binom_prob_v4.npy


In [29]:
len(trades), len(trades[0])

(3, 9463)

## Lets create our dataset 

In [81]:

class CryptoDataset(Dataset):
    def __init__(self, numeric_columns, trades, df, work_window=2*24, op_window=2*24):
        self.shape = (len(numeric_columns), len(df))
        self._data = np.zeros(self.shape)
        self.trades = trades
        self.work_window_len = work_window
        self.op_window_len = op_window
        
        for col_idx, column in enumerate(numeric_columns):
            self._data[col_idx, :] = df[column].values
                
    def __len__(self):
        return len(self.trades)
    
    def __getitem__(self, idx):
        start ,end = idx+self.work_window_len, idx+self.work_window_len+self.op_window_len
        x = self._data[:, idx: idx+self.work_window_len]
        y =  np.array(self.trades[idx])
#         x,y = torch.from_numpy(x), torch.from_numpy(y)
        return  x, y

In [82]:
ds = CryptoDataset(numeric_columns, trades[0], dfs[0]) # only bitcoin

In [83]:
ds._data.shape, len(ds)

((6, 9511), 9463)

In [84]:
x, y = ds[3]

In [85]:
x.shape, y.shape

((6, 48), (2, 2))

In [128]:
dataloader = DataLoader(ds, batch_size=164, shuffle=True, num_workers=0)

###  Using the fast.ai library

In [95]:
val_idxs = [i for i in range(int(9463 * 0.8), 9463)]

In [99]:
model_data = column_data.ColumnarModelData.from_data_frame('', val_idxs, dfs[0], trades[0], numeric_columns, bs=152)

TypeError: only integer scalar arrays can be converted to a scalar index

### Different DataFrame 
so fast.ai requires that each row correspond to a training example

In [101]:
from fastai.dataset import ModelData
from fastai.dataloader import DataLoader

In [118]:
val_id = int(9463 * 0.8)
train_df, train_y = df.iloc[:val_id, : ], ds.trades[:val_id]
val_df, val_y = df.iloc[val_id:, :], ds.trades[val_id:]

In [119]:
train_df.shape

(7570, 8)

In [121]:
train_ds = CryptoDataset(numeric_columns, train_y, train_df)
val_ds = CryptoDataset(numeric_columns, val_y, val_df)

In [123]:
train_dl = DataLoader(train_ds, batch_size=152, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=152,shuffle=True)

In [125]:
md = column_data.ColumnarModelData('', train_ds, val_ds,bs=152)

In [127]:
for x, y in iter(train_dl):
    print(x.shape, y.shape)

ValueError: all input arrays must have the same shape