In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load (or reload) the autoreload extension and switch it to mode 2, so that
# imported modules are re-imported automatically before each cell executes.
%reload_ext autoreload
%autoreload 2

In [4]:
import torch
import torch.nn as nn

import numpy as np, pandas as pd

from torch.utils.data.dataset import Dataset
from pathlib import Path

import fastai.structured as structured
import fastai.column_data as column_data
from fastai.dataset import ModelData
from fastai.dataloader import DataLoader


## Load the price data

In [5]:
# Shell escape: list the files available in the normalised-data folder.
!ls data/NORM

btc_simple_norm.csv  eth_simple_norm.csv  ltc_simple_norm.csv
btc_stat_series.csv  eth_stat_series.csv  ltc_stat_series.csv


In [6]:
# Root folder for the notebook's input files, and the sub-folder that holds the
# normalised price series.
DATA = Path('./data')
NORM_DATA = DATA.joinpath('NORM')

In [93]:
# Load every "*stat_series*" CSV found in NORM_DATA into a dict keyed by the
# upper-cased coin symbol (e.g. "ltc_stat_series.csv" -> "LTC").
#
# The files are visited in sorted order: Path.iterdir() yields entries in
# arbitrary order, which made the insertion order of `dfs` (and of anything
# stacked from it later) depend on the file system. Matching is done with a glob
# on the file name instead of a substring test on the whole path.
dfs = {}
for file_path in sorted(NORM_DATA.glob('*stat_series*')):
    # The part of the file name before the first underscore is the coin symbol.
    coin = file_path.name.split('_')[0].upper()
    df = pd.read_csv(file_path)
    # The CSV stores its old row index as an unnamed first column; drop it.
    df = df.drop(columns=['Unnamed: 0'])
    dfs[coin] = df

print("LTC frame: ")
dfs['LTC'].head()

LTC frame: 


Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume From,Volume To
0,2018-08-01 10-PM,LTCUSD,-0.019644,0.004935,-0.017324,0.000868,1.300439,1.29749
1,2018-08-01 09-PM,LTCUSD,-0.000253,-0.016129,0.001898,-0.019644,-0.906104,-0.916326
2,2018-08-01 08-PM,LTCUSD,0.019525,0.009213,0.0,-0.000253,0.194636,0.198967
3,2018-08-01 07-PM,LTCUSD,-0.005718,0.000743,0.013683,0.019525,-0.875367,-0.867487
4,2018-08-01 06-PM,LTCUSD,-0.001247,-0.005215,-0.001372,-0.005718,-0.072912,-0.076443


In [26]:
# Column labels of the LTC frame (the unnamed index column was dropped at load time).
dfs['LTC'].columns

Index(['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume From',
       'Volume To'],
      dtype='object')

In [28]:
# Names of the LTC columns whose dtype is a NumPy numeric type, in frame order.
# The same column list is later used to select features for every coin.
numeric_columns = [
    name
    for name, dtype in dfs['LTC'].dtypes.items()
    if np.issubdtype(dtype, np.number)
]
numeric_columns

['Open', 'High', 'Low', 'Close', 'Volume From', 'Volume To']

In [29]:
# Row count of each coin's frame, in dict order.
[len(frame) for frame in dfs.values()]

[9511, 9511, 9511]

## Load the trades and merge 

In [94]:
# Load the pre-computed trade arrays ("*multi_trades_v3*" files) into a dict
# keyed by coin symbol.
#
# Files are visited in sorted order because Path.iterdir() gives no ordering
# guarantee, and the key is upper-cased so it always matches the keys of `dfs`,
# which are built with .upper() as well.
TRADES_DATA = DATA / 'TRADES'
trades = {}
for npfile in sorted(TRADES_DATA.glob('*multi_trades_v3*')):
    # The first three characters of the file name are the coin symbol.
    coin = npfile.name[:3].upper()

    trades[coin] = np.load(npfile)
print('LTC trades shape: ', trades['LTC'].shape)

LTC trades shape:  (9463, 48)


In [99]:
# Per-coin feature matrix: the numeric columns of each frame as a NumPy array.
coins = {
    coin: frame[numeric_columns].values
    for coin, frame in dfs.items()
}

In [101]:
# Inspect the LTC feature array built from the numeric columns.
coins['LTC']

array([-0.01964,  0.00494, -0.01732,  0.00087,  1.30044,  1.29749])

In [100]:
# Positional split: the first TRAIN_FRACTION of the rows of every array goes to
# the training set, the remaining rows to the test set.
# not a great way to do this
TRAIN_FRACTION = 0.8

# Derive the split point from the loaded trade arrays instead of hard-coding
# their row count, so the cell keeps working when the data files change.
n_trade_rows = min(len(v) for v in trades.values())
l = int(n_trade_rows * TRAIN_FRACTION)  # name kept: later cells may refer to `l`

train_coins = {k: v[:l] for k, v in coins.items()}
train_trades = {k: v[:l] for k, v in trades.items()}

# NOTE(review): the price arrays and the trade arrays are sliced at the same
# offset, so if the price arrays are longer than the trade arrays the test price
# slices will have more rows than the test trade slices — confirm this is intended.
test_coins = {k: v[l:] for k, v in coins.items()}
test_trades = {k: v[l:] for k, v in trades.items()}

{'LTC': array([[-0.01964,  0.00494, -0.01732,  0.00087,  1.30044,  1.29749],
        [-0.00025, -0.01613,  0.0019 , -0.01964, -0.9061 , -0.91633],
        [ 0.01953,  0.00921,  0.     , -0.00025,  0.19464,  0.19897],
        ...,
        [ 0.01517,  0.00946,  0.00239, -0.00334, -0.67629, -0.66842],
        [ 0.00141,  0.00329,  0.00975,  0.01517, -0.85248, -0.84786],
        [ 0.00445,  0.00351, -0.00237,  0.00141,  0.71685,  0.71873]]),
 'BTC': array([[-0.01514,  0.00049, -0.01767, -0.00206,  1.58392,  1.58819],
        [ 0.00042, -0.01568,  0.00393, -0.01514, -1.15136, -1.16502],
        [ 0.01234,  0.00731,  0.00079,  0.00042,  0.85859,  0.86516],
        ...,
        [ 0.01374,  0.00584,  0.00145, -0.002  , -0.03176, -0.0279 ],
        [ 0.00829,  0.00976,  0.01237,  0.01374, -0.56901, -0.56722],
        [-0.00144,  0.0002 ,  0.00406,  0.00829, -0.31013, -0.31469]]),
 'ETH': array([[-0.02725,  0.00019, -0.0222 ,  0.0031 ,  0.85268,  0.8438 ],
        [ 0.00192, -0.02104,  0.00267, 

In [None]:
# Stack the per-coin arrays into single arrays with the coin on axis 0.
#
# Both stacks are built from one explicit, sorted key list so that index n on
# axis 0 refers to the same coin in `coins_data` and in `trades`; the two dicts
# are filled from separate directory listings and need not share an insertion
# order. Lists are passed because np.stack expects a sequence of arrays, not a
# dict view.
coin_order = sorted(coins)
coins_data = np.stack([coins[coin] for coin in coin_order])
# NOTE: this rebinds `trades` from a dict to an ndarray, so this cell cannot be
# re-run without first re-running the cell that loads `trades`.
trades = np.stack([trades[coin] for coin in coin_order])

In [96]:
# Sanity check: shapes of the stacked price and trade arrays.
coins_data.shape, trades.shape

((3, 9511, 6), (3, 9463, 48))

## Let's create our dataset

In [98]:
class CryptoDataset(Dataset):
    """Map-style dataset pairing a window of coin data with one row of trades.

    A flat index ``idx`` is decoded as ``n = idx // unit_len`` (position on
    axis 0, i.e. which coin) and ``start = idx % unit_len`` (row offset inside
    that coin). Each item is the tuple ``(n, x, y)`` where ``x`` is
    ``coins_data[n, start:start + work_window]`` and ``y`` is
    ``np.array(trades[n, start])``.

    Args:
        trades: array indexed as ``trades[n, row]``; must expose ``.shape``.
        coins_data: array indexed as ``coins_data[n, row]``; must expose
            ``.shape`` and have the same axis-0 length as ``trades``.
        unit_len: requested number of start positions per coin. It is reduced
            when necessary so that every start has a trade row and a full
            ``work_window`` slice of ``coins_data``.
        work_window: number of consecutive ``coins_data`` rows returned as ``x``.
            NOTE(review): the default of 48 is an assumption (it equals the
            width of the trade rows shown earlier in the notebook) — confirm.
        op_window: stored as ``op_window_len``; not used by this class itself.
            NOTE(review): default of 48 is likewise an assumption — confirm.

    Raises:
        ValueError: if ``work_window`` is smaller than 1, the two arrays
            disagree on axis 0, or no full window fits in the data.
    """

    def __init__(self, trades, coins_data, unit_len=9463, work_window=48, op_window=48):
        if work_window < 1:
            raise ValueError("work_window must be at least 1, got %r" % (work_window,))

        self.work_window_len = work_window
        self.op_window_len = op_window
        # `_data` is the name read by __getitem__ (and by callers inspecting the
        # dataset); `data` is kept as an alias because the original assigned it.
        self._data = coins_data
        self.data = coins_data
        self.trades = trades
        self.shape = self.trades.shape

        if self._data.shape[0] != self.shape[0]:
            raise ValueError(
                "trades and coins_data disagree on axis 0: %d vs %d"
                % (self.shape[0], self._data.shape[0])
            )

        # Never hand out a start position that lacks a trade row or whose
        # window would run past the end of the coin data.
        full_windows = self._data.shape[1] - work_window + 1
        self.unit_len = min(unit_len, self.shape[1], full_windows)
        if self.unit_len < 1:
            raise ValueError(
                "no complete window of %d rows fits in data with %d rows"
                % (work_window, self._data.shape[1])
            )

    def idx_to_num(self, idx):
        """Return the axis-0 (coin) position that flat index ``idx`` falls in."""
        return idx // self.unit_len

    def __len__(self):
        # One item per (coin, start position) pair.
        return self.shape[0] * self.unit_len

    def __getitem__(self, idx):
        """Return ``(n, x, y)`` for flat index ``idx``; see the class docstring."""
        if not 0 <= idx < len(self):
            raise IndexError("index %r out of range for dataset of length %d" % (idx, len(self)))
        n, start = divmod(idx, self.unit_len)
        x = self._data[n, start: start + self.work_window_len]
        y = np.array(self.trades[n, start])
        return n, x, y

In [None]:
# Inspect the per-coin training slices of the price arrays.
train_coins

In [None]:
# Build the train/validation datasets from the per-coin splits.
#
# Each split is first stacked into one array with the coin on axis 0, using a
# single sorted key order so that row n is the same coin in the price stack and
# in the trade stack.
split_order = sorted(train_coins)
train_x = np.stack([train_coins[c] for c in split_order])
train_y = np.stack([train_trades[c] for c in split_order])
val_x = np.stack([test_coins[c] for c in split_order])
val_y = np.stack([test_trades[c] for c in split_order])

# CryptoDataset takes (trades, coins_data, unit_len); unit_len is set to the
# number of trade rows per coin in the respective split.
train_ds = CryptoDataset(train_y, train_x, unit_len=train_y.shape[1])
val_ds = CryptoDataset(val_y, val_x, unit_len=val_y.shape[1])

In [None]:
# Sanity check: shape of the training dataset's price array and the axis-0
# length of its trades array.
train_ds._data.shape, len(train_ds.trades)

In [None]:
# Both loaders share the same settings: batches of 152 items with shuffle=True.
loader_settings = dict(batch_size=152, shuffle=True)
train_dl = DataLoader(train_ds, **loader_settings)
val_dl = DataLoader(val_ds, **loader_settings)

In [None]:
# Bundle the training and validation loaders into a fastai ModelData object.
# NOTE(review): '.' is the first positional argument, presumably the working
# path — confirm against fastai.dataset.ModelData.
md = ModelData('.', train_dl, val_dl)

In [None]:
class StatefulRNN(nn.Module):
    """Embedding -> GRU -> linear model that keeps its hidden state between calls.

    The hidden state ``self.h`` produced by one ``forward`` call is detached
    from the autograd graph and used as the initial state of the next call. It
    is reset to zeros whenever the incoming batch size (or the module's device)
    no longer matches the stored state.

    Args:
        vocab_size: number of distinct input tokens; also the output width.
        emb_size: embedding dimension fed to the GRU.
        n_hidden: GRU hidden size.
        bs: batch size used to allocate the initial hidden state.
    """

    def __init__(self, vocab_size, emb_size, n_hidden, bs):
        super().__init__()
        # Stored because forward() needs it to flatten the output.
        self.vocab_size = vocab_size
        self.emb_size = emb_size
        self.n_hidden = n_hidden

        self.e = nn.Embedding(vocab_size, emb_size)
        self.rnn = nn.GRU(emb_size, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)

    def forward(self, cs):
        """Return log-probabilities for every position, flattened to 2-D.

        ``cs`` is indexed as ``cs[step]`` with the batch on the next axis (the
        GRU is created with the default ``batch_first=False``). The result has
        shape ``(-1, vocab_size)`` and covers all time steps.
        """
        bs = cs[0].size(0)  # may not always be the batch size given to __init__
        # The hidden state is (num_layers, batch, hidden): the batch size lives
        # on axis 1, not axis 0. Also re-create it if the module changed device.
        if self.h.size(1) != bs or self.h.device != self.e.weight.device:
            self.init_hidden(bs)
        inp = self.e(cs)
        out, h = self.rnn(inp, self.h)

        # Keep the values but cut the graph, so back-propagation does not reach
        # into earlier batches.
        self.h = h.detach()

        return nn.functional.log_softmax(self.l_out(out), dim=-1).view(-1, self.vocab_size)  # returns all outputs

    def init_hidden(self, bs):
        """Reset the hidden state to zeros for batch size ``bs``."""
        weight = self.e.weight
        self.h = torch.zeros(1, bs, self.n_hidden, dtype=weight.dtype, device=weight.device)