In [37]:
#https://www.youtube.com/watch?v=ne-dpRdNReI
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn import preprocessing
from collections import deque, OrderedDict
import random
from old.fastai.dataset import *
from old.fastai.column_data import *
from fastai.metrics import accuracy

In [None]:
torch.__version__

In [9]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
CUDA_LAUNCH_BLOCKING=1

In [10]:
PATH = Path("../data/crypto_data/")
sets = ["BTC-USD", "BCH-USD", "ETH-USD", "LTC-USD"]
df_s = [pd.read_csv(PATH / f"{s}.csv", names = ["time","low","high","open",f"{s}_close",f"{s}_volume"],index_col="time").drop(["low","high","open"],axis=1) for s in sets]
df_m = pd.concat(df_s, axis=1, join="inner")

In [71]:
seq_len = 30 #transactions prior to prediction point
pred_period = 3 #minutes ahead to predict 
pred_ratio = "LTC-USD"
val_pct = .05
bs = 25
epochs = 10

In [12]:
def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

df_m["future"] = df_m[f"{pred_ratio}_close"].shift(-pred_period)
df_m["future"].fillna(method="ffill",inplace=True)
df_m["target"] = list(map(classify,df_m[f"{pred_ratio}_close"],df_m["future"]))
df_m.sort_index(inplace=True)
val_idx = int(len(df_m) * val_pct)
df_v = df_m.iloc[-val_idx:].copy()
df_t = df_m.iloc[:-val_idx].copy()

In [72]:
def prep_df(df):
    df = df.copy().drop("future",1)
    
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    sequential_data = []
    prev_period = deque(maxlen=seq_len)
    buys = []
    sells = []
    for i in df.values:
        prev_period.append([n for n in i[:-1]])
        if len(prev_period) == seq_len:
#             sequential_data.append([np.array(prev_period), i[-1]])
            if i[-1] == 0:
                sells.append([np.array(prev_period),np.array(i[-1])])
            elif i[-1] == 1:
                buys.append([np.array(prev_period),np.array(i[-1])])
    print(len(buys),len(sells))
    lower = min(len(buys), len(sells))
    buys = buys[:lower-1]
    sells = sells[:lower-1]
    balanced = buys+sells
    random.shuffle(balanced)
    data = OrderedDict()
    labels = OrderedDict()
    for i, (seq, targ) in enumerate(balanced):
        data[i] = torch.FloatTensor(seq)
        labels[i] =torch.FloatTensor(targ)
    print(f"Total: {len(balanced)}, Buys: {len(buys)}, Sells: {len(sells)}")
    
    return data, labels

In [73]:
train_x, train_y = prep_df(df_t)
val_x, val_y = prep_df(df_v)

35161 46614
Total: 70320, Buys: 35160, Sells: 35160
1799 2469
Total: 3596, Buys: 1798, Sells: 1798


In [74]:
from torch.utils import data
class Dataset(data.Dataset):
    def __init__(self, data, labels):
        super().__init__()
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx].float(), self.labels[idx].float()
    
trn_ds = Dataset(train_x, train_y)
val_ds = Dataset(val_x, val_y)

trn_dl = data.DataLoader(trn_ds, batch_size=bs)
val_dl = data.DataLoader(val_ds, batch_size=bs)

In [94]:
class lstmclass(nn.Module):
    def __init__(self,n_layers,h_dim):
        super().__init__()
        self.n_layers = n_layers
        self.h_dim = h_dim
        self.lstm = nn.LSTM(8, h_dim, n_layers, batch_first=True)
        self.lin1 = nn.Linear(h_dim, h_dim//2)
#         self.bn = nn.BatchNorm1d(seq_len)
        self.drop = nn.Dropout(.2)
        self.lin2 = nn.Linear(h_dim//2, h_dim//4)
        self.drop2 = nn.Dropout(.1)
        self.lin3 = nn.Linear(h_dim//4,1)
        
    def forward(self, x):
        hidden = self._init_hidden(x)
#         print("x:",x.shape, "\nHidden:",hidden[0].shape)
        self.lstm.flatten_parameters()
        x, h = self.lstm(x, hidden)
        x = self.lin1(x)
        x = F.relu(x)
        x = self.drop(x)
        x = self.drop2(F.relu(self.lin2(x)))
        x = F.relu(self.lin3(x))
        return x.squeeze()
    
    def _init_hidden(self,x):
        return (Variable(torch.zeros(self.n_layers, x.size(0), self.h_dim)),
                Variable(torch.zeros(self.n_layers, x.size(0), self.h_dim)))

In [None]:
class LSTM(nn.Module):
    def __init__(self,h_dim,h_layers):
        super().__init__()
        self.h_dim = h_dim
        self.h_layers = h_layers
        self.lstm = nn.LSTM(8,h_dim,h_layers,batch_first=True)
        self.lin1 = nn.Linear(h_dim, h_dim//2)
        self.bn = nn.BatchNorm1d(seq_len)
        self.drop = nn.Dropout(.1)
        self.lin2 = nn.Linear(h_dim//2, 1)
    
    def forward(self, x):
        hidden = Variable(torch.zeros(2,self.h_layers, x.size(0), self.h_dim).cuda())
        print(hidden.shape)
        self.lstm.flatten_parameters()
        return x
        x, h = self.lstm(x,hidden)
        x = x.contiguous().view(-1,np.prod(x.size()[1:]))
        x = self.lin1(x)
        x = F.relu(self.bn(x))
        x = self.drop(x)
#         x = x.view(bs,-1)
        x = F.softmax(self.lin2(x))
        return x

In [44]:
def append_stats(ep_vals, epoch, values, decimals=6):
    ep_vals[epoch]=list(np.round(values, decimals))
    return ep_vals

def print_stats(epoch, values, decimals=3):
    layout = "{!s:^10}" + " {!s:10}" * len(values)
    values = [epoch] + list(np.round(values, decimals))
    print(layout.format(*values))

names = ["ep / it","trn_loss","val_loss","accuracy"]
layout = "{!s:10} " * len(names)

In [106]:
# net = LSTM(128,1)
net = lstmclass(2,128)
# net = net.cuda()
lr = 2e-3
optim = torch.optim.Adam(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()

In [108]:
t_ls = []
v_ls = []
accs = []
ep_vals = OrderedDict()
for epoch in range(epochs):
    if epoch == 0: print(layout.format(*names))
    val_it = iter(val_dl)
    for i, (batch, ys) in enumerate(trn_dl, 0):
        net.train()
        optim.zero_grad()
        pred = net(batch)
        ls = loss(pred,ys.long())
        t_ls.append(ls.item())
        ls.backward()
        optim.step()
        vals = [np.mean(t_ls[-100:])]

        if i % 500 == 0:
            net.eval()
            with torch.no_grad():
                v_bat, v_ys = next(val_it)
                v = net(v_bat)
                v_loss = loss(v, v_ys.long())
                v_ls.append(v_loss.item())
            accs.append(accuracy(v, v_ys.long().item()))
            vals.extend((v_loss.item(), np.mean(accs[-3:])))
            print_stats(f"{epoch+1} / {i+1}",vals)
    ep_vals = append_stats(ep_vals, epoch+1, vals)

ep / it    trn_loss   val_loss   accuracy   


ValueError: only one element tensors can be converted to Python scalars

In [78]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, PATH / filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')
        
save_checkpoint({
            'epoch': epochs,
            'arch': lstm2,
            'state_dict': net.state_dict(),
            'last_acc': accs[-1],
            'optimizer' : optimizer.state_dict(),
        }, False)


NameError: name 'lstm2' is not defined

In [92]:
v_bat, v_ys = next(val_it)
v = net(v_bat)
# accuracy(v,v_ys.long())

tensor(0.)

In [93]:
v.shape

torch.Size([25, 30])