In [1]:
import torch
import torch.nn as nn
# The wildcard imports stay ahead of the explicit imports below so that the
# explicit names win if the project modules re-export anything with the same name.
from network import *
from data import *
import numpy as np
import pandas as pd
%matplotlib notebook
import matplotlib.pyplot as plt
import json
import math
import os
from mpl_finance import candlestick_ohlc

# Progress marker printed before the dataset reads below.
print("Loading : ")

# The same three trading pairs are used for both splits; each call receives
# its own list object.
pairs = ('BTCUSDT', 'ETHUSDT', 'LTCUSDT')
train_data = load_data(['2018', '2019'], list(pairs), '5m')  # 2018-2019 -> training
test_data = load_data(['2020'], list(pairs), '5m')           # 2020 -> testing






    Please use `mplfinance` instead (no hyphen, no underscore).

    To install: `pip install --upgrade mplfinance` 

   For more information, see: https://pypi.org/project/mplfinance/




Loading : 
done
done


In [2]:
# Hyper-parameters and data layout passed to TFN and to the batch generators.
# Key order is kept unchanged because the dict is serialised to params.json.
params = {
    # --- columns fed to / predicted by the network ---
    'continuous_columns': ['Open', 'High', 'Low', 'Close', 'Volume'],
    'discrete_columns': ['Hour'],        # 'Day' and 'Month' are currently disabled
    'target_columns': ['Close'],
    'n_var_past_continuous': 5,
    'n_var_future_continuous': 0,
    'n_var_past_discrete': [24],         # would be [24, 31, 12] with 'Day'/'Month' enabled
    'n_var_future_discrete': [24],       # would be [24, 31, 12] with 'Day'/'Month' enabled
    'n_var_static_discrete': [5],

    # --- batching / evaluation ---
    'batch_size': 20,
    'test_batch_size': 20,
    'n_tests': 25,                       # test forward passes averaged per evaluation

    # --- model size ---
    # NOTE(review): dim_model (140) is not a multiple of n_heads (8) - confirm
    # that the attention implementation in network.py does not require that.
    'dim_model': 140,
    'n_lstm_layers': 3,
    'n_attention_layers': 8,
    'n_heads': 8,
    'dropout_r': 0.05,

    # --- quantile outputs (three values; plotted as three curves later on) ---
    'quantiles': [0.1, 0.5, 0.9],

    # --- window lengths, in rows of the input data ---
    'past_seq_len': 120,
    'future_seq_len': 24,
}

# Run configuration: resume from a saved checkpoint or start from scratch.
load_model = True
# NOTE: `dir` shadows the builtin of the same name, but the training cell
# builds checkpoint paths from this variable, so the name is kept.
dir = "D/"
path = dir + "model_198800.pt"

# Persist the hyper-parameters next to the checkpoints. The directory is
# created first so a fresh run does not fail on a missing folder.
os.makedirs(dir, exist_ok=True)
with open(dir + 'params.json', 'w') as fp:
    json.dump(params, fp)

if load_model:
    # The checkpoint holds the pickled model and optimizer *objects* (not
    # state dicts), together with the loss histories recorded so far.
    # SECURITY: torch.load unpickles arbitrary Python objects - only load
    # checkpoint files from a trusted source.
    checkpoint = torch.load(path)
    t = checkpoint['model_state']
    optimizer = checkpoint['optimizer_state']
    losses = checkpoint['losses']
    test_losses = checkpoint['test_losses']
    print("Loaded model with {} parameters".format(get_n_params(t)))
else:
    # Build a fresh model only when nothing is being restored, so no
    # throw-away network/optimizer is allocated on the GPU before a load.
    t = TFN(params).cuda()
    optimizer = torch.optim.Adam(t.parameters(), lr=0.0001)
    losses = []
    test_losses = []
    print("Initialised model with {} parameters".format(get_n_params(t)))

# Rows consumed by one sample window: past + future + 1.
# NOTE(review): the purpose of the extra row is not visible here - confirm
# against get_batches in data.py.
window_len = params['past_seq_len'] + params['future_seq_len'] + 1

# A single Indexer is shared by every training generator, so its upper bound
# is derived from the SHORTEST series rather than from train_data[0] only.
# With equally long series this is identical to the previous behaviour.
# Assumes Indexer(low, high, batch_size) yields offsets below `high` - TODO confirm.
indexer = Indexer(1, min(d.shape[0] for d in train_data) - window_len,
                  params['batch_size'])

# One batch generator per training dataset.
train_data_gens = [
    get_batches(d, params['past_seq_len'],
                params['future_seq_len'], params['continuous_columns'], params['discrete_columns'],
                params['target_columns'], batch_size=params['batch_size'], indexer=indexer)
    for d in train_data
]

# Same bound logic for the test split, which also shares one Indexer.
test_indexer = Indexer(1, min(d.shape[0] for d in test_data) - window_len,
                       params['test_batch_size'])

# Test generators receive the matching training dataset through `norm`.
test_data_gens = [
    get_batches(d, params['past_seq_len'],
                params['future_seq_len'], params['continuous_columns'], params['discrete_columns'],
                params['target_columns'], batch_size=params['test_batch_size'],
                indexer=test_indexer, norm=train_data[idx])
    for idx, d in enumerate(test_data)
]

# Quantile levels as a float tensor on the GPU, passed to forward_pass.
quantiles = torch.tensor(params['quantiles']).float().type(torch.cuda.FloatTensor)

Loaded model with 6421567 parameters


In [None]:
# Live dashboard with four stacked panels: training loss, test loss, one
# sample forecast against its input window, and variable-selection weights.
fig = plt.figure()
ax = fig.add_subplot(411)
ax1 = fig.add_subplot(412)
ax2 = fig.add_subplot(413)
ax3 = fig.add_subplot(414)
plt.ion()

fig.canvas.draw()
fig.show()

steps = 200000
for e in range(steps):
    # Run the model against the test set every 50 recorded training batches.
    if len(losses) % 50 == 0 and len(losses) != 0:
        t.eval()
        m_test_losses = []
        # Evaluation never back-propagates, so skip building autograd graphs.
        with torch.no_grad():
            for i in range(params['n_tests']):
                test_loss, *_unused = forward_pass(t, test_data_gens, params['test_batch_size'],
                                                   quantiles, test_indexer)
                m_test_losses.append(test_loss.cpu().detach().numpy())
                del test_loss, _unused

        # Store log10 of the test loss averaged over the n_tests passes.
        test_losses.append(np.log10(np.array(m_test_losses).mean(axis=0)))
        t.train()

    # Save a checkpoint every 400 recorded training batches. The model and
    # optimizer objects themselves are pickled; the loading cell relies on
    # exactly this layout.
    if len(losses) % 400 == 0 and len(losses) != 0:
        torch.save({'model_state': t,
                    'optimizer_state': optimizer,
                    'losses': losses, 'test_losses': test_losses},
                   dir + "model_{}.pt".format(len(losses)))

    # Forward pass on a training batch.
    optimizer.zero_grad()
    loss, net_out, vs_weights, given_data = forward_pass(t, train_data_gens, params['batch_size'],
                                                         quantiles, indexer)

    # Backward pass; the log10 loss is recorded for the dashboard/checkpoint.
    losses.append(np.log10(loss.cpu().detach().numpy()))
    torch.mean(loss).backward()
    optimizer.step()

    # Refresh the dashboard every 50 loop iterations.
    if e % 50 == 0:
        # Loss curves.
        fig.tight_layout(pad=0.1)
        ax.clear()
        ax.title.set_text("Training loss")
        ax.plot(losses)

        ax1.clear()
        ax1.title.set_text("Test loss")
        ax1.plot(test_losses)

        # Compare the network output with the data for the first batch element.
        # The GPU->CPU copy happens only here, on plotting iterations.
        sample_out = net_out.cpu().detach()[0]
        ax2.clear()
        ax2.title.set_text("Network output comparison")
        candles = given_data[0][0].cpu()
        # Negative time offsets so the input window ends just before x = 0.
        offsets = torch.arange(-params['past_seq_len'], 0).unsqueeze(-1).unsqueeze(-1).float()
        candles = torch.cat((offsets, candles), dim=1)
        candlestick_ohlc(ax2, candles.squeeze(), colorup="green", colordown="red")

        # Three output columns: first and last in red, middle in blue.
        ax2.plot(sample_out[:, 0], color="red")
        ax2.plot(sample_out[:, 1], color="blue")
        ax2.plot(sample_out[:, 2], color="red")
        ax2.plot(given_data[3].cpu().detach().numpy()[0], label="target", color="orange")

        # Variable-selection weights, averaged over the first two dimensions.
        mean_weights = torch.mean(torch.mean(vs_weights, dim=0), dim=0).squeeze()
        mean_weights = mean_weights.cpu().detach().numpy()
        ax3.clear()
        ax3.title.set_text("Variable Selection Weights")
        x = params['continuous_columns'] + params['discrete_columns']
        ax3.bar(x=x, height=mean_weights)
        # Rotate labels on ax3 explicitly instead of via plt.xticks, which
        # acts on whichever axes happens to be current.
        ax3.tick_params(axis='x', labelrotation=-30)
        fig.canvas.draw()

    # Drop references to this step's tensors before the next iteration.
    del loss
    del net_out
    del vs_weights
    del given_data


In [None]:


#Draw test cases
# Plots the first `batch_size_` elements of one test batch: the input candles,
# the three network output columns and the target series.
fig = plt.figure()
batch_size_ = 4

# Panels are laid out on a 5-row, 1-column grid (same layout as before).
axes = [fig.add_subplot(5, 1, i + 1) for i in range(batch_size_)]

# Switch to eval mode BEFORE the forward pass so the plotted predictions are
# produced in inference mode; always switch back to train mode afterwards.
# NOTE(review): unlike the evaluation in the training loop, no indexer is
# passed to forward_pass here - confirm that this is intended.
t.eval()
try:
    with torch.no_grad():
        loss, net_out, vs_weights, given_data = forward_pass(t, test_data_gens,
                                                             params['test_batch_size'], quantiles)
finally:
    t.train()

net_out = net_out.cpu().detach()
targets = given_data[3].cpu().detach().numpy()

# Negative time offsets so each input window ends just before x = 0.
past_seq_len = params['past_seq_len']
time_axis = torch.arange(-past_seq_len, 0).unsqueeze(-1).unsqueeze(-1).float()

for idx, a in enumerate(axes):
    a.clear()

    c = given_data[0][idx].cpu()
    c = torch.cat((time_axis, c), dim=1)
    candlestick_ohlc(a, c.squeeze(), colorup="green", colordown="red")

    # Three output columns: first and last in red, middle in blue.
    a.plot(net_out[idx][:, 0], color="red")
    a.plot(net_out[idx][:, 1], color="blue")
    a.plot(net_out[idx][:, 2], color="red")
    a.plot(targets[idx], label="target", color="orange")

plt.ion()

fig.show()
fig.canvas.draw()

# Release this cell's tensors.
del loss
del net_out
del vs_weights
del given_data

In [None]:
# Position of the smallest recorded (log10) training loss.
# NOTE(review): if each entry of `losses` is a vector, this is an index into
# the flattened array rather than a step number - confirm.
np.argmin(np.asarray(losses))

In [None]:
# Smallest recorded (log10) test loss in column 0 of the test-loss history.
np.min(np.asarray(test_losses)[:, 0])

In [None]:
# Show the history length and only the most recent entries; echoing the full
# list (one entry per training step) would flood the notebook output.
len(losses), losses[-10:]

In [None]:

# Manual cleanup of tensors that may be left behind (for example after
# interrupting the training loop). Idempotent: names that are already gone
# are skipped instead of raising NameError, so the cell survives
# Restart & Run All.
for _leftover in ('loss', 'net_out', 'vs_weights', 'given_data'):
    globals().pop(_leftover, None)
del _leftover