In [1]:
import os
import numpy as np
import torch
from model import Transformer, Conv, Temporal_Conv_Transformer, Temporal_Conv_Transformer_Vol
from utils.prices_dataset import Prices, PricesVol
from utils.loss_functions import CustomLoss, SignWeightedLoss, SignWeightedTgtLoss
from train_test import train, test, test_conf, test_conf_ensemble, test_conf_val, test_conf_time, test_mus_stds_tgts
import matplotlib.pyplot as plt

In [40]:
# --- Experiment configuration (1-minute data) ---------------------------------
# `vol` selects between the close+volume pipeline (PricesVol /
# Temporal_Conv_Transformer_Vol) and the price-only pipeline (Prices /
# Temporal_Conv_Transformer) further down in this cell.
vol = True
epoch = 20
lr = 0.0000005
feature_size = 3
out_channels = 3
num_layers = 1
nhead = 1
dropout = 0.0
batch_size = 512
seq_length = 60
shift = 30
tgt_step = 3
reg_weight=100
center = True
norm =  False
#loss_func = SignWeightedLoss(weight=reg_weight)
#loss_func = SignWeightedTgtLoss(weight=reg_weight)
# Element-wise (unreduced) L1 loss. The previous call passed the deprecated
# `reduce=False` together with reduction='sum'; a non-None `reduce` overrides
# `reduction`, so the effective mode was already 'none'. Saying so explicitly
# keeps the behaviour and removes the deprecation warning.
loss_func = torch.nn.L1Loss(reduction='none')
timeframe = "1m"

device = 'cuda' if torch.cuda.is_available() else 'cpu'


# Build train/test datasets and the matching model for the selected pipeline.
if vol:
    trainset = PricesVol("data/BINANCE_PERP_close_train.npy", "data/BINANCE_PERP_volume_train.npy", seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
    testset = PricesVol("data/BINANCE_PERP_close_test.npy", "data/BINANCE_PERP_volume_test.npy", seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
    model = Temporal_Conv_Transformer_Vol(seq_length = seq_length, feature_size=feature_size, dropout=dropout, num_layers=num_layers)
else:
    trainset = Prices("data/train{}.npy".format(timeframe), seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
    testset = Prices("data/test{}.npy".format(timeframe), seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
    model = Temporal_Conv_Transformer(seq_length = seq_length, feature_size=feature_size, out_channels=out_channels, dropout=dropout, num_layers=num_layers, nhead=nhead)
model = model.to(device)
#trans_model = Transformer(seq_length = seq_length, feature_size=1, dropout=0.1)
#conv_model = Conv(seq_length = seq_length)
# Training batches are shuffled; test batches keep dataset order. Both drop the
# final incomplete batch.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0, shuffle=False, drop_last=True)

In [12]:
# Train the configured model, then persist its weights under model/.
train(epoch, model, lr, trainloader, testloader, seq_length, batch_size, loss_func = loss_func, volb=vol)
#train(epoch, trans_model, lr, trainloader, testloader, seq_length, batch_size)
#train(epoch, conv_model, lr, trainloader, testloader, seq_length, batch_size)
# exist_ok=True replaces the separate exists() check: one call, and no failure
# if the directory appears between the check and the creation.
os.makedirs("model", exist_ok=True)
# NOTE(review): this name carries no "vol" tag even when vol=True, whereas the
# load cell's name pattern is "pred_model_vol_..." -- confirm which is intended.
PATH = f"model/pred_model_{timeframe}_len_{seq_length}_shift_{shift}_epoch_{epoch}_lr_{lr}.pt"
torch.save(model.state_dict(), PATH)

pretrain test:
pred: 
 tensor([0.0548, 0.1200, 0.0206, 0.0463, 0.0846]) 
tgt: 
 tensor([ 28.4004, -16.8984,  15.8008,  -3.9004,  -4.5000]) 
mus: 
 tensor([ 0.0279,  0.0729,  0.0172, -0.1226,  0.1147]) 
stds: 
 tensor([ 0.1271,  0.0392, -0.1182, -0.1985, -0.0861]) 
eps: 
 tensor([ 0.2116,  1.2010,  0.0291,  0.8509, -0.3499])
Predictions:  tensor(19.1948) 
Zero Guess:  tensor(19.1918)
Average guess (check if network predicts based on general change):  tensor(-0.0092)


KeyboardInterrupt: 

In [46]:
# Scratch check: built-in max() applied to a Python int and a one-element
# tensor. The recorded output for this cell is 1.
print(max(1, torch.tensor([0])))

1


In [None]:
# Rebuild the close+volume model and load saved weights into it.
model = Temporal_Conv_Transformer_Vol(seq_length = seq_length, feature_size=feature_size, dropout=dropout, num_layers=num_layers)
# The run-specific name below used to be assigned and then immediately
# overwritten by the fixed checkpoint path, so it never took effect. It is kept
# as a comment; swap the two lines to load a run-specific file instead.
#PATH = f"model/pred_model_vol_{timeframe}_len_{seq_length}_shift_{shift}_epoch_{epoch}_lr_{lr}.pt"
PATH = "model/006.pt"
# map_location lets a checkpoint saved on another device load onto `device`.
model.load_state_dict(torch.load(PATH, map_location=device))
#model.train()

In [17]:
# Run test_mus_stds_tgts over the (unshuffled) test loader; its three return
# values are bound to mus, stds and tgts for the analysis cells below.
model = model.to(device)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
    drop_last=True,
)
mus, stds, tgts = test_mus_stds_tgts(
    model, testloader, seq_length, batch_size
)

In [21]:
# Magnitude statistics of the predicted means: max alone, then mean/max/median.
abs_mus = torch.abs(mus)
mx_mu = torch.max(abs_mus)
print(mx_mu)
mean_mu = torch.mean(abs_mus)
med_mu = torch.median(abs_mus)
print(mean_mu, mx_mu, med_mu)

# Same statistics for the predicted stds.
abs_stds = torch.abs(stds)
mx_std = torch.max(abs_stds)
print(mx_std)
mean_std = torch.mean(abs_stds)
med_std = torch.median(abs_stds)
print(mean_std, mx_std, med_std)

# Sign agreement between mu and target.
#   t / f   : counts of agreeing / disagreeing samples
#   tm / fm : the same counts restricted to samples with |mu| > 0.25 * max|mu|
#   tgt_t / tgt_f : the targets that fell in each group
tgt_t, tgt_f = [], []
t = f = 0
tm = fm = 0
me = 0
for ii, mu in enumerate(mus):
    # me flags a "large" prediction. #and torch.abs(stds[ii])<=0.5*mx_std:
    me = 1 if torch.abs(mu) > 0.25*mx_mu else 0
    same_sign = torch.sign(mu) == torch.sign(tgts[ii])
    if same_sign:
        tgt_t.append(tgts[ii])
        t += 1
        tm += me
    else:
        tgt_f.append(tgts[ii])
        f += 1
        fm += me

# Overall hit rate, hit rate among large predictions, share of large predictions.
print(t,f, t/(t+f))
print(tm,fm, tm/(tm+fm))
print((tm+fm)/(t+f))
# Mean absolute target for the agreeing vs disagreeing groups.
print(torch.mean(torch.abs(torch.tensor((tgt_t)))), torch.mean(torch.abs(torch.tensor((tgt_f)))))

tensor(0.7516)
tensor(0.2316) tensor(0.7516) tensor(0.2255)
tensor(0.4391)
tensor(0.0814) tensor(0.4391) tensor(0.0667)
5374 4866 0.5248046875
3310 2843 0.5379489679830977
0.60087890625
tensor(18.7524) tensor(19.6770)


In [24]:
# Checkpoints for the two price-only models loaded below.
PATH1 = "model/001.pt"
PATH2 = "model/006.pt"
#PATH = f"model/001.pt"

# Shared constructor arguments for both models.
model_kwargs = dict(seq_length = seq_length, feature_size=feature_size, dropout=dropout, num_layers=num_layers)
model1 = Temporal_Conv_Transformer(**model_kwargs)
model2 = Temporal_Conv_Transformer(**model_kwargs)

# Load weights (remapped onto `device`), then move each model there.
model1.load_state_dict(torch.load(PATH1, map_location=device))
model2.load_state_dict(torch.load(PATH2, map_location=device))
#model.train()
model1, model2 = model1.to(device), model2.to(device)

# Disabled evaluation calls, kept as a string expression (it is the cell's
# displayed value).
'''accs = test_conf(model1, testloader, seq_length, batch_size)
accs = test_conf_ensemble(model1, model2, testloader, seq_length, batch_size)'''

'accs = test_conf(model1, testloader, seq_length, batch_size)\naccs = test_conf_ensemble(model1, model2, testloader, seq_length, batch_size)'

In [None]:
# Plain test pass at batch size 256 over the unshuffled test set; the return
# value is discarded.
batch_size=256
model = model.to(device)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
    drop_last=True,
)
_ = test(model, testloader, seq_length, batch_size)

In [None]:
# Confidence evaluation of the current model; results are bound to `accs`.
model = model.to(device)
# NOTE(review): this cell previously contained a dangling `batch_size =` with no
# value, which is a SyntaxError and made the cell impossible to run. The
# assignment is dropped so the cell reuses the batch_size already in scope;
# assign it explicitly here if a specific size is intended.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0, shuffle=False, drop_last=True)
accs = test_conf(model, testloader, seq_length, batch_size)

In [None]:
# Run test_conf_val one sample at a time over the unshuffled test set.
batch_size=1
model = model.to(device)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
    drop_last=True,
)
test_conf_val(model, testloader, seq_length, batch_size)

In [None]:
# Run test_conf_time on the 2023 price file, one sample per batch, passing
# avg_win=win. Note that this rebinds `testset` and `testloader`.
batch_size=1
win = 1000
model = model.to(device)
testset = Prices(
    "data/2023test{}.npy".format(timeframe),
    seq_length,
    shift=shift,
    center=center,
    norm=norm,
    tgt_step=tgt_step,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
    drop_last=False,
)
avgs, tgts = test_conf_time(
    model, testloader, seq_length, batch_size, avg_win=win
)

In [None]:
# Overlay the standardised averages from the previous cell on the standardised
# 2023 price series, then plot the cumulative sum of the targets.
avgs = np.asarray(avgs)
prices = np.load("data/2023test{}.npy".format(timeframe))
# Standardise both series (zero mean, unit std) so they share one axis.
prices = (prices-np.mean(prices))/np.std(prices)
# Fixed: this line referenced an undefined name `avg` (NameError); the array
# being standardised is `avgs`.
avg_cent = (avgs-np.mean(avgs))/np.std(avgs)
plt.plot(avg_cent)
plt.plot(prices)
plt.show()
plt.plot(np.cumsum(np.asarray(torch.tensor(tgts))))
plt.show()

In [None]:
# Histogram of the values in `mus`, converted to a NumPy array first.
mus_np = torch.tensor(mus).numpy()
plt.hist(mus_np)

In [None]:

# Absolute targets in descending order; print the 100 largest.
# The result is bound to `sorted_tgts` rather than `sorted`, so the built-in
# sorted() is not shadowed for the rest of the kernel session.
sorted_tgts, ind = torch.sort(torch.abs(torch.tensor(tgts)), descending=True)
print(sorted_tgts[:100])

In [None]:

# Compare the mean |target| over all samples (`tgt`) with the mean |target|
# over samples where sign(mu) matches sign(target) (`tgt_true`).
tgt, tgt_true = [], []
for ii, mu in enumerate(mus):
    abs_tgt = torch.abs(tgts[ii])
    tgt.append(abs_tgt)
    if torch.sign(mu)==torch.sign(tgts[ii]):
        tgt_true.append(abs_tgt)
print(torch.mean(torch.tensor(tgt)))
print(torch.mean(torch.tensor(tgt_true)))

In [None]:
# --- 5-minute configuration; evaluates whichever `model` is already in scope ---
epoch = 10
lr = 0.00001
feature_size = 3
num_layers = 3
dropout = 0.0
batch_size = 100
seq_length = 500
tgt_step = 5
center = True
norm =  False
# Element-wise (unreduced) L1 loss. The previous call passed the deprecated
# `reduce=False` with reduction='sum'; `reduce` overrides `reduction`, so the
# effective mode was already 'none'.
loss_func = torch.nn.L1Loss(reduction='none')
timeframe = "5m"


# This cell used to set shift = 260 and then overwrite it with 6 before any
# use; only the effective value is kept.
shift = 6
trainset = Prices("data/train{}.npy".format(timeframe), seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
testset = Prices("data/test{}.npy".format(timeframe), seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
#model = Temporal_Conv_Transformer(seq_length = seq_length, feature_size=feature_size, dropout=dropout, num_layers=num_layers)
#trans_model = Transformer(seq_length = seq_length, feature_size=1, dropout=0.1)
#conv_model = Conv(seq_length = seq_length)
# Both loaders are shuffled here, including the test loader.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)
# NOTE(review): the result is bound to `acc`, while the following cells index
# `accs` -- confirm which name is intended.
acc = test_conf(model, testloader, seq_length, batch_size)

In [None]:
# Fold the first 390 entries of `accs` into a 13 x 30 grid:
# row = ii % 13, column = ii // 13. Then report per-row mean and std.
accs_mod = np.zeros((13,30))
for ii in range(390):
    ind2, ind = divmod(ii, 13)
    #print(ind2)
    accs_mod[ind,ind2] += accs[ii]
#print(accs_mod)
print(accs_mod.mean(axis=1))
print(accs_mod.std(axis=1))
#plt.plot(accs_mod/7)

In [None]:
# Forward-looking window mean of `accs`: entry i averages accs[i:i+100].
# NOTE(review): 386 start positions with a 100-wide window means the later
# windows are truncated if `accs` has 390 entries, as the previous cell's loop
# suggests -- confirm the intended window size.
smo = [np.mean(accs[i:i+100]) for i in range(386)]
plt.plot(smo)

In [None]:
#testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0, shuffle=False, drop_last=True)
#test(model, testloader, seq_length, batch_size)

In [None]:
# --- 5-minute configuration, one sample per batch ------------------------------
epoch = 50
lr = 0.00001
feature_size = 3
num_layers = 3
dropout = 0.0
batch_size = 1
seq_length = 500
shift = 505
tgt_step = 5
center = True
norm =  False
# Element-wise (unreduced) L1 loss. The previous call passed the deprecated
# `reduce=False` with reduction='sum'; `reduce` overrides `reduction`, so the
# effective mode was already 'none'.
loss_func = torch.nn.L1Loss(reduction='none')
timeframe = "5m"


trainset = Prices("data/train{}.npy".format(timeframe), seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
testset = Prices("data/test{}.npy".format(timeframe), seq_length, shift=shift, center=center, norm=norm, tgt_step=tgt_step)
#model = Temporal_Conv_Transformer(seq_length = seq_length, feature_size=feature_size, dropout=dropout, num_layers=num_layers)
#trans_model = Transformer(seq_length = seq_length, feature_size=1, dropout=0.1)
#conv_model = Conv(seq_length = seq_length)
# Both loaders are shuffled here, including the test loader.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)


In [None]:
# Accumulate every test batch's source window and target, then plot the first
# row of the accumulated windows. Despite the names, `mean` and `mean_tgt` hold
# running sums: nothing here divides by the number of batches.
mean_first = True
mean_tgt = 0
for ii, (src,tgt) in enumerate(testloader):
    mean_tgt += tgt
    if not mean_first:
        mean += src
        continue
    # First batch: start the accumulator from this batch's tensor.
    mean, mean_first = src, False
plt.plot(mean[0])
plt.show()
print(mean_tgt)

In [None]:
### Check expected change in training set against expected change in test set ###
# Pass 1 (training loader): re-centre each window and its target on the
# window's last value, and average the resulting target over all samples.
tgt_sum = 0
for ii, (src,tgt) in enumerate(trainloader):
    # Last value of every window, tiled across seq_length columns.
    src_last = src[:,-1].unsqueeze(1).repeat(1,seq_length)
    src = src - src_last
    tgt = tgt - src_last[:,0]
    tgt_sum += (tgt-src[:,-1]).sum()
    print((ii+1)*batch_size)
tgt_sum = tgt_sum/((ii+1)*batch_size)
print(tgt_sum)

# Pass 2 (test loader), using the same re-centring:
#   zero_test_sum  : sum of |tgt|            (error of always guessing 0)
#   train_test_sum : sum of |tgt - tgt_sum|  (error of guessing the train average)
#   test_tgt_sum   : signed sum of tgt
zero_test_sum = 0
test_tgt_sum = 0
train_test_sum = 0
for ii, (src,tgt) in enumerate(testloader):
    src_last = src[:,-1].unsqueeze(1).repeat(1,seq_length)
    src = src - src_last
    tgt = tgt - src_last[:,0]
    zero_test_sum += tgt.abs().sum()
    test_tgt_sum += tgt.sum()
    train_test_sum += (tgt-tgt_sum).abs().sum()
n_test = (ii+1)*batch_size
print(zero_test_sum/n_test, train_test_sum/n_test, test_tgt_sum/n_test)

In [None]:
# Load the 5m checkpoint into the current model. map_location=device matches the
# other torch.load calls in this notebook and lets a checkpoint saved on a GPU
# load on a CPU-only machine.
model.load_state_dict(torch.load("model/pred_model_5m", map_location=device))

In [None]:
### check percentage of correct prediction by sign
# Repeats a full pass over the test set n_repeats times, each with fresh noise
# `eps`, and averages the per-pass score mean(sign(mu) * sign(tgt)).
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0, shuffle=False, drop_last=False)
model.eval()
#model.train()
# Other cells move the model to `device`, while the loader yields CPU tensors.
# Inputs are therefore sent to wherever the model's weights live, and the
# predicted means are brought back to the CPU for the sign arithmetic.
model_device = next(model.parameters()).device
n_repeats = 100
test_avg = 0
for it in range(n_repeats):
    sums = 0
    nums = 0
    tgt_signs = 0
    for ii, (src,tgt) in enumerate(testloader):
        n_batch = src.size()[0]
        nums += n_batch
        # One standard-normal draw per sample, passed to the model with src.
        eps = torch.normal(torch.zeros(n_batch),torch.ones(n_batch))
        with torch.no_grad():
            prediction, mus, stds = model(src.to(model_device), eps.to(model_device))
            #print(mus)
        mus = mus.cpu()
        # +1 where mu and tgt share a sign, -1 where they differ, 0 if either is 0.
        signs = torch.sign(mus)*torch.sign(tgt)
        # Ones, zeroed where the signs disagree, then multiplied by sign(mu).
        cor_pred_signs = torch.ones(n_batch)
        cor_pred_signs[torch.where(signs==-1)] = 0
        cor_pred_signs = cor_pred_signs*torch.sign(mus)
        print(cor_pred_signs.sum())
        #print(torch.sign(mus).sum()/src.size()[0])
        sums += signs.sum()
        tgt_signs += torch.sign(tgt).sum()
    test_avg += sums/nums
    print(sums/nums)
    #print(tgt_signs/nums)
print(test_avg/n_repeats)

In [None]:
# Sign agreement between the model's prediction and the target over the
# training loader, alongside the average target sign.
sums = 0
nums = 0
tgt_signs = 0
# Feed inputs on the device that holds the model's weights; results are moved
# back to the CPU before being combined with the CPU targets.
model_device = next(model.parameters()).device
for ii, (src,tgt) in enumerate(trainloader):
    n_batch = src.size()[0]
    nums += n_batch
    # One standard-normal draw per sample, passed to the model with src.
    eps = torch.normal(torch.zeros(n_batch),torch.ones(n_batch))
    # Fixed: this cell called `temp_conv_model`, a name never defined in the
    # notebook (NameError); every other cell evaluates `model`.
    # no_grad: evaluation only, so no autograd graph is kept for the sums.
    with torch.no_grad():
        prediction, mus, stds = model(src.to(model_device), eps.to(model_device))
    signs = torch.sign(prediction.cpu())*torch.sign(tgt)
    sums += signs.sum()
    tgt_signs += torch.sign(tgt).sum()
print(sums/nums)
print(tgt_signs/nums)