In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from SPModel import ReferEncoder
from SPModel import QueryEncoder
from SPModel import SequenceConvTransformer
from dataload import SPDataset

Using cpu device


In [2]:
trainnpdir = '../input/train_files/processed/'
train_sp_dir = trainnpdir+'train_sp_nonan.npy'

ReferLen = 1200
QueryLen = 60
QueryDelay = 20
Datalength = ReferLen + QueryDelay

train_ratio = 0.8

SPdataset = SPDataset(train_sp_dir)
datasettotallen = SPdataset.__len__()
trainlen = int(train_ratio*datasettotallen)
testlen = datasettotallen - trainlen
trainSPdataset, testSPdataset = torch.utils.data.random_split(SPdataset, [trainlen, testlen])


2496632 (2000, 10, 1258)
0 (76000, 8, 1220)


In [3]:
trainSPdataloader = DataLoader(trainSPdataset, batch_size=2, shuffle=True)
testSPdataloader = DataLoader(testSPdataset, batch_size=128, shuffle=True)

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

spmodel = SequenceConvTransformer().to(device)
print(spmodel)

Using cpu device
SequenceConvTransformer(
  (ReLUlayer): LeakyReLU(negative_slope=0.01)
  (referencoder): ReferEncoder(
    (ReLUlayer): LeakyReLU(negative_slope=0.01)
    (dateweekencoding): DateEncoding(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (conv1dday): Conv1d(6, 2, kernel_size=(1,), stride=(1,))
    (BNday): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv1dweek): Conv1d(8, 32, kernel_size=(5,), stride=(1,), padding=(2,), padding_mode=circular)
    (BNweek): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (datepollingmonth): AvgPool1d(kernel_size=(5,), stride=(5,), padding=(0,))
    (datemonthencoding): DateEncoding(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (conv1dmonth1): Conv1d(32, 64, kernel_size=(10,), stride=(5,), padding=(4,), padding_mode=circular)
    (BNmonth1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv1dmonth2): Con

In [5]:
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(spmodel.parameters(), lr=1e-3)

In [6]:
def train(dataloader, model, loss_func, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (refer, query, target) in enumerate(dataloader):
        refer, query, target = refer.to(device), query.to(device), target.to(device)

        pred = model(refer, query)
        loss = loss_func(pred, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1 == 0:
            loss, current = loss.item(), batch * len(refer)
            print(torch.isnan(pred).any(), torch.isnan(target).any())
            print(f"loss: {loss:>7f}    [{current:5d}/{size:>5d}]\n", end='\r')

In [7]:
def test(dataloader, model, loss_func):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for refer, query, target in dataloader:
            refer, query, target = refer.to(device), query.to(device), target.to(device)
            pred = model(refer, query)
            test_loss += loss_func(pred, target).item()
    test_loss /= num_batches
    print(f"Test Error: \n Avg loss: {test_loss:>8f}\n")

    return test_loss

In [8]:
for r, q, t in trainSPdataloader:
    r, q, t = r.to(device), q.to(device), t.to(device)
    print(r.shape, q.shape, t.shape, r.dtype, q.dtype, t.dtype)
    pred = spmodel(r, q)
    print(pred.shape, pred.dtype)
    print(torch.isnan(r).any(), torch.isnan(q).any(), torch.isnan(t).any(), torch.isnan(pred).any())
    break

torch.Size([2, 7, 1200]) torch.Size([2, 7, 60]) torch.Size([2, 1]) torch.float32 torch.float32 torch.float32
torch.Size([2, 1]) torch.float32
tensor(False) tensor(False) tensor(False) tensor(False)


In [9]:
epochs = 5
lossarray = np.zeros((epochs), dtype=np.float32)
ckptpath = './models/ckpts/'
bestckpt = ckptpath + 'bestmodel'
bestloss = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n------------------------")
    train(trainSPdataloader, spmodel, loss_func, optimizer)
    print()
    testloss = test(testSPdataloader, spmodel, loss_func)
    lossarray[t] = testloss
    if testloss <= bestloss:
        torch.save({
            'epoch': t,
            'model_state_dict': spmodel.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': testloss,
        }, bestckpt+f'_epoch_{t:>d}_loss_{testloss:>4f}.pt')
    if t % 30 == 0:
        torch.save({
            'epoch': t,
            'model_state_dict': spmodel.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': testloss,
        }, ckptpath+f'ckpt_epoch_{t:>d}_loss_{testloss:>4f}.pt')
print("Done")
torch.save(spmodel, './models/spmodel.pth')

Epoch 1
------------------------
tensor(False) tensor(False)
loss: 0.582681    [    0/60800]
tensor(False) tensor(False)
loss: 0.572315    [    2/60800]
tensor(False) tensor(False)
loss: 0.600099    [    4/60800]
tensor(False) tensor(False)
loss: 0.565656    [    6/60800]
tensor(False) tensor(False)
loss: 0.562034    [    8/60800]
tensor(False) tensor(False)
loss: 0.592593    [   10/60800]
tensor(False) tensor(False)
loss: 0.558128    [   12/60800]
tensor(False) tensor(False)
loss: 0.595441    [   14/60800]
tensor(False) tensor(False)
loss: 0.567993    [   16/60800]
tensor(False) tensor(False)
loss: 0.579252    [   18/60800]
tensor(False) tensor(False)
loss: 0.611914    [   20/60800]
tensor(False) tensor(False)
loss: 0.579786    [   22/60800]
tensor(False) tensor(False)
loss: 0.582300    [   24/60800]
tensor(False) tensor(False)
loss: 0.505468    [   26/60800]
tensor(False) tensor(False)
loss: 0.591380    [   28/60800]
tensor(False) tensor(False)
loss: 0.559418    [   30/60800]
tensor(

KeyboardInterrupt: 