In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from BaselineNN import *
import time
# Use float64 as the default floating-point dtype for tensors created from here on.
torch.set_default_dtype(torch.float64)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Training and validation splits, each read from its own parquet file.
# SimpleDataset comes from the BaselineNN star import.
trainDataset, valDataset = (
    SimpleDataset('tData.parquet'),
    SimpleDataset('vData.parquet'),
)

In [4]:
# Training hyper-parameters: number of passes over the training set and the
# number of samples per mini-batch.
nEpoch, batchSize = 100, 128

In [5]:
# Mini-batch iterator over the training set, reshuffled on every pass.
# NOTE(review): DataLoader arrives via the BaselineNN star import; presumably
# torch.utils.data.DataLoader - confirm.
dataloader = DataLoader(
    dataset=trainDataset,
    shuffle=True,
    batch_size=batchSize,
)
# Batches per epoch; used for progress reporting and loss averaging.
numBatches = len(dataloader)

In [6]:
# Instantiate the network and move it to the selected device.
# NOTE(review): the meaning of the two constructor arguments (10, 128) is
# defined in BaselineNN and not visible here - confirm.
model = baselineNN(10, 128).to(device)

# AdamW (AMSGrad variant) with a light weight decay.
opt = torch.optim.AdamW(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-6,
    amsgrad=True,
)

# Multiply the learning rate by 0.9925 after every 2 calls to sched.step().
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.9925)

model.train()

# Report the number of trainable parameters (those with requires_grad set).
print(
    "Model loaded successfully! Num Parameters = {}".format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)
    )
)

# Mean-squared-error criterion shared by training and validation.
L2 = nn.MSELoss()

Model loaded successfully! Num Parameters = 149636


In [7]:
def Checkpoint(torchModel, epoch):
    """Save ``torchModel`` to ``checkpoints/save<epoch>.pth``.

    The ``checkpoints`` directory is created on demand so that a fresh
    working directory does not make the very first save fail.

    Parameters
    ----------
    torchModel
        Object exposing a ``save(path)`` method. This is not part of
        ``torch.nn.Module``; presumably the model class in ``BaselineNN``
        provides it - confirm.
    epoch
        Value interpolated into the file name (an epoch counter at the
        call sites in this notebook).
    """
    import os  # local import keeps this cell self-contained

    os.makedirs("checkpoints", exist_ok=True)
    name = "checkpoints/save{}.pth".format(epoch)
    torchModel.save(name)

In [8]:
# Snapshot the untrained weights as checkpoint 0.
Checkpoint(model, 0)
time_ = time.time()

# Move the whole validation set to the device once, outside the epoch loop.
valX, valY = valDataset.xData, valDataset.yData
valX = torch.from_numpy(valX).to(device)
valY = torch.from_numpy(valY).to(device)

# Per-epoch mean training loss and validation loss.
tLoss = np.zeros(nEpoch)
vLoss = np.zeros(nEpoch)

# Width of the widest '\r' progress line printed so far; the epoch summary is
# padded to this width so leftovers of the progress line do not show through.
progressWidth = 0

for ii in range(nEpoch):
    epochLoss = 0.0
    time1 = time.time()
    for jj, (x, y) in enumerate(dataloader):
        opt.zero_grad(set_to_none=True)
        x = x.to(device)
        y = y.to(device)
        # nn.MSELoss is documented as (input, target): prediction first.
        loss = L2(model(x), y)
        loss.backward()
        opt.step()
        # .item() yields a plain Python float for the running sum.
        epochLoss += loss.item()
        time2 = time.time() - time1
        # Estimated seconds left in this epoch from the average batch time.
        rt = (numBatches - jj - 1) * time2 / (jj+1)
        if jj % 100 == 99:
            progress = '[Epoch {:05d}/{:05d}, Batch {:05d}/{:05d}, Remaining Time: {:02d}:{:02d}, Rate = {:.4f}] : Training Loss = {:.8f}'.format(ii+1, nEpoch, jj+1, numBatches, int(rt//60), int(rt%60), (jj+1)/time2, epochLoss/(jj+1))
            progressWidth = max(progressWidth, len(progress))
            print(progress, end='\r', flush=True)

    tLoss[ii] = epochLoss / numBatches

    # Validate in eval mode and without autograd: layers such as dropout or
    # batch-norm (if the model has any) behave deterministically, and no graph
    # is built over the full validation set.
    model.eval()
    with torch.no_grad():
        vLoss[ii] = L2(model(valX), valY).item()
    model.train()

    torch.cuda.empty_cache()
    summary = '[Epoch {:05d}/{:05d}] : Training Loss = {:.8f}, Validation Loss = {:.8f}'.format(ii+1, nEpoch, tLoss[ii], vLoss[ii])
    # Pad so the summary fully overwrites the last '\r' progress line.
    print(summary.ljust(progressWidth))
    sched.step()
    Checkpoint(model, ii+1)

[Epoch 00001/00100] : Training Loss = 0.93955241, Validation Loss = 1.904521802] : Training Loss = 0.93954282
[Epoch 00002/00100] : Training Loss = 0.77711900, Validation Loss = 0.923213655] : Training Loss = 0.77715333
[Epoch 00003/00100] : Training Loss = 0.73820334, Validation Loss = 0.792170276] : Training Loss = 0.73817338
[Epoch 00004/00100] : Training Loss = 0.71623307, Validation Loss = 1.403792738] : Training Loss = 0.71622588
[Epoch 00005/00100] : Training Loss = 0.69276465, Validation Loss = 0.646732380] : Training Loss = 0.69277443
[Epoch 00006/00100, Batch 121100/198078, Remaining Time: 09:56, Rate = 128.9828] : Training Loss = 0.68421609

KeyboardInterrupt: 