In [1]:
import os
import gc
import sys

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

from torchinfo import summary

from kitti_data_lotter import KITTI
from prednet_lotterly2 import PredNet

from debug import info

In [2]:
gc.collect()
torch.cuda.empty_cache()

In [3]:
num_epochs = 150
batch_size = 1
A_channels = (3, 48, 96, 192)
R_channels = (3, 48, 96, 192)
lr = 0.001 # if epoch < 75 0.001 else 0.0001
nt = 10 # num of time steps

DATA_DIR = 'kitti_data_raw'

In [4]:
layer_loss_weights = Variable(torch.FloatTensor([[1.], [0.], [0.], [0.]]).cuda())
time_loss_weights = 1./(nt - 1) * torch.ones(nt, 1)
time_loss_weights[0] = 0
time_loss_weights = Variable(time_loss_weights.cuda())
print("layer loss: ", layer_loss_weights.shape, '\n',
      "time loss: ", time_loss_weights.shape)

layer loss:  torch.Size([4, 1]) 
 time loss:  torch.Size([10, 1])


In [5]:
train_file = os.path.join(DATA_DIR, 'X_train.hkl')
train_sources = os.path.join(DATA_DIR, 'sources_train.hkl')
val_file = os.path.join(DATA_DIR, 'X_val.hkl')
val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')

# time_step, height, width, channels
kitti_train = KITTI(train_file, train_sources, nt)
kitti_val = KITTI(val_file, val_sources, nt)



In [6]:
# import hickle as hkl
# # t = hkl.load(train_file)
# # print(t.shape)
# # print(len(t))

# # v = hkl.load(val_file)
# # print(v.shape)
# # print(len(v))

# X = hkl.load(train_file)
# sources = hkl.load(train_sources)
# possible_starts = []
# cur_loc = max(X.shape[0] - 500*10, 0)
# while cur_loc < X.shape[0] - nt + 1:
#     if sources[cur_loc] == sources[cur_loc + nt - 1]:
#         possible_starts.append(cur_loc)
#         cur_loc += nt
#     else:
#         cur_loc += 1
# len(possible_starts)

In [7]:
# packed with batch_size, [batch_size, time_step, height, width, channels]
train_loader = DataLoader(kitti_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(kitti_val, batch_size=batch_size, shuffle=True)

In [8]:
model = PredNet(R_channels, A_channels, output_mode='error_all')
if torch.cuda.is_available():
    print('Using GPU.')
    model.cuda()

Using GPU.


In [9]:
model.eval()

PredNet(
  (cell0): ConvLSTMCell(
    (Gates): Conv2d(57, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (cell1): ConvLSTMCell(
    (Gates): Conv2d(240, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (cell2): ConvLSTMCell(
    (Gates): Conv2d(480, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (cell3): ConvLSTMCell(
    (Gates): Conv2d(576, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (conv0): Sequential(
    (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (satlu): SatLU (min_val=0, max_val=255)
  )
  (conv1): Sequential(
    (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (conv3): Sequential(
    (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (upsample): Upsample(scale_factor=2

In [10]:
summary(model, (batch_size,nt,3,128,160))

Layer (type:depth-idx)                   Output Shape              Param #
PredNet                                  --                        --
├─ConvLSTMCell: 1-1                      [1, 192, 16, 20]          --
│    └─Conv2d: 2-1                       [1, 768, 16, 20]          3,982,080
├─Upsample: 1-2                          [1, 192, 32, 40]          --
├─ConvLSTMCell: 1-3                      [1, 96, 32, 40]           --
│    └─Conv2d: 2-2                       [1, 384, 32, 40]          1,659,264
├─Upsample: 1-4                          [1, 96, 64, 80]           --
├─ConvLSTMCell: 1-5                      [1, 48, 64, 80]           --
│    └─Conv2d: 2-3                       [1, 192, 64, 80]          414,912
├─Upsample: 1-6                          [1, 48, 128, 160]         --
├─ConvLSTMCell: 1-7                      [1, 3, 128, 160]          --
│    └─Conv2d: 2-4                       [1, 12, 128, 160]         6,168
├─Sequential: 1-8                        [1, 3, 128, 160]      

In [11]:
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# loss_fn = nn.MSELoss().cuda()

In [12]:
def lr_scheduler(optimizer, epoch):
    if epoch < num_epochs //2:
        return optimizer
    else:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0001
        return optimizer

In [13]:
def progressbar(iter_, prefix="", size=60, file=sys.stdout):
    count = len(iter_)
    
    def show(j):
        x = int(size * j / count)
        file.write("{}[{}{}] {:3d}/{}\r".format(prefix, "#"*x, "."*(size-x), j, count))
        file.flush()
    show(0)
    
    loss = 0
    for i, item in enumerate(iter_):
        inputs = item.permute(0,1,4,2,3) # batch x time_steps x channel x width x height
        inputs = Variable(inputs.cuda())
        
        loss_temp = model(inputs) # batch x n_layers x nt
        loss += loss_temp
        del loss_temp
        
        show(i+1)
    file.write("\n")
    file.flush()
    
    return loss

In [14]:
for epoch in range(100):
    optimizer = lr_scheduler(optimizer, epoch)
    
    loss = progressbar(train_loader, "step: ", 40)
        
    optimizer.zero_grad()
#     loss.requires_grad_(True)
    loss.backward()
    optimizer.step()
    print('Epoch: {}/{}, loss: {}'.format(epoch, num_epochs, loss))
    
    with pytorch.no_grad():
            torch.cuda.empty_cache()
    
    if epoch%10 == 0:
        torch.save(model.state_dict(), 'models/training_{:04d}.pt'.format(epoch))
torch.save(model.state_dict(), 'models/training.pt')

step: [#.......................................]  20/496

RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 11.18 GiB total capacity; 10.23 GiB already allocated; 1024.00 KiB free; 638.45 MiB cached)

In [None]:
# for epoch in range(100):
#     optimizer = lr_scheduler(optimizer, epoch)
#     loss = 0
#     for i, inputs in enumerate(train_loader):
#         inputs = inputs.permute(0, 1, 4, 2, 3) # batch x time_steps x channel x width x height
#         inputs = Variable(inputs.cuda())
        
#         errors = model(inputs) # batch x n_layers x nt
# #         loc_batch = errors.size(0)
# #         errors = torch.mm(errors.view(-1, nt), time_loss_weights) # batch*n_layers x 1
# #         errors = torch.mm(errors.view(loc_batch, -1), layer_loss_weights)
# #         errors = torch.mean(errors)

#         targets = Variable(torch.zeros(errors.shape))
#         loss += loss_fn(errors, targets)

#         optimizer.zero_grad()
        
# #         errors.backward()
#         loss.backward()
#         #print(errors.item())
#         optimizer.step()
#         if i%10 == 0:
# #             print('Epoch: {}/{}, step: {}/{}, errors: {}'.format(
# #                   epoch, num_epochs, i, len(kitti_train)//batch_size, errors.item()))
#             print('Epoch: {}/{}, step: {}/{}, loss: {}'.format(
#                   epoch, num_epochs, i, len(kitti_train)//batch_size, loss))
#     if epoch%10 == 0:
#         torch.save(model.state_dict(), 'models/training_{:04d}.pt'.format(epoch))
# torch.save(model.state_dict(), 'models/training.pt')

In [None]:
# for epoch in range(100):
#     optimizer = lr_scheduler(optimizer, epoch)
#     loss = 0
#     for i, inputs in enumerate(train_loader):
#         errors = [None] * nt
#         inputs = inputs.permute(0, 1, 4, 2, 3) # batch x time_steps x channel x width x height
#         inputs = Variable(inputs.cuda())
        
#         errors = model(inputs) # batch x n_layers x nt

#         targets = Variable(torch.zeros(errors.shape))
#         loss += loss_fn(errors.data.cpu().detach(), targets)
        
#     optimizer.zero_grad()
#     loss.requires_grad_(True)
#     loss.backward()
#     optimizer.step()
#     print('Epoch: {}/{}, loss: {}'.format(epoch, num_epochs, loss))
    
#     if epoch%10 == 0:
#         torch.save(model.state_dict(), 'models/training_{:04d}.pt'.format(epoch))
# torch.save(model.state_dict(), 'models/training.pt')

In [None]:
for i in range(90):
    print(errors[i].shape)