In [1]:
import math
import time

import cv2
import numpy as np
import pandas as pd
import pt_util
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import tqdm

In [2]:
DATA_PATH = '/home/wangc21/datasets/ARC/right_loop/'
#DATA_PATH = '/home/wangc21/datasets/ARC/left_loop/'

In [3]:
class ARCDataset(torch.utils.data.Dataset):
    def __init__(self, data_path, seq_len, batch_size, transform=None):
        super(ARCDataset, self).__init__()
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.transform = transform
        
        self.data_path = data_path
        self.labels = pd.read_csv(self.data_path + 'labels.csv', sep=',', header=None).values.astype(np.float32)
        #n = len(self.labels) // self.batch_size
        #self.labels = self.labels[:n * self.batch_size]
        
    def __len__(self):
        n = len(self.labels) // self.batch_size
        return math.ceil(n / self.seq_len) * self.batch_size
        
    def __getitem__(self, idx):
        #    0      1
        # 0 [0:5]  [10:15]
        # 1 [4:9]  [14:19]
        # 2 [8:10] [18:20]
        #
        # row C {0, 1, 2}
        # col C {0, 1}
        # E.g., idx = 3 -> (row, col) = (1, 1) -> [14 : 19]
        row = idx // self.batch_size
        col = idx % self.batch_size

        # Size of each chunk: we divide the dataset of length
        # len(dataset) into batch_size chunks, so each has size:
        chunk_size = len(self.labels) // self.batch_size

        # start = row * seq + col * chunk
        start_idx = row * self.seq_len + col * chunk_size

        # If last row, add remaining elements only.
        end_idx = start_idx
        end_idx += min(self.seq_len, chunk_size - row * self.seq_len)
        
        input_list = []
        label_list = []
        for idx in range(start_idx, end_idx):

            # process color frame, using defined augmentations
            image = cv2.imread(self.data_path + 'images/' + str(idx) + '.jpg')
            if self.transform:
                image = self.transform(image)
        
            # process depth data, convert to tensor
            depth = cv2.imread(self.data_path + 'depth/' + str(idx) + '.jpg', 0)
            depth = transforms.ToTensor()(depth)
        
            # make input and label sequences
            input_list.append(torch.cat((image, depth)))
            label_list.append(self.labels[idx])
        
        #input_tensor = torch.stack(input_list).resize_((self.seq_len, 640 * 480 * 4))
        input_tensor = torch.stack(input_list)
        label_tensor = transforms.ToTensor()(np.asarray(label_list)).squeeze(0)
        
        return (input_tensor, label_tensor)

In [4]:
BATCH_SIZE = 1
SEQ_LEN = 5

# Perform data augmentation only on color frames, but not depth data.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

data_train = ARCDataset(DATA_PATH + 'train/', SEQ_LEN, BATCH_SIZE, transform=train_transforms)
print(len(data_train))
data_test = ARCDataset(DATA_PATH + 'val/', SEQ_LEN, BATCH_SIZE, transform=test_transforms)
print(len(data_test))

640
160


In [5]:
'''
input_size – The number of expected features in the input x

hidden_size – The number of features in the hidden state h

num_layers – Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results. Default: 1

bias – If False, then the layer does not use bias weights b_ih and b_hh. Default: True

batch_first – If True, then the input and output tensors are provided as (batch, seq, feature). Default: False

dropout – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0

bidirectional – If True, becomes a bidirectional LSTM. Default: False
'''

class DarkLSTM(nn.Module):
    def __init__(self):
        super(DarkLSTM, self).__init__()
        # convolutional encoder
        self.conv1 = nn.Conv2d(4, 16, 3, 1, 1)
        self.conv2 = nn.Conv2d(16, 32, 3, 1, 1)
        self.conv3 = nn.Conv2d(32, 64, 3, 1, 1)
        self.conv4 = nn.Conv2d(64, 128, 3, 1, 1)
        self.conv5 = nn.Conv2d(128, 256, 3, 1, 1)
        self.conv6 = nn.Conv2d(256, 512, 3, 1, 1)
        self.conv7 = nn.Conv2d(512, 1024, 3, 1, 1)
        
        # recurrent cell
        self.lstm = nn.LSTM(
            input_size = 5 * 3 * 1024,
            hidden_size = 32,
            batch_first = True,
        )
        
        # linear decoder
        self.fc_1 = nn.Linear(32, 1)
        self.fc_2 = nn.Linear(32, 1)
        
        self.lowest_error = float("inf")
    
    # x is (batch_size, seq_len, (input_size))
    def forward(self, x, hidden_state = None):
        x = x.squeeze(0)
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv5(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv6(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv7(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.unsqueeze(0).reshape((BATCH_SIZE, SEQ_LEN, 5 * 3 * 1024))
        
        x, hidden_state = self.lstm(x, hidden_state)
        throttle = self.fc_1(x)
        angle = self.fc_2(x)
        return torch.cat((throttle, angle), 2), hidden_state

    def loss(self, prediction, label, reduction='mean'):
        loss = F.mse_loss(prediction, label, reduction = reduction)
        return loss
    
    def save_model(self, file_path, num_to_keep=1):
        pt_util.save(self, file_path, num_to_keep)
        
    def save_best_model(self, error, file_path, num_to_keep=1):
        if error < self.lowest_error:
          self.lowest_error = error
          pt_util.save(self, file_path, num_to_keep)

    def load_model(self, file_path):
        pt_util.restore(self, file_path)

    def load_last_model(self, dir_path):
        return pt_util.restore_latest(self, dir_path)

In [6]:
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)   
    
def train(model, device, optimizer, train_loader, lr, epoch, log_interval):
    model.train()
    losses = []
    hidden = None
    for batch_idx, (data, label) in enumerate(tqdm.tqdm(train_loader)):
        data, label = data.to(device), label.to(device)
        # Separates the hidden state across batches. 
        # Otherwise the backward would try to go all the way to the beginning every time.
        if hidden is not None:
            hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data)
        loss = model.loss(output, label)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return np.mean(losses)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0

    with torch.no_grad():
        hidden = None
        for batch_idx, (data, label) in enumerate(test_loader):    
            data, label = data.to(device), label.to(device)
            output, hidden = model(data, hidden)
            test_loss += model.loss(output, label, reduction='mean').item()

    test_loss /= len(test_loader)
    print('\nTest set: Average loss: \n' + str(test_loss))
    return test_loss

In [7]:
EPOCHS = 100
LEARNING_RATE = 0.0001
MOMENTUM = 0.9
USE_CUDA = True
SEED = 0
PRINT_INTERVAL = 100
WEIGHT_DECAY = 0.0005
LOG_PATH = DATA_PATH + 'log.pkl'

use_cuda = USE_CUDA and torch.cuda.is_available()

#torch.manual_seed(SEED)

device = torch.device("cuda" if use_cuda else "cpu")
print('Using device', device)
import multiprocessing
print('num cpus:', multiprocessing.cpu_count())

kwargs = {'num_workers': multiprocessing.cpu_count(),
          'pin_memory': True} if use_cuda else {}

train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                           shuffle=False, **kwargs)
test_loader = torch.utils.data.DataLoader(data_test, batch_size=BATCH_SIZE,
                                          shuffle=False, **kwargs)

model = DarkLSTM().to(device)
#optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
#start_epoch = model.load_last_model(DATA_PATH + 'checkpoints')
start_epoch = 0

train_losses, test_losses = pt_util.read_log(LOG_PATH, ([], []))
test_loss = test(model, device, test_loader)
test_losses.append((start_epoch, test_loss))

try:
    for epoch in range(start_epoch, EPOCHS + 1):
        train_loss = train(model, device, optimizer, train_loader, LEARNING_RATE, epoch, PRINT_INTERVAL) 
        test_loss = test(model, device, test_loader)
        train_losses.append((epoch, train_loss))
        test_losses.append((epoch, test_loss))
        pt_util.write_log(LOG_PATH, (train_losses, test_losses))
        model.save_best_model(test_loss, DATA_PATH + 'checkpoints/%03d.pt' % epoch)


except KeyboardInterrupt as ke:
    print('Interrupted')
except:
    import traceback
    traceback.print_exc()
finally:
    model.save_model(DATA_PATH + 'checkpoints/%03d.pt' % epoch, 0)
    ep, val = zip(*train_losses)
    pt_util.plot(ep, val, 'Train loss', 'Epoch', 'Error')
    ep, val = zip(*test_losses)
    pt_util.plot(ep, val, 'Test loss', 'Epoch', 'Error')
    

Using device cuda
num cpus: 16


  0%|          | 0/640 [00:00<?, ?it/s]


Test set: Average loss: 
0.24004756454378368
Saved /home/wangc21/datasets/ARC/right_loop/checkpoints/000.pt




Traceback (most recent call last):
  File "<ipython-input-7-ba5aaeef1b3b>", line 39, in <module>
    train_loss = train(model, device, optimizer, train_loader, LEARNING_RATE, epoch, PRINT_INTERVAL)
  File "<ipython-input-6-c4046bc267c6>", line 19, in train
    output, hidden = model(data)
  File "/home/wangc21/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-5-8817dca1d4c4>", line 59, in forward
    x = x.unsqueeze(0).resize_((BATCH_SIZE, SEQ_LEN, 5 * 3 * 1024))
RuntimeError: cannot resize variables that require grad


ValueError: not enough values to unpack (expected 2, got 0)