In [24]:
# MODEL

class LSTMNet(nn.Module):
    """Per-frame CNN encoder followed by a GRU and a linear decoder.

    Despite the class name, the recurrent layer is an ``nn.GRU``.

    The CNN stack (two 5x5 convolutions with 2x2 max-pooling, flattened to
    ``16 * 5 * 5`` features) only lines up for 3-channel 32x32 frames.

    Args:
        feature_size: Width of the per-frame feature vector produced by the
            CNN and of the GRU hidden state.
        vocab_size: Number of output classes produced by the decoder.
    """

    def __init__(self, feature_size=10, vocab_size=10):
        super(LSTMNet, self).__init__()

        # These were read below but never assigned, which made construction
        # fail with AttributeError. The defaults keep fc3 at its original
        # 84 -> 10 shape.
        self.feature_size = feature_size
        self.vocab_size = vocab_size

        # Conv
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, self.feature_size)

        # Feed into GRU.
        self.gru = nn.GRU(self.feature_size, self.feature_size, batch_first=True)
        self.decoder = nn.Linear(self.feature_size, self.vocab_size)

        # The earlier draft tied decoder.weight to fc3.weight. The two layers
        # cannot share one matrix: fc3.weight is (feature_size, 84) while the
        # decoder needs (vocab_size, feature_size), so the tied decoder would
        # reject the GRU output. They therefore keep separate weights.
        self.decoder.bias.data.zero_()

        self.best_accuracy = -1

    def forward(self, x, hidden_state=None):
        """Encode every frame, run the GRU over time and decode each step.

        Args:
            x: Tensor whose first two dimensions are batch and sequence
                length; the remaining dimensions are one frame as accepted by
                ``conv1`` (3 channels).
            hidden_state: Optional initial GRU hidden state.

        Returns:
            ``(scores, hidden_state)`` where ``scores`` has shape
            ``(batch, sequence, vocab_size)``.
        """
        batch_size = x.shape[0]
        sequence_length = x.shape[1]

        # Conv2d only accepts (N, C, H, W), so fold the time axis into the
        # batch axis while the CNN runs.
        x = x.reshape(batch_size * sequence_length, *x.shape[2:])

        # Conv
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Keep one row per frame; a wrong frame size then fails loudly in fc1
        # instead of silently changing the number of rows.
        x = x.view(batch_size * sequence_length, -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        # Feed into GRU: restore (batch, sequence, feature) for batch_first.
        x = x.view(batch_size, sequence_length, self.feature_size)
        x, hidden_state = self.gru(x, hidden_state)
        x = self.decoder(x)
        return x, hidden_state

    # This defines the function that gives a probability distribution and implements the temperature computation.
    def inference(self, x, hidden_state=None, temperature=1):
        """Return class probabilities for a single frame.

        Args:
            x: One frame; its last three dimensions are (C, H, W). Any leading
                singleton dimensions are ignored.
            hidden_state: Optional GRU hidden state carried over from the
                previous call.
            temperature: Softmax temperature; values near zero sharpen the
                distribution. Clamped below at 1e-20 to avoid division by zero.

        Returns:
            ``(probabilities, hidden_state)`` with ``probabilities`` of shape
            ``(1, vocab_size)``.
        """
        # Present the frame to forward() as a batch of one sequence of length one.
        x = x.reshape(1, 1, *x.shape[-3:])
        x, hidden_state = self.forward(x, hidden_state)
        x = x.view(1, -1)
        x = x / max(temperature, 1e-20)
        x = F.softmax(x, dim=1)
        return x, hidden_state

    # Predefined loss function
    def loss(self, prediction, label, reduction='mean'):
        """Cross-entropy between flattened predictions and flattened labels."""
        loss_val = F.cross_entropy(prediction.view(-1, self.vocab_size), label.view(-1), reduction=reduction)
        return loss_val

    # Saves the current model
    def save_model(self, file_path, num_to_keep=1):
        """Save this model through ``pt_util.save``."""
        pt_util.save(self, file_path, num_to_keep)

    # Saves the best model so far
    def save_best_model(self, accuracy, file_path, num_to_keep=1):
        """Save only when ``accuracy`` beats the best value seen so far."""
        if accuracy > self.best_accuracy:
            self.save_model(file_path, num_to_keep)
            self.best_accuracy = accuracy

    def load_model(self, file_path):
        """Restore this model from ``file_path`` through ``pt_util.restore``."""
        pt_util.restore(self, file_path)

    def load_last_model(self, dir_path):
        """Restore via ``pt_util.restore_latest`` and return its result."""
        return pt_util.restore_latest(self, dir_path)

In [1]:
import multiprocessing
import os
import time
import traceback

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import pt_util

In [2]:
# Root directory of the dataset used by the cells below. Keep the trailing
# '/': later cells build sub-paths from it by plain string concatenation
# (e.g. DATA_PATH + 'train/', DATA_PATH + 'log.pkl').
# Alternative dataset root; swap the active line to use it.
#DATA_PATH = '/home/wangc21/datasets/ARC/right_loop/'
DATA_PATH = '/home/wangc21/datasets/ARC/left_loop/'

In [3]:
class ARCDataset(torch.utils.data.Dataset):
    """Dataset of color frames, depth frames and per-frame labels.

    Expected layout under ``data_path``:
        labels.csv      header-less CSV, one row of labels per sample
        images/<i>.jpg  color frame of sample ``i``
        depth/<i>.jpg   depth frame of sample ``i`` (read as grayscale)

    Args:
        data_path: Directory holding the files above, with or without a
            trailing path separator.
        val: Stored on the instance; not used by this class itself.
        transform: Optional callable applied to the color frame only. When
            omitted the frame is converted with ``transforms.ToTensor()`` so it
            can still be concatenated with the depth tensor.
    """

    def __init__(self, data_path, val=False, transform=None):
        self.data_path = data_path
        # os.path.join gives the same path as the old string concatenation
        # when data_path ends with '/', and also works when it does not.
        df_labels = pd.read_csv(os.path.join(data_path, 'labels.csv'), sep=',', header=None)
        self.labels = df_labels.values.astype(np.float32)
        self.val = val
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. number of rows in labels.csv."""
        return len(self.labels)

    def _read_frame(self, subdir, idx, grayscale=False):
        """Read ``<data_path>/<subdir>/<idx>.jpg``; raise if it cannot be read."""
        path = os.path.join(self.data_path, subdir, str(idx) + '.jpg')
        frame = cv2.imread(path, 0) if grayscale else cv2.imread(path)
        # cv2.imread reports failure by returning None instead of raising, which
        # otherwise surfaces later as an unrelated error inside the transforms.
        if frame is None:
            raise FileNotFoundError('Could not read image: ' + path)
        return frame

    def __getitem__(self, idx):
        """Return ``(frame, label)`` for sample ``idx``.

        ``frame`` is the transformed color frame concatenated along dim 0 with
        the depth tensor; ``label`` is the matching float32 row of labels.csv.
        """
        # process color frame, using defined augmentations
        # NOTE(review): cv2.imread yields BGR channel order while the
        # torchvision transforms treat the array as RGB. Left unchanged so the
        # data stays consistent with previously trained checkpoints - confirm
        # whether that is intended.
        image = self._read_frame('images', idx)
        if self.transform:
            image = self.transform(image)
        else:
            # Without a transform the frame would stay a NumPy array, which
            # torch.cat below cannot combine with the depth tensor.
            image = transforms.ToTensor()(image)

        # process depth data, convert to tensor
        depth = self._read_frame('depth', idx, grayscale=True)
        depth = transforms.ToTensor()(depth)

        # concat with color frame
        concat = torch.cat((image, depth))
        return (concat, self.labels[idx])

In [4]:
# Augmentation (color jitter) is applied to the color frames only. The depth
# frame is converted to a tensor inside ARCDataset and never passes through
# these pipelines.

train_transforms = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Evaluation pipeline: same tensor conversion and normalization, no jitter.
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Build both splits and report how many samples each one holds.
data_train = ARCDataset(DATA_PATH + 'train/', val=False, transform=train_transforms)
print(len(data_train))
data_test = ARCDataset(DATA_PATH + 'val/', val=True, transform=test_transforms)
print(len(data_test))

3200
800


In [5]:
def train(model, device, train_loader, optimizer, epoch, log_interval):
    """Run one optimization pass over ``train_loader``.

    Args:
        model: Module exposing ``loss(prediction, label)``.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, label)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        epoch: Epoch number, used only in the progress line.
        log_interval: A progress line is printed every this many batches.

    Returns:
        Mean of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    for step, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = model.loss(model(inputs), targets)
        batch_losses.append(batch_loss.item())
        batch_loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue
        print('{} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            time.ctime(time.time()),
            epoch, step * len(inputs), len(train_loader.dataset),
            100. * step / len(train_loader), batch_loss.item()))
    return np.mean(batch_losses)

def test(model, device, test_loader, log_interval=None):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(test_loader):
            data, label = data.to(device), label.to(device)
            output = model(data)
            test_loss_on = model.loss(output, label, reduction='sum').item()
            test_loss += test_loss_on
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: ' + str(test_loss) + '\n')
    return test_loss

In [6]:
# Baseline Model
# Epoch 35 loss: 0.009380207437316131 

class CaeLeNet(nn.Module):
    """Three-stage CNN regressor with separate throttle and angle heads.

    Input has 4 channels. Each stage is a 3x3 convolution, ReLU and 2x2
    max-pooling; the result is flattened to ``80 * 60 * 256`` features, so the
    input resolution is fixed by that constant (presumably 640x480 frames -
    TODO confirm orientation).
    """

    # Flattened size of the last pooled feature map (width * height * channels).
    _FLAT_FEATURES = 80 * 60 * 256

    def __init__(self):
        super().__init__()
        # Convolutional trunk: 4 -> 64 -> 128 -> 256 channels, padding keeps size.
        self.conv1 = nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        # Shared dense layer followed by one scalar head per output.
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 100)
        self.fc2_1 = nn.Linear(100, 1)
        self.fc2_2 = nn.Linear(100, 1)
        self.drop = nn.Dropout(0.1)

        # Best (lowest) error seen so far by save_best_model.
        self.lowest_error = float("inf")

    def forward(self, x):
        """Return a ``(N, 2)`` tensor: column 0 is throttle, column 1 is angle."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), 2, 2)
        flat = x.view(-1, self._FLAT_FEATURES)
        hidden = self.drop(F.relu(self.fc1(flat)))
        heads = (self.fc2_1(hidden), self.fc2_2(hidden))
        return torch.cat(heads, 1)

    def loss(self, prediction, label, reduction='mean'):
        """Mean-squared error between ``prediction`` and ``label``."""
        return F.mse_loss(prediction, label, reduction=reduction)

    def save_model(self, file_path, num_to_keep=1):
        """Save this model through ``pt_util.save``."""
        pt_util.save(self, file_path, num_to_keep)

    def save_best_model(self, error, file_path, num_to_keep=1):
        """Save only when ``error`` is strictly lower than the best so far."""
        if not (error < self.lowest_error):
            return
        self.lowest_error = error
        pt_util.save(self, file_path, num_to_keep)

    def load_model(self, file_path):
        """Restore this model from ``file_path`` through ``pt_util.restore``."""
        pt_util.restore(self, file_path)

    def load_last_model(self, dir_path):
        """Restore via ``pt_util.restore_latest`` and return its result."""
        return pt_util.restore_latest(self, dir_path)

In [None]:
# Play around with these constants, you may find a better setting.
BATCH_SIZE = 10
TEST_BATCH_SIZE = 5
EPOCHS = 100
LEARNING_RATE = 0.001
MOMENTUM = 0.9  # only used by the commented-out SGD optimizer below
USE_CUDA = True
SEED = 0  # only used by the commented-out manual_seed call below
PRINT_INTERVAL = 100
WEIGHT_DECAY = 0.0005
LOG_PATH = DATA_PATH + 'log.pkl'

# Now the actual training code
use_cuda = USE_CUDA and torch.cuda.is_available()

# Seeding is disabled, so runs are not repeatable; uncomment to fix the seed.
#torch.manual_seed(SEED)

device = torch.device("cuda" if use_cuda else "cpu")
print('Using device', device)
print('num cpus:', multiprocessing.cpu_count())

# Worker processes and pinned memory are only requested when running on CUDA.
kwargs = {'num_workers': multiprocessing.cpu_count(),
          'pin_memory': True} if use_cuda else {}

train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                           shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(data_test, batch_size=TEST_BATCH_SIZE,
                                          shuffle=False, **kwargs)

model = CaeLeNet().to(device)
#optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
#start_epoch = model.load_last_model(DATA_PATH + 'checkpoints')
start_epoch = 0

# Resume the loss history if a log exists, then record the loss before training.
train_losses, test_losses = pt_util.read_log(LOG_PATH, ([], []))
test_loss = test(model, device, test_loader)
test_losses.append((start_epoch, test_loss))

# Bind `epoch` before the loop: the `finally` block names the checkpoint after
# it and would raise NameError if the loop never ran a single iteration.
epoch = start_epoch

try:
    for epoch in range(start_epoch, EPOCHS + 1):
        train_loss = train(model, device, train_loader, optimizer, epoch, PRINT_INTERVAL)
        test_loss = test(model, device, test_loader)
        train_losses.append((epoch, train_loss))
        test_losses.append((epoch, test_loss))
        pt_util.write_log(LOG_PATH, (train_losses, test_losses))
        model.save_best_model(test_loss, DATA_PATH + 'checkpoints/%03d.pt' % epoch)

except KeyboardInterrupt:
    print('Interrupted')
except Exception:
    # Report the failure but still fall through to the final save and plots.
    # `Exception` (not a bare except) lets SystemExit propagate.
    traceback.print_exc()
finally:
    model.save_model(DATA_PATH + 'checkpoints/%03d.pt' % epoch, 0)
    # zip(*[]) cannot be unpacked into two names, so skip empty histories.
    if train_losses:
        ep, val = zip(*train_losses)
        pt_util.plot(ep, val, 'Train loss', 'Epoch', 'Error')
    if test_losses:
        ep, val = zip(*test_losses)
        pt_util.plot(ep, val, 'Test loss', 'Epoch', 'Error')

Using device cuda
num cpus: 16

Test set: Average loss: 0.25452835116535427


Test set: Average loss: 0.05783337225215291

Saved /home/wangc21/datasets/ARC/left_loop/checkpoints/000.pt


Test set: Average loss: 0.024976413917902392

Saved /home/wangc21/datasets/ARC/left_loop/checkpoints/001.pt


Test set: Average loss: 0.028122466830827763


Test set: Average loss: 0.01904195419730968

Saved /home/wangc21/datasets/ARC/left_loop/checkpoints/003.pt


Test set: Average loss: 0.0176809943059925

Saved /home/wangc21/datasets/ARC/left_loop/checkpoints/004.pt


Test set: Average loss: 0.01739559879875742

Saved /home/wangc21/datasets/ARC/left_loop/checkpoints/005.pt


Test set: Average loss: 0.01668108985351864

Saved /home/wangc21/datasets/ARC/left_loop/checkpoints/006.pt


Test set: Average loss: 0.020815500513417646


Test set: Average loss: 0.01979705540201394


Test set: Average loss: 0.02017237676656805


Test set: Average loss: 0.0177257277345052


Test set: Average loss: 0.01688507797