In [1]:
# param
# Every tunable value of the notebook lives in this cell.

# Root directory; the dataset and model paths below are built from it.
BASE_FOLDER = '/home/jim/Documents/'
# Archive name of the dataset (not referenced by the visible cells).
TAR_FILE_NAME = 'clean_data_0627.tar'
# Dataset root and the sub-folder that holds the JPEG frames.
FOLDER_PATH = f'{BASE_FOLDER}clean_data'
IMG_FOLDER_PATH = f'{BASE_FOLDER}clean_data/img'

# Optimisation settings: mini-batch size, number of epochs, Adam learning rate.
BATCH_SIZE, NUM_EPOCHS = 8, 200
LR = 1e-4
# Number of worker processes handed to the PyTorch DataLoaders.
LOADER_THREAD = 4

In [2]:
import time
class Timer():
    """Tiny lap timer: prints the wall-clock time elapsed between checkpoints."""

    def __init__(self):
        # Reference point for the first lap is the moment of construction.
        self.t = time.time()

    def track(self, label):
        """Print ``label`` followed by the seconds since the previous checkpoint.

        The checkpoint is then moved to "now", so consecutive calls report
        lap times rather than the total elapsed time.
        """
        now = time.time()
        elapsed = now - self.t
        print(label, elapsed)
        self.t = now

In [2]:
import pandas as pd

# Load the train / test index tables that sit next to the image folder.
# The rest of the notebook reads the `im_key`, `steering` and `speed`
# columns from these frames.
train_df = pd.read_csv(filepath_or_buffer=BASE_FOLDER + 'clean_data/train.csv')
test_df = pd.read_csv(filepath_or_buffer=BASE_FOLDER + 'clean_data/test.csv')

In [3]:
import torch
import torchvision
import torch.nn as nn

class NvidiaModel2(nn.Module):
    """Five-conv / four-linear regression network with two outputs.

    ``features`` is a stack of Conv2d+ELU pairs and ``classifier`` is a stack
    of Linear layers with ELU in between (no activation after the last one).
    The first linear layer expects 64 * 18 flattened features, so the input
    image size must make the last conv output contain 18 spatial positions.
    The final layer emits two values per sample.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size, stride) for each conv layer.
        conv_specs = [
            (3, 24, 5, 2),
            (24, 36, 5, 2),
            (36, 48, 5, 2),
            (48, 64, 3, 1),
            (64, 64, 3, 1),
        ]
        feature_layers = []
        for c_in, c_out, kernel, stride in conv_specs:
            feature_layers.append(
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=kernel, stride=stride)
            )
            feature_layers.append(nn.ELU())
        self.features = nn.Sequential(*feature_layers)

        # Widths of the fully connected head, from flattened features to output.
        widths = [64 * 18, 100, 50, 10, 2]
        head_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            head_layers.append(nn.Linear(in_features=n_in, out_features=n_out))
            head_layers.append(nn.ELU())
        head_layers.pop()  # the output layer is linear, no trailing ELU
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the conv stack, flatten per sample, then apply the linear head."""
        feats = self.features(x)
        flat = feats.view(feats.size()[0], -1)
        return self.classifier(flat)

In [5]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
from PIL import Image

class SteeringDataset(Dataset):
    """Map-style dataset yielding ``(image, [steering, speed] / 100)`` pairs.

    Args:
        df: DataFrame with ``im_key``, ``steering`` and ``speed`` columns.
        folder_path: Directory containing ``<im_key>.jpg`` files.
        transform: Optional callable applied to the PIL image after the
            (optional) flip.
        augment: When True (default, the original behaviour) each sample is
            horizontally flipped with probability 0.5 and its steering value
            negated. Pass False for deterministic evaluation data.
    """

    def __init__(self, df, folder_path, transform=None, augment=True):
        self.df = df
        self.folder_path = folder_path
        self.transform = transform
        self.augment = augment

    def __getitem__(self, index):
        # Positional lookup: `index` comes from the sampler as 0..len-1, which
        # only matches label-based indexing when the frame has a default index.
        key = self.df.im_key.iloc[index]
        steering = self.df.steering.iloc[index]
        speed = self.df.speed.iloc[index]

        # Close the file handle promptly and force 3 channels, since the
        # transforms normalise with 3-element mean/std.
        with Image.open(f'{self.folder_path}/{key}.jpg') as raw:
            img = raw.convert('RGB')

        # Use torch's RNG rather than numpy's: DataLoader seeds torch per
        # worker, whereas numpy state can be duplicated across workers and
        # produce identical flip decisions in each of them.
        if self.augment and torch.rand(1).item() < 0.5:
            img = transforms.functional.hflip(img)
            steering = -steering  # mirrored image -> mirrored steering

        if self.transform:
            img = self.transform(img)

        # Targets are divided by 100 before being handed to the model.
        steering_speed = torch.tensor([steering, speed]).float() / 100

        return img, steering_speed

    def __len__(self):
        return len(self.df)

In [6]:
from torchvision import transforms

# torchvision's Resize takes (height, width). The network input is 66 px high
# and 200 px wide, matching resize_x=200 / resize_y=66 in the DALI pipeline
# of this notebook.
INPUT_SIZE_HW = (66, 200)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training transform: light colour / geometric augmentation, then resize,
# tensor conversion and per-channel normalisation.
tf_compose = transforms.Compose([
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05), scale=(0.95, 1.05)),
    transforms.Resize(INPUT_SIZE_HW),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD)
])

# Evaluation transform: same preprocessing without the random augmentation,
# so the test loss is comparable from epoch to epoch.
# NOTE: SteeringDataset applies its own random horizontal flip by default.
eval_tf_compose = transforms.Compose([
    transforms.Resize(INPUT_SIZE_HW),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD)
])

train_dataset = SteeringDataset(
    train_df,
    IMG_FOLDER_PATH,
    tf_compose,
)

test_dataset = SteeringDataset(
    test_df,
    IMG_FOLDER_PATH,
    eval_tf_compose,
)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=LOADER_THREAD
)

# Order does not matter for an averaged evaluation loss, so do not shuffle.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=LOADER_THREAD
)

In [4]:
import numpy as np

class SteerInputIteratorDALI():
    """Endless batch iterator feeding raw JPEG bytes and labels to DALI.

    Each call to ``next`` returns ``(batch, labels)`` where ``batch`` is a list
    of ``batch_size`` uint8 arrays holding the encoded JPEG files and
    ``labels`` is a list of float16 arrays ``[steering, speed]``. Rows are
    visited in a shuffled order fixed at construction and wrap around at the
    end of the frame.

    Args:
        df: DataFrame with ``im_key``, ``steering`` and ``speed`` columns.
        folder_path: Directory containing ``<im_key>.jpg`` files.
        batch_size: Number of samples returned per call.
    """

    def __init__(self, df, folder_path, batch_size=8):
        self.df = df.sample(frac=1)  # shuffle
        self.folder_path = folder_path
        self.batch_size = batch_size
        # Initialise the cursor here too so next() works without iter().
        self.i = 0
        self.n = len(self.df)

    def __iter__(self):
        self.i = 0
        self.n = len(self.df)
        return self

    def __next__(self):
        if self.n == 0:
            # Nothing to serve; also avoids a modulo-by-zero below.
            raise StopIteration

        batch = []
        labels = []
        for _ in range(self.batch_size):
            # Index with the running cursor, not the loop counter; otherwise
            # every batch would contain the same first `batch_size` rows.
            row = self.df.iloc[self.i]
            with open(f'{self.folder_path}/{row.im_key}.jpg', 'rb') as im_file:
                encoded = im_file.read()
            label = [row.steering, row.speed]

            batch.append(np.frombuffer(encoded, dtype = np.uint8))
            labels.append(np.array(label, dtype = np.float16))

            # Wrap around so the iterator never runs dry.
            self.i = (self.i + 1) % self.n
        return (batch, labels)

    next = __next__

In [5]:
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline

class SteeringSourcePipeline(Pipeline):
    """DALI pipeline: decode JPEGs, colour-twist, resize and normalise on GPU.

    Encoded images and labels are pulled from ``data_iterator`` once per
    iteration (see ``iter_setup``) through two ExternalSource operators.

    Args:
        data_iterator: Object whose ``next()`` returns ``(images, labels)``
            for one batch.
        batch_size, num_threads, device_id: Forwarded to ``Pipeline``.
    """

    def __init__(self, data_iterator, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=12)
        self.data_iterator = data_iterator
        self.input = ops.ExternalSource()
        self.input_label = ops.ExternalSource()
        self.decode = ops.decoders.Image(device = "mixed", output_type = types.RGB)
        # Random per-sample parameters for the colour augmentation.
        self.twist_range = ops.random.Uniform(range=[0.5, 1.5])
        self.hue_range = ops.random.Uniform(range=[-0.5, 0.5])
        self.twist = ops.ColorTwist(device="gpu")

        # resizing is a *must* because loaded images may be of different sizes
        # and to create GPU tensors we need image arrays to be of the same size
        self.resize = ops.Resize(
            device="gpu", resize_x=200, resize_y=66, interp_type=types.INTERP_TRIANGULAR
        )
        # Decoded images are uint8 in 0..255, so the 0..1 mean/std constants
        # must be scaled by 255 to normalise correctly.
        self.normalize = ops.CropMirrorNormalize(
            device="gpu",
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255]
        )
        # NOTE: not wired into define_graph; flipping images would also
        # require negating the steering label.
        self.flip = ops.Flip(device='gpu', horizontal=1)


    def define_graph(self):
        """Build the graph: decode -> colour twist -> resize -> normalise."""
        self.jpegs = self.input()
        self.labels = self.input_label()
        images = self.decode(self.jpegs)

        images = self.twist(
            images, saturation=self.twist_range(), contrast=self.twist_range(),
            brightness=self.twist_range(), hue=self.hue_range()
        )
        images = self.resize(images)
        images = self.normalize(images)
        return (images, self.labels)

    def iter_setup(self):
        # the external data iterator is consumed here and fed as input to Pipeline
        images, labels = self.data_iterator.next()
        self.feed_input(self.jpegs, images)
        self.feed_input(self.labels, labels)

In [6]:
from nvidia.dali.plugin.pytorch import DALIGenericIterator

# Python-side iterators that read encoded JPEGs and labels from disk.
train_iter = iter(SteerInputIteratorDALI(train_df, IMG_FOLDER_PATH, batch_size=BATCH_SIZE))
test_iter = iter(SteerInputIteratorDALI(test_df, IMG_FOLDER_PATH, batch_size=BATCH_SIZE))

# One DALI pipeline per split, both on GPU 0.
train_pipe = SteeringSourcePipeline(
    data_iterator=train_iter, batch_size=BATCH_SIZE, num_threads=LOADER_THREAD, device_id=0
)
train_pipe.build()
test_pipe = SteeringSourcePipeline(
    data_iterator=test_iter, batch_size=BATCH_SIZE, num_threads=LOADER_THREAD, device_id=0
)
test_pipe.build()

# Wrap each pipeline so it yields dicts of PyTorch tensors keyed by
# 'images' and 'labels'. The test iterator must wrap the *test* pipeline.
dali_train_iter = DALIGenericIterator([train_pipe], ['images', 'labels'])
dali_test_iter = DALIGenericIterator([test_pipe], ['images', 'labels'])

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 981.44 MiB total capacity; 0 bytes already allocated; 12.50 MiB free; 0 bytes reserved in total by PyTorch)

In [7]:
import torch.optim as optim
import torch.nn.functional as F
import time
from ipywidgets import IntProgress
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Directory that receives the per-epoch checkpoints, result.csv and result.png.
MODEL_FOLDER = BASE_FOLDER + 'model/'
# Checkpoint file name template; `{}` is filled with the 1-based epoch number.
MODEL_FN = 'steering_ep_{}.pth'
# Full checkpoint path template, used as MODEL_PATH.format(epoch + 1).
MODEL_PATH = MODEL_FOLDER + MODEL_FN

def get_device():
    """Return the CUDA device when available, otherwise the CPU device.

    Prints which of the two was selected.
    """
    has_cuda = torch.cuda.is_available()
    print('use CUDA' if has_cuda else 'use CPU')
    return torch.device('cuda' if has_cuda else 'cpu')

# Select the compute device and place a freshly initialised model on it.
device = get_device()
model = NvidiaModel2().to(device)

# Adam over all model parameters with the learning rate from the param cell.
optimizer = optim.Adam(params=model.parameters(), lr=LR)

# Per-epoch history table: mean train loss, mean test loss, epoch duration (s).
result = pd.DataFrame(columns=['train_loss', 'test_loss', 'time'])

import os

# Checkpoints, result.csv and result.png are all written here; create it up
# front so the first save cannot fail on a missing directory.
os.makedirs(MODEL_FOLDER, exist_ok=True)

# One record per finished epoch; `result` is rebuilt from this list each epoch
# (DataFrame.append was removed in pandas 2.0).
history = []

for epoch in range(NUM_EPOCHS):
    pbar = IntProgress()
    display(pbar)

    print(f'epoch: {epoch + 1} ...')
    _t = time.time()

    # ---- train ----
    model.train()
    train_loss = 0.0

    pbar.value = 0
    pbar.max = len(train_loader)

    print('train...')

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = F.mse_loss(outputs, labels)

        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the graph is not kept alive.
        train_loss += loss.item()

        pbar.value += 1

    # Mean loss per batch; guard against an empty loader.
    train_loss /= max(len(train_loader), 1)

    # ---- eval ----
    model.eval()
    test_loss = 0.0

    pbar.value = 0
    pbar.max = len(test_loader)

    print('eval...')

    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves GPU memory and time.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = F.mse_loss(outputs, labels)

            test_loss += loss.item()

            pbar.value += 1

    test_loss /= max(len(test_loader), 1)

    _dt = time.time() - _t

    # Checkpoint after every epoch, named by the 1-based epoch number.
    torch.save(model.state_dict(), MODEL_PATH.format(epoch + 1))

    history.append({
        'train_loss': train_loss,
        'test_loss': test_loss,
        'time': _dt
    })
    result = pd.DataFrame(history, columns=['train_loss', 'test_loss', 'time'])
    # save result
    result.to_csv(MODEL_FOLDER + 'result.csv', index=False)

    clear_output(wait=True)

    print(result)

    # plot
    x = range(1, len(result) + 1)

    fig, ax = plt.subplots()
    ax.plot(x, list(result['train_loss']), '--', color=(100/255,100/255,255/255), label='train_loss')
    ax.plot(x, list(result['test_loss']), color=(255/255,100/255,100/255), label='test_loss')
    ax.set_xlabel('epoch')
    ax.set_ylabel('MSE loss')
    ax.legend()

    # save plt
    fig.savefig(MODEL_FOLDER + 'result.png')
    plt.show()
    # Release the figure so 200 epochs do not accumulate open figures.
    plt.close(fig)

use CUDA


IntProgress(value=0)

epoch: 1 ...
train...
trans transtrans  0.11340427398681640.09826874732971191
trans 0.104657173156738280.12157726287841797


trans 0.09667038917541504trans
trans trans  0.105655670166015620.108273029327392580.10967779159545898


transtrans 0.09338903427124023 
trans0.10083770751953125 0.10353565216064453trans

 0.10909295082092285
trans 0.09007525444030762trans 0.09296250343322754
trans 
0.10555076599121094
trans 0.11329793930053711
trans 0.09056687355041504trans
trans 0.12676644325256348 
0.10703277587890625
trans 0.10783839225769043
trans transtrans 0.08864545822143555
0.1275632381439209
 0.10442757606506348trans 
0.09480547904968262
transtrans 0.10259342193603516
 0.10643172264099121
transtrans  0.10431480407714844
0.12250399589538574
trans 0.09262394905090332
trans 0.1113896369934082
trans transtrans0.11376309394836426 0.14817333221435547
 0.09466981887817383

transtrans 
0.09336137771606445 0.1491551399230957
transtrans  0.12502813339233398
loader 1.1262376308441162
send data to g

trans  0.15517234802246094
0.10489320755004883
trans 0.1062006950378418
transtrans trans0.12728261947631836
  0.11488533020019531
0.11187386512756348
trans 0.09376668930053711loader 0.2230687141418457
send data to gpu 0.0009090900421142578
train 0.004224538803100586
loader 0.0061757564544677734
send data to gpu 0.0010311603546142578
train 0.004242420196533203
loader 0.0059282779693603516
send data to gpu 0.001180887222290039
train 0.0052378177642822266

trans 0.09270858764648438trans
 0.11052179336547852
trans 0.10437798500061035
trans trans0.09430384635925293 0.1029207706451416

trans 0.09276056289672852
trans

KeyboardInterrupt: 