In [1]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import PIL.Image
import os
import numpy as np

In [2]:
!unzip -q road_following.zip -d images

In [3]:

def get_x(path):
    """Gets the x value from the image filename"""
    _, _, my_x, _ = path.split('_')
    return (float(int(my_x)) - 50.0) / 50.0

def get_y(path):
    """Gets the y value from the image filename"""
    _, _, _, my_y = path.split('_')
    my_y, _ = my_y.split('.')
    return (float(int(my_y)) - 50.0) / 50.0

class XYDataset(torch.utils.data.Dataset):
    
    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        self.image_paths = glob.glob(os.path.join(self.directory, 'images/*.jpg'))
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)
        print(self.directory)
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        
        image = PIL.Image.open(image_path)
        x = float(get_x(os.path.basename(image_path)))
        y = float(get_y(os.path.basename(image_path)))
        
        if float(np.random.rand(1)) > 0.5:
            image = transforms.functional.hflip(image)
            x = -x
        
        image = self.color_jitter(image)
        image = transforms.functional.resize(image, (224, 224))
        image = transforms.functional.to_tensor(image)
        image = image.numpy()[::-1].copy()
        image = torch.from_numpy(image)
        image = transforms.functional.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        
        return image, torch.tensor([x, y]).float()
    
dataset = XYDataset('', random_hflips=False)
print(dataset[1])


(tensor([[[ 0.2282,  1.9064,  2.2318,  ...,  2.2318,  2.2489,  2.2318],
         [-0.3369,  1.0673,  2.1804,  ...,  2.2318,  2.2147,  2.0263],
         [-0.6623, -0.0629,  1.7009,  ...,  2.1119,  1.6838,  1.3413],
         ...,
         [-1.2959, -1.2617, -1.1247,  ..., -0.9705, -0.7822, -0.7479],
         [-1.2959, -1.2274, -1.1589,  ..., -1.1418, -0.9192, -0.9192],
         [-1.3130, -1.2445, -1.1760,  ..., -1.3302, -1.1075, -1.0048]],

        [[ 0.1352,  2.0259,  2.4111,  ...,  2.3761,  2.3761,  2.3936],
         [-0.3901,  1.1331,  2.3585,  ...,  2.3936,  2.3761,  2.1660],
         [-0.6001, -0.0224,  1.8333,  ...,  2.2885,  1.8333,  1.4832],
         ...,
         [-1.0553, -1.0378, -1.0203,  ..., -0.8452, -0.8277, -0.8627],
         [-1.0553, -1.0378, -1.0028,  ..., -0.9853, -0.8627, -0.8803],
         [-1.0728, -1.0378, -1.0028,  ..., -1.1604, -1.0028, -0.9153]],

        [[ 0.3393,  2.2217,  2.6400,  ...,  2.6400,  2.6400,  2.6400],
         [-0.2010,  1.3328,  2.5529,  ..., 

In [4]:
test_percent = 0.1
num_test = int(test_percent * len(dataset))
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - num_test, num_test])
print(len(train_dataset))

304


In [5]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

In [6]:
model = models.resnet18(pretrained=True)

In [7]:
model.fc = torch.nn.Linear(512, 2)
device = torch.device('cuda')
model = model.to(device)

In [8]:
NUM_EPOCHS = 70
BEST_MODEL_PATH = 'best_steering_model_xy.pth'
best_loss = 1e9

optimizer = optim.Adam(model.parameters())

for epoch in range(NUM_EPOCHS):
    print('Epoch '+str(epoch))
    model.train()
    train_loss = 0.0
    
    for images, labels in iter(train_loader):
        print('chiCKEN')
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        train_loss += float(loss)
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader)
    print('ROOSTER')
    model.eval()
    test_loss = 0.0
    for images, labels in iter(test_loader):
        print('frog')
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        test_loss += float(loss)
    test_loss /= len(test_loader)
    
    print('%f, %f' % (train_loss, test_loss))
    if test_loss < best_loss:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_loss = test_loss

Epoch 0
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
ROOSTER
frog
frog
frog
11.175747, 23.589393
Epoch 1
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
ROOSTER
frog
frog
frog
2.511390, 5.864944
Epoch 2
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
ROOSTER
frog
frog
frog
2.000827, 0.311914
Epoch 3
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
ROOSTER
frog
frog
frog
1.601313, 1.277473
Epoch 4
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
chiCKEN
ROOSTER
frog
frog
frog
1.32629

KeyboardInterrupt: 