In [1]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import PIL.Image
import os
import numpy as np

In [2]:
# Extract the training archive; -q suppresses unzip's per-file listing.
# NOTE(review): the dataset cell globs '*.jpg' relative to the working directory,
# so the images are presumably stored at the archive root — confirm.
!unzip -q road_following.zip

In [3]:

def get_x(path):
    """Return the x label encoded in an image filename.

    The name must split into exactly four '_'-separated fields (otherwise a
    ValueError is raised by the unpacking). The third field is parsed as an
    integer and rescaled with (value - 50) / 50, so 0 -> -1.0, 50 -> 0.0,
    100 -> 1.0.
    """
    _first, _second, x_field, _last = path.split('_')
    raw_x = float(int(x_field))
    return (raw_x - 50.0) / 50.0

def get_y(path):
    """Return the y label encoded in an image filename.

    The name must split into exactly four '_'-separated fields, and the last
    field must contain exactly one '.' separating the value from the
    extension (either unpacking raises ValueError otherwise). The value is
    parsed as an integer and rescaled with (value - 50) / 50.
    """
    _first, _second, _third, tail = path.split('_')
    y_field, _extension = tail.split('.')
    raw_y = float(int(y_field))
    return (raw_y - 50.0) / 50.0

class XYDataset(torch.utils.data.Dataset):
    """Image dataset whose (x, y) regression targets are encoded in each filename.

    Every ``*.jpg`` directly inside ``directory`` is one sample. Targets are
    decoded from the file's basename with ``get_x`` / ``get_y``.

    Args:
        directory: Folder to search for ``*.jpg`` files ('' means the current
            working directory, since the pattern is joined onto it).
        random_hflips: When true, each fetched sample is mirrored horizontally
            with probability 0.5 and its x target is negated to match. When
            false, no flipping is performed.
    """

    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        # glob returns files in arbitrary order; sort so that a given index
        # refers to the same file on every run.
        self.image_paths = sorted(glob.glob(os.path.join(self.directory, '*.jpg')))
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)
        print(self.directory)

    def __len__(self):
        """Number of image files found at construction time."""
        return len(self.image_paths)

    def _should_flip(self):
        """Decide whether the current sample gets mirrored (never when flips are disabled)."""
        # np.random.rand() with no arguments yields a Python float, avoiding
        # the deprecated array-to-scalar conversion of float(np.random.rand(1)).
        return bool(self.random_hflips) and np.random.rand() > 0.5

    def __getitem__(self, idx):
        """Load, augment and normalise sample ``idx``.

        Returns:
            A ``(image, target)`` pair: ``image`` is a float tensor produced
            from a 224x224 resize, ``target`` is a float tensor ``[x, y]``.
        """
        image_path = self.image_paths[idx]
        filename = os.path.basename(image_path)

        # convert() loads the pixel data and returns an independent image, so
        # the file handle can be released immediately; forcing RGB guarantees
        # the three channels the normalisation below expects.
        with PIL.Image.open(image_path) as handle:
            image = handle.convert('RGB')
        x = float(get_x(filename))
        y = float(get_y(filename))

        if self._should_flip():
            image = transforms.functional.hflip(image)
            # Mirroring the picture mirrors the horizontal target as well.
            x = -x

        image = self.color_jitter(image)
        image = transforms.functional.resize(image, (224, 224))
        image = transforms.functional.to_tensor(image)
        # Reverse the leading (channel) axis, i.e. RGB -> BGR.
        # NOTE(review): presumably done to match the channel order used at
        # inference time — confirm against the deployment code.
        image = image.numpy()[::-1].copy()
        image = torch.from_numpy(image)
        image = transforms.functional.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        return image, torch.tensor([x, y]).float()
    
# An empty directory string makes the glob pattern a bare '*.jpg', i.e. the
# images are looked up in the current working directory.
dataset = XYDataset(directory='', random_hflips=False)
# Fetch one sample as a smoke test of the loading pipeline.
sample = dataset[1]
print(sample)


(tensor([[[-0.4054, -0.4739, -0.6452,  ..., -1.7069, -1.6727, -1.6384],
         [-0.8849, -0.9705, -1.0390,  ..., -1.7069, -1.6727, -1.6384],
         [-1.1932, -1.2274, -1.2445,  ..., -1.6898, -1.6727, -1.6384],
         ...,
         [ 0.0912, -0.3198, -0.4911,  ..., -0.6281, -0.6452, -0.5082],
         [ 0.3309, -0.1486, -0.5596,  ..., -0.2342, -0.5253, -0.6281],
         [ 0.1254, -0.3027, -0.6452,  ..., -0.0287, -0.1828, -0.4911]],

        [[-0.3901, -0.4776, -0.6527,  ..., -1.6506, -1.6155, -1.5805],
         [-0.8452, -0.9503, -1.0378,  ..., -1.6506, -1.6155, -1.5805],
         [-1.1078, -1.1604, -1.2129,  ..., -1.6506, -1.6155, -1.5805],
         ...,
         [-0.3200, -0.6352, -0.7227,  ..., -0.9153, -0.8803, -0.6352],
         [-0.4776, -0.5126, -0.6001,  ..., -0.7052, -0.8452, -0.7927],
         [-0.9153, -0.6877, -0.5826,  ..., -0.6001, -0.4951, -0.6176]],

        [[ 0.0082, -0.0441, -0.1661,  ..., -1.4384, -1.4210, -1.4036],
         [-0.6018, -0.6541, -0.7064,  ..., 

In [4]:
# Hold out a fraction of the samples (rounded down) for evaluation; the
# remainder is used for training.
test_percent = 0.1
num_test = int(test_percent * len(dataset))
split_sizes = [len(dataset) - num_test, num_test]
train_dataset, test_dataset = torch.utils.data.random_split(dataset, split_sizes)
print(len(train_dataset))

26


In [5]:
# Both loaders share the same batching configuration.
loader_options = dict(batch_size=16, shuffle=True, num_workers=4)

train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)

test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

In [6]:
# Build a ResNet-18 and load torchvision's pretrained weights as the starting
# point for fine-tuning.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing.
model = models.resnet18(pretrained=True)

In [7]:
# Replace the classification head with a 2-output regressor (x, y). The input
# width is read from the existing layer instead of hard-coding 512, so the cell
# stays correct if the backbone is swapped and is safe to re-run.
model.fc = torch.nn.Linear(model.fc.in_features, 2)
# Prefer the GPU but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at model.to(device).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [8]:
NUM_EPOCHS = 70
BEST_MODEL_PATH = 'best_steering_model_xy.pth'
best_loss = 1e9  # sentinel larger than any loss we expect to see

optimizer = optim.Adam(model.parameters())

for epoch in range(NUM_EPOCHS):
    print('Epoch '+str(epoch))

    # ---- training pass: accumulate the per-batch MSE and update weights ----
    model.train()
    train_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
    # Mean over batches (not over samples).
    train_loss /= len(train_loader)

    # ---- evaluation pass: no weight updates, so autograd is switched off ----
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = F.mse_loss(outputs, labels)
            test_loss += loss.item()
    test_loss /= len(test_loader)

    print('%f, %f' % (train_loss, test_loss))
    # Checkpoint only when the held-out loss improves on the best so far.
    if test_loss < best_loss:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_loss = test_loss

Epoch 0
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
27.004556, 34.328094
Epoch 1
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
11.195392, 221.246002
Epoch 2
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
4.099383, 150.128708
Epoch 3
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
1.837668, 43.487091
Epoch 4
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.436221, 26.220190
Epoch 5
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.354928, 24.633156
Epoch 6
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.619337, 12.428535
Epoch 7
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.791639, 9.198294
Epoch 8
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.812093, 11.704357
Epoch 9
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.541477, 10.729708
Epoch 10
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.397308, 12.609006
Epoch 11
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.205554, 6.339871
Epoch 12
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.241362, 4.446982
Epoch 13
dog
1
2
3
4
5
6
7
dog
1
2
3
4
5
6
7
green
0.185279, 2.825650
Epoch 14
dog
1
2