In [32]:
import numpy as np
import torch
from torchvision.io import VideoReader
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
import cv2 as cv
import math
import sys
import pickle
import os

In [33]:
def rotate_image(image, angle):
    """Return a copy of ``image`` rotated about its center.

    Args:
        image: Image array whose first two axes are (rows, cols).
        angle: Rotation angle, forwarded unchanged to
            ``cv.getRotationMatrix2D`` (OpenCV interprets it in degrees).

    Returns:
        The rotated image, with the same width and height as the input.
    """
    # shape[1::-1] flips (rows, cols) into the (width, height) order OpenCV wants.
    size_wh = image.shape[1::-1]
    center = tuple(np.array(size_wh) / 2)
    rotation = cv.getRotationMatrix2D(center, angle, 1.0)
    return cv.warpAffine(image, rotation, size_wh, flags=cv.INTER_LINEAR)


def find_nearest(array, value):
    """Return the index of the entry in ``array`` closest to ``value``.

    ``array`` is searched with ``np.searchsorted`` and is therefore expected
    to be sorted in ascending order. When ``value`` is exactly halfway between
    two neighbours, the index of the larger neighbour is returned.

    Args:
        array: 1-D sorted sequence of numbers.
        value: The number to look up.

    Returns:
        Integer index into ``array``.
    """
    pos = np.searchsorted(array, value, side="left")

    # Nothing to the left of the insertion point: it is the only candidate.
    if pos == 0:
        return pos

    # Insertion point is past the end: the last element is the only candidate.
    if pos == len(array):
        return pos - 1

    gap_left = math.fabs(value - array[pos - 1])
    gap_right = math.fabs(value - array[pos])
    return pos - 1 if gap_left < gap_right else pos



In [34]:
def get_videos_for_chunk(
    num,
    max_videos=20,
    data_root="/media/nevin/Trash Games1/Downloads/comma2k19/extracted",
):
    """Build a ``VideoAngleDataset`` from the segments of one extracted chunk.

    The chunk directory is expected to be laid out as
    ``<data_root>/Chunk_<num>/<route>/<segment>/``; each segment directory
    contributes one ``VideoWithInfo`` pointing at its video, frame-time and
    steering-angle files. Nothing is opened or read here, only paths are
    assembled.

    Args:
        num: Chunk number, used to form the ``Chunk_<num>`` directory name.
        max_videos: Maximum number of segments to include. ``None`` keeps
            every segment found. Defaults to 20.
        data_root: Directory that contains the ``Chunk_<num>`` folders.

    Returns:
        A ``VideoAngleDataset`` over at most ``max_videos`` segments.

    Raises:
        OSError: If the chunk directory cannot be listed.
    """
    base_dir = os.path.join(data_root, f"Chunk_{num}")

    videos = []

    # os.listdir returns entries in arbitrary order; sort so that the subset
    # selected by ``max_videos`` is the same on every run. (Names are sorted
    # as strings, so segment "10" comes before segment "2".)
    for route in sorted(os.listdir(base_dir)):
        route_dir = os.path.join(base_dir, route)
        if not os.path.isdir(route_dir):
            continue  # ignore stray files next to the route folders

        for segment in sorted(os.listdir(route_dir)):
            seg_dir = os.path.join(route_dir, segment)
            if not os.path.isdir(seg_dir):
                continue

            video_path = os.path.join(seg_dir, "video.hevc")
            frame_times = os.path.join(seg_dir, "global_pose", "frame_times")
            steering_dir = os.path.join(seg_dir, "processed_log", "CAN", "steering_angle")
            angle_t = os.path.join(steering_dir, "t")
            angle_v = os.path.join(steering_dir, "value")

            videos.append(VideoWithInfo(video_path, frame_times, angle_t, angle_v))

    return VideoAngleDataset(videos[:max_videos])

# Chunk 1 is used for training and chunk 2 for evaluation.
# NOTE: get_videos_for_chunk instantiates VideoWithInfo and VideoAngleDataset,
# which are defined in later cells; those cells must have been run first.
train_dataset, test_dataset = (get_videos_for_chunk(chunk) for chunk in (1, 2))

print("got videos")

got videos


In [35]:
class VideoAngleDataset(torch.utils.data.IterableDataset):
    """Iterable dataset that chains the items of several videos end to end.

    Each element of ``videos`` must be iterable; the dataset yields every
    item of the first video, then every item of the second, and so on.
    Videos that yield nothing are skipped.

    The dataset is its own iterator, so only one iteration can be in
    progress at a time; calling ``iter()`` again restarts from the first
    video.

    Args:
        videos: Sequence of iterables (supports ``len`` and indexing).
    """

    def __init__(self, videos):
        super().__init__()
        self.videos = videos
        # Not used by this class; kept so existing attribute access still works.
        self.video = None

    def __iter__(self):
        """Reset the iteration state and return ``self`` as the iterator."""
        self.vid_idx = 0
        self.current_vid = None
        self.frame_idx = 0
        return self

    def __next__(self):
        """Return the next item, moving on to the next video when one ends.

        Raises:
            StopIteration: When every video has been exhausted.
        """
        # Loop rather than advancing once, so that any number of consecutive
        # empty videos is skipped instead of ending the whole iteration early.
        while self.vid_idx < len(self.videos):
            if self.current_vid is None:
                self.current_vid = iter(self.videos[self.vid_idx])

            # None is used as the "this video is finished" sentinel.
            frame = next(self.current_vid, None)
            if frame is not None:
                return frame

            self.vid_idx += 1
            self.current_vid = None

        raise StopIteration

In [36]:
class VideoWithInfo:
    """One video segment paired with its steering-angle log.

    Iterating over an instance decodes the video frame by frame and, for
    each frame, looks up the steering-angle sample whose timestamp is
    nearest to that frame's timestamp.

    Relies on the module-level names ``transform`` and ``find_nearest``,
    which must be defined before iteration starts.

    Args:
        vid_path: Path to the video file, opened with ``cv.VideoCapture``.
        ft_path: Path to the per-frame timestamps, loaded with ``np.load``.
        angle_t_path: Path to the steering-angle timestamps (``np.load``).
        angle_v_path: Path to the steering-angle values (``np.load``).
    """

    def __init__(self, vid_path, ft_path, angle_t_path, angle_v_path):
        self.vid_path = vid_path
        self.ft_path = ft_path
        self.angle_t_path = angle_t_path
        self.angle_v_path = angle_v_path
        self.video = None

    def _release(self):
        """Release the open capture, if any, so the decoder handle is freed."""
        if self.video is not None:
            self.video.release()
            self.video = None

    def __iter__(self):
        """Open the video, load the logs and return ``self`` as the iterator."""
        # Drop a capture left over from an earlier, unfinished iteration.
        self._release()

        self.video = cv.VideoCapture(self.vid_path)
        self.frame_times = np.load(self.ft_path)
        self.angle_t = np.load(self.angle_t_path)
        self.angle_v = np.load(self.angle_v_path)
        self.frame_idx = 0

        return self

    def __next__(self):
        """Decode the next frame and return it with its steering angle.

        Returns:
            Dict with keys ``'tensor'`` (the resized RGB frame passed through
            ``transform``), ``'frame'`` (the decoded frame as returned by
            OpenCV) and ``'angle'`` (the nearest-in-time steering value).

        Raises:
            StopIteration: When the video could not be opened, has no more
                frames, or has more frames than there are frame timestamps.
        """
        if self.video is None or not self.video.isOpened():
            self._release()
            raise StopIteration

        status, frame = self.video.read()

        # A frame without a timestamp cannot be matched to an angle, so treat
        # running out of timestamps the same as running out of frames.
        if not status or self.frame_idx >= len(self.frame_times):
            self._release()
            raise StopIteration

        # cv.resize takes (width, height); the result is 97 rows by 129 columns.
        resized = cv.resize(frame, (129, 97))
        resized = cv.cvtColor(resized, cv.COLOR_BGR2RGB)
        tensor = transform(resized)

        rot_idx = find_nearest(self.angle_t, self.frame_times[self.frame_idx])
        angle = self.angle_v[rot_idx]

        self.frame_idx += 1

        return {'tensor': tensor, 'frame': frame, 'angle': angle}



In [18]:
# Visual sanity check: step through two segments frame by frame, showing each
# frame next to a steering-wheel image rotated by the logged angle.
# Press any key to advance, or "q" to stop.
base_dir = "/media/nevin/Trash Games1/Downloads/comma2k19/extracted/Chunk_1/b0c9d2329ad1606b_2018-07-27--06-03-57"
wheel_path = "/home/nevin/Downloads/wheel.png"

wheel = cv.imread(wheel_path)
if wheel is None:
    # cv.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"could not read steering wheel image: {wheel_path}")


def _video_for_segment(route_dir, segment):
    """Build the VideoWithInfo for one numbered segment of a route folder."""
    seg_dir = f"{route_dir}/{segment}"
    steering_dir = f"{seg_dir}/processed_log/CAN/steering_angle"
    return VideoWithInfo(
        f"{seg_dir}/video.hevc",
        f"{seg_dir}/global_pose/frame_times",
        f"{steering_dir}/t",
        f"{steering_dir}/value",
    )


video_1 = _video_for_segment(base_dir, 3)
video_2 = _video_for_segment(base_dir, 4)

videos = [video_1, video_2]
dataset = VideoAngleDataset(videos)

try:
    for frame in dataset:
        rotated = rotate_image(wheel, frame["angle"])
        cv.imshow("frame", frame["frame"])
        cv.imshow("wheel", rotated)

        key = cv.waitKey()

        if key == ord("q"):
            break
finally:
    # Close the preview windows even if decoding or display raised.
    cv.destroyAllWindows()

In [37]:
class PrintLayer(nn.Module):
    """Debugging pass-through layer.

    Prints a fixed marker each time data flows through it and returns its
    input unchanged. It has no parameters, so the inherited ``nn.Module``
    constructor is used as is.
    """

    def forward(self, x):
        """Print the marker and hand ``x`` back untouched."""
        print("got here 1")
        return x

# Frame preprocessing pipeline applied to each resized RGB frame; it consists
# of a single ToTensor step.
transform = transforms.Compose([transforms.ToTensor()])

class AutoDrive(nn.Module):
    """Convolutional regressor that maps an image to a single scalar.

    Five convolutions (ELU between them, none after the last) feed a
    five-layer fully connected head (ELU between layers, none after the
    last). The head expects 990 flattened features, which is what the
    convolution stack produces for a 3 x 97 x 129 input (22 x 5 x 9).
    """

    def __init__(self):
        super().__init__()

        self.flatten = nn.Flatten()
        # Registered on the module but not used by forward().
        self.activation = nn.ELU()

        # (in_channels, out_channels, kernel_size, stride) for each convolution.
        conv_specs = [
            (3, 8, 5, 2),
            (8, 12, 5, 2),
            (12, 16, 5, 2),
            (16, 20, 3, 1),
            (20, 22, 3, 1),
        ]
        conv_stack = []
        for c_in, c_out, kernel, step in conv_specs:
            conv_stack.append(
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=kernel, stride=step)
            )
            conv_stack.append(nn.ELU())
        conv_stack.pop()  # the final convolution is not followed by an activation
        self.cnn_layers = nn.Sequential(*conv_stack)

        # Consecutive pairs give the (in_features, out_features) of each Linear.
        widths = [990, 480, 120, 70, 10, 1]
        dense_stack = []
        for n_in, n_out in zip(widths, widths[1:]):
            dense_stack.append(nn.Linear(n_in, n_out))
            dense_stack.append(nn.ELU())
        dense_stack.pop()  # the output layer is purely linear
        self.linear_layers = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Run the convolutions, flatten, then apply the fully connected head.

        Args:
            x: Batch of images; must flatten to 990 features per sample
                after the convolution stack.

        Returns:
            Tensor with one value per sample, shaped (batch, 1).
        """
        features = self.flatten(self.cnn_layers(x))
        return self.linear_layers(features)

In [39]:
def train_loop(dataloader, model, loss_fn, optim):
    """Run one pass of gradient updates over ``dataloader``.

    Inputs are moved to whatever device the model's parameters live on, so
    the loop works on both CPU and GPU. Predictions and targets are flattened
    to one value per sample before the loss is computed, which keeps their
    shapes equal for any batch size.

    Args:
        dataloader: Iterable of dicts with a ``"tensor"`` entry (model input
            batch) and an ``"angle"`` entry (one target per sample).
        model: Module producing one value per sample.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optim: Optimizer over ``model``'s parameters.

    Returns:
        None. The loss is printed every 100 batches.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for batch, frame in enumerate(dataloader):
        tensor = frame["tensor"].to(device)
        angle = frame["angle"].float().to(device).reshape(-1)

        # Flatten (batch, 1) -> (batch,) so the loss compares sample with
        # sample instead of broadcasting one prediction against every target.
        pred = model(tensor).reshape(-1)

        loss = loss_fn(pred, angle)
        optim.zero_grad()
        loss.backward()
        optim.step()

        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}, batch: {batch}")


def test_loop(dataloader, model, loss_fn):
    test_loss = 0, 0
    with torch.no_grad():
        for frame in dataloader:
            tensor = frame["tensor"].to("cuda")
            angle = frame["angle"].float().to("cuda")

            pred = model(tensor)
            test_loss += loss_fn(pred[0], angle).item()
            test_loss /= 2
    
    print(f"test error: {test_loss:>8f}")


# --- Hyperparameters ---------------------------------------------------------
# NOTE(review): `batch_size` is not passed to either DataLoader below, so both
# loaders run with DataLoader's default batch size rather than 64.
learning_rate = 1e-3
batch_size = 64
epochs = 1

# --- Device, data and model --------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"

train_dataloader = DataLoader(train_dataset)
test_dataloader = DataLoader(test_dataset)

model = AutoDrive().to(device)
loss_fn = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# --- Training ----------------------------------------------------------------
for i in range(epochs):
    train_loop(train_dataloader, model, loss_fn, optimizer)
    # Evaluation is currently disabled:
    # test_loop(test_dataloader, model, loss_fn)
    print(f"Epoch {i+1}")

# Persist only the learned weights (state_dict), not the module object.
torch.save(model.state_dict(), "/media/nevin/Trash Games1/Downloads/comma2k19/processed/model.pth")
print("saved")


loss: 0.024996, batch: 0
loss: 3.504597, batch: 100
loss: 0.067881, batch: 200
loss: 1.006213, batch: 300
loss: 1.326488, batch: 400
loss: 0.127966, batch: 500
loss: 17.440996, batch: 600
loss: 0.005894, batch: 700
loss: 0.005116, batch: 800
loss: 0.034407, batch: 900
loss: 3.480537, batch: 1000
loss: 0.250694, batch: 1100
loss: 0.024224, batch: 1200
loss: 2.377019, batch: 1300
loss: 0.802102, batch: 1400
loss: 53.818317, batch: 1500
loss: 2.493272, batch: 1600
loss: 0.226461, batch: 1700
loss: 1.273128, batch: 1800
loss: 31.803204, batch: 1900
loss: 194.403580, batch: 2000
loss: 281.361969, batch: 2100
loss: 192.810181, batch: 2200
loss: 109.303734, batch: 2300
loss: 55.643318, batch: 2400
loss: 10.421726, batch: 2500
loss: 23.023067, batch: 2600
loss: 549.032654, batch: 2700
loss: 7.002925, batch: 2800
loss: 0.820114, batch: 2900
loss: 4.078269, batch: 3000
loss: 1.904649, batch: 3100
loss: 3.652811, batch: 3200
loss: 5.814215, batch: 3300
loss: 0.033741, batch: 3400
loss: 0.012560, 

In [63]:
# Visual check of the trained model: show each test frame next to the wheel
# image rotated by the predicted angle. Press any key to advance, "q" to stop.

# Run inference on whatever device the model was placed on instead of
# assuming a GPU is present.
inference_device = next(model.parameters()).device

try:
    with torch.no_grad():
        for frame in test_dataset:
            # Dataset items are single frames; add the leading batch axis.
            batch = frame["tensor"].to(inference_device)[None, ...]
            pred = model(batch)

            # Convert once and reuse for both the log line and the overlay.
            angle = pred[0].cpu().numpy()[0]
            print(angle)

            rotated = rotate_image(wheel, angle)
            cv.imshow("frame", frame["frame"])
            cv.imshow("angle", rotated)

            key = cv.waitKey()

            if key == ord("q"):
                break
finally:
    # Close the preview windows even if inference or display raised.
    cv.destroyAllWindows()

0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
0.8741414
