In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import pickle
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import cv2 as cv

Custom Dataset Class

In [30]:
class LaneDataset(Dataset):
    '''Expects x and y to be np arrays
    x.shape=(num_samples,80,160,3)
    y.shape=(num_samples,80,160,1)
    converts them to pytroch (3,80,160) and (1,80,160)'''
    def __init__(self,images,labels):
        super().__init__()
        self.images = images
        self.labels=labels

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img = torch.tensor(self.images[idx],dtype=torch.float)
        label = torch.tensor(self.labels[idx],dtype=torch.float)
        return img,label

In [31]:
train_pickle=pickle.load(open('data/full_CNN_train.p','rb'))
train_labels=pickle.load(open('data/full_CNN_labels.p','rb'))
train_features=np.array(train_pickle)
train_labels=np.array(train_labels)/255#normalize
# train_features,train_labels=shuffle(train_features,train_labels)
X_train, X_val, y_train, y_val=train_test_split(train_features,train_labels)
train_dataset=LaneDataset(X_train, y_train)
val_dataset=LaneDataset(X_val,y_val)
train_loader=DataLoader(train_dataset,batch_size=128,shuffle=True)
val_loader=DataLoader(val_dataset,batch_size=128,shuffle=True)
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Model

In [35]:
class LaneNet(nn.Module):
    def __init__(self):
        super(LaneNet, self).__init__()
        self.bn_in=nn.BatchNorm2d(num_features=3)
        self.conv1=nn.Conv2d(3,8,3)
        self.conv2=nn.Conv2d(8,16,3)
        self.pool1=nn.MaxPool2d(2,2)
        self.conv3=nn.Conv2d(16,16,3)
        self.dropout3=nn.Dropout2d(0.2)
        self.conv4=nn.Conv2d(16,32,3)
        self.dropout4=nn.Dropout2d(0.2)
        self.conv5=nn.Conv2d(32,32,3)
        self.dropout5=nn.Dropout2d(0.2)
        self.pool2=nn.MaxPool2d(2,2)
        self.conv6=nn.Conv2d(32,64,3)
    
    def forward(self,x):
        x=self.bn_in(x)
        x=self.conv1(x)
        x=self.conv2(x)
        x=self.pool1(x)
        x=self.conv3(x)
        x=self.dropout3(x)
        x=self.conv4(x)
        x=self.dropout4(x)
        x=self.conv5(x)
        x=self.dropout5(x)
        x=self.pool2(x)
        x=self.conv6(x)
        return x

In [36]:
model=LaneNet().to(device)
criterion=torch.nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters(),lr=1e-4)

In [37]:
num_epochs=100
for epoch in range(num_epochs):
    model.train()
    running_loss=0
    for batch_x,batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device),batch_y.to(device)
        optimizer.zero_grad()
        output=model(batch_x)
        loss=criterion(output,batch_y)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
    print(f'Epoch {epoch+1}, loss: {running_loss/len(train_loader)}')

RuntimeError: running_mean should contain 80 elements not 3

In [None]:
torch.save(model.state_dict(),'models/model.pth')

In [None]:
model.load_state_dict(torch.load('model.pth',map_location=device))

Test

In [11]:
import cv2 as cv
from moviepy.editor import VideoFileClip
import numpy as np

In [12]:

class Lanes():
    def __nit__(self):
        self.recent_fit = []
        self.avg_fit = []

def road_lines(image,model,lanes):
    small_img = cv.imresize(image,(80,160,3))
    small_img = np.array(small_img)
    small_img = small_img[None,:,:,:]

    prediction = model.eval()
    lanes.recent_fit.append(prediction)

    if len(lanes.recent_fit) >5:
        lanes.recent_fit = lanes.recent_fit[1:]
    
    lanes.avg_fit = np.mean(np.array([i for i in lanes.recent_fit]),axis=0)

    blanks = np.zeros_like(lanes.avg_fit).astype(np.uint8)
    lane_drawn = np.dstack((blanks, lanes.avg_fit, blanks))

    lane_image = cv.imresize(lane_drawn, (720,1280,3))
    result = cv.addWeighted(image,1,lane_image,1,0)

    return result
# lanes= Lanes()
# vid_input = VideoFileClip(r'.mp4')
# vid_output = 'output_test.mp4'

# vid_clip = vid_input.fl_image(road_lines)
# vid_clip.write_videofile(vid_output)

In [None]:
model.eval()
lanes=Lanes()
clip_input=VideoFileClip('input_clip.mp4')
vid_output='output_video.mp4'
def process_frame(frame):
    frame_bgr=cv.cvtColor(frame,cv.COLOR_RGB2BGR)
    output_bgr=road_lines(frame_bgr,model,lanes)
    return cv.cvtColor(output_bgr,cv.COLOR_BGR2RGB)
vid_clip=clip_input.fl_image(process_frame)
vid_clip.write_videofile(vid_output,audio=False)