# Line and Vehicle Detection and Tracking Project 

Detects lane lines and vehicles in a video stream using image processing.

## Load Packages

In [1]:
# General system level packages
import os
import sys
import time
import glob

# For numerical and image processing
import cv2
import pickle
import numpy as np

# For images and plotting 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches

# For image processing
from skimage.feature import hog
# For processing video files
from moviepy.editor import VideoFileClip

# For image processing 
from scipy.ndimage.measurements import label

# For machine learning tasks such as standardization, splitting and linear SVM classification
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

# For processing video files 
from moviepy.editor import VideoFileClip
import io
import base64
from IPython.display import HTML

# Import functions library for the project
from Cha_Line import Line

# Inline plotting 
%matplotlib inline

## Function library

In [2]:
##Vehicle Detection

def color_hist(img, nbins = 32, bins_range = None):
    """Compute one histogram per color channel and join the counts into a feature vector.

    Parameters
    ----------
    img : array indexed as ``img[:, :, channel]``; channels 0, 1 and 2 are used.
    nbins : number of histogram bins per channel.
    bins_range : optional ``(low, high)`` pair forwarded to ``np.histogram`` as
        ``range``. With the default ``None`` each channel is binned between its
        own minimum and maximum (the historical behavior), which means the bins
        differ from image to image and from channel to channel. Pass a fixed
        range (for example ``(0, 256)`` for 8-bit data or ``(0, 1)`` for
        normalized floats) to get histograms that are directly comparable.

    Returns
    -------
    hist_features : 1-D array with the three count arrays concatenated
        (channel 0, then 1, then 2).
    bin_centers : centers of the bins of the FIRST channel. They describe the
        other channels too only when ``bins_range`` is given.
    channel1_hist, channel2_hist, channel3_hist : the raw ``(counts, bin_edges)``
        tuples returned by ``np.histogram`` for each channel.
    """
    # np.histogram returns (counts, bin_edges); bin_edges has nbins + 1 entries
    channel_hists = [np.histogram(img[:, :, channel], bins = nbins, range = bins_range)
                     for channel in range(3)]

    # Concatenate the counts into a single feature vector
    hist_features = np.concatenate([counts for counts, _ in channel_hists])

    # Bin centers are the midpoints of consecutive edges of the first channel
    bin_edges = channel_hists[0][1]
    bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2

    return hist_features, bin_centers, channel_hists[0], channel_hists[1], channel_hists[2]


def bin_spatial(img, size = (32, 32)):
    """Downsample the image and flatten it into a 1-D feature vector.

    The image is resized to ``size`` with nearest-neighbour interpolation
    (``cv2.INTER_NEAREST``) and the result is unrolled with ``ravel()``.
    """
    downsampled = cv2.resize(img, size, interpolation = cv2.INTER_NEAREST)
    return downsampled.ravel()


def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis = False, feature_vec = True):
    """Compute Histogram of Oriented Gradients (HOG) features with scikit-image.

    HOG computes the image gradient and builds, for every cell, a histogram of
    gradient directions weighted by gradient magnitude. It tolerates small
    variations in shape while keeping a distinctive signature.

    Parameters
    ----------
    img : single color channel or grayscale image.
    orient : number of orientation bins of each histogram (typical values are
        between 6 and 12).
    pix_per_cell : cell edge length in pixels; passed to ``hog`` as the square
        tuple ``(pix_per_cell, pix_per_cell)``.
    cell_per_block : block edge length in cells over which the histograms are
        normalized; passed as ``(cell_per_block, cell_per_block)``.
    vis : when True a visualization image is requested as well.
    feature_vec : forwarded as ``feature_vector``; True flattens the result,
        False keeps the block structure so windows can be sliced out later.

    Returns
    -------
    ``features`` when ``vis`` is False, otherwise ``(features, hog_image)``.
    """
    # NOTE: the keyword is spelled `visualize`; the older `visualise` spelling
    # was deprecated and then removed from scikit-image, where it raises TypeError.
    hog_output = hog(img, orientations = orient,
                     pixels_per_cell = (pix_per_cell, pix_per_cell),
                     cells_per_block = (cell_per_block, cell_per_block),
                     transform_sqrt = False,
                     visualize = vis, feature_vector = feature_vec)

    # Return two outputs if visualization is requested
    if vis:
        features, hog_image = hog_output
        return features, hog_image

    # Return only features otherwise
    return hog_output


def cs_convert_from_rgb(img, color_space = 'RGB'):
    """Convert an RGB image to the requested color space.

    Parameters
    ----------
    img : RGB image.
    color_space : one of 'RGB', 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'
        (case-sensitive).

    Returns
    -------
    A new array: a copy of ``img`` for 'RGB', otherwise the ``cv2.cvtColor`` result.

    Raises
    ------
    ValueError
        If ``color_space`` is not one of the supported names. Previously this
        case fell through every branch and failed with an UnboundLocalError.
    """
    # 'RGB' needs no conversion; hand back a copy so callers can modify it freely
    if color_space == 'RGB':
        return np.copy(img)

    if color_space == 'HSV':
        conversion = cv2.COLOR_RGB2HSV
    elif color_space == 'LUV':
        conversion = cv2.COLOR_RGB2LUV
    elif color_space == 'HLS':
        conversion = cv2.COLOR_RGB2HLS
    elif color_space == 'YUV':
        conversion = cv2.COLOR_RGB2YUV
    elif color_space == 'YCrCb':
        conversion = cv2.COLOR_RGB2YCrCb
    else:
        raise ValueError("Unsupported color_space %r; choose from "
                         "'RGB', 'HSV', 'LUV', 'HLS', 'YUV', 'YCrCb'" % (color_space,))

    return cv2.cvtColor(img, conversion)


def extract_features(imgs, color_space = 'RGB', spatial_size = (32, 32), hist_bins = 32,
                     orient = 9, pix_per_cell = 8, cell_per_block = 2, hog_channel = 0,
                     spatial_feat = True, hist_feat = True, hog_feat = True):
    """Build one feature vector per image file from spatial, histogram and HOG features.

    ``imgs`` is an iterable of image paths. Each file is read with
    ``mpimg.imread``, converted with ``cs_convert_from_rgb`` and then described
    by the enabled feature groups, always in the order
    spatial -> color histogram -> HOG. Keeping that order identical between
    training and prediction is essential.

    ``hog_channel`` is either a channel index or the string 'ALL' (HOG of every
    channel, flattened together). The remaining parameters are forwarded to
    ``bin_spatial``, ``color_hist`` and ``get_hog_features``.

    Returns a list with one 1-D feature array per input path.
    """
    all_features = []

    for path in imgs:
        # Load the file and move it into the requested color space
        converted = cs_convert_from_rgb(mpimg.imread(path), color_space)

        parts = []

        if spatial_feat == True:
            # Downsampled raw pixel values
            parts.append(bin_spatial(converted, size = spatial_size))

        if hist_feat == True:
            # Only the concatenated counts (first return value) are used
            parts.append(color_hist(converted, nbins = hist_bins)[0])

        if hog_feat == True:
            if hog_channel == 'ALL':
                # One HOG vector per channel, flattened into a single vector
                per_channel = [get_hog_features(converted[:,:,idx],
                                                orient, pix_per_cell, cell_per_block,
                                                vis = False, feature_vec = True)
                               for idx in range(converted.shape[2])]
                hog_part = np.ravel(per_channel)
            else:
                hog_part = get_hog_features(converted[:,:,hog_channel], orient,
                                            pix_per_cell, cell_per_block,
                                            vis = False, feature_vec = True)
            parts.append(hog_part)

        # Join the enabled groups into this file's 1-D feature vector
        all_features.append(np.concatenate(parts))

    return all_features


def slide_window(img, x_start_stop = [None, None], y_start_stop = [None, None], xy_window = (64, 64), xy_overlap = (0.5, 0.5)):
    """Return the list of sliding-window positions that cover a region of an image.

    Parameters
    ----------
    img : image; only ``img.shape[0]`` (rows) and ``img.shape[1]`` (columns) are read.
    x_start_stop, y_start_stop : ``[start, stop]`` pixel bounds of the search
        region. ``None`` entries default to the image border. The lists are
        only read, never modified.
    xy_window : ``(width, height)`` of each window in pixels.
    xy_overlap : ``(x, y)`` fraction of overlap between neighbouring windows.

    Returns
    -------
    List of ``((x1, y1), (x2, y2))`` window corners, ordered row by row.

    Raises
    ------
    ValueError
        If the overlap leaves a step of less than one pixel.

    Notes
    -----
    Fixes relative to the earlier implementation:
    * ``None`` bounds used to be written back into the argument lists. Because
      the defaults are shared list objects, the first image's size leaked into
      every later call that relied on the defaults.
    * ``np.int`` no longer exists in current NumPy; the builtin ``int`` is used.
    * The window count was only correct for 50% overlap; higher overlaps
      produced windows past the stop bound and lower overlaps skipped windows
      that fit. Results for the default 50% overlap are unchanged.
    """
    # Resolve the bounds into locals instead of mutating the argument lists
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]  # columns
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]  # rows

    # Span of the region to be searched
    x_span = x_stop - x_start
    y_span = y_stop - y_start

    # Number of pixels per step in x/y
    x_pixels_per_step = int(xy_window[0] * (1.0 - xy_overlap[0]))
    y_pixels_per_step = int(xy_window[1] * (1.0 - xy_overlap[1]))
    if x_pixels_per_step < 1 or y_pixels_per_step < 1:
        raise ValueError('xy_overlap leaves a window step of less than one pixel')

    # Number of windows that fit completely inside the span:
    # the first window plus one more for every full step that still fits
    x_window_count = max(0, int((x_span - xy_window[0]) // x_pixels_per_step) + 1)
    y_window_count = max(0, int((y_span - xy_window[1]) // y_pixels_per_step) + 1)

    window_list = []
    for ypos in range(y_window_count):
        top = ypos * y_pixels_per_step + y_start
        for xpos in range(x_window_count):
            left = xpos * x_pixels_per_step + x_start
            window_list.append(((left, top), (left + xy_window[0], top + xy_window[1])))

    return window_list


def find_cars(img, scale, x_start_stop, y_start_stop, clf, scaler,
              orient, pix_per_cell, cell_per_block, spatial_size, hist_bins, color_space):
    """Classify sliding windows over a region of ``img`` using one HOG pass per channel.

    HOG is computed once for the whole search region and each window's features
    are sliced out of that array, which avoids recomputing HOG per window.
    Instead of changing the window size, the search region is resized by
    ``1 / scale`` so a fixed 64x64 window covers ``64 * scale`` original pixels.
    NOTE: this only makes sense when HOG features of all three channels were
    part of the training features.

    Parameters
    ----------
    img : RGB image; pixel values are divided by 255 before feature extraction
        (assumes 0-255 input such as a decoded JPG or video frame - TODO confirm
        against the training data range).
    scale : resize factor for the search region (1 means no resize).
    x_start_stop, y_start_stop : ``[start, stop]`` pixel bounds of the search
        region; ``None`` entries default to the image border. The lists are
        only read, never modified.
    clf : classifier exposing ``predict``; a prediction equal to 1 marks a car.
    scaler : feature scaler exposing ``transform``.
    orient, pix_per_cell, cell_per_block : HOG parameters, see ``get_hog_features``.
    spatial_size : forwarded to ``bin_spatial``.
    hist_bins : forwarded to ``color_hist``.
    color_space : forwarded to ``cs_convert_from_rgb``.

    Returns
    -------
    draw_img : copy of the input with a rectangle drawn on every positive window.
    heatmap : array shaped like one channel of the input, incremented by 1
        inside every positive window.

    Notes
    -----
    Fixes relative to the earlier implementation:
    * the third HOG array is computed from channel 3 (it used channel 2 twice);
    * ``np.int`` (no longer available in current NumPy) is replaced by ``int``;
    * the caller's start/stop lists are no longer overwritten;
    * the last valid window column/row is now searched (step count was one short);
    * block counts use ``cell_per_block`` instead of assuming it equals 2.
    """
    # Output image and an empty heatmap with the shape/dtype of one input channel
    draw_img = np.copy(img)
    heatmap = np.zeros_like(img[:,:,0])

    # Bring the pixel range to 0-1 (see docstring for the assumption behind this)
    img = img.astype(np.float32) / 255

    # Resolve the search bounds into locals instead of mutating the arguments
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Restrict the work to the requested region and convert its color space
    search_img = img[y_start:y_stop, x_start:x_stop, :]
    search_img_color = cs_convert_from_rgb(search_img, color_space = color_space)

    # Resize the region instead of the window: the window stays 64x64 while the
    # image content it covers changes with the scale
    if scale != 1:
        img_shape = search_img_color.shape
        search_img_color = cv2.resize(search_img_color, (int(img_shape[1] / scale),
                                                         int(img_shape[0] / scale)))

    ch1 = search_img_color[:,:,0]
    ch2 = search_img_color[:,:,1]
    ch3 = search_img_color[:,:,2]

    # Number of HOG blocks in x and y: cells - cells_per_block + 1
    nx_blocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    ny_blocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1

    # Window size in pixels (the training images are 64x64)
    window_px = 64

    # Number of HOG blocks covered by one window
    nblocks_per_window = (window_px // pix_per_cell) - cell_per_block + 1

    # Step in cells instead of an overlap fraction: with pix_per_cell = 8 and
    # cells_per_step = 2 the window moves 16 px, i.e. 75% overlap for 64 px
    cells_per_step = 2

    # Number of window positions along each axis, including the last one that fits
    nxsteps = (nx_blocks - nblocks_per_window) // cells_per_step + 1
    nysteps = (ny_blocks - nblocks_per_window) // cells_per_step + 1

    # HOG of each channel for the entire search region, block structure preserved
    hog1 = get_hog_features(ch1, orient = orient, pix_per_cell = pix_per_cell, cell_per_block = cell_per_block, feature_vec = False)
    hog2 = get_hog_features(ch2, orient = orient, pix_per_cell = pix_per_cell, cell_per_block = cell_per_block, feature_vec = False)
    hog3 = get_hog_features(ch3, orient = orient, pix_per_cell = pix_per_cell, cell_per_block = cell_per_block, feature_vec = False)

    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb * cells_per_step
            xpos = xb * cells_per_step

            # Slice this window's HOG blocks out of the precomputed arrays
            hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3))

            # Top-left corner of the window in (scaled) region pixels
            xleft = xpos * pix_per_cell
            ytop = ypos * pix_per_cell

            # Image patch for the color features, at the training size of 64x64
            subimg = cv2.resize(search_img_color[ytop:ytop + window_px, xleft:xleft + window_px], (64, 64))

            # Color histogram and spatial features of the patch
            hist_features, _, _, _, _ = color_hist(subimg, nbins = hist_bins)
            spatial_features = bin_spatial(subimg, size = spatial_size)

            # Same feature order as in extract_features: spatial, histogram, HOG
            test_features = scaler.transform(np.hstack((spatial_features, hist_features, hog_features)).reshape(1, -1))

            test_prediction = clf.predict(test_features)

            # A positive window is drawn on the output image and adds heat
            if test_prediction == 1:
                # Map the window back to original image coordinates
                xbox_left = int(xleft * scale)
                ytop_draw = int(ytop * scale)
                win_draw = int(window_px * scale)

                box_left = xbox_left + x_start
                box_top = ytop_draw + y_start
                box_right = box_left + win_draw
                box_bottom = box_top + win_draw

                cv2.rectangle(draw_img, (box_left, box_top), (box_right, box_bottom),
                              (0, 0, 255), thickness = 6)
                heatmap[box_top:box_bottom, box_left:box_right] += 1

    # Return the annotated image and the heatmap
    return draw_img, heatmap


def apply_threshold(heatmap, threshold):
    """Zero every heatmap pixel whose value is less than or equal to ``threshold``.

    Suppressing low-heat regions reduces false positives. The array is modified
    in place and the same object is returned.
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap


def draw_labeled_bboxes(img, labels):
    """Draw one bounding box per labeled region onto ``img`` and return it.

    ``labels`` is indexed as ``labels[0]`` (array of per-pixel label values) and
    ``labels[1]`` (number of labeled regions). Label values 1..labels[1] are
    each treated as one detected car. The rectangles are drawn directly on
    ``img``.
    """
    for car_number in range(1, labels[1] + 1):
        # Row/column indices of every pixel carrying this label
        hits = (labels[0] == car_number).nonzero()
        ys = np.array(hits[0])
        xs = np.array(hits[1])

        # The box spans the extreme x and y coordinates of the region
        top_left = (np.min(xs), np.min(ys))
        bottom_right = (np.max(xs), np.max(ys))

        cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 6)

    return img


##Line Detection

def get_points(folder_name, out_folder, corner_count, force = True):
    """Detect chessboard corners in calibration images and pickle the results.

    Parameters
    ----------
    folder_name : folder searched for ``*.jpg`` calibration images.
    out_folder : folder that receives one annotated image per successful
        detection (``calibration_points_<n>.jpg``); created if missing.
    corner_count : ``(nx, ny)`` number of inside corners of the chessboard,
        e.g. ``(9, 6)``.
    force : when False and ``corners_pickle.p`` already exists, nothing is
        recomputed and a message is printed instead.

    Side effects
    ------------
    Writes ``corners_pickle.p`` in the current working directory with the keys
    ``"corners"`` (detected image-plane corners) and ``"indices"`` (matching
    object-space grid points).

    Notes
    -----
    Fixes relative to the earlier implementation: the object grid is built from
    ``corner_count`` instead of a hard-coded 9x6, unreadable images are skipped
    instead of crashing, the pickle file is closed deterministically, images
    are processed in sorted order so the numbering is reproducible, and the
    ``cv2.destroyAllWindows()`` call was dropped because no window is opened here.
    """
    output_pickle = 'corners_pickle.p'

    if os.path.exists(output_pickle) and not force:
        print('--corners_pickle.p-- file already exist! Use \'force=True\' to overwrite')
        return

    # The calibration images share a file name pattern, so glob can collect them
    file_name_pattern = folder_name + '/*.jpg'
    images = sorted(glob.glob(file_name_pattern))

    index_array = []    # per image: (x, y, z) grid locations in object space
    corners_array = []  # per image: detected corner points in the image plane

    # Object-space grid (0,0,0), (1,0,0), ..., (nx-1, ny-1, 0).
    # Only x and y are filled in; z stays zero because the board is flat.
    indices = np.zeros((corner_count[0] * corner_count[1], 3), np.float32)
    indices[:, :2] = np.mgrid[0:corner_count[0], 0:corner_count[1]].T.reshape(-1, 2)

    # cv2.imwrite does not raise on a missing folder, so make sure it exists
    if out_folder:
        os.makedirs(out_folder, exist_ok = True)

    for idx, img_name in enumerate(images):
        print("Working on calibration image # ", idx + 1)

        # cv2.imread returns BGR data, or None when the file cannot be decoded
        img = cv2.imread(img_name)
        if img is None:
            print("Skipping unreadable calibration image: ", img_name)
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Returns a success flag and the corner locations; no flags are passed
        ret, corners = cv2.findChessboardCorners(gray, corner_count, None)

        if ret:
            # The object grid is the same for every image
            index_array.append(indices)
            corners_array.append(corners)

            # Save an annotated copy for visual inspection
            cv2.drawChessboardCorners(img, corner_count, corners, ret)
            cv2.imwrite(os.path.join(out_folder, str('calibration_points_' + str(idx + 1) + '.jpg')), img)

    # Pickle the results for later use
    corners_pickle = dict()
    corners_pickle["corners"] = corners_array
    corners_pickle["indices"] = index_array
    with open(output_pickle, "wb") as handle:
        pickle.dump(corners_pickle, handle)


def get_calibration(out_folder, test_folder, test_img, force = True):
    """Calibrate the camera from pickled corner data and save the result.

    Parameters
    ----------
    out_folder : folder that receives the undistorted test image and a
        side-by-side comparison figure (``<name>_compare``).
    test_folder, test_img : location and file name of the image used to
        determine the image size and to demonstrate the undistortion.
    force : when False and ``calibration.p`` already exists, nothing is
        recomputed and a message is printed instead.

    Side effects
    ------------
    Reads ``corners_pickle.p`` (written by ``get_points``) and writes
    ``calibration.p`` with the keys ``"mtx"`` (camera matrix) and ``"dist"``
    (distortion coefficients), both in the current working directory. Exits via
    ``sys.exit`` when the corner pickle or the test image is unavailable.

    Notes
    -----
    Fixes relative to the earlier implementation: the comparison figure converts
    the BGR images from OpenCV to RGB before handing them to matplotlib (red and
    blue were swapped), pickle files are closed deterministically, and an
    unreadable test image is reported instead of failing on ``img.shape``.
    """
    input_pickle = 'corners_pickle.p'
    output_pickle = 'calibration.p'

    if os.path.exists(output_pickle) and not force:
        print('--calibration.p-- file already exist! Use \'force=True\' to overwrite')
        return

    if not os.path.exists(input_pickle):
        sys.exit('--corners_pickle.p-- does not exist! Call `get_points()` function first')

    # NOTE(security): pickle.load can execute code embedded in the file; only
    # load a corners_pickle.p that was produced locally by get_points().
    with open(input_pickle, 'rb') as handle:
        corners_pickle = pickle.load(handle)
    indices = corners_pickle['indices']
    corners = corners_pickle['corners']

    # Read the calibration test image (BGR); None means it could not be decoded
    img = cv2.imread(os.path.join(test_folder, test_img))
    if img is None:
        sys.exit('Could not read the calibration test image: ' + os.path.join(test_folder, test_img))
    img_size = (img.shape[1], img.shape[0])

    # Camera calibration from object points and image points:
    # mtx = camera matrix, dist = distortion coefficients,
    # rvecs / tvecs = rotation and translation vectors per calibration image
    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(indices, corners, img_size, None, None)

    # Undistort the test image and save it
    dst = cv2.undistort(img, mtx, dist, None, mtx)
    cv2.imwrite(os.path.join(out_folder, test_img), dst)

    # Save camera matrix and distortion coefficients
    calibration = dict()
    calibration["mtx"] = mtx
    calibration["dist"] = dist
    with open(output_pickle, "wb") as handle:
        pickle.dump(calibration, handle)

    print('--calibration.p-- saved! ')

    # Side-by-side comparison; matplotlib expects RGB while OpenCV delivers BGR
    f, (ax1, ax2) = plt.subplots(1, 2, figsize = (9, 3))
    f.tight_layout()
    ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax1.set_title('Original Image', fontsize = 15)
    ax2.imshow(cv2.cvtColor(dst, cv2.COLOR_BGR2RGB))
    ax2.set_title('Undistorted Image', fontsize = 15)
    plt.subplots_adjust(left = 0., right = 1, top = 0.9, bottom = 0.)
    img_name = os.path.splitext(test_img)[0]
    plt.savefig(os.path.join(out_folder, str(img_name + '_compare')))
    plt.close(f)


def abs_sobel_thresh(img, orient = 'x', sobel_kernel = 3, thresh = (0, 255)):
    """Threshold the absolute Sobel gradient along one axis.

    The image is converted to grayscale with ``cv2.COLOR_BGR2GRAY``, the Sobel
    derivative in x or y is taken with the given kernel size, its absolute value
    is rescaled to 0-255 (8 bit) and a binary mask is returned.

    Parameters
    ----------
    img : 3-channel image (converted with a BGR-to-gray conversion).
    orient : 'x' or 'y' (case-insensitive); anything else exits via ``sys.exit``.
    sobel_kernel : Sobel kernel size.
    thresh : inclusive ``(low, high)`` range on the rescaled gradient.

    Returns
    -------
    uint8 array with 1 where the rescaled gradient lies inside ``thresh``, else 0.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    if orient.upper() == 'X':
        sobel = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize = sobel_kernel)
    elif orient.upper() == 'Y':
        sobel = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize = sobel_kernel)
    else:
        sys.exit('Orientation should be a member of (x, y) in abs_sobel_thresh function')

    abs_sobel = np.absolute(sobel)

    # A perfectly flat image has a zero gradient everywhere; dividing by the
    # zero maximum would produce NaNs, so the rescaled gradient is defined as 0
    peak = np.max(abs_sobel)
    if peak > 0:
        scaled_sobel = np.uint8(255 * abs_sobel / peak)
    else:
        scaled_sobel = np.zeros(abs_sobel.shape, np.uint8)

    binary_output = np.zeros_like(scaled_sobel)
    binary_output[(scaled_sobel >= thresh[0]) & (scaled_sobel <= thresh[1])] = 1
    return binary_output


def mag_thresh(img, sobel_kernel = 3, thresh = (0, 255)):
    """Threshold the magnitude of the Sobel gradient.

    The image is converted to grayscale with ``cv2.COLOR_BGR2GRAY``, the x and y
    Sobel derivatives are taken with the given kernel size, and their Euclidean
    magnitude is rescaled to 0-255 before the threshold is applied.

    Parameters
    ----------
    img : 3-channel image (converted with a BGR-to-gray conversion).
    sobel_kernel : Sobel kernel size.
    thresh : inclusive ``(low, high)`` range on the rescaled magnitude.

    Returns
    -------
    uint8 array with 1 where the rescaled magnitude lies inside ``thresh``, else 0.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize = sobel_kernel)
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize = sobel_kernel)

    sobel = np.sqrt(np.square(sobel_x) + np.square(sobel_y))

    # A perfectly flat image has zero magnitude everywhere; dividing by the
    # resulting zero scale factor would produce NaNs, so use zeros directly
    scale_factor = np.max(sobel) / 255
    if scale_factor > 0:
        gradmag = (sobel / scale_factor).astype(np.uint8)
    else:
        gradmag = np.zeros(sobel.shape, np.uint8)

    binary_output = np.zeros_like(gradmag)
    binary_output[(gradmag >= thresh[0]) & (gradmag <= thresh[1])] = 1

    return binary_output


def dir_threshold(img, sobel_kernel = 3, thresh = (0, np.pi / 2)):
    """Threshold the direction of the image gradient.

    The image is converted to grayscale with ``cv2.COLOR_BGR2GRAY``. The absolute
    x and y Sobel derivatives (given kernel size) define a gradient direction
    ``arctan2(|dy|, |dx|)``; pixels whose direction lies inside the inclusive
    ``thresh`` range are set to 1 in the returned mask, all others to 0. The
    mask has the same dtype as the direction array.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Absolute derivatives along x and y
    dx = np.absolute(cv2.Sobel(grayscale, cv2.CV_64F, 1, 0, ksize = sobel_kernel))
    dy = np.absolute(cv2.Sobel(grayscale, cv2.CV_64F, 0, 1, ksize = sobel_kernel))

    # Direction of the resultant gradient vector
    direction = np.arctan2(dy, dx)

    lower = thresh[0]
    upper = thresh[1]
    within = (direction >= lower) & (direction <= upper)

    mask = np.zeros_like(direction)
    mask[within] = 1
    return mask


def hls_select(img, chn, thresh = (0, 255)):
    """Threshold a single channel of the HLS representation of an image.

    The image is converted with ``cv2.COLOR_BGR2HLS``; ``chn`` selects the
    channel by letter ('H', 'L' or 'S', case-insensitive). Any other value
    exits via ``sys.exit``. Returns a mask shaped like the channel with 1 where
    the value lies inside the inclusive ``thresh`` range and 0 elsewhere.
    """
    converted = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)

    # Position of each channel letter in the converted image
    channel_index = {'H': 0, 'L': 1, 'S': 2}
    letter = chn.upper()
    if letter not in channel_index:
        sys.exit('Select from (H, L, S) as the channel argument for hls_select() function')
    plane = converted[:, :, channel_index[letter]]

    in_range = (plane >= thresh[0]) & (plane <= thresh[1])
    mask = np.zeros_like(plane)
    mask[in_range] = 1
    return mask


def hsv_select(img, chn, thresh = (0, 255)):
    """Threshold a single channel of the HSV representation of an image.

    The image is converted with ``cv2.COLOR_BGR2HSV``; ``chn`` selects the
    channel by letter ('H', 'S' or 'V', case-insensitive). Any other value
    exits via ``sys.exit``. Returns a mask shaped like the channel with 1 where
    the value lies inside the inclusive ``thresh`` range and 0 elsewhere.
    """
    converted = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Position of each channel letter in the converted image
    channel_index = {'H': 0, 'S': 1, 'V': 2}
    letter = chn.upper()
    if letter not in channel_index:
        sys.exit('Select from (H, S, V) as the channel argument for hsv_select() function')
    plane = converted[:, :, channel_index[letter]]

    in_range = (plane >= thresh[0]) & (plane <= thresh[1])
    mask = np.zeros_like(plane)
    mask[in_range] = 1
    return mask


def window_mask(width, height, img_ref, center, level):
    """Return a mask shaped like ``img_ref`` with one rectangular window set to 1.

    The window is ``height`` rows tall and sits ``level`` windows above the
    bottom edge of the image. Horizontally it is ``width`` columns wide around
    ``center``, clipped to the left and right image borders.
    """
    total_rows = img_ref.shape[0]
    total_cols = img_ref.shape[1]

    # Vertical extent, counted upwards from the bottom row
    row_start = int(total_rows - (level + 1) * height)
    row_stop = int(total_rows - level * height)

    # Horizontal extent, clipped to the image
    col_start = max(0, int(center - width / 2))
    col_stop = min(int(center + width / 2), total_cols)

    output = np.zeros_like(img_ref)
    output[row_start:row_stop, col_start:col_stop] = 1
    return output


def pipeline(img):
    # Get the calibration parameters
    calibration_pickle = pickle.load(open('calibration.p', 'rb'))
    mtx = calibration_pickle['mtx']
    dist = calibration_pickle['dist']
    
    # Undistort the image based on calibration data
    img = cv2.undistort(img, mtx, dist, None, mtx)
    
    # Apply some thresholding methods
    gradx = abs_sobel_thresh(img, orient = 'x', sobel_kernel = 3, thresh = (15, 255))
    grady = abs_sobel_thresh(img, orient = 'y', sobel_kernel = 3, thresh = (45, 255))
     
    #mag_binary = mag_thresh(img, sobel_kernel = 3, thresh = (30, 100))
    #dir_binary = dir_threshold(img, sobel_kernel = 15, thresh = (0.7, 1.3))
    
    sthresh = hls_select(img, 's', thresh = (102, 255))
    vthresh = hsv_select(img, 'v', thresh = (51, 255))
    
    # Get the combined binary
    combined = np.zeros_like(gradx)
    combined[(gradx == 1) & (grady == 1) | ((sthresh == 1) & (vthresh == 1))] = 255
    #combined[((gradx == 1) & (grady == 1)) | ((mag_binary == 1) & (dir_binary == 1))] = 1
    
    # Apply perspective Transform
    
    # First video
    box_width = 0.645
    mid_width = 0.118
    height_pct = 0.645 #468px=,65; 450px=,625, 486px=,675
    bottom_trim = 0.934
    
    src = np.float32([[img.shape[1] * (.5 - mid_width / 2), img.shape[0] * height_pct],
                      [img.shape[1] * (.5 + mid_width / 2), img.shape[0] * height_pct],
                      [img.shape[1] * (.5 + box_width / 2), img.shape[0] * bottom_trim],
                      [img.shape[1] * (.5 - box_width / 2), img.shape[0] * bottom_trim]])
    
    # offset adjusts the shrinkage of the warped image - larger is shrunken more
    offset = img.shape[1] * 0.175
    
    dst = np.float32([[offset, 0], [img.shape[1] - offset, 0],
                      [img.shape[1] - offset, img.shape[0]],
                      [offset, img.shape[0]]])
    
    # Get perspective transformation matrix and its inverse for later use
    mat = cv2.getPerspectiveTransform(src, dst)
    mat_inv = cv2.getPerspectiveTransform(dst, src)
    
    # Use the matrix for perspective transform
    binary_warped = cv2.warpPerspective(combined, mat, (img.shape[1], img.shape[0]), flags = cv2.INTER_LINEAR)
    
    # window settings
    window_width = 30  # consider a window width
    window_height = 120  # Break image into 6 vertical layers since image height is 720
    margin = 30  # How much to slide left and right for searching
    smoothing_factor = 20  # Use last 20 results for averaging - smooth the data 20
    # The lane is about 30 meters long and 3.7 meters wide
    y_scale = 30.0 / 720.0  # 30 meter is around 720 pixels
    x_scale = 3.7 / 700.0  # 3.7 meters is around 700pixels
    
    # Setup the overall class to do all the tracking
    centroids = Line(window_width = window_width, window_height = window_height, margin = margin, ym = y_scale, xm = x_scale, smooth_factor = smoothing_factor)
    
    window_centroids = centroids.find_window_centroids(binary_warped)
    
    # If we found any window centers
    if len(window_centroids) > 0:
        # Points used to draw all the left and right windows
        l_points = np.zeros_like(binary_warped)
        r_points = np.zeros_like(binary_warped)
        
        # Points used to find the left and right lanes
        rightx = []
        leftx = []
        
        # Go through each level and draw the windows
        for level in range(0, len(window_centroids)):
            # add center value found in frame to the list of lane points per left, right
            leftx.append(window_centroids[level][0])
            rightx.append(window_centroids[level][1])
            
            # window_mask is a function to draw window areas
            l_mask = window_mask(window_width, window_height, binary_warped, window_centroids[level][0], level) #window_width - 35
            r_mask = window_mask(window_width, window_height, binary_warped, window_centroids[level][1], level) #window_width - 35
            
            # Add graphic points from window mask here to total pixels found
            l_points[(l_points == 255) | ((l_mask == 1))] = 255
            r_points[(r_points == 255) | ((r_mask == 1))] = 255
        
        # Draw the results
        template = np.array(r_points + l_points, np.uint8)  # add both left and right window pixels together
        zero_channel = np.zeros_like(template)  # create a zero color channel
        template = np.array(cv2.merge((zero_channel, template, zero_channel)), np.uint8)  # make window pixels green
        # making the original road pixels 3 color channel
        warpage = np.array(cv2.merge((binary_warped, binary_warped, binary_warped)), np.uint8)
        # overlay the original road image with window results
        output = cv2.addWeighted(warpage, 1.0, template, 0.5, 0.0)
        
        # fit the lane boundaries to the left, right and center positions found
        yvals = range(0, binary_warped.shape[0])
        
        # box centers - should be 9 components
        res_yvals = np.arange(binary_warped.shape[0] - (window_height / 2), 0, -window_height)
        
        # fit polynomial to left - 2nd order
        left_fit = np.polyfit(res_yvals, leftx, 2)
        # predict the x value for each y value - continuous with resolution of 1 pixel
        left_fitx = left_fit[0] * yvals * yvals + left_fit[1] * yvals + left_fit[2]
        left_fitx = np.array(left_fitx, np.int32)
        
        # fit polynomial to right - 2nd order
        right_fit = np.polyfit(res_yvals, rightx, 2)
        # predict the x value for each y value - continuous with resolution of 1 pixel
        right_fitx = right_fit[0] * yvals * yvals + right_fit[1] * yvals + right_fit[2]
        right_fitx = np.array(right_fitx, np.int32)
        
        # encapsulate lines to give depth
        # left lane
        left_lane = np.array(list(zip(np.concatenate((left_fitx - (window_width) / 2, left_fitx[::-1] + (window_width) / 2), axis = 0),
                                      np.concatenate((yvals, yvals[::-1]), axis = 0))), np.int32) #window_width - 35
        # right lane
        right_lane = np.array(list(zip(np.concatenate((right_fitx - (window_width) / 2, right_fitx[::-1] + (window_width) / 2), axis = 0),
                                       np.concatenate((yvals, yvals[::-1]), axis = 0))), np.int32) #window_width - 35
        
        # inner lane
        inner_lane = np.array(list(zip(np.concatenate((left_fitx + (window_width) / 2, right_fitx[::-1] - (window_width) / 2), axis = 0),
                                       np.concatenate((yvals, yvals[::-1]), axis = 0))), np.int32) #window_width - 35
        
        # lane lines themselves
        road = np.zeros_like(img)
        cv2.fillPoly(road, [left_lane], color = [255, 0, 0])
        cv2.fillPoly(road, [right_lane], color = [0, 0, 255])
        cv2.fillPoly(road, [inner_lane], color = [0, 255, 0])
        # inverse transform to get back to actual perspective
        road_warped = cv2.warpPerspective(road, mat_inv, (img.shape[1], img.shape[0]), flags = cv2.INTER_LINEAR)
        
        # to get nice outlines
        road_bkg = np.zeros_like(img)
        cv2.fillPoly(road_bkg, [left_lane], color = [255, 255, 255])
        cv2.fillPoly(road_bkg, [right_lane], color = [255, 255, 255])
        cv2.fillPoly(road_bkg, [inner_lane], color = [255, 255, 255])
        # inverse transform to get back to actual perspective
        road_warped_bkg = cv2.warpPerspective(road_bkg, mat_inv, (img.shape[1], img.shape[0]), flags = cv2.INTER_LINEAR)
        
        # return the transformed lane lines
        # first make background black
        base = cv2.addWeighted(img, 1.0, road_warped_bkg, -1.0, 0.0)
        # then add the lane lines
        result = cv2.addWeighted(base, 1.0, road_warped, 0.7, 0.0)
        output = result
        
        # meters per pixel in y direction
        xm_ppx, ym_ppx = centroids.get_ppx_values()
        
        # fit a 2nd order polynomial for the actual x and y coordinates of the left lane
        # left lane is more stable
        curve_fit_cr = np.polyfit(np.array(res_yvals, np.float32) * ym_ppx, np.array(leftx, np.float32) * xm_ppx, 2)
        # using the formula calculate the road curvature
        curverad = ((1 + (2 * curve_fit_cr[0] * yvals[-1] * ym_ppx + curve_fit_cr[1]) ** 2) ** 1.5) / np.absolute(2 * curve_fit_cr[0])
        
        # calculate the offset of the car on the road
        # average the x pixel values that are closest to the car to find the road center
        road_center = (left_fitx[-1] + right_fitx[-1]) / 2
        # find the difference between the road center and the warped image center - convert it to actual meters
        center_diff = (road_center - binary_warped.shape[1]/2) * xm_ppx
        side_pos = "left"
        if center_diff <= 0:
            # if difference is smaller than zero, warped image center (and hence the car) location
            # is to the right of the road
            side_pos = "right"
        
        # draw the text showing curvature, offset and speed
        # can check the values with
        cv2.putText(output, 'Radius of Curvature = ' + str(round(curverad, 3)) + '(m)', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(output, 'Vehicle is = ' + str(abs(round(center_diff, 3))) + 'm ' + side_pos + ' of center', (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    else:
        output = img
    
    return output


def process_image(img):
    """Annotate a single frame with lane markings and detected vehicles.

    The frame is first run through `pipeline` (lane detection), then
    `find_cars` searches the original frame using the classifier (`svc`),
    the scaler (`x_scaler`) and the feature parameters defined at notebook
    level. The second value returned by `find_cars` is labelled with
    `scipy`'s `label`, and the labelled regions are drawn on a copy of the
    lane-annotated frame.

    Parameters
    ----------
    img : frame handed over by the video processing callback.

    Returns
    -------
    The frame produced by `draw_labeled_bboxes`.
    """
    # Search region handed to find_cars: no explicit x limits, y between 400 and 672
    search_x = [None, None]
    search_y = [400, 672]
    # Only a single search scale is used
    window_scale = 1.5

    # Advanced lane line detection
    lane_frame = pipeline(img = img)

    # Vehicle search; the first return value is not needed here.
    # NOTE(review): the second value is presumably a heat map - confirm against find_cars
    _, detection_map = find_cars(img = img, scale = window_scale, x_start_stop = search_x,
                                 y_start_stop = search_y, clf = svc, scaler = x_scaler,
                                 orient = orient, pix_per_cell = pix_per_cell,
                                 cell_per_block = cell_per_block, spatial_size = spatial_size,
                                 hist_bins = hist_bins, color_space = color_space)

    # Label the detections and draw them on a copy of the lane-annotated frame
    car_regions = label(detection_map)
    return draw_labeled_bboxes(np.copy(lane_frame), car_regions)


def process_video(input_video_path, out_path, process_func):
    """Apply `process_func` to every frame of a video and write the result.

    Parameters
    ----------
    input_video_path : str
        Path of the video file to read.
    out_path : str
        Path of the video file to write. Missing parent directories are
        created before writing.
    process_func : callable
        Handed to the clip's `fl_image`; it is called with one frame and
        must return the processed frame.

    The output video is written without audio.
    """
    # Make sure the target directory exists, otherwise the writer fails.
    # dirname is '' for a bare file name, which makedirs would reject.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok = True)

    clip = VideoFileClip(input_video_path)
    try:
        video_clip = clip.fl_image(process_func)
        video_clip.write_videofile(out_path, audio = False)
    finally:
        # Release the resources held by the source clip even if processing
        # fails; guarded because older moviepy releases have no close()
        if hasattr(clip, 'close'):
            clip.close()

## Load Images

In [3]:
# Since we will be identifying cars using a classification algorithm,
# we need to train the classifier using car and non-car images.
# These images are located in two different folders 'vehicles' and 'non-vehicles' with subfolders.
# These example images come from a combination of the GTI vehicle image database,
# the KITTI vision benchmark suite, and examples extracted from the project video itself.
# We can use the glob package to collect the png images under the different subfolders using a pattern.
# glob returns the paths in arbitrary order, so sort them to get the same ordering on every run.

car_image_paths = sorted(glob.glob('vehicles/*/*.png'))
notcar_image_paths = sorted(glob.glob('non-vehicles/*/*.png'))

# Fail here with a clear message instead of much later inside feature scaling
assert car_image_paths, "No vehicle images found under 'vehicles/*/*.png'"
assert notcar_image_paths, "No non-vehicle images found under 'non-vehicles/*/*.png'"

# Now let's see the total number of images we have in each class
print('Number of Vehicle Images found: ', len(car_image_paths))
print('Number of Non-Vehicle Images found: ', len(notcar_image_paths))

Number of Vehicle Images found:  8792
Number of Non-Vehicle Images found:  8968


## Train Classifier

In [4]:
# Define feature parameters
color_space = 'YCrCb' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
orient = 9
pix_per_cell = 8 # size of the features we are looking in the images
cell_per_block = 2 # helps with the normalization - lighting, shadows
hog_channel = 'ALL'  # can be 0, 1, 2, or 'ALL'
spatial_size = (32, 32)  # Spatial binning dimensions
hist_bins = 32  # Number of histogram bins
spatial_feat = True  # Get spatial features on/off
hist_feat = True  # Get color histogram features on/off
hog_feat = True  # Get HOG features on/off

# Fixed seed so that the train/test split and the SVC fit are reproducible
rand_state = 42

t = time.time()

# The complete image lists are used for feature extraction (no subsampling)
test_cars = car_image_paths
test_notcars = notcar_image_paths

car_features = extract_features(test_cars, color_space = color_space,
                                spatial_size = spatial_size, hist_bins = hist_bins,
                                orient = orient, pix_per_cell = pix_per_cell,
                                cell_per_block = cell_per_block, hog_channel = hog_channel,
                                spatial_feat = spatial_feat, hist_feat = hist_feat, hog_feat = hog_feat)

notcar_features = extract_features(test_notcars, color_space = color_space,
                                   spatial_size = spatial_size, hist_bins = hist_bins,
                                   orient = orient, pix_per_cell = pix_per_cell,
                                   cell_per_block = cell_per_block, hog_channel = hog_channel,
                                   spatial_feat = spatial_feat, hist_feat = hist_feat, hog_feat = hog_feat)

print(round(time.time() - t, 2), 'Seconds to compute the features\n')

# Stack both classes into one feature matrix; label cars as 1 and non-cars as 0
x = np.vstack((car_features, notcar_features)).astype(np.float64) # Standard Scaler expects float 64
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

# Split the data into train and test sets BEFORE scaling, so that no
# statistics of the test samples leak into the scaler
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = rand_state)

# Normalize Data
# We have spatial, color histogram and HOG features in the same feature set
# It is best to bring them to equal scale to avoid one feature to dominate due to scale differences
# Fit a per-column scaler on the training samples only
x_scaler = StandardScaler().fit(x_train_raw)
# Use the same scaler to transform both sets
x_train = x_scaler.transform(x_train_raw)
x_test = x_scaler.transform(x_test_raw)

print('Using:', orient, 'orientations', pix_per_cell, 'pixels per cell and', 
      cell_per_block, 'cells per block', hist_bins, 'histogram bins, and',
      spatial_size, 'spatial sampling\n')
print('Feature vector length:', len(x_train[0]), '\n')

# Use linear SVC
svc = LinearSVC(random_state = rand_state)

# Check the training time for the SVC
t = time.time()

svc.fit(x_train, y_train)

print(round(time.time() - t, 2), 'Seconds to train SVC. \n')

# Check the score of the SVC on the held-out test set
svc_score = svc.score(x_test, y_test)

print('Test accuracy of SVC = ', round(svc_score, 4), '\n')

94.48 Seconds to compute the features

Using: 9 orientations 8 pixels per cell and 2 cells per block 32 histogram bins, and (32, 32) spatial sampling

Feature vector length: 8460 

5.94 Seconds to train SVC. 

Test accuracy of SVC =  0.9916 



In [5]:
# Run the combined lane + vehicle pipeline on the short test clip
process_video(input_video_path = 'test_video.mp4',
              out_path = 'output_videos/challenge_test_video.mp4',
              process_func = process_image)

[MoviePy] >>>> Building video output_videos/challenge_test_video.mp4
[MoviePy] Writing video output_videos/challenge_test_video.mp4


 97%|███████████████████████████████████████████████████████████████████████████████▉  | 38/39 [00:22<00:00,  1.69it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: output_videos/challenge_test_video.mp4 



In [None]:
# Read the rendered video (read-only, handle closed right after reading)
# and embed it in the notebook as a base64 data URI
with io.open('output_videos/challenge_test_video.mp4', 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
HTML(data = '''<video width="960" height="540" alt="test_video_car_detection" controls>
               <source src="data:video/mp4;base64,{0}" type="video/mp4" />
               </video>'''.format(encoded.decode('ascii')))

In [6]:
# Run the combined lane + vehicle pipeline on the full project video
process_video(input_video_path = 'project_video.mp4',
              out_path = 'output_videos/challenge_project_video.mp4',
              process_func = process_image)

[MoviePy] >>>> Building video output_videos/challenge_project_video.mp4
[MoviePy] Writing video output_videos/challenge_project_video.mp4


100%|█████████████████████████████████████████████████████████████████████████████▉| 1260/1261 [12:20<00:00,  1.69it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: output_videos/challenge_project_video.mp4 



In [None]:
# Read the rendered video (read-only, handle closed right after reading)
# and embed it in the notebook as a base64 data URI
with io.open('output_videos/challenge_project_video.mp4', 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
HTML(data = '''<video width="960" height="540" alt="project_video_car_detection" controls>
               <source src="data:video/mp4;base64,{0}" type="video/mp4" />
               </video>'''.format(encoded.decode('ascii')))