# Label Images from Videos

This project is intended to detect cars in a dashcam video stream. To train the machine learning model that detects them, labeled images of cars on the road, together with bounding box information, are needed.

This notebook labels images taken from such videos, instead of training the models on publicly available data, which may have limitations.

The videos are captured from a car's dashcam and are placed in the 'vids/to_label' directory. Using a computer vision library, the videos are stepped through frame by frame under manual control. Interesting images of cars, as well as of non-cars, may then be saved by the labeler.

This notebook allows the user to drag a bounding box around a car and then save the image in the directory 'data/c' (labelled as car); the bounding box is stored in a csv file as the pixel coordinates of its top-left and bottom-right corners.
Images with no car (background) are stored in the directory 'data/n'.

In this notebook, frames are saved from the video stream. Frames with a car in them are labelled as 1; other frames with no car (background) are labelled as 0.
The images with a car also have a bounding box (drawn manually), which will be used for training and bounding box prediction.

In [2]:
#importing necessary libraries, computer vision for image and video processing, video presenting, os for file management 
import cv2 as cv
import os
import numpy as np
import math

## Utility functions

In [3]:
def rescaleFrame(frame, scale=0.5):
    """Resize ``frame`` by the same factor along both axes.

    Args:
        frame: Image array; only ``shape[0]`` (height) and ``shape[1]``
            (width) are read here.
        scale: Multiplier applied to the width and the height. Each product
            is truncated to an int before resizing.

    Returns:
        The result of ``cv.resize`` with ``cv.INTER_AREA`` interpolation.
    """
    src_h, src_w = frame.shape[0], frame.shape[1]
    # cv.resize takes the target size as (width, height)
    target_size = (int(src_w * scale), int(src_h * scale))
    return cv.resize(frame, target_size, interpolation=cv.INTER_AREA)

In [4]:
def rotateImage(image, angle):
    """Rotate ``image`` about its centre, growing the canvas so nothing is cut off.

    Args:
        image: Image array; only its first two dimensions (height, width)
            are read here.
        angle: Rotation angle in degrees.

    Returns:
        The rotated image produced by ``cv.warpAffine`` with linear
        interpolation. Its size is the bounding rectangle of the rotated
        input, truncated to whole pixels.
    """
    src_h, src_w = image.shape[:2]
    centre = (src_w / 2, src_h / 2)

    # 2x3 affine matrix: rotation about the centre at unit scale
    matrix = cv.getRotationMatrix2D(centre, angle, 1)

    # size of the axis-aligned rectangle that encloses the rotated image
    theta = math.radians(angle)
    abs_sin = abs(math.sin(theta))
    abs_cos = abs(math.cos(theta))
    out_w = int((src_h * abs_sin) + (src_w * abs_cos))
    out_h = int((src_h * abs_cos) + (src_w * abs_sin))

    # shift the translation so the old centre lands on the new canvas centre
    matrix[0, 2] += (out_w / 2) - centre[0]
    matrix[1, 2] += (out_h / 2) - centre[1]

    return cv.warpAffine(image, matrix, (out_w, out_h), flags=cv.INTER_LINEAR)

In [5]:
def crop_img(img, scale=1.0, right=0, down=0, bottom_trim=200):
    """Crop a window of ``img`` centred on the (optionally shifted) image centre.

    Args:
        img: Image array indexed as ``img[row, column]``.
        scale: Window size as a fraction of the image width and height.
        right: Pixels to shift the window centre to the right.
        down: Pixels to shift the window centre downwards.
        bottom_trim: Rows removed from the bottom of the window. Defaults to
            200, the value that used to be hard-coded.
            NOTE(review): presumably trims the dashboard/bonnet - confirm.

    Returns:
        The cropped view of ``img``. It is empty when the trimmed window has
        no rows or columns inside the image.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    center_x, center_y = (img_w / 2) + right, (img_h / 2) + down
    width_scaled, height_scaled = img_w * scale, img_h * scale

    left_x = int(center_x - width_scaled / 2)
    right_x = int(center_x + width_scaled / 2)
    top_y = int(center_y - height_scaled / 2)
    bottom_y = int(center_y + height_scaled / 2) - bottom_trim

    # Clamp to the image: a negative slice bound would otherwise be read by
    # NumPy as "counted from the end" and silently return the wrong region.
    left_x = min(max(left_x, 0), img_w)
    right_x = min(max(right_x, 0), img_w)
    top_y = min(max(top_y, 0), img_h)
    bottom_y = min(max(bottom_y, 0), img_h)

    return img[top_y:bottom_y, left_x:right_x]


def crop_sc(img, scale=1.0, right=0, down=0, bottom_trim=200, right_trim=200):
    """Crop a centred window whose width is fixed at 75% of the image width.

    Unlike ``crop_img``, ``scale`` only affects the window height here; the
    width is always ``0.75 * img.shape[1]``.

    Args:
        img: Image array indexed as ``img[row, column]``.
        scale: Window height as a fraction of the image height.
        right: Pixels to shift the window centre to the right.
        down: Pixels to shift the window centre downwards.
        bottom_trim: Rows removed from the bottom of the window. Defaults to
            200, the value that used to be hard-coded.
        right_trim: Columns removed from the right of the window. Defaults to
            200, the value that used to be hard-coded.

    Returns:
        The cropped view of ``img``. It is empty when the trimmed window has
        no rows or columns inside the image.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    center_x, center_y = (img_w / 2) + right, (img_h / 2) + down
    width_scaled, height_scaled = img_w * 0.75, img_h * scale

    left_x = int(center_x - width_scaled / 2)
    right_x = int(center_x + width_scaled / 2) - right_trim
    top_y = int(center_y - height_scaled / 2)
    bottom_y = int(center_y + height_scaled / 2) - bottom_trim

    # Clamp to the image: a negative slice bound would otherwise be read by
    # NumPy as "counted from the end" and silently return the wrong region.
    left_x = min(max(left_x, 0), img_w)
    right_x = min(max(right_x, 0), img_w)
    top_y = min(max(top_y, 0), img_h)
    bottom_y = min(max(bottom_y, 0), img_h)

    return img[top_y:bottom_y, left_x:right_x]

In [6]:
#shared state and configuration used by the mouse handler and the labelling loop

#half-width and half-height of the scope rectangle; the code below always works with
#w2*2 by h2*2 pixels of the frame, which is also the size of the region that gets saved
w2, h2 = 240, 135

#bounding-box corners in frame pixels: refPt is where the drag starts (top-left once the
#corners are ordered) and endPt is where it ends (bottom-right); empty until a box is drawn
refPt, endPt = [], []

#top-left (x, y) of the scope rectangle; None until one has been chosen.
#the bounding box is kept inside the scope
scope = None
#True while a bounding box is being dragged
draw = False
#copy of the frame currently on screen, without any rectangles drawn on it
clone = None

#directory with the raw, unlabelled input videos
input_dir = 'vids/to_label/'
#directory that receives the labelled images
data_dir = 'data/'
#file (inside data_dir) holding the number of car and no-car images saved so far
count_file = 'count.txt'
#csv file (inside data_dir) with one row per image: image path plus the 4 bounding-box coordinates
label_file = 'label.csv'

In [7]:
def _clamp_scope_origin(x, y, frame_shape):
    """Return (x, y) moved so that a w2*2 by h2*2 scope starting there fits the frame.

    On an axis where the frame is smaller than the scope the origin falls back to 0.
    """
    x = max(min(x, frame_shape[1] - w2*2), 0)
    y = max(min(y, frame_shape[0] - h2*2), 0)
    return (x, y)


def click_and_crop(event, x, y, flags, param):
    """Mouse callback for the "image" window: choose the scope and draw the bounding box.

    * Right click places the scope, a fixed w2*2 by h2*2 rectangle (drawn in blue) whose
      top-left corner is the click position, moved back inside the frame if necessary.
      A scope on its own is what the labelling loop saves as a no-car image.
    * Left click and drag draws the bounding box (drawn in green). While dragging, the
      box is limited to (w2*2 - 40) by (h2*2 - 20) pixels. On release the corners are
      ordered as top-left / bottom-right and the box is kept 20 px (horizontally) and
      10 px (vertically) inside the scope. If no scope exists yet, one is placed around
      the box.

    The result is left in the globals ``scope``, ``refPt`` and ``endPt`` (frame pixel
    coordinates); ``frame`` is redrawn from ``clone`` on every update.

    Args:
        event: OpenCV mouse event code.
        x: Mouse pointer column in the window.
        y: Mouse pointer row in the window.
        flags: Unused; part of the OpenCV mouse-callback signature.
        param: Unused; part of the OpenCV mouse-callback signature.
    """
    global refPt, endPt, scope, draw, frame

    scope_w, scope_h = w2*2, h2*2
    blue, green = (255, 0, 0), (0, 255, 0)  # BGR

    #defining the scope of the image
    if event == cv.EVENT_RBUTTONDOWN:
        frame = clone.copy()
        scope = _clamp_scope_origin(x, y, frame.shape)
        #redrawing from the clean frame erases any earlier box from the screen, so forget
        #its coordinates too; otherwise a later save would record a box nobody can see
        refPt = []
        endPt = []
        cv.rectangle(frame, scope, (scope[0]+scope_w, scope[1]+scope_h), blue, 2)
        cv.imshow("image", frame)
        return

    #drawing the bounding box
    if event == cv.EVENT_LBUTTONDOWN:
        if not draw:
            refPt = [x, y]
            endPt = []
        draw = True

    elif event == cv.EVENT_MOUSEMOVE and draw:
        if len(refPt) != 2:
            #the anchor was cleared while the button was held down; abandon this drag
            draw = False
            return

        frame = clone.copy()
        if scope is not None:
            cv.rectangle(frame, scope, (scope[0]+scope_w, scope[1]+scope_h), blue, 2)

        #limit the box size by dragging the anchor along behind the pointer
        max_box_w = scope_w - 40
        max_box_h = scope_h - 20
        if abs(refPt[0]-x) > max_box_w:
            direction = 1 if x > refPt[0] else -1
            refPt[0] = abs(x - max_box_w*direction)
        if abs(refPt[1]-y) > max_box_h:
            direction = 1 if y > refPt[1] else -1
            refPt[1] = abs(y - max_box_h*direction)

        cv.rectangle(frame, (refPt[0], refPt[1]), (x, y), green, 2)
        cv.imshow("image", frame)

    elif event == cv.EVENT_LBUTTONUP and draw:
        draw = False
        if len(refPt) != 2:
            #the anchor was cleared while the button was held down; nothing to finish
            return

        frame = clone.copy()
        endPt = [x, y]

        #order the corners so refPt is the top-left and endPt the bottom-right
        if endPt[0] < refPt[0]:
            refPt[0], endPt[0] = endPt[0], refPt[0]
        if endPt[1] < refPt[1]:
            refPt[1], endPt[1] = endPt[1], refPt[1]

        #when the user forgot the right click there is no scope yet: place one around the
        #box. The centre is taken from the ordered corners so that a drag towards the
        #left or upwards is centred on the box and not on the release point.
        #NOTE(review): the extra 20/10 px offset puts the scope slightly up-left of exact
        #centring; kept as it was.
        if scope is None:
            mid_x = int((refPt[0]+endPt[0])/2)
            mid_y = int((refPt[1]+endPt[1])/2)
            scope = _clamp_scope_origin(mid_x-w2-20, mid_y-h2-10, frame.shape)

        #keep the box inside the scope with a 20 px / 10 px margin
        refPt[0] = max(refPt[0], scope[0]+20)
        refPt[1] = max(refPt[1], scope[1]+10)
        endPt[0] = min(endPt[0], scope[0]+scope_w-20)
        endPt[1] = min(endPt[1], scope[1]+scope_h-10)

        cv.rectangle(frame, scope, (scope[0]+scope_w, scope[1]+scope_h), blue, 2)
        cv.rectangle(frame, (refPt[0], refPt[1]), (endPt[0], endPt[1]), green, 2)
        cv.imshow("image", frame)
        

In [8]:
#load the running image counters; count_file holds "<car count>,<no-car count>" and is
#rewritten by the labelling cell when it finishes
with open(data_dir + count_file, 'r') as f:
    counters = f.read().split(',')

#cc numbers the next car image, nc the next no-car image
cc = int(counters[0])
nc = int(counters[1])

print(cc, nc)

2332 1648


In [9]:
#Interactive labelling loop over every .MP4 file in input_dir.
#Playback pauses on the second frame and then every `pr` frames (30 by default, about one
#second if the video is 30 fps - not checked here). While paused:
#  s        save the current scope: as a car image plus its box when a box was drawn,
#           otherwise as a no-car image with a 0,0,0,0 box
#  c        clear the scope and any drawn box
#  n m , .  continue and pause again after 2 / 5 / 10 / 30 frames
#  x        continue with the default step
#  q / Esc  stop labelling
#The counters are written back to count_file when the cell ends, also if it ends with an error.
#To start a fresh label file, truncate data/label.csv before running this cell.
refPt = []
endPt = []
scope = None
draw  = False
clone = None

cont = True
cr=[]
pr=1

#frames to advance before the next pause, per key
skip_keys = {ord("n"): 2, ord("m"): 5, ord(","): 10, ord("."): 30}

#cv.imwrite does not create missing directories
os.makedirs(data_dir+'c/', exist_ok=True)
os.makedirs(data_dir+'n/', exist_ok=True)

try:
    for file in os.listdir(input_dir):
        if not file.endswith(".MP4"):
            continue

        capture = cv.VideoCapture(input_dir+str(file))
        print(file)

        if not capture.isOpened():
            #an unreadable video should not end the whole session
            print('could not open '+str(file)+', skipping')
            capture.release()
            continue

        fno = -1
        try:
            while cont:
                fno+=1

                isTrue, frame = capture.read()
                if not isTrue:
                    break

                clone = frame.copy()

                cv.namedWindow("image")
                cv.setMouseCallback("image", click_and_crop)
                cv.imshow('image', frame)

                #only pause on every pr-th frame, and never on the very first one
                if fno == 0 or fno%pr != 0:
                    continue

                pr=30
                refPt = []
                endPt = []

                while True:
                    cv.imshow('image', frame)

                    key = cv.waitKey(0) & 0xFF

                    if key == ord("s"):
                        if scope is not None:
                            roi = clone[scope[1]:scope[1]+h2*2, scope[0]:scope[0]+w2*2]
                            has_box = len(refPt)==2 and len(endPt)==2

                            if has_box:
                                img_path = data_dir+'c/'+str(cc)+'.jpg'
                                #box coordinates are stored relative to the saved crop
                                box = [refPt[0]-scope[0], refPt[1]-scope[1],
                                       endPt[0]-scope[0], endPt[1]-scope[1]]
                                row = img_path+','+','.join([str(elem) for elem in box])+'\n'
                            else:
                                img_path = data_dir+'n/'+str(nc)+'.jpg'
                                row = img_path+','+'0,0,0,0\n'

                            #only record the label once the image really is on disk
                            if cv.imwrite(img_path, roi):
                                with open(data_dir+label_file,'a+') as f:
                                    f.write(row)
                                if has_box:
                                    cc+=1
                                else:
                                    nc+=1
                            else:
                                print('could not write '+img_path)

                            frame = clone.copy()
                            scope = None
                            refPt = []
                            endPt = []

                    elif key == ord("c"):
                        frame = clone.copy()
                        scope = None
                        #also forget the box, otherwise a later save would still use it
                        refPt = []
                        endPt = []

                    elif key == ord("q") or key == 27:
                        cont = False
                        break

                    elif key in skip_keys:
                        pr = skip_keys[key]
                        break

                    elif key == ord("x"):
                        break
        finally:
            #release this video before moving on to the next one
            capture.release()

        if not cont:
            break
finally:
    with open(data_dir+count_file,'w') as f:
        f.write(str(cc)+","+str(nc))

    cv.destroyAllWindows()


GRMN0075.MP4


In [9]:
#release the last opened video (release must be called, not just referenced) and close
#any OpenCV windows that are still open
capture.release()
cv.destroyAllWindows()

In [None]:
|