In [16]:
import os
import sys
import time
import cv2
from PIL import Image, ImageDraw
from utils import *
from darknet import Darknet

In [17]:
def detect(cfgfile, weightfile, videoFile):
    """Run the Darknet detector on every frame of a video and save the results.

    Each frame is converted from OpenCV's BGR order to RGB, resized to the
    network's input size (``m.width`` x ``m.height``), passed to ``do_detect``
    and then handed to ``plot_boxes``, which is given the path
    ``<video name without extension>_detections/output_<index>.jpg``.
    A timing summary is printed once the video is exhausted.

    Args:
        cfgfile: Path of the network configuration file passed to ``Darknet``.
        weightfile: Path of the weights file passed to ``load_weights``.
        videoFile: Path of the video to process.

    Raises:
        IOError: If ``videoFile`` cannot be opened by OpenCV.
    """
    frame_stream = cv2.VideoCapture(videoFile)
    # Fail fast, before the (slow) model load, if the video is unusable.
    if not frame_stream.isOpened():
        frame_stream.release()
        raise IOError('Could not open video file: %s' % videoFile)

    try:
        m = Darknet(cfgfile)

        m.print_network()
        m.load_weights(weightfile)
        print('Loading weights from %s... Done!' % (weightfile))

        num_classes = 80
        if num_classes == 20:
            namesfile = 'data/voc.names'
        elif num_classes == 80:
            namesfile = 'data/coco.names'
        else:
            namesfile = 'data/names'
        # The class names do not change between frames, so load them once.
        class_names = load_class_names(namesfile)

        use_cuda = 0
        if use_cuda:
            m.cuda()

        # splitext copes with extensions of any length (or none at all).
        output_dir = os.path.splitext(videoFile)[0] + '_detections'
        # Re-running on the same video must not fail on an existing directory.
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        frame_count = 0
        total_time = 0.0
        warmed_up = False
        while True:
            is_valid, frame = frame_stream.read()
            if not is_valid:
                # End of stream (or a read error): stop processing.
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            sized = Image.fromarray(frame).resize((m.width, m.height))

            if not warmed_up:
                # One untimed pass so one-off start-up cost does not skew the
                # timing; doing this per frame would only double the work.
                do_detect(m, sized, 0.5, 0.4, use_cuda)
                warmed_up = True

            # 0.5 / 0.4 are presumably the confidence and NMS thresholds
            # expected by do_detect -- TODO confirm against utils.
            t_start = time.time()
            boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)
            t_finish = time.time()
            total_time += t_finish - t_start

            classified_frame = output_dir + "/output_" + str(frame_count) + ".jpg"
            plot_boxes(sized, boxes, classified_frame, class_names)
            # Count every processed frame, including the last one.
            frame_count += 1
    finally:
        # Always hand the video handle back to OpenCV, even on error.
        frame_stream.release()

    # Guard against a video that opened but yielded no frames.
    avgtime_per_frame = total_time / frame_count if frame_count else 0.0
    time_elapsed_mins, time_elapsed_secs = divmod(total_time, 60)
    print("---------yolo3" + str(videoFile) + " frames classified-----------")
    print("Average computation time per frame: " + str(avgtime_per_frame))
    print("Completed. Total time for computation: " + str(time_elapsed_mins) + " minutes, " + str(time_elapsed_secs) + " seconds.")
    
def detect_one(cfgfile, weightfile, img):
    """Run the Darknet detector on a single image and return the plotted result.

    The image is resized to the network's input size (``m.width`` x
    ``m.height``) before detection. The elapsed time of the timed detection
    pass is printed.

    Args:
        cfgfile: Path of the network configuration file passed to ``Darknet``.
        weightfile: Path of the weights file passed to ``load_weights``.
        img: The image to analyse. May be a file path (opened with PIL and
            converted to RGB), a ``PIL.Image.Image``, or any other object
            accepted by ``Image.fromarray``.
            NOTE(review): arrays are assumed to already be in RGB channel
            order; frames read with cv2 are BGR and must be converted first.

    Returns:
        Whatever ``plot_boxes`` returns when called with no output filename.
    """
    m = Darknet(cfgfile)

    m.print_network()
    m.load_weights(weightfile)
    print('Loading weights from %s... Done!' % (weightfile))

    num_classes = 80
    if num_classes == 20:
        namesfile = 'yolo3/data/voc.names'
    elif num_classes == 80:
        namesfile = 'yolo3/data/coco.names'
    else:
        namesfile = 'yolo3/data/names'

    use_cuda = 0
    if use_cuda:
        m.cuda()

    # Normalise the input to a PIL image; previously `img` was never used and
    # the detector was fed an undefined `sized` variable (NameError).
    if isinstance(img, str):
        img = Image.open(img).convert('RGB')
    elif not isinstance(img, Image.Image):
        img = Image.fromarray(img)
    sized = img.resize((m.width, m.height))

    # Untimed warm-up pass so one-off start-up cost is not part of the
    # reported time, followed by the timed pass whose boxes are used.
    do_detect(m, sized, 0.5, 0.4, use_cuda)
    t_start = time.time()
    boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)
    t_finish = time.time()

    class_names = load_class_names(namesfile)
    print(t_finish-t_start)
    return plot_boxes(sized, boxes, None, class_names)
    
    
if __name__ == '__main__':
    # Process each vehicle-camera recording in turn with the YOLOv3 model.
    camera_videos = ('camera_1.mp4', 'camera_2.mp4', 'camera_5.mp4', 'camera_6.mp4')
    for video_name in camera_videos:
        detect('cfg/yolov3.cfg', 'yolov3.weights', video_name)

layer     filters    size              input                output
    0 conv     32  3 x 3 / 1   416 x 416 x   3   ->   416 x 416 x  32
    1 conv     64  3 x 3 / 2   416 x 416 x  32   ->   208 x 208 x  64
    2 conv     32  1 x 1 / 1   208 x 208 x  64   ->   208 x 208 x  32
    3 conv     64  3 x 3 / 1   208 x 208 x  32   ->   208 x 208 x  64
    4 shortcut 1
    5 conv    128  3 x 3 / 2   208 x 208 x  64   ->   104 x 104 x 128
    6 conv     64  1 x 1 / 1   104 x 104 x 128   ->   104 x 104 x  64
    7 conv    128  3 x 3 / 1   104 x 104 x  64   ->   104 x 104 x 128
    8 shortcut 5
    9 conv     64  1 x 1 / 1   104 x 104 x 128   ->   104 x 104 x  64
   10 conv    128  3 x 3 / 1   104 x 104 x  64   ->   104 x 104 x 128
   11 shortcut 8
   12 conv    256  3 x 3 / 2   104 x 104 x 128   ->    52 x  52 x 256
   13 conv    128  1 x 1 / 1    52 x  52 x 256   ->    52 x  52 x 128
   14 conv    256  3 x 3 / 1    52 x  52 x 128   ->    52 x  52 x 256
   15 shortcut 12
   16 conv    128  1 x 1

  cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0,1))).data


car: 0.999967
car: 0.999994
car: 0.999464
car: 0.999936
car: 0.999997
car: 0.899496
car: 0.999890
save plot results to camera_1_detections/output_0.jpg
car: 0.999967
car: 0.999993
car: 0.999964
car: 0.999435
car: 0.999997
car: 0.915991
car: 0.999892
save plot results to camera_1_detections/output_1.jpg
car: 0.999992
car: 0.999883
car: 0.999942
car: 0.998814
car: 0.999995
car: 0.874761
person: 0.999998
car: 0.999561
save plot results to camera_1_detections/output_2.jpg
car: 0.999993
car: 0.999932
car: 0.999921
car: 0.998931
car: 0.999993
person: 1.000000
car: 0.834540
car: 0.997008
save plot results to camera_1_detections/output_3.jpg
car: 0.999994
car: 0.999991
car: 0.999937
person: 1.000000
car: 0.998622
car: 0.999993
car: 0.745533
car: 0.993696
save plot results to camera_1_detections/output_4.jpg
car: 0.999982
car: 0.999993
person: 1.000000
car: 0.999905
car: 0.998795
car: 0.999994
car: 0.809488
car: 0.992153
save plot results to camera_1_detections/output_5.jpg
car: 0.999950
car: 0

KeyboardInterrupt: 

In [None]:
# Extract selected frames from camera_6.mp4 and save each one as a PNG
# named camera6_<frame index>.png in the current directory.

frame_stream = cv2.VideoCapture("camera_6.mp4")
imgs = [2381, 4581, 3748, 3767, 3857, 4208]
try:
    for img in imgs:
        # Seek to the requested frame index (property id 1 in OpenCV).
        frame_stream.set(cv2.CAP_PROP_POS_FRAMES, img)
        is_valid, frame = frame_stream.read()
        if not is_valid:
            # An index past the end of the video (or an unopened file) yields
            # no frame; cv2.imwrite would raise on it, so skip with a notice.
            print("Could not read frame " + str(img) + " from camera_6.mp4; skipping.")
            continue
        cv2.imwrite("camera6_"+str(img)+".png", frame)
finally:
    # Release the video handle even if seeking or writing fails.
    frame_stream.release()