In [1]:
# Using the camera video data recorded in this lab, work with your team to create
# shared Python code on GitHub that successfully identifies the following objects:
#   - At least one pedestrian
#   - At least one stop sign
#   - At least one vehicle
#   - Any other object

In [5]:
from IPython.display import Video
import numpy as np
import cv2
from matplotlib import pyplot as plt

In [29]:
# Load the COCO class labels (one label per line). The context manager closes
# the file handle once the labels have been read:
with open("coco.names") as names_file:
    class_names = names_file.read().strip().split("\n")

# Load the YOLOv3 network from its Darknet config and weights files:
net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")

def process(image, conf_threshold=0.25, score_threshold=0.5, nms_threshold=0.3, verbose=True):
    """Run the module-level YOLOv3 `net` on a frame and draw the detections on it.

    The frame is annotated in place (green boxes plus a "label: confidence"
    caption taken from the module-level `class_names`) and also returned.

    Args:
        image: Frame as a NumPy image array (e.g. from `VideoCapture.read()`).
        conf_threshold: Minimum best-class score for a raw detection to be kept
            before non-maxima suppression.
        score_threshold: Score threshold passed to `cv2.dnn.NMSBoxes`.
        nms_threshold: Overlap threshold passed to `cv2.dnn.NMSBoxes`.
        verbose: When True (default), print the blob shape and inference time
            for every call, as the original implementation did.

    Returns:
        The same `image` object, with boxes and labels drawn on it.

    Raises:
        ValueError: If `image` is None or empty (e.g. a failed video read).
    """
    if image is None or image.size == 0:
        raise ValueError("process() received an empty frame (did VideoCapture.read() fail?)")

    (H, W) = image.shape[:2]

    # Get the output layer names. getUnconnectedOutLayers() returns an Nx1 array
    # on some OpenCV versions and a flat array on others; flattening handles both.
    all_layer_names = net.getLayerNames()
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    layer_names = [all_layer_names[int(i) - 1] for i in out_layer_ids]

    # Create the blob with a size of (416, 416), swap red and blue channels
    # and apply a scale factor of 1/255:
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    if verbose:
        print(blob.shape)

    # Feed the input blob to the network, perform inference and get the output:
    net.setInput(blob)
    layerOutputs = net.forward(layer_names)

    # Report inference time:
    if verbose:
        t, _ = net.getPerfProfile()
        print('Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency()))

    boxes = []
    confidences = []
    class_ids = []

    # Loop over each of the layer outputs, then over each detection row:
    for output in layerOutputs:
        for detection in output:
            # Entries from index 5 onward are the per-class scores; take the best one.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]

            # Filter out weak predictions:
            if confidence > conf_threshold:
                # The first four entries are (center x, center y, width, height)
                # relative to the frame; scale them to pixel units.
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # Convert the box center to its top-left corner:
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maxima suppression: drop weak and heavily overlapping boxes.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold, nms_threshold)

    # Draw the surviving detections (if any). The return type of NMSBoxes varies
    # between OpenCV versions, so normalise it to a flat sequence of ints.
    if len(indices) > 0:
        for i in np.asarray(indices).flatten():
            i = int(i)
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # Bounding box:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Caption on a filled background; clamp y so the caption stays inside the frame.
            label = "{}: {:.4f}".format(class_names[class_ids[i]], confidences[i])
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
            y = max(y, labelSize[1])
            cv2.rectangle(image, (x, y - labelSize[1]), (x + labelSize[0], y + 0), (0, 255, 0), cv2.FILLED)
            cv2.putText(image, label, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

    return image

In [30]:
# Preview the raw input clip in the notebook before running detection on it.
Video("Lab3Video1_Trim.mp4")

In [31]:
cap = cv2.VideoCapture('Lab3Video1_Trim.mp4')

# Size the writer from the clip itself: `width`/`height` are not defined at
# notebook scope, and VideoWriter expects frames of exactly the size it was
# opened with (process() returns frames at the source resolution).
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter('StopSign.avi', cv2.VideoWriter_fourcc(*'MJPG'), 20, frame_size)

In [32]:
# Run detection on every frame of the clip, saving and displaying the result.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # read() reports ret=False once no frame could be grabbed (end of clip);
        # stop instead of handing an empty frame to process().
        if not ret:
            break
        frame = process(frame)

        writer.write(frame)
        cv2.imshow('frame', frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture, finalize the output file and close the
    # preview window, even if processing raised.
    cap.release()
    writer.release()
    cv2.destroyAllWindows()

(1, 3, 416, 416)
Inference time: 1745.08 ms
(1, 3, 416, 416)
Inference time: 1033.30 ms
(1, 3, 416, 416)
Inference time: 1089.16 ms


In [33]:
# Preview the raw input clip in the notebook before running detection on it.
Video('Lab3Video2_Trim.mp4')

In [34]:
caps = cv2.VideoCapture('Lab3Video2_Trim.mp4')

# Size the writer from the clip itself: `width`/`height` are not defined at
# notebook scope, and VideoWriter expects frames of exactly the size it was
# opened with (process() returns frames at the source resolution).
frame_size = (int(caps.get(cv2.CAP_PROP_FRAME_WIDTH)), int(caps.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter('BillyWalking.avi', cv2.VideoWriter_fourcc(*'MJPG'), 20, frame_size)

In [36]:
# Run detection on every frame of the clip, saving and displaying the result.
try:
    while caps.isOpened():
        ret, frame = caps.read()
        # read() reports ret=False once no frame could be grabbed (end of clip);
        # stop instead of handing an empty frame to process().
        if not ret:
            break
        frame = process(frame)

        writer.write(frame)
        cv2.imshow('frame', frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Release the capture this loop actually reads from (`caps`, not `cap`),
    # finalize the output file and close the preview window, even on error.
    caps.release()
    writer.release()
    cv2.destroyAllWindows()

(1, 3, 416, 416)
Inference time: 1852.94 ms
(1, 3, 416, 416)
Inference time: 850.99 ms
(1, 3, 416, 416)
Inference time: 777.11 ms
(1, 3, 416, 416)
Inference time: 811.80 ms
(1, 3, 416, 416)
Inference time: 784.52 ms
(1, 3, 416, 416)
Inference time: 788.59 ms
(1, 3, 416, 416)
Inference time: 788.93 ms
(1, 3, 416, 416)
Inference time: 805.10 ms
(1, 3, 416, 416)
Inference time: 781.24 ms
(1, 3, 416, 416)
Inference time: 965.90 ms
(1, 3, 416, 416)
Inference time: 912.65 ms
(1, 3, 416, 416)
Inference time: 806.70 ms
(1, 3, 416, 416)
Inference time: 777.51 ms
(1, 3, 416, 416)
Inference time: 773.47 ms
(1, 3, 416, 416)
Inference time: 790.47 ms
(1, 3, 416, 416)
Inference time: 783.93 ms
(1, 3, 416, 416)
Inference time: 784.44 ms
(1, 3, 416, 416)
Inference time: 829.46 ms
(1, 3, 416, 416)
Inference time: 797.84 ms
(1, 3, 416, 416)
Inference time: 838.46 ms
(1, 3, 416, 416)
Inference time: 816.78 ms
(1, 3, 416, 416)
Inference time: 825.75 ms
(1, 3, 416, 416)
Inference time: 777.20 ms
(1, 3, 416

In [37]:
# Preview the raw input clip in the notebook before running detection on it.
Video('Lab3Video3_Trim.mp4')

In [38]:
capz = cv2.VideoCapture('Lab3Video3_Trim.mp4')

# Size the writer from the clip itself: `width`/`height` are not defined at
# notebook scope, and VideoWriter expects frames of exactly the size it was
# opened with (process() returns frames at the source resolution).
frame_size = (int(capz.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capz.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter('Cars_and_Parking_Garage.avi', cv2.VideoWriter_fourcc(*'MJPG'), 20, frame_size)

In [39]:
# Run detection on every frame of the clip, saving and displaying the result.
try:
    while capz.isOpened():
        ret, frame = capz.read()
        # read() reports ret=False once no frame could be grabbed (end of clip);
        # stop instead of handing an empty frame to process().
        if not ret:
            break
        frame = process(frame)

        writer.write(frame)
        cv2.imshow('frame', frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Release the capture this loop actually reads from (`capz`, not `cap`),
    # finalize the output file and close the preview window, even on error.
    capz.release()
    writer.release()
    cv2.destroyAllWindows()

(1, 3, 416, 416)
Inference time: 2147.77 ms
(1, 3, 416, 416)
Inference time: 903.80 ms
(1, 3, 416, 416)
Inference time: 791.51 ms
(1, 3, 416, 416)
Inference time: 782.96 ms
(1, 3, 416, 416)
Inference time: 866.72 ms
(1, 3, 416, 416)
Inference time: 971.57 ms
(1, 3, 416, 416)
Inference time: 848.05 ms
(1, 3, 416, 416)
Inference time: 823.53 ms
(1, 3, 416, 416)
Inference time: 778.49 ms
(1, 3, 416, 416)
Inference time: 776.68 ms
(1, 3, 416, 416)
Inference time: 799.49 ms
(1, 3, 416, 416)
Inference time: 769.69 ms
(1, 3, 416, 416)
Inference time: 787.88 ms
(1, 3, 416, 416)
Inference time: 820.05 ms
(1, 3, 416, 416)
Inference time: 871.36 ms
(1, 3, 416, 416)
Inference time: 813.84 ms
(1, 3, 416, 416)
Inference time: 794.36 ms
(1, 3, 416, 416)
Inference time: 776.82 ms
(1, 3, 416, 416)
Inference time: 801.97 ms
(1, 3, 416, 416)
Inference time: 792.61 ms
(1, 3, 416, 416)
Inference time: 788.39 ms
(1, 3, 416, 416)
Inference time: 793.30 ms
(1, 3, 416, 416)
Inference time: 987.73 ms
(1, 3, 416

In [13]:
# Preview the raw input clip in the notebook before running detection on it.
Video('Lab3Video4_Trim.mp4')

In [44]:
capx = cv2.VideoCapture('Lab3Video4_Trim.mp4')

# Size the writer from the clip itself: `width`/`height` are not defined at
# notebook scope, and VideoWriter expects frames of exactly the size it was
# opened with (process() returns frames at the source resolution).
frame_size = (int(capx.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capx.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter('Top_Of_Ramp.avi', cv2.VideoWriter_fourcc(*'MJPG'), 20, frame_size)

In [45]:
# Run detection on every frame of the clip, saving and displaying the result.
try:
    while capx.isOpened():
        ret, frame = capx.read()
        # read() reports ret=False once no frame could be grabbed (end of clip);
        # stop instead of handing an empty frame to process().
        if not ret:
            break
        frame = process(frame)

        writer.write(frame)
        cv2.imshow('frame', frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Release the capture this loop actually reads from (`capx`, not `cap`),
    # finalize the output file and close the preview window, even on error.
    capx.release()
    writer.release()
    cv2.destroyAllWindows()

(1, 3, 416, 416)
Inference time: 1824.06 ms
(1, 3, 416, 416)
Inference time: 987.77 ms
(1, 3, 416, 416)
Inference time: 804.43 ms
(1, 3, 416, 416)
Inference time: 796.33 ms
(1, 3, 416, 416)
Inference time: 782.91 ms
