In [1]:
from __future__ import division
import time
import torch 
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2 
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random 
import pickle as pkl
import argparse


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
def get_test_input(input_dim, CUDA):
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (input_dim, input_dim)) 
    img_ =  img[:,:,::-1].transpose((2,0,1))
    img_ = img_[np.newaxis,:,:,:]/255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)
    
    if CUDA:
        img_ = img_.cuda()
    
    return img_

In [3]:
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network. 
    Returns a Variable 
    """
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = cv2.resize(orig_im, (inp_dim, inp_dim))
    img_ = img[:,:,::-1].transpose((2,0,1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

In [4]:
def write(x, img, classes, colors):
    c1 = (int(x[1]), int(x[2]))  # Convert to integers
    c2 = (int(x[3]), int(x[4]))  # Convert to integers
    cls = int(x[-1])
    label = "{0}".format(classes[cls])
    color = random.choice(colors)
    cv2.rectangle(img, c1, c2, color, 1)
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    c2 = (c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4)
    cv2.rectangle(img, c1, c2, color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
    return img

In [5]:
def arg_parse():
    """
    Parse arguements to the detect module
    
    """
    parser = argparse.ArgumentParser(description='YOLO v2 Video Detection Module')
   
    parser.add_argument("--video", dest = 'video', help = 
                        "Video to run detection upon",
                        default = "video.avi", type = str)
    parser.add_argument("--dataset", dest = "dataset", help = "Dataset on which the network has been trained", default = "pascal")
    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)

    return parser.parse_args()

In [6]:
# Assuming CUDA is defined somewhere in your code
CUDA = torch.cuda.is_available()

nms_thresh = 0.4
confidence = 0.5

dataset_train = "pascal"

if dataset_train == "pascal":
    inp_dim = 416
    num_classes = 20
    classes = load_classes('data/voc.names')
    weightsfile = 'yolo-voc.weights'
    cfgfile = "cfg/yolo-voc.cfg"

elif dataset_train == "coco":
    inp_dim = 544
    num_classes = 80
    classes = load_classes('data/coco.names')
    weightsfile = 'yolo.weights'
    cfgfile = "cfg/yolo.cfg"

else:
    print("Invalid dataset")
    exit()

stride = 32
bbox_attrs = 5 + num_classes

print("Loading network.....")
model = Darknet(cfgfile)
model.load_weights(weightsfile)
print("Network successfully loaded")

if CUDA:
    model.cuda()

model(get_test_input(inp_dim, CUDA).to("cuda:0" if CUDA else "cpu"))

model.eval()

videofile = "../Videos/V1.avi"

cap = cv2.VideoCapture(videofile)

if not cap.isOpened():
    print("Error opening video stream or file")

frames = 0
start = time.time()
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        frame = cv2.resize(frame, (int(frame.shape[1] / 2), int(frame.shape[0] / 2)))
        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if CUDA:
            im_dim = im_dim.cuda()

        output = model(Variable(img.to("cuda:0" if CUDA else "cpu"), volatile=True)).data
        output = predict_transform(output, inp_dim, stride, model.anchors, num_classes, confidence, CUDA)

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            # continue

        output = output.float()
        # output = write_results(output, num_classes, nms=True, nms_conf=nms_thresh)

# Check shape of output tensor
        print("Shape of output tensor:", output.shape)
        
        output = write_results(output, num_classes, nms=True, nms_conf=nms_thresh)
        print(output.shape)
        if len(output.shape) == 1:
            output = output.unsqueeze(0)

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim))
        # output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim))

        im_dim = im_dim.repeat(output.size(0), 1) / inp_dim
        output[:, 1:5] *= im_dim

        classes = load_classes('data/voc.names')
        colors = pkl.load(open("pallete", "rb"))

        list(map(lambda x: write(x, orig_im, classes, colors), output))

        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        frames += 1
        print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

    else:
        print("End of video steam")


Loading network.....


Network successfully loaded


  output = model(Variable(img.to("cuda:0" if CUDA else "cpu"), volatile=True)).data


Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  1.09
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  1.91
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  2.57
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  3.20
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  3.74
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  4.22
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  4.63
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  4.78
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  4.97
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size([3, 8])
FPS of the video is  4.99
Shape of output tensor: torch.Size([1, 845, 25])
torch.Size(

IndexError: too many indices for tensor of dimension 1