In [1]:
## download the yolov4 cfg file
# !mkdir -p cfg
!wget https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg
!mv yolov4.cfg cfg

--2025-01-29 17:31:07--  https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘yolov4.cfg’

yolov4.cfg              [ <=>                ] 622.78K  3.62MB/s    in 0.2s    

2025-01-29 17:31:08 (3.62 MB/s) - ‘yolov4.cfg’ saved [637724]



In [2]:
!wget https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/darknet.py
!wget https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/util.py

--2025-01-29 17:31:14--  https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/darknet.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11533 (11K) [text/plain]
Saving to: ‘darknet.py’


2025-01-29 17:31:15 (6.88 MB/s) - ‘darknet.py’ saved [11533/11533]

--2025-01-29 17:31:15--  https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/util.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7432 (7.3K) [text/plain]
Saving to: ‘util.py’


2025-01-29 17:31:16 (45.6 MB/s) 

In [2]:
## parse the yolov4 module
#!pip install opencv-python
import darknet

blocks = darknet.parse_cfg("cfg/yolov4.cfg")
net_info, module_list = darknet.create_modules(blocks)

In [3]:
import torch
import numpy as np
from torch import nn
from torchsummary import summary

class Darknet(nn.Module):
    def __init__(self, blocks, module_list,net_info, CUDA):
        super(Darknet, self).__init__()
        self.blocks = blocks
        self.net_info = net_info
        self.module_list = module_list
        self.CUDA = CUDA
        
    def forward(self, x):
        modules = self.blocks[1:]
        outputs = {}   #We cache the outputs for the route layer
        
        write = 0
        for i, module in enumerate(modules):        
            module_type = (module["type"])
            
            if module_type == "convolutional" or module_type == "upsample":
                # print(i, module_type, self.module_list[i])
                x = self.module_list[i](x)
    
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
                if len(layers) == 1:
                    # Output feature maps from the layer indexed by the value
                    if layers[0] < 0:
                        connect_layer = i + layers[0]
                        x = outputs[connect_layer]
                    elif layers[0] > 0:
                        connect_layer = layers[0]
                        x = outputs[connect_layer]
                        
                elif len(layers) == 2:
                    # Concatenate feature maps from two layers
                    connect_layer1 = i + (layers[0])
                    connect_layer2 = i+ (layers[1])
                    x = torch.cat((outputs[connect_layer1], outputs[connect_layer2]), dim=1)
                elif len(layers) == 4:
                    connect_layer1 = i + (layers[0])
                    connect_layer2 = i + (layers[1])
                    connect_layer3 = i + (layers[2])
                    connect_layer4 = i + (layers[3])
                    x = torch.cat((outputs[connect_layer1], outputs[connect_layer2], outputs[connect_layer3], outputs[connect_layer3]), dim=1)

            elif  module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i-1] + outputs[i+from_]
    
            elif module_type == 'yolo':    
                anchors = self.module_list[i][0].anchors
                #Get the input dimensions
                print(self.net_info["height"])
                inp_dim = int(self.net_info["height"])
        
                #Get the number of classes
                num_classes = int(module["classes"])
        
                #Transform 
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA=False)
                if not write:              #if no collector has been intialised. 
                    detections = x
                    write = 1
        
                else:       
                    detections = torch.cat((detections, x), 1)
        
            outputs[i] = x
        
        return detections


    def load_weights(self, weightfile):
        #Open the weights file
        fp = open(weightfile, "rb")
    
        #The first 5 values are header information 
        # 1. Major version number
        # 2. Minor Version Number
        # 3. Subversion number 
        # 4,5. Images seen by the network (during training)
        header = np.fromfile(fp, dtype = np.int32, count = 5)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]   
        
        weights = np.fromfile(fp, dtype = np.float32)
        
        ptr = 0
        for i in range(len(self.module_list)):
            module_type = self.blocks[i + 1]["type"]
    
            #If module_type is convolutional load weights
            #Otherwise ignore.
            
            if module_type == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0
            
                conv = model[0]
                
                
                if (batch_normalize):
                    bn = model[1]
        
                    #Get the number of weights of Batch Norm Layer
                    num_bn_biases = bn.bias.numel()
        
                    #Load the weights
                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases
        
                    bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr  += num_bn_biases
        
                    bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr  += num_bn_biases
        
                    bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr  += num_bn_biases
        
                    #Cast the loaded weights into dims of model weights. 
                    bn_biases = bn_biases.view_as(bn.bias.data)
                    bn_weights = bn_weights.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)
        
                    #Copy the data to model
                    bn.bias.data.copy_(bn_biases)
                    bn.weight.data.copy_(bn_weights)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)
                
                else:
                    #Number of biases
                    num_biases = conv.bias.numel()
                
                    #Load the weights
                    conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
                    ptr = ptr + num_biases
                
                    #reshape the loaded weights according to the dims of the model weights
                    conv_biases = conv_biases.view_as(conv.bias.data)
                
                    #Finally copy the data
                    conv.bias.data.copy_(conv_biases)
                    
                #Let us load the weights for the Convolutional layers
                num_weights = conv.weight.numel()
                
                #Do the same as above for weights
                conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
                ptr = ptr + num_weights
                
                conv_weights = conv_weights.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weights)

In [4]:
#loading weights fro darknet 
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
yolo_v4=Darknet(blocks, module_list, net_info, CUDA='cpu').to('cpu')
yolo_v4.load_weights('yolov4.weights')
# load_weights_into_model(yolo_v4,'yolov4.weights')

In [6]:
#summary of the yolov4 with dense connections
# summary(yolo_v4.to(device), (3,608,608)).to('cpu')

In [24]:
!wget https://github.com/ayooshkathuria/pytorch-yolo-v3/raw/master/dog-cycle-car.png

--2025-01-30 11:41:39--  https://github.com/ayooshkathuria/pytorch-yolo-v3/raw/master/dog-cycle-car.png
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/ayooshkathuria/pytorch-yolo-v3/master/dog-cycle-car.png [following]
--2025-01-30 11:41:39--  https://raw.githubusercontent.com/ayooshkathuria/pytorch-yolo-v3/master/dog-cycle-car.png
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
connected. to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... 
200 OKequest sent, awaiting response... 
Length: 347445 (339K) [image/png]
Saving to: ‘dog-cycle-car.png’


2025-01-30 11:41:41 (3.43 MB/s) - ‘dog-cycle-car.png’ saved [347445/347445]



In [5]:
import cv2
import torch

import torch
import torch.nn.functional as F

def normalize_tensor_image(tensor_image):
    """
    Normalize the input tensor image using ImageNet mean and std values.
    The image tensor is assumed to be in the format [C, H, W] (channels, height, width).
    """
    # Define mean and standard deviation values for ImageNet (you can adjust for other datasets)
    mean = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32).view(3, 1, 1)  # [C, 1, 1]
    std = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32).view(3, 1, 1)   # [C, 1, 1]

    # Normalize the image tensor
    normalized_tensor = (tensor_image / 255.0 - mean) / std  # First scale, then subtract mean, divide by std
    return normalized_tensor

# Example usage:
# Assuming `img_tensor` is the input tensor with shape [C, H, W]
img_tensor = torch.randn(3, 224, 224)  # Example tensor (replace with your actual image tensor)

normalized_img = normalize_tensor_image(img_tensor)
print(normalized_img.shape)  # It should be the same shape as input [C, H, W]


def get_test_input():
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (608,608))          #Resize to the input dimension
    img_ =  img[:,:,::-1].transpose((2,0,1))  # BGR -> RGB | H X W C -> C X H X W 
    img_ = img_[np.newaxis,:,:,:]/255.0       #Add a channel at 0 (for batch) | Normalise
    img_ = torch.from_numpy(img_).float()     #Convert to float
    img_ = Variable(img_)                     # Convert to Variable
    return img_

torch.Size([3, 224, 224])


In [6]:
from util import *

#model = MyDarknet("cfg/yolov3.cfg")
inp = get_test_input()
inp=normalize_tensor_image(inp)
pred_ = yolo_v4(inp)
# print (pred_)

608
608
608


In [7]:
pred_

tensor([[[1.0937e+00, 1.8076e+00, 1.7396e+01,  ..., 5.0362e-01,
          8.4453e-01, 8.4273e-02],
         [1.2765e+00, 6.3729e-01, 5.4941e+01,  ..., 3.7434e-01,
          2.2933e-01, 3.7951e-01],
         [5.4570e-01, 1.0080e+00, 1.0801e+01,  ..., 2.2132e-01,
          7.0085e-01, 2.2979e-01],
         ...,
         [6.0001e+02, 5.9996e+02, 1.4375e+02,  ..., 4.9847e-01,
          4.9989e-01, 4.9411e-01],
         [5.9992e+02, 6.0000e+02, 1.8958e+02,  ..., 5.0153e-01,
          4.9924e-01, 4.9375e-01],
         [5.9995e+02, 5.9996e+02, 4.5438e+02,  ..., 4.9870e-01,
          4.9830e-01, 4.9466e-01]]])

In [33]:
inp=get_test_input()[0]

In [8]:
#using non_maximum_supression
out=write_results(pred_.detach(), 0.5, 80, nms_conf = 0.4)
print(out.shape)

torch.Size([16142, 8])


In [9]:
def load_classes(namesfile):
    fp = open(namesfile, "r")
    names = fp.read().split("\n")[:-1]
    return names

num_classes = 80
classes = load_classes("data/coco.names")
idx = []
for i in range(num_classes):
    # Filter predictions for the current class
    class_preds = out[out[:, -1] == i]  # Filter based on class ID (last column)
    
    if class_preds.size(0) > 0:  # Ensure there are predictions for the class
        max_prob = torch.max(class_preds[:, 5])
        print(f"Max probability for class {classes[i]}: {max_prob.item()}")
    else:
        print(f"No predictions found for class {classes[i]}")

No predictions found for class person
Max probability for class bicycle: 0.5413703918457031
Max probability for class car: 0.6058207750320435
Max probability for class motorbike: 0.5304465889930725
No predictions found for class aeroplane
Max probability for class bus: 0.517562210559845
No predictions found for class train
No predictions found for class truck
Max probability for class boat: 0.5085288286209106
Max probability for class traffic light: 0.5562366247177124
Max probability for class fire hydrant: 0.580032467842102
No predictions found for class stop sign
No predictions found for class parking meter
Max probability for class bench: 0.5349060297012329
No predictions found for class bird
Max probability for class cat: 0.5429936647415161
Max probability for class dog: 0.8789980411529541
Max probability for class horse: 0.5355207920074463
Max probability for class sheep: 0.5327343940734863
Max probability for class cow: 0.5053259134292603
Max probability for class elephant: 0.518

In [10]:
import cv2
import torch
import random

def load_classes(namesfile):
    with open(namesfile, "r") as fp:
        names = fp.read().split("\n")[:-1]
    return names

# Load COCO class names
classes = load_classes("data/coco.names")

# Assuming 'out' is the predictions tensor, shape = [num_predictions, 8]
# Columns: [image_index, x_min, y_min, x_max, y_max, confidence, class_probabilities, class_id]

# Set up the colors for drawing bounding boxes
colors = [[255, 0, 0], [255, 0, 0], [255, 255, 0], [0, 255, 0], [0, 255, 255], [0, 0, 255], [255, 0, 255]]

# Sample image for demonstration, replace with actual image loading
img = inp[0].permute(1, 2, 0).numpy()  # Convert from [C, H, W] to [H, W, C]
# Filter out predictions with low confidence
confidence_threshold = 0.9
filtered_preds = out[out[:, 5] > confidence_threshold]
# Iterate over each class
for i in range(len(classes)):
    # Filter predictions for the current class (assuming class_id is in the last column, index 7)
    # class_preds = out[out[:, -1] == i]  # Filter based on class ID (last column)
    class_preds = filtered_preds[filtered_preds[:, -1] == i]  # class_id is in column index -1
    if class_preds.size(0) > 0:  # Ensure there are predictions for the class
        # Find the index of the prediction with the maximum confidence
        max_idx = torch.argmax(class_preds[:, 5])  # Confidence is assumed to be in the 6th column (index 5)
        
        # Extract the bounding box coordinates and confidence for the max probability prediction
        bbox = class_preds[max_idx]  # Get the prediction with the max confidence
        x_min, y_min, x_max, y_max = bbox[1:5]  # Extract the bounding box coordinates
        
        # Convert the bounding box coordinates to integers (for drawing)
        c1 = (int(x_min), int(y_min))  # Top-left corner (x_min, y_min)
        c2 = (int(x_max), int(y_max))  # Bottom-right corner (x_max, y_max)
        
        # Pick a random color for the bounding box
        color = random.choice(colors)
        
        # Draw the rectangle on the image
        cv2.rectangle(img, c1, c2, color, 2)  # Draw rectangle with thickness 2
        
        # Get the class label and confidence score
        label = classes[i]  # Get class label from the classes list
        confidence = bbox[5].item()  # Confidence is in the 6th column (index 5)
        
        # Create label text
        label_text = f"{label}: {confidence:.2f}"
        
        # Draw the label background
        t_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = (c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4)
        cv2.rectangle(img, c1, c2, color, -1)  # Draw filled rectangle for label background
        
        # Put the label text on the image
        cv2.putText(img, label_text, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    
    else:
        print(f"No predictions found for class {classes[i]}")

# Save the image with bounding boxes to a file
cv2.imwrite('output_image_with_bboxes.jpg', img)  # Save the result as an image

print("Image saved as 'output_image_with_bboxes.jpg'")




No predictions found for class person
No predictions found for class bicycle
No predictions found for class car
No predictions found for class motorbike
No predictions found for class aeroplane
No predictions found for class bus
No predictions found for class train
No predictions found for class truck
No predictions found for class boat
No predictions found for class traffic light
No predictions found for class fire hydrant
No predictions found for class stop sign
No predictions found for class parking meter
No predictions found for class bench
No predictions found for class bird
No predictions found for class cat
No predictions found for class dog
No predictions found for class horse
No predictions found for class sheep
No predictions found for class cow
No predictions found for class elephant
No predictions found for class bear
No predictions found for class zebra
No predictions found for class giraffe
No predictions found for class backpack
No predictions found for class umbrella
No

[ WARN:0@73.653] global loadsave.cpp:848 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.


In [37]:
inp

tensor([[[[255.0000, 255.0000, 255.0000,  ...,  -2.1066,  -2.1131,  -2.1135],
          [255.0000, 255.0000, 255.0000,  ..., 255.0000, 255.0000, 255.0000],
          [255.0000, 255.0000, 255.0000,  ..., 255.0000, 255.0000, 255.0000],
          ...,
          [255.0000, 255.0000,  -2.1072,  ...,  -2.1109,  -2.1137,  -2.1143],
          [255.0000, 255.0000,  -2.1072,  ..., 255.0000, 255.0000, 255.0000],
          [255.0000, 255.0000,  -2.1072,  ..., 255.0000, 255.0000, 255.0000]],

         [[  0.0000,   0.0000,   0.0000,  ...,  -2.0236,  -2.0306,  -2.0312],
          [  0.0000,   0.0000,   0.0000,  ..., 255.0000, 255.0000, 255.0000],
          [  0.0000,   0.0000,   0.0000,  ..., 255.0000, 255.0000, 255.0000],
          ...,
          [  0.0000,   0.0000,  -2.0241,  ...,  -2.0288,  -2.0317,  -2.0324],
          [  0.0000,   0.0000,  -2.0242,  ..., 255.0000, 255.0000, 255.0000],
          [  0.0000,   0.0000,  -2.0242,  ..., 255.0000, 255.0000, 255.0000]],

         [[  0.0000,   0.0000,

In [9]:
out.shape

torch.Size([17949, 8])

In [9]:
!wget https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/data/coco.names
!mkdir data
!mv coco.names data/coco.names

--2025-01-30 15:05:03--  https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/data/coco.names
185.199.110.133, 185.199.111.133, 185.199.109.133, ...tent.com)... 
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 625 [text/plain]
Saving to: ‘coco.names’


2025-01-30 15:05:04 (26.0 MB/s) - ‘coco.names’ saved [625/625]

mkdir: cannot create directory ‘data’: File exists


In [12]:
!wget https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/detect.py

--2025-01-30 15:05:39--  https://raw.githubusercontent.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch/master/detect.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...
connected. to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... 
HTTP request sent, awaiting response... 200 OK
Length: 7273 (7.1K) [text/plain]
Saving to: ‘detect.py’


2025-01-30 15:05:40 (44.4 MB/s) - ‘detect.py’ saved [7273/7273]



In [13]:
!mkdir -p cocoimages
!cp dog-cycle-car.png cocoimages/

In [11]:
from __future__ import division
import time
import torch 
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2 
from util import *
import argparse
import os 
import os.path as osp
# from darknet import Darknet
import pickle as pkl
import pandas as pd
import random

images = "cocoimages"
batch_size = 4
confidence = 0.5
nms_thesh = 0.2
start = 0
CUDA = False

num_classes = 80
classes = load_classes("data/coco.names")

#Set up the neural network
# model=Darknet(blocks, module_list, net_info, CUDA='cpu').to('cpu')
# model.load_weights('yolov4.weights')

yolo_v4.net_info["height"] = 608
yolo_v4.net_info["width"] = 608
inp_dim = int(yolo_v4.net_info["height"])
yolo_v4.eval()
read_dir = time.time()
assert inp_dim % 32 == 0 
assert inp_dim > 32
try:
    imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
except NotADirectoryError:
    imlist = []
    imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
    print ("No file or directory with the name {}".format(images))
    exit()
    
if not os.path.exists("des"):
    os.makedirs("des")

load_batch = time.time()
# loaded_ims = [letterbox_image(cv2.imread(x), (inp_dim, inp_dim)) for x in imlist]

# img = cv2.imread(imlist[0])

print(type(img), img.shape)
img = letterbox_image(img, (inp_dim, inp_dim))
cv2.imwrite('test.jpg', img)
img = cv2.imread('test.jpg')
print(img.shape)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
# print(img.shape)
# img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
print(img.shape)

loaded_ims = [img]


im_batches = list(map(prep_image, loaded_ims, [inp_dim for x in range(len(imlist))]))


im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
print(im_dim_list)

leftover = 0
if (len(im_dim_list) % batch_size):
    leftover = 1

if batch_size != 1:
    num_batches = len(imlist) // batch_size + leftover            
    im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
                        len(im_batches))]))  for i in range(num_batches)]  

write = 0

if CUDA:
    im_dim_list = im_dim_list.cuda()
    
start_det_loop = time.time()
for i, batch in enumerate(im_batches):
    # Load the image 
    start = time.time()
    if CUDA:
        batch = batch.cuda()
    with torch.no_grad():
        prediction = yolo_v4(Variable(batch))

    prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thesh)

    end = time.time()

    if type(prediction) == int:

        for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
            im_id = i*batch_size + im_num
            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", ""))
            print("----------------------------------------------------------")
        continue

    prediction[:,0] += i*batch_size    #transform the atribute from index in batch to index in imlist 

    if not write:                      #If we have't initialised output
        output = prediction  
        write = 1
    else:
        output = torch.cat((output,prediction))

    for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
        im_id = i*batch_size + im_num
        objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
        print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
        # print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
        print("----------------------------------------------------------")

    if CUDA:
        torch.cuda.synchronize()       
try:
    output
except NameError:
    print ("No detections were made")
    exit()

im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())

scaling_factor = torch.min(yolo_v4.net_info["height"]/im_dim_list,1)[0].view(-1,1)

output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2

output[:,1:5] /= scaling_factor

for i in range(output.shape[0]):
    output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
    output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
    
output_recast = time.time()
class_load = time.time()
colors = [[255, 0, 0], [255, 0, 0], [255, 255, 0], [0, 255, 0], [0, 255, 255], [0, 0, 255], [255, 0, 255]]

draw = time.time()

def write(x, results):
    # Extract coordinates and class label from YOLOv4 output
    c1 = tuple(map(int, x[1:3]))  # Top-left corner (x_min, y_min)
    c2 = tuple(map(int, x[3:5]))  # Bottom-right corner (x_max, y_max)
    
    img = results[int(x[0])]  # Get the corresponding image from results list
    
    # Ensure image is a NumPy array
    if img is None or len(img.shape) != 3:
        print("Invalid image!")
        return img
    
    # Get the predicted class and corresponding label
    cls = int(x[-1])
    label = classes[cls]  # Get class label from classes list
    
    # Pick a random color for the bounding box
    color = random.choice(colors)
    
    # Draw the rectangle around the detected object
    cv2.rectangle(img, c1, c2, color, 2)  # Draw rectangle with thickness 2
    
    # Get the size of the label text to fit the rectangle
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    
    # Adjust the bottom-right corner to fit the label text
    c2 = (c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4)
    
    # Draw a filled rectangle for the label background
    cv2.rectangle(img, c1, c2, color, -1)
    
    # Put the label text on top of the rectangle
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    
    return img


list(map(lambda x: write(x, loaded_ims), output))

det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format("des",x.split("/")[-1]))

list(map(cv2.imwrite, det_names, [cv2.cvtColor(loaded_ims[0], cv2.COLOR_BGR2RGB)]))
end = time.time()

print("SUMMARY")
print("----------------------------------------------------------")
print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
print()
print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) +  " images)", output_recast - start_det_loop))
print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
print("----------------------------------------------------------")


torch.cuda.empty_cache()

<class 'numpy.ndarray'> (608, 608, 3)
(608, 608, 3)
(608, 608, 3)
tensor([[608., 608., 608., 608.]])
608
608
608
dog-cycle-car.png    predicted in 70.530 seconds
----------------------------------------------------------


ValueError: cannot convert float NaN to integer

In [22]:
import random
import cv2

# Assuming 'colors' and 'classes' are already defined somewhere
# Example: colors = [(0, 255, 0), (255, 0, 0), ...]
# Example: classes = ["person", "car", "dog", ...]

def write(x, results):
    # Extract coordinates and class label from YOLOv4 output
    c1 = tuple(map(int, x[1:3]))  # Top-left corner (x_min, y_min)
    c2 = tuple(map(int, x[3:5]))  # Bottom-right corner (x_max, y_max)
    
    img = results[int(x[0])]  # Get the corresponding image from results list
    
    # Ensure image is a NumPy array
    if img is None or len(img.shape) != 3:
        print("Invalid image!")
        return img
    
    # Get the predicted class and corresponding label
    cls = int(x[-1])
    label = classes[cls]  # Get class label from classes list
    
    # Pick a random color for the bounding box
    color = random.choice(colors)
    
    # Draw the rectangle around the detected object
    cv2.rectangle(img, c1, c2, color, 2)  # Draw rectangle with thickness 2
    
    # Get the size of the label text to fit the rectangle
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    
    # Adjust the bottom-right corner to fit the label text
    c2 = (c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4)
    
    # Draw a filled rectangle for the label background
    cv2.rectangle(img, c1, c2, color, -1)
    
    # Put the label text on top of the rectangle
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    
    return img

list(map(lambda x: write(x, loaded_ims), output))

det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format("des",x.split("/")[-1]))

list(map(cv2.imwrite, det_names, [cv2.cvtColor(loaded_ims[0], cv2.COLOR_BGR2RGB)]))

ValueError: cannot convert float NaN to integer

In [12]:
output

tensor([[0., nan, nan,  ..., nan, nan, 0.],
        [0., nan, nan,  ..., nan, nan, 0.],
        [0., nan, nan,  ..., nan, nan, 0.],
        ...,
        [0., nan, nan,  ..., nan, nan, 0.],
        [0., nan, nan,  ..., nan, nan, 0.],
        [0., nan, nan,  ..., nan, nan, 0.]])