In [5]:
from __future__ import division

import torch 
import random

import numpy as np
import cv2


In [6]:
def confidence_filter(result, confidence):
    conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2)
    result = result*conf_mask    
    
    return result

In [7]:
def confidence_filter_cls(result, confidence):
    max_scores = torch.max(result[:,:,5:25], 2)[0]
    res = torch.cat((result, max_scores),2)
    print(res.shape)
    
    
    cond_1 = (res[:,:,4] > confidence).float()
    cond_2 = (res[:,:,25] > 0.995).float()
    
    conf = cond_1 + cond_2
    conf = torch.clamp(conf, 0.0, 1.0)
    conf = conf.unsqueeze(2)
    result = result*conf   
    return result

def get_abs_coord(box):
    box[2], box[3] = abs(box[2]), abs(box[3])
    x1 = (box[0] - box[2]/2) - 1 
    y1 = (box[1] - box[3]/2) - 1 
    x2 = (box[0] + box[2]/2) - 1 
    y2 = (box[1] + box[3]/2) - 1
    return x1, y1, x2, y2
    


def sanity_fix(box):
    if (box[0] > box[2]):
        box[0], box[2] = box[2], box[0]
    
    if (box[1] >  box[3]):
        box[1], box[3] = box[3], box[1]
        
    return box

In [8]:
def bbox_iou(box1, box2):
    """
    Returns the IoU of two bounding boxes 
    
    
    """
    #Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]
    
    #get the corrdinates of the intersection rectangle
    inter_rect_x1 =  torch.max(b1_x1, b2_x1)
    inter_rect_y1 =  torch.max(b1_y1, b2_y1)
    inter_rect_x2 =  torch.min(b1_x2, b2_x2)
    inter_rect_y2 =  torch.min(b1_y2, b2_y2)
    
    #Intersection area
    if torch.cuda.is_available():
            inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda())
    else:
            inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape))
    
    #Union Area
    b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)
    
    iou = inter_area / (b1_area + b2_area - inter_area)
    
    return iou


def pred_corner_coord(prediction):
    #Get indices of non-zero confidence bboxes
    ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()
    
    box = prediction[ind_nz[0], ind_nz[1]]
    
    
    box_a = box.new(box.shape)
    box_a[:,0] = (box[:,0] - box[:,2]/2)
    box_a[:,1] = (box[:,1] - box[:,3]/2)
    box_a[:,2] = (box[:,0] + box[:,2]/2) 
    box_a[:,3] = (box[:,1] + box[:,3]/2)
    box[:,:4] = box_a[:,:4]
    
    prediction[ind_nz[0], ind_nz[1]] = box
    
    return prediction




def write(x, batches, results, colors, classes):
    c1 = tuple(x[1:3].int())
    c2 = tuple(x[3:5].int())
    img = results[int(x[0])]
    cls = int(x[-1])
    label = "{0}".format(classes[cls])
    color = random.choice(colors)
    cv2.rectangle(img, c1, c2,color, 1)
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
    c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
    cv2.rectangle(img, c1, c2,color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1);
    return img

In [9]:
from __future__ import division

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
import cv2 
import matplotlib.pyplot as plt


def count_parameters(model):
    return sum(p.numel() for p in model.parameters())

def count_learnable_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def convert2cpu(matrix):
    if matrix.is_cuda:
        return torch.FloatTensor(matrix.size()).copy_(matrix)
    else:
        return matrix

def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
    batch_size = prediction.size(0)
    stride =  inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    
    anchors = [(a[0]/stride, a[1]/stride) for a in anchors]



    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1,2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)


    #Sigmoid the  centre_X, centre_Y. and object confidencce
    prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
    prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
    prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
    

    
    #Add the center offsets
    grid_len = np.arange(grid_size)
    a,b = np.meshgrid(grid_len, grid_len)
    
    x_offset = torch.FloatTensor(a).view(-1,1)
    y_offset = torch.FloatTensor(b).view(-1,1)
    
    if CUDA:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()
    
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
    
    prediction[:,:,:2] += x_y_offset
      
    #log space transform height and the width
    anchors = torch.FloatTensor(anchors)
    
    if CUDA:
        anchors = anchors.cuda()
    
    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
    prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors

    #Softmax the class scores
    prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))

    prediction[:,:,:4] *= stride
   
    
    return prediction

def load_classes(namesfile):
    fp = open(namesfile, "r")
    names = fp.read().split("\n")[:-1]
    return names

def get_im_dim(im):
    im = cv2.imread(im)
    w,h = im.shape[1], im.shape[0]
    return w,h

def unique(tensor):
    tensor_np = tensor.cpu().numpy()
    unique_np = np.unique(tensor_np)
    unique_tensor = torch.from_numpy(unique_np)
    
    tensor_res = tensor.new(unique_tensor.shape)
    tensor_res.copy_(unique_tensor)
    return tensor_res

def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
    conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
    prediction = prediction*conf_mask
    

    try:
        ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()
    except:
        return 0
    
    
    box_a = prediction.new(prediction.shape)
    box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
    box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
    box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) 
    box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
    prediction[:,:,:4] = box_a[:,:,:4]
    

    
    batch_size = prediction.size(0)
    
    output = prediction.new(1, prediction.size(2) + 1)
    write = False


    for ind in range(batch_size):
        #select the image from the batch
        image_pred = prediction[ind]
        

        
        #Get the class having maximum score, and the index of that class
        #Get rid of num_classes softmax scores 
        #Add the class index and the class score of class having maximum score
        max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        seq = (image_pred[:,:5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)
        

        
        #Get rid of the zero entries
        non_zero_ind =  (torch.nonzero(image_pred[:,4]))

        
        image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)
        
        #Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:,-1])
        except:
             continue
        #WE will do NMS classwise
        for cls in img_classes:
            #get the detections with one particular class
            cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
            

            image_pred_class = image_pred_[class_mask_ind].view(-1,7)

		
        
             #sort the detections such that the entry with the maximum objectness
             #confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)
            
            #if nms has to be done
            if nms:
                #For each detection
                for i in range(idx):
                    #Get the IOUs of all boxes that come after the one we are looking at 
                    #in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
                    except ValueError:
                        break
        
                    except IndexError:
                        break
                    
                    #Zero out all the detections that have IoU > treshhold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i+1:] *= iou_mask       
                    
                    #Remove the non-zero entries
                    non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(-1,7)
                    
                    

            #Concatenate the batch_id of the image to the detection
            #this helps us identify which image does the detection correspond to 
            #We use a linear straucture to hold ALL the detections from the batch
            #the batch_dim is flattened
            #batch is identified by extra batch column
            
            
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class
            if not write:
                output = torch.cat(seq,1)
                write = True
            else:
                out = torch.cat(seq,1)
                output = torch.cat((output,out))
    
    return output

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 24 00:12:16 2018

@author: ayooshmac
"""

def predict_transform_half(prediction, inp_dim, anchors, num_classes, CUDA = True):
    batch_size = prediction.size(0)
    stride =  inp_dim // prediction.size(2)

    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    grid_size = inp_dim // stride

    
    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1,2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
    
    
    #Sigmoid the  centre_X, centre_Y. and object confidencce
    prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
    prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
    prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])

    
    #Add the center offsets
    grid_len = np.arange(grid_size)
    a,b = np.meshgrid(grid_len, grid_len)
    
    x_offset = torch.FloatTensor(a).view(-1,1)
    y_offset = torch.FloatTensor(b).view(-1,1)
    
    if CUDA:
        x_offset = x_offset.cuda().half()
        y_offset = y_offset.cuda().half()
    
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
    
    prediction[:,:,:2] += x_y_offset
      
    #log space transform height and the width
    anchors = torch.HalfTensor(anchors)
    
    if CUDA:
        anchors = anchors.cuda()
    
    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
    prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors

    #Softmax the class scores
    prediction[:,:,5: 5 + num_classes] = nn.Softmax(-1)(Variable(prediction[:,:, 5 : 5 + num_classes])).data

    prediction[:,:,:4] *= stride
    
    
    return prediction


def write_results_half(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
    conf_mask = (prediction[:,:,4] > confidence).half().unsqueeze(2)
    prediction = prediction*conf_mask
    
    try:
        ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()
    except:
        return 0
    
    
    
    box_a = prediction.new(prediction.shape)
    box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
    box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
    box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) 
    box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
    prediction[:,:,:4] = box_a[:,:,:4]
    
    
    
    batch_size = prediction.size(0)
    
    output = prediction.new(1, prediction.size(2) + 1)
    write = False
    
    for ind in range(batch_size):
        #select the image from the batch
        image_pred = prediction[ind]

        
        #Get the class having maximum score, and the index of that class
        #Get rid of num_classes softmax scores 
        #Add the class index and the class score of class having maximum score
        max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
        max_conf = max_conf.half().unsqueeze(1)
        max_conf_score = max_conf_score.half().unsqueeze(1)
        seq = (image_pred[:,:5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)
        
        
        #Get rid of the zero entries
        non_zero_ind =  (torch.nonzero(image_pred[:,4]))
        try:
            image_pred_ = image_pred[non_zero_ind.squeeze(),:]
        except:
            continue
        
        #Get the various classes detected in the image
        img_classes = unique(image_pred_[:,-1].long()).half()
        
        
        
                
        #WE will do NMS classwise
        for cls in img_classes:
            #get the detections with one particular class
            cls_mask = image_pred_*(image_pred_[:,-1] == cls).half().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
            

            image_pred_class = image_pred_[class_mask_ind]

        
             #sort the detections such that the entry with the maximum objectness
             #confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)
            
            #if nms has to be done
            if nms:
                #For each detection
                for i in range(idx):
                    #Get the IOUs of all boxes that come after the one we are looking at 
                    #in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
                    except ValueError:
                        break
        
                    except IndexError:
                        break
                    
                    #Zero out all the detections that have IoU > treshhold
                    iou_mask = (ious < nms_conf).half().unsqueeze(1)
                    image_pred_class[i+1:] *= iou_mask       
                    
                    #Remove the non-zero entries
                    non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind]
                    
                    
            
            #Concatenate the batch_id of the image to the detection
            #this helps us identify which image does the detection correspond to 
            #We use a linear straucture to hold ALL the detections from the batch
            #the batch_dim is flattened
            #batch is identified by extra batch column
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class
            
            if not write:
                output = torch.cat(seq,1)
                write = True
            else:
                out = torch.cat(seq,1)
                output = torch.cat((output,out))
    
    return output

In [10]:
from __future__ import division

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
import cv2 
import matplotlib.pyplot as plt

from PIL import Image, ImageDraw

In [11]:
def letterbox_image(img, inp_dim):
    '''resize image with unchanged aspect ratio using padding'''
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim
    new_w = int(img_w * min(w/img_w, h/img_h))
    new_h = int(img_h * min(w/img_w, h/img_h))
    resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
    
    canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)

    canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w,  :] = resized_image
    
    return canvas


        
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network. 
    
    Returns a Variable 
    """

    orig_im = cv2.imread(img)
    dim = orig_im.shape[1], orig_im.shape[0]
    img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
    img_ = img[:,:,::-1].transpose((2,0,1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

def prep_image_pil(img, network_dim):
    orig_im = Image.open(img)
    img = orig_im.convert('RGB')
    dim = img.size
    img = img.resize(network_dim)
    img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
    img = img.view(*network_dim, 3).transpose(0,1).transpose(0,2).contiguous()
    img = img.view(1, 3,*network_dim)
    img = img.float().div(255.0)
    return (img, orig_im, dim)

def inp_to_image(inp):
    inp = inp.cpu().squeeze()
    inp = inp*255
    try:
        inp = inp.data.numpy()
    except RuntimeError:
        inp = inp.numpy()
    inp = inp.transpose(1,2,0)

    inp = inp[:,:,::-1]
    return inp

In [12]:
from __future__ import division

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
import cv2 
import matplotlib.pyplot as plt
#from util import count_parameters as count
#from util import convert2cpu as cpu
#from util import predict_transform

class test_net(nn.Module):
    def __init__(self, num_layers, input_size):
        super(test_net, self).__init__()
        self.num_layers= num_layers
        self.linear_1 = nn.Linear(input_size, 5)
        self.middle = nn.ModuleList([nn.Linear(5,5) for x in range(num_layers)])
        self.output = nn.Linear(5,2)
    
    def forward(self, x):
        x = x.view(-1)
        fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
        return fwd(x)
        
def get_test_input():
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (416,416)) 
    img_ =  img[:,:,::-1].transpose((2,0,1))
    img_ = img_[np.newaxis,:,:,:]/255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)
    return img_


def parse_cfg(cfgfile):
    """
    Takes a configuration file
    
    Returns a list of blocks. Each blocks describes a block in the neural
    network to be built. Block is represented as a dictionary in the list
    
    """
    file = open(cfgfile, 'r')
    lines = file.read().split('\n')     #store the lines in a list
    lines = [x for x in lines if len(x) > 0] #get read of the empty lines 
    lines = [x for x in lines if x[0] != '#']  
    lines = [x.rstrip().lstrip() for x in lines]

    
    block = {}
    blocks = []
    
    for line in lines:
        if line[0] == "[":               #This marks the start of a new block
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            key,value = line.split("=")
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)

    return blocks
#    print('\n\n'.join([repr(x) for x in blocks]))

import pickle as pkl

class MaxPoolStride1(nn.Module):
    def __init__(self, kernel_size):
        super(MaxPoolStride1, self).__init__()
        self.kernel_size = kernel_size
        self.pad = kernel_size - 1
    
    def forward(self, x):
        padded_x = F.pad(x, (0,self.pad,0,self.pad), mode="replicate")
        pooled_x = nn.MaxPool2d(self.kernel_size, self.pad)(padded_x)
        return pooled_x
    

class EmptyLayer(nn.Module):
    def __init__(self):
        super(EmptyLayer, self).__init__()
        

class DetectionLayer(nn.Module):
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors
    
    def forward(self, x, inp_dim, num_classes, confidence):
        x = x.data
        global CUDA
        prediction = x
        prediction = predict_transform(prediction, inp_dim, self.anchors, num_classes, confidence, CUDA)
        return prediction
        

        


class Upsample(nn.Module):
    def __init__(self, stride=2):
        super(Upsample, self).__init__()
        self.stride = stride
        
    def forward(self, x):
        stride = self.stride
        assert(x.data.dim() == 4)
        B = x.data.size(0)
        C = x.data.size(1)
        H = x.data.size(2)
        W = x.data.size(3)
        ws = stride
        hs = stride
        x = x.view(B, C, H, 1, W, 1).expand(B, C, H, stride, W, stride).contiguous().view(B, C, H*stride, W*stride)
        return x
#       
        
class ReOrgLayer(nn.Module):
    def __init__(self, stride = 2):
        super(ReOrgLayer, self).__init__()
        self.stride= stride
        
    def forward(self,x):
        assert(x.data.dim() == 4)
        B,C,H,W = x.data.shape
        hs = self.stride
        ws = self.stride
        assert(H % hs == 0),  "The stride " + str(self.stride) + " is not a proper divisor of height " + str(H)
        assert(W % ws == 0),  "The stride " + str(self.stride) + " is not a proper divisor of height " + str(W)
        x = x.view(B,C, H // hs, hs, W // ws, ws).transpose(-2,-3).contiguous()
        x = x.view(B,C, H // hs * W // ws, hs, ws)
        x = x.view(B,C, H // hs * W // ws, hs*ws).transpose(-1,-2).contiguous()
        x = x.view(B, C, ws*hs, H // ws, W // ws).transpose(1,2).contiguous()
        x = x.view(B, C*ws*hs, H // ws, W // ws)
        return x


def create_modules(blocks):
    net_info = blocks[0]     #Captures the information about the input and pre-processing    
    
    module_list = nn.ModuleList()
    
    index = 0    #indexing blocks helps with implementing route  layers (skip connections)

    
    prev_filters = 3
    
    output_filters = []
    
    for x in blocks:
        module = nn.Sequential()
        
        if (x["type"] == "net"):
            continue
        
        #If it's a convolutional layer
        if (x["type"] == "convolutional"):
            #Get the info about the layer
            activation = x["activation"]
            try:
                batch_normalize = int(x["batch_normalize"])
                bias = False
            except:
                batch_normalize = 0
                bias = True
                
            filters= int(x["filters"])
            padding = int(x["pad"])
            kernel_size = int(x["size"])
            stride = int(x["stride"])
            
            if padding:
                pad = (kernel_size - 1) // 2
            else:
                pad = 0
                
            #Add the convolutional layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias = bias)
            module.add_module("conv_{0}".format(index), conv)
            
            #Add the Batch Norm Layer
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)
            
            #Check the activation. 
            #It is either Linear or a Leaky ReLU for YOLO
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace = True)
                module.add_module("leaky_{0}".format(index), activn)
            
            
            
        #If it's an upsampling layer
        #We use Bilinear2dUpsampling
        
        elif (x["type"] == "upsample"):
            stride = int(x["stride"])
#            upsample = Upsample(stride)
            upsample = nn.Upsample(scale_factor = 2, mode = "nearest")
            module.add_module("upsample_{}".format(index), upsample)
        
        #If it is a route layer
        elif (x["type"] == "route"):
            x["layers"] = x["layers"].split(',')
            
            #Start  of a route
            start = int(x["layers"][0])
            
            #end, if there exists one.
            try:
                end = int(x["layers"][1])
            except:
                end = 0
                
            
            
            #Positive anotation
            if start > 0: 
                start = start - index
            
            if end > 0:
                end = end - index

            
            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)
            
            
            
            if end < 0:
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters= output_filters[index + start]
                        
            
        
        #shortcut corresponds to skip connection
        elif x["type"] == "shortcut":
            from_ = int(x["from"])
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)
            
            
        elif x["type"] == "maxpool":
            stride = int(x["stride"])
            size = int(x["size"])
            if stride != 1:
                maxpool = nn.MaxPool2d(size, stride)
            else:
                maxpool = MaxPoolStride1(size)
            
            module.add_module("maxpool_{}".format(index), maxpool)
        
        #Yolo is the detection layer
        elif x["type"] == "yolo":
            mask = x["mask"].split(",")
            mask = [int(x) for x in mask]
            
            
            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors),2)]
            anchors = [anchors[i] for i in mask]
            
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)
        
            
            
        else:
            print("Something I dunno")
            assert False


        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
        index += 1
        
    
    return (net_info, module_list)



class Darknet(nn.Module):
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)
        self.header = torch.IntTensor([0,0,0,0])
        self.seen = 0

        
        
    def get_blocks(self):
        return self.blocks
    
    def get_module_list(self):
        return self.module_list

                
    def forward(self, x, CUDA):
        detections = []
        modules = self.blocks[1:]
        outputs = {}   #We cache the outputs for the route layer
        
        
        write = 0
        for i in range(len(modules)):        
            
            module_type = (modules[i]["type"])
            if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":
                
                x = self.module_list[i](x)
                outputs[i] = x

                
            elif module_type == "route":
                layers = modules[i]["layers"]
                layers = [int(a) for a in layers]
                
                if (layers[0]) > 0:
                    layers[0] = layers[0] - i

                if len(layers) == 1:
                    x = outputs[i + (layers[0])]

                else:
                    if (layers[1]) > 0:
                        layers[1] = layers[1] - i
                        
                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    
                    if map1.size() != map2.size():
                        print(f"Resizing map2 from {map2.size()} to match map1 {map1.size()}")
                        map2 = F.interpolate(map2, size=map1.shape[2:])  # Resize map2 to match the spatial dimensions of map1

                    # Now you can safely concatenate
                    x = torch.cat((map1, map2), 1)
                    #x = torch.cat((map1, map2), 1)
                outputs[i] = x
            
            elif  module_type == "shortcut":
                from_ = int(modules[i]["from"])
                x = outputs[i-1] + outputs[i+from_]
                outputs[i] = x
            elif module_type == 'yolo':        
                
                anchors = self.module_list[i][0].anchors
                #Get the input dimensions
                inp_dim = int (self.net_info["height"])
                
                #Get the number of classes
                num_classes = int (modules[i]["classes"])
                
                #Output the result
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
                
                if type(x) == int:
                    continue

                
                if not write:
                    detections = x
                    write = 1
                
                else:
                    detections = torch.cat((detections, x), 1)
                
                outputs[i] = outputs[i-1]
                
        
        
        try:
            return detections
        except:
            return 0

            
    def load_weights(self, weightfile):
        
        #Open the weights file
        fp = open(weightfile, "rb")

        #The first 4 values are header information 
        # 1. Major version number
        # 2. Minor Version Number
        # 3. Subversion number 
        # 4. IMages seen 
        header = np.fromfile(fp, dtype = np.int32, count = 5)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]
        
        #The rest of the values are the weights
        # Let's load them up
        weights = np.fromfile(fp, dtype = np.float32)
        
        ptr = 0
        for i in range(len(self.module_list)):
            module_type = self.blocks[i + 1]["type"]
            
            if module_type == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0
                
                conv = model[0]
                
                if (batch_normalize):
                    bn = model[1]
                    
                    #Get the number of weights of Batch Norm Layer
                    num_bn_biases = bn.bias.numel()
                    
                    #Load the weights
                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases
                    
                    bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr  += num_bn_biases
                    
                    bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr  += num_bn_biases
                    
                    bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
                    ptr  += num_bn_biases
                    
                    #Cast the loaded weights into dims of model weights. 
                    bn_biases = bn_biases.view_as(bn.bias.data)
                    bn_weights = bn_weights.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)

                    #Copy the data to model
                    bn.bias.data.copy_(bn_biases)
                    bn.weight.data.copy_(bn_weights)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)
                
                else:
                    #Number of biases
                    num_biases = conv.bias.numel()
                
                    #Load the weights
                    conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
                    ptr = ptr + num_biases
                    
                    #reshape the loaded weights according to the dims of the model weights
                    conv_biases = conv_biases.view_as(conv.bias.data)
                    
                    #Finally copy the data
                    conv.bias.data.copy_(conv_biases)
                    
                    
                #Let us load the weights for the Convolutional layers
                num_weights = conv.weight.numel()
                
                #Do the same as above for weights
                conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
                ptr = ptr + num_weights

                conv_weights = conv_weights.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weights)
                
    def save_weights(self, savedfile, cutoff = 0):
            
        if cutoff <= 0:
            cutoff = len(self.blocks) - 1
        
        fp = open(savedfile, 'wb')
        
        # Attach the header at the top of the file
        self.header[3] = self.seen
        header = self.header

        header = header.numpy()
        header.tofile(fp)
        
        # Now, let us save the weights 
        for i in range(len(self.module_list)):
            module_type = self.blocks[i+1]["type"]
            
            if (module_type) == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0
                    
                conv = model[0]

                if (batch_normalize):
                    bn = model[1]
                
                    #If the parameters are on GPU, convert them back to CPU
                    #We don't convert the parameter to GPU
                    #Instead. we copy the parameter and then convert it to CPU
                    #This is done as weight are need to be saved during training
                    cpu(bn.bias.data).numpy().tofile(fp)
                    cpu(bn.weight.data).numpy().tofile(fp)
                    cpu(bn.running_mean).numpy().tofile(fp)
                    cpu(bn.running_var).numpy().tofile(fp)
                
            
                else:
                    cpu(conv.bias.data).numpy().tofile(fp)
                
                
                #Let us save the weights for the Convolutional layers
                cpu(conv.weight.data).numpy().tofile(fp)
               




#
#dn = Darknet('cfg/yolov3.cfg')
#dn.load_weights("yolov3.weights")
#inp = get_test_input()
#a, interms = dn(inp)
#dn.eval()
#a_i, interms_i = dn(inp)

In [21]:
import torch, cv2, random, os
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pickle as pkl
import threading, queue
import glob
import pycocotools
import faster_coco_eval
from torchmetrics.detection.mean_ap import MeanAveragePrecision

torch.multiprocessing.set_start_method('spawn', force=True)

# Set up torch for GPU utilization
if torch.cuda.is_available():
    torch.backends.cudnn.enabled = True 
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

def prep_image(img, inp_dim):
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

labels = {}
b_boxes = {}

def write(bboxes, img, classes, colors):
    class_idx = bboxes
    bboxes = bboxes[1:5]
    
    # Check for NaN or Inf values in bboxes
    if torch.isnan(bboxes).any() or torch.isinf(bboxes).any():
        print("Invalid values in bounding boxes!")
        bboxes = torch.zeros_like(bboxes)  # Reset to zeros or some valid default
    
    bboxes = bboxes.cpu().data.numpy()
    if np.isnan(bboxes).any() or np.isinf(bboxes).any():
        bboxes = np.zeros_like(bboxes)  # Replace invalid values
    
    bboxes = bboxes.astype(int)
    
    cls = int(class_idx[-1])
    label = "{0}".format(classes[cls])
    color = random.choice(colors)

    text_str = '%s' % (label)
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
    text_pt = (bboxes[0], max(bboxes[1] - 3, 0))  # Prevent overflow in text position
    
    text_color = [255, 255, 255]
    x, y, w, h = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
    
    # Ensure the bounding box size is reasonable
    if w + h > 0:
        distance = (2 * 3.14 * 180) / ((w + h) * 360) * 1000 + 3
    else:
        distance = float('inf')

    if not (distance == float('inf') or distance != distance):  # Check for invalid distance
        feedback = ("{}".format(classes[cls]) + " is at {} Inches".format(round(distance)))
    else:
        feedback = ("{}".format(classes[cls]) + " is at an unknown distance (invalid calculation)")

    print(feedback)

    cv2.putText(img, feedback, (int(x), int(y)), font_face, font_scale, (0, 255, 0), font_thickness, cv2.LINE_AA)
    cv2.rectangle(img, (bboxes[0], bboxes[1]), (bboxes[2], bboxes[3]), color, 2)
    cv2.putText(img, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img


class ObjectDetection:
    def __init__(self, dataset_path, is_training=True): 
        self.image_paths = glob.glob(dataset_path)
        self.is_training = is_training
        self.current_image_index = 0
        self.cfgfile = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\cfg\\yolov4.cfg" 
        self.weightsfile = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\yolov4.weights"
        self.confidence = float(0.6)
        self.nms_thesh = float(0.8)
        self.num_classes = 3
        self.classes = load_classes('C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\coco.names')
        self.colors = pkl.load(open("C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 1280
        self.height = 720
        self.batch_size = 4  
        self.max_images = 8
        self.metric = MeanAveragePrecision()  # Initialize mAP metric
        print(f"Loading network for {'training' if self.is_training else 'validation'}.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def evaluate_map(self, pred_boxes, pred_labels, pred_scores, true_boxes, true_labels):
        """
        Update mAP metric for each batch
        """
        # Prepare predictions and targets in the expected format (list of dicts)
        preds = [
            dict(boxes=pred_boxes, scores=pred_scores, labels=pred_labels)
        ]
        targets = [
            dict(boxes=true_boxes, labels=true_labels)
        ]
        
        # Update the mAP metric
        self.metric.update(preds, targets)

    def compute_map(self):
        """
        Compute and print mAP after processing all batches
        """
        final_map = self.metric.compute()
        Print("computinggg")
        print(f"Mean Average Precision (mAP) for {'training' if self.is_training else 'validation'}: {final_map['map']}")

    def process_batch(self, frames, batch_paths):
        """
        Process the batch of images, make predictions, and update mAP.
        """
        for frame in frames:
            try:
                img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1, 2)
                if self.CUDA:
                    im_dim = im_dim.cuda()
                    img = img.cuda()

                output = self.model(Variable(img), self.CUDA)
                output = write_results(output, confidence=0.5, num_classes=self.num_classes, nms=True, nms_conf=0.4)

                if len(output) == 0:
                    print(f"No detections for batch starting with image {batch_paths[0]}")
                    continue

                output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
                output[:, [1, 3]] *= frame.shape[1]
                output[:, [2, 4]] *= frame.shape[0]

                pred_boxes = output[:, 1:5]
                pred_scores = output[:, 5]
                pred_labels = output[:, -1]

                # Dummy ground truth (replace with actual data)
                true_boxes = torch.Tensor([[0, 0, 0, 0]])  
                true_labels = torch.Tensor([0])  

                # Update mAP metric
                self.evaluate_map(pred_boxes, pred_labels, pred_scores, true_boxes, true_labels)

                list(map(lambda boxes: write(boxes, frame, self.classes, self.colors), output))

            except Exception as e:
                print(f"Error processing batch starting with image {batch_paths[0]}: {e}")
                import traceback
                traceback.print_exc()

            # Display the image with bounding boxes
            cv2.imshow(f"{'Training' if self.is_training else 'Validation'} Object Detection Window", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    def main(self):
        q = queue.Queue()

        for i in range(0, min(self.max_images, len(self.image_paths)), self.batch_size):
            batch_paths = self.image_paths[i:i + self.batch_size]
            frames = []
            for image_path in batch_paths:
                def frame_render(queue_from_cam, img_path):
                    frame = cv2.imread(img_path)
                    frame = cv2.resize(frame, (self.width, self.height))
                    queue_from_cam.put(frame)

                cam = threading.Thread(target=frame_render, args=(q, image_path))
                cam.start()
                cam.join()
                frame = q.get()
                frames.append(frame)
                q.task_done()

            # Process the batch of images
            self.process_batch(frames, batch_paths)

            # Clear CUDA cache to free memory
            torch.cuda.empty_cache()

        # Close OpenCV windows after processing
        cv2.destroyAllWindows()
        
        # Compute and display mAP after processing all images
        self.compute_map()

if __name__ == "__main__":
    train_dataset_path = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\train\\*.jpg"
    valid_dataset_path = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\valid\\*.jpg"

    print("Processing training dataset...")
    train_detection = ObjectDetection(train_dataset_path)
    train_detection.main()

    print("\nProcessing validation dataset...")
    valid_detection = ObjectDetection(valid_dataset_path)
    valid_detection.main()


Processing training dataset...
Loading network for training.....
Network successfully loaded
Resizing map2 from torch.Size([1, 24, 2, 2]) to match map1 torch.Size([1, 24, 10, 10])
Resizing map2 from torch.Size([1, 24, 1, 1]) to match map1 torch.Size([1, 24, 5, 5])
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxe

ValueError: Invalid input class of sample 0, element 0 (expected value of type integer, got type <class 'float'>)

In [18]:
import torch, cv2, random, os
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pickle as pkl
import threading, queue
import glob
import pycocotools
import faster_coco_eval
from torchmetrics.detection.mean_ap import MeanAveragePrecision

torch.multiprocessing.set_start_method('spawn', force=True)

# Set up torch for GPU utilization
if torch.cuda.is_available():
    torch.backends.cudnn.enabled = True 
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

def prep_image(img, inp_dim):
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

labels = {}
b_boxes = {}

def write(bboxes, img, classes, colors):
    class_idx = bboxes
    bboxes = bboxes[1:5]
    
    # Check for NaN or Inf values in bboxes
    if torch.isnan(bboxes).any() or torch.isinf(bboxes).any():
        print("Invalid values in bounding boxes!")
        bboxes = torch.zeros_like(bboxes)  # Reset to zeros or some valid default
    
    bboxes = bboxes.cpu().data.numpy()
    if np.isnan(bboxes).any() or np.isinf(bboxes).any():
        bboxes = np.zeros_like(bboxes)  # Replace invalid values
    
    bboxes = bboxes.astype(int)
    
    cls = int(class_idx[-1])
    label = "{0}".format(classes[cls])
    color = random.choice(colors)

    text_str = '%s' % (label)
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
    text_pt = (bboxes[0], max(bboxes[1] - 3, 0))  # Prevent overflow in text position
    
    text_color = [255, 255, 255]
    x, y, w, h = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
    # Ensure the bounding box size is reasonable
    if w + h > 0:
        distance = (2 * 3.14 * 180) / ((w + h) * 360) * 1000 + 3
    else:
        distance = float('inf')

    if not (distance == float('inf') or distance != distance):  # Check for invalid distance
        feedback = ("{}".format(classes[cls]) + " is at {} Inches".format(round(distance)))
    else:
        feedback = ("{}".format(classes[cls]) + " is at an unknown distance (invalid calculation)")

    print(feedback)

    cv2.putText(img, feedback, (int(x), int(y)), font_face, font_scale, (0, 255, 0), font_thickness, cv2.LINE_AA)
    cv2.rectangle(img, (bboxes[0], bboxes[1]), (bboxes[2], bboxes[3]), color, 2)
    cv2.putText(img, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img

class ObjectDetection:
    def __init__(self, dataset_path, annotation_file, is_training=True): 
        self.image_paths = glob.glob(dataset_path)
        self.annotation_file = annotation_file
        self.is_training = is_training
        self.annotations = self.load_annotations()
        self.current_image_index = 0
        self.cfgfile = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\cfg\\yolov4.cfg" 
        self.weightsfile = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\yolov4.weights"
        self.confidence = float(0.6)
        self.nms_thesh = float(0.8)
        self.num_classes = 3
        self.classes = load_classes('C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\coco.names')
        self.colors = pkl.load(open("C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 1280
        self.height = 720
        self.batch_size = 4  
        self.max_images = 8
        self.metric = MeanAveragePrecision()  # Initialize mAP metric
        print(f"Loading network for {'training' if self.is_training else 'validation'}.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def load_annotations(self):
        """
        Load the annotations from the file and store them in a dictionary.
        The format of each line is:
        image_filename x_min, y_min, x_max, y_max, class_id
        """
        annotations = {}
        with open(self.annotation_file, 'r') as file:
            for line in file.readlines():
                data = line.strip().split()
                image_filename = data[0]
                boxes = []
                for box_data in data[1:]:
                    x_min, y_min, x_max, y_max, class_id = map(int, box_data.split(','))
                    boxes.append((x_min, y_min, x_max, y_max, class_id))
                annotations[image_filename] = boxes
        return annotations

    def get_ground_truth(self, image_filename):
        """
        Retrieve ground truth boxes and labels from the annotation file for a given image.
        """
        boxes = []
        labels = []
        if image_filename in self.annotations:
            for box in self.annotations[image_filename]:
                x_min, y_min, x_max, y_max, class_id = box
                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(class_id)
        return torch.Tensor(boxes), torch.Tensor(labels)

    def evaluate_map(self, pred_boxes, pred_labels, pred_scores, true_boxes, true_labels):
        """
        Update mAP metric for each batch
        """
        # Prepare predictions and targets in the expected format (list of dicts)
        preds = [
            dict(boxes=pred_boxes, scores=pred_scores, labels=pred_labels)
        ]
        targets = [
            dict(boxes=true_boxes, labels=true_labels)
        ]
        
        # Update the mAP metric
        self.metric.update(preds, targets)

    def compute_map(self):
        """
        Compute and print mAP after processing all batches
        """
        final_map = self.metric.compute()
        print(f"Mean Average Precision (mAP) for {'training' if self.is_training else 'validation'}: {final_map['map']}")

    def process_batch(self, frames, batch_paths):
        """
        Process the batch of images, make predictions, and update mAP.
        """
        for i, frame in enumerate(frames):
            image_filename = os.path.basename(batch_paths[i])
            try:
                img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1, 2)
                if self.CUDA:
                    im_dim = im_dim.cuda()
                    img = img.cuda()

                output = self.model(Variable(img), self.CUDA)
                output = write_results(output, confidence=0.5, num_classes=self.num_classes, nms=True, nms_conf=0.4)

                if len(output) == 0:
                    print(f"No detections for batch starting with image {batch_paths[0]}")
                    continue

                output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
                output[:, [1, 3]] *= frame.shape[1]
                output[:, [2, 4]] *= frame.shape[0]

                pred_boxes = output[:, 1:5]
                pred_scores = output[:, 5]
                pred_labels = output[:, -1]

                # Get actual ground truth boxes and labels from the annotations
                true_boxes, true_labels = self.get_ground_truth(image_filename)

                # Update mAP metric
                self.evaluate_map(pred_boxes, pred_labels, pred_scores, true_boxes, true_labels)

                list(map(lambda boxes: write(boxes, frame, self.classes, self.colors), output))

            except Exception as e:
                print(f"Error processing batch starting with image {batch_paths[0]}: {e}")
                import traceback
                traceback.print_exc()

            # Display the image with bounding boxes
            cv2.imshow(f"{'Training' if self.is_training else 'Validation'} Object Detection Window", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    def main(self):
        q = queue.Queue()

        for i in range(0, min(self.max_images, len(self.image_paths)), self.batch_size):
            batch_paths = self.image_paths[i:i + self.batch_size]
            frames = []
            for image_path in batch_paths:
                def frame_render(queue_from_cam, img_path):
                    frame = cv2.imread(img_path)
                    frame = cv2.resize(frame, (self.width, self.height))
                    queue_from_cam.put(frame)

                cam = threading.Thread(target=frame_render, args=(q, image_path))
                cam.start()
                cam.join()
                frame = q.get()
                frames.append(frame)
                q.task_done()

            # Process the batch of images
            self.process_batch(frames, batch_paths)

            # Clear CUDA cache to free memory
            torch.cuda.empty_cache()

        # Close OpenCV windows after processing
        cv2.destroyAllWindows()
        
        # Compute and display mAP after processing all images
        self.compute_map()

if __name__ == "__main__":
    train_dataset_path = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\train\\*.jpg"
    train_annotation_file = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\train\\_annotations.txt"

    print("Processing training dataset...")
    train_detection = ObjectDetection(train_dataset_path, train_annotation_file)
    train_detection.main()

   


Processing training dataset...
Loading network for training.....
Network successfully loaded
Resizing map2 from torch.Size([1, 24, 2, 2]) to match map1 torch.Size([1, 24, 10, 10])
Resizing map2 from torch.Size([1, 24, 1, 1]) to match map1 torch.Size([1, 24, 5, 5])
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxe

ValueError: Invalid input class of sample 0, element 0 (expected value of type integer, got type <class 'float'>)

In [19]:
valid_dataset_path = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\valid\\*.jpg"
valid_annotation_file = "C:\\Users\\mramy\\Downloads\\Object-Detection-and-Distance-Measurement-master\\data\\valid\\_annotations.txt"

print("\nProcessing validation dataset...")
valid_detection = ObjectDetection(valid_dataset_path, valid_annotation_file)
valid_detection.main()


Processing validation dataset...
Loading network for training.....
Network successfully loaded
Resizing map2 from torch.Size([1, 24, 2, 2]) to match map1 torch.Size([1, 24, 10, 10])
Resizing map2 from torch.Size([1, 24, 1, 1]) to match map1 torch.Size([1, 24, 5, 5])
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding boxes!
Table is at an unknown distance (invalid calculation)
Invalid values in bounding b

ValueError: Invalid input class of sample 0, element 0 (expected value of type integer, got type <class 'float'>)