# Inference Testing for Baseline Model + 3D Guesser



## Date: 27th August 2024
## Creator: Sean Lim

# Baseline

The following notebook displays the baseline model used for distance estimation

The code consists of the following pipeline:
1) Yolov10 model: Used for detecting objects and is the core of the model
2) Config files: Provide important manually entered information such as the real dimensions and the intrinsic camera matrix
- Real dimensions: Estimates in meters (m) used to compare the bounding box vs the real length, height of the object
- Intrinsic camera matrix: setting "use_own" = True uses the manually entered values, while False uses the image dimensions as the camera matrix variables (not recommended)
3) The closest distance between two bounding boxes is obtained in pixels. The boxes are then transformed to 3D coordinates using computer vision transformations: the distance between the object and the camera is estimated and used as the depth, which in turn gives the location of the object (X, Y, Z=depth)
4) The Euclidean distance between the center points of the boxes is computed; if it is less than the configurable [safe_distance], a warning is issued and lines are drawn

### Install Pre-requisites

In [None]:
# !pip install -r requirements.txt

In [None]:
# Load a pre-trained YOLOv10n model
from ultralytics import YOLOv10
from lib.Dataset import ConstructionDataset
from lib.Camera import Camera
from lib.utils import parse_yaml, estimate_distance_2d, draw_lines, estimate_distance_centers_3d
from lu_vp_detect import VPDetection
import torch
import os
from torch.utils import data
import matplotlib.pyplot as plt
import numpy as np
import cv2
# !python --version
from lu_vp_detect import VPDetection

In [None]:
# Parse the notebook-wide settings from config.yaml.
config = parse_yaml('config.yaml')

### Prepare dataset from original repo 

#### skips if config > YoloPrep > init = True

In [None]:
# !python YoloCreate.py

In [None]:

# Initialization: detector, configuration sections, dataset loader, drawing
# palette and vanishing-point detector for the baseline pipeline.

# --- Detector and configuration sections ---
weightpath = 'runs/detect/train12/weights/best.pt'
model = YOLOv10(weightpath)
lookup = config['Classes']
config_cam = config['Camera']
config_vp = config['VP_Detector']
remove_fisheye = config_cam['remove_fisheye']

# --- Dataset and loader ---
batch_size = 1  # Only for testing purposes
safe_distancing = config['General']['safe_dist']
seed = config['General']['seed']  # Or specify whatever ID you want (integer)
consDataset = ConstructionDataset(config, crops=False)

params = {
    'batch_size': batch_size,
    'shuffle': True,
    'num_workers': 6,
}
# Seed torch's global RNG before the loader is built when a seed is configured.
if seed is not None:
    torch.manual_seed(seed)
generator = data.DataLoader(consDataset, **params)

# --- Drawing parameters ---
# BGR format: after reversing the columns of the identity matrix the
# first row is red, the second green and the third blue.
Color_palette = (255 * np.eye(3))[:, ::-1].astype(int).tolist()

# --- Vanishing-point detector ---
length_thresh = config_vp['length_thresh']
principal_point = config_vp['principal_point']
focal_length = config_vp['focal_length']

# Forward the configured intrinsics to the detector instead of silently
# dropping them. Values left as null in the config are omitted so the
# library's own defaults still apply.
vp_kwargs = {'seed': seed}
if principal_point is not None:
    vp_kwargs['principal_point'] = principal_point
if focal_length is not None:
    vp_kwargs['focal_length'] = focal_length
vpd = VPDetection(length_thresh, **vp_kwargs)


In [None]:
# Initialise generator: create the iterator over the DataLoader once, so each
# run of the test-loop cell below can pull the next sample with next().
generator_iter = iter(generator)

In [None]:
# Debug flag handed to draw_lines() by the test loop below.
debug = True

## Test Loop

In [None]:
# Baseline test step: run YOLO on the next sample, estimate 3D positions of
# every worker / non-worker pair and report pairs closer than safe_distancing.
(local_image_tensor, local_image, indexed_label, indexed_pair) = next(generator_iter)

temp_img = local_image_tensor
temp_img_ori = np.array(local_image[0])

print(temp_img_ori.shape)

cam = Camera(
    use_own=config_cam['use_own'],
    img=temp_img_ori,
    distortion_coef=config_cam['distortion_coef'],
    fx=config_cam['fx'],
    fy=config_cam['fy'],
    cx=config_cam['cx'],
    cy=config_cam['cy'],
)

with torch.no_grad():
    results = model(temp_img)

# Predicted coordinates of box (top left, bottom right)
list_boxes = results[0].boxes.xyxy

# List of detected classes
detected_classes = results[0].boxes.cls

# Plot original Image with Yolo Detection
img = results[0].plot()

# Draw lines and send warning if a distance is lower than safe distance
if len(list_boxes) > 1 and 0 in detected_classes:
    list_conf = results[0].boxes.conf

    # Each entry of list_workers / list_nonworkers is a tuple:
    #   0: index w.r.t. the detected objects in YOLO
    #   1: class id as a Python int (class 0 is the worker class)
    #   2: xyxy box
    #   3: confidence score, for ease of labelling when plotting
    # The class id is cast to int so it can index `lookup` whether the
    # config stores the classes as a list or as a mapping; the raw value
    # from the class tensor's .item() is a float.
    all_detections = [
        (i, int(cls), list_boxes[i], list_conf[i].item())
        for i, cls in enumerate(detected_classes)
    ]
    list_workers = [det for det in all_detections if det[1] == 0]
    list_nonworkers = [det for det in all_detections if det[1] != 0]

    # Vanishing points are computed but only consumed by the (currently
    # disabled) grid drawing.
    vps = vpd.find_vps(temp_img_ori)
    vp1, vp2, vp3 = vpd.vps_2D[:3]

    # draw_grid(img, vp1, vp2, vp3, Color_palette)

    for worker in list_workers:
        for nonworker in list_nonworkers:
            length = estimate_distance_2d(worker[2], nonworker[2])
            hazard = lookup[nonworker[1]]['name']
            worker_dim = lookup[worker[1]]['dimensions']
            hazard_dim = lookup[nonworker[1]]['dimensions']

            worker_3d_coords = cam.find_real_coords(worker[2], worker_dim)
            hazard_3d_coords = cam.find_real_coords(nonworker[2], hazard_dim)
            distance = estimate_distance_centers_3d(worker_3d_coords, hazard_3d_coords)

            if distance < safe_distancing:
                print(f'Unsafe distancing between worker:{worker[3]:.2f} and {hazard}:{nonworker[3]:.2f}, Distance={distance:.2f}m')
                print('3D Coordinates of worker: ',worker[3],worker_3d_coords)
                print('3D Coordinates of ', hazard,': ', nonworker[3], hazard_3d_coords)

            draw_lines(worker[2], nonworker[2], img, distance, debug)

    print('__________________________________________________________________________')


plt.figure(figsize=(18, 16), dpi=80)
plt.imshow(img)


# 3D Guesser

In [1]:
from lib.Dataset import ConstructionDataset
# from lib.Camera import Camera
from lib.utils import parse_yaml, get_angle, save_checkpoint, load_checkpoint, draw_lines, estimate_distance_centers_3d
# from lib.loss import Custom_Loss
from lib.VP_Detector import VPD
from torchvision.models import vgg, efficientnet_b0
from my_model.Model import BB_Guesser_Model
from lib.loss import Custom_Loss_v2
# from lib.traintestfuncs import train
# from model.faster_rcnn.resnet import resnet
from ultralytics import YOLOv10
import torch
import os
import cv2
# import pandas as pd
import matplotlib.pyplot as plt
from torch.utils import data

import numpy as np
from lib_3D.Dataset import *
from lib_3D.Plotting import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cpu


In [3]:
# Load config.yaml and pull out the settings the 3D Guesser cells rely on.
config = parse_yaml('config.yaml')
lookup = config['Classes']
general_cfg = config['General']
seed = general_cfg['seed']
safe_distancing = general_cfg['safe_dist']
batch_size = 1  # Only for testing purposes

In [4]:
# Build the dataset (crops disabled) and a shuffling loader over it.
consDataset = ConstructionDataset(config, crops=False)

params = dict(batch_size=batch_size, shuffle=True, num_workers=6)

# Seed torch's global RNG before the loader is built when a seed is configured.
if seed is not None:
    torch.manual_seed(seed)

generator = data.DataLoader(consDataset, **params)

In [5]:
# 2D detector: YOLOv10 with the trained weights.
weightpath = 'runs/detect/train12/weights/best.pt'
yolo = YOLOv10(weightpath)

# Pose network: ImageNet-pretrained VGG-19 (batch-norm) backbone wrapped by
# BB_Guesser_Model, moved to the selected device and put in eval mode.
# (efficientnet_b0 with the same weights tag was the alternative backbone.)
backbone = vgg.vgg19_bn(weights='IMAGENET1K_V1')
model = BB_Guesser_Model(backbone=backbone, proposals=1, angles=1).to(device)
model.eval()

# The optimizer is passed to load_checkpoint(); it is not otherwise used in
# the cells shown here.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Checkpoint to restore; earlier experiments used ckpt_4 and ckpt_24.
file_name = "3D_Guesser_Train_ckpt_6.pth"
_, _ = load_checkpoint(model, file_name, optimizer)

my_vpd = VPD(config)

  ckpt = torch.load(file, map_location="cpu")
  ckpt = torch.load(file_name, map_location=device)


Model's pretrained weights loaded!


In [6]:
# Debug flag handed to draw_lines() by the 3D Guesser test loop.
debug = False

In [7]:
# Create the iterator over the DataLoader once; the next cell pulls one
# sample per run with next().
generator_iter = iter(generator)

In [None]:

# 3D Guesser test step: detect objects with YOLO, estimate an orientation for
# each detection with the pose network, place a 3D box using the class
# dimensions from the config, then check every worker / non-worker pair
# against safe_distancing.
(indexed_img_tensor, indexed_ori_img, indexed_label, indexed_pair) = next(generator_iter)
curr_image = indexed_img_tensor
plot_image = np.array(indexed_ori_img.squeeze())

# Entries are (confidence, 3D location, class id, xyxy box, dimensions).
list_workers2 = []
list_nonworkers2 = []

with torch.no_grad():
    detections = yolo(curr_image)
calib_file = "calib_cam_to_cam.txt"

list_boxes_2 = detections[0].boxes.xyxy

# List of detected classes
detected_classes_2 = detections[0].boxes.cls

img = detections[0].plot()

for detection in detections[0]:
    box = detection.boxes.xyxy.squeeze().cpu().int().numpy()
    box_2d = [(box[0], box[1]), (box[2], box[3])]

    # NOTE(review): `img` is the annotated YOLO plot and is also handed to
    # plot_regressed_3d_bbox below, so DetectedObject may see earlier
    # overlays - confirm whether the clean frame (plot_image) was intended.
    try:
        detectedObject = DetectedObject(img, detection.boxes.cls.cpu(), box_2d, calib_file)
    except Exception as exc:
        # Best effort: a detection that cannot be prepared is skipped, but
        # reported so that dropped objects do not go unnoticed.
        print(f'Skipping detection with box {box.tolist()}: {exc!r}')
        continue

    theta_ray = detectedObject.theta_ray
    input_img = detectedObject.img
    proj_matrix = detectedObject.proj_matrix

    input_tensor = torch.zeros([1, 3, 224, 224]).to(device)
    input_tensor[0, :, :, :] = input_img

    # Inference only: no autograd graph is needed. The network's second
    # output is not used because the class dimensions from the config
    # are used for the box instead.
    with torch.no_grad():
        orient, _regressed_dim = model(input_tensor)
    orient = orient.detach().cpu().numpy()[0, :, :]
    orient = orient[0, :]

    # arctan2(sin, cos) maps the predicted angle into [-pi, pi].
    alpha = np.arctan2(np.sin(orient[0]), np.cos(orient[0]))
    beta = 0

    detect_class = detection.boxes.cls.cpu().int().numpy()[0]
    # Config dimensions are reversed before being passed to the 3D box helper.
    dim = np.flip(np.array(lookup[detect_class]['dimensions']))
    location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray, beta=beta, clip=True)

    detect_label = lookup[detect_class]['name']
    detect_conf = detection.boxes.conf.cpu().float().numpy()[0]

    entry = (detect_conf, location, detect_class, box, dim)
    if detect_class == 0:
        list_workers2.append(entry)
    else:
        list_nonworkers2.append(entry)
    print(f'Estimated pose:{detect_label} {detect_conf:.2f} {location}')


# Pairwise safety check. Nothing happens when either list is empty.
for worker in list_workers2:
    conf_worker, coord_worker, class_worker, box_worker, dim_worker = worker
    for nonworker in list_nonworkers2:
        conf_non, coord_non, class_non, box_non, dim_non = nonworker
        hazard = lookup[class_non]['name']

        worker_3d_coords = coord_worker
        hazard_3d_coords = coord_non
        # Pass both objects' dimensions by keyword so the worker's are not
        # overwritten by the hazard's before the call.
        # NOTE(review): the recorded output of the later pair-inspection
        # cell suggests this helper may return a squared distance -
        # confirm the units before comparing against safe_distancing.
        distance = estimate_distance_centers_3d(
            worker_3d_coords,
            hazard_3d_coords,
            dim1=dim_worker,
            dim2=dim_non,
        )

        if distance < safe_distancing:
            print(f'Unsafe distancing between worker:{conf_worker:.2f} and {hazard}:{conf_non:.2f}, Distance={distance:.2f}m')

        draw_lines(box_worker, box_non, img, distance, debug=debug, safe_distancing=safe_distancing)


plt.figure(figsize=(18, 16), dpi=80)
plt.imshow(img)
plt.show()

: 

In [17]:
# Inspect one hand-picked worker / hazard pair from the last test-loop run.
# The indices are hard-coded, so the lists must hold at least 3 workers and
# 4 non-workers.
conf_worker, coord_worker, class_worker, box_worker, dim_worker = list_workers2[2]
conf_non, coord_non, class_non, box_non, dim_non = list_nonworkers2[3]
hazard = lookup[class_non]['name']

print(hazard, conf_non, coord_non, box_non)
print('worker', conf_worker, coord_worker, box_worker)

worker_3d_coords, hazard_3d_coords = coord_worker, coord_non

# NOTE(review): for the coordinates recorded in this cell's output, the plain
# Euclidean distance is about 6.49 while the printed value is 42.14, which is
# about 6.49 squared - confirm whether estimate_distance_centers_3d is
# missing a square root.
distance = estimate_distance_centers_3d(
    worker_3d_coords,
    hazard_3d_coords,
    dim1=dim_worker,
    dim2=dim_non,
)

print(distance)

pump truck 0.8050343 [-1.4354035884831169, 4.276195028252108, 18.947894239312184] [490 294 621 407]
worker 0.50111413 [-0.2747990848972066, 7.720346271042832, 24.32680479259329] [589 393 613 433]
42.14185933715964
