In [1]:
from ultralytics import YOLO, YOLOv10
from matplotlib import pyplot as plt
from pathlib import Path
import cv2
from PIL import Image
import numpy as np
from boxmot import DeepOCSORT
from tqdm import tqdm
import torchvision.transforms as transforms
import torchvision.transforms.functional as con 

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
#Helper Functions
# Per-class box colours, looked up by integer class id. The drawing helpers
# unpack each entry as (r, g, b) and hand it to OpenCV reordered as (b, g, r).
colors = [
    (0, 170, 170),
    (250, 110, 25),
    (40, 40, 40),
    (255, 90, 210),
    (65, 150, 60),
    (100, 100, 100),
    (110, 60, 60),
    (200, 200, 200),
    (110, 110, 170),
    (240, 240, 50),
]
def sizefunction(conf:float):
    """Map a detection confidence to a rectangle line thickness in pixels.

    Args:
        conf: Detection confidence.

    Returns:
        2 below 0.5, 3 below 0.6, 4 below 0.7, otherwise 5.
    """
    # (thickness, exclusive upper bound) pairs, checked in ascending order.
    for thickness, upper_bound in ((2, 0.5), (3, 0.6), (4, 0.7)):
        if conf < upper_bound:
            return thickness
    return 5
def TrackerProcess(im, results, tracker,false_width,false_height):
    """Feed this frame's detections to the tracker and draw its active tracks on ``im``.

    Args:
        im: Frame that is drawn on and handed to the tracker. A numpy array is read
            as (height, width, channels); any other object is expected to expose
            ``.size`` as (width, height).
        results: Detection results; only ``results[0].boxes`` (``xyxy``, ``conf``
            and ``cls``) is read.
        tracker: Object exposing ``update(dets, img)`` and ``active_tracks``.
        false_width: Width of the coordinate space the detection boxes live in.
        false_height: Height of that coordinate space.

    Returns:
        ``im`` with one rectangle per active track that has more than two
        recorded observations. The rectangle colour comes from ``colors`` (by
        class id) and its thickness from ``sizefunction`` (by confidence).
    """
    if isinstance(im, np.ndarray):
        height, width, _ = im.shape
    else:
        width, height = im.size
    scale_x = width / false_width
    scale_y = height / false_height

    boxes = results[0].boxes
    # One row per detection: x1, y1, x2, y2, conf, cls.
    objects = np.zeros((len(boxes.conf), 6))
    # .cpu() keeps this working when the detector ran on the GPU.
    objects[:, :4] = boxes.xyxy.cpu().numpy()
    # The tracker receives ``im`` alongside the boxes, so express the boxes in
    # the frame's own pixel grid rather than the network-input grid. Otherwise
    # anything the tracker derives from the image at the box locations looks at
    # the wrong region of the frame.
    objects[:, [0, 2]] *= scale_x
    objects[:, [1, 3]] *= scale_y
    objects[:, 4] = boxes.conf.cpu().numpy()
    objects[:, 5] = boxes.cls.cpu().numpy().astype(int)

    tracker.update(objects, im) # --> M X (x, y, x, y, id, conf, cls, ind)
    for track in tracker.active_tracks:
        if(track.history_observations and len(track.history_observations) > 2):
            # Latest observation is already in frame coordinates (see scaling above).
            x1, y1, x2, y2, conf = track.history_observations[-1]
            # int() accepts numpy scalars, 0-d tensors and plain floats alike.
            r,g,b = colors[int(track.cls)]
            cv2.rectangle(im, (int(x1), int(y1)), (int(x2), int(y2)), (b,g,r), sizefunction(conf))
    return im
def NoTrackerProcess(im,results):
    """Draw every raw detection in ``results`` onto ``im`` without any tracking.

    Args:
        im: Frame to draw on. A numpy array is read as (height, width, channels);
            any other object is expected to expose ``.size`` as (width, height).
        results: Detection results; only ``results[0].boxes`` (``xyxyn``, ``conf``
            and ``cls``) is read.

    Returns:
        ``im`` with one rectangle per detection. Colour is taken from ``colors``
        by class id, thickness from ``sizefunction`` by confidence.
    """
    if type(im) is np.ndarray:
        frame_h, frame_w, _ = im.shape
    else:
        frame_w, frame_h = im.size

    detections = zip(results[0].boxes.xyxyn, results[0].boxes.conf, results[0].boxes.cls)
    for corners, score, label in detections:
        # Normalised corners are multiplied by the frame size to get pixels.
        left, top, right, bottom = corners
        score = score.item()
        red, green, blue = colors[int(label.item())]
        top_left = (int(left * frame_w), int(top * frame_h))
        bottom_right = (int(right * frame_w), int(bottom * frame_h))
        cv2.rectangle(im, top_left, bottom_right, (blue, green, red), sizefunction(score))
    return im
def PutText2(im, text:str):
    """Write ``text`` in black near the bottom of ``im`` and return ``im``.

    Args:
        im: Image array whose shape unpacks as (height, width, layers).
        text: String to render.

    Returns:
        The same ``im`` object, after ``cv2.putText`` has been called on it.
    """
    rows, cols, _ = im.shape
    # Bottom-left corner of the text: a third of the way across, 20 px above the bottom edge.
    anchor = (int(cols/3), rows-20)
    # Arguments after the anchor: font face, scale 7, black (0, 0, 0), thickness 4, anti-aliased.
    cv2.putText(im, text, anchor, cv2.FONT_HERSHEY_SIMPLEX, 7, (0, 0, 0), 4, cv2.LINE_AA)
    return im
# Steps applied to a preprocessed image before it is given to the detector:
# resize to the model's 640x640 input shape, then convert to a tensor.
_network_input_steps = [
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_network_input_steps)
def GaussianUnsharp2(image):
    """Sharpen ``image`` with an unsharp mask.

    A Gaussian blur (sigma 2.0, kernel size passed as (0, 0)) is subtracted from
    twice the input: ``2.0 * image - 1.0 * blurred``.

    Args:
        image: Image array accepted by ``cv2.GaussianBlur`` / ``cv2.addWeighted``.

    Returns:
        The result of ``cv2.addWeighted`` on the image and its blurred copy.
    """
    blurred = cv2.GaussianBlur(image, (0, 0), 2.0)
    return cv2.addWeighted(image, 2.0, blurred, -1.0, 0)
def Preprocess(img):
    """Contrast-equalise and sharpen ``img``, then run it through ``transform``.

    Steps: convert to an array, apply CLAHE (clip limit 2.0, 8x8 tiles) to
    channel 0 of the YCrCb representation, convert back to RGB, sharpen with
    ``GaussianUnsharp2`` and pass the result through the module-level
    ``transform``.

    Args:
        img: Image convertible with ``np.array``; it is treated as RGB by the
            colour conversions.

    Returns:
        Whatever ``transform`` returns for the processed image.
    """
    rgb_pixels = np.array(img)
    ycrcb = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2YCrCb)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    # Only channel 0 is equalised; the other two channels are left as they are.
    ycrcb[:, :, 0] = equalizer.apply(ycrcb[:, :, 0])
    contrast_rgb = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2RGB)
    sharpened = GaussianUnsharp2(contrast_rgb)
    return transform(Image.fromarray(sharpened))

In [14]:
#Process each frame, label it using the pretrained YOLO network and output a video
# NOTE(review): VideoPath, YOLOPath and OutputPath are not defined in any visible
# cell; they must be set (ideally in a config cell near the top) before this runs.
tracker = DeepOCSORT(
    model_weights=Path("osnet_x0_25_dukemtmcreid.pt"),#Path('osnet_x0_25_msmt17.pt'), # which ReID model to use
    device='cuda:0',
    fp16=True,
    max_age=3
)
vid = cv2.VideoCapture(VideoPath)
if not vid.isOpened():
    # Fail early with a clear message instead of an AttributeError at release time.
    raise IOError(f"Could not open video: {VideoPath}")
model = YOLO(YOLOPath).cpu()
frames = range(0,3000)
#frames = range(0,1)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use 'XVID' or 'MJPG' for .avi files
output_video = None  # created lazily once the first frame's size is known
UsingTracker = True
Tandem = False  # True: write tracker and no-tracker renderings side by side
vid.set(cv2.CAP_PROP_POS_FRAMES, min(frames))

try:
    for i in tqdm(frames):
        ret, im = vid.read()
        if not ret:
            # End of stream (or read failure): stop processing.
            break

        im_network = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im_network = Preprocess(Image.fromarray(im_network)).unsqueeze(0)
        # The network input is laid out as (batch, channels, height, width), so
        # the spatial size is the last two dimensions -- not dimensions 1 and 2.
        _, _, actual_height, actual_width = im_network.shape
        height, width, layers = im.shape
        if output_video is None:
            output_video = cv2.VideoWriter(OutputPath, fourcc, 10, (2 * width if Tandem else width, height),isColor=True)
        results = model(im_network,conf = 0.1, verbose=False)
        if(not Tandem):
            if(UsingTracker):
                TrackerProcess(im,results,tracker,actual_width, actual_height)
            else:
                NoTrackerProcess(im,results)
        else:
            im1 = im.copy()
            im2 = im.copy()

            # Left half: tracked boxes; right half: raw detections.
            TrackerProcess(im1,results,tracker, actual_width, actual_height)
            NoTrackerProcess(im2,results)
            im1 = PutText2(im1, "DeepOCSORT")
            im2 = PutText2(im2, "No Tracker")
            im = np.hstack((im1, im2))
        output_video.write(im)
finally:
    # Always release the capture/writer, even if a frame raised part-way through.
    vid.release()
    if output_video is not None:
        output_video.release()
    cv2.destroyAllWindows()

[32m2024-07-04 10:20:10.148[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v10.0.71 🚀 Python-3.10.6 torch-2.3.0+cu121
CUDA:0 (NVIDIA GeForce RTX 4070 SUPER, 12282MiB)[0m
[32m2024-07-04 10:20:10.216[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m207[0m - [32m[1mSuccessfully loaded pretrained weights from "osnet_x0_25_dukemtmcreid.pt"[0m
 25%|██▍       | 740/3000 [06:02<18:28,  2.04it/s]


In [9]:
#create labels file
# Writes one "<class> <x> <y> <w> <h>" line per detection (normalised xywh),
# with a blank line after each frame's detections.
# NOTE(review): VideoPath, YOLOPath and OutputPath are not defined in any visible
# cell. OutputPath is opened as a text file here, so it presumably has to differ
# from the video path used by the tracking cell -- confirm before running.

vid = cv2.VideoCapture(VideoPath)
if not vid.isOpened():
    # Fail before OutputPath is opened for writing (which would truncate it).
    raise IOError(f"Could not open video: {VideoPath}")
model = YOLO(YOLOPath).cuda()
frames = range(0,10000)
try:
    with open(OutputPath,'w') as f:
        for i in tqdm(frames):
            ret, im = vid.read()
            if not ret:
                # End of stream (or read failure): stop processing.
                break

            im_network = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            im_network = Preprocess(Image.fromarray(im_network))
            results = model(im_network.unsqueeze(0).cuda(),conf = 0.2, verbose=False)
            boxes = results[0].boxes
            # One .tolist() per frame instead of formatting GPU scalars one at a time.
            for (x,y,w,h),cls in zip(boxes.xywhn.tolist(), boxes.cls.tolist()):
                f.write(f"{int(cls)} {x:.4f} {y:.4f} {w:.4f} {h:.4f}\n")
            f.write('\n')
finally:
    # Release the capture even if inference or writing raised.
    vid.release()
    cv2.destroyAllWindows()

  7%|▋         | 740/10000 [02:32<31:45,  4.86it/s]
