In [None]:
pip install yolox --no-deps  # onnxruntime==1.8.0 is putdated, hence --no-deps

In [16]:
import cv2
import gdown
import torch
from torchvision import transforms
import numpy as np
from pathlib import Path
from ultralytics.utils import ops


from yolox.exp import get_exp
from yolox.utils import postprocess
from yolox.utils.model_utils import fuse_model

from boxmot import BotSort
from argparse import Namespace


# Function for letterbox resizing (padding to maintain aspect ratio)
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val performance)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 32), np.mod(dh, 32)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)

# default model weigths for these model names
YOLOX_ZOO = {
    'yolox_n.pt': 'https://drive.google.com/uc?id=1AoN2AxzVwOLM0gJ15bcwqZUpFjlDV1dX',
    'yolox_s.pt': 'https://drive.google.com/uc?id=1uSmhXzyV1Zvb4TJJCzpsZOIcw7CCJLxj',
    'yolox_m.pt': 'https://drive.google.com/uc?id=11Zb0NN_Uu7JwUd9e6Nk8o2_EUfxWqsun',
    'yolox_l.pt': 'https://drive.google.com/uc?id=1XwfUuCBF4IgWBWK2H7oOhQgEj9Mrb3rz',
    'yolox_x.pt': 'https://drive.google.com/uc?id=1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5',
}

preprocess = transforms.Compose([
    transforms.ToTensor(),
])


device = torch.device('cpu')  # Use 'cuda' if available
yolox_model = 'yolox_s.pt'
yolox_model_path = Path('./examples') / yolox_model

if not Path(yolox_model).exists():
    gdown.download(
        url=YOLOX_ZOO[yolox_model],
        output=yolox_model,
        quiet=False
    )

exp = get_exp(None, 'yolox_s')
# needed for bytetrack yolox people models
# update with your custom model needs
exp.num_classes = 1

ckpt = torch.load(
    yolox_model,
    map_location=device
)

# Initialize device and arguments
args = Namespace(conf=0.5, iou=0.5, agnostic_nms=False, classes=None)

model = exp.get_model()
model.eval()
model.load_state_dict(ckpt["model"])
model = fuse_model(model)
model.to(device)
model.eval()

# Initialize the tracker
tracker = BotSort(
    reid_weights=Path('osnet_x0_25_msmt17.pt'),  # Path to ReID model
    device=device,  # Use CPU for inference
    half=False
)

# Open the video file or stream
vid = cv2.VideoCapture(0)  # Replace with 'path/to/your/video.avi' if needed

while True:
    # Capture frame-by-frame
    ret, frame = vid.read()

    # If ret is False, it means we have reached the end of the video
    if not ret:
        break

    # Convert frame to tensor and move to device
    # Define the transform pipeline
    # Apply letterbox resizing
    frame_letterbox, ratio, (dw, dh) = letterbox(frame, new_shape=[640, 640], auto=False, scaleFill=True)

    # Apply the transformations
    frame_tensor = preprocess(frame_letterbox).unsqueeze(0).to(device)
    
    print('frame_tensor.shape', frame_tensor.shape)

    # Perform detection with YOLOX
    with torch.no_grad():
        dets = model(frame_tensor)
        
    print('dets.shape', dets.shape)
        
    dets = postprocess(
        dets, 1, 0.5,
        0.2, class_agnostic=True
    )[0]
    
    
    if dets is not None:
        # Adjust boxes back to the original image size
        dets[:, [0, 2]] -= dw  # x1, x2 - dw
        dets[:, [1, 3]] -= dh  # y1, y2 - dh
        dets[:, :4] /= ratio[0]  # divide by ratio to scale to original

        # Filter and rearrange detections
        dets[:, 4] = dets[:, 4] * dets[:, 5]
        dets = dets[:, [0, 1, 2, 3, 4, 6]]
        dets = dets.cpu().numpy()
    else:
        dets = torch.empty((0, 6))

    print(dets.shape, frame.shape)

    # Update the tracker
    res = tracker.update(dets, frame)  # --> M X (x, y, x, y, id, conf, cls, ind)

    # Plot tracking results on the image
    tracker.plot_results(frame, show_trajectories=True)
    
    # Display the frame with tracked objects
    cv2.imshow('BoXMOT + YOLOX', frame)

    # Wait for key press, exit if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
vid.release()
cv2.destroyAllWindows()

[32m2024-10-25 19:25:50.129[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v11.0.4 🚀 Python-3.11.5 torch-2.2.2CPU[0m
[32m2024-10-25 19:25:50.245[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m183[0m - [32m[1mLoaded pretrained weights from osnet_x0_25_msmt17.pt[0m


frame_tensor.shape torch.Size([1, 3, 1920, 1080])


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 136 but got size 135 for tensor number 1 in the list.