In [1]:
from tp2.utils import display_tracking_results, save_tracking_results, parse_seqinfo, parse_file_pd
from tp2.compute import ObjectInstance, iou_jaccard, compute_similarity_matrix, hungarian_algorithm

import cv2
import numpy as np
import pandas as pd
import os
import torch
from tqdm import tqdm

probleme person are confused with each other, i tried to change the normalization from constent to std and mean from the frame

In [2]:
def preprocess_patch(im_crops, width=64, height=128):
        # constantes

        roi_means = np.array([123.675, 116.28, 103.53], dtype=np.float32)
        roi_stds = np.array([58.395, 57.12, 57.375], dtype=np.float32)

        roi_input = cv2.resize(im_crops, (width, height)) 
        roi_input = cv2.cvtColor(roi_input, cv2.COLOR_BGR2RGB)
        std = np.std(roi_input)
        mean = np.mean(roi_input)
        roi_input = (np.asarray(roi_input).astype(np.float32)  -  mean) / std
        roi_input = np.moveaxis(roi_input, -1, 0) 
        object_patch = roi_input.astype('float32') 
        return object_patch

In [3]:
import onnx
import onnxruntime as rt
def get_model(model_path):
    onnx_model = onnx.load(model_path)

    # Check the model for consistency
    onnx.checker.check_model(onnx_model)
    return onnx_model

problem with model input, forgot to unsqueeze

In [4]:
class Tracker:
    def __init__(self, frame, obj_id, bb_left, bb_top, bb_width, bb_height,):
        self.frame = int(frame)
        self.obj_id = int(obj_id)
        self.bb_left = float(bb_left)
        self.bb_top = float(bb_top)
        self.bb_width = float(bb_width)
        self.bb_height = float(bb_height)
        # self.conf = float(conf)
        # self.coord = float(x), float(y), float(z)

        self.feature = []
    
    def generate_feature(self, session, frame):

        y_end = int(self.bb_top + self.bb_height)
        x_end = int(self.bb_left + self.bb_width)
        x_start = int(self.bb_left)
        y_start = int(self.bb_top)
        image = frame[y_start:y_end, x_start:x_end]

        # Preprocess the image
        image = preprocess_patch(image)

        with torch.no_grad():
            input_data = torch.from_numpy(image).unsqueeze(0)
            input_data = input_data.numpy()

            input_name = session.get_inputs()[0].name
            feature = session.run(None, {input_name: input_data})[0]
            self.feature = feature[0]

In [5]:
def compute_euclidean_distance_matrix(tracked : list[Tracker], detected : list[Tracker] ):
    distance_matrix = np.zeros((len(tracked), len(detected)))
    for i, track in enumerate(tracked):
        for j, detect in enumerate(detected):
            distance_matrix[i, j] = 1 / (1 + (track.feature - detect.feature))
    return distance_matrix


def cosine_similarity(tracked : list[Tracker], detected : list[Tracker] ):
    distance_matrix = np.zeros((len(tracked), len(detected)))
    for i, track in enumerate(tracked):
        for j, detect in enumerate(detected):
            distance_matrix[i, j] = np.dot(track.feature, detect.feature) / (np.linalg.norm(track.feature) * np.linalg.norm(detect.feature))
    return distance_matrix

probleme with echange of id when object goes in front of other (change alpha and beta, lost frame, threshold (hyperparameter))
probleme with object getting old id (same hyper parameter search)

In [18]:
THRESHOLD = 0.5
MAX_LOST_FRAMES = 50
ALPHA = 0.3
BETA = 0.7

# manage the tracking
def track_management(detected_objects : pd.DataFrame, data_path : str, model_path: str) -> pd.DataFrame:
    
    # video settings
    seq_info = parse_seqinfo(data_path)
    video_path = os.path.join(data_path, 'img1')
    frame_length = int(seq_info['seqLength'])
    # frame_length = 10 # for testing




    session = rt.InferenceSession(model_path,providers=rt.get_available_providers())

    # initialize tracked objects
    tracked_objects = []
    # initialize results with pandas frame
    results = pd.DataFrame(columns=['frame', 'id', 'bb_left', 'bb_top', 'bb_width', 'bb_height', 'conf', 'x', 'y', 'z'])

    idx = 1
    all_matrix = []
    # iter over frames
    for frame_idx in tqdm(range(1,frame_length + 1)):
        new_tracks = []

        # Get detected objects on frame
        detected_obj_on_frame = detected_objects[detected_objects['frame'] == frame_idx]
        # no object detected on frame
        if detected_obj_on_frame.empty:
            for track in tracked_objects:
                if track.frame > frame_idx - MAX_LOST_FRAMES:
                    new_tracks.append(track)

        # atleast one object detected on frame
        else:
            # get frame
            frame = cv2.imread(os.path.join(video_path, f'{frame_idx:06d}.jpg'))

            # Create ObjectInstance objects
            detected_obj_on_frame = [Tracker(*obj) for obj in detected_obj_on_frame.values]

            # generate features for each object
            for obj in detected_obj_on_frame:
                obj.generate_feature(session, frame)

            # compute similarity matrix and matches
            similarity_matrix = compute_similarity_matrix(tracked_objects, detected_obj_on_frame)
            cosine_similarity_matrix = cosine_similarity(tracked_objects, detected_obj_on_frame)

            cost_matrix = ALPHA * similarity_matrix + BETA * cosine_similarity_matrix
            
            matches = hungarian_algorithm(cost_matrix)

            # update tracks
            matched_detections = []
            matched_tracks = []
            matrix = []
            for i, j in matches:
                # matched track
                matrix.append(cost_matrix[i][j])
                if cost_matrix[i][j] >= THRESHOLD:
                    matched_detections.append(j)
                    matched_tracks.append(i)
                    # save track 
                    old_track = tracked_objects[i]
                    # update track information
                    detected_obj_on_frame[j].obj_id = old_track.obj_id
                    new_tracks.append(detected_obj_on_frame[j])

            # remove old tracks
            for i in range(len(tracked_objects)):
                if i not in matched_tracks:
                    if tracked_objects[i].frame > frame_idx - MAX_LOST_FRAMES:
                        new_tracks.append(tracked_objects[i])
            
            # add unmatched detections as new tracks
            for i in range(len(detected_obj_on_frame)):
                if i not in matched_detections:
                    new_tracks.append(detected_obj_on_frame[i])
                    new_tracks[-1].obj_id = idx
                    idx += 1
            all_matrix.append(matrix)

        # update results
        for track in new_tracks:
            track_df = pd.DataFrame([{'frame': track.frame, 'id': track.obj_id, 'bb_left': track.bb_left, 'bb_top': track.bb_top, 'bb_width': track.bb_width, 'bb_height': track.bb_height, 'conf': 1, 'x': -1, 'y': -1, 'z': -1}])
            results = pd.concat([results, track_df], ignore_index=True)

        # update old tracked objects with updated tracked objects
        tracked_objects = new_tracks

    # sort over frame and id
    results.sort_values(by=['frame', 'id'], inplace=True)
    return results, all_matrix

In [7]:
# path to the data
data_path = "/home/gautier/scia2/mlvot/ADL-Rundle-6"

# sequences names
sequence_public = "public-dataset"
sequence_yolo5l = "Yolov5l"
sequence_yolo5s = "Yolov5s"

# create the path to the file
file = f"det/{sequence_yolo5l}/det.txt"
file_path = os.path.join(data_path, file)

# model path
model_path = "/home/gautier/scia2/mlvot/tp4-5/Filemail.com - TP4 et TP5/reid_osnet_x025_market1501.onnx"

# parse the file
parsed_objects_pd = parse_file_pd(file_path, ' ')
parsed_objects_pd.drop(columns=['conf', 'x', 'y', 'z'], inplace=True)



In [19]:
results, all_matrix = track_management(parsed_objects_pd, data_path=data_path, model_path=model_path)

  results = pd.concat([results, track_df], ignore_index=True)
100%|██████████| 525/525 [00:43<00:00, 11.98it/s]


In [10]:
def display_tracking_results(results : pd.DataFrame, data_path : str, output_path : str):
    # parse seqinfo
    seq_info = parse_seqinfo(data_path)

    video_path = os.path.join(data_path, 'img1')

    # Configure video reader
    frame_width = int(seq_info['imWidth'])
    frame_height = int(seq_info['imHeight'])
    frame_length = int(seq_info['seqLength'])
    # frame_length = 10
    fps = int(seq_info['frameRate'])
    
    
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))    

    # get results
    # results = track_management(tracked_object, frame_length)

    # draw bounding boxes
    for frame_idx in range(1, frame_length + 1):
        frame = cv2.imread(os.path.join(video_path, f'{frame_idx:06d}.jpg'))
        tracks = results[results['frame'] == frame_idx]
        for track in tracks.values:
            x, y, w, h = track[2:6]
            cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
            cv2.putText(frame, str(int(track[1])), (int(x), int(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        cv2.imshow('frame', frame)
        out.write(frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    out.release()
    cv2.destroyAllWindows()

In [20]:
output_path = 'output.mp4'
display_tracking_results(results, data_path, output_path)