In [None]:
# Clone the `pipeline` branch of the project repository into the current directory.
# NOTE(review): re-running this cell after a successful clone only prints a
# "destination path already exists" error from git; it does not update the checkout.
!git clone https://github.com/KL0224/RetrievalPerson -b pipeline

In [None]:
!pip install open_clip_torch
!pip install torchreid
!pip install ultralytics

In [None]:
# Switch the working directory to the cloned repository; later cells use paths
# relative to it (e.g. 'videos_test', 'models/...') and import project modules.
%cd /kaggle/working/RetrievalPerson

In [None]:
import os

# Input videos (relative to the repository root). On Kaggle the dataset copy
# lives at '../../input/dataset-person/videos'.
VIDEO_FOLDER = 'videos_test' #'../../input/dataset-person/videos'
OUTPUT_FOLDER = 'outputs'

# Create every output sub-folder up front. The tracking cell writes per-camera
# files into 'metadata' and 'features', so leaving them out makes its open()
# calls fail with FileNotFoundError.
for _subdir in ('frames', 'metadata', 'features'):
    os.makedirs(os.path.join(OUTPUT_FOLDER, _subdir), exist_ok=True)

In [None]:
import glob
import os
import pickle

import cv2
import numpy as np
import PIL.Image as Image
import torch

from config import *
from tracking.tracklet import TrackletManager
from tracking.detector_tracker import run_tracking
from sampling.sampler import sample_best_per_window
from models.reid import ReIDModel
from models.clip_model import CLIPModel

In [None]:
def save_image_webp(img_bgr, path: str, quality: int = 80, resize_factor: float = 0.5):
    """Save a BGR image as a lossy WebP file, optionally downscaled.

    Args:
        img_bgr: Image array in OpenCV BGR channel order.
        path: Destination file path; missing parent directories are created.
        quality: WebP quality passed to Pillow's encoder.
        resize_factor: Uniform scale applied to both axes before saving;
            1.0 keeps the original size.
    """
    if resize_factor != 1.0:
        img_bgr = cv2.resize(img_bgr, (0, 0), fx=resize_factor, fy=resize_factor)
    # Pillow expects RGB, OpenCV delivers BGR.
    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    # A bare file name has an empty dirname, and os.makedirs('') raises.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    img_pil.save(path, format="WEBP", quality=quality)

def save_crop_webp(crop, path):
    """Write a BGR crop to `path` as WebP at the highest lossy quality.

    Args:
        crop: Image array in OpenCV BGR channel order.
        path: Destination file path; missing parent directories are created.

    A warning is printed when OpenCV reports that the file was not written, so
    a later failed read of the crop can be traced back to this point.
    """
    # A bare file name has an empty dirname, and os.makedirs('') raises.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # For WebP, OpenCV treats 1-100 as lossy quality; only values above 100
    # (or omitting the flag) select lossless compression.
    written = cv2.imwrite(
        path,
        crop,
        [cv2.IMWRITE_WEBP_QUALITY, 100]
    )
    if not written:
        print(f"[WARN] Failed to write crop {path}")

def safe_delete(path):
    """Best-effort file removal.

    Does nothing for a falsy or non-existent `path`. Any error raised while
    checking or removing the file is reported as a warning instead of being
    propagated.
    """
    try:
        if not path or not os.path.exists(path):
            return
        os.remove(path)
    except Exception as err:
        print(f"[WARN] Failed to delete {path}: {err}")


In [None]:
# Initial tracklet registry and the torch device used by the models below
# (GPU when CUDA is available, otherwise CPU).
manager = TrackletManager()
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Tracking -> crop sampling -> ReID/CLIP embeddings, processed one camera video at a time.
reid_model = ReIDModel(device=device, model_path='models/osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth')
clip_model = CLIPModel(device=device)

# Per-frame / per-detection prints flood the notebook output on real videos;
# enable only while debugging the tracker.
VERBOSE_TRACKING = False

# The per-camera metadata/feature files below are written into these folders,
# so make sure they exist before the first open().
os.makedirs(os.path.join(OUTPUT_FOLDER, 'metadata'), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_FOLDER, 'features'), exist_ok=True)

seqs = sorted(glob.glob(f'{VIDEO_FOLDER}/seq_*'))

# NOTE: only the first two sequences are processed (limit kept from the original cell).
for seq in seqs[:2]:
    seq_name = os.path.basename(seq)
    seq_id = int(seq_name.split('_')[-1])
    print(f'Processing sequence {seq_name}')

    os.makedirs(os.path.join(OUTPUT_FOLDER, 'frames', seq_name), exist_ok=True)
    cameras = sorted(glob.glob(f'{seq}/camera_*'))
    # cam_id is the 1-based position in the sorted listing, not the number
    # embedded in the file name.
    for cam_id, video_path in enumerate(cameras, start=1):
        # A fresh manager per camera: tracklets are flushed to disk per (sequence, camera).
        manager = TrackletManager()
        camera_name = "_".join(os.path.basename(video_path).split('_')[:2])
        camera_frame_folder = os.path.join(OUTPUT_FOLDER, 'frames', seq_name, camera_name)
        os.makedirs(camera_frame_folder, exist_ok=True)
        print(f'  Processing camera {cam_id}')
        for frame_id, frame, boxes, ids, confs in run_tracking(video_path, model_name='yolov8n.pt',
                                                               vid_stride=1,
                                                               confidence=CONFIDENCE_THRESHOLD,
                                                               device=device):
            if VERBOSE_TRACKING:
                print(f'    Processing frame {frame_id}, detected {len(boxes)} persons')
            frame_save_path = os.path.join(camera_frame_folder, f'frame_{frame_id:06d}.webp')
            save_image_webp(frame, frame_save_path)

            frame_h, frame_w = frame.shape[:2]
            for box, tid, conf in zip(boxes, ids, confs):
                if VERBOSE_TRACKING:
                    print(f'      Track ID: {tid}, BBox: {box}, Conf: {conf}')
                gid = seq_id*SEQ_ID_OFFSET + cam_id * CAMERA_ID_OFFSET + tid
                x1, y1, x2, y2 = map(int, box)

                # Clamp to the frame: a negative start index would wrap around in
                # numpy slicing and silently yield a wrong or empty crop.
                x1, y1 = max(x1, 0), max(y1, 0)
                x2, y2 = min(x2, frame_w), min(y2, frame_h)

                # A box is unusable as soon as EITHER side collapses (the
                # previous `and` let zero-width or zero-height boxes through).
                if x2 <= x1 or y2 <= y1:
                    continue

                crop = frame[y1:y2, x1:x2]

                crop_path = os.path.join(
                    OUTPUT_FOLDER,
                    "crops",
                    seq_name,
                    camera_name,
                    f"{gid}_{frame_id:06d}.webp"
                )

                save_crop_webp(crop, crop_path)
                t = manager.get(gid, seq_id, cam_id)
                # The unclamped tracker box is what gets recorded with the frame.
                t.add_frame(frame_id, box, conf, crop_path)

        tracklets = manager.all()

        for t in tracklets:
            candidates = sample_best_per_window(t.frames)

            # Keep only the sampled crops on disk; everything else is removed
            # and its path cleared on the frame record.
            candidate_paths = set(fr.crop_path for fr in candidates)
            for fr in t.frames:
                if fr.crop_path not in candidate_paths:
                    safe_delete(fr.crop_path)
                    fr.crop_path = None

            # cv2.imread returns None for unreadable files; drop those so the
            # models never receive a None image.
            imgs = [cv2.imread(fr.crop_path) for fr in candidates if fr.crop_path]
            imgs = [img for img in imgs if img is not None]
            if not imgs:
                print(f"[WARN] No readable crops for track {t.global_id}; skipping embeddings")
                continue

            # One embedding per tracklet: mean over the sampled crops, then L2-normalised.
            reid_feat = reid_model.extract(imgs).mean(axis=0)
            reid_feat = reid_feat / np.linalg.norm(reid_feat)
            t.reid_embeddings.append(reid_feat)

            clip_feats = clip_model.encode_batch(imgs).mean(axis=0)
            clip_feats = clip_feats / np.linalg.norm(clip_feats)
            t.clip_embeddings.append(clip_feats)

        # save into metadata.txt
        with open(f"{OUTPUT_FOLDER}/metadata/{seq_name}_{camera_name}.txt", "w") as meta_file:
            for t in tracklets:
                for fr in t.frames:
                    meta_file.write(f"{t.sequence_id} {t.camera_id} {fr.frame_id} {t.global_id} {int(fr.bbox[0])} {int(fr.bbox[1])} {int(fr.bbox[2])} {int(fr.bbox[3])}\n")

        # save into features.txt
        features = {}
        output_path_pkl = f"{OUTPUT_FOLDER}/features/{seq_name}_{camera_name}.pkl"

        with open(f"{OUTPUT_FOLDER}/features/{seq_name}_{camera_name}.txt", "w") as feat_file:
            for t in tracklets:
                # Tracklets skipped above have no embedding to export.
                if len(t.reid_embeddings) == 0 or len(t.clip_embeddings) == 0:
                    continue

                reid_emb = t.reid_embeddings[0]
                clip_emb = t.clip_embeddings[0]

                feat_file.write(f"{t.sequence_id} {t.camera_id} {t.global_id} {reid_emb.tolist()} {clip_emb.tolist()}\n")
                key = (t.sequence_id, t.camera_id, t.global_id)
                features[key] = [reid_emb, clip_emb]

        with open(output_path_pkl, "wb") as pkl_file:
            pickle.dump(features, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
from collections import defaultdict

# Re-index the tracklets by frame: frame_id -> list of {"id", "bbox"} records,
# the layout consumed by the visualisation helper.
# NOTE(review): `manager` is whichever TrackletManager was assigned last; after
# the tracking cell that is the final camera processed — confirm it matches the
# video being visualised.
tracklets = manager.all()
bbox_per_frame = {}
tracks = defaultdict(list)
for t in tracklets:
    for f in t.frames:
        entry = {"id": t.global_id, "bbox": f.bbox}
        tracks[f.frame_id].append(entry)

In [None]:
import cv2
import random

def visualize_video_with_ids(
    video_path,
    tracks_per_frame,
    output_path=None,
    show=True,
):
    """Draw tracked boxes and IDs on a video, optionally saving and previewing it.

    Args:
        video_path: Path of the video to read.
        tracks_per_frame: Mapping from frame index to a list of dicts with
            keys "id" and "bbox" (x1, y1, x2, y2). Frames are counted from 0 in
            read order. NOTE(review): assumes the tracker's frame ids use the
            same 0-based numbering — confirm.
        output_path: When given, the annotated video is written there (mp4v).
        show: Preview each frame in an OpenCV window; ESC stops playback.
            If the OpenCV build has no GUI support the preview is disabled
            with a warning and processing continues. Pass False on headless
            machines.
    """
    fallback_fps = 25.0  # used when the container does not report a frame rate

    cap = cv2.VideoCapture(video_path)
    writer = None
    window_opened = False
    try:
        if not cap.isOpened():
            print(f"[WARN] Could not open video: {video_path}")
            return

        width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps    = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):  # also catches NaN
            fps = fallback_fps

        if output_path:
            writer = cv2.VideoWriter(
                output_path,
                cv2.VideoWriter_fourcc(*"mp4v"),
                fps,
                (width, height)
            )

        id_colors = {}

        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            for item in tracks_per_frame.get(frame_idx, []):
                track_id = item["id"]
                x1, y1, x2, y2 = map(int, item["bbox"])

                if track_id not in id_colors:
                    # A private generator yields the same per-ID colour as
                    # seeding the module RNG, without resetting global state.
                    rng = random.Random(int(track_id))
                    id_colors[track_id] = (
                        rng.randint(50, 255),
                        rng.randint(50, 255),
                        rng.randint(50, 255),
                    )

                color = id_colors[track_id]

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(
                    frame,
                    f"ID {track_id}",
                    (x1, max(0, y1 - 7)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    color,
                    2
                )

            if writer:
                writer.write(frame)

            if show:
                try:
                    cv2.imshow("Tracking Visualization", frame)
                    window_opened = True
                    if cv2.waitKey(1) & 0xFF == 27:
                        break
                except cv2.error as err:
                    # Headless OpenCV builds raise here; keep writing the
                    # output video instead of aborting.
                    print(f"[WARN] Preview unavailable, continuing without display: {err}")
                    show = False

            frame_idx += 1
    finally:
        # Release handles even if drawing or writing failed part-way.
        cap.release()
        if writer:
            writer.release()
        if window_opened:
            cv2.destroyAllWindows()

In [None]:
# Overlay the collected track boxes on one camera video.
# NOTE(review): `tracks` is built from the current `manager`; verify it holds
# the tracklets of this exact sequence/camera before trusting the overlay.
demo_video_path = 'videos_test/seq_001/camera_3_cut.mp4'
visualize_video_with_ids(demo_video_path, tracks)

In [None]:
# Sampling + Embeddings (ReID)
# For every tracklet in the current manager: pick the per-window best frames,
# reduce them to at most three (first, last, middle) and append the mean ReID
# feature of those frames to the tracklet.
# NOTE(review): this reads `frame.image`, while the tracking cell stores crops
# on disk via `crop_path`, and it appends a second, un-normalised embedding —
# looks like a leftover from an earlier pipeline version; confirm before running.
reid_model = ReIDModel()

for t in manager.all():
    sampled = sample_best_per_window(t.frames)
    if len(sampled) > 3:
        mid = len(sampled) // 2
        candidates = sampled[:1] + sampled[-1:] + sampled[mid:mid + 1]
    else:
        candidates = sampled

    imgs = [frame_rec.image for frame_rec in candidates]

    reid_feats = reid_model.extract(imgs).mean(axis=0)
    t.reid_embeddings.append(reid_feats)

In [None]:
# Drop the notebook's reference to the ReID model before the CLIP cell runs
# (presumably to let its memory be reclaimed — confirm).
del reid_model

In [None]:
# Sampling + Embeddings (CLIP)
# Same frame selection as the ReID cell (first, last and middle of the
# per-window best frames); each image is encoded separately and the mean CLIP
# feature is appended to the tracklet.
# NOTE(review): this reads `frame.image`, while the tracking cell stores crops
# on disk via `crop_path` — looks like a leftover from an earlier pipeline
# version; confirm before running.
clip_model = CLIPModel()

for t in manager.all():
    sampled = sample_best_per_window(t.frames)
    if len(sampled) > 3:
        mid = len(sampled) // 2
        candidates = sampled[:1] + sampled[-1:] + sampled[mid:mid + 1]
    else:
        candidates = sampled

    imgs = [frame_rec.image for frame_rec in candidates]

    per_image_feats = [clip_model.encode_image(img) for img in imgs]
    clip_feats = np.array(per_image_feats).mean(axis=0)
    t.clip_embeddings.append(clip_feats)


In [None]:
# Drop the notebook's reference to the CLIP model once its embeddings are stored
# (presumably to let its memory be reclaimed — confirm).
del clip_model

In [None]:
# Dump the tracklets of the current manager into two flat text files.
# NOTE(review): metadata.txt is opened in append mode while features.txt is
# overwritten, so re-running this cell duplicates metadata rows but not feature
# rows — confirm which mode is intended.
tracklets = manager.all()

# save into metadata.txt — one row per frame:
# sequence_id camera_id frame_id global_id x1 y1 x2 y2
with open(f"{OUTPUT_FOLDER}/metadata.txt", "a") as meta_out:
    for t in tracklets:
        for frame in t.frames:
            coords = " ".join(str(int(frame.bbox[k])) for k in range(4))
            meta_out.write(f"{t.sequence_id} {t.camera_id} {frame.frame_id} {t.global_id} {coords}\n")

# save into features.txt — one row per tracklet:
# sequence_id camera_id global_id <reid list> <clip list>
with open(f"{OUTPUT_FOLDER}/features.txt", "w") as feat_out:
    for t in tracklets:
        reid_vec = t.reid_embeddings[0].tolist()
        clip_vec = t.clip_embeddings[0].tolist()
        feat_out.write(f"{t.sequence_id} {t.camera_id} {t.global_id} {reid_vec} {clip_vec}\n")

In [None]:
!zip -r results.zip /kaggle/working/outputs
from IPython.display import FileLink
FileLink(r'results.zip')