# YOLO frame inference
Notebook walkthrough that uses the helpers from `inference.py` to run YOLO on extracted frames, then writes `jupyter_submission.json` plus a timing CSV (`time_submission.csv`).
Set the config cell, then run top-to-bottom.


In [None]:
import os
import json
import random
import logging
import time
from pathlib import Path
from types import SimpleNamespace

import cv2
import numpy as np
from tqdm import tqdm
from ultralytics import YOLO

from inference import (
    draw_bbox,
    list_video_dirs,
    parse_frame_idx,
    process_video,
    setup_logger,
)


## Extract data

In [None]:
# Run the project's frame-extraction shell script through IPython's `!` shell escape.
# NOTE(review): presumably this populates the frame folders read by later cells — confirm against the script.
!bash /code/extract_frame.sh

## Set seed

In [None]:
def seed_everything(seed: int = 42) -> None:
    """Seed the random number generators this notebook relies on.

    Seeds Python's ``random`` module and NumPy's global generator, exports
    ``PYTHONHASHSEED``, and seeds PyTorch when it is installed.

    Args:
        seed: Value applied to every generator.
    """
    random.seed(seed)
    # Hash randomization is fixed at interpreter start-up, so this only
    # influences processes launched after this point, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    # Ultralytics YOLO runs on PyTorch, whose generators are independent of
    # the ones above. Imported lazily so the helper still works without torch.
    try:
        import torch
    except ImportError:
        return
    torch.manual_seed(seed)


seed_everything()


## Configure paths and thresholds

In [None]:
# --- Input / output locations ---
WEIGHTS = Path('/code/checkpoint/v8s_640_bs_16_30eps_v3/weights/best.pt')
FRAMES_ROOT = Path('/data/extracted_frames')
OUT_DIR = Path('/result/notebook_run')

# --- Settings forwarded to process_video through cfg_args ---
IMG_SIZE = 640
CONF = 0.3
IOU = 0.45
DEVICE = '0'  # set to 'cpu' to disable GPU
SAVE_VIS = False  # set True to dump visualizations under OUT_DIR/visualize

# Create the output folder up front; the visualization folder exists only
# when SAVE_VIS is enabled, otherwise VIS_ROOT stays None.
OUT_DIR.mkdir(parents=True, exist_ok=True)
if SAVE_VIS:
    VIS_ROOT = OUT_DIR / 'visualize'
    VIS_ROOT.mkdir(parents=True, exist_ok=True)
else:
    VIS_ROOT = None

logger = setup_logger()
logger.info('weights: %s', WEIGHTS)
logger.info('frames_root: %s', FRAMES_ROOT)

# Bundle the settings into one attribute-style object.
cfg_args = SimpleNamespace(
    **{
        'imgsz': IMG_SIZE,
        'conf': CONF,
        'iou': IOU,
        'device': DEVICE,
        'save_vis': SAVE_VIS,
    }
)


## Load model and inspect data

In [None]:
# Load the checkpoint only when the file is present; otherwise fail with a
# message naming the missing path.
if WEIGHTS.exists():
    logger.info('Loading model...')
    model = YOLO(str(WEIGHTS))
else:
    raise FileNotFoundError(f'Weights not found: {WEIGHTS}')

# List the video folders under FRAMES_ROOT and show up to five of their names.
videos = list_video_dirs(FRAMES_ROOT)
print(f'Found {len(videos)} video folders under {FRAMES_ROOT}')
if videos:
    print('Sample IDs:', [folder.name for folder in videos[:5]])


## Run inference with timing

In [None]:
# Collect one result per video folder together with how long it took.
submission = []
all_predicted_time = []  # list of tuples (id, time_ms)

for vdir in tqdm(videos, desc='Processing videos'):
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # duration cannot be skewed by system clock adjustments the way
    # time.time() differences can.
    started = time.perf_counter()
    result = process_video(vdir, model, cfg_args, VIS_ROOT, logger)
    elapsed_ms = int((time.perf_counter() - started) * 1000)
    all_predicted_time.append((vdir.name, elapsed_ms))
    submission.append(result)

print(f'Completed {len(submission)} items')
print('Timing sample:', all_predicted_time[:3])


## Save submission and timing CSV

In [None]:
import csv

# Write the full results as pretty-printed JSON.
out_json = OUT_DIR / 'jupyter_submission.json'
with out_json.open('w', encoding='utf-8') as f:
    json.dump(submission, f, ensure_ascii=False, indent=2)

# Index detections by video id once instead of rescanning the submission for
# every row. setdefault keeps the first entry when an id repeats.
detections_by_id = {}
for entry in submission:
    detections_by_id.setdefault(entry['video_id'], entry['detections'])

# Write one CSV row per video: id, JSON-encoded detections, elapsed milliseconds.
# csv.writer terminates each row and quotes the JSON field, which contains
# commas and double quotes that would otherwise corrupt the columns.
time_csv = OUT_DIR / 'time_submission.csv'
with time_csv.open('w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['id', 'answer', 'time'])
    for vid, t_ms in all_predicted_time:
        answer = json.dumps(detections_by_id.get(vid, []))
        writer.writerow([vid, answer, t_ms])
print(f'Wrote results → {out_json}')
print(f'Wrote timing → {time_csv}')
if submission:
    print('Preview:', json.dumps(submission[0], ensure_ascii=False, indent=2)[:500])


## (Optional) Inspect a visualization frame

In [None]:
# Report the first exported visualization image, or explain why none is available.
if not (SAVE_VIS and VIS_ROOT):
    print('Set SAVE_VIS=True in the config cell to export visualization frames.')
else:
    # Look one folder level below VIS_ROOT for any .jpg file.
    sample = next(VIS_ROOT.glob('*/*.jpg'), None)
    if not sample:
        print('No visualization files written yet.')
    else:
        print(f'Sample visualization: {sample}')
