This notebook includes two demos:
1. [Download](#download) a track from the JSONL file
2. [Parse and extract](#pipeline) tracks in a video using the TrackVerse automated pipeline

In [1]:
import json, gzip
import tqdm

<a id='download'></a>
## Download tracks from the JSONL file
### (1) Read the JSONL file
For demo purposes, we only read one track from the 184K-CB300 subset to show how to read the JSONL file and download the corresponding tracks.

To download all the tracks, refer to the [download instructions](https://github.com/MMPLab/TrackVerse?tab=readme-ov-file#download-trackverse).

In [2]:
# Paths used throughout the demo.
# NOTE(review): HOME_DIR is a machine-specific absolute path; point it at your own copy of the subset files.
HOME_DIR = "/home/yibingwei/dataset"
BASE_DIR = f"{HOME_DIR}/object_tracks_db_fixed_detic"
# Working directory handed to the downloader and the track extractor in later cells.
DATASET_PATH = './trackverse'
subset_dir = 'tracks_subsets/hdvila_lvis/NoTestVids'
subset = 'LVIS-184K-CB300-T0.0-NoTestVids.jsonl.gzip'

subset_gzip = f'{BASE_DIR}/{subset_dir}/{subset}'

# Each line of the gzip-compressed file is parsed as one JSON record; only the
# first one is needed for the demo. The `with` block closes the file handle even
# though the loop stops after a single line.
with gzip.open(subset_gzip, 'rt') as subset_file:
    for line in tqdm.tqdm(subset_file):
        data = json.loads(line)
        break
    else:
        # Fail here with a clear message instead of a NameError on `data` in a later cell.
        raise ValueError(f'No track records found in {subset_gzip}')

0it [00:00, ?it/s]


The explanation of the keys

- `track_id` - unique track identifier.
- `video_size` - [height, width] of the video from which this track was extracted.
- `track_ts` - [start_time, end_time] timestamps (seconds) in the original video for the first and last frame in the track.
- `top10_lbl` - Class IDs of the top-10 predicted classes for the track, based on class logit score.
- `top10_desc` - Names of the top-10 predicted classes for the track, based on class logit score.
- `top10_cls` - [[top-10 logits mean], [top-10 logits std]] A list of the mean values of the classification logits for the top 10 classes, and a list of the standard deviations for these logits.
- `top10_wcls` - [[top-10 weighted logits mean], [top-10 weighted logits std]] A list of the mean scores for each of the top 10 weighted scores (class logits weighted by the objectness score), and a list of the standard deviations of these scores.
- `frame_ts` - timestamps (seconds) in the original video for each frame in the track
- `frame_bboxes` - list of bounding box coordinates [top_left_x, top_left_y, bottom_right_x, bottom_right_y] of the object for each frame in the track.
- `yid` - YouTube ID for the video from which this track was extracted
- `mp4_filename` - Filename of the track produced by running the track extraction pipeline.

In [3]:
# Show the parsed track record (its keys are explained in the list above).
data

{'track_id': 25,
 'video_size': [720, 1280],
 'track_ts': [157.99116666666666, 161.79496666666665],
 'top10_lbl': [276, 588, 965, 869, 764, 110, 1117, 1042, 619, 269],
 'top10_desc': ['coat',
  'jacket',
  'ski parka (also known as ski jacket)',
  'raincoat (also known as waterproof jacket)',
  'parka (also known as anorak)',
  'blazer (also known as sport jacket, sport coat, sports jacket, sports coat)',
  'trench coat',
  'sweatshirt',
  'lab coat (also known as laboratory coat)',
  'cloak'],
 'top10_cls': [[0.7643770575523376,
   0.7308394312858582,
   0.6392128467559814,
   0.6328601241111755,
   0.5113518834114075,
   0.3750734329223633,
   0.3555653393268585,
   0.15030327439308167,
   0.12453538924455643,
   0.06438256800174713],
  [0.06421122699975967,
   0.052835941314697266,
   0.19852320849895477,
   0.16911189258098602,
   0.19979597628116608,
   0.13355493545532227,
   0.12727364897727966,
   0.06547857075929642,
   0.057766273617744446,
   0.0484013631939888]],
 'top10_wc

### (2) Download the original video from YouTube to `DATASET_PATH`


In [4]:
# from utils import youtube as yt_utils

# downloader = yt_utils.YoutubeDL('{DATASET_PATH}/videos_mp4')

# dl_status, video_filepath = downloader.download_video(data['yid'])
# if dl_status == yt_utils.STATUS.FAIL:
#     print(f'[{data['yid']}] Download failed.', flush=True)
# if dl_status == yt_utils.STATUS.DONE:
#     print(f'[{data['yid']}] Already downloaded. Skipping', flush=True)
# else:
#     print(f'[{data['yid']}] Download successful.', flush=True)

from download_videos import TrackVerseDL
class TrackVerseDLARGS:
    """Plain attribute container handed to `TrackVerseDL` for this single-video demo.

    NOTE(review): presumably stands in for the parsed command-line arguments
    of `download_videos`; confirm the attribute names against that script.
    """
    # Base directory given to the downloader (the demo's DATASET_PATH).
    base_dir = DATASET_PATH
    # Left empty here; the video ID is passed directly to `process_video` instead.
    yid_index_fn = ''
    # presumably a single worker with index 0, i.e. no sharding across processes — TODO confirm
    world_size = 1
    rank = 0
    # presumably turn off the cartoon and aesthetics filtering steps — TODO confirm
    skip_cartoon_filter = True
    skip_aesthetics_filter = True
    
    
# Build the downloader from the demo arguments, then process the one video
# that the track read above belongs to.
dl_args = TrackVerseDLARGS()
downloader = TrackVerseDL(dl_args)
downloader.process_video(job_id=0, youtube_id=data['yid'])

[0][1l4wfwq2TLo] Already downloaded.
[0][1l4wfwq2TLo] Already split into segments.


### (3) Extract the track

In [5]:
from extract_tracks import ObjectTrackExtractor, Track
import numpy as np
# For this demo we only extract one given track, so no yid_index_fn is needed.
extractor = ObjectTrackExtractor(base_dir=DATASET_PATH, yid_index_fn='')

# Per-frame timestamps and bounding boxes of the track, converted to float arrays.
frame_times = np.array(data['frame_ts']).astype(float)
frame_boxes = np.array(data['frame_bboxes']).astype(float)

# Wrap the JSONL record in a Track; the full record is passed along as its metadata.
demo_track = Track(data['yid'], ts=frame_times, boxes=frame_boxes, meta=data)
tracks = [demo_track]
extractor.extract_tracks_from_video(vid=data['yid'], tracks=tracks, job_id=0)

[0][1l4wfwq2TLo] Start track extraction
[0][1l4wfwq2TLo] Track extraction done.


In [6]:
# Display the extracted track 
from IPython.display import Video
# Location of the track clip under DATASET_PATH; the Video object is the cell's output.
track_video_path = f"{DATASET_PATH}/tracks_mp4/{extractor.dataset_domain}/{data['mp4_filename']}"
Video(track_video_path)

<a id='pipeline'></a>
## Parse and extract tracks from a scene clip in a video using the TrackVerse automated pipeline
For demo purposes, we only use one scene from the downloaded video and extract the tracks from that scene.

To use the pipeline to create a full dataset, refer to the [pipeline instructions](https://github.com/MMPLab/TrackVerse/tree/main?tab=readme-ov-file#generate-customized-trackverse-dataset).

### (1) Scene Cut


In [7]:
from utils.segments import SegmentExtractor


In [8]:
from parse_tracks import ObjectTracksParser
class ParserArgs:
    """Plain attribute container handed to `ObjectTracksParser` for this demo.

    NOTE(review): presumably stands in for the parsed command-line arguments
    of `parse_tracks`; confirm the attribute names against that script.
    """
    # NOTE(review): this uses BASE_DIR, whereas the download and extraction cells
    # use DATASET_PATH — confirm which directory the parser should work in.
    base_dir = BASE_DIR
    # Left empty for the demo.
    index_fn = ''
    dataset_domain = 'LVIS'
    # presumably a single worker with index 0, i.e. no sharding across processes — TODO confirm
    world_size = 1
    rank = 0
# Instantiate the track parser with the demo arguments.
# NOTE(review): `paser` looks like a typo for `parser`; the name is kept as-is
# because later cells may reference it.
paser = ObjectTracksParser(ParserArgs())

ModuleNotFoundError: No module named 'centernet'