In [None]:
# fix json if it includes missing images
import json
import os
from tqdm.notebook import tqdm

# Input COCO json and the destination of the filtered copy; both are empty
# placeholders here and must be set before the cell can run.
val_file_path = ""
output_path = ""
# Use a context manager so the input handle is closed as soon as it is parsed.
with open(val_file_path) as fp:
    orig_val_data = json.load(fp)
# Lookup from image file_name to image id.
img_name_to_id = {img['file_name']:img['id'] for img in orig_val_data['images']}

# Directory that every image's file_name is joined onto.
IMAGES_DIR = "/shared/vision/dataset/"
def get_missing_images(images, images_dir=None):
    """Return the ids of the images whose file is not present on disk.

    Args:
        images: iterable of image dicts; each must provide 'file_name'
            (joined onto the images directory) and 'id'.
        images_dir: directory the file names are relative to. When omitted
            the module-level IMAGES_DIR is used.

    Returns:
        list with the 'id' of every image whose path does not exist, in
        input order.
    """
    root = IMAGES_DIR if images_dir is None else images_dir
    missing_image_ids = []
    for img in tqdm(images):
        if not os.path.exists(os.path.join(root, img['file_name'])):
            # Take the id from the record itself instead of a global
            # file_name lookup, so the function works for any image list and
            # stays correct when two records share a file_name.
            missing_image_ids.append(img['id'])
    return missing_image_ids

missing_image_ids = get_missing_images(orig_val_data['images'])
print(len(missing_image_ids))

# Only write a new json when something actually has to be removed.
if len(missing_image_ids) > 0:
    # A set gives O(1) membership tests; the comprehensions below test every
    # image and annotation against it.
    missing_id_set = set(missing_image_ids)
    dataset = {}
    dataset["categories"] = orig_val_data["categories"]
    dataset["images"] = [img for img in orig_val_data['images'] if img['id'] not in missing_id_set]
    dataset["annotations"] = [annot for annot in orig_val_data['annotations'] if annot['image_id'] not in missing_id_set]

    with open(output_path, "w") as fp:
        json.dump(dataset, fp, indent=4)
        

In [None]:
# visualize intermediate dataset
import json
from collections import defaultdict
import random
import cv2

# Path of the video-format annotation json (empty placeholder; set before running).
train_file = ""
with open(train_file, "r") as fp:
    train_data = json.load(fp)

# video id -> annotations of that video
vid_id_to_anns = defaultdict(list)
for ann in train_data["annotations"]:
    vid_id_to_anns[ann["video_id"]].append(ann)

# random.sample raises ValueError when asked for more items than exist, so cap
# the sample size at the number of available videos.
viz_vids = random.sample(train_data['videos'], min(10, len(train_data['videos'])))

for vid in viz_vids:
    vid_id = vid["id"]
    annotations = vid_id_to_anns[vid_id]
    # Only the first frame of each video is drawn.
    file_path = vid['file_names'][0]
    img_cv = cv2.imread(f"{file_path}")
    if img_cv is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"skipping video {vid_id}: could not read {file_path}")
        continue
    for ann in annotations:
        # First entry of "bboxes", unpacked as (x, y, width, height).
        x1, y1, w, h = ann["bboxes"][0]
        xmin = int(x1)
        ymin = int(y1)
        xmax = int(x1+w)
        ymax = int(y1+h)
        img_cv = cv2.rectangle(img_cv, (xmin, ymin), (xmax, ymax), (0,255,0), 2)
    cv2.imwrite(f"/home/rgummadi/DetectAndAvoid/flywheel/scripts/OVIS_VIZ/{vid_id}.png", img_cv)

## visualize 10 images from coco dataset

In [None]:
# visualize input to yolov++ 

import json
from collections import defaultdict
# Image root and annotation json (empty placeholders; set before running).
data_dir = ""
train_file = ""
with open(train_file, "r") as fp:
    train_data = json.load(fp)

# image id -> file name, and image id -> annotations of that image
gt_img_ids_to_img = {img["id"]:img["file_name"] for img in train_data["images"]}
img_id_gts = defaultdict(list)
for ann in train_data["annotations"]:
    img_id_gts[ann["image_id"]].append(ann)

import random
# Cap the sample size: random.sample raises ValueError if fewer than 10 images exist.
viz_img_ids = random.sample(list(gt_img_ids_to_img.keys()), min(10, len(gt_img_ids_to_img)))

import cv2
for img_id in viz_img_ids:
    file_path = gt_img_ids_to_img[img_id]
    img_cv = cv2.imread(f"{data_dir}/{file_path}")
    if img_cv is None:
        # cv2.imread returns None for an unreadable/missing file; skip it
        # instead of crashing inside cv2.rectangle.
        print(f"skipping image {img_id}: could not read {data_dir}/{file_path}")
        continue
    annotations = img_id_gts[img_id]
    for ann in annotations:
        # "bbox" is unpacked as (x, y, width, height).
        x1, y1, w, h = ann["bbox"]
        xmin = int(x1)
        ymin = int(y1)
        xmax = int(x1+w)
        ymax = int(y1+h)
        img_cv = cv2.rectangle(img_cv, (xmin, ymin), (xmax, ymax), (0,255,0), 1)
    cv2.imwrite(f"/home/rgummadi/DetectAndAvoid/flywheel/scripts/OVIS_VIZ/{img_id}.png", img_cv)

In [None]:
# visualize inputs to yolov++ evaluator

import cv2
import json
from collections import defaultdict
import random
import numpy as np

def preproc(img, input_size, swap=(2, 0, 1)):
    """Resize ``img`` to fit ``input_size`` and paste it onto a gray canvas.

    The image is scaled by a single ratio (the smaller of the height and
    width ratios), then written into the top-left corner of an
    ``(input_size[0], input_size[1], 3)`` uint8 canvas filled with 114.

    Args:
        img: array indexed as (height, width, ...) via ``img.shape``.
        input_size: (height, width) of the output canvas.
        swap: accepted but not used by this function.

    Returns:
        (canvas, r): the padded uint8 image and the scale ratio applied.
    """
    canvas_h, canvas_w = input_size[0], input_size[1]
    canvas = np.ones((canvas_h, canvas_w, 3), dtype=np.uint8) * 114

    # One ratio for both axes keeps the aspect ratio.
    r = min(canvas_h / img.shape[0], canvas_w / img.shape[1])
    scaled_w = int(img.shape[1] * r)
    scaled_h = int(img.shape[0] * r)

    # cv2.resize takes the target size as (width, height).
    scaled = cv2.resize(img, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
    canvas[:scaled_h, :scaled_w] = scaled

    return np.ascontiguousarray(canvas, dtype=np.uint8), r

# Ground-truth json and detections json (empty placeholders; set before running).
gt_file = ""
pred_file = ""

with open(gt_file, "r") as fp:
    gt = json.load(fp)
with open(pred_file, "r") as fp:
    dt = json.load(fp)

# image id -> detections / ground-truth annotations of that image
img_id_dets = defaultdict(list)
for det in dt:
    img_id_dets[det["image_id"]].append(det)

img_id_gts = defaultdict(list)
for ann in gt["annotations"]:
    img_id_gts[ann["image_id"]].append(ann)

# Image paths are read from the annotations' "image_name" field, so only
# images with at least one annotation can be sampled.
gt_img_ids_to_img = {ann["image_id"]:ann["image_name"] for ann in gt["annotations"]}
viz_img_ids = random.sample(list(gt_img_ids_to_img.keys()), min(10, len(gt_img_ids_to_img.keys())))

input_size = (1920, 1920)
for img_id in viz_img_ids:
    file_path = gt_img_ids_to_img[img_id]
    img_cv = cv2.imread(file_path)
    if img_cv is None:
        # cv2.imread returns None for an unreadable/missing file.
        print(f"skipping image {img_id}: could not read {file_path}")
        continue
    # _, r = preproc(img_cv, input_size)
    detections = img_id_dets[img_id]
    annotations = img_id_gts[img_id]
    # Detections above the 0.1 score threshold are drawn in red (BGR 0,0,255).
    for det in detections:
        if det['score'] > 0.1:
            xmin, ymin, w, h = map(int, [val for val in det["bbox"]])
            xmax = xmin+w
            ymax = ymin+h
            img_cv = cv2.rectangle(img_cv, (xmin, ymin), (xmax, ymax), (0,0,255), 2)
            img_cv = cv2.putText(img_cv, str(det['category_id']),(xmin-5, ymin-5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
    # Ground truth is drawn in green (BGR 0,255,0).
    for ann in annotations:
        xmin, ymin, w, h = map(int, [val for val in ann["bbox"]])
        xmax = xmin+w
        ymax = ymin+h
        img_cv = cv2.rectangle(img_cv, (xmin, ymin), (xmax, ymax), (0,255,0), 2)
        # Label with the annotation's own category; reading `det` here would
        # reuse whatever detection the previous loop happened to leave behind.
        img_cv = cv2.putText(img_cv, str(ann['category_id']),(xmax+5, ymax+5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
    cv2.imwrite(f"/home/rgummadi/DetectAndAvoid/flywheel/scripts/OVIS_VIZ/{img_id}.png", img_cv)

## Convert zipline coco dataset to coco-video dataset format

In [None]:
# Convert zipline dataset to coco-video dataset format

import json
import os
import cv2
from collections import defaultdict
from tqdm.notebook import tqdm

# Source COCO json to convert.
v7_file = "/shared/vision/dataset/metadata/v7_8_cls/test_annotations_coco_fmt.json"
# Index of the '/'-separated file_name component used as the video name;
# paths containing "zeromatter" use component 3, all others component 1.
Video_name_idx = 3 if "zeromatter" in v7_file else 1
# Directory prepended to every image file_name.
data_dir = "/shared/vision/dataset/"
# Destination of the converted coco-video json.
new_json_file = "/shared/vision/dataset/metadata/v7_8_cls/coco_vid/test_coco_vid_06_06.json"
# Close the input handle once the json is parsed.
with open(v7_file, "r") as fp:
    v7_data = json.load(fp)
# single_category = [{'supercategory': 'none', 'id': 0, 'name': 'Airborne'}]
# categories = [{'supercategory': 'none', 'id': 0, 'name': 'Airborne'},
#  {'supercategory': 'none', 'id': 1, 'name': 'Zip'},
#  {'supercategory': 'none', 'id': 2, 'name': 'Glider'},
#  {'supercategory': 'none', 'id': 3, 'name': 'Balloon'},
#  {'supercategory': 'none', 'id': 4, 'name': 'Paraglider'},
#  {'supercategory': 'none', 'id': 5, 'name': 'Bird'},
#  {'supercategory': 'none', 'id': 6, 'name': 'Flock'},
#  {'supercategory': 'none', 'id': 7, 'name': 'Airplane'},
#  {'supercategory': 'none', 'id': 8, 'name': 'Ultralight'},
#  {'supercategory': 'none', 'id': 9, 'name': 'Helicopter'},
#  {'supercategory': 'none', 'id': 10, 'name': 'Unknown'},
#  {'supercategory': 'none', 'id': 11, 'name': 'HangGlider'},
#  {'supercategory': 'none', 'id': 12, 'name': 'CommercialAirliner'},
#  {'supercategory': 'none', 'id': 13, 'name': 'Drone'},
#  {'supercategory': 'none', 'id': 14, 'name': 'Artificial'},
#  {'supercategory': 'none', 'id': 15, 'name': 'Natural'}]

# Class names of the label map written to the output json; a name's position
# in this list is its category id.
_category_names = ['Airplane', 'Paraglider', 'Helicopter', 'Zip',
                   'Ultralight', 'Glider', 'Bird', 'Balloon']
categories = [{'supercategory': 'none', 'id': class_id, 'name': class_name}
              for class_id, class_name in enumerate(_category_names)]

# categories = [{'supercategory': 'none', 'id': 0, 'name': 'aircraft'},
#               {'supercategory': 'none', 'id': 1, 'name': 'aircraft-nav-light'},
#               {'supercategory': 'none', 'id': 2, 'name': 'strobe'}]

vid_to_images = defaultdict(list)   # video name -> original image records
img_id_to_vid = {}                  # new image id -> new video id
unique_videos = set()               # video names seen so far
video_by_name = {}                  # video name -> its entry in `videos`

videos = []
# categories = []
images = []

vid = 0      # next video id to assign
img_id = 0   # next image id to assign
old_img_id_to_new_img_id = {}
# subsampled_img_ids = [img["id"] for img in v7_data["images"]]
for image in tqdm(v7_data["images"]):
    # The video name is one path component of the image file_name.
    vid_num = image["file_name"].split("/")[Video_name_idx]
    image_file_name = os.path.join(data_dir, image["file_name"])
    vid_to_images[vid_num].append(image)
    old_img_id_to_new_img_id[image["id"]] = img_id

    # Look the video up by name rather than assuming it is the most recently
    # created one: frames of different videos may be interleaved in the input,
    # and updating `videos[vid-1]` would then extend the wrong video.
    video = video_by_name.get(vid_num)
    if video is None:
        # Width/height of a video are taken from its first image.
        video = {"id": vid, "width": image["width"], "height": image["height"], "length": 0, "file_names": []}
        videos.append(video)
        video_by_name[vid_num] = video
        unique_videos.add(vid_num)
        vid += 1

    video["file_names"].append(image_file_name)
    video["length"] += 1
    img_id_to_vid[img_id] = video["id"]

    new_image = {"id": img_id,
                 "width": image["width"],
                 "height": image["height"],
                 "name": image_file_name,
                 "sid": video["id"],           # id of the owning video
                 "fid": video["length"]-1}     # 0-based position inside that video
    images.append(new_image)
    img_id += 1
    
# Re-id every annotation and point it at the renumbered image and its video.
annotations = []
for ann_id, ann in enumerate(tqdm(v7_data["annotations"])):
    # if ann["image_id"] not in subsampled_img_ids:
    #     continue
    annotations.append({
        "id": ann_id,
        "category_id": ann["category_id"],
        "sid": img_id_to_vid[old_img_id_to_new_img_id[ann["image_id"]]],
        "iscrowd": False,
        "ignore": False,
        "bbox": ann["bbox"],
        "area": ann["area"],
        "image_id": old_img_id_to_new_img_id[ann["image_id"]],
        'attributes': ann["attributes"],
    })
# Leave the counter at "next free id", as a manual increment would.
ann_id = len(annotations)

# categories = sorted(v7_data["categories"], key=lambda x: x["id"])
# new_json_data = {"info": {}, "categories": single_category, "videos": videos, "images": images, "annotations": annotations}
new_json_data = {
    "info": {},
    "categories": categories,
    "videos": videos,
    "images": images,
    "annotations": annotations,
}
with open(new_json_file, "w") as f:
    json.dump(new_json_data, f, indent=4)

In [None]:
# Reload the converted coco-video json; the context manager closes the handle.
with open("/shared/vision/dataset/metadata/v7_8_cls/coco_vid/test_coco_vid_06_06.json", "r") as f:
    data = json.load(f)


## Split videos if filenames skip by 10 or more frames

In [None]:
import copy
import json

with open("/shared/vision/dataset/metadata/v7_8_cls/coco_vid/val_coco_vid_06_06.json", "r") as f:
    data = json.load(f)
new_json_outfile = "/shared/vision/dataset/metadata/v7_8_cls/coco_vid/val_coco_vid_06_06_split_vid.json"

stride = 10 # when there's >=10 frames gap in between a single video, split to new video

from collections import defaultdict

# old video id -> images of that video (not consumed by the splitting loop below)
video_to_image = defaultdict(list)
for image in data["images"]:
    old_video_id = image["sid"]
    video_to_image[old_video_id].append(image)

# old video id -> annotations of that video (not consumed by the splitting loop below)
video_to_annot = defaultdict(list)
for annot in data["annotations"]:
    old_video_id = annot["sid"]
    video_to_annot[old_video_id].append(annot)

# image "name" (the path stored in a video's file_names) -> image record
image_filename_to_image_entries = {}
for image in data["images"]:
    image_filename_to_image_entries[image["name"]] = image

# image id -> annotations on that image
image_id_to_annot_entry = defaultdict(list)
for annot in data["annotations"]:
    image_id_to_annot_entry[annot["image_id"]].append(annot)

########################################
# Initialize new json fields
new_info = data['info']
new_categories = data['categories']
new_videos = []
new_images = []
new_annotations = []
########################################


def _emit_chunk(chunk_entry):
    """Finalize one chunked video and append it to the new_* output lists.

    Sets the chunk's 'length' from its file_names, copies each of its images
    with 'sid' set to the chunk id and 'fid' renumbered from 0, copies every
    annotation of those images with the new 'sid', and finally stores a deep
    copy of the chunk in new_videos (the caller keeps reusing `chunk_entry`).
    """
    chunk_id = chunk_entry['id']
    chunk_entry['length'] = len(chunk_entry['file_names'])
    for frame_idx, image_filename in enumerate(chunk_entry['file_names']):
        image_entry = copy.deepcopy(image_filename_to_image_entries[image_filename])
        image_entry["sid"] = chunk_id
        image_entry['fid'] = frame_idx
        new_images.append(image_entry)

        # Image ids are unchanged, so annotations only need the new video id.
        for annot in image_id_to_annot_entry[image_entry["id"]]:
            new_annot = copy.deepcopy(annot)
            new_annot["sid"] = chunk_id
            new_annotations.append(new_annot)
    new_videos.append(copy.deepcopy(chunk_entry))


running_video_id_counter = 0
for big_video_entry in data["videos"]:
    # Start from a copy so fields other than id/length/file_names carry over.
    current_chunked_video_entry = copy.deepcopy(big_video_entry)
    current_chunked_video_entry['id'] = running_video_id_counter
    current_chunked_video_entry['length'] = 0          # filled in by _emit_chunk
    current_chunked_video_entry['file_names'] = []

    last_image_id = None
    for filename in big_video_entry["file_names"]:
        # Frame number is parsed from the file stem, e.g. ".../000123.png" -> 123.
        image_id = int(filename.split('/')[-1].split('.')[0])

        if last_image_id is not None and image_id - last_image_id >= stride:
            # Gap reached: close the current chunk and start a new one that
            # begins with this frame.
            _emit_chunk(current_chunked_video_entry)
            running_video_id_counter += 1
            current_chunked_video_entry['id'] = running_video_id_counter
            current_chunked_video_entry['length'] = 0
            current_chunked_video_entry['file_names'] = [filename]
        else:
            current_chunked_video_entry['file_names'].append(filename)

        last_image_id = image_id

    # Close the trailing chunk of this source video.
    _emit_chunk(current_chunked_video_entry)
    running_video_id_counter += 1

print(new_info)
print(new_categories)
print(len(new_videos))
print(len(new_images))
print(len(new_annotations))

new_json_data = {"info": new_info, "categories": new_categories, "videos": new_videos, "images": new_images, "annotations": new_annotations}

with open(new_json_outfile, "w") as f:
    json.dump(new_json_data, f, indent=4)

In [None]:
import json
# Input path is an empty placeholder; set it (and the output path below) before running.
with open("", "r") as f:
    data = json.load(f)
print("no of videos", len(data["videos"]))
print("no of images", len(data["images"]))
print("no of annotations", len(data["annotations"]))
# Where category_id is stored as a list, keep only its first element.
for ann in data["annotations"]:
    if isinstance(ann["category_id"], list):
        ann["category_id"] = ann["category_id"][0]

with open("", "w") as f:
    json.dump(data, f, indent=4)

## Trim coco ovis data format to fixed len number of images per video

In [None]:
# trim coco ovis data format to fixed len number of images per video

import json
from tqdm.notebook import tqdm

with open("/shared/vision/dataset/metadata/v7_8_cls/coco_vid/val_coco_vid_06_06.json", "r") as f:
    data = json.load(f)
print("videos: ", len(data["videos"]), " images: ", len(data["images"]), " annoatations: ", len(data["annotations"]))

videos = []
old_vid_id_to_new_vid_id = {}
new_v_id = 0
skipped_videos = set()   # old ids of dropped videos (set: tested once per image/annotation)
min_seq_len = 64         # videos with fewer frames than this are dropped
seq_len = 500            # kept videos are truncated to at most this many frames
trimmed_file = f"/shared/vision/dataset/metadata/v7_8_cls/coco_vid/trimmed1000_64-500seq_val_coco_vid_06_06.json"
max_videos = 100000      # upper bound on the number of kept videos

for vid in tqdm(data["videos"]):
    if new_v_id >= max_videos or vid["length"] < min_seq_len:
        skipped_videos.add(vid["id"])
        continue
    old_vid_id_to_new_vid_id[vid["id"]] = new_v_id
    vid["file_names"] = vid["file_names"][:seq_len]
    # A kept video may be shorter than seq_len (anything >= min_seq_len is
    # kept), so record the real number of frames instead of seq_len.
    vid["length"] = len(vid["file_names"])
    vid["id"] = new_v_id
    videos.append(vid)
    new_v_id += 1

images = []
new_img_id = 0
old_img_id_to_new_img_id = {}
skipped_images = set()   # old ids of dropped images
for img in tqdm(data["images"]):
    # Drop images of skipped videos and frames beyond the truncation point.
    if img["sid"] in skipped_videos or img["fid"] >= seq_len:
        skipped_images.add(img["id"])
        continue
    old_img_id_to_new_img_id[img["id"]]=new_img_id
    img["id"] = new_img_id
    img["sid"] = old_vid_id_to_new_vid_id[img["sid"]]
    images.append(img)
    new_img_id += 1

new_ann_id = 0
annotations = []
for ann in tqdm(data["annotations"]):
    if ann["sid"] in skipped_videos:
        continue
    if ann["image_id"] in skipped_images:
        continue
    ann["id"] = new_ann_id
    ann["sid"] = old_vid_id_to_new_vid_id[ann["sid"]]
    ann["image_id"] = old_img_id_to_new_img_id[ann["image_id"]]
    # Where category_id is stored as a list, keep only its first element.
    ann["category_id"] = ann["category_id"][0] if isinstance(ann["category_id"], list) else ann["category_id"]
    annotations.append(ann)
    new_ann_id += 1

categories = [{'supercategory': 'none', 'id': 0, 'name': 'Airplane'},
 {'supercategory': 'none', 'id': 1, 'name': 'Paraglider'},
 {'supercategory': 'none', 'id': 2, 'name': 'Helicopter'},
 {'supercategory': 'none', 'id': 3, 'name': 'Zip'},
 {'supercategory': 'none', 'id': 4, 'name': 'Ultralight'},
 {'supercategory': 'none', 'id': 5, 'name': 'Glider'},
 {'supercategory': 'none', 'id': 6, 'name': 'Bird'},
 {'supercategory': 'none', 'id': 7, 'name': 'Balloon'}]
trimmed_data = {"info": {}, "categories": categories, "videos": videos, "images": images, "annotations": annotations}
with open(trimmed_file, "w") as f:
    json.dump(trimmed_data, f, indent=4)

print("no of videos", len(trimmed_data["videos"]))
print("no of images", len(trimmed_data["images"]))
print("no of annotations", len(trimmed_data["annotations"]))

## Run sanity checks

In [None]:
## Run sanity checks

In [None]:
import json
# Path is an empty placeholder; set it to the json to be checked.
with open("", "r") as f:
    data = json.load(f)

# Check 1: annotation ids must be unique.
ann_ids = [ann["id"] for ann in data["annotations"]]
assert len(set(ann_ids))==len(ann_ids), "duplicate annotation ids"

# Check 2: count annotations whose image_id has no matching image record.
img_ids = [img["id"] for img in data["images"]]
img_id_set = set(img_ids)   # O(1) membership instead of scanning the list per annotation
ann_img_ids_not_in_imgs = [ann["image_id"] for ann in data["annotations"] if ann["image_id"] not in img_id_set]
print(len(ann_img_ids_not_in_imgs))

## visualize 10 images from coco vid dataset

In [None]:
# visualize input to yolov++ 

import json
from collections import defaultdict
# data_dir is not used below: image records already carry a full path in "name".
data_dir = ""
# Annotation json (empty placeholder; set before running).
train_file = ""
with open(train_file, "r") as fp:
    train_data = json.load(fp)

# image id -> image path, and image id -> annotations of that image
gt_img_ids_to_img = {img["id"]:img["name"] for img in train_data["images"]}
img_id_gts = defaultdict(list)
for ann in train_data["annotations"]:
    img_id_gts[ann["image_id"]].append(ann)

import random
# Cap the sample size: random.sample raises ValueError if fewer than 10 images exist.
viz_img_ids = random.sample(list(gt_img_ids_to_img.keys()), min(10, len(gt_img_ids_to_img)))

import cv2
for img_id in viz_img_ids:
    file_path = gt_img_ids_to_img[img_id]
    img_cv = cv2.imread(file_path)
    if img_cv is None:
        # cv2.imread returns None for an unreadable/missing file.
        print(f"skipping image {img_id}: could not read {file_path}")
        continue
    annotations = img_id_gts[img_id]
    for ann in annotations:
        # "bbox" is unpacked as (x, y, width, height).
        x1, y1, w, h = ann["bbox"]
        xmin = int(x1)
        ymin = int(y1)
        xmax = int(x1+w)
        ymax = int(y1+h)
        img_cv = cv2.rectangle(img_cv, (xmin, ymin), (xmax, ymax), (0,255,0), 1)
    cv2.imwrite(f"/home/rgummadi/DetectAndAvoid/flywheel/scripts/OVIS_VIZ/{img_id}.png", img_cv)

## Visualize 10 images from coco vid dataset (repeat)

In [None]:
# visualize input to yolov++ 

import json
from collections import defaultdict
# data_dir is not used below: image records already carry a full path in "name".
data_dir = ""
# Annotation json (empty placeholder; set before running).
train_file = ""
with open(train_file, "r") as fp:
    train_data = json.load(fp)

# image id -> image path, and image id -> annotations of that image
gt_img_ids_to_img = {img["id"]:img["name"] for img in train_data["images"]}
img_id_gts = defaultdict(list)
for ann in train_data["annotations"]:
    img_id_gts[ann["image_id"]].append(ann)

import random
# Cap the sample size: random.sample raises ValueError if fewer than 10 images exist.
viz_img_ids = random.sample(list(gt_img_ids_to_img.keys()), min(10, len(gt_img_ids_to_img)))

import cv2
for img_id in viz_img_ids:
    file_path = gt_img_ids_to_img[img_id]
    img_cv = cv2.imread(file_path)
    if img_cv is None:
        # cv2.imread returns None for an unreadable/missing file.
        print(f"skipping image {img_id}: could not read {file_path}")
        continue
    annotations = img_id_gts[img_id]
    for ann in annotations:
        # "bbox" is unpacked as (x, y, width, height).
        x1, y1, w, h = ann["bbox"]
        xmin = int(x1)
        ymin = int(y1)
        xmax = int(x1+w)
        ymax = int(y1+h)
        img_cv = cv2.rectangle(img_cv, (xmin, ymin), (xmax, ymax), (0,255,0), 1)
    cv2.imwrite(f"/home/rgummadi/DetectAndAvoid/flywheel/scripts/OVIS_VIZ/{img_id}.png", img_cv)

## Split coco vid dataset into train and val

In [None]:
import json
from tqdm.notebook import tqdm
import random

# Input path is an empty placeholder; set it before running.
with open("", "r") as f:
    data = json.load(f)
print("videos: ", len(data["videos"]), " images: ", len(data["images"]), " annoatations: ", len(data["annotations"]))

train_file = ""
val_file = ""

# Seed the RNG so the printed split is the same on every run.
random.seed(32)
vids = [vid["id"] for vid in data["videos"]]
# 80% of the video ids go to train, the remainder to val.
train_vids = random.sample(vids, int(0.8*len(vids)))
train_vid_set = set(train_vids)   # O(1) membership for the filter below
val_vids = [vid for vid in vids if vid not in train_vid_set]
print(train_vids, val_vids)

In [None]:
import json
from tqdm.notebook import tqdm
import random
import copy

# Fixed seed so the train/val split is reproducible.
random.seed(32)

# Input path is an empty placeholder; set it before running.
with open("", "r") as f:
    data = json.load(f)
print("videos: ", len(data["videos"]), " images: ", len(data["images"]), " annoatations: ", len(data["annotations"]))

# Output paths (empty placeholders; set before running).
train_file = ""
val_file = ""

vids = [vid["id"] for vid in data["videos"]]
# 80% of the video ids go to train, the remainder to val.
train_vids = random.sample(vids, int(0.8*len(vids)))
train_vid_set = set(train_vids)   # O(1) membership for the filter below
val_vids = [v_id for v_id in vids if v_id not in train_vid_set]
print(train_vids, val_vids)

def subsample_dataset(data, required_vids):
    """Keep only the requested videos and renumber all ids from 0.

    Videos whose 'id' is in ``required_vids`` are kept; their images and
    annotations are kept with them. Video, image and annotation ids are
    reassigned consecutively in input order, and 'sid' / 'image_id'
    references are rewritten to the new ids.

    Note: the kept video/image/annotation dicts of ``data`` are modified in
    place and reused in the result, so pass a copy if the input must survive.

    Args:
        data: dict with "videos", "images", "annotations" and "categories".
        required_vids: iterable of (hashable) video ids to keep.

    Returns:
        dict with keys "info" (always {}), "categories", "videos", "images"
        and "annotations".

    Raises:
        KeyError: if an image or annotation references a video/image id that
            is neither kept nor skipped.
    """
    # Sets make every membership test below O(1); with lists the three loops
    # were quadratic in the dataset size.
    required = set(required_vids)

    new_v_id = 0
    videos = []
    old_vid_id_to_new_vid_id = {}
    skipped_videos = set()
    for vid in tqdm(data["videos"]):
        if vid["id"] not in required:
            skipped_videos.add(vid["id"])
            continue
        old_vid_id_to_new_vid_id[vid["id"]] = new_v_id
        vid["id"] = new_v_id
        videos.append(vid)
        new_v_id += 1

    images = []
    new_img_id = 0
    old_img_id_to_new_img_id = {}
    skipped_images = set()
    for img in tqdm(data["images"]):
        if img["sid"] in skipped_videos:
            skipped_images.add(img["id"])
            continue
        old_img_id_to_new_img_id[img["id"]] = new_img_id
        img["id"] = new_img_id
        img["sid"] = old_vid_id_to_new_vid_id[img["sid"]]
        images.append(img)
        new_img_id += 1

    new_ann_id = 0
    annotations = []
    for ann in tqdm(data["annotations"]):
        if ann["sid"] in skipped_videos or ann["image_id"] in skipped_images:
            continue
        ann["id"] = new_ann_id
        ann["sid"] = old_vid_id_to_new_vid_id[ann["sid"]]
        ann["image_id"] = old_img_id_to_new_img_id[ann["image_id"]]
        annotations.append(ann)
        new_ann_id += 1

    new_data = {"info": {}, "categories": data["categories"], "videos": videos, "images": images, "annotations": annotations}

    return new_data

# subsample_dataset is handed a deep copy each time, so `data` itself is never
# the object passed in and both splits start from the same untouched input.
train_data = subsample_dataset(copy.deepcopy(data), train_vids)
val_data = subsample_dataset(copy.deepcopy(data), val_vids)

# Write each split to disk, then report its size.
for split_title, split_path, split_data in (
    ("=== Training dataset ===", train_file, train_data),
    ("=== Validation dataset ===", val_file, val_data),
):
    with open(split_path, "w") as f:
        json.dump(split_data, f, indent=4)

    print(split_title)
    print("no of videos", len(split_data["videos"]))
    print("no of images", len(split_data["images"]))
    print("no of annotations", len(split_data["annotations"]))

In [None]:
#create a tiny coco json
import json
import random
from tqdm.notebook import tqdm
from copy import deepcopy
import os

with open("/shared/vision/dataset/metadata/v7_8_cls/test_annotations_coco_fmt.json", "r") as f:
    data = json.load(f)
tiny_dir = "/shared/vision/dataset/metadata/v7_8_cls/tiny/"
tiny_file = os.path.join(tiny_dir, "test_annotations_coco_fmt.json")
os.makedirs(tiny_dir, exist_ok=True)

# Fixed seed so the same images are picked on every run.
random.seed(42)
# Pick 2 images; capped so a dataset with fewer images does not raise ValueError.
subsampled_imgs = random.sample(data["images"], min(2, len(data["images"])))
subsampled_img_ids = [img["id"] for img in subsampled_imgs]
old_ids_to_new_ids = {}
new_img_id = 0
images = []
for img in (subsampled_imgs):
    # Copy before renumbering so the loaded `data` stays untouched.
    img_copy = deepcopy(img)
    old_id = img_copy["id"]
    img_copy["id"] = new_img_id
    old_ids_to_new_ids[old_id] = new_img_id
    images.append(img_copy)
    new_img_id += 1

new_ann_id = 0
annotations = []
for ann in tqdm(data["annotations"]):
    # Dict membership is O(1); its keys are exactly the sampled image ids.
    if ann["image_id"] in old_ids_to_new_ids:
        ann_copy = deepcopy(ann)
        ann_copy["image_id"] = old_ids_to_new_ids[ann["image_id"]]
        ann_copy["id"] = new_ann_id
        new_ann_id += 1
        annotations.append(ann_copy)

tiny_data = {"info": {}, "categories": data["categories"], "images": images, "annotations": annotations}

with open(tiny_file, "w") as f:
    json.dump(tiny_data, f, indent=4)

# ZM-Synthetic logs processing

In [None]:
import json
import os
import random
from pathlib import Path
from collections import defaultdict

# Config
zm_folder = "/shared/vision/dataset/zeromatter_synthetic"
data_dir = "/shared/vision/dataset/"
# Render directories to scan. Only sub-directories are kept (the output jsons
# below are written into zm_folder itself) and the list is sorted so the scan
# order does not depend on os.listdir's arbitrary ordering.
# NOTE(review): "depricated" presumably matches the on-disk folder name;
# verify before correcting the spelling.
renders = sorted(
    os.path.join(zm_folder, folder)
    for folder in os.listdir(zm_folder)
    if folder != "depricated" and os.path.isdir(os.path.join(zm_folder, folder))
)
# Fraction of videos assigned to the training split.
train_ratio = 0.8
output_train_path = os.path.join(zm_folder, "coco_train_06_04_v2.json")
output_val_path = os.path.join(zm_folder, "coco_val_06_04_v2.json")

def find_all_coco_annotations(base_dirs):
    """Recursively collect every 'coco_annotations.json' under ``base_dirs``.

    Args:
        base_dirs: iterable of directory paths to search.

    Returns:
        Sorted list of resolved (absolute) path strings. Each file appears
        once even if the base directories overlap, and the sorted order makes
        the result independent of filesystem iteration order.
    """
    found = set()
    for base_dir in base_dirs:
        for path in Path(base_dir).rglob('coco_annotations.json'):
            found.add(str(path.resolve()))
    return sorted(found)

all_coco_paths = find_all_coco_annotations(renders)
# Fail early with a clear message instead of an IndexError on
# all_coco_paths[0] after all the work below.
if not all_coco_paths:
    raise FileNotFoundError(f"no coco_annotations.json found under {zm_folder}")

# Initialize counters and data structures
new_img_id = 0
new_ann_id = 0
video_to_images = defaultdict(list)            # video name -> image records
image_id_to_annotations = defaultdict(list)    # new image id -> annotations

# Load and remap annotations
for coco_json in all_coco_paths:
    with open(coco_json, "r") as f:
        data = json.load(f)

    img_id_to_ann = defaultdict(list)
    for ann in data["annotations"]:
        img_id_to_ann[ann["image_id"]].append(ann)

    img_id_to_images = {img["id"]: img for img in data["images"]}

    # Only images that have at least one annotation are visited here, so
    # images without annotations are left out of the merged dataset.
    for old_img_id, annotations in img_id_to_ann.items():
        img = img_id_to_images[old_img_id]
        img["id"] = new_img_id
        # Collapse the first doubled "/systems/systems/" path segment.
        img["file_name"] = img["file_name"].replace("/systems/systems/", "/systems/", 1)

        # Derive a "video name" from the path
        # This assumes the path includes the video at a fixed depth. Adjust if needed.
        path_parts = Path(img["file_name"]).parts
        if "systems" in path_parts:
            systems_idx = path_parts.index("systems")
            video_name = "/".join(path_parts[:systems_idx + 2])  # up to systems/[video]
        else:
            video_name = "unknown"

        video_to_images[video_name].append(img)
        for ann in annotations:
            ann["image_id"] = new_img_id
            ann["id"] = new_ann_id
            image_id_to_annotations[new_img_id].append(ann)
            new_ann_id += 1

        new_img_id += 1

# Split videos into train/val.
# Sorting before a seeded shuffle makes the split reproducible regardless of
# the order in which annotation files were discovered.
random.seed(42)
all_video_names = sorted(video_to_images.keys())
random.shuffle(all_video_names)

split_index = int(len(all_video_names) * train_ratio)
# Lists (not sets) keep the shuffled order, so the order of images in the
# output files does not depend on string hash randomization.
train_video_names = all_video_names[:split_index]
val_video_names = all_video_names[split_index:]

# Collect images and annotations for train/val
train_images, val_images = [], []
train_annotations, val_annotations = [], []

for video_name in train_video_names:
    for img in video_to_images[video_name]:
        train_images.append(img)
        train_annotations.extend(image_id_to_annotations[img["id"]])

for video_name in val_video_names:
    for img in video_to_images[video_name]:
        val_images.append(img)
        val_annotations.extend(image_id_to_annotations[img["id"]])

# Get categories from one file
# NOTE(review): assumes every annotation file shares the same categories - confirm.
with open(all_coco_paths[0], "r") as f:
    categories = json.load(f)["categories"]

# Save results
with open(output_train_path, "w") as f:
    json.dump({"images": train_images, "annotations": train_annotations, "categories": categories}, f)

with open(output_val_path, "w") as f:
    json.dump({"images": val_images, "annotations": val_annotations, "categories": categories}, f)

print(f"Train annotations saved to: {output_train_path}")
print(f"Val annotations saved to: {output_val_path}")

In [None]:
import json
# The two coco-video jsons whose frame lists are compared.
v1 = "/shared/vision/dataset/metadata/v7_8_cls/coco_vid/trimmed1000_64-500seq_test_coco_vid_06_06.json"
v2 = "/shared/vision/dataset/metadata/ovis_v7/trimmed1000_fixedlen_02_26_test_split_video_sequences.json"
with open(v1, "r") as f:
    v1_data = json.load(f)
with open(v2, "r") as f:
    v2_data = json.load(f)

# Flatten every video's file_names into one list per dataset.
v1_files = []
for vid in v1_data["videos"]:
    v1_files.extend(vid["file_names"])

v2_files = []
for vid in v2_data["videos"]:
    v2_files.extend(vid["file_names"])

# Compare against the other dataset's *file list*. Testing `file not in v2`
# would do a substring check against the json path string instead.
v1_file_set = set(v1_files)
v2_file_set = set(v2_files)
v1_not_in_v2 = [file for file in v1_files if file not in v2_file_set]
v2_not_in_v1 = [file for file in v2_files if file not in v1_file_set]

print(len(v1_not_in_v2))
print(len(v2_not_in_v1))