In [3]:
import os
import argparse
import json
import logging
import random
import re
from ast import literal_eval
from collections import defaultdict
from typing import Dict, List
from air_daa_offline.database.database import Database, DatabaseName
from air_daa_offline.logs_id import LogsID
from air_daa_offline.database.flight_artifacts import FlightArtifact, ArtifactNotFound
from autonomy.vision_daa.artifacts.flight_frames import FetchFrameFormat, FlightCameraFrames
from autonomy.vision_daa.artifacts.annotated_frames import AnnotatedFrames
from autonomy.vision_daa.artifacts.legacy_scripts.utils import get_legacy_to_fcf_frame_id

import pandas as pd
from tqdm.notebook import tqdm

from flywheel.utils.defaults import AirborneObjectDetectorOntology
classes = [i.name for i in AirborneObjectDetectorOntology]
name_to_id = {i.name: i.value for i in AirborneObjectDetectorOntology}

In [4]:
# load v7 json and grab all unique videos from it
data_dir = "/shared/vision/dataset/"
v7_json = f"{data_dir}/metadata/v7/subsample_10_percent/train_annotations_coco_fmt.json"
# output_vid_json = "/shared/users/raajitha/YOLOVexperiments/data/vid_annotations_train.json"
with open(v7_json, 'r') as f:
    data = json.load(f)

unique_videos = set()
for image in data["images"]:
    unique_videos.add(image["file_name"].split("/")[1])

In [5]:
# get all log_ids (nest_id_flight_id) for v7 videos

import sqlalchemy as sa

# Constants
DAA_DB_NAME = "daa_perception"
DAA_DB_USERNAME = "zipline"

# Connection to DAA database
daa_db_pswd = os.getenv("DAA_PERCEPTION_DB_PASSWORD")
daa_db_url = f'postgresql://{DAA_DB_USERNAME}:{daa_db_pswd}@daa-perception.ckpty26inwq5.us-west-2.rds.amazonaws.com:5432/{DAA_DB_NAME}'
daa_engine = sa.create_engine(daa_db_url)
daa_db_cursor = daa_engine.raw_connection().cursor()

query= f"""
SELECT DISTINCT(flight_key)
FROM (
    SELECT 
        CONCAT('nest_', nest_id, '_flight_', flight_id) as flight_key
    FROM
        video
    WHERE
        video.id in {tuple(unique_videos)}
    ) a
"""
daa_db_cursor.execute(query)
res = daa_db_cursor.fetchall()
v7_log_ids = [LogsID.from_str(x[0]) for x in res]

In [7]:
from autonomy.vision_daa.artifacts.annotated_frames import AnnotatedFrames
from autonomy.artifacts.annotation_task_result import AnnotationTaskResult
from autonomy.vision_daa.artifacts.legacy_scripts.utils import get_legacy_to_fcf_frame_id
from tqdm.notebook import tqdm

db_name = DatabaseName["DAA_PERCEPTION_PROD"]
db = Database(db_name=db_name)
annotated_frames = pd.DataFrame()

af_arts = FlightArtifact.get_all_artifacts(
    db,
    AnnotatedFrames,
    namespace_override="production",
    raw=True,
    get_current_only=True,
)

log_ids = [af_art.logs_id for af_art in af_arts if af_art.logs_id in v7_log_ids]

vid_frame_id_to_fcf_frame_id = {}
for log_id in tqdm(log_ids[:1]):
    ann_art = FlightArtifact.get_current(
        db=db, artifact_type=AnnotatedFrames, logs_id=log_id, namespace_override='production',
    )
    annotated_frames = annotated_frames.append(ann_art.frames)
    vid_frame_id_to_fcf_frame_id.update(get_legacy_to_fcf_frame_id(db, log_id))
fcf_frame_id_to_vid_frame_id = {v: k for k, v in vid_frame_id_to_fcf_frame_id.items()}
videos_with_annotated_frames = set([key[0] for key in vid_frame_id_to_fcf_frame_id.keys() if str(key[0]) in unique_videos])

  0%|          | 0/1 [00:00<?, ?it/s]

  annotated_frames = annotated_frames.append(ann_art.frames)


In [8]:
import cv2
from collections import defaultdict
vid_counter = 0
annotation_counter = 0
coco_vids = []
coco_annots = []

copy_annot_frames = annotated_frames.copy()
copy_annot_frames = copy_annot_frames.reset_index().set_index(["frame_id"])
for vid in tqdm(videos_with_annotated_frames):
    vid_counter+=1
    video_dir = os.path.join(data_dir, "images", str(vid))
    files = os.listdir(video_dir)
    file_nums = []
    for file in files:
        file_nums.append(int(file.split('.')[0].split('_')[1]))
    sorted_files, _ = zip(*sorted(zip(files, file_nums), key=lambda x: x[1]))
    frame_ids = []
    sample_image = cv2.imread(f"{video_dir}/{sorted_files[0]}")
    frame_height, frame_width, _ = sample_image.shape
    
    annot_id_to_boxes = defaultdict(list)
    annot_id_to_areas = defaultdict(list)
    annot_id_to_category = defaultdict(list)
    
    sorted_files_w_annots = []
    for frame in tqdm(sorted_files):
        key = tuple([int(x) for x in frame[:-4].split("_")])
        if key not in vid_frame_id_to_fcf_frame_id:
            continue
        frame_id = vid_frame_id_to_fcf_frame_id[key]
        frame_annots = copy_annot_frames.loc[frame_id]
        
        if type(frame_annots) == pd.core.series.Series and frame_annots["object_id"]=='-1':
            continue

        annot_found = False
        if type(frame_annots) == pd.core.series.Series: #== pd.core.frame.DataFrame
            if frame_annots["class_name"] not in name_to_id:
                continue
            ann_id = annotation_counter + int(frame_annots["object_id"])
            x = float(frame_annots["bbox_x"])
            y = float(frame_annots["bbox_y"])
            w = float(frame_annots["bbox_w"])
            h = float(frame_annots["bbox_h"])
            
            annot_id_to_boxes[ann_id].append([x, y, w, h])
            annot_id_to_category[ann_id].append(name_to_id[frame_annots["class_name"]])
            annot_id_to_areas[ann_id].append(w*h)
            annot_found = True
        else:
            for ann_id in frame_annots["object_id"]:
                df = frame_annots[frame_annots["object_id"]==ann_id]
                if df["class_name"][0] not in name_to_id:
                    continue
                
                x = float(df["bbox_x"][0])
                y = float(df["bbox_y"][0])
                w = float(df["bbox_w"][0])
                h = float(df["bbox_h"][0])

                ann_id = annotation_counter + int(ann_id)
                annot_id_to_boxes[ann_id].append([x, y, w, h])
                annot_id_to_category[ann_id].append(name_to_id[df["class_name"][0]])
                annot_id_to_areas[ann_id].append(w*h)
                annot_found = True
        if annot_found:
            frame_ids.append(frame_id)
            sorted_files_w_annots.append(os.path.join(video_dir, frame))
            break
            
#     for ann_id in annot_id_to_boxes.keys():
#         annotation = {
#             "id" : ann_id, 
#             "video_id" : vid_counter, 
#             "category_id" : annot_id_to_category[ann_id][0], 
#             "areas" : annot_id_to_areas[ann_id], 
#             "bboxes" : annot_id_to_boxes[ann_id], 
#             "iscrowd" : 0,
#         }
#         coco_annots.append(annotation)
#     annotation_counter += len(annot_id_to_boxes.keys())
#     video = {
#             "id" : vid_counter,
#             "width" : frame_width,
#             "height" : frame_height,
#             "length" : len(sorted_files_w_annots),
#             "file_names" : sorted_files_w_annots,
#         }
#     coco_vids.append(video)
    
# coco_vid_format = {
#     "info" : {},
#     "videos" : coco_vids,
#     "annotations" : coco_annots,
#     "categories" : data["categories"],
# }

# with open(output_vid_json, "w") as f:
#     json.dump(coco_vid_format, f)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/131 [00:00<?, ?it/s]

In [9]:
frame_annots

logs_id                                  nest_104_flight_48057
created_at                                   1681419539.352361
object_id                                                    1
task_id                   67b59cc0-004e-56e7-8e36-b09673fe3366
external_id                           633695bfea8ef12dbe863191
ontology_version                                           2.0
annotation_version                                         1.0
height                                                    2180
width                                                     3864
bbox_x                                                     721
bbox_y                                                     476
bbox_w                                                       7
bbox_h                                                       5
width_bbox_x                                                -1
width_bbox_y                                                -1
width_bbox_w                                           

In [None]:
import numpy as np
import json

def make_npy(file_name, outfile):
    with open(file_name, 'r') as f:
        data = json.load(f)
        
    sequences = []
    for vid in data["videos"]:
        sequences.append(np.array(vid["file_names"], dtype=object))
        
    sequences = np.array(sequences, dtype=object)
    np.save(outfile, sequences, allow_pickle=True)
    

make_npy("/shared/users/raajitha/YOLOVexperiments/data/coco_vid_test.json", "test_seq.npy")

In [None]:
import json
import os

data_dir = "/shared/vision/dataset/"
json_path = "/shared/users/raajitha/YOLOVexperiments/data/coco_vid_val.json"
with open(json_path, 'r') as f:
    data = json.load(f)

for video in data["videos"]:
    new_files = []
    for file in video["file_names"]:
        vid = file.split("_")[0]
        new_file = os.path.join(data_dir, "images", vid, file)
        # assert(os.path.exists(new_file))
        new_files.append(new_file)
    video["file_names"] = new_files

with open(json_path, "w") as f:
    json.dump(data, f)

In [22]:
# load v7 json and grab all unique videos from it
import json
import random
data_dir = "/shared/vision/dataset/"
v7_json = f"{data_dir}/metadata/v7/subsample_10_percent/test_annotations_coco_fmt.json"
small_v7_json = f"{data_dir}/metadata/v7/tiny/test_annotations_coco_fmt.json"
with open(v7_json, 'r') as f:
    data = json.load(f)
    
small_data = {}
small_data["categories"] = data["categories"]
small_data["images"] = random.sample(data["images"], 100)
small_img_ids = [img["id"] for img in small_data["images"]]
small_data["annotations"] = [annot for annot in data["annotations"] if annot["image_id"] in small_img_ids]
with open(small_v7_json, "w") as f:
    json.dump(small_data, f)