# Log Paper Results

In [1]:
import csv, os, sys, re, string, json, glob, shutil, random, datetime, math

import cv2
import pandas as pd
import numpy as np
import pprint

from IPython.display import HTML as html_print
from IPython.display import Markdown
from IPython.display import clear_output

# from IPython.display import Image
from PIL import Image, ImageDraw, ImageFont
from ipywidgets import Video

import torch
from transformers import AutoTokenizer
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize

import warnings
warnings.filterwarnings('ignore')

import seaborn as sns
sns.set()
from matplotlib import pyplot as plt
import matplotlib.patches as patches
%pylab inline

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Use our own built-in customized COCO API: put the local ../cocoapi checkout
# first on sys.path so that `PythonAPI.pycocotools` is importable from it.
sys.path.insert(0, "../cocoapi")

In [26]:
# from pycocotools.coco import COCO
# from pycocotools.cocoeval import COCOeval
from PythonAPI.pycocotools.coco import COCO
from PythonAPI.pycocotools.cocoeval import COCOeval

import inspect
# Print the source files COCO / COCOeval were actually imported from, to
# confirm which copy of the COCO API is in use.
print("COCO API at:     {}".format(inspect.getfile(COCO)))
print("COCOeval API at: {}".format(inspect.getfile(COCOeval)))

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%reload_ext autoreload
%autoreload 2

COCO API at:     /home/telinwu/research/project_jarvis/modeling/../cocoapi/PythonAPI/pycocotools/coco.py
COCOeval API at: /home/telinwu/research/project_jarvis/modeling/../cocoapi/PythonAPI/pycocotools/cocoeval.py


In [4]:
from coco_eval import calc_iou_individual

# Functions

In [5]:
def stats_on_prediction(
    coco_pred_file,
):
    """Print the mean and standard deviation of prediction counts per image.

    Args:
        coco_pred_file: Path to a JSON file containing a list of prediction
            dicts; only the "image_id" key of each entry is read.

    Returns:
        None. The two statistics are printed, not returned.

    Note:
        Only image ids that occur in the file are counted, so images without
        any prediction do not lower the mean.
    """
    # Context manager so the file handle is closed deterministically.
    with open(coco_pred_file) as pred_fp:
        coco_preds = json.load(pred_fp)

    # image_id -> number of predicted boxes for that image.
    counts_per_image = {}
    for pred in coco_preds:
        image_id = pred["image_id"]
        counts_per_image[image_id] = counts_per_image.get(image_id, 0) + 1

    counts = np.asarray(list(counts_per_image.values()))
    print("Mean Pred BBoxes: {:.3f}".format(np.mean(counts)))
    print("Std  Pred BBoxes: {:.3f}".format(np.std(counts)))

In [6]:
def create_coco_gt_pred_dict(
    coco_gt_file,
    coco_pred_file,
    compensate_missing_pred=False,
):
    """Join ground-truth boxes and predictions per image id.

    Args:
        coco_gt_file: Path to a COCO-style JSON file; its "annotations" list
            is read, using each entry's "image_id" and "bbox".
        coco_pred_file: Path to a JSON list of prediction dicts with the keys
            "image_id", "bbox", "score" and "category_id".
        compensate_missing_pred: If True, a GT image without any prediction
            receives one placeholder prediction (bbox [0, 0, 1, 1], score 1.0,
            category 1). If False, such an image gets an empty prediction
            list.

    Returns:
        dict mapping image_id -> {"gt_bboxes": [bbox, ...],
        "pred_bboxes": [pred dict, ...]}, with predictions sorted by score in
        descending order. Only image ids that have GT annotations are
        included; predictions for other ids are dropped.

    Raises:
        AssertionError: If the predictions cover more distinct image ids than
            the ground truth does.
    """
    # Context managers so both file handles are closed deterministically.
    with open(coco_pred_file) as pred_fp:
        preds = json.load(pred_fp)
    with open(coco_gt_file) as gt_fp:
        gts = json.load(gt_fp)["annotations"]

    gt_dict = {}
    for gt in gts:
        gt_dict.setdefault(gt["image_id"], []).append(gt["bbox"])
    print("Num GT Instances: {}".format(len(gt_dict)))

    preds_dict = {}
    for pred in preds:
        preds_dict.setdefault(pred["image_id"], []).append({
            "bbox": pred["bbox"],
            "score": pred["score"],
            "category_id": pred["category_id"],
            "image_id": pred["image_id"],
        })
    print("Num Pred Instances: {}".format(len(preds_dict)))

    assert len(gt_dict) >= len(preds_dict), (
        "More prediction ids than gt ids, sure the correct file(s)?"
    )

    joint_dict = {}
    for image_id in sorted(gt_dict):
        image_preds = preds_dict.get(image_id)
        if image_preds is None:
            # GT image without predictions: previously this raised KeyError
            # unless compensate_missing_pred was set.
            if compensate_missing_pred:
                image_preds = [{
                    "bbox": [0, 0, 1, 1],
                    "score": 1.0,
                    "category_id": 1,
                    "image_id": image_id,
                }]
            else:
                image_preds = []
        joint_dict[image_id] = {
            "gt_bboxes": gt_dict[image_id],
            # Highest-scoring prediction first.
            "pred_bboxes": sorted(
                image_preds,
                key=lambda x: x["score"],
                reverse=True,
            ),
        }

    print("Num Joint Instances: {}".format(len(joint_dict)))
    return joint_dict


def get_ndarray_coco_results(coco_pred_file):
    """Convert COCO-style predictions into an [N x 7] numpy array.

    Args:
        coco_pred_file: Either a path to a JSON file holding a list of
            prediction dicts, or such a list directly. Each dict must provide
            "image_id", "bbox" (x1, y1, w, h), "score" and "category_id".

    Returns:
        np.ndarray of shape (N, 7) where each row is
        {imageID, x1, y1, w, h, score, class}. An empty prediction list
        yields an array of shape (0, 7), so column indexing stays valid.
    """
    if isinstance(coco_pred_file, str):
        # Context manager so the file handle is closed deterministically.
        with open(coco_pred_file) as pred_fp:
            preds = json.load(pred_fp)
    else:
        preds = coco_pred_file

    rows = [
        [
            pred["image_id"],
            pred["bbox"][0],  # x1
            pred["bbox"][1],  # y1
            pred["bbox"][2],  # w
            pred["bbox"][3],  # h
            pred["score"],
            pred["category_id"],
        ]
        for pred in preds
    ]
    # reshape is a no-op for N > 0 and turns the empty (0,) array into (0, 7).
    return np.asarray(rows).reshape(-1, 7)

In [180]:
def get_per_frame_type_ndarray_coco_results(
    coco_gt_file,
    coco_pred_file,
    scod_clips,
    forced_frame_types=None,
    fully_narrated_image_ids=None,
    verbose=False
):
    """Split predictions and GT annotations by frame type (pre / pnr / post).

    Args:
        coco_gt_file: Path to a COCO-style JSON file (its "images" and
            "annotations" are read), or an already loaded
            ``(images, annotations)`` pair.
        coco_pred_file: Path to a JSON list of prediction dicts, or such a
            list directly.
        scod_clips: Iterable of clip dicts with a "video_uid" and optional
            "pre_frame" / "pnr_frame" / "post_frame" entries, each carrying a
            "frame_number".
        forced_frame_types: Optional collection of short names ("pre", "pnr",
            "post"); frame types not listed get no prediction array.
        fully_narrated_image_ids: Optional collection of image ids; when
            given, predictions, images and annotations are restricted to it.
        verbose: If True, print the number of predictions per frame type.

    Returns:
        Tuple ``(per_frame_type_pred_ndarrays, per_frame_type_anns)``: the
        first maps "<fr>_frame" to the array built by
        ``get_ndarray_coco_results``; the second maps "<fr>_frame" to the
        list of GT annotation dicts for that frame type.

    Raises:
        ValueError: If a GT image's "<video_uid>_<frame_number>" key is not
            found in ``scod_clips``.
        KeyError: If a prediction or annotation refers to an image id absent
            from the GT images, or a requested frame type has no predictions.
    """
    if isinstance(coco_pred_file, str):
        with open(coco_pred_file) as pred_fp:
            preds = json.load(pred_fp)
    else:
        preds = coco_pred_file

    if isinstance(coco_gt_file, str):
        with open(coco_gt_file) as gt_fp:
            gt_data = json.load(gt_fp)
        gts, anns = gt_data["images"], gt_data["annotations"]
    else:
        gts, anns = coco_gt_file

    if fully_narrated_image_ids is not None:
        # A set makes each membership test O(1); the id collection can hold
        # tens of thousands of entries.
        keep_ids = set(fully_narrated_image_ids)
        preds = [x for x in preds if x["image_id"] in keep_ids]
        gts = [x for x in gts if x["id"] in keep_ids]
        anns = [x for x in anns if x["image_id"] in keep_ids]

    # "<video_uid>_<frame_number>" -> frame types that frame plays in any clip.
    frame_type_dict = {}
    for scod in scod_clips:
        video_uid = scod["video_uid"]
        for fr in ["pre", "pnr", "post"]:
            frame_type = "{}_frame".format(fr)
            if frame_type not in scod:
                continue
            frame_num = scod[frame_type]["frame_number"]
            key = "{}_{}".format(video_uid, frame_num)
            types_for_key = frame_type_dict.setdefault(key, [])
            if frame_type not in types_for_key:
                types_for_key.append(frame_type)

    # image id -> frame types, resolved through the image file name, which is
    # parsed as "<video_uid>/<frame_number>.<ext>".
    id_to_frame_mappings = {}
    for gt in gts:
        file_name = gt["file_name"].split(".")[0]
        video_uid, frame_num = file_name.split("/")
        key = "{}_{}".format(video_uid, frame_num)
        if key not in frame_type_dict:
            raise ValueError(
                "Key {} not in scod clips, sure the right file(s)?".format(key)
            )
        image_id = gt["id"]
        if image_id not in id_to_frame_mappings:
            id_to_frame_mappings[image_id] = []
        id_to_frame_mappings[image_id] += frame_type_dict[key]

    per_frame_type_anns = {}
    for ann in anns:
        for frame_type in id_to_frame_mappings[ann["image_id"]]:
            per_frame_type_anns.setdefault(frame_type, []).append(ann)

    per_frame_type_preds = {}
    for pred in preds:
        for frame_type in id_to_frame_mappings[pred["image_id"]]:
            per_frame_type_preds.setdefault(frame_type, []).append(pred)

    per_frame_type_pred_ndarrays = {}
    for fr in ["pre", "pnr", "post"]:
        frame_type = "{}_frame".format(fr)
        if forced_frame_types is not None and fr not in forced_frame_types:
            continue
        per_frame_type_pred_ndarrays[frame_type] = get_ndarray_coco_results(
            per_frame_type_preds[frame_type]
        )
        if verbose:
            print("{}'s cnt = {}".format(frame_type, len(per_frame_type_preds[frame_type])))

    return per_frame_type_pred_ndarrays, per_frame_type_anns

In [179]:
def check_gt_pred_match_on_file(
    coco_gt_file,
    coco_pred_file,
):
    """Verify that every predicted image id exists in the GT images.

    Args:
        coco_gt_file: Path to a COCO-style JSON file, or an already loaded
            dict with an "images" list whose entries carry "id".
        coco_pred_file: Path to a JSON list of prediction dicts, an ndarray
            (converted through the COCO API's ``loadNumpyAnnotations``), or an
            already loaded list of dicts carrying "image_id".

    Returns:
        None when all predicted image ids are present in the ground truth.

    Raises:
        ValueError: If at least one predicted image id is missing from the GT
            images.
    """
    if isinstance(coco_gt_file, str):
        with open(coco_gt_file) as gt_fp:
            coco_gt_data = json.load(gt_fp)
    else:
        coco_gt_data = coco_gt_file

    if isinstance(coco_pred_file, str):
        with open(coco_pred_file) as pred_fp:
            coco_pred_data = json.load(pred_fp)
    elif isinstance(coco_pred_file, np.ndarray):
        _coco = COCO(verbose=False)
        coco_pred_data = _coco.loadNumpyAnnotations(coco_pred_file)
    else:
        coco_pred_data = coco_pred_file

    gt_image_ids = {x["id"] for x in coco_gt_data["images"]}
    pred_image_ids = {x["image_id"] for x in coco_pred_data}
    unknown_ids = pred_image_ids - gt_image_ids
    if unknown_ids:
        def _describe(source):
            # Avoid dumping a whole in-memory dict/array into the message.
            if isinstance(source, str):
                return source
            return "<in-memory {}>".format(type(source).__name__)

        raise ValueError(
            "\nGT file: {}\nPred file: {}\nThey do not match!? "
            "{} predicted image id(s) are missing from the GT images, e.g. {!r}".format(
                _describe(coco_gt_file),
                _describe(coco_pred_file),
                len(unknown_ids),
                next(iter(unknown_ids)),
            )
        )


def naive_ego4d_scod_coco_results(
    coco_gt_file,
    coco_pred_file,
    original_coco_gt_file=None,
    top_k=None,
    ignore_summaries=None,
    do_not_summarize=False,
    verbose=False,
):
    """Run a COCO bbox evaluation of predictions against ground truth.

    Args:
        coco_gt_file: Ground truth passed to the COCO API. When
            ``original_coco_gt_file`` is given, this is instead loaded with
            ``loadRes`` on top of the original GT (e.g. a subset of its
            annotations).
        coco_pred_file: Predictions passed to ``loadRes``.
        original_coco_gt_file: Optional annotation file used to build the
            base COCO object and to validate the predicted image ids.
        top_k: If given, ``params.maxDets`` is set to
            ``[top_k, top_k, top_k]``.
        ignore_summaries: Forwarded to the customized
            ``COCOeval.summarize``.
        do_not_summarize: If True, skip ``summarize`` entirely.
        verbose: Forwarded to the COCO object and to ``params.verbose``.

    Returns:
        Tuple ``(coco_eval, perfs)``. ``perfs`` is ``coco_eval.stats`` after
        summarizing, or ``None`` when ``do_not_summarize`` is True.

    Raises:
        ValueError: From ``check_gt_pred_match_on_file`` when predictions
            reference image ids missing from the ground truth.
    """
    if original_coco_gt_file is not None:
        coco_gt = COCO(annotation_file=original_coco_gt_file, verbose=verbose)
        coco_gt = coco_gt.loadRes(coco_gt_file)
        check_gt_pred_match_on_file(original_coco_gt_file, coco_pred_file)
    else:
        coco_gt = COCO(annotation_file=coco_gt_file, verbose=verbose)
        check_gt_pred_match_on_file(coco_gt_file, coco_pred_file)

    coco_dt = coco_gt.loadRes(coco_pred_file)
    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    coco_eval.params.verbose = verbose
    if top_k is not None:
        coco_eval.params.maxDets = [top_k, top_k, top_k]
    coco_eval.evaluate()
    coco_eval.accumulate()

    # Bind perfs up front: it used to be unbound (UnboundLocalError at the
    # return) whenever do_not_summarize was True.
    perfs = None
    if not do_not_summarize:
        coco_eval.summarize(ignore_summaries=ignore_summaries)
        perfs = coco_eval.stats
    return coco_eval, perfs


def comprehensive_ego4d_scod_coco_results(
    coco_gt_file,
    coco_pred_file,
    scod_clips,
    top_k=None,
    forced_frame_types=None,
    ignore_summaries=None,
    return_eval_dicts=False,
    separate_classes=False,
    fully_narrated=False,
    verbose=False,
):
    """Evaluate predictions per frame type (and optionally per class).

    For each of the pre / pnr / post frame types a COCO bbox evaluation is
    run and printed, followed by a LaTeX-style ``&``-separated summary row of
    AP / AP50 / AP75 (in percent) per object type.

    Args:
        coco_gt_file: Path to the COCO-style ground-truth JSON file.
        coco_pred_file: Path to the JSON prediction file (or loaded list).
        scod_clips: SCOD clip dicts used to map frames to frame types.
        top_k: Optional cap forwarded to the evaluation as maxDets.
        forced_frame_types: Optional collection of "pre" / "pnr" / "post";
            other frame types are not evaluated and show up as ``nan`` in the
            summary row.
        ignore_summaries: Forwarded to the customized ``COCOeval.summarize``.
        return_eval_dicts: If True, return the per-frame-type COCOeval
            objects and GT dicts.
        separate_classes: If True, evaluate each category separately;
            otherwise results are stored under "OOC and/or Tool".
        fully_narrated: If True, restrict evaluation to images whose
            "caption" does not contain "object_of_change".
        verbose: Forwarded to the underlying helpers.

    Returns:
        ``(all_coco_evals, all_coco_gts)`` keyed by "<fr>_frame" when
        ``return_eval_dicts`` is True; otherwise ``None``.
    """
    # Load the GT once and reuse it for every lookup below.
    with open(coco_gt_file) as gt_fp:
        coco_gt_data = json.load(gt_fp)
    object_classes = coco_gt_data["categories"]

    fully_narrated_image_ids = None
    if fully_narrated:
        narrated_ids = [x["id"] for x in coco_gt_data["images"]
                        if "object_of_change" not in x["caption"]]
        print("Fully Narrated Image Counts: {} / {}".format(len(narrated_ids),
                                                            len(coco_gt_data["images"])))
        # A set keeps the downstream membership filtering O(1) per lookup.
        fully_narrated_image_ids = set(narrated_ids)

    # category id -> category name.
    object_type_dict = {}
    for object_class in object_classes:
        object_type_dict[int(object_class["id"])] = object_class["name"]

    per_frame_type_pred_ndarrays, per_frame_type_anns = get_per_frame_type_ndarray_coco_results(
        coco_gt_file=coco_gt_file,
        coco_pred_file=coco_pred_file,
        scod_clips=scod_clips,
        forced_frame_types=forced_frame_types,
        fully_narrated_image_ids=fully_narrated_image_ids,
        verbose=verbose,
    )

    all_coco_evals = {}
    all_coco_gts = {}

    if not separate_classes:
        all_perfs = {"OOC and/or Tool": {}}
    else:
        all_perfs = {object_class["name"]: {} for object_class in object_classes}

    for fr in ["pre", "pnr", "post"]:
        frame_type = "{}_frame".format(fr)
        if frame_type not in per_frame_type_pred_ndarrays:
            continue
        if fr != "pre":
            print()
        print("---------- {} ----------".format(frame_type))
        curr_frame_type_anns = per_frame_type_anns[frame_type]
        curr_frame_type_pred_ndarrays = per_frame_type_pred_ndarrays[frame_type]

        if separate_classes:
            for object_id in sorted(object_type_dict):
                # The last column of the prediction array is the category id.
                object_id_indices = np.where(curr_frame_type_pred_ndarrays[:, -1]==object_id)
                object_id_frame_type_pred_ndarrays = curr_frame_type_pred_ndarrays[object_id_indices]
                object_id_frame_type_anns = [ann for ann in curr_frame_type_anns if ann["category_id"] == object_id]
                print("----- {} -----".format(object_type_dict[object_id]))
                _, perfs = naive_ego4d_scod_coco_results(
                    coco_gt_file=object_id_frame_type_anns,
                    coco_pred_file=object_id_frame_type_pred_ndarrays,
                    original_coco_gt_file=coco_gt_file,
                    top_k=top_k,
                    ignore_summaries=ignore_summaries,
                    verbose=verbose,
                )
                ap, ap50, ap75 = perfs[0], perfs[1], perfs[2]
                all_perfs[object_type_dict[object_id]][frame_type] = {
                    "AP": ap, "AP50": ap50, "AP75": ap75,
                }
            # Simply for the eval dict.
            if return_eval_dicts:
                curr_coco_eval, _ = naive_ego4d_scod_coco_results(
                    coco_gt_file=curr_frame_type_anns,
                    coco_pred_file=curr_frame_type_pred_ndarrays,
                    original_coco_gt_file=coco_gt_file,
                    top_k=top_k,
                    ignore_summaries=ignore_summaries,
                    do_not_summarize=True,
                    verbose=verbose,
                )
        else:
            curr_coco_eval, perfs = naive_ego4d_scod_coco_results(
                coco_gt_file=curr_frame_type_anns,
                coco_pred_file=curr_frame_type_pred_ndarrays,
                original_coco_gt_file=coco_gt_file,
                top_k=top_k,
                ignore_summaries=ignore_summaries,
                verbose=verbose,
            )
            ap, ap50, ap75 = perfs[0], perfs[1], perfs[2]
            all_perfs["OOC and/or Tool"][frame_type] = {
                "AP": ap, "AP50": ap50, "AP75": ap75,
            }

        if return_eval_dicts:
            # One id per annotation (duplicates kept) for the eval params;
            # a set of the same ids for the O(1) image filter.
            fr_image_ids = [x["image_id"] for x in curr_frame_type_anns]
            fr_image_id_set = set(fr_image_ids)
            fr_images = [x for x in coco_gt_data["images"] if x["id"] in fr_image_id_set]
            all_coco_gts[frame_type] = {
                "info": coco_gt_data["info"],
                "licenses": coco_gt_data["licenses"],
                "categories": coco_gt_data["categories"],
                "images": fr_images,
                "annotations": curr_frame_type_anns,
            }
            curr_coco_eval.params.imgIds = fr_image_ids
            curr_coco_eval._paramsEval.imgIds = fr_image_ids
            all_coco_evals[frame_type] = curr_coco_eval

    header_str = "Pre                   & PNR                   & Post                 "
    ap_str     = "AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 "
    perf_str   = "{:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f}"

    # Frame types that were not evaluated are reported as nan instead of
    # raising KeyError while building the row.
    not_evaluated = {"AP": float("nan"), "AP50": float("nan"), "AP75": float("nan")}
    for obj_type in sorted(all_perfs):
        print()
        print("---------- For {} ----------".format(obj_type))
        perf_nums = []
        for fr in ["pre", "pnr", "post"]:
            frame_type = "{}_frame".format(fr)
            curr_perfs = all_perfs[obj_type].get(frame_type, not_evaluated)
            perf_nums.append(curr_perfs["AP"]*100)
            perf_nums.append(curr_perfs["AP50"]*100)
            perf_nums.append(curr_perfs["AP75"]*100)
        print(header_str)
        print(ap_str)
        print(perf_str.format(*perf_nums))

    if return_eval_dicts:
        return all_coco_evals, all_coco_gts

In [9]:
def get_best_k_scod_preds(
    coco_gt_pred_dict,
    best_k=3,
):
    """Pick, per GT box, the best-overlapping prediction among the top-k.

    For every image the first ``best_k`` predictions are the candidates. Each
    GT box in turn takes the remaining candidate with the highest IoU; a
    candidate that has been taken is not available to later GT boxes.

    Args:
        coco_gt_pred_dict: Mapping image_id -> {"gt_bboxes": [xywh, ...],
            "pred_bboxes": [pred dict with "bbox" in xywh, ...]}, the shape
            returned by ``create_coco_gt_pred_dict``.
        best_k: Number of leading predictions per image to consider.

    Returns:
        Flat list of the retained prediction dicts across all images.
    """
    def _to_xyxy(box):
        # xywh -> xyxy.
        return [box[0], box[1], box[0] + box[2], box[1] + box[3]]

    best_k_data = []
    for image_id in coco_gt_pred_dict:
        coco_gt_pred = coco_gt_pred_dict[image_id]
        # Keep full prediction dicts in ONE list and pop from it directly.
        # Previously boxes and dicts lived in two parallel lists and only the
        # box list was popped, so later indices pointed at the wrong dict.
        candidates = list(coco_gt_pred["pred_bboxes"][:best_k])
        for gt_bbox in coco_gt_pred["gt_bboxes"]:
            if not candidates:
                # No unused prediction left for the remaining GT boxes.
                break
            gt_xyxy = _to_xyxy(gt_bbox)
            curr_ious = np.asarray([
                calc_iou_individual(_to_xyxy(cand["bbox"]), gt_xyxy)
                for cand in candidates
            ])
            # Best one out of k.
            best_idx = int(np.argsort(-curr_ious)[0])
            # If used, skip: remove it from the candidate pool.
            best_k_data.append(candidates.pop(best_idx))
    print("Num Best-{} instances: {}".format(best_k, len(best_k_data)))
    return best_k_data

## DATA

In [11]:
# Root directory of the Ego4D v1 annotation files.
annots_root = "/local1/hu528/ego4d_data/v1/annotations/"

# Full paths of the SCOD train / val annotation files.
scod_train_file = os.path.join(annots_root, "fho_scod_train.json")
scod_val_file = os.path.join(annots_root, "fho_scod_val.json")

# Context managers so the file handles are closed once the JSON is parsed.
with open(scod_train_file) as train_fp:
    scod_train_clips = json.load(train_fp)["clips"]
with open(scod_val_file) as val_fp:
    scod_val_clips = json.load(val_fp)["clips"]

# Train clips followed by val clips.
scod_clips = scod_train_clips + scod_val_clips

print("Train SCOD clips: {}".format(len(scod_train_clips)))
print("Val   SCOD clips: {}".format(len(scod_val_clips)))
print("Total SCOD clips: {}".format(len(scod_clips)))

Train SCOD clips: 19071
Val   SCOD clips: 12801
Total SCOD clips: 31872


# Results

## SOTA OD Baselines

#### SCOD Results (`Paper Chosen`)
* Model: InternVideo (Swin-L)
* Training frames = `all frames`;  Eval frames = `pnr frames`

In [221]:
# Ground-truth COCO annotation file (val split, all frames).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/coco_annotations_all_frames/val.json"
)
# Prediction file of the Swin-L baseline (path: Swin-L-IN-22K+O365).
coco_pred_file = (
    "/local1/bryanzhou008/jarvis/project_jarvis/baselines/ego4d-eccv2022-solutions"
    "/scod/OUTPUT_RESULTS/Swin-L-IN-22K+O365/pre_post_pnr_results.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# top_k=1 sets COCOeval's params.maxDets to [1, 1, 1].
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=1,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.254
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.381
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.263
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.391
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.391
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.391

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.301
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.456
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.310
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.435
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.435
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all

In [259]:
# Ground-truth COCO annotation file (val split, all frames, OD "with tool" variant per the path).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge"
    "/paper_data/all_frames/od/val_scod_all_frames_od_with_tool.json"
)
# Prediction file of the baseline detector on that "with tool" data (epoch 6 per the file name).
coco_pred_file = (
    "/local1/bryanzhou008/jarvis/project_jarvis/baselines/ego4d-eccv2022-solutions"
    "/scod/623_val_scod_all_frames_od_with_tool_epoch_6.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# separate_classes=True runs one evaluation per category listed in the GT file.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    separate_classes=True,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3386
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5010
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3554
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4028
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6587
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.7381
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.1508
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.2237
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.1579
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.5348
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6776
 Average Recall     (AR) @[ IoU

## **NO** `Tools`

### Without `GPT`

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `zero-shot`;  Eval frames = `all frames`
* Train: `N/A`
* Eval:  Grounding with `GT-SRL-ARG1`

In [228]:
# Ground-truth COCO annotation file (val split, narrated with GT-SRL-ARG1 per the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge"
    "/coco_annotations_all_frames/val_narrated_gt_srl_arg1.json"
)
# Prediction file of the zero-shot GLIP-L run (bbox.json from its inference folder).
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/baselines"
    "/GLIP/narrated_scod_finetuning_large_v2/eval_ego4d_narrated_scod_val_zero_shot"
    "/eval/glip_large_model/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# top_k is left at its default (None), so COCOeval's own maxDets are used.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.202
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.330
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.206
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.292
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.466
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.499

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.195
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.323
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.194
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.285
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.456
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: (Naive) detection with `object_of_change` as prompts
* Eval:  (Naive) detection with `object_of_change` as prompts

In [227]:
# Ground-truth COCO annotation file (val split, all frames).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/coco_annotations_all_frames/val.json"
)
# Prediction file of the naively fine-tuned GLIP-L run (checkpoint model_0045000 per the path).
coco_pred_file = (    
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/baselines"
    "/GLIP/naive_scod_finetuning_large_v2/eval_ego4d_scod_val_finetuned_45K"
    "/eval/model_0045000/inference/test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# top_k is left at its default (None), so COCOeval's own maxDets are used.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.269
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.428
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.279
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.337
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.586
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.632

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.297
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.477
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.305
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.362
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.589
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `Random-Word` with `tool`s **NOT** in it
* Eval:  Grounding with `Random-Word` with `tool`s **NOT** in it

In [226]:
# Ground-truth COCO annotation file (val split, narrated with a random word per the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge"
    "/coco_annotations_all_frames/val_narrated_random_word.json"
)
# Prediction file of the random-word GLIP run (checkpoint model_0040000 per the path).
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/baselines"
    "/GLIP/narrated_scod_finetuning_v2_rand_word/eval_ego4d_narrated_scod_val_finetuned_45K_rand_word"
    "/eval/model_0040000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# top_k is left at its default (None), so COCOeval's own maxDets are used.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.259
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.422
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.262
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.401
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.596
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.614

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.268
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.442
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.268
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.411
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.597
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `Full-Sentence` with `tool`s **NOT** in it
* Eval:  Grounding with `Full-Sentence` with `tool`s **NOT** in it

In [136]:
# Ground-truth COCO annotation file (val split, all frames, narrated with the full sentence per the path).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/full_sentence/val_scod_all_frames_narrated_full_sentence.json"
)
# Prediction file of the full-sentence model (checkpoint model_0035000 per the path).
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/full_sentence/ego4d_scod_all_frames_narrated_full_sentence"
    "/eval_at_35K/eval/model_0035000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# top_k is left at its default (None), so COCOeval's own maxDets are used.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.324
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.516
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.333
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.385
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.594
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.641

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.338
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.544
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.345
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.396
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.597
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GT-SRL-ARG1`
* Eval:  Grounding with `GT-SRL-ARG1`

In [241]:
# Ground-truth COCO annotation file (val split, all frames, narrated with GT-SRL-ARG1 per the path).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gt_srl_arg1/val_scod_all_frames_narrated_gt_srl_arg1.json"
)
# Prediction file of the GT-SRL-ARG1 model (checkpoint model_0045000 per the path).
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gt_srl_arg1/ego4d_scod_all_frames_narrated_gt_srl_arg1"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# top_k=None and fully_narrated=False are the function defaults, spelled out
# here: no maxDets override and no filtering of images by caption.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    fully_narrated=False,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3787
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5635
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3955
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4406
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6332
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6656

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3964
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5941
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4073
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4471
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6338
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [217]:
# Ground-truth COCO annotation file (val split, all frames, narrated with GT-SRL-ARG1 per the path).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gt_srl_arg1/val_scod_all_frames_narrated_gt_srl_arg1.json"
)
# Prediction file of the GT-SRL-ARG1 model (checkpoint model_0045000 per the path).
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gt_srl_arg1/ego4d_scod_all_frames_narrated_gt_srl_arg1"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the customized COCOeval.summarize(); presumably suppresses the
# per-area (small/medium/large) rows -- confirm against the cocoapi fork.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# fully_narrated=True restricts the evaluation to images whose "caption" does
# not contain "object_of_change".
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    fully_narrated=True,
    verbose=False,
)

Fully Narrated Image Counts: 34329 / 38403
---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.389
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.577
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.407
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.450
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.636
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.668

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.607
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.417
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.455
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.638
 Average Rec

In [251]:
# Ground-truth annotation JSON handed to the evaluator as `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gt_srl_arg1/val_scod_all_frames_narrated_gt_srl_arg1.json",
))
# Prediction file (`bbox.json`) handed to the evaluator as `coco_pred_file`.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gt_srl_arg1/ego4d_scod_all_frames_narrated_gt_srl_arg1",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `top_k=1` variant of the evaluation.
# NOTE(review): presumably keeps only the top-1 detection per image (the
# printed summary reports maxDets=1 everywhere) -- confirm in the helper.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    top_k=1,
    verbose=False,
    fully_narrated=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3238
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.4720
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.3415
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4406
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4406
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4406

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3422
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.5044
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.3522
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4471
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4471
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [246]:
# Ground-truth annotation JSON (the `_with_tool` variant) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gt_srl_arg1/val_scod_all_frames_narrated_gt_srl_arg1_with_tool.json",
))
# Prediction file (`bbox.json`) passed as `coco_pred_file`.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gt_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gt_srl_arg1_with_tool",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report has one sub-section per class
# (`object_of_change`, `tool`) under each frame type.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    separate_classes=True,
    verbose=False,
    fully_narrated=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3859
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5738
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4024
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4482
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6434
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6731
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.4553
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.7122
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4627
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.5290
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6426
 Average Recall     (AR) @[ IoU

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `SRL-ARG1-Only`
* Eval:  Grounding with `SRL-ARG1-Only`

In [224]:
# Ground-truth annotation JSON handed to the evaluator as `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gt_srl_arg1/val_scod_all_frames_narrated_gt_srl_arg1.json",
))
# Prediction file (`bbox.json`) from the `srl_arg1_only` run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/srl_arg1_only/ego4d_scod_all_frames_narrated_srl_arg1_only",
    "/eval_at_55K/eval/model_0055000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# Run the evaluation and print the per-frame-type summaries.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
    fully_narrated=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.364
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.549
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.376
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.432
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.632
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.667

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.383
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.581
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.394
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.440
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.630
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all

### With `GPT`

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-W/o-Tool` with `tool`s **NOT** in it
* Eval:  Grounding with `GPT-v1-W/o-Tool` with `tool`s **NOT** in it

In [12]:
# Ground-truth annotation JSON (`gpt_v1_wo_tool`) passed as `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_wo_tool/val_scod_all_frames_narrated_gpt_v1_wo_tool.json",
))
# Prediction file (`bbox.json`) from the `gpt_v1_wo_tool_a6000` run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_wo_tool/ego4d_scod_all_frames_narrated_gpt_v1_wo_tool_a6000",
    "/eval_at_55K/eval/model_0055000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# Run the evaluation and print the per-frame-type summaries.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
)

---------- pre_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=12.14s).
Accumulating evaluation results...
DONE (t=2.84s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.368
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.549
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.386
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.439
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.640
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.670

---------- pnr_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=12.07s).
Accumulating evaluation results...
DONE (t=2.81s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.385
 Average Precision  (AP) @[ IoU=0.50      | area=   all | m

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it
* Eval:  Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it
  * Basically, use `SRL-ARG1` to supplement when `ooc` is **NULL**

In [15]:
# Ground-truth annotation JSON (`gpt_v1_srl_arg1_wo_tool`) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool.json",
))
# Prediction file (`bbox.json`) from the matching run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# Run the evaluation and print the per-frame-type summaries.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
)

---------- pre_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=14.60s).
Accumulating evaluation results...
DONE (t=2.94s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.370
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.555
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.384
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.434
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.635
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.668

---------- pnr_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=13.03s).
Accumulating evaluation results...
DONE (t=5.28s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.388
 Average Precision  (AP) @[ IoU=0.50      | area=   all | m

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and *exclude* stop words
* Eval:  Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and *exclude* stop words
  * Basically, use `SRL-ARG1` to supplement when `ooc` is **NULL**
  * Stop words are like [`a`, `the`]

In [17]:
# Ground-truth annotation JSON (the `_no_stopwords` variant) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords.json",
))
# Prediction file (`bbox.json`) from the `_no_stopwords` run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool_no_stopwords/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords",
    "/eval_at_50K/eval/model_0050000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# Run the evaluation and print the per-frame-type summaries.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
)

---------- pre_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=19.34s).
Accumulating evaluation results...
DONE (t=4.14s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.362
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.549
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.375
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.428
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.628
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.662

---------- pnr_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=16.98s).
Accumulating evaluation results...
DONE (t=5.64s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.382
 Average Precision  (AP) @[ IoU=0.50      | area=   all | m

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and **drop** *NULL OOC*
* Eval:  Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and **drop** *NULL OOC*
  * Basically, just drop the `ooc` = **NULL** data points from training
  
**NOTE: We did not drop `NULL OOC` during inference!**

In [19]:
# Ground-truth annotation JSON (`gpt_v1_srl_arg1_wo_tool`) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool.json",
))
# Prediction file (`bbox.json`) from the `_drop_null_ooc` run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_drop_null_ooc",
    "/eval_at_40K/eval/model_0040000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# Run the evaluation and print the per-frame-type summaries.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
)

---------- pre_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=14.65s).
Accumulating evaluation results...
DONE (t=3.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.372
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.556
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.388
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.433
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.639
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.675

---------- pnr_frame ----------
Checking GT and Pred files matching...
Checking complete!
Evaluate annotation type *bbox*
DONE (t=18.89s).
Accumulating evaluation results...
DONE (t=3.79s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.393
 Average Precision  (AP) @[ IoU=0.50      | area=   all | m

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and add **OOC Conditions**
* Eval:  Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and add **OOC Conditions**
  * Basically, use `SRL-ARG1` to supplement when `ooc` is **NULL**
  * Stop words are like [`a`, `the`]
  * **Add condition sentences** at the end of captions

In [31]:
# Ground-truth annotation JSON (the `_no_stopwords` variant) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords.json",
))
# Prediction file (`bbox.json`) from the `_symb_conds` run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool_no_stopwords/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords_symb_conds",
    "/eval_at_40K/eval/model_0040000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report is broken down per class
# (only `object_of_change` appears for this ground truth).
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
    separate_classes=True,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.375
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.560
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.390
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.434
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.641
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.676

---------- pnr_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.391
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.592
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.401
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.443
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and add **OOC Conditions**
* Eval:  Grounding with `GPT-v1-SRL-ARG1-W/o-Tool` with `tool`s **NOT** in it, and add **OOC Conditions**
  * Basically, use `SRL-ARG1` to supplement when `ooc` is **NULL**
  * Stop words are like [`a`, `the`]
  * **Add condition sentences** at the end of captions
  * Exclude `commas`

In [34]:
# Ground-truth annotation JSON (the `_no_stopwords` variant) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords.json",
))
# Prediction file (`bbox.json`) from the `_symb_conds_mask_no_comma` run
# directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_wo_tool_no_stopwords/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords_symb_conds_mask_no_comma",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report is broken down per class
# (only `object_of_change` appears for this ground truth).
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    verbose=False,
    separate_classes=True,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.384
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.575
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.400
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.442
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.642
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.676

---------- pnr_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.399
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.605
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.409
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.446
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6

## **With** `Tools`

### With `GPT`

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL OOC/Tool*
* Eval:  Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL Tool*
  * Basically, just drop the `ooc` = **NULL** data points from training
  
**NOTE: We did NOT drop `NULL OOC` during inference!**  
**NOTE: But we DID drop `NULL Tool` during inference!**

In [33]:
# Ground-truth annotation JSON (the `_with_tool_drop_null_ooc_tool` variant)
# passed as `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool.json",
))
# Prediction file (`bbox.json`) from the matching run directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report has one sub-section per class
# (`object_of_change`, `tool`) under each frame type.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    top_k=None,
    verbose=False,
    separate_classes=True,
    return_eval_dicts=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.372
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.558
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.388
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.440
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.640
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.670
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.459
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.698
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.491
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.638
 Average Recall     (AR) @[ IoU=0.50:0.95 

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL OOC/Tool*
* Eval:  Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL Tool*
  * Basically, just drop the `ooc` = **NULL** data points from training
  * **Add condition sentences** at the end of captions
  * Exclude `commas`

**NOTE: We did NOT drop `NULL OOC` during inference!**  
**NOTE: But we DID drop `NULL Tool` during inference!**

In [105]:
# Ground-truth annotation JSON (the `_with_tool_drop_null_ooc_tool` variant)
# passed as `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool.json",
))
# Prediction file (`bbox.json`) from the `_symb_conds_mask_no_comma` run
# directory.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report has one sub-section per class
# (`object_of_change`, `tool`) under each frame type.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    top_k=None,
    verbose=False,
    separate_classes=True,
    return_eval_dicts=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.384
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.574
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.401
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.444
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.642
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.670
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.478
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.734
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.503
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.537
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.642
 Average Recall     (AR) @[ IoU=0.50:0.95 

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL OOC/Tool*
* Eval:  Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it
  * Basically, just drop the `ooc` = **NULL** data points from training

**NOTE: We did NOT drop `NULL OOC/TOOL` during inference!**  

In [112]:
# Ground-truth annotation JSON (`gpt_v1_srl_arg1_with_tool`) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json",
))
# Prediction file (`bbox.json`) from the `eval_at_45K_no_drop_nulls`
# inference output of the `_drop_null_ooc_tool` run.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool",
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report has one sub-section per class
# (`object_of_change`, `tool`) under each frame type.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    top_k=None,
    verbose=False,
    separate_classes=True,
    return_eval_dicts=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.369
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.554
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.385
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.436
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.638
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.669
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.381
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.578
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.407
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.436
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 

In [119]:
# Ground-truth annotation JSON (`gpt_v1_srl_arg1_with_tool`) that would be
# passed to the evaluator as `coco_gt_file`.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)
# Prediction file (`bbox.json`) from the `contd_pre_post` run, `eval_at_3K`
# inference output.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_contd_pre_post_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool"
    "/eval_at_3K/eval/model_0003000/inference/narrated_ego4d_test/bbox.json"
)
# Frame types to report on.
forced_frame_types = ["pre", "pnr", "post"]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}
# NOTE(review): a bare `raise` with no active exception raises
# `RuntimeError: No active exception to reraise` (see the recorded cell
# output), so the cell always stops here and the evaluation call below never
# runs. Presumably a deliberate guard to keep this cell from executing during
# "Run All" -- confirm, then either remove the guard or delete the cell.
raise

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    return_eval_dicts=False,
    separate_classes=True,
    verbose=False,
)

RuntimeError: No active exception to reraise

 (`Paper Chosen`)

In [247]:
# Ground-truth annotation JSON (`gpt_v3_srl_arg1_with_tool`) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json",
))
# Prediction file (`bbox.json`) from the `eval_at_45K_no_drop_nulls`
# inference output of the `gpt_v3` `_drop_null_ooc_tool` run.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool",
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report has one sub-section per class
# (`object_of_change`, `tool`) under each frame type.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    top_k=None,
    verbose=False,
    separate_classes=True,
    return_eval_dicts=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3276
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5096
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3398
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3940
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6046
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6510
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3841
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6066
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3933
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4293
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5468
 Average Recall     (AR) @[ IoU

#### SCOD Results
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL OOC/Tool*
* Eval:  Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it
  * Basically, just drop the `ooc` = **NULL** data points from training
  * **Add condition sentences** at the end of captions
  * Exclude `commas`

**NOTE: We did NOT drop `NULL OOC/TOOL` during inference!**  

In [134]:
# Ground-truth annotation JSON (`gpt_v1_srl_arg1_with_tool`) passed as
# `coco_gt_file`.
coco_gt_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json",
))
# Prediction file (`bbox.json`) from the `eval_at_45K_no_drop_nulls`
# inference output of the `_symb_conds_mask_no_comma` run.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma",
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
# Frame types to report on, one section per entry in the printed output.
forced_frame_types = [
    "pre",
    "pnr",
    "post",
]
# Presumably the per-area summary rows to leave out of the report -- verify
# against the helper's implementation.
ignore_summaries = dict(area=["small", "medium", "large"])

# `separate_classes=True`: the printed report has one sub-section per class
# (`object_of_change`, `tool`) under each frame type.
comprehensive_ego4d_scod_coco_results(
    scod_clips=scod_val_clips,
    coco_pred_file=coco_pred_file,
    coco_gt_file=coco_gt_file,
    ignore_summaries=ignore_summaries,
    forced_frame_types=forced_frame_types,
    top_k=None,
    verbose=False,
    separate_classes=True,
    return_eval_dicts=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.381
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.570
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.397
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.439
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.638
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.667
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.396
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.608
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.417
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.444
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.531
 Average Recall     (AR) @[ IoU=0.50:0.95 

In [185]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the
# `..._symb_conds_mask_no_comma` run, stored under `eval_at_45K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma"
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # With fully_narrated=True the run prints a "Fully Narrated Image Counts"
    # line before the metrics.
    fully_narrated=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

Fully Narrated Image Counts: 34329 / 38403
---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.391
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.583
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.409
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.449
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.642
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.669
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.396
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.608
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.417
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.444
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.531

In [244]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v3 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the GPT-v3
# `..._symb_conds_mask_no_comma` run, stored under `eval_at_45K`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    fully_narrated=False,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3834
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5705
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3986
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4430
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6400
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6774
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.4400
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6649
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4612
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4920
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5981
 Average Recall     (AR) @[ IoU

In [129]:
...  # Placeholder cell: only evaluates to Ellipsis (see its output); no computation.

Ellipsis

In [130]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0002000 from the
# `ego4d_scod_contd_pre_post_...` run, stored under `eval_at_2K`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_contd_pre_post_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma"
    "/eval_at_2K/eval/model_0002000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.383
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.570
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.398
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.441
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.643
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.678
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.404
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.616
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.427
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.445
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.537
 Average Recall     (AR) @[ IoU=0.50:0.95 

In [127]:
...  # Placeholder cell: only evaluates to Ellipsis (see its output); no computation.

Ellipsis

In [133]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the `..._symb_defs`
# run, stored under `eval_at_45K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_defs"
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.380
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.571
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.394
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.437
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.638
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.668
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.382
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.587
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.391
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.439
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 

In [184]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the `..._symb_defs`
# run, stored under `eval_at_45K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_defs"
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # With fully_narrated=True the run prints a "Fully Narrated Image Counts"
    # line before the metrics.
    fully_narrated=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

Fully Narrated Image Counts: 34329 / 38403
---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.391
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.585
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.407
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.448
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.641
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.670
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.382
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.587
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.391
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.439
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.528

(`Paper Chosen`)

In [243]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v3 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the GPT-v3
# `..._symb_defs` run, stored under `eval_at_45K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_defs"
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    fully_narrated=False,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3697
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5616
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3835
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4289
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6256
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6619
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.4226
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6437
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4459
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4780
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5730
 Average Recall     (AR) @[ IoU

(`Paper Chosen`)

In [253]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the
# `..._symb_conds_mask_no_comma` run, stored under
# `eval_at_45K_no_mask_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool"
    "/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma"
    "/eval_at_45K_no_mask_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    fully_narrated=False,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3759
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5628
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3919
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4373
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6356
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6630
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3840
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5905
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4040
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4361
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5205
 Average Recall     (AR) @[ IoU

#### SCOD Results (`Paper Chosen`)
* Model: GLIP-**L**
* Training frames = `all frames`;  Eval frames = `all frames`
* Train: Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it, and **drop** *NULL OOC/Tool*
* Eval:  Grounding with `GPT-v1-SRL-ARG1-With-Tool` with `tool`s **IN** it
  * Basically, just drop the data points whose `ooc` is **NULL** from training
  * **Add `condition` sentences** at the end of captions
  * **Add `definition` sentences** at the end of captions
  * Exclude `commas`

**NOTE: We did NOT drop `NULL OOC/TOOL` during inference!**  

In [240]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0035000 from the
# `..._no_comma_defs_new_caps` run, stored under `eval_at_35K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma_defs_new_caps"
    "/eval_at_35K_no_drop_nulls/eval/model_0035000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3827
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5779
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3965
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4357
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6367
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6714
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3869
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6015
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4026
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4331
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5253
 Average Recall     (AR) @[ IoU

In [186]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0035000 from the
# `..._no_comma_defs_new_caps` run, stored under `eval_at_35K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma_defs_new_caps"
    "/eval_at_35K_no_drop_nulls/eval/model_0035000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # With fully_narrated=True the run prints a "Fully Narrated Image Counts"
    # line before the metrics.
    fully_narrated=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)
# Recorded numbers, kept verbatim. Presumably a LaTeX table row for the "GT"
# setting -- TODO confirm which metrics/frames each column refers to.
# GT.
# 38.90 & 57.71 & 40.73 & 40.66 & 60.74 & 41.71 & 36.41 & 53.09 & 38.32

Fully Narrated Image Counts: 34329 / 38403
---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.394
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.592
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.409
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.447
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.640
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.673
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.387
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.602
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.403
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.433
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.525

In [249]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v1 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0035000 from the
# `..._no_comma_defs_new_caps` run, stored under `eval_at_35K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma_defs_new_caps"
    "/eval_at_35K_no_drop_nulls/eval/model_0035000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    # With top_k=1 the printed summaries report maxDets=1 instead of 100.
    top_k=1,
    separate_classes=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3226
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.4757
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.3353
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4357
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4357
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4357
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3546
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.5544
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.3700
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4331
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4331
 Average Recall     (AR) @[ IoU

... (`Paper Chosen`)

In [198]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v3 SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the GPT-v3
# `..._symb_conds_mask_new_condstr` run, stored under `eval_at_45K`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    fully_narrated=False,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.386
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.576
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.402
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.443
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.645
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.680
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.435
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.658
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.456
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.488
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.589
 Average Recall     (AR) @[ IoU=0.50:0.95 

...

...

In [236]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the `gpt_v3_v` SRL-ARG1 "with tool" narrated file
# (`val_` prefix in the file name).
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_v_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_v_srl_arg1_with_tool.json"
)

# Predictions: bbox.json of checkpoint model_0045000 from the `gpt_v3_v`
# `..._new_condstr_defs` run, stored under `eval_at_45K_no_drop_nulls`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_v_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_v_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr_defs"
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    # With top_k=1 the printed summaries report maxDets=1 instead of 100.
    top_k=1,
    separate_classes=True,
    fully_narrated=False,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3195
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.4758
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.3320
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4372
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4372
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4028
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=  1 ] = 0.6129
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=  1 ] = 0.4162
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4843
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4843
 Average Recall     (AR) @[ IoU

### Spatial Try-Outs

In [215]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v3 "strictly with tool, drop null ooc/tool"
# file. The GPT-v1 counterpart is left commented out as an alternative.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    # "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_strictly_with_tool_drop_null_ooc_tool.json"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_strictly_with_tool_drop_null_ooc_tool.json"
)

# Predictions: bbox.json of checkpoint model_0000800 under
# `spatials_contd_0617/testing`. The commented-out lines are an earlier GLIP
# baseline path kept for reference.
coco_pred_file = (
    # "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/baselines"
    # "/GLIP/narrated_scod_finetuning_large_v2_gt_srl_arg1_with_tool_new0527"
    # "/testing/eval/model_0001200/inference/narrated_ego4d_test/bbox.json"
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr"
    "/spatials_contd_0617/testing/eval/model_0000800/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)
# Recorded numbers, kept verbatim.
# NOTE(review): these values do not obviously correspond to the metrics
# printed by this cell -- confirm which run they belong to.
# 12.72 & 17.67 & 13.52 & 14.07 & 19.32 & 14.68 & 14.67 & 19.81 & 15.03

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.479
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.651
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.502
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.509
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.669
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.698
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.467
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.741
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.482
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.637
 Average Recall     (AR) @[ IoU=0.50:0.95 

In [201]:
# Frame types and summary filters handed to the evaluation call below.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground-truth annotations: the GPT-v3 "strictly with tool, drop null ooc/tool"
# file. The GPT-v1 counterpart is left commented out as an alternative.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    # "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_strictly_with_tool_drop_null_ooc_tool.json"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_strictly_with_tool_drop_null_ooc_tool.json"
)

# Predictions: bbox.json of checkpoint model_0000800 under
# `spatials_contd_0617/org`. The commented-out lines are an earlier GLIP
# baseline path kept for reference.
coco_pred_file = (
    # "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/baselines"
    # "/GLIP/narrated_scod_finetuning_large_v2_gt_srl_arg1_with_tool_new0527"
    # "/testing/eval/model_0001200/inference/narrated_ego4d_test/bbox.json"
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr"
    "/spatials_contd_0617/org/eval/model_0000800/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    # Inputs.
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    # Evaluation scope.
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    top_k=None,
    separate_classes=True,
    # Reporting.
    return_eval_dicts=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.485
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.653
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.507
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.515
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.670
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.699
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.467
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.741
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.482
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.637
 Average Recall     (AR) @[ IoU=0.50:0.95 

# END