In [None]:
import ast
import json
import numpy as np
import os
import pandas as pd
import seaborn as sns
import sys
sns.set_style("white")

sys.path.append("../")
import vq2d.stats as vq_stats

import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
# Global matplotlib styling: extra-large fonts and black text on every figure
# produced by this notebook.
params = {
    **{
        size_key: "xx-large"
        for size_key in (
            "legend.fontsize",
            "axes.labelsize",
            "axes.titlesize",
            "xtick.labelsize",
            "ytick.labelsize",
        )
    },
    **{
        color_key: "black"
        for color_key in (
            "text.color",
            "axes.labelcolor",
            "xtick.color",
            "ytick.color",
        )
    },
}
pylab.rcParams.update(params)

In [None]:
# Directory that receives the generated figures; created up front so that
# later cells can save into it.
STATS_DIR = "./stats"
os.makedirs(STATS_DIR, exist_ok=True)

# Root of the local Ego4D video download. It must contain manifest.csv,
# which the metadata-loading cell reads.
EGO4D_VIDEOS_ROOT = "<TODO: UPDATE EGO4D VIDEOS PATH>"

# VQ annotation files, one per split
ANNOT_PATHS = [
    "../data/vq_train.json",
    "../data/vq_val.json",
    "../data/vq_test.json",
]

Visualize the statistics of the VQ2D annotations.

In [None]:
# Load annotations for each video, pooling all splits into one flat list.
# The list is rebuilt from scratch so re-running the cell is idempotent.
VIDEO_ANNOTATIONS = []
for annot_path in ANNOT_PATHS:
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(annot_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)
    VIDEO_ANNOTATIONS.extend(data["videos"])

# Load metadata for each annotated video from the Ego4D manifest
manifest_path = os.path.join(EGO4D_VIDEOS_ROOT, "manifest.csv")
manifest = pd.read_csv(manifest_path)
# Pre-seed every annotated video with None so that videos absent from the
# manifest remain detectable after the loop.
VIDEO_UIDS_TO_METADATA = {v['video_uid']: None for v in VIDEO_ANNOTATIONS}
for _, row in manifest.iterrows():
    video_uid = row['video_uid']
    if video_uid not in VIDEO_UIDS_TO_METADATA:
        # Manifest row for a video without VQ annotations
        continue
    VIDEO_UIDS_TO_METADATA[video_uid] = {
        'video_duration_sec': row['canonical_video_duration_sec'],
        # The scenarios column is parsed as a Python literal
        # (presumably a stringified list -- verify against the manifest).
        'scenarios': ast.literal_eval(row['scenarios']),
        'source': row['video_source'],
    }

# Report annotated videos that have no manifest row. Their metadata stays
# None, which downstream statistics helpers may not handle, so make the
# cause visible here instead of failing later with an unrelated error.
missing_video_uids = sorted(
    uid for uid, md in VIDEO_UIDS_TO_METADATA.items() if md is None
)
if missing_video_uids:
    print(
        f"WARNING: {len(missing_video_uids)} annotated video(s) have no entry "
        f"in {manifest_path}: {missing_video_uids}"
    )

In [None]:
def _print_stats_section(title, stats, format_row):
    """Print a titled block of statistics, one formatted row per entry."""
    rule = '-' * 50
    print(rule)
    print(title)
    print(rule)
    for name, value in stats.items():
        print(format_row(name, value))


# Hours of video annotated
coverage_stats = vq_stats.compute_coverage_statistics(
    VIDEO_ANNOTATIONS, VIDEO_UIDS_TO_METADATA
)
_print_stats_section(
    "Video coverage statistics",
    coverage_stats,
    lambda k, v: f"{k:<15s}: {v:.3f} hours",
)

# Number of clips annotated
clip_stats = vq_stats.compute_clip_statistics(VIDEO_ANNOTATIONS)
_print_stats_section(
    "Clip statistics",
    clip_stats,
    lambda k, v: f"{k:<15s}: {v:>5d}",
)

# Number of visual queries annotated
query_stats = vq_stats.compute_query_statistics(VIDEO_ANNOTATIONS)
_print_stats_section(
    "Query statistics",
    query_stats,
    lambda k, v: f"{k:<15s}: {v:>5d}",
)

In [None]:
# Histogram over clip lengths in minutes.
# Only clips flagged as annotation-complete are counted.
clip_lengths = [
    (clip['video_end_sec'] - clip['video_start_sec']) / 60.0
    for video in VIDEO_ANNOTATIONS
    for clip in video['clips']
    if clip['annotation_complete']
]

clip_hist_style = dict(
    rwidth=0.8,
    color="dodgerblue",
    edgecolor=".8",
    figsize=(6, 6),
    add_grid=True,
)
vq_stats.hist(
    clip_lengths,
    title=f'Histogram over clip lengths',
    xlabel='Clip length (minutes)',
    ylabel='# clips annotated',
    **clip_hist_style,
)

In [None]:
# Plot scenario coverage statistics, largest coverage first
scenarios_coverage = vq_stats.compute_scenario_coverage_statistics(
    VIDEO_ANNOTATIONS, VIDEO_UIDS_TO_METADATA
)
sorted_scenarios_coverage = sorted(
    scenarios_coverage.items(), key=lambda item: item[1], reverse=True
)

# Tick labels are capped at 23 characters: longer names keep their first 20
# characters followed by an ellipsis.
scenario_labels = []
scenario_hours = []
for scenario, hours in sorted_scenarios_coverage:
    if len(scenario) <= 23:
        scenario_labels.append(scenario[:23])
    else:
        scenario_labels.append(scenario[:20] + "...")
    scenario_hours.append(hours)

vq_stats.barplot(
    x=scenario_labels,
    y=scenario_hours,
    rotation=90,
    xlabel="Scenarios",
    ylabel="# video hours",
    color="dodgerblue",
    edgecolor=".8",
    figsize=(22, 7),
    title=f"Total: scenarios={len(sorted_scenarios_coverage)}, hours={coverage_stats['total_coverage']:.2f}",
    add_grid=True,
    save_path=os.path.join(STATS_DIR, "ego4d-vq-scenarios.png"),
)

In [None]:
# Plot university coverage statistics, largest coverage first
universities_coverage = vq_stats.compute_university_coverage_statistics(
    VIDEO_ANNOTATIONS, VIDEO_UIDS_TO_METADATA
)
sorted_universities_coverage = sorted(
    universities_coverage.items(), key=lambda x: x[1], reverse=True
)
vq_stats.barplot(
    x=[
        # Cap tick labels at 23 characters. Names longer than that keep their
        # first 20 characters plus "...". Truncating at 20 instead would turn
        # a 21-23 character name into a label that is no shorter than the
        # original while dropping part of it.
        name if len(name) <= 23 else name[:20] + "..."
        for name, _ in sorted_universities_coverage
    ],
    y=[hours for _, hours in sorted_universities_coverage],
    rotation=90,
    xlabel="Universities",
    ylabel="# video hours",
    color="dodgerblue",
    edgecolor=".8",
    figsize=(8, 6),
    title=(
        f"Total: universities={len(sorted_universities_coverage)}, "
        f"hours={coverage_stats['total_coverage']:.2f}"
    ),
    add_grid=True,
    save_path=os.path.join(STATS_DIR, "ego4d-vq-universities.png"),
)

# Analyze biases in the data

* Bias 1: Separation between the query frame and the response track
* Bias 2: Size (length in frames) of the response track
* Bias 3: Location of bounding boxes in the response track (RT) images

In [None]:
##################################################################################
# Bias 1: Separation b/w the query frame and response track
##################################################################################

q2r_separation = vq_stats.compute_query_to_response_separation_statistics(
    VIDEO_ANNOTATIONS
)

# Histogram settings; the axis limits crop the view to the 0-600 frame range
q2r_hist_kwargs = {
    "xlabel": "Separation b/w query and response track (# frames)",
    "ylabel": "# annotations",
    "color": "dodgerblue",
    "edgecolor": ".8",
    "bins": 500,
    "xlim": [0, 600],
    "ylim": [0, 5000],
    "figsize": (7, 6),
    "add_grid": True,
    "save_path": os.path.join(STATS_DIR, "ego4d-biases-q2r.png"),
}
vq_stats.hist(x=q2r_separation, **q2r_hist_kwargs)

In [None]:
##################################################################################
# Bias 2: Size of the response track
##################################################################################

rt_sizes = vq_stats.compute_response_track_length_statistics(VIDEO_ANNOTATIONS)

# Figure destination inside the stats directory
rt_sizes_fig_path = os.path.join(STATS_DIR, "ego4d-biases-rtsize.png")

vq_stats.hist(
    x=rt_sizes,
    # Binning and visible range (view cropped to tracks of up to 100 frames)
    bins=300,
    xlim=[0, 100],
    ylim=[0, 5000],
    # Labels
    xlabel="Response track sizes (# frames)",
    ylabel="# annotations",
    # Appearance
    color="dodgerblue",
    edgecolor=".8",
    figsize=(7, 6),
    add_grid=True,
    save_path=rt_sizes_fig_path,
)

In [None]:
##################################################################################
# Bias 3: Location of bboxes in the RT images
##################################################################################

rt_bbox_locations = vq_stats.compute_response_track_location_statistics(
    VIDEO_ANNOTATIONS,
)
# Create density image: every bbox adds +1 to each pixel it covers on a
# fixed H x W canvas.
# NOTE(review): bbox coordinates are scaled by W and H below, so they are
# presumably fractions of the frame size -- confirm against vq_stats.
H, W = 1080, 1920
density_image = np.zeros((H, W))
for xs, ys, xe, ye in rt_bbox_locations:
    # Clamp to the canvas. Without this, a slightly negative start coordinate
    # becomes a negative slice index, which NumPy interprets as counting from
    # the end of the axis, so the box would silently contribute nothing.
    x0 = max(int(xs * W), 0)
    y0 = max(int(ys * H), 0)
    x1 = min(int(xe * W), W - 1)
    y1 = min(int(ye * H), H - 1)
    if x1 < x0 or y1 < y0:
        # Box lies entirely outside the canvas (or is degenerate)
        continue
    density_image[y0 : y1 + 1, x0 : x1 + 1] += 1

# Normalise to [0, 1]; skip the division when no box was accumulated so the
# image stays all-zero instead of turning into NaNs.
peak_density = density_image.max()
if peak_density > 0:
    normalized_density = density_image / peak_density
else:
    normalized_density = density_image

plt.figure(figsize=(11, 5.5))
plt.imshow(normalized_density)
plt.colorbar()
plt.savefig(os.path.join(STATS_DIR, "ego4d-biases-rtlocs.png"))