In [None]:
# Autosave the notebook every 60 seconds.
%autosave 60
# Load the autoreload extension and use mode 2, so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
import json
import os
import pickle
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import cv2
import matplotlib as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
from IPython.display import display
from matplotlib.patches import Rectangle
from matplotlib_inline.backend_inline import set_matplotlib_formats
from tqdm.contrib import tenumerate
from tqdm.contrib.bells import tqdm

from geoscreens.consts import (
    EXTRACTED_FRAMES_PATH,
    FRAMES_METADATA_PATH,
    LATEST_DETECTION_MODEL_NAME,
    VIDEO_PATH,
)
from geoscreens.data import get_all_geoguessr_split_metadata
from geoscreens.data.metadata import GOOGLE_SHEET_IDS, FramesList
from geoscreens.utils import load_json, save_json, timeit_context

In [None]:
# pandas display options: no truncation of cell text, at most 15 columns and 50 rows shown.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Suitable default display for floats
pd.options.display.float_format = "{:,.6f}".format
# `plt` is bound to the top-level `matplotlib` package by the imports cell
# (`import matplotlib as plt`), so this sets `matplotlib.rcParams`.
plt.rcParams["figure.figsize"] = (12, 10)

# Output formats for inline figures. "retina" is requested through the %config magic and
# then set_matplotlib_formats("pdf", "png") is called right after it.
# NOTE(review): the second call presumably replaces the "retina" setting -- confirm which
# of the two is actually intended.
%config InlineBackend.figure_formats = ["retina"]
set_matplotlib_formats("pdf", "png")

In [None]:
# Publication-style matplotlib defaults: LaTeX text rendering with a Computer Modern serif font.
# `set_matplotlib_formats` is already imported from `matplotlib_inline.backend_inline` in the
# imports cell, so the deprecated `IPython.display` re-import that used to shadow it is dropped.
set_matplotlib_formats("pdf", "png")
plt.rcParams["savefig.dpi"] = 75

plt.rcParams["figure.autolayout"] = False
plt.rcParams["figure.figsize"] = 10, 6
plt.rcParams["axes.labelsize"] = 18
plt.rcParams["axes.titlesize"] = 20
plt.rcParams["font.size"] = 16
plt.rcParams["lines.linewidth"] = 2.0
plt.rcParams["lines.markersize"] = 8
plt.rcParams["legend.fontsize"] = 14
# Requires a working LaTeX installation on the machine running the kernel.
plt.rcParams["text.usetex"] = True

plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = "cm"
# The preamble is handed to LaTeX as text, so the packages are separated by a newline:
# a comma between the two \usepackage commands would end up as literal text in the preamble.
plt.rcParams["text.latex.preamble"] = "\\usepackage{subdepth}\n\\usepackage{type1cm}"

In [None]:
# df_frames_meta = pd.read_json(
#     FRAMES_METADATA_PATH,
#     orient="index"
# )
# # df_frames_meta.describe()
# display(df_frames_meta)

---

## Functions

In [None]:
def transform_box(x1, y1, x2, y2, target_width, target_height, curr_dim=640):
    """
    Map a bounding box from the detector's square (curr_dim x curr_dim) pixel space back to
    the (target_width x target_height) pixel space of the original image (e.g., 1280*720).

    The box is first scaled up to a (target_width x target_width) square and then shifted up
    by half of the height difference, i.e. the vertical padding is assumed to be split evenly
    above and below the image. Assumes target_width is greater than target_height.

    Args:
        x1, y1, x2, y2: Box corners (xmin, ymin, xmax, ymax) in the square pixel space.
        target_width: Width in pixels of the original image.
        target_height: Height in pixels of the original image.
        curr_dim: Side length in pixels of the square space the box currently lives in.

    Returns:
        Tuple[(xmin, ymin, xmax, ymax), area] in the original image's pixel space. The area
        counts both edge pixels, i.e. (xmax - xmin + 1) * (ymax - ymin + 1).
    """
    # Uniform scale factor taking the square back to width*width:
    scale = target_width / curr_dim
    # Height of the padding band above (and below) the image in that width*width square:
    pad = (target_width - target_height) / 2

    left = x1 * scale
    right = x2 * scale
    top = y1 * scale - pad
    bottom = y2 * scale - pad

    box_width = right - left + 1
    box_height = bottom - top + 1
    return (left, top, right, bottom), box_width * box_height


def subsample_frames(
    video_id: str,
    df_frames_meta: pd.DataFrame,
    frame_paths: dict[str, list[dict[str, str]]],
    target_fps: int = 1,
):
    """
    Pick a regularly spaced subset of a video's already-extracted frames.

    The frames on disk were extracted from the original videos at a higher rate (e.g. 4.0fps)
    for UI detection and in/out-of-game segmentation. im2clue training does not need that
    temporal resolution, so this keeps only every k-th frame, where
    k = frame_sample_rate_fps / target_fps.

    Args:
        video_id: Row label in `df_frames_meta` and key in `frame_paths`.
        df_frames_meta: Per-video table; its `frame_sample_rate_fps` value for `video_id` is
            the rate the frames on disk were sampled at.
        frame_paths: Maps video_id to a list of dicts, each with a "file_path" entry.
        target_fps: How many fps to keep from the existing jpg's. Must evenly divide
            `frame_sample_rate_fps`.

    Returns:
        One dict per kept frame with keys "video_id", "frame_idx", "sec" and "file_path"
        (the stored path joined onto EXTRACTED_FRAMES_PATH).
    """
    # Rate (fps) at which the jpg's on disk were sampled:
    frames_fps = df_frames_meta.loc[video_id].frame_sample_rate_fps
    assert (
        frames_fps % target_fps == 0.0
    ), f"frames_fps {frames_fps} should be divisible by target_fps {target_fps}"

    video_frames = [Path(f["file_path"]) for f in frame_paths[video_id]]
    # Keep every `stride`-th frame, counted by position in the list.
    stride = int(frames_fps / target_fps)

    sampled = []
    for position, rel_path in enumerate(video_frames):
        if position % stride != 0:
            continue
        # The file stem, with the "frame_" prefix and every "s" removed, is split on "-":
        # the first piece is the frame index, the second the timestamp in seconds.
        idx_and_sec = rel_path.stem.replace("frame_", "").replace("s", "").split("-")
        sampled.append(
            {
                "video_id": video_id,
                "frame_idx": int(idx_and_sec[0]),
                "sec": float(idx_and_sec[1]),
                "file_path": EXTRACTED_FRAMES_PATH / rel_path,
            }
        )
    return sampled


def filter_to_in_game(
    video_id: str, frames: List[Dict[str, Any]], df_meta: pd.DataFrame
) -> List[Dict[str, Any]]:
    """
    Limit frames to "in_game" segments.

    Loads the pickled segmentation DataFrame for `video_id`, keeps the rows whose `state` is
    "in_game", and returns only the frames whose "frame_idx" falls inside one of those
    segments (both ends inclusive).

    The frame dicts are modified in place:
      * "round_num" is set to the position of the matching segment among the in_game
        segments (0-based, after the index reset).
      * "img_width" / "img_height" are read from the FIRST kept frame's image file and
        applied to every kept frame.

    Args:
        video_id: Row label in `df_meta`; also part of the segmentation file name.
        frames: Frame dicts with at least "frame_idx" and "file_path".
        df_meta: Per-video metadata; its `split` value selects the segmentation sub-folder.

    Returns:
        The (mutated) frame dicts that lie inside an in_game segment.
    """
    split = df_meta.loc[video_id].split
    seg_file = Path(f"/shared/gbiamby/geo/segment/seg/{split}/df_seg-video_id_{video_id}.pkl")
    # NOTE: unpickling can execute arbitrary code; only load segmentation files you trust.
    # The context manager closes the file handle as soon as the DataFrame is loaded.
    with open(seg_file, "rb") as seg_fh:
        df_seg = pickle.load(seg_fh)
    df_seg = df_seg[df_seg.state == "in_game"].reset_index(drop=True)
    in_games = [(idx, r["start_frame_idx"], r["end_frame_idx"]) for idx, r in df_seg.iterrows()]

    def is_in_game(frame):
        # Tags the frame with the first segment that contains it (side effect on `frame`).
        for seg in in_games:
            if seg[1] <= frame["frame_idx"] <= seg[2]:
                frame["round_num"] = seg[0]
                return True
        return False

    frames = [frame for frame in frames if is_in_game(frame)]
    if frames:
        # Only the first image is opened; its size is applied to every kept frame.
        with pil_img.open(frames[0]["file_path"]) as img:
            img_width, img_height = img.size
        for f in frames:
            f["img_width"] = img_width
            f["img_height"] = img_height

    return frames


def get_dets(video_id: str, model: str, df_meta: pd.DataFrame) -> pd.DataFrame:
    """
    Load the pickled per-frame UI detections for one video.

    Args:
        video_id: Row label in `df_meta`; also part of the detections file name.
        model: Name of the detection model; selects the detections sub-folder.
        df_meta: Per-video metadata; its `split` value selects the split sub-folder.

    Returns:
        The detections DataFrame indexed by "frame_idx", with the "frame_id" column removed
        if it was present.
    """
    split = df_meta.loc[video_id].split
    dets_path = Path(
        f"/shared/gbiamby/geo/segment/detections/{model}/{split}/df_frame_dets-video_id_{video_id}.pkl"
    )
    # NOTE: unpickling can execute arbitrary code; only load detection files you trust.
    # The context manager closes the file handle as soon as the DataFrame is loaded.
    with open(dets_path, "rb") as dets_fh:
        df_dets = pickle.load(dets_fh)
    if "frame_id" in df_dets.columns:
        df_dets = df_dets.drop(columns=["frame_id"])
    return df_dets.set_index("frame_idx")

---

In [None]:
# in_game_frames = filter_to_in_game(video_id, frames)
# print(len(frames), len(in_game_frames))

---

## Test a Single Video

In [None]:
# video_id = "--0Kbpo9DtE"
video_id = "zOoUR17xnL0"

# Load the shared inputs only if an earlier cell has not already done so.
if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")
if "df_frames_meta" not in locals():
    # One row per video_id; subsample_frames reads `frame_sample_rate_fps` from it.
    df_frames_meta = pd.read_json(FRAMES_METADATA_PATH, orient="index")
if "frame_paths" not in locals():
    with open(EXTRACTED_FRAMES_PATH / "frames_list.pkl", "rb") as frames_fh:
        frame_paths = pickle.load(frames_fh)

# `frame_paths` is the dict loaded above (the previous `frames_list` name was never defined).
frames = subsample_frames(video_id, df_frames_meta, frame_paths)
print(len(frames))
df_all_frames = pd.DataFrame(frames)
in_game_frames = filter_to_in_game(video_id, frames, df_meta)
print(f"num frames: {len(frames)}, num in_game frames: {len(in_game_frames)}")

In [None]:
# In-game frames of the test video, ordered by round and then by frame.
df_ingame = pd.DataFrame(in_game_frames).sort_values(["round_num", "frame_idx"])
# display(df_ingame)

# One row per round: how many frames it has and the first/last timestamp and frame index.
round_summary = df_ingame.groupby(["round_num"]).agg(
    total_frames=("frame_idx", "count"),
    start_sec=("sec", "min"),
    end_sec=("sec", "max"),
    start_frame=("frame_idx", "min"),
    end_frame=("frame_idx", "max"),
)
display(pd.DataFrame(round_summary))

---

## Show Some in_game Frames

In [None]:
# Show the in-game frames table as the cell output.
df_ingame

In [None]:
from IPython.core.display import HTML, Markdown


def show_random_frames(df: pd.DataFrame, n_samples: int = 5):
    """
    Display a random sample of frames, each followed by a one-line caption.

    Args:
        df: One row per frame with columns "file_path", "video_id", "frame_idx" and "sec".
        n_samples: Maximum number of frames to show. If `df` has fewer rows, all of them are
            shown instead of raising.
    """
    # DataFrame.sample (without replacement) raises when n exceeds the number of rows.
    df_random = df.sample(n=min(n_samples, len(df)))

    for _, img_row in df_random.iterrows():
        print("-" * 180)
        # Close the image file once it has been rendered.
        with pil_img.open(img_row["file_path"]) as img:
            # Shrink in place to fit within 1080x640 while keeping the aspect ratio.
            img.thumbnail((1080, 640), pil_img.NEAREST)
            display(img)
        print(
            f"video_id: {img_row.video_id}, frame_idx: {img_row.frame_idx}, seconds: {img_row.sec}",
        )
        print("")


# Preview five randomly chosen in-game frames.
show_random_frames(df_ingame, 5)

## Show Some Random Masked Frames

In [None]:
from PIL import ImageDraw


def show_random_frames_masked(
    video_id: str, model: str, df: pd.DataFrame, df_meta: pd.DataFrame, n_samples: int = 5
):
    """
    Display random frames of one video, each alongside a copy with the detected UI blacked out.

    For every sampled frame this prints a caption, shows the original image, prints how much
    of the image the detection boxes cover, and then shows a masked copy in which each
    detection box is filled with black. If a "game_title" and/or "status_bar" detection is
    present, the full-width band from the top of the image down to the lower of their bottom
    edges is filled as well.

    Args:
        video_id: Video whose detections are loaded via `get_dets`.
        model: Detection model name, passed to `get_dets`.
        df: One row per frame with columns "file_path", "video_id", "frame_idx" and "sec".
        df_meta: Per-video metadata, passed to `get_dets`.
        n_samples: Maximum number of frames to show. If `df` has fewer rows, all of them are
            shown instead of raising.
    """
    # DataFrame.sample (without replacement) raises when n exceeds the number of rows.
    df_random = df.sample(n=min(n_samples, len(df)))
    df_dets = get_dets(video_id, model, df_meta)
    for _, img_row in df_random.iterrows():
        print("-" * 180)
        print(
            f"video_id: {img_row.video_id}, frame_idx: {img_row.frame_idx}, seconds: {img_row.sec}",
        )
        img = pil_img.open(img_row["file_path"])
        img_width, img_height = img.size
        display(img)
        dets = df_dets.loc[img_row.frame_idx]
        # label -> (label, ((xmin, ymin, xmax, ymax), area), score) in image pixel space.
        # Keyed by label, so a later detection with the same label replaces an earlier one.
        dets_lookup = {
            l: (l, transform_box(*bb.values(), img_width, img_height), s)
            for l, bb, s in zip(dets.labels, dets.bboxes, dets.scores)
        }
        # Plain sum of the box areas: overlapping boxes are counted more than once and the
        # extra top band drawn below is not included.
        masked_area = sum(d[1][1] for d in dets_lookup.values())
        print(
            f"masked_area: {masked_area:,}",
            f"img_area: {float(img_width*img_height):,}",
            f"pct_masked: {100.0 * masked_area / (img_width*img_height):.2f}%",
        )

        # Draw on a copy so the image opened above is left untouched.
        img_masked = img.copy()
        draw = ImageDraw.Draw(img_masked)
        for _label, (box, _area), _score in dets_lookup.values():
            draw.rectangle(box, fill=0)

        # Mask out minimum rectangular region that encloses the geoguessr logo and/or the status bar:
        top_ui = [dets_lookup[l] for l in ["game_title", "status_bar"] if l in dets_lookup]
        if top_ui:
            y_max = max(d[1][0][3] for d in top_ui)
            draw.rectangle((0, 0, img_width, y_max), fill=0)
        display(img_masked)
        print("")

In [None]:
# Load the shared inputs only if an earlier cell has not already done so.
if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")
if "df_frames_meta" not in locals():
    # One row per video_id; subsample_frames reads `frame_sample_rate_fps` from it.
    df_frames_meta = pd.read_json(FRAMES_METADATA_PATH, orient="index")
if "frame_paths" not in locals():
    with open(EXTRACTED_FRAMES_PATH / "frames_list.pkl", "rb") as frames_fh:
        frame_paths = pickle.load(frames_fh)

video_id = "--0Kbpo9DtE"
# video_id = "zOoUR17xnL0"
model = LATEST_DETECTION_MODEL_NAME
frames = subsample_frames(video_id, df_frames_meta, frame_paths)
df_all_frames = pd.DataFrame(frames)
in_game_frames = filter_to_in_game(video_id, frames, df_meta)
df_ingame = pd.DataFrame(in_game_frames).sort_values(["round_num", "frame_idx"])

show_random_frames_masked(video_id, model, df_ingame, df_meta, 10)

---

---


## Some Stats about Video -> Frames, CLIP Samples Pipeline

In [None]:
# Work on a deep copy so GOOGLE_SHEET_IDS itself is never modified from this notebook.
id_list = deepcopy(GOOGLE_SHEET_IDS)
print("ids_list.len: ", len(id_list))

# Load the per-video metadata table only if an earlier cell has not already done so.
if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")
# df_meta.head(2)

In [None]:
# Count how many videos have made it through each stage of the pipeline.
video_files = sorted(p for p in os.listdir("/shared/g-luo/geoguessr/videos") if p.endswith(".mp4"))
ids_with_meta = [i for i in id_list if i in df_meta.index]
model = LATEST_DETECTION_MODEL_NAME
# video_id's that have extracted frames, taken from the keys of the frames list.
frames_extracted = list(FramesList().get().keys())
# video_id's recovered from the detection / segmentation pickle file names.
dets = [
    p.stem.replace("df_frame_dets-video_id_", "")
    for p in sorted(Path(f"/shared/gbiamby/geo/segment/detections/{model}").glob("**/*.pkl"))
]
segs = [
    p.stem.replace("df_seg-video_id_", "")
    for p in sorted(Path("/shared/gbiamby/geo/segment/seg").glob("**/*.pkl"))
]
# Sets make the membership tests below O(1) instead of scanning a list for every id.
det_ids = set(dets)
seg_ids = set(segs)
ids_with_dets = [i for i in ids_with_meta if i in det_ids]
ids_with_segs = [i for i in ids_with_dets if i in seg_ids]
# NOTE(review): never populated or read in this cell -- confirm whether it is still needed.
ids_with_frames = []
print("")
print(f"Total video files: {len(video_files):,}")
print(f"Videos w/ metadata: {len(df_meta):,}")
print("Videos w/ frames extracted: ", len(frames_extracted))
print("Videos w/ UI detection outputs: ", len(dets))
print("Videos w/ segmentation outputs: ", len(segs))

print("")
print("videos in google sheet: ", len(id_list))
print("videos w/ metadata: ", len(ids_with_meta))
print("videos in google sheet + w/ meta +  detections: ", len(ids_with_dets))
print("videos in google sheet + w/ meta + detections + segmentation: ", len(ids_with_segs))

In [None]:
# Compare the number of entries in the frames metadata JSON with the pickled frames list
# stored next to it.
fm = load_json(FRAMES_METADATA_PATH)
print(len(fm))

# The context manager closes the file handle once the pickle has been read.
with open(FRAMES_METADATA_PATH.with_name("frames_list.pkl"), "rb") as frames_fh:
    fl = pickle.load(frames_fh)
print(len(fl))

In [None]:
# df_meta[df_meta.video_id.isin(ids_list)].split.value_counts()
# len(set(df_meta.index.values).intersection(set(id_list)))
# df_meta.loc[list(id_list),:]
# Ad-hoc checks on df_meta. Only the value of the final expression is rendered as the cell
# output; the two expressions before it are evaluated but not shown.
df_meta.index
# Look up two specific video ids (presumably raises KeyError if either is missing).
df_meta.loc[["K4GXuDACK40", "8ytmWvud6-4"]]
# Split distribution of the videos present in both df_meta and id_list.
df_meta.loc[list(set(df_meta.index.values).intersection(id_list)), :].split.value_counts()

In [None]:
# Directories directly under the video_frames folder, sorted by path.
# NOTE(review): presumably one directory per video -- confirm against the extraction layout.
videos_with_frames = sorted(Path("/shared/gbiamby/geo/video_frames").glob("*/"))
# Show the first five entries together with the total count.
videos_with_frames[:5], len(videos_with_frames)

In [None]:
# NOTE(review): only the value of the last expression is rendered, so the first two lines
# produce no visible output. `df_frames_meta` is first assigned in the "Process All Data"
# cell further down, so on a fresh kernel this cell presumably raises NameError -- confirm
# the intended execution order.
df_meta.head(5)
df_frames_meta.shape
len(ids_with_segs)

---

## Process All Data

In [None]:
model = LATEST_DETECTION_MODEL_NAME
if "df_frames_meta" not in locals():
    # One row per video_id, tells you sample rate, num frames sampled, and video fps:
    df_frames_meta = pd.read_json(FRAMES_METADATA_PATH, orient="index")

if "df_meta" not in locals():
    # One row per video_id, has the general metadata about the youtube video
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")

if "frame_paths" not in locals():
    # Dictionary of every single video. key=video_id, val=list of every sampled frame (@4fps)
    with open(EXTRACTED_FRAMES_PATH / "frames_list.pkl", "rb") as frames_fh:
        frame_paths = pickle.load(frames_fh)

if "segs" not in locals():
    # List of video_id's that have segmentations computed
    segs = [
        str(p.stem.replace("df_seg-video_id_", ""))
        for p in sorted(Path("/shared/gbiamby/geo/segment/seg").glob("**/*.pkl"))
    ]

if "dets" not in locals():
    # List of video_id's that have UI detections computed
    dets = [
        str(p.stem.replace("df_frame_dets-video_id_", ""))
        for p in sorted(Path(f"/shared/gbiamby/geo/segment/detections/{model}").glob("**/*.pkl"))
    ]

in_game_frames_all = []
# Videos that have frame metadata, a segmentation AND detections. Sorted so the processing
# order -- and with it the order of `in_game_frames_all` -- is the same on every run
# (iteration order of a set of strings can differ between interpreter sessions).
video_ids = sorted(
    set(df_frames_meta.video_id.values.tolist()).intersection(set(segs)).intersection(set(dets))
)
print(f"Total video_ids with segmentations: {len(video_ids):,}")

for _, video_id in tenumerate(video_ids):
    # Subsample each video's frames, then keep only those inside an in_game segment.
    frames = subsample_frames(video_id, df_frames_meta, frame_paths)
    in_game_frames = filter_to_in_game(video_id, frames, df_meta)
    in_game_frames_all.extend(in_game_frames)

In [None]:
# Peek at the first collected frame record and the total number of in-game frames.
print(in_game_frames_all[0], f"{len(in_game_frames_all):,}")

### Append UI element detections to the frames data

In [None]:
# Cache of detection DataFrames, keyed by video_id, so each video's pickle is loaded once.
df_dets_all = {}
for i, f in tenumerate(in_game_frames_all):
    if f["video_id"] not in df_dets_all:
        df_dets_all[f["video_id"]] = get_dets(f["video_id"], model, df_meta)
    df_dets = df_dets_all[f["video_id"]]
    frame_dets = df_dets.loc[f["frame_idx"]]

    # Copy the detector output for this frame onto the frame record.
    f["time"] = frame_dets.time
    f["labels"] = frame_dets.labels
    f["scores"] = frame_dets.scores
    # Boxes exactly as stored in the detections table, before conversion:
    f["bboxes_640"] = frame_dets.bboxes

    # Convert every box to the frame's own pixel space and store it as a flat dict.
    converted = []
    for raw_box in frame_dets.bboxes:
        corners, area = transform_box(*raw_box.values(), f["img_width"], f["img_height"])
        converted.append(
            {
                "xmin": corners[0],
                "ymin": corners[1],
                "xmax": corners[2],
                "ymax": corners[3],
                "area": area,
            }
        )
    f["bboxes"] = converted

    f["split"] = df_meta.loc[f["video_id"]].split
    # Store the path as a plain string.
    f["file_path"] = str(f["file_path"])

### Save as both Raw JSON and DataFrame

In [None]:
# Build the final table: indexed by (video_id, frame_idx), restricted to the listed columns
# ("bboxes_640" is left out), and sorted by video, round and frame.
df_ingame = (
    pd.DataFrame(in_game_frames_all)
    # drop=False keeps video_id / frame_idx available as ordinary columns too.
    .set_index(["video_id", "frame_idx"], drop=False)
    .loc[
        :,
        [
            "video_id",
            "round_num",
            "frame_idx",
            "img_width",
            "img_height",
            "sec",
            "time",
            "labels",
            "scores",
            # "bboxes_640",
            "bboxes",
            "split",
            "file_path",
        ],
    ]
    # Give the index levels names that differ from the column names before sorting by column.
    .rename_axis(["_video_id", "_frame_id"])
    .sort_values(["video_id", "round_num", "frame_idx"])
)

In [None]:
# Set to False to skip saving; True writes (and overwrites) the output files.
SAVE_OUTPUTS = True
if SAVE_OUTPUTS:
    dest_dir = Path("/shared/gbiamby/geo/segment")
    assert dest_dir.is_dir(), f"{dest_dir} must be an existing directory"
    save_json(dest_dir / "in_game_frames_000.json", in_game_frames_all)
    # (file name, pickle protocol). None selects pickle's default protocol; the numbered
    # variants write the same DataFrame with an explicit protocol version.
    pickle_targets = [
        ("in_game_frames_000.pkl", None),
        ("in_game_frames_000-protocol_3.pkl", 3),
        ("in_game_frames_000-protocol_4.pkl", 4),
        ("in_game_frames_000-protocol_5.pkl", 5),
    ]
    for file_name, protocol in pickle_targets:
        # The context manager flushes and closes each file before the next one is written.
        with open(dest_dir / file_name, "wb") as out_fh:
            pickle.dump(df_ingame, out_fh, protocol=protocol)

In [None]:
# display(df_ingame)
# df_in_game_summary = pd.DataFrame(
#     df_ingame.groupby(["round_num"]).agg(
#         total_frames=("frame_idx", "count"),
#         start_sec=("sec", "min"),
#         end_sec=("sec", "max"),
#         start_frame=("frame_idx", "min"),
#         end_frame=("frame_idx", "max"),
#     )
# )
# display(df_in_game_summary)

In [None]:
# Show the combined in-game frames table as the cell output.
df_ingame

In [None]:
# Load the current frames metadata plus two other JSON files stored next to it
# ("frame_meta_003_new_bak.json" and "frame_meta_003_new.json") for comparison.
frame_info_base = load_json(FRAMES_METADATA_PATH)
frame_info_new_1 = load_json(FRAMES_METADATA_PATH.with_name("frame_meta_003_new_bak.json"))
frame_info_new_2 = load_json(FRAMES_METADATA_PATH.with_name("frame_meta_003_new.json"))

In [None]:
# Key sets of the three frame-metadata mappings, in the order base, new_1, new_2.
f0, f1, f2 = (
    set(info.keys()) for info in (frame_info_base, frame_info_new_1, frame_info_new_2)
)

In [None]:
# Size of each key set, followed by the size of every pairwise overlap.
print(len(f0), len(f1), len(f2))
print(len(f0 & f1))
print(len(f0 & f2))
print(len(f1 & f2))