In [None]:
%autosave 60
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import json
import os
import pickle
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import cv2
import matplotlib as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
from IPython.display import display
from matplotlib.patches import Rectangle
from matplotlib_inline.backend_inline import set_matplotlib_formats
from tqdm.contrib import tenumerate
from tqdm.contrib.bells import tqdm

from geoscreens.data import get_all_geoguessr_split_metadata
from geoscreens.utils import load_json, save_json

In [None]:
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Suitable default display for floats
pd.options.display.float_format = "{:,.6f}".format
plt.rcParams["figure.figsize"] = (12, 10)

# Optional: render inline figures as SVG -- only do this if your graphs don't
# contain a lot of points/lines. Use ["retina"] instead if you don't want SVG.
%config InlineBackend.figure_formats = ["retina"]
set_matplotlib_formats("pdf", "png")

In [None]:
# Publication-style matplotlib defaults: LaTeX text rendering with serif fonts.
#
# `set_matplotlib_formats` is the one imported from
# `matplotlib_inline.backend_inline` in the import cell. The former re-import
# from `IPython.display` was dropped: that alias is deprecated and only
# shadowed the supported function with an equivalent one.
set_matplotlib_formats("pdf", "png")

plt.rcParams.update(
    {
        "savefig.dpi": 75,
        "figure.autolayout": False,
        "figure.figsize": (10, 6),
        "axes.labelsize": 18,
        "axes.titlesize": 20,
        "font.size": 16,
        "lines.linewidth": 2.0,
        "lines.markersize": 8,
        "legend.fontsize": 14,
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": "cm",
        # The preamble is inserted verbatim into the LaTeX document, so the
        # commands are separated by a newline. A ", " separator would put a
        # literal comma in the preamble and make LaTeX fail.
        "text.latex.preamble": "\\usepackage{subdepth}\n\\usepackage{type1cm}",
    }
)

In [None]:
# Frame-extraction metadata; rows are looked up by video id further down
# (e.g. `df_frames_meta.loc[video_id].frame_sample_rate_fps`).
frame_meta_path = "/shared/gbiamby/geo/video_frames/frame_meta_001.json"
df_frames_meta = pd.read_json(frame_meta_path, orient="index")
# df_frames_meta.describe()
display(df_frames_meta)

---

### Functions

In [None]:
def subsample_frames(
    video_id: str,
    df_frames_meta: pd.DataFrame,
    target_fps: int = 1,
    frames_dir: Union[str, Path] = "/shared/gbiamby/geo/video_frames",
) -> List[Dict[str, Any]]:
    """
    Subsample the frames already extracted to disk for one video.

    The jpg's on disk were extracted from the original videos at a rate recorded
    in `df_frames_meta` (`frame_sample_rate_fps`) and are used to detect UI
    elements and compute the in/out-of-game segments. This sub-samples them at
    something lower, since we don't need that high of a temporal resolution
    for im2clue training.

    Args:
        video_id: Video to process. It must be a row label of `df_frames_meta`
            and the name of a sub-directory of `frames_dir`.
        df_frames_meta: Frame-extraction metadata with a
            `frame_sample_rate_fps` column.
        target_fps: This is how many fps you want to sample from the existing
            jpg's. It must evenly divide `frame_sample_rate_fps`, which is the
            rate that the frames on disk were sampled at.
        frames_dir: Root directory holding one sub-directory of jpg's per video.

    Returns:
        One dict per kept frame with keys `video_id`, `frame_idx`, `sec` and
        `file_path`, ordered by file path. Every
        `frame_sample_rate_fps / target_fps`-th file (by sorted position,
        starting with the first) is kept.

    Raises:
        AssertionError: if `frame_sample_rate_fps` is not divisible by
            `target_fps`.
    """
    # This is how many fps the jpg's were sampled at:
    frames_fps = df_frames_meta.loc[video_id].frame_sample_rate_fps
    assert (
        frames_fps % target_fps == 0.0
    ), f"frames_fps {frames_fps} should be divisible by target_fps {target_fps}"
    step = int(frames_fps / target_fps)
    frame_paths = sorted((Path(frames_dir) / video_id).glob("*.jpg"))

    frames = []
    for frame_path in frame_paths[::step]:
        # File stems look like "frame_<frame_idx>-<seconds>s"; parse them once.
        parts = frame_path.stem.replace("frame_", "").replace("s", "").split("-")
        frames.append(
            {
                "video_id": frame_path.parent.name,
                "frame_idx": int(parts[0]),
                "sec": float(parts[1]),
                "file_path": frame_path,
            }
        )
    return frames


def filter_to_in_game(
    video_id: str,
    frames: List[Dict[str, Any]],
    df_meta: pd.DataFrame,
    seg_dir: Union[str, Path] = "/shared/gbiamby/geo/segment/seg",
) -> List[Dict[str, Any]]:
    """
    Limit frames to "in_game" segments.

    Args:
        video_id: Video the frames belong to; must be a row label of `df_meta`.
        frames: Frame dicts, each with at least a `frame_idx` key (kept frames
            also need `file_path`). The kept dicts are annotated IN PLACE with
            `round_num`, `img_width` and `img_height`.
        df_meta: Video metadata with a `split` column.
        seg_dir: Root directory containing
            `<split>/df_seg-video_id_<video_id>.pkl`. The pickle is expected to
            hold a DataFrame with `state`, `start_frame_idx` and
            `end_frame_idx` columns.

    Returns:
        The frames whose `frame_idx` falls inside an "in_game" segment (bounds
        inclusive). `round_num` is the position of the matching segment among
        the in_game segments. The image size is read from the first kept frame
        only and copied to all of them.
    """
    split = df_meta.loc[video_id].split
    seg_file = Path(seg_dir) / f"{split}" / f"df_seg-video_id_{video_id}.pkl"
    # NOTE: pickle can execute arbitrary code -- only load trusted files.
    with open(seg_file, "rb") as seg_fh:
        df_seg = pickle.load(seg_fh)
    df_seg = df_seg[df_seg.state == "in_game"].reset_index(drop=True)
    in_games = list(zip(df_seg.index, df_seg["start_frame_idx"], df_seg["end_frame_idx"]))

    def is_in_game(frame):
        for round_num, start_idx, end_idx in in_games:
            if start_idx <= frame["frame_idx"] <= end_idx:
                frame["round_num"] = round_num
                return True
        return False

    frames = [frame for frame in frames if is_in_game(frame)]
    if frames:
        # Close the image file as soon as its size has been read.
        with pil_img.open(frames[0]["file_path"]) as img:
            img_width, img_height = img.size
        for f in frames:
            f["img_width"] = img_width
            f["img_height"] = img_height

    return frames

---

In [None]:
# in_game_frames = filter_to_in_game(video_id, frames)
# print(len(frames), len(in_game_frames))

---

### Test a Single Video

In [None]:
# video_id = "--0Kbpo9DtE"
video_id = "zOoUR17xnL0"

# Build the split metadata table (indexed by video id) only if an earlier cell
# has not already created it.
if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")

# Subsample the extracted frames of this video (default target_fps=1).
frames = subsample_frames(video_id, df_frames_meta)
print(len(frames))
# Snapshot of ALL subsampled frames. It is taken before filter_to_in_game(),
# which adds keys (round_num, img_width, img_height) to the kept dicts in place.
df_all_frames = pd.DataFrame(frames)
in_game_frames = filter_to_in_game(video_id, frames, df_meta)
print(f"num frames: {len(frames)}, num in_game frames: {len(in_game_frames)}")

In [None]:
df_ingame = pd.DataFrame(in_game_frames).sort_values(["round_num", "frame_idx"])
# display(df_ingame)

# One row per round: number of sampled frames plus the first/last timestamp
# and frame index seen in that round.
round_agg_spec = {
    "total_frames": ("frame_idx", "count"),
    "start_sec": ("sec", "min"),
    "end_sec": ("sec", "max"),
    "start_frame": ("frame_idx", "min"),
    "end_frame": ("frame_idx", "max"),
}
df_round_summary = df_ingame.groupby(["round_num"]).agg(**round_agg_spec)
display(pd.DataFrame(df_round_summary))

---

## Show Some in_game Frames

In [None]:
df_ingame

In [None]:
from IPython.core.display import HTML, Markdown


def show_random_frames(df: pd.DataFrame, n_samples: int = 5):
    """
    Display a random sample of frames inline, each followed by a caption.

    Args:
        df: Frames table with `file_path`, `video_id`, `frame_idx` and `sec`
            columns.
        n_samples: Maximum number of frames to show. If `df` has fewer rows,
            all of them are shown instead of raising.
    """
    # DataFrame.sample raises when asked for more rows than exist (without
    # replacement), so clamp the request to the table size.
    df_random = df.sample(n=min(n_samples, len(df)))

    for _, img_row in df_random.iterrows():
        print("-" * 180)
        # Context manager closes the underlying file once the frame is shown.
        with pil_img.open(img_row["file_path"]) as img:
            # Downscale in place (aspect ratio preserved) to keep output small.
            img.thumbnail((1080, 640), pil_img.NEAREST)
            display(img)
        print(
            f"video_id: {img_row.video_id}, frame_idx: {img_row.frame_idx}, seconds: {img_row.sec}",
        )
        print("")


show_random_frames(df_ingame, 5)

## Show Some Random Masked Frames

In [None]:
from PIL import ImageDraw


def get_dets(
    video_id: str,
    model: str,
    df_meta: pd.DataFrame,
    dets_dir: Union[str, Path] = "/shared/gbiamby/geo/segment/detections",
) -> pd.DataFrame:
    """
    Load the pickled per-frame UI detections of one video.

    Args:
        video_id: Video to load; must be a row label of `df_meta`.
        model: Name of the detector run; used as a sub-directory of `dets_dir`.
        df_meta: Video metadata with a `split` column.
        dets_dir: Root directory containing
            `<model>/<split>/df_frame_dets-video_id_<video_id>.pkl`.

    Returns:
        The detections DataFrame indexed by `frame_idx`, with the `frame_id`
        column removed when present.
    """
    split = df_meta.loc[video_id].split
    dets_path = Path(dets_dir) / model / f"{split}" / f"df_frame_dets-video_id_{video_id}.pkl"
    # NOTE: pickle can execute arbitrary code -- only load trusted files.
    with open(dets_path, "rb") as dets_fh:
        df_dets = pickle.load(dets_fh)

    # df_dets.bbox.apply(lambda x: transform_box(*x.values(),
    return df_dets.drop(columns=["frame_id"], errors="ignore").set_index("frame_idx")


def transform_box(x1, y1, x2, y2, target_width, target_height, curr_dim=640):
    """
    Map a bbox from the detector's square (curr_dim, curr_dim) pixel space back to the
    (target_width, target_height) pixel space of the original image (e.g., 1280*720).

    Assumes the image is wider than it is tall: every coordinate is first scaled by
    target_width / curr_dim (i.e. into a target_width * target_width square), and then
    half of the height difference is subtracted from the y coordinates to remove the
    vertical padding. Results are not clipped to the image bounds.

    Args:
        x1, y1, x2, y2: box corners (xmin, ymin, xmax, ymax) in the square space.
        target_width, target_height: size of the original image in pixels.
        curr_dim: side length of the square space.

    Returns:
        Tuple[(xmin, ymin, xmax, ymax), area], where area uses the inclusive-pixel
        convention (width + 1) * (height + 1).
    """
    scale = target_width / curr_dim
    y_pad = (target_width - target_height) / 2

    left, right = x1 * scale, x2 * scale
    top = y1 * scale - y_pad
    bottom = y2 * scale - y_pad

    area = (right - left + 1) * (bottom - top + 1)
    return (left, top, right, bottom), area


def show_random_frames_masked(
    video_id: str, model: str, df: pd.DataFrame, df_meta: pd.DataFrame, n_samples: int = 5
):
    """
    Display random frames of one video, each followed by a copy with the detected
    UI elements blacked out.

    Args:
        video_id: Video whose detections are loaded via `get_dets`.
        model: Detector run name passed to `get_dets`.
        df: Frames table with `file_path`, `video_id`, `frame_idx` and `sec` columns.
        df_meta: Video metadata passed to `get_dets`.
        n_samples: Maximum number of frames to show. If `df` has fewer rows, all of
            them are shown instead of raising.
    """
    # DataFrame.sample raises when asked for more rows than exist, so clamp.
    df_random = df.sample(n=min(n_samples, len(df)))
    df_dets = get_dets(video_id, model, df_meta)
    for _, img_row in df_random.iterrows():
        print("-" * 180)
        print(
            f"video_id: {img_row.video_id}, frame_idx: {img_row.frame_idx}, seconds: {img_row.sec}",
        )
        img = pil_img.open(img_row["file_path"])
        img_width, img_height = img.size
        display(img)
        dets = df_dets.loc[img_row.frame_idx]

        # Keep EVERY detection as (label, (coords, area), score). A dict keyed by
        # label would keep only one box per label, leaving the others unmasked
        # and uncounted.
        boxes = [
            (label, transform_box(*bb.values(), img_width, img_height), score)
            for label, bb, score in zip(dets.labels, dets.bboxes, dets.scores)
        ]
        # Sum of box areas; overlapping boxes are counted more than once.
        masked_area = sum(area for _, (_, area), _ in boxes)
        print(
            f"masked_area: {masked_area:,}",
            f"img_area: {float(img_width*img_height):,}",
            f"pct_masked: {100.0 * masked_area / (img_width*img_height):.2f}%",
        )

        # Draw on a copy so the source image object is left untouched.
        img_masked = img.copy()
        draw = ImageDraw.Draw(img_masked)
        for _, (coords, _), _ in boxes:
            draw.rectangle(coords, fill=0)

        # Mask out minimum rectangular region that encloses the geoguessr logo and/or the status bar:
        top_ui_bottoms = [
            coords[3] for label, (coords, _), _ in boxes if label in ("game_title", "status_bar")
        ]
        if top_ui_bottoms:
            draw.rectangle((0, 0, img_width, max(top_ui_bottoms)), fill=0)
        display(img_masked)
        print("")


# Build the split metadata table (indexed by video id) only if an earlier cell
# has not already created it.
if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")
video_id = "--0Kbpo9DtE"
# video_id = "zOoUR17xnL0"
# Detector run whose detections are used for masking.
model = "gsmoreanch02_012--geoscreens_012-model_faster_rcnn-bb_resnest50_fpn-2b72cbf305"
frames = subsample_frames(video_id, df_frames_meta)
# Snapshot taken before filter_to_in_game(), which annotates the kept frame
# dicts in place.
df_all_frames = pd.DataFrame(frames)
in_game_frames = filter_to_in_game(video_id, frames, df_meta)
df_ingame = pd.DataFrame(in_game_frames).sort_values(["round_num", "frame_idx"])

# Show 10 random in-game frames, each with its masked counterpart.
show_random_frames_masked(video_id, model, df_ingame, df_meta, 10)

---

---


## Some Stats about Video -> Frames, CLIP Samples Pipeline

In [None]:
# fmt: off
id_list = set([
    "8Bw7td5T49U", "tjTu5QhQgtg", "78xNkZqhB14", "5HueHB6D85g", "TZUU67D8eD4", "5vM8Vn8dzRc", "3e4p2WueJnk", "RoAIPgG1H-I", "D4POQX3geEs", "Fljin_26vug", "8YsakLYpA6I", "qHdMPFeIWf8", "X68B0Gpbbzs", "5DhfpT_BK14", "Xvxoq8uR3Zs", "uOpwZPtkEsk", "vtEQf5SeT8A", "tSwfT6dSwMg", "dO7TdYgtAWg", "RLjrxQJOubY", "mg_sgJJPNqM", "t6K0TXA4FT8", "78lriIFZvVw", "uN4hVUzQC5c", "x9mNJalP73w", "yDXcweSID4c", "5bQFKz_hAD8", "7l1elyK6smk", "8JvPC6mPjCE", "8EB6rbK6KIk", "hYzp8aT7Sqs", "AFTNudiAtWs", "yfNYtA__bJc", "XKIHkUfrTv0", "8TIlYg31Ys8", "7A6OIVDvNCM", "7Cb79FsTSbQ", "H2h8V2zWbNo", "G8muDcrX2vQ", "nr5cEg5dpN0", "k_59zOaQMYI", "ajWkg4k03UY", "XaR8S25aK-0", "EXwUtNeLUio", "FJjsv3hwGx8", "RtDIzWgaHsA", "zO8FOrHRCaE", "7yyMBkkFnl4", "_jRSHbYfCiI", "iS4dwvTr6Aw", "cgXjROAaR14", "Gx1eggPvvz8", "nWNVMm5Cjjc", "0YVorrXo3z0", "fyVctCdaycI", "V8RN33XDge0", "eF443S8Svso", "_w3ymH9Z4lg", "fALNGSKcgaY", "8INPrVUdwbY", "OIbJpKWlTv8", "7sbKs551sSw", "ygJYQcrPwWA", "iREpxVxkVy8", "7ZmbVYKuhJQ", "b56-pFJsYpo", "UNFGeLmSC8c", "yBITEq7yi7w", "z3MEEDh_VJ8", "C920wI0_lTU", "2HW4sUPH-SM", "9H44X63GrUY", "a-jofvbOEog", "ooDLsWuAPRI", "y93i-jEMTyE", "9hdrEQ2M2JQ", "-YqLmQhi2Mw", "5Tnf_wT3oTs", "pwohUNpbLgc", "SynBqjTP1WA", "S0x1s4d5VKY", "RmtlSq159xY", "bvaP9AsDyhM", "lIOFDYsjjO8", "SGdJ1-m0Il8", "Hsd15piApiM", "Hy9DB7WKGhA", "SlJnwQRThiM", "f9REixx1Cjw", "MCu_KujLtZc", "Y-UOwHtvBeo", "ID79S40DMGw", "V1I6qZ-1mo8", "97L6Imlixi8", "Ja5eSUlfrkY", "jbl8tS3LMdM", "TYR_miC66Kk", "hE_kQAVPe9Q", "Df1_jpBKCOI", "1LoeXjd0JwE", "7UXkRbjQPWU", "yYIpaVMG5uY", "T1SHlddMb1w", "TP_5VNhANkE", "n58XMSpPSmQ", "nKztCxH2QoQ", "sHCJccXO-XM", "s2j0LJMz2c8", "jZiZGE5Yw7M", "2RLc782IbG4", "y7o54MXyAmo", "6mfZIQJhzzw", "f4HYBPVfE5U", "hql2k-qRGQA", "XPN6ta4QP1s", "SF56rvgw9oU", "18uwUloxF70", "rBFbCUgtuTE", "XQghH7D9ivI", "YYpk9QuEb4Q", "uAAxSVBG3Ao", "SOQn-tAEqoA", "LYQqQR1-O8k", "9JhJLztPrcY", "-cpjdkdjSe4", "Dtwik5ME6k0", "op4ZXU65IEg", "rv0ExMrsoCQ", "ou6AXRCpTP0", "67h7PRS4DZQ", "8jQ7knB1dDw", "NB5CLD4_m6Y", "ttkayJHBKvM", 
"0Mh-3dij_yY", "EAcgPhHIz9k", "drAmJ8r8_UI", "NIr1XF0doag", "C085J8_mOFo", "SSiM_QcHUrw", "iG39LbO36OU", "4fpfMCbBZrY", "oPwC9_RkA6U", "xmzgSW9_qQQ", "J5zYNTUdJ7E", "f8Jhio30FcE", "lNAZnqZIe7Q", "Gj6Uf71DPjg", "Uq8-Mb6NuwA", "YMddopmzhaw", "y5o_LqLGTFk", "5PLdPga2l7A", "VFIvAelnjJk", "yZ8ZFqGGHEY", "9Y0iWInV50c", "sKaXS4L_LBE", "J4KwYAUjvps", "jTmdNqzyqoA", "qkgnZOiPhko", "6x7Vjf0eDZU", "dGv87y7orMk", "p8fWooyDeeg", "fGn-Yq7Pf0I", "V-8TDXRUL98", "k8VVUzrKsao", "jsiFQW7YhyU", "YpqCkIfj1kQ", "dY1RXh-43q4", "5ng7eZcmf08", "5Nyey2vqZjc", "3FOClyWnFLo", "_x5vekFIPwM", "bcUskY_-eaI", "frABgE4nvZE", "1gdystGUcWk", "N5PHW5fID5M", "cf1kiPYSQX0", "vK6YBEhT_a0", "j8YGygpejmM", "XzLmdfuT-aw", "Y09e38mJwxg", "BAHGhuTg0uc", "i7RbqNUpkzM", "bseiyQBfDFc", "qrppRbnm0Hs", "jSBbDebSEH8", "aPtUdHUZ3U4", "D-JVd1wiGmM", "FobVijiwcxo", "qQMeHkwP8hg", "tkrwURdzQD4", "erJJ6_dvqXQ", "S8Drmz7kb2Q", "nComDx3Hksk", "iSgTY8VsTf4", "M1ZCdTUpOMk", "KvP2xzlLuVQ", "_Yxhg-Ng6W4", "QAXV5-eUHVI", "UNJO7v2JqrQ", "oxGTI4ifaUI", "fvTmJro2lJs", "ucfvRONvrv4", "7Kov_ocesos", "KA3r-gF1ub8", "WYqh9IOP1ac", "RfGf4Sfi5eo", "dRG76uV8Gh8", "v-GbQnjx9qo", "I8aSkup6v5Y", "zg2Fsd_AQBY", "ByyXpvS5SsM", "L2x3gfC8JKk", "VeV2_pBKwyw", "fitpQnwDSF8", "MVA_CfddM7Q", "0GUAEM7yZfs", "NY3YDQvI1Ic", "ogJnHIuT8Yc", "LR2NyzmxUhU", "VNfQu_7ewMc", "71j61uq6dT0", "--0Kbpo9DtE", "N57v3XC_KgU", "7AAjhFb6vW4", "QeG3JIDj9X0", "eFlRk5-wfqE", "PhwZ9hgWbws", "BktA9fzbBFA", "8gW85SbDGms", "905EaxHE8uM", "2ZjvH8UJmMI", "Pz0sOLzEEc8", "fsHd9Dxb6dg", "jfvF7yBlUzw", "4h-GEDFgbyk", "9N5ehFTyiEA", "o0-B7oMrfYg", "dnw5qOqcUuc", "e7RSZvTrfxM", "S_CkGJ_2NRc", "auNws2QRR-I", "POJmNWuuWoU", "IcZkMPfJt9g", "9jU8kH2W4fw", "5mBTu5fSKrY", "3G7zcxVInzk", "G9iraVNE2YY", "PZNVXTPFpDg", "kheQo4Tpndc", "NhmN-Y4Ogt4", "EnLJRP3URAY", "AF9uezxZDeE", "ARWG_rDyo0w", "svHi_vMMz30", "RUAq8ypbS-M", "fgzsmVt8O9Q", "d9UM7dxWmSo", "HXsnKhMSXnA", "PzAXjKD4ZRg", "nurNqjV2BIE", "Vrc9iyOdffs", "HY_cFHStUdE", "7NpvRgadP6I", "1AAV73eCDng", "SB4UMgTRBe4", "IeN5MHlIFYs", 
"tqny4LpSUiE", "M1YxSawhLRM", "6dMleMbH1p4", "ycCwEbgAsBA", "28ST7y3V0Ts", "-IEDd8F93a4", "h_v9LNa-CJM", "KjCbBXm68t4", "M9EY3IUfc5k", "l9cwjJ8Fq18", "JPTuaNLK8Wo", "b8uR_dfzk1k", "U9B1tkrtRlE", "gkUCMaoMutQ", "15V7WLGkk_k", "k4IrMBjw4Wg", "XD03_-kYYIU", "qtnVQtoVVVc", "Fr7k4Of9MHc", "T2-lmTdq1xI", "5Zxg-TXyOI8", "FwvekZZF5Uc", "WBnhnil7BpE", "NRfetLNMgko", "khG49U9uA0w", "oipCIsg365Q", "jMRAXSUZfFM", "kAaInBtlT20", "1bYnXDko1Y0", "8F_j53zDM7Y", "ZKSH2u8LA3U", "yFUbV6e_gJU", "ri_34j2BrjQ", "vQ_K9yf88FY", "xEU6jN3g9-M", "mJf_LmgFK5Y", "4Q_7tdz1RLk", "NhWMpSodfiM", "wEhDoTH3z1Q", "Il_uQts188g", "6nqaSvpqq_4", "lUf2aUbevKI", "fNCt6HzDr5s", "uUThkhfRgcI", "NBdNAZ4xMHo", "Eww4ke7BRc8", "P-dCi2PGAbk", "kMxyUZjFRRk", "ZCHRMqF8WYo", "WdmLE97ZN3Y", "YWQsrpXutYI", "daYiBw4mumU", "XzBgSMyXKgc", "64Ig7a6W6Ew", "mkx8bU_di1k", "9FVnGjIxaMY", "CdIz5bo7Djk", "SpevCJaBI3c", "im7nCclNTkQ", "KqBtWllS4gI", "0takfE6ONYc", "8jWG2tLeVMw", "fH17ddueJWU", "x4-N7jQxQiw", "2mifm1pLKAA", "1OXB0WADaXg", "d7Traj8zMS8", "hkSU3XytNkA", "gNevBaS-uGA", "IQRT4fr8emQ", "Y9RLRlKbYdE", "6hFg9JyN2xs", "kx_IvEI7ank", "qB2CxfyYgaM", "bJ7gKGvwpY4", "XikyJPOxAz8", "Qzsf3DWibJk", "CWG3sDtKQDA", "QuiSL6Nkamg", "vtV5A5PcyVs", "ZBGy6D9KzpA", "9dw5HDcPPZc", "rWblyP6DWTI", "Zon59ELN9gY", "4PT2cRmp638", "0NZj6fl8oy8", "ca6GfGUTIBc", "5W-cXIqy7Qk", "1NUz6bWIxAk", "huqYMEJ0XAE", "OfLGN8vG8RI", "9S_aexwPTGY", "ZMNakOAS2PQ", "xFWjYDyiVgA", "oxQaoCK5-gw", "wchMLhhTThU", "DSQ0c3e1JIY", "EgiQcOFcKjI", "4gKwjSDe0pU", "rp0XRhlDaxs", "pEww8bI63pc", "ecTrx01X7Ww", "v3_XOVuqs7Q", "S9isSATsFrM", "Lo0Ss_RBBnE", "d7n1MfAE3Q8", "MZZi2H8arKw", "HTf1OrPoVeI", "3-12cTsYLBI", "26fy28wEjUg", "8QW8dm28hKI", "Jwcovn8B8pQ", "_0-N01oCEgM", "WGn2YyLrV74", "NSYJ8htKe3o", "H-gdaQB8YX8", "om0rl5inBb0", "K29Wr5eheZ0", "J__1wxI_PlI", "oN6DOjX2jwo", "jIao4NbL6RA", "Yv4A_RMBTUo", "7KHfnuxMvmM", "R6nxePpOwj4", "87PTLTwFIlM", "7s7h20eWyNQ", "KZr7oiGwvGo", "RWbEIssom0M", "Y-was9RqOPU", "-PVyrGvE_Ug", "q2pxWYFalxY", "GLbz7V42NTs", "t5BP1LMw7aE", 
"8qoNZIjLsNQ", "VgXRA5Rs2wY", "2BpNX3tDIp8", "RuPkSF1APFc", "itq0JKF_71k", "dDlabjh0d5M", "EVDZvdWO0Jk", "TWJwleci420", "DXmMV9u9Zu0", "XERAdiWQOjE", "vu_tBi3QEXU", "nBTTbo5aARY", "IIE40ZX84uM", "An7mR7syKAs", "RVntDFkxS7g", "PjBgUuSsA_4", "j7kDcRWH8x0", "LQM9dAUmXbc", "qtsbny47mdw", "3vLgiRXB5XA", "ISRYAscisFs", "yBvuC-gJLRk", "sIck3-vlSlA", "d24qTy_a2y8", "YuDMD_sxMFA", "4QQoRFg_afs", "FR7oa6EyJXw", "ZIJSGDK7JZE", "jQGV_kqnur0", "hZWt1PYH3hI", "5P_6PJMGCcM", "VR8fTKKaxkw", "yAy034yMsKM", "s-ujRzUvgv4", "HLDJp3hetEM", "OJXyK9Grzwg", "xdA779d7rf8", "nAGTYX024vs", "I7MYI_EAW18", "0rxcET_eukc", "tpI_5-rp7B4", "RZKXgfP57ao", "1R0jn0mXAhU", "3b4tPRdX2nk", "BGrHmcYSMqY", "_Wcx5qaUc3E", "nOcf_JCjA6E", "3a3oCg4gDlw", "b_-2uGDp8rU", "iA3wkEutnc8", "IYrmjKKnWsM", "8p8uAN8GMPg", "i1-b_LjegYI", "Uu17FqqvBPc", "0i22gBZe0Vs", "7PV1B8NglOw", "Qj4waaw1OzA", "5mMXaSzIH6M", "M_-STREt-B8", "t2cnHOQr0vQ", "5WPyovVixkY", "4Wloj3k9Gwc", "fsbZAcJXN5s", "az27quZLK8Q", "1963PvpQHlU", "xfIGSpWis6w", "xnvIL-m1Fs4", "U6PpJ8gh8g0", "7y2K0aGKzrI", "1COlCH966vk", "bG0JP1lpvc8", "RPELccF-qHs", "VE7JElrhOZk", "Lqce0YTaqvA", "apViju1pJSQ", "HuMA26sgEW8", "GZmvqMK704U", "kpSA5-dCUco", "-RdNktX6pe0", "4woMwBa9SsI", "TIujifYddi0", "URUobiG8DwM", "WhfHiLV10Dw", "NdX-qjViTnw", "EKU8wGUTXsE", "0LQeo_Ith1M", "dS2CRIjAkSc", "gMaG5H1711A", "EB29A-P4y_I", "uKAHruKnWcs", "3zv372sf060", "j51FXUb-x1Q", "Xg_u0FCyZ74", "Hi2USufrGtI", "JF1uIZRMUf4", "JvL8HHi_cbE", "pnr8PcRoDhU", "FZgw9gz3C1I", "0fbtLGFxJEo", "B6QqER1cyZQ", "1Fho_5uqk98", "KdbXMsZCW_s", "LK1kDgONDyA", "sDYDY5KOA0U", "YwYgHxtSUN8", "MUkhskdIzSw", "BZ9W12amjHo", "U2V25cli1rM", "i_LrB8xGX2Q", "bJmP1QtZRdc", "LXT9U8lhwSk", "yjog5dRToKM", "LlUKiEgP-fg", "7zM6-TZBjfY", "5KvWKvuM0Lw", "XWRcLFQ6SLc", "yQB9yV1aqkM", "4XvGmDO9MH4", "9RQUIk1OwAY", "7DEmYlGENng", "9bHgUYMyuus", "xhJhWN07d28", "QpkW-udMIV0", "Ti-CYdMC41I", "3Hff3DyKB5Y", "i2EytxoXbVw", "XP8VPh-Jck4", "6RtuulgTQEg", "S5gpj3dH04U", "zTnOM3pgsmI", "hPeUmmospU4", "P7oI4IXaQvQ", "42IPZRBbeZU", 
"R221KrCxkFI", "xiX18l1TJz0", "_pbDtZeN3Os", "eZZW9pr_5yA", "6Uin2eIjfAY", "Tr4OGLLjYnk", "ElcEPfop_CA", "1hu9EumykCw", "HQX9yeK4SuE", "RjzCc0kDGMA", "wPtPW4R3Bz8", "x9w0uk7G-8c", "T6G4wmkcZGg", "zOoUR17xnL0", "KQ7WRBhdKDo", "bIvCkvmf3Xs", "S5Ne5eoHxsY", "S6D18JLde4c", "rs2L2j6SE2Q", "L_PfdixvB1c", "G3FKN03gXIQ", "1NZsTHM_yvA", "PNpUEPXZO6c", "A3PGvLiUQeE", "klYiRchHrh0", "IVzbPudw9cw", "4x7zIn-ypMs", "8IuV-rUEJZQ", "1pvl8Xus5Ek", "9ffhZ_LWL40", "7h-TEPgvKas", "Walstb9S_U4", "5LBrotUmVKg", "U1sSt_i48f8", "6_iwi1av_Tg", "T5UQ0Fab3Ak", "JKwK2CdBka8", "81iad4yS8XA", "2ZU44Af4rGA", "XYVIHqjm7hk", "8I4DWSyhTaQ", "zIph871efJ4", "s3MM2Hn0578", "UO2IzVqYoT0", "JW_cY121vMU", "twuXShfe8ZY", "kXQSKQ-Iglo", "JnnvAlCn4-8", "04Rjc8cHKVA", "kPh7dYErCc8", "ylJlT37wcaE", "o5qTx0bKjrs", "V62niQ9D6wQ", "kko4Sug8_Us", "7KL_zitxz0w", "8HCqhfO4ukc", "5J5OiGLKqQM", "k-dT_m1bSuE", "8ytmWvud6-4", "S3MGYVhx2gM", "1s3Ax_KrLs0", "F6quWYzK7TU", "Z3V98L7YL00", "RH9p7jUHTBw", "-C7NJpYc3gw", "hpsYcBqu5gc", "wriKnOjJy6E", "c78FrBan-JQ", "MocC6pni5P8", "DuL3xoy_BAY", "DHhTo7sZrDg", "td1eP9FSL3Q", "1V35b_G7wok", "ThHk_Q1uovY", "VGNEtq4Bw3U", "K4GXuDACK40", "P8o_guQi41s", "YImUOaH7Uwg", "NC07yL_yN20", "hcdEoiWKPns", "T2r-_WM0enQ", "NrqC1YuBCuQ", "mBHXqKnDo6s", "1AmqyFQYSCU", "2he1eFOivTM", "Rnpm3wKt0Ls", "cpeIVVLO5QE", "1apsDpo_cGI", "Sn0XIA2aux4", "9Xr6LqfOIg0", "XvpAfKb67YE", "DsMej1SWfzU", "D7d-Wilesgw", "1hbFSXAZlro", "GtSNM5lM6Ww", "qRW9erqzcUA", "GungykjFga8", "QBJdM-PmiBY", "1AibHUd6TO4", "DJV5YcQ-lnU", "aJc9vJNv4G4", "2kcCNL3lxDw", "2cqIk-0WSdA", "M3BFifwYolI", "rTIYFLh2Y2Y", "mr08FdZmLa0", "DaX4ZuINsgg", "OLkXxaP5RRs", "pvjUGDcegGo", "mCDGajhSzn8", "U73B2rWWz4U", "ut6DwUYicv8", "OKOP1dnLRJE", "9fRE0IuFcZ8", "sA19OnfmFd4", "vnlSCnx5UUg", "2HGOjVDc1mY", "elgUkTn8snE", "OFuGaf8od_Q", "PzGs-GY1DaA", "SsQ4uMnSJRo", "SKxA0OEnCnk", "B511HI1RHuI", "MWR-h25GwmA", "89QEfKzSkzY", "0KJ0qXEVACs", "clB89mi-ZlY", "W82qLlUxuy0", "cKlp2iLl8Ps", "e-E1-hpK4w0", "khkp8TrFytQ", "a-8DyDJvglU", "KPVPU3vl_yI", 
"83m9ys4kxro", 
    "jsqkwpdurgw", "OD2_TpEG-C0", "YHHAQkgs32M", "XM87FwIenH4", "C1RHyCSkkKM", "ppYi2iGMht4", "TKTZoyKhHco", "19c03AjUyI8", "74jleDoImYc", "k6Cw-X3zDuo", "uCbXqNRiHRY", "HRsqlJmj-hE", "P9SWbIShXc8", "IV4Pb__t6ys", "_EJjfqyaebo", "5mTvphLejLU", "K2XcndcvzSc", "Sgp-xuDo1gs", "PF0_X0ilLNQ", "j-FROoAWAdw", "i7K9f9l05qA", "7UJ6f-iP6Bc", "7nb9W76qdEc", "1bKzHmmoofg", "osTwgzWluVs", "4mUPPVoQ-kI", "XTKWfDyejck", "3nPxLoDdlxY", "nqlxwD5RY94", "XoHvL7T10pM", "1g4aAtsCUCI", "87qBSgbwLq8", "C8l-D_z2p6k", "bbBeyBFvAyQ", "dVFTw5pacsY", "ZDAZvXbmOSs", "bz04LZDf0Oo", "vAtsI9ZA11U", "LtaV054F5Rs", "ldyQwxNCzlo", "AIHJQEtcPqY", "NjriHMSM26k", "pcxL0W0RuTk", "XjzDqqvMEcU", "V4D-N71EvvM", "hVBhp7U0xQQ", "8__DkawMrPo", "alKpLiE-KY4", "4ftLC-J963E", "HlAZm63ooQY", "OQFa34FoVY8", "KSlCyDyda1M", "SAmkg58TybQ", "IyfiCVAeyhg", "__qlp6JpcPc", "Tjxb3UzaduA", "-C2718vV9v4", "hYMPlDM5j80", "hzm0mRZAOrY", "X-3h3RTPoRg", "yDO_FdDeHfs", "IpWokx3CtDg", "1rpQNdmTtvk", "76F1NoCzNLY", "V2H1po51T-A", "A960qcFjXK8", "MaVbMOJg4Mg", "fBfmMCCMXHk", "CYe9uv5G1hw", "Gk-1toFmMZo", "jCJoc-6iqng", "Y8yW_BsZ018", "6FKmF1LDPrw", "1ShNH_1lVsw", "iT1c92P40z8", "DE-6w16AhcQ", "jNKj2MXeah4", "w-aTpX4_vcg", "CC8CCvle0Vc", "bErSxaBHc1U", "BslAPtFdAnE", "Mv8DWoW5Ojs", "DU7SmB9yqpQ", "ZKCPfy5gcKs", "pFZoRi-T9c0", "q5FEts6gKII", "hzjA9gfxMeQ", "TDL3Jk3SaK4", "FGGfQH0XxL0", "zFuNkVjC6hQ", "tOXrATngKOY", "18C7YgxCkIw", "GvTVU0Qslc0", "HKPpZx0E8FY", "no5rJt2Pj-0", "2SzL5VBF_BI", "8LXi_tpkpSg", "DJp6sBH46KI", "y2eFiEO3BdQ", "pP4VQ9JClM4", "Y0BjWF5gWmk", "wvl9ecRASFM", "l6TtoNu1_Jg", "88jRjbWTesc", "GbgHjGPVzao", "5LXv8t_-6_c", "LcIkJQgAOyA", "5PGCL_L81UA", "y2SDYOw0SDU", "o1GEEkLVFsk", "54c2PpV65hU", "ti2co-2tymo", "hdUTzzDEV8g", "RBPYrNfjnHY", "5IyXNi8TbzM", "t9YiBXcb49g", "qi3_M5Udics", "2ZioLZEZTdI", "dEi4H8QFxIg", "WtzVRdkaY7Y", "26VxX9_chSw", "6wrhV6SnfnE", "3xm58Ccps1c", "MKSRbvmbTVo", "NsUjPBgFJGw", "hXYKSf6QO04", "B7_WcWmSZ_Y", "vKWbEE-ttYA", "0J7cQ4FiDCc", "N_OLaiTShLA", "BuZ-ld13p1I", "izjFsRDbt5k", "TVt1GKBZMzc", 
"1kwoT-9ClV4", "BJ-bEId-7yQ", "JXUWefGu67A", "WD9ARP45p_M", "V6rWoMiKsAw", "WqKqqN54fHo", "WEqf7L2KL4g", "itGph3begOY", "ep8WGVBNq7M", "67SQuXM5J0o", "tuGOZme6Xjg", "fs98qK3Lnpk", "88FFBre9GFA", "D4fiQwrF5vw", "FTcYSsoAw28", "EAoVoLtzovk", "edPiYaSZvx0", "6h0faOkFVpI", "euFnkGaA5IY", "AX5D2Bt__gE", "IwX1wEQsngE", "DkLLhsfa-PU", "R1toABTIp9s", "AXxUyPWLoJA", "sf0ToMiRIvM", "zlsM4XM9_Rk", "4k57dSg8Cws", "iy8jWAIaG0w", "YZxqCnxHPtE", "TJbSoBC1qpc", "FGidXjJl1vA", "JyfpZuVSisU", "01zaYTxpmxk", "-L9-kfYb4_A", "ta4rw6YR8LQ", "7G_CTikQVEM", "PoNt0SfS5jk", "kdmw3fmfg88", "rb4dy7HLH8M", "HKTCO8mh5d8", "zfZ6BxPne4E", "nJPm9WqyTTU", "22v6KBUXEeQ", "rbfjUayf-gk", "wNr9eLPIits", "VQI85nD7h4U", "96Th1-UjSOU", "OSYcvcQn2w8", "hG6rJf0RBnk", "yYEk84kRqPo", "nuqUwvdZ30M", "hEZVNDqid2I", "pMgqa0mOExo", "E-rvEqFNBcs", "4aUHm5A_66w", "obF2AR2e5P8", "uhbki9G10zU", "GjuSYrjqaMs", "_osOExSsNyk", "UPWSMhAF3ME", "I_5u_woAjm4", "qQyXQtLoGyE", "o8qQAjkaXMM", "tCqhl589AdM", "m0e92O4DJKE", "_MBsJDwzn1M", "nyHeQWnm8YA", "kCSNzVDJ_W4", "7vz-osmi6tw", "Mk9x9VZpIi4", "De1Xc6EXj9Q", "UCQg1LJOywc", "HdQjTia26y4", "WMvw_CCYeG0", "utv4vBdgSG0", "liNzvqszWPc", "j4SXWDgDSSE", "ZcFbyUJbP94", "1GehhMoUwnc", "j_UyjWUW-cU", "jjTvJdgmsmc", "OwLe8JNyynw", "el4lgYNq6mY", "9w_En85TFqc", "E0DHszfXnsc", "7-uBrcBKCpE", "iRTAdF_o1-4", "nXx6gUklog8", "wnH6quY_MUE",
])
# fmt: on
print("ids_list.len: ", len(id_list))

if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")
# df_meta.head(2)

In [None]:
# Count how many of the listed videos survive each pipeline stage:
# metadata -> UI detections -> in/out-of-game segmentation.
video_files = sorted(Path("/shared/g-luo/geoguessr/videos").glob("*.mp4"))
# `id_list` is a set, whose iteration order for strings can change between
# kernel runs. Sort it so the id lists below (and whatever is later built from
# them) come out in the same order every time.
ids_with_meta = [i for i in sorted(id_list) if i in df_meta.index]
model = "gsmoreanch02_012--geoscreens_012-model_faster_rcnn-bb_resnest50_fpn-2b72cbf305"
# Video ids recovered from the pickle file names (all splits).
dets = [
    p.stem.replace("df_frame_dets-video_id_", "")
    for p in sorted(Path(f"/shared/gbiamby/geo/segment/detections/{model}").glob("**/*.pkl"))
]
segs = [
    p.stem.replace("df_seg-video_id_", "")
    for p in sorted(Path("/shared/gbiamby/geo/segment/seg").glob("**/*.pkl"))
]
# Sets give constant-time membership tests; the lists above are kept because
# their lengths are reported below.
dets_ids, segs_ids = set(dets), set(segs)
ids_with_dets = [i for i in ids_with_meta if i in dets_ids]
ids_with_segs = [i for i in ids_with_dets if i in segs_ids]
ids_with_frames = []  # placeholder; never filled in this notebook
print("")
print(f"Total video files: {len(video_files):,}")
print(f"Total metadata: {len(df_meta):,}")
print("UI detection outputs: ", len(dets))

print("")
print("videos in google sheet: ", len(id_list))
print("videos w/ metadata: ", len(ids_with_meta))
print("videos in google sheet + w/ meta + with detections: ", len(ids_with_dets))
print(
    "videos in google sheet + w/ meta + with detections + with segmentation: ", len(ids_with_segs)
)

In [None]:
# df_meta[df_meta.video_id.isin(ids_list)].split.value_counts()
# len(set(df_meta.index.values).intersection(set(id_list)))
# df_meta.loc[list(id_list),:]
df_meta.index
df_meta.loc[["K4GXuDACK40", "8ytmWvud6-4"]]

# Split distribution of the listed videos that actually have metadata.
ids_in_meta = list(set(df_meta.index.values).intersection(id_list))
df_meta.loc[ids_in_meta, :].split.value_counts()

In [None]:
# One sub-directory of extracted frames per video. Filter with is_dir()
# explicitly: a trailing "/" in a glob pattern only restricts matches to
# directories on Python 3.11+, so older versions would also pick up files that
# live next to the frame directories (e.g. frame_meta_001.json).
videos_with_frames = sorted(
    p for p in Path("/shared/gbiamby/geo/video_frames").iterdir() if p.is_dir()
)
videos_with_frames[:5], len(videos_with_frames)

In [None]:
df_meta.head(5)

---

## Process All Data

In [None]:
model = "gsmoreanch02_012--geoscreens_012-model_faster_rcnn-bb_resnest50_fpn-2b72cbf305"
# Load the inputs only if earlier cells have not already put them in the namespace.
if "df_frames_meta" not in locals():
    df_frames_meta = pd.read_json(
        "/shared/gbiamby/geo/video_frames/frame_meta_001.json",
        orient="index",
    )

if "df_meta" not in locals():
    df_meta = pd.DataFrame(get_all_geoguessr_split_metadata().values()).set_index("id")

# Gather the in-game frames of every video that has metadata, detections and
# segmentation. No per-video DataFrame of all frames is built here: it was
# never read and only cost time on every iteration.
in_game_frames_all = []
for i, video_id in tenumerate(ids_with_segs):
    frames = subsample_frames(video_id, df_frames_meta)
    in_game_frames = filter_to_in_game(video_id, frames, df_meta)
    in_game_frames_all.extend(in_game_frames)

In [None]:
print(in_game_frames_all[0], f"{len(in_game_frames_all):,}")

### Append UI element detections to the frames data

In [None]:
# Per-video cache of detection tables so each pickle is loaded only once.
df_dets_all = {}
for i, frame in tenumerate(in_game_frames_all):
    vid = frame["video_id"]
    if vid not in df_dets_all:
        df_dets_all[vid] = get_dets(vid, model, df_meta)
    df_dets = df_dets_all[vid]
    frame_dets = df_dets.loc[frame["frame_idx"]]

    # Convert every detector-space box to image pixel space and flatten the
    # ((xmin, ymin, xmax, ymax), area) result into a plain dict.
    bboxes = []
    for raw_box in frame_dets.bboxes:
        (xmin, ymin, xmax, ymax), area = transform_box(
            *raw_box.values(), frame["img_width"], frame["img_height"]
        )
        bboxes.append({"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax, "area": area})

    # The frame dict is updated in place; file_path becomes a plain string.
    frame.update(
        time=frame_dets.time,
        labels=frame_dets.labels,
        scores=frame_dets.scores,
        bboxes_640=frame_dets.bboxes,
        bboxes=bboxes,
        split=df_meta.loc[vid].split,
        file_path=str(frame["file_path"]),
    )

### Save as both Raw JSON and DataFrame

In [None]:
# Column order of the exported table.
ingame_columns = [
    "video_id",
    "round_num",
    "frame_idx",
    "img_width",
    "img_height",
    "sec",
    "time",
    "labels",
    "scores",
    "bboxes_640",
    "bboxes",
    "split",
    "file_path",
]
df_ingame = (
    pd.DataFrame(in_game_frames_all)
    .set_index(["video_id", "frame_idx"], drop=False)[ingame_columns]
    # Rename the index levels BEFORE sorting: otherwise "video_id"/"frame_idx"
    # would name both an index level and a column.
    .rename_axis(["_video_id", "_frame_id"])
    .sort_values(["video_id", "round_num", "frame_idx"])
)

In [None]:
# Set to False to skip saving. When True, the files on /shared/ are overwritten.
SAVE_IN_GAME_FRAMES = True
if SAVE_IN_GAME_FRAMES:
    dest_dir = Path("/shared/gbiamby/geo/segment")
    assert dest_dir.is_dir(), f"{dest_dir} must be an existing directory"
    save_json(dest_dir / "in_game_frames_000.json", in_game_frames_all)

    # Context managers make sure every file is flushed and closed.
    # Default pickle protocol of the running interpreter:
    with open(dest_dir / "in_game_frames_000.pkl", "wb") as out_fh:
        pickle.dump(df_ingame, out_fh)
    # Explicit protocol versions as well, one file per protocol:
    for protocol in (3, 4, 5):
        with open(dest_dir / f"in_game_frames_000-protocol_{protocol}.pkl", "wb") as out_fh:
            pickle.dump(df_ingame, out_fh, protocol=protocol)

In [None]:
# display(df_ingame)
# df_in_game_summary = pd.DataFrame(
#     df_ingame.groupby(["round_num"]).agg(
#         total_frames=("frame_idx", "count"),
#         start_sec=("sec", "min"),
#         end_sec=("sec", "max"),
#         start_frame=("frame_idx", "min"),
#         end_frame=("frame_idx", "max"),
#     )
# )
# display(df_in_game_summary)

In [None]:
df_ingame