In [None]:
# Notebook housekeeping:
#   - autosave the notebook every 60 seconds,
#   - load the autoreload extension and reload all modules before each cell runs
#     (mode 2), so edits to imported .py files are picked up without a restart,
#   - render matplotlib figures inline in the notebook output.
%autosave 60
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import json
import os
import pickle
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import cv2
import matplotlib as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
import seaborn as sns
import sklearn as skl
from IPython.display import display
from matplotlib.patches import Rectangle
from matplotlib_inline.backend_inline import set_matplotlib_formats
from tqdm.contrib import tenumerate, tmap, tzip
from tqdm.contrib.bells import tqdm, trange

In [None]:
# Pandas display options: do not truncate cell text, and cap rendered tables
# at 15 columns and 50 rows.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Suitable default display for floats: thousands separator, six decimals.
pd.options.display.float_format = "{:,.6f}".format
# NOTE: in the imports cell `plt` is bound to the top-level `matplotlib`
# package (`import matplotlib as plt`), not `matplotlib.pyplot`; `rcParams`
# is available on both. A later cell sets "figure.figsize" again to (10, 6).
plt.rcParams["figure.figsize"] = (12, 10)

# Optional: choose the inline figure format. "retina" gives high-DPI PNGs;
# SVG is only advisable when the graphs do not contain a lot of points/lines.
%config InlineBackend.figure_formats = ["retina"]
# NOTE(review): this call also sets the inline figure formats (to PDF + PNG),
# so it presumably overrides the "retina" setting on the previous line --
# confirm which of the two is intended.
set_matplotlib_formats("pdf", "png")

In [None]:
# Publication-style matplotlib defaults: PDF/PNG inline output, LaTeX text
# rendering and serif (Computer Modern) fonts.
#
# `set_matplotlib_formats` is already imported from
# `matplotlib_inline.backend_inline` in the imports cell. The
# `IPython.display.set_matplotlib_formats` re-export is deprecated and would
# shadow it, so it is intentionally not re-imported here.
set_matplotlib_formats("pdf", "png")
plt.rcParams["savefig.dpi"] = 75

plt.rcParams["figure.autolayout"] = False
plt.rcParams["figure.figsize"] = 10, 6
plt.rcParams["axes.labelsize"] = 18
plt.rcParams["axes.titlesize"] = 20
plt.rcParams["font.size"] = 16
plt.rcParams["lines.linewidth"] = 2.0
plt.rcParams["lines.markersize"] = 8
plt.rcParams["legend.fontsize"] = 14
# Requires a working LaTeX installation on the machine running the kernel.
plt.rcParams["text.usetex"] = True

plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = "cm"
# The preamble is a single string that is pasted verbatim into the LaTeX
# document header, so the packages are separated by a newline. A comma
# separator (the old list-style syntax) would end up as a stray character in
# the preamble on current matplotlib versions.
plt.rcParams["text.latex.preamble"] = "\\usepackage{subdepth}\n\\usepackage{type1cm}"

In [None]:
# Load the per-video frame-extraction metadata. With orient="index" each
# top-level JSON key becomes a row label (later cells look rows up by video id).
df_frames_meta = pd.read_json("/shared/gbiamby/geo/video_frames/frame_meta_001.json", orient="index")
# Summary statistics of the numeric metadata columns.
df_frames_meta.describe()

In [None]:
# Total number of frames sampled across all videos in the metadata table.
df_frames_meta["num_frames_sampled"].sum()

In [None]:
# Show the metadata table; how much is rendered is capped by the pandas
# display options (max rows / max columns) configured earlier in the notebook.
df_frames_meta

In [None]:
# Collect the extracted .jpg frames of one example video in sorted path order,
# then peek at the first and last eight paths to check the file naming.
frame_paths = list(Path("/shared/gbiamby/geo/video_frames/zOoUR17xnL0").glob("*.jpg"))
frame_paths.sort()
frame_paths[:8], frame_paths[-8:]

### Subsample to 1 fps

The frames that already exist on disk were extracted from the original videos at 4.0 fps and are used to detect UI elements and compute the in/out-of-game segments. Here we sub-sample those frames at a lower rate (1 fps by default), since we don't need that high a temporal resolution for the im2clue lookup.

In [None]:
def subsample_frames(
    video_id: str,
    df_frames_meta: pd.DataFrame,
    target_fps: int = 1,
    frames_root: Union[str, Path] = "/shared/gbiamby/geo/video_frames",
) -> List[Dict[str, Any]]:
    """Sub-sample the already-extracted jpg frames of one video to `target_fps`.

    Every `frames_fps / target_fps`-th file (in sorted path order) of
    `<frames_root>/<video_id>/*.jpg` is kept. File stems are parsed as
    `frame_<frame_idx>-<sec>s`.

    Args:
        video_id: Name of the video's frame directory and row label in
            `df_frames_meta`.
        df_frames_meta: Frame metadata indexed by video id; must provide a
            `frame_sample_rate_fps` column.
        target_fps: This is how many fps you want to sample from the existing
            jpg's. It should evenly divide `frame_sample_rate_fps`, which is
            the rate that the frames on disk were sampled at.
        frames_root: Directory that contains one sub-directory of frames per
            video. Defaults to the shared frames location.

    Returns:
        One dict per kept frame with the keys `video_id` (str), `frame_idx`
        (int), `sec` (float) and `file_path` (Path).

    Raises:
        AssertionError: If `target_fps` is not positive or does not evenly
            divide the on-disk sample rate.
    """
    # This is how many fps the jpg's were sampled at:
    frames_fps = df_frames_meta.loc[video_id].frame_sample_rate_fps
    assert target_fps > 0, f"target_fps {target_fps} should be positive"
    assert (
        frames_fps % target_fps == 0.0
    ), f"frames_fps {frames_fps} should be divisible by target_fps {target_fps}"

    # Keep every `step`-th file; computed once rather than per frame.
    step = int(frames_fps / target_fps)
    frame_paths = sorted((Path(frames_root) / video_id).glob("*.jpg"))

    frames = []
    for f in frame_paths[::step]:
        # "frame_00000012-3.00s" -> ["00000012", "3.00"]
        parts = f.stem.replace("frame_", "").replace("s", "").split("-")
        frames.append(
            {
                "video_id": f.parent.name,
                "frame_idx": int(parts[0]),
                "sec": float(parts[1]),
                "file_path": f,
            }
        )
    return frames


# video_id = "zOoUR17xnL0"
# frames = subsample_frames(video_id, df_frames_meta)
# print(len(frames))

---

### Limit frames to "in_game" segments

In [None]:
def filter_to_in_game(
    video_id: str,
    frames: List[Dict[str, Any]],
    seg_root: Union[str, Path] = "/shared/gbiamby/geo/segment/seg",
) -> List[Dict[str, Any]]:
    """Keep only the frames that fall inside an "in_game" segment of the video.

    The segment table is read from the pickled DataFrame
    `<seg_root>/*/df_seg-video_id_<video_id>.pkl`, which must provide the
    columns `state`, `start_frame_idx` and `end_frame_idx`. Segment bounds are
    inclusive on both ends.

    Args:
        video_id: Video whose segment file should be used.
        frames: Frame records; each must have a `frame_idx` key.
        seg_root: Directory whose immediate sub-directories hold the segment
            pickles. Defaults to the shared segment location.

    Returns:
        The in-game frame records, in their original order. Each returned
        dict is the SAME object as in `frames` and gets a `round_num` key
        added in place: the 0-based position of the matching segment among
        the video's in-game segments.

    Raises:
        IndexError: If no segment file exists for `video_id`.
    """
    # Sort so the choice is deterministic if several directories match.
    seg_files = sorted(Path(seg_root).glob(f"*/df_seg-video_id_{video_id}.pkl"))
    if not seg_files:
        raise IndexError(f"No segment file found for video_id {video_id!r} under {seg_root}")

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager makes sure the file handle is closed.
    with open(seg_files[0], "rb") as seg_fh:
        df_seg = pickle.load(seg_fh)

    df_seg = df_seg[df_seg.state == "in_game"].reset_index(drop=True)
    # (round number, first frame idx, last frame idx) for each in-game segment.
    in_games = list(zip(df_seg.index, df_seg["start_frame_idx"], df_seg["end_frame_idx"]))

    def is_in_game(frame):
        for round_num, start_idx, end_idx in in_games:
            if start_idx <= frame["frame_idx"] <= end_idx:
                # Annotates the caller's dict in place (existing behaviour).
                frame["round_num"] = round_num
                return True
        return False

    return [frame for frame in frames if is_in_game(frame)]


# in_game_frames = filter_to_in_game(video_id, frames)
# print(len(frames), len(in_game_frames))

In [None]:
# Run the two steps for a single example video: sub-sample the frames on disk,
# then keep only those that fall inside "in_game" segments.
video_id = "--0Kbpo9DtE"
# video_id = "zOoUR17xnL0"
frames = subsample_frames(video_id, df_frames_meta)
print(len(frames))

# Snapshot of all sub-sampled frames. It is built BEFORE filtering because
# `filter_to_in_game` adds a "round_num" key to the in-game dicts in place.
df_all_frames = pd.DataFrame(frames)
# display(df_all_frames)

in_game_frames = filter_to_in_game(video_id, frames)
print(f"num frames: {len(frames)}, num in_game frames: {len(in_game_frames)}")

In [None]:
# Tabulate the in-game frames, ordered by round and then by frame index.
df_ingame = pd.DataFrame(in_game_frames).sort_values(by=["round_num", "frame_idx"])
display(df_ingame)

# Per-round summary: number of sampled frames plus the first/last timestamp
# and frame index covered by each round.
display(
    df_ingame.groupby(["round_num"]).agg(
        **{
            "total_frames": ("frame_idx", "count"),
            "start_sec": ("sec", "min"),
            "end_sec": ("sec", "max"),
            "start_frame": ("frame_idx", "min"),
            "end_frame": ("frame_idx", "max"),
        }
    )
)

In [None]:
# Spot-check the first and last ten sub-sampled frame records and the total
# count. `filter_to_in_game` adds "round_num" to in-game dicts in place, so
# records shown here may already carry that key if it has been run.
frames[:10], frames[-10:], len(frames)