In [1]:
import PIL

# Autosave the notebook every 60 seconds.
%autosave 60
# Load the autoreload extension and use mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Autosaving every 60 seconds


In [2]:
import json
import os
import pickle
import platform
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import matplotlib as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
from IPython.core.display import HTML, Markdown
from IPython.display import Image, display
from matplotlib_inline.backend_inline import set_matplotlib_formats
from PIL import Image as pil_img
from tqdm.contrib.bells import tqdm

In [3]:
# Pandas display limits: do not truncate cell text, and cap how many
# columns/rows are rendered when a frame is displayed.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Suitable default display for floats
pd.options.display.float_format = "{:,.2f}".format
# NOTE: `plt` is bound to the top-level `matplotlib` package by the import
# cell (`import matplotlib as plt`), not to `matplotlib.pyplot`.
plt.rcParams["figure.figsize"] = (12, 10)

# Optional: choose the inline figure format. SVG is only a good choice when
# the graphs do not contain a lot of points/lines; ['retina'] is used here
# instead of SVG.
%config InlineBackend.figure_formats = ["retina"]
# NOTE(review): this call comes after the %config line above and requests
# "pdf" and "png" instead of "retina" -- it presumably overrides that
# setting; confirm which of the two is intended.
set_matplotlib_formats("pdf", "png")

---

## Load JSON and Convert to DataFrame

In [None]:
import json
from pathlib import Path
from typing import Any, Dict, List, Union

import pandas as pd


def load_json(json_path: Union[str, Path]) -> Dict[str, Any]:
    """Parse the JSON file at ``json_path`` and return the decoded content.

    The file is opened as UTF-8 text. The decoded top-level JSON value is
    returned unchanged, whatever type the document holds.
    """
    with open(json_path, encoding="utf-8") as json_file:
        return json.load(json_file)


def get_ingames_df(in_game_frames: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build the in-game frames DataFrame from a list of frame records.

    Args:
        in_game_frames: One dict per frame. Each record must provide every
            column listed in ``columns`` below; any other keys are dropped.

    Returns:
        A new DataFrame indexed by (``_video_id``, ``_frame_id``), holding the
        selected columns and sorted by ``video_id``, ``round_num`` and
        ``frame_idx``.

    Raises:
        KeyError: If an index column or a selected column is missing.
    """
    columns = [
        "video_id",
        "round_num",
        "frame_idx",
        "img_width",
        "img_height",
        "sec",
        "time",
        "labels",
        "scores",
        "bboxes_640",
        "bboxes",
        "split",
        "file_path",
    ]
    return (
        pd.DataFrame(in_game_frames)
        # drop=False keeps video_id / frame_idx available as regular columns.
        .set_index(["video_id", "frame_idx"], drop=False)[columns]
        # Give the index levels their own names so they do not clash with the
        # identically named columns when sorting by column label below.
        .rename_axis(["_video_id", "_frame_id"])
        .sort_values(["video_id", "round_num", "frame_idx"])
    )


# Read the frame records from the JSON export and convert them to a DataFrame.
in_game_frames = load_json("/shared/gbiamby/geo/segment/in_game_frames_000.json")
df = get_ingames_df(in_game_frames)
# Quick sanity check: (number of rows, number of columns).
print(df.shape)

---

In [None]:
# print(df.index)
# print(
#     type(df.loc["ZKSH2u8LA3U", 1492].labels[0]),
#     type(df.loc["ZKSH2u8LA3U", 1492].scores[0]),
#     type(df.loc["ZKSH2u8LA3U", 1492].bboxes_640[0]),
#     type(df.loc["ZKSH2u8LA3U", 1492].bboxes[0]),
# )
# print("")
# df.info()

---
## Load DataFrame from pickle

Try loading pickle files that were saved with different pickle protocol versions. If you have pandas 1.3.x, at least one of these should load successfully; any file that cannot be loaded prints a `FAIL:` message.

In [None]:
import pickle
import platform

import pandas as pd

print("python version: ", platform.python_version())
print("pandas version: ", pd.__version__)

# Pickle files to try, one per pickle protocol version.
pickle_candidates = [
    # pandas 1.3.x (protocol=5, python 3.8+)
    "/shared/gbiamby/geo/segment/in_game_frames_000.pkl",
    # pandas 1.3.x (protocol=4, python 3.4+)
    "/shared/gbiamby/geo/segment/in_game_frames_000-protocol_4.pkl",
    # pandas 1.3.x (protocol=3, python 3.0+)
    "/shared/gbiamby/geo/segment/in_game_frames_000-protocol_3.pkl",
]

# Try every candidate; a failure is reported and the next file is attempted.
# `df_ingame` ends up bound to the last file that loaded successfully.
for pickle_path in pickle_candidates:
    try:
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        # The `with` block closes the file handle even when loading fails.
        with open(pickle_path, "rb") as pickle_file:
            df_ingame = pickle.load(pickle_file)
        display(df_ingame.head(1))
    except Exception as ex:
        print("FAIL: ", str(ex))

In [None]:
# print(df_ingame.index)
# # print(df_ingame.columns)
# print(
#     type(df_ingame.loc["ZKSH2u8LA3U", 1492].labels[0]),
#     type(df_ingame.loc["ZKSH2u8LA3U", 1492].scores[0]),
#     type(df_ingame.loc["ZKSH2u8LA3U", 1492].bboxes_640[0]),
#     type(df_ingame.loc["ZKSH2u8LA3U", 1492].bboxes[0]),
# )
# print("")
# df_ingame.info()

---

## Load Dataset

The sample rate is 1 fps, so the `total_frames` count is the same as the total number of seconds of in-game footage.


In [None]:
import pickle
import platform

import pandas as pd

# NOTE: unpickling can execute arbitrary code -- only load trusted files.
# The `with` block closes the file handle once the DataFrame is loaded.
with open("/shared/gbiamby/geo/segment/in_game_frames_000.pkl", "rb") as pickle_file:
    df_ingame = pickle.load(pickle_file)

In [None]:
# One row per (video_id, split, img_width, img_height) group:
#   total_frames = number of non-null `sec` values in the group,
#   total_rounds = number of distinct `round_num` values in the group.
df_video_stat = (
    df_ingame.groupby(["video_id", "split", "img_width", "img_height"])
    .agg(
        total_frames=("sec", "count"),
        total_rounds=("round_num", "nunique"),
    )
    .reset_index()
)
display(df_video_stat)

In [None]:
# Roll the per-video statistics up to one row per split; the resulting frame
# is the cell's displayed output.
df_video_stat.groupby(["split"]).agg(
    total_videos=("video_id", "nunique"),
    total_rounds=("total_rounds", "sum"),
    total_frames=("total_frames", "sum"),
)

---

## Show Some Frames

In [None]:
from IPython.core.display import HTML, Markdown
from PIL import Image as pil_img


def show_random_frames(df: pd.DataFrame, n_samples: int = 5):
    """Display a random sample of frames together with a short description.

    Args:
        df: Frame table; each row must provide ``video_id``, ``round_num``,
            ``frame_idx``, ``sec`` and ``file_path``.
        n_samples: Number of rows to sample. If ``df`` has fewer rows, every
            row is shown instead of raising ``ValueError``.
    """
    # Clamp the sample size so that small frames can still be previewed.
    df_random = df.sample(n=min(n_samples, len(df)))

    for _, img_row in df_random.iterrows():
        print("-" * 180)
        print("")
        print(
            f"video_id: {img_row.video_id}, round:{img_row.round_num}, "
            f"frame_idx: {img_row.frame_idx}, seconds: {img_row.sec}",
        )
        # The context manager releases the image file after it is displayed.
        with pil_img.open(img_row["file_path"]) as img:
            # thumbnail() shrinks the image in place to fit within 1080x640.
            img.thumbnail((1080, 640), pil_img.NEAREST)
            display(img)


show_random_frames(df_ingame, 5)

---

## Show Some Masked Frames


In [None]:
import PIL.Image as pil_img
from IPython.core.display import HTML, Markdown
from PIL import ImageDraw


def show_random_frames_masked(df: pd.DataFrame, n_samples: int = 5):
    """Display random frames before and after blacking out detected regions.

    For every sampled row the original image is displayed, each detection box
    is filled with black, and -- if any ``game_title``, ``status_bar`` or
    ``url`` detection exists -- the full-width band from the top of the image
    down to the lowest of those boxes is filled as well. The masked image is
    then displayed.

    Args:
        df: Frame table; each row must provide ``video_id``, ``frame_idx``,
            ``sec``, ``file_path`` and the parallel sequences ``labels``,
            ``bboxes`` and ``scores``.
        n_samples: Number of rows to sample. If ``df`` has fewer rows, every
            row is shown instead of raising ``ValueError``.
    """
    top_ui_labels = ("game_title", "status_bar", "url")
    # Clamp the sample size so that small frames can still be previewed.
    df_random = df.sample(n=min(n_samples, len(df)))
    for _, img_row in df_random.iterrows():
        print("-" * 180)
        print(
            f"video_id: {img_row.video_id}, frame_idx: {img_row.frame_idx}, seconds: {img_row.sec}",
        )
        # The context manager releases the image file once both views are shown.
        with pil_img.open(Path(img_row["file_path"])) as img:
            img_width, img_height = img.size
            display(img)

            # Keep every detection in a list: a dict keyed by label would
            # silently drop all but the last box when a label repeats.
            # NOTE(review): assumes each bbox dict stores its four corner
            # coordinates first and the box area fifth -- confirm the schema.
            detections = []
            for label, bbox, score in zip(img_row.labels, img_row.bboxes, img_row.scores):
                bbox_values = list(bbox.values())
                detections.append((label, tuple(bbox_values[:4]), bbox_values[4], score))

            # Sum of per-box areas; overlapping boxes are counted once each.
            masked_area = sum(area for _, _, area, _ in detections)
            print(
                f"masked_area: {masked_area:,}",
                f"img_area: {float(img_width*img_height):,}",
                f"pct_masked: {100.0 * masked_area / (img_width*img_height):.2f}%",
            )

            # Drawing happens directly on `img`; the unmasked view was already
            # displayed above.
            draw = ImageDraw.Draw(img)
            for _, box, _, _ in detections:
                draw.rectangle(box, fill=0)

            # Mask out minimum rectangular region that encloses the geoguessr
            # logo, status bar, url:
            top_ui = [det for det in detections if det[0] in top_ui_labels]
            if top_ui:
                y_max = max(det[1][3] for det in top_ui)
                draw.rectangle((0, 0, img_width, y_max), fill=0)
            display(img)
        print("")


show_random_frames_masked(df_ingame, 5)