In [1]:
# Autosave the notebook every 60 seconds.
%autosave 60
# Load the autoreload extension and use mode 2 so edited modules are re-imported
# before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

import sys
from pathlib import Path

Autosaving every 60 seconds


In [2]:
import json
import logging
import os
import pickle
import sys
from collections import Counter
from copy import deepcopy
from io import BytesIO
from pathlib import Path
from types import ModuleType
from typing import Dict, List, Optional, Tuple, Union, cast

import cv2
import matplotlib as plt
import numpy as np
import pandas as pd
import PIL
import PIL.Image as pil_img
import seaborn as sns
import sklearn as skl
from IPython.display import Image, display
from matplotlib.patches import Rectangle
from matplotlib_inline.backend_inline import set_matplotlib_formats
from tqdm.contrib import tenumerate, tmap, tzip
from tqdm.contrib.bells import tqdm, trange
from tqdm.notebook import tqdm

In [3]:
# pandas display options: never truncate cell contents; cap rendered
# columns at 15 and rows at 50.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Suitable default display for floats: thousands separator, two decimals.
pd.options.display.float_format = "{:,.2f}".format
# NOTE(review): `plt` is bound by `import matplotlib as plt`, i.e. the
# top-level matplotlib package rather than pyplot. `rcParams` exists on the
# package, so this works, but the alias is unconventional.
# NOTE(review): a later cell assigns "figure.figsize" again (10, 6).
plt.rcParams["figure.figsize"] = (12, 10)

# This one is optional -- change graphs to SVG only use if you don't have a
# lot of points/lines in your graphs. Can also just use ['retina'] if you
# don't want SVG.
%config InlineBackend.figure_formats = ["retina"]
# NOTE(review): this call follows the "retina" setting directly and requests
# "pdf"/"png" instead -- presumably it supersedes the line above; confirm
# which output format is actually intended and drop the other.
set_matplotlib_formats("pdf", "png")

In [4]:
# Figure styling (sizes, fonts, LaTeX text rendering).
#
# `set_matplotlib_formats` is the matplotlib_inline implementation imported in
# the notebook's import cell. The `IPython.display` re-export that used to be
# imported here is deprecated and only shadowed that import while emitting a
# DeprecationWarning, so it is no longer imported.
set_matplotlib_formats("pdf", "png")
plt.rcParams["savefig.dpi"] = 75

plt.rcParams["figure.autolayout"] = False
plt.rcParams["figure.figsize"] = 10, 6
plt.rcParams["axes.labelsize"] = 18
plt.rcParams["axes.titlesize"] = 20
plt.rcParams["font.size"] = 16
plt.rcParams["lines.linewidth"] = 2.0
plt.rcParams["lines.markersize"] = 8
plt.rcParams["legend.fontsize"] = 14
# Requires a working LaTeX installation for every text element in a figure.
plt.rcParams["text.usetex"] = True

plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = "cm"
# The preamble is passed to LaTeX as a single string. A comma between the two
# \usepackage commands would be typeset as text inside the preamble (a LaTeX
# error on matplotlib versions that no longer split this value on commas), so
# the commands are separated by whitespace only.
plt.rcParams["text.latex.preamble"] = "\\usepackage{subdepth} \\usepackage{type1cm}"

  set_matplotlib_formats("pdf", "png")


In [9]:
# Input locations: raw videos and the per-video detection CSVs.
VIDEO_PATH = Path("/shared/g-luo/geoguessr/videos").resolve()
DETS_PATH = Path("/shared/gbiamby/geo/segment/detections/train").resolve()
# Fail fast and say which directory is missing instead of a bare AssertionError.
assert VIDEO_PATH.exists(), f"VIDEO_PATH does not exist: {VIDEO_PATH}"
assert DETS_PATH.exists(), f"DETS_PATH does not exist: {DETS_PATH}"

# files = sorted(VIDEO_PATH.glob("**/*.mp4"))
# print("total video files found: ", len(files))

In [12]:
# Collect the detection CSVs in a stable (sorted) order so that positional
# indexing into the list is reproducible between runs.
detection_csvs = sorted(DETS_PATH.glob("*.csv"))
# An empty directory would otherwise only surface later as an IndexError.
assert detection_csvs, f"No detection csv files found in {DETS_PATH}"
print(f"Found {len(detection_csvs)} csv files")

Found 48 csv files


In [21]:
# Detection categories as {"id", "name"} records. Ids are consecutive and start
# at 0, so they are derived from each name's position in the tuple below; keep
# the order of the names unchanged.
cats = [
    {"id": cat_id, "name": cat_name}
    for cat_id, cat_name in enumerate(
        (
            "background",
            "between_rounds_box_white",
            "between_rounds_box_with_orange_next",
            "big_green_btw_rounds_box",
            "challenge_btn_orange",
            "challenge_high_score_board",
            "curr_state",
            "did_you_enjoy_this_location",
            "final_scores_box_beige",
            "finished_legs_box",
            "game_about_to_start_box_white",
            "game_finished_well_done_big_box",
            "game_finished_white_box",
            "game_title",
            "guess",
            "guess_grey",
            "guess_w_icon_only",
            "high_score_box",
            "in_game_mini_map",
            "invite_friends",
            "leader_board",
            "left_menu_dark",
            "loading_loc_white",
            "make_a_guess",
            "next_orange_btn",
            "next_round",
            "other",
            "participants_box",
            "play",
            "play_again",
            "play_current_leg",
            "play_next_round",
            "points_bar",
            "points_bar_two_bars",
            "refresh_btn",
            "setup_round_time_limit_box",
            "share_challenge_box_white",
            "show_full_results",
            "show_high_score",
            "start_challenge_orange",
            "start_game",
            "status_bar",
            "status_bar_white",
            "try_another_map",
            "try_pro_for_free",
            "users_bar_white",
            "view_summary",
        )
    )
]

## Load detections for a Video

In [173]:
# Pick one detections file by a fixed position in the sorted list (this is a
# hard-coded index, not a random choice).
# NOTE(review): `csv_file` is reassigned in a later cell before any detections
# are loaded, so this selection only affects the message printed here.
csv_file = detection_csvs[32]
print("Loading csv: ", csv_file)

Loading csv:  /shared/gbiamby/geo/segment/detections/train/df_frame_dets-video_id_ZIJSGDK7JZE.csv


In [174]:
def str_to_list(s: str, _type, collection_type):
    """Parse a bracketed, comma-separated string into a typed collection.

    Args:
        s: Text such as ``"[1, 2, 3]"`` or ``"(1.5,2)"``. Any value that is
            not a ``str`` is returned unchanged.
        _type: Callable applied to every non-empty comma-separated token.
        collection_type: Callable that receives the list of converted tokens
            and builds the returned collection (e.g. ``list``, ``tuple``).

    Returns:
        ``collection_type`` applied to the converted tokens, or ``s`` itself
        when it is not a string.
    """
    if not isinstance(s, str):
        return s

    # Remove every square bracket and parenthesis, wherever it occurs.
    without_brackets = s.translate(str.maketrans("", "", "[]()"))

    converted = []
    for token in without_brackets.split(","):
        # Empty tokens (empty input, doubled or trailing commas) are skipped.
        # Surrounding whitespace is NOT stripped before conversion.
        if token:
            converted.append(_type(token))
    return collection_type(converted)


def parse_tuple(s: str):
    """Parse the text form of a tuple of strings into a sorted tuple.

    The input is expected to look like ``"('b', 'a')"``. Parentheses are
    rewritten to brackets and single quotes to double quotes so the text can
    be decoded with ``json.loads``.

    Args:
        s: Text to parse. Any value that is not a ``str`` is returned
            unchanged.

    Returns:
        A tuple of the parsed items in sorted order. Empty or whitespace-only
        text yields the empty tuple ``()``, so every string input produces
        the same hashable type (it previously produced an unhashable
        ``set()``, unusable as a ``groupby`` key).

    Raises:
        json.JSONDecodeError: If the rewritten text is not valid JSON.
    """
    if not isinstance(s, str):
        return s

    text = s.replace("(", "[").replace(")", "]")
    text = text.replace("'", '"').strip()
    # A one-element tuple is written as "('x',)"; JSON rejects the trailing comma.
    text = text.replace(",]", "]")
    if not text:
        return ()
    return tuple(sorted(json.loads(text)))


def parse_dict(s: str):
    """Decode a Python-repr-like string (single-quoted) as JSON.

    Despite the name, whatever JSON value the text encodes is returned (a
    dict, a list, a number, ...).

    Args:
        s: Text to decode. Any value that is not a ``str`` is returned
            unchanged.

    Returns:
        The decoded JSON value, or ``s`` itself when it is not a string.

    Raises:
        json.JSONDecodeError: If the text is not valid JSON after every
            single quote has been replaced with a double quote.
    """
    if not isinstance(s, str):
        return s
    json_text = s.replace("'", '"')
    return json.loads(json_text)


def load_detections(csv_path: Path) -> pd.DataFrame:
    """Load one per-video frame-detections CSV and decode its serialized columns.

    Args:
        csv_path: Path of the CSV file to read. The file is expected to have
            the columns ``frame_id``, ``label_ids``, ``labels``,
            ``labels_set``, ``scores`` and ``bboxes``.

    Returns:
        A DataFrame in which ``frame_id`` is cast to ``int``, ``label_ids``,
        ``labels``, ``scores`` and ``bboxes`` are decoded with ``parse_dict``,
        and ``labels_set`` is decoded with ``parse_tuple``.
    """
    # Read the file that was passed in. The earlier version read the notebook
    # global `csv_file`, so the argument was silently ignored.
    df = pd.read_csv(csv_path)
    df["frame_id"] = df["frame_id"].astype(int)
    # Columns that were written to the CSV as text and are decoded as JSON.
    for column in ("label_ids", "labels", "scores", "bboxes"):
        df[column] = df[column].apply(parse_dict)
    df["labels_set"] = df["labels_set"].apply(parse_tuple)
    return df


# Select the detections file to analyse (fixed position in the sorted list);
# this replaces any `csv_file` chosen in an earlier cell.
csv_file = detection_csvs[13]
print("Loading csv: ", csv_file)
# Frame-level detections for the selected video, one row per frame.
df_framedets = load_detections(csv_file)
# df_framedets.head(100)
# parse_tuple("('guess_grey', 'status_bar_white', 'game_title', 'in_game_mini_map', 'status_bar')")

Loading csv:  /shared/gbiamby/geo/segment/detections/train/df_frame_dets-video_id_8ytmWvud6-4.csv


## Show Game State

In [175]:
# Exact label combinations that mark a screen shown after a round or after a
# whole game, where the labels alone cannot tell the two apart.
_AMBIGUOUS_LABEL_SETS = frozenset(
    {
        frozenset({"points_bar_two_bars", "did_you_enjoy_this_location", "status_bar"}),
        frozenset({"between_rounds_box_white", "did_you_enjoy_this_location", "status_bar"}),
        frozenset({"points_bar_two_bars", "status_bar"}),
        frozenset({"points_bar_two_bars", "status_bar", "game_title"}),
        frozenset(
            {"points_bar_two_bars", "did_you_enjoy_this_location", "status_bar", "game_title"}
        ),
        frozenset({"points_bar_two_bars", "status_bar", "share_challenge_box_white"}),
        frozenset({"points_bar_two_bars", "challenge_high_score_board", "status_bar"}),
        frozenset(
            {"points_bar_two_bars", "status_bar", "game_title", "share_challenge_box_white"}
        ),
        frozenset(
            {"between_rounds_box_white", "did_you_enjoy_this_location", "status_bar", "game_title"}
        ),
        frozenset({"game_title", "points_bar", "points_bar_two_bars", "status_bar"}),
        frozenset({"game_title", "points_bar", "status_bar"}),
        frozenset({"game_title", "other", "points_bar", "status_bar"}),
        frozenset({"game_title", "other", "points_bar", "points_bar_two_bars", "status_bar"}),
    }
)


def classify_frame(dets: dict):
    """Map the UI labels detected in one frame to a coarse game state.

    Args:
        dets: Mapping (a dict or a DataFrame row) whose ``"labels_set"`` entry
            is an iterable of label names detected in the frame.

    Returns:
        One of ``"between_round_or_game_ambiguous"``, ``"in_game"``,
        ``"between_games"``, ``"between_rounds"``, ``"out_of_game"`` or
        ``"unknown"``. The rules are checked in order and the first match
        wins, so reordering them changes the result for some label sets.
    """
    labels = frozenset(dets["labels_set"])
    has_points_bar = "points_bar" in labels
    has_two_bars = "points_bar_two_bars" in labels
    has_left_menu = "left_menu_dark" in labels

    # 1. Exact matches against the known ambiguous combinations.
    if labels in _AMBIGUOUS_LABEL_SETS:
        return "between_round_or_game_ambiguous"

    # 2. "Try another map" shown together with either kind of points bar.
    if "try_another_map" in labels and (has_points_bar or has_two_bars):
        return "between_round_or_game_ambiguous"

    # 3. Mini map plus status bar (with or without the greyed-out guess
    #    button), or exactly the title/guess/status-bar combination.
    if {"in_game_mini_map", "status_bar"} <= labels:
        return "in_game"
    if labels == {"game_title", "guess_grey", "status_bar", "status_bar_white"}:
        return "in_game"

    # 4. Left menu with the challenge high-score board; checked before the
    #    between-rounds rule on purpose.
    if has_left_menu and "challenge_high_score_board" in labels:
        return "between_games"

    # 5. Round-result screens.
    if has_two_bars and "between_rounds_box_white" in labels:
        return "between_rounds"
    if "play_next_round" in labels and (has_two_bars or has_points_bar):
        return "between_rounds"

    # 6. Any other frame showing the left menu.
    if has_left_menu:
        return "between_games"

    # 7. A two-bar points display that matched nothing above stays unknown,
    #    even if it would satisfy the final-scores rule below.
    if has_two_bars:
        return "unknown"

    # 8. No detections at all.
    if not labels:
        return "out_of_game"

    # 9. Final scores box together with a points bar.
    if "final_scores_box_beige" in labels and has_points_bar:
        return "between_games"

    return "unknown"


# Classify every frame: `axis=1` passes each row to classify_frame, which reads
# the row's "labels_set". The result is stored in a new "game_state" column on
# the existing frame.
df_framedets["game_state"] = df_framedets.apply(classify_frame, axis=1)

### Game States Summary

In [176]:
# Number of frames per game state, most frequent first, rendered as a table.
df_framedets["game_state"].value_counts().to_frame()

Unnamed: 0,game_state
in_game,3384
between_games,429
between_rounds,268
between_round_or_game_ambiguous,229
out_of_game,91
unknown,5


## Show UI elements for game_state == "unknown"

In [177]:
# For frames whose state is "unknown": how many frames show each distinct
# label combination, most frequent first.
(
    df_framedets.loc[df_framedets["game_state"] == "unknown"]
    .groupby(["labels_set"])
    .agg(cnt=("frame_id", "count"))
    .sort_values("cnt", ascending=False)
)

Unnamed: 0_level_0,cnt
labels_set,Unnamed: 1_level_1
"(game_title, status_bar)",3
"(final_scores_box_beige, game_title, status_bar)",1
"(other,)",1


## Show Sequence of Game States

In [178]:
# Temporarily lift the row/column display limits so the complete per-frame
# sequence of game states is printed (one line per frame); the limits are
# restored when the `with` block exits.
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(df_framedets.game_state)

0                           out_of_game
1                           out_of_game
2                           out_of_game
3                           out_of_game
4                           out_of_game
5                           out_of_game
6                           out_of_game
7                           out_of_game
8                           out_of_game
9                           out_of_game
10                          out_of_game
11                          out_of_game
12                          out_of_game
13                          out_of_game
14                          out_of_game
15                          out_of_game
16                          out_of_game
17                          out_of_game
18                          out_of_game
19                          out_of_game
20                          out_of_game
21                          out_of_game
22                          out_of_game
23                          out_of_game
24                          out_of_game
