In [None]:
# Notebook session setup (IPython magics). Keep comments on their own lines:
# anything after a line magic on the same line is passed to it as an argument.
# Autosave the notebook every 60 seconds.
%autosave 60
# Load the autoreload extension and use mode 2, which re-imports modules
# before each cell execution so edits to imported .py files are picked up.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import json
import os
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union, cast
import cv2
import matplotlib as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
import seaborn as sns
import sklearn as skl
from IPython.display import Image, display
from matplotlib_inline.backend_inline import set_matplotlib_formats
from matplotlib.patches import Rectangle
from tqdm.contrib.bells import tqdm, trange
from tqdm.contrib import tmap, tzip, tenumerate

In [None]:
# Pandas display options: never truncate cell text, and cap the rendered
# frame at 15 columns and 50 rows.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Render floats with a thousands separator and three decimal places.
pd.options.display.float_format = "{:,.3f}".format
# NOTE: `plt` is bound by `import matplotlib as plt`, i.e. the top-level
# matplotlib package rather than pyplot; `rcParams` is available on it.
# Default figure size is 12x10 (inches).
plt.rcParams["figure.figsize"] = (12, 10)

# Optional: choose the inline figure format. "retina" requests high-DPI
# raster output; SVG is an alternative, best used only when the graphs do not
# contain a lot of points/lines.
%config InlineBackend.figure_formats = ["retina"]
# NOTE(review): this requests "pdf" and "png" immediately after "retina" was
# configured above -- presumably the later call wins; confirm which is intended.
set_matplotlib_formats("pdf", "png")

In [None]:
import pickle


def check_seg(video_path: Path, expected_multiple: int = 5) -> dict:
    """Validate one pickled segmentation frame.

    Loads the object pickled at ``video_path`` and checks that it is non-empty
    and that the number of rows whose ``state`` equals ``"in_game"`` is a
    multiple of ``expected_multiple``.

    Args:
        video_path: Path to the pickle file to check.
        expected_multiple: Non-zero value the ``in_game`` row count must be
            divisible by. Defaults to 5.

    Returns:
        A dict with keys ``"result"`` (True when the check passes), ``"msg"``
        (failure description, empty string on success) and ``"video_path"``
        (the path that was checked).
    """
    # SECURITY: pickle.load can execute arbitrary code -- only point this at
    # trusted files. The context manager ensures the handle is closed promptly
    # instead of lingering until garbage collection.
    with open(video_path, "rb") as fh:
        df_seg = pickle.load(fh)

    if df_seg is None or df_seg.shape[0] == 0:
        return {"result": False, "msg": "FAIL, no segmentations", "video_path": video_path}

    # Only rows in the "in_game" state count towards the length check.
    # Note: a frame with zero in_game rows passes, since 0 is a multiple of
    # any non-zero value.
    df_seg = df_seg[df_seg.state == "in_game"]
    if len(df_seg) % expected_multiple != 0:
        return {
            "result": False,
            "msg": f"FAIL. Expected MOD {expected_multiple} but length was: {len(df_seg)}",
            "video_path": video_path,
        }
    return {"result": True, "msg": "", "video_path": video_path}


# Gather every .pkl file under the segmentation directory (recursively), in
# sorted path order, and run check_seg on each one.
seg_root = Path("/shared/gbiamby/geo/segment/seg")
files = sorted(seg_root.glob("**/*.pkl"))
results = [check_seg(pkl_path) for pkl_path in files]

In [None]:
# Tabulate the per-file check results and show the table with its shape.
df_results = pd.DataFrame(results)
display(df_results)
print(df_results.shape)

# Keep only the failing rows, report their share of all checked files, then
# list them.
failed_mask = ~df_results.result
df_bad = df_results[failed_mask]
n_total = len(df_results)
pct_bad = 100.00 * len(df_bad) / n_total
print(f"{pct_bad:02.2f}%", "total: ", n_total)
display(df_bad)

In [None]:
# Shape of the failing subset, followed by each failing file's stem with the
# "df_seg-video_id_" text stripped out (presumably leaving the video id).
print(df_bad.shape)
bad_stems = df_bad.video_path.apply(lambda p: str(p.stem).replace("df_seg-video_id_", ""))
bad_stems.values

In [None]:
# Inspect a single failing row by position (the 29th row of df_bad).
# NOTE(review): the position 28 is hard-coded; this raises IndexError when
# df_bad has fewer than 29 rows -- looks like a leftover spot check, confirm
# it is still needed.
df_bad.iloc[28]