In [1]:
# Autosave the notebook every 60 seconds.
%autosave 60
# Load the autoreload extension and use mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

import sys
from pathlib import Path

Autosaving every 60 seconds


In [2]:
# Absolute path of the parent of the current working directory.
# NOTE(review): `p` is not referenced by any of the cells visible here —
# confirm it is still needed.
p = Path("../").resolve()

In [14]:
import json
import os
from collections import Counter
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union, cast
import cv2
import matplotlib as plt
import numpy as np
import pandas as pd
import PIL
import PIL.Image as pil_img
import seaborn as sns
import sklearn as skl
from IPython.display import Image, display
from matplotlib.patches import Rectangle
from matplotlib_inline.backend_inline import set_matplotlib_formats
from tqdm import tqdm

from geoscreens.consts import GEO_SCREENS, IMG_SIZE
from geoscreens.geo_data import GeoScreensDataModule
from geoscreens.modules import LightModelTorch, build_module
from scripts.train_geo import get_model

# Feature flag: ray is only imported and initialised when this is True, so the
# notebook does not require ray while the flag is off.
USE_RAY = False
if USE_RAY:
    import ray

    # ignore_reinit_error lets this cell be re-run without ray.init() raising
    # because ray was already initialised in this kernel.
    ray.init(ignore_reinit_error=True)

In [4]:
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 15)
pd.set_option("display.max_rows", 50)
# Suitable default display for floats
pd.options.display.float_format = "{:,.2f}".format
plt.rcParams["figure.figsize"] = (12, 10)

# This one is optional -- change graphs to SVG only use if you don't have a
# lot of points/lines in your graphs. Can also just use ['retina'] if you
# don't want SVG.
%config InlineBackend.figure_formats = ["retina"]
set_matplotlib_formats("pdf", "png")

In [5]:
# Use the `set_matplotlib_formats` already imported from
# `matplotlib_inline.backend_inline` in the imports cell. Do not re-import it
# from `IPython.display`: that name is a deprecated wrapper which rebinds the
# notebook-wide name and emits a DeprecationWarning on every call.
set_matplotlib_formats("pdf", "png")

# Global figure styling. NOTE: `plt` is bound to the top-level `matplotlib`
# package in the imports cell (`import matplotlib as plt`), not to
# `matplotlib.pyplot`; `rcParams` is the only attribute used here.
plt.rcParams.update(
    {
        "savefig.dpi": 75,
        "figure.autolayout": False,
        "figure.figsize": (10, 6),
        "axes.labelsize": 18,
        "axes.titlesize": 20,
        "font.size": 16,
        "lines.linewidth": 2.0,
        "lines.markersize": 8,
        "legend.fontsize": 14,
        # Text is rendered through LaTeX; this needs a working LaTeX
        # installation on the machine running the kernel.
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": "cm",
    }
)
# plt.rcParams["text.latex.preamble"] = "\\usepackage{subdepth}, \\usepackage{type1cm}"

  set_matplotlib_formats("pdf", "png")


In [7]:
# Load the exported annotation tasks (a JSON list with one dict per task).
# The context manager closes the file handle, which the previous bare
# `open(...)` call never did.
TASKS_EXPORT_PATH = Path("/shared/gbiamby/geo/geoscreens_003_tasks_export.json")
with TASKS_EXPORT_PATH.open(encoding="utf-8") as tasks_file:
    tasks = json.load(tasks_file)
print(len(tasks), " total tasks")
# Peek at the first two tasks only; the full list is far too large to display.
tasks[:2]

26084  total tasks


[{'id': 130304,
  'annotations': [{'id': 667,
    'completed_by': 1,
    'result': [{'original_width': 1280,
      'original_height': 720,
      'image_rotation': 0,
      'value': {'x': 28.046874999999993,
       'y': 57.916666666666636,
       'width': 42.8125,
       'height': 12.777777777777805,
       'rotation': 0,
       'rectanglelabels': ['points_bar_two_bars']},
      'id': '3kwwws2EiJ',
      'from_name': 'label',
      'to_name': 'image',
      'type': 'rectanglelabels',
      'origin': 'manual'},
     {'original_width': 1280,
      'original_height': 720,
      'image_rotation': 0,
      'value': {'x': 45.30000000000001,
       'y': 72.08333333333334,
       'width': 9.43437499999999,
       'height': 6.499999999999969,
       'rotation': 0,
       'rectanglelabels': ['did_you_enjoy_this_location']},
      'id': 'm1gU5p4IY4',
      'from_name': 'label',
      'to_name': 'image',
      'type': 'rectanglelabels',
      'origin': 'manual'},
     {'original_width': 1280,
     

In [11]:
# Each task stores its image as a local-files URL; rewrite that prefix to the
# shared screenshots directory so the file can be opened from disk.
LOCAL_FILES_PREFIX = "/data/local-files/?d="
SCREENSHOTS_ROOT = "/shared/gbiamby/geo/screenshots/"

# Annotate every task's "data" dict in place with the resolved path and the
# pixel dimensions of the image on disk.
for task in tqdm(tasks):
    task_data = task["data"]
    task_data["full_path"] = task_data["image"].replace(LOCAL_FILES_PREFIX, SCREENSHOTS_ROOT)
    # Open through a context manager so every file handle is released
    # deterministically instead of being left to garbage collection.
    # `pil_img` is the explicit `PIL.Image` alias from the imports cell.
    with pil_img.open(task_data["full_path"]) as img:
        width, height = img.size
    task_data["width"] = width
    task_data["height"] = height

100%|██████████| 26084/26084 [00:19<00:00, 1314.43it/s]


In [18]:
# Tally how many tasks share each (width, height) image size.
Counter((task["data"]["width"], task["data"]["height"]) for task in tasks)

Counter({(1280, 720): 24273, (1152, 720): 1504, (1280, 678): 307})

In [31]:
# Index each task's "data" dict by the resolved image path. Despite the name,
# the values are the whole data dicts (image, video_id, full_path, width,
# height), not just the sizes.
img_sizes = dict((task["data"]["full_path"], task["data"]) for task in tasks)
# Show the first ten entries as a sanity check.
list(img_sizes.items())[:10]

[('/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000083.jpg',
  {'image': '/data/local-files/?d=screen_samples_auto/aob8sh6l-6M/frame_00000083.jpg',
   'video_id': 'aob8sh6l-6M',
   'full_path': '/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000083.jpg',
   'width': 1280,
   'height': 720}),
 ('/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000052.jpg',
  {'image': '/data/local-files/?d=screen_samples_auto/aob8sh6l-6M/frame_00000052.jpg',
   'video_id': 'aob8sh6l-6M',
   'full_path': '/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000052.jpg',
   'width': 1280,
   'height': 720}),
 ('/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000104.jpg',
  {'image': '/data/local-files/?d=screen_samples_auto/aob8sh6l-6M/frame_00000104.jpg',
   'video_id': 'aob8sh6l-6M',
   'full_path': '/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000104.jpg',
   

In [30]:
import pickle

# Persist the path -> task-data mapping for reuse outside this notebook.
# The context manager guarantees the file is flushed and closed; the previous
# bare `open(..., "wb")` left that to garbage collection.
with open("./img_sizes.pkl", "wb") as pkl_file:
    pickle.dump(img_sizes, pkl_file)

In [12]:
# Inspect the full structure of a single task (annotations plus data).
tasks[0]

{'id': 130304,
 'annotations': [{'id': 667,
   'completed_by': 1,
   'result': [{'original_width': 1280,
     'original_height': 720,
     'image_rotation': 0,
     'value': {'x': 28.046874999999993,
      'y': 57.916666666666636,
      'width': 42.8125,
      'height': 12.777777777777805,
      'rotation': 0,
      'rectanglelabels': ['points_bar_two_bars']},
     'id': '3kwwws2EiJ',
     'from_name': 'label',
     'to_name': 'image',
     'type': 'rectanglelabels',
     'origin': 'manual'},
    {'original_width': 1280,
     'original_height': 720,
     'image_rotation': 0,
     'value': {'x': 45.30000000000001,
      'y': 72.08333333333334,
      'width': 9.43437499999999,
      'height': 6.499999999999969,
      'rotation': 0,
      'rectanglelabels': ['did_you_enjoy_this_location']},
     'id': 'm1gU5p4IY4',
     'from_name': 'label',
     'to_name': 'image',
     'type': 'rectanglelabels',
     'origin': 'manual'},
    {'original_width': 1280,
     'original_height': 720,
     'im

In [22]:
# Collect tasks in which the same rectangle label occurs more than once.
# Only the first annotation of each task is inspected; tasks without any
# annotation are skipped.
mistakes = []
for task in tqdm(tasks, total=len(tasks)):
    results_per_annotation = [annotation["result"] for annotation in task["annotations"]]
    if not results_per_annotation:
        continue
    # Each region contributes its first rectangle label.
    labels = [region["value"]["rectanglelabels"][0] for region in results_per_annotation[0]]
    # A set drops repeats, so a smaller set means at least one duplicate.
    if len(set(labels)) < len(labels):
        mistakes.append(task)

100%|██████████| 26084/26084 [00:00<00:00, 722927.45it/s]


In [24]:
# Number of tasks flagged as containing a duplicated label.
len(mistakes)

3

In [25]:
# Show the image metadata of every flagged task so the frames can be located.
[m["data"] for m in mistakes]

[{'image': '/data/local-files/?d=screen_samples_auto/aob8sh6l-6M/frame_00000079.jpg',
  'video_id': 'aob8sh6l-6M',
  'full_path': '/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000079.jpg',
  'width': 1280,
  'height': 720},
 {'image': '/data/local-files/?d=screen_samples_auto/aob8sh6l-6M/frame_00000067.jpg',
  'video_id': 'aob8sh6l-6M',
  'full_path': '/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000067.jpg',
  'width': 1280,
  'height': 720},
 {'image': '/data/local-files/?d=screen_samples_auto/aob8sh6l-6M/frame_00000227.jpg',
  'video_id': 'aob8sh6l-6M',
  'full_path': '/shared/gbiamby/geo/screenshots/screen_samples_auto/aob8sh6l-6M/frame_00000227.jpg',
  'width': 1280,
  'height': 720}]