In [None]:
from pathlib import Path
import json
import random
import shutil

from tqdm import tqdm
import cv2
import numpy as np
from PIL import Image

from matplotlib import pyplot as plt

In [None]:
# Root folder with one sub-folder of extracted .jpg frames per video.
frames_folder = Path("/workspace/frames")
# Folder with one "<video folder name>.json" annotation file per video.
ann_folder = Path("/workspace/Annotations")

In [None]:
# Sanity check: for every video folder, compare the number of extracted .jpg
# frames with the last frame index covered by that video's annotation file.
for folder in sorted(frames_folder.iterdir()):
    # Stray files in the frames root have neither frames nor annotations.
    if not folder.is_dir():
        continue

    num_files = sum(1 for _ in folder.glob('*.jpg'))
    print(f"Folder {folder.name} contains {num_files} jpg files", end= " ")

    # Report a missing annotation file instead of aborting the whole check.
    ann_path = ann_folder/f"{folder.name}.json"
    if not ann_path.is_file():
        print("but has no annotation file")
        continue

    # Load the JSON data
    with open(ann_path, 'r') as f:
        data = json.load(f)

    # End of the last annotated span; default=0 keeps an empty annotation
    # list from raising ValueError inside max().
    total_frames = max(
        (ann['startFrame']+ann['numberFrames'] for ann in data),
        default=0,
    )

    print(f"and {total_frames} annotations")

## Number of Examples for each Class

In [None]:
# Sum `numberFrames` per label over every annotation file, keeping only the
# labels whose name starts with "P".
classes_count = {}
for ann_file in sorted(ann_folder.iterdir()):
    if ann_file.suffix != ".json":
        continue

    annotations = json.loads(ann_file.read_text())

    for entry in annotations:
        label = entry["label"]["name"]
        if not label.startswith("P"):
            continue
        classes_count[label] = classes_count.get(label, 0) + entry["numberFrames"]

# Re-order by the integer that follows the leading "P" in the first word of
# the label, so the classes appear in numeric rather than insertion order.
classes_count = dict(
    sorted(classes_count.items(), key=lambda item: int(item[0].split()[0][1:]))
)
classes_count

In [None]:
# Persist the per-class frame totals as pretty-printed JSON.
with open('/workspace/class_count.json', 'w') as file:
    file.write(json.dumps(classes_count, indent=4))

## Sampling at 1 FPS (every 30th frame of the 30 FPS video)

In [None]:
# Build the 1 FPS label file: for every "P..." annotation, trim 15 frames from
# both ends of the annotated span and then keep one frame every `skip` frames.
cls_count = {}  # short class key -> number of sampled frames
mapping = {}    # short class key -> full label name found in the annotations

# The context manager closes (and flushes) the label file even when a
# malformed annotation file makes the loop raise part-way through.
with open("/workspace/1fps_labels.txt", "w") as label_f:
    for file in sorted(ann_folder.iterdir()):
        if file.suffix != ".json":
            continue
        # Load the JSON data
        with open(file, 'r') as f:
            data = json.load(f)

        for ann in data:
            c = ann["label"]["name"]
            if not c.startswith("P"):
                continue
            s = ann["startFrame"] + 15 # tolerance of 0.5 seconds
            e = ann["startFrame"] + ann["numberFrames"] - 15 # tolerance of 0.5 seconds
            skip = 30
            # The short key (first word of the label, lower-cased) depends only
            # on the annotation, so it is computed once rather than per frame.
            new_c = c.split(' ')[0].lower()
            for i in range(s,e,skip):
                # Registered inside the loop so that spans too short to yield
                # a frame do not add a class to the mapping.
                mapping[new_c] = c
                label_f.write(f"{file.stem}/frame{i:06d}.jpg,{new_c}\n")
                cls_count[new_c] = cls_count.get(new_c, 0) + 1

# Order both dictionaries by the integer that follows the leading letter.
cls_count = {k: cls_count[k] for k in sorted(cls_count, key=lambda x: int(x.split()[0][1:]))}
mapping = {k: mapping[k] for k in sorted(mapping, key=lambda x: int(x.split()[0][1:]))}


In [None]:
# Display the per-class sample counts and the short-key -> full-label mapping.
cls_count, mapping

In [None]:
# Persist the short-key -> full-label mapping as pretty-printed JSON.
with open('/workspace/mapping.json', 'w') as file:
    file.write(json.dumps(mapping, indent=4))

## Uniform Sampling Examples Collection

In [None]:
# Collect, for every "P..." class, the relative path of each annotated frame.
cls2frames = {}  # short class key -> list of "<annotation stem>/frameNNNNNN.jpg"
for file in sorted(ann_folder.iterdir()):
    if file.suffix != ".json":
        continue

    # Load the JSON data
    with open(file, 'r') as f:
        data = json.load(f)

    for ann in data:
        c = ann["label"]["name"]
        if not c.startswith("P"):
            continue

        # Short key: first word of the label, lower-cased.
        new_c = c.split(' ')[0].lower()

        s = ann["startFrame"]
        # Exclusive end. An annotation spans exactly `numberFrames` frames, so
        # the frame at startFrame + numberFrames is outside the span; including
        # it (the former `e + 1`) added one extra frame per annotation.
        e = s + ann["numberFrames"]

        frames = [f"{file.stem}/frame{i:06d}.jpg" for i in range(s, e)]
        # Only register the class when the span actually contains frames, so
        # no class ends up with an empty list.
        if frames:
            cls2frames.setdefault(new_c, []).extend(frames)

# Order the classes by the integer that follows the leading letter of the key.
cls2frames = {k: cls2frames[k] for k in sorted(cls2frames, key=lambda x: int(x.split()[0][1:]))}

for k,v in cls2frames.items():
    print(k, len(v))

In [None]:
# Draw the same number of frames from every class and write them to the
# uniform label file as "<frame path>,<class key>" lines.
SAMPLES_PER_CLASS = 10_000
SAMPLE_SEED = 0

# A dedicated, seeded generator makes the draw identical on every re-run
# without touching the global `random` state.
rng = random.Random(SAMPLE_SEED)

final_list = []  # every sampled frame path, consumed by the crop-and-copy step
# The context manager closes the label file even if sampling raises.
with open("/workspace/uniform_labels.txt", "w") as label_f:
    for k,v in cls2frames.items():
        # random.sample raises ValueError when asked for more items than the
        # population holds, so small classes contribute all of their frames.
        n_samples = min(SAMPLES_PER_CLASS, len(v))
        if n_samples < SAMPLES_PER_CLASS:
            print(f"Class {k} has only {len(v)} frames; using all of them")

        sampled = rng.sample(v, k=n_samples)
        final_list.extend(sampled)
        label_f.writelines(f"{s},{k}\n" for s in sampled)


## Crop and Copy

In [None]:
def _content_span(line, threshold):
    """Return (first, last) index of the content pixels in a 1-pixel-wide strip.

    `line` has shape (n,) or (n, channels). A pixel counts as content when none
    of its channels is below `threshold`. Returns (None, None) if the strip has
    no content pixel.
    """
    channels = line.reshape(line.shape[0], -1)
    is_content = ~(channels < threshold).any(axis=1)
    hits = np.flatnonzero(is_content)
    if hits.size == 0:
        return None, None
    return int(hits[0]), int(hits[-1])


def get_cropping_co_ord(image, threshold=5):
    """
    Find the bounding box of the non-black content of an image.

    Only the central row and the central column are inspected. Along each of
    them the first and the last pixel whose channels are all at least
    `threshold` mark the edge of the content in that direction.

    Parameters
    ----------
    image : array-like
        Image of shape (h, w) or (h, w, channels); any channel count works.
    threshold : scalar, optional
        A pixel is content when no channel is below this value (default 5).

    Returns
    -------
    tuple
        (TOPL_X, TOPL_Y, BOTTOMR_X, BOTTOMR_Y). Each value is the index of a
        content pixel, so the bottom-right coordinates are inclusive. A
        coordinate is None when the inspected row/column has no content pixel.
    """
    image = np.asarray(image)
    h, w = image.shape[:2]

    # Central row gives the horizontal extent, central column the vertical one.
    TOPL_X, BOTTOMR_X = _content_span(image[h // 2], threshold)
    TOPL_Y, BOTTOMR_Y = _content_span(image[:, w // 2], threshold)

    return TOPL_X, TOPL_Y, BOTTOMR_X, BOTTOMR_Y

In [None]:
# Compute one crop rectangle per video folder from a single reference frame.
# NOTE(review): assumes every frame in a folder shares the same black border
# and that "frame001234.jpg" exists in each folder - confirm for short videos.
frames_shape = {}  # folder name -> (TOPL_X, TOPL_Y, BOTTOMR_X, BOTTOMR_Y)
for folder in sorted(frames_folder.iterdir()):
    # Stray files in the frames root are not video folders.
    if not folder.is_dir():
        continue

    # Decode the reference frame and release the file handle right away.
    with Image.open(folder/"frame001234.jpg") as reference_frame:
        img = np.array(reference_frame)

    frames_shape[folder.name] = get_cropping_co_ord(img)

In [None]:
# Crop every sampled frame to its video's content rectangle and save it under
# the output root, keeping the "<folder>/<frame>.jpg" layout.
output_root = Path("/workspace/data/frames")
created_dirs = set()  # output folders already created, so mkdir runs once each

for image_path in tqdm(final_list):
    folder_name = image_path.split('/')[0]
    if folder_name not in created_dirs:
        (output_root/folder_name).mkdir(parents=True, exist_ok=True)
        created_dirs.add(folder_name)

    TOPL_X, TOPL_Y, BOTTOMR_X, BOTTOMR_Y = frames_shape[folder_name]
    # The bottom-right coordinates are indices of the last content pixel
    # (inclusive), while slice stops are exclusive: add 1 so the last content
    # row/column is kept. None (nothing detected) still means "to the edge".
    stop_x = None if BOTTOMR_X is None else BOTTOMR_X + 1
    stop_y = None if BOTTOMR_Y is None else BOTTOMR_Y + 1

    with Image.open(frames_folder/image_path) as frame:
        img = np.array(frame)

    cropped = Image.fromarray(img[TOPL_Y:stop_y, TOPL_X:stop_x])
    cropped.save(output_root/image_path)