# Explore Data
This notebook explores the dataset. For each annotation type (term), it computes the mean percentage of image pixels that belong to that class across the term's masks. It also computes the mean width and height of the bounding boxes enclosing the segmentation masks.

In [None]:
import os
import torch

import torchvision.transforms as transforms
import numpy as np
import constants as cst

from torchvision.ops import masks_to_boxes
from PIL import Image

In [None]:
# Dataset statistics: for every annotation term, report the mean share of image
# pixels covered by its masks and the mean bounding-box extent of the masks.
transform = transforms.ToTensor()
# Denominator for the coverage ratio (1932 * 2576 elements per mask).
# NOTE(review): assumes every mask file has exactly this many elements — confirm.
total_pixels = 1932 * 2576

TERMS = cst.TERM_NAMES

for term in TERMS:
    # Masks for a term are read from a folder named after it under /notebooks/.
    mask_folder = "/notebooks/" + term

    percent_pixels = []
    horizontal_lengths = []
    vertical_lengths = []
    for file in os.listdir(mask_folder):
        mask_path = os.path.join(mask_folder, file)
        if not os.path.isfile(mask_path):
            # os.listdir also returns sub-directories (e.g. notebook checkpoint
            # folders); those cannot be opened as images.
            continue

        # The context manager releases the file handle as soon as the pixel
        # data has been converted, instead of leaking one handle per mask.
        with Image.open(mask_path) as image:
            mask = transform(image)
        # NOTE(review): per the torchvision docs ToTensor rescales 8-bit images
        # to [0, 1], so this integer cast would truncate every value below 1.0
        # to 0 — confirm the masks are strictly binary (0 / 255).
        mask = mask.type(torch.LongTensor)

        # Sum of the integer mask values; equals the foreground pixel count
        # when the mask only contains 0 and 1.
        pixels = int(mask.sum())
        percent_pixels.append(pixels / total_pixels)

        # Keep only non-zero ids. Dropping the first unique value (as a plain
        # obj_ids[1:] would) discards the only real object when a mask has no
        # background pixel at all.
        obj_ids = torch.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        if obj_ids.numel() == 0:
            # No foreground: the mask still counts as 0 coverage above, but it
            # has no bounding box, so indexing box[0] would raise IndexError.
            continue

        # One boolean mask per remaining id, via broadcasting.
        masks = mask == obj_ids[:, None, None]
        box = masks_to_boxes(masks)

        # Only the first box is measured. Columns are (x1, y1, x2, y2) per the
        # torchvision docs, so these are corner-to-corner differences.
        h = box[0, 2].item() - box[0, 0].item()
        v = box[0, 3].item() - box[0, 1].item()
        horizontal_lengths.append(h)
        vertical_lengths.append(v)

    # np.mean of an empty list yields nan (with a RuntimeWarning), which is
    # what gets printed for a term without usable masks.
    print("Term: " + term)
    print("Mean percentage of pixels: " + str(np.mean(percent_pixels) * 100))
    print("Mean horizontal: " + str(np.mean(horizontal_lengths)))
    print("Mean vertical: " + str(np.mean(vertical_lengths)))
    print()