In [1]:
import os
import json
import random 

import pandas as pd 
import numpy as np

import cv2
import torch
import torch.nn.utils
import torch.nn.functional as F

from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.patheffects as PathEffects
import matplotlib.image as mpimg
import matplotlib.colors as mcolors

from tqdm.notebook import tqdm

In [2]:
def read_jsons(input_dir):
    """Load every ``*.json`` annotation file in ``input_dir`` into a list of records.

    Only the first entry of each file's ``shapes`` list is used.

    Args:
        input_dir: Directory that is scanned (non-recursively) for files whose
            name ends with ``.json``.

    Returns:
        A list with one dict per JSON file, ordered by file name. Each dict has
        the keys ``document`` (``shapes[0]["image_name"]``), ``component_name``
        (``name``), ``image`` (``origin_image``) and ``bounding_box``
        (``shapes[0]["points"]``). An empty list if no JSON file is found.

    Raises:
        OSError: If ``input_dir`` cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError, IndexError: If a file lacks one of the expected fields or
            has an empty ``shapes`` list.
    """
    # os.listdir yields entries in arbitrary, platform-dependent order; sort so
    # that the record order is reproducible between runs and machines.
    json_files = sorted(
        file_name for file_name in os.listdir(input_dir) if file_name.endswith(".json")
    )

    records = []
    for file_name in json_files:
        # Binary mode lets json.load detect the UTF-8/16/32 encoding itself
        # instead of depending on the locale's default text encoding.
        with open(os.path.join(input_dir, file_name), "rb") as input_file:
            data = json.load(input_file)

        first_shape = data["shapes"][0]
        records.append(
            {
                "document": first_shape["image_name"],
                "component_name": data["name"],
                "image": data["origin_image"],
                "bounding_box": first_shape["points"],
            }
        )

    return records

In [3]:
# Dataset layout: annotation JSONs live in the "train" and "val" sub-folders,
# the images themselves in "image" -- all directly under data_dir.
data_dir = "./dataset/"
train_metadata, val_metadata, images_dir = (
    os.path.join(data_dir, sub_dir) for sub_dir in ("train", "val", "image")
)

In [4]:
# Parse the annotation JSONs of both splits (training first, then validation)
# and report how many records each one produced.
train_data, val_data = (
    read_jsons(split_dir) for split_dir in (train_metadata, val_metadata)
)

print(f"Number training datapoints: {len(train_data)}")
print(f"Number validation datapoints: {len(val_data)}")

Number training datapoints: 13761
Number validation datapoints: 1000


In [20]:
# Per-image statistics over the validation split. The four lists stay
# index-aligned: entry i of each list describes the same image.
resolution = []  # pixel count per image (rows * columns)
xaxis = []       # img.shape[0], i.e. number of rows (image HEIGHT, despite the name)
yaxis = []       # img.shape[1], i.e. number of columns (image WIDTH, despite the name)
disk_size = []   # file size on disk, in bytes
unreadable_images = []  # paths cv2 could not decode; excluded from the lists above

for file in tqdm(val_data):
    image_path = file["image"]
    # cv2.imread does not raise on a missing/corrupt file, it returns None.
    # No channel reordering is needed here: only the array shape is used.
    img = cv2.imread(image_path)
    if img is None:
        unreadable_images.append(image_path)
        continue

    rows, cols = img.shape[:2]
    xaxis.append(rows)
    yaxis.append(cols)
    resolution.append(rows * cols)
    disk_size.append(os.path.getsize(image_path))

if unreadable_images:
    # Make skipped files visible so the averages are not silently biased.
    print(
        f"Skipped {len(unreadable_images)} unreadable image(s), "
        f"e.g. {unreadable_images[:5]}"
    )

  0%|          | 0/1000 [00:00<?, ?it/s]

In [15]:
# Mean pixel count (rows * columns) per image.
np.mean(resolution)

np.float64(245570.512)

In [16]:
# Mean of img.shape[0] (first array dimension) over the collected images.
np.mean(xaxis)

np.float64(379.477)

In [17]:
# Mean of img.shape[1] (second array dimension) over the collected images.
np.mean(yaxis)

np.float64(629.82)

In [24]:
# Mean file size on disk, converted from bytes to KiB.
np.mean(disk_size) / 1024

np.float64(54.923392578125)