In [2]:
import os
import ast
import shutil as sh
from pathlib import Path
import random

import numpy as np
import pandas as pd

import PIL

import torch

from tqdm.auto import tqdm

from IPython.display import Image, clear_output

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
!git clone https://github.com/ultralytics/yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 17360, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (40/40), done.[K
remote: Total 17360 (delta 31), reused 12 (delta 9), pack-reused 17311 (from 4)[K
Receiving objects: 100% (17360/17360), 16.25 MiB | 28.16 MiB/s, done.
Resolving deltas: 100% (11899/11899), done.


In [4]:
!pip install -qr /kaggle/working/yolov5/requirements.txt

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.0/950.0 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h

# Convert annotations to dataframe

In [5]:
import json

In [6]:
# Directory containing the JSON annotation files that are loaded into `df` below.
# NOTE(review): absolute Kaggle input path — adjust when running elsewhere.
ANNOTATION_DIR = "/kaggle/input/ann-images/ann_images/ann"

In [7]:
def create_dataframe(img_name, data, df):
  """Append one row per annotated object in ``data`` to ``df``.

  Args:
    img_name: Value stored in the ``img_name`` column of every new row.
    data: Parsed annotation dict. ``data['objects']`` is iterated; each
      object must provide ``classTitle`` and ``points.exterior`` (a list of
      ``[x, y]`` points, of which the first two are used for width/height).
    df: DataFrame accumulated so far. May be empty.

  Returns:
    ``df`` itself when ``data`` has no objects; otherwise a DataFrame with the
    new rows (columns ``img_name``, ``class``, ``bounds``, ``width``,
    ``height``) appended. When ``df`` has no rows the result consists of the
    new rows only. The index is renumbered from 0.
  """
  records = []
  for obj in data['objects']:
    exterior = obj['points']['exterior']
    # Flatten [[x0, y0], [x1, y1], ...] into (x0, y0, x1, y1, ...).
    flat_bounds = tuple(coord for point in exterior for coord in (point[0], point[1]))
    records.append({
        "img_name": img_name,
        "class": obj['classTitle'],
        "bounds": flat_bounds,
        "width": exterior[1][0] - exterior[0][0],
        "height": exterior[1][1] - exterior[0][1],
    })

  if not records:
    return df

  # Build all rows at once instead of concatenating one-row frames in a loop.
  new_rows = pd.DataFrame(
      records, columns=["img_name", "class", "bounds", "width", "height"])
  if df.shape[0] == 0:
    return new_rows
  return pd.concat([df, new_rows], axis=0, ignore_index=True)


# Read every annotation file in ANNOTATION_DIR and accumulate its objects in `df`.
df = pd.DataFrame()
files_list = os.listdir(ANNOTATION_DIR)
for ann_file in files_list:
  # The image name is whatever precedes the first ".json" in the file name.
  img_name = ann_file.partition(".json")[0]
  with open(ANNOTATION_DIR + "/" + ann_file) as fh:
    data = json.loads(fh.read())
  df = create_dataframe(img_name, data, df)

In [12]:
# Number of annotation rows (one per annotated object) for each class.
df['class'].value_counts()

class
Signature    140
Logo          40
Name: count, dtype: int64

# Tile Images

In [15]:
# Validation images: every image name that appears in annotation rows 163 and onward.
# NOTE(review): 163 is a hard-coded row offset, and the row order of `df` follows
# os.listdir(), whose order is arbitrary — confirm the split is reproducible.
val_df = df.iloc[163:]
val_index = val_df['img_name'].unique()
val_index

array(['Detailed_Divorce_Agreement_9_pg2.jpg',
       'Detailed_Divorce_Agreement_1_pg2.jpg',
       'Residential_Lease_Agreement_Ashley_Bender_Lawrence_Williams_pg0.jpg',
       'generated_medical_bill_1_pg1.jpg',
       'generated_medical_bill_13_pg0.jpg',
       'Partnership_Agreement_Jason_Sanford_Heather_Pacheco_pg1.jpg',
       'document_1_pg0.jpg', 'generated_medical_bill_15_pg1.jpg',
       'Partnership_Agreement_Dominic_Bush_Ryan_Mcintosh_pg1.jpg',
       'Detailed_Divorce_Agreement_1_pg3.jpg',
       'generated_medical_bill_11_pg1.jpg'], dtype=object)

In [41]:
import os
import tqdm.notebook

# Tile geometry in pixels, and the overlap used when stepping across an image.
TILE_WIDTH = 1200
TILE_HEIGHT = 1200
TILE_OVERLAP = 64
# Minimum visible fraction (per axis) a box must keep inside a tile to be labelled.
TRUNCATED_PERCENT = 0.3
# When True, tiles without tags are re-written even if the file already exists.
_overwriteFiles = True

# Output image folders for each split; created up front if missing.
TILES_DIR = {
    'train': Path('train6/images'),
    'val': Path('val6/images/'),
}
for folder in TILES_DIR.values():
    os.makedirs(folder, exist_ok=True)

In [42]:
# Output label folders for each split; created up front if missing.
LABELS_DIR = {
    'train': Path('train6/labels/'),
    'val': Path('val6/labels/'),
}
for folder in LABELS_DIR.values():
    os.makedirs(folder, exist_ok=True)

In [43]:
# Directory with the source images that are tiled below.
# NOTE(review): absolute Kaggle input path — adjust when running elsewhere.
IMG_DIR = "/kaggle/input/ann-images/ann_images/img"
# Names of all entries in IMG_DIR (order as returned by os.listdir, i.e. arbitrary).
img_list = os.listdir(IMG_DIR)

In [44]:
# Class name -> integer class index written to the label files.
# The indices follow the order of `names` in the dataset.yaml written further down.
class_mapping = {"Signature": 0, "Logo": 1}

In [45]:
# Number of entries found in IMG_DIR.
len(img_list)

108

In [46]:
def tag_is_inside_tile(bounds, class_index, x_start, y_start, width, height, truncated_percent):
    """Convert a bounding box to a tile-relative, normalised label.

    Args:
        bounds: ``(x_min, y_min, x_max, y_max)`` in source-image pixels. The
            two x values (and the two y values) may be given in either order.
        class_index: Value returned unchanged as the first tuple element.
        x_start: X offset of the tile's left edge in the source image.
        y_start: Y offset of the tile's top edge in the source image.
        width: Tile width in pixels.
        height: Tile height in pixels.
        truncated_percent: Minimum fraction of the box that must remain
            inside the tile along each axis.

    Returns:
        ``(class_index, x_center, y_center, x_extend, y_extend)`` with the
        centre and extent of the clipped box divided by the tile width/height,
        or ``None`` when the box has no area, lies outside the tile, or is
        truncated below ``truncated_percent`` on either axis.
    """
    x_min, y_min, x_max, y_max = bounds
    # Shift into tile coordinates; sort so reversed corner order is tolerated.
    x_min, x_max = sorted((x_min - x_start, x_max - x_start))
    y_min, y_max = sorted((y_min - y_start, y_max - y_start))

    box_w = x_max - x_min
    box_h = y_max - y_min
    # A zero-extent box would otherwise divide by zero in the ratio checks below.
    if box_w <= 0 or box_h <= 0:
        return None

    if (x_min > width) or (x_max < 0.0) or (y_min > height) or (y_max < 0.0):
        return None

    x_max_trunc = min(x_max, width)
    x_min_trunc = max(x_min, 0)
    if (x_max_trunc - x_min_trunc) / box_w < truncated_percent:
        return None

    # Clip against the tile HEIGHT on the y axis (previously clipped against width).
    y_max_trunc = min(y_max, height)
    y_min_trunc = max(y_min, 0)
    if (y_max_trunc - y_min_trunc) / box_h < truncated_percent:
        return None

    x_center = (x_min_trunc + x_max_trunc) / 2.0 / width
    y_center = (y_min_trunc + y_max_trunc) / 2.0 / height
    x_extend = (x_max_trunc - x_min_trunc) / width
    y_extend = (y_max_trunc - y_min_trunc) / height

    return (class_index, x_center, y_center, x_extend, y_extend)

# Cut every source image into TILE_WIDTH x TILE_HEIGHT tiles and write one
# label file per tile. Tiles that contain at least one tag are additionally
# written TAGGED_TILE_COPIES times under "_dup<i>" names.
TAGGED_TILE_COPIES = 10

for img_path in tqdm.notebook.tqdm(img_list):
    # Close the file handle promptly and force 3-channel RGB so grayscale or
    # RGBA inputs fit the 3-channel tile canvas.
    with PIL.Image.open(f"{IMG_DIR}/{img_path}", mode='r') as pil_img:
        np_img = np.array(pil_img.convert('RGB'), dtype=np.uint8)

    # NumPy image arrays are (rows, cols, channels), i.e. (height, width, channels).
    IMAGE_HEIGHT, IMAGE_WIDTH = np_img.shape[:2]
    img_labels = df[df["img_name"] == img_path]
    # The split depends only on the image, so decide it once per image.
    folder = 'val' if img_path in val_index else 'train'

    X_TILES = (IMAGE_WIDTH + TILE_WIDTH - TILE_OVERLAP - 1) // (TILE_WIDTH - TILE_OVERLAP)
    Y_TILES = (IMAGE_HEIGHT + TILE_HEIGHT - TILE_OVERLAP - 1) // (TILE_HEIGHT - TILE_OVERLAP)
    for x in range(X_TILES):
        for y in range(Y_TILES):

            x_end = min((x + 1) * TILE_WIDTH - TILE_OVERLAP * (x != 0), IMAGE_WIDTH)
            # Clamp at 0: a negative start would be read by NumPy as
            # "count from the end" and would also shift the labels.
            x_start = max(x_end - TILE_WIDTH, 0)
            y_end = min((y + 1) * TILE_HEIGHT - TILE_OVERLAP * (y != 0), IMAGE_HEIGHT)
            y_start = max(y_end - TILE_HEIGHT, 0)

            tile_stem = img_path + "_" + str(x_start) + "_" + str(y_start)
            save_tile_path = TILES_DIR[folder].joinpath(tile_stem + ".jpg")
            save_label_path = LABELS_DIR[folder].joinpath(tile_stem + ".txt")

            # Zero-padded canvas; the crop is placed in the top-left corner.
            cut_tile = np.zeros(shape=(TILE_HEIGHT, TILE_WIDTH, 3), dtype=np.uint8)
            cropped_img = np_img[y_start:y_end, x_start:x_end, :]
            h, w = cropped_img.shape[:2]
            cut_tile[:h, :w, :] = cropped_img

            found_tags = [
                tag_is_inside_tile(
                    bounds=bounds,
                    class_index=class_mapping[cls],  # Map class name to index
                    x_start=x_start,
                    y_start=y_start,
                    width=TILE_WIDTH,
                    height=TILE_HEIGHT,
                    truncated_percent=TRUNCATED_PERCENT
                )
                for cls, bounds in zip(img_labels['class'], img_labels['bounds'])
            ]
            found_tags = [el for el in found_tags if el is not None]

            # One "class x_center y_center x_extend y_extend" line per tag.
            label_text = ''.join(
                ' '.join(str(value) for value in tag) + '\n' for tag in found_tags
            )

            if len(found_tags) > 0:
                # The pixel data is identical for every copy, so wrap it once.
                tagged_tile_img = PIL.Image.fromarray(cut_tile)
                for dup_index in range(TAGGED_TILE_COPIES):
                    duplicated_tile_path = TILES_DIR[folder].joinpath(
                        tile_stem + f"_dup{dup_index}.jpg"
                    )
                    duplicated_label_path = LABELS_DIR[folder].joinpath(
                        tile_stem + f"_dup{dup_index}.txt"
                    )

                    # Save duplicated image
                    tagged_tile_img.save(duplicated_tile_path)

                    # Save duplicated labels
                    with open(duplicated_label_path, 'w') as f:
                        f.write(label_text)

            else:
                if _overwriteFiles or not os.path.isfile(save_tile_path):
                    cut_tile_img = PIL.Image.fromarray(cut_tile)
                    cut_tile_img.save(save_tile_path)

            # The un-suffixed label file is written for every tile, as before.
            # NOTE(review): for tiles with tags no un-suffixed image exists, so
            # this file has no matching image — confirm whether that is intended.
            with open(save_label_path, 'w') as f:
                f.write(label_text)

  0%|          | 0/108 [00:00<?, ?it/s]

In [47]:
# Dataset description consumed by YOLOv5's train.py via --data:
# split locations, number of classes and class names.

CONFIG = """
# train and val datasets (image directory or *.txt file with image paths)
train: /kaggle/working/train6/
val: /kaggle/working/val6/

# number of classes
nc: 2

# class names
names: ['Signature','Logo']
"""

# Written to the current working directory.
with open("dataset.yaml", mode="w") as yaml_file:
    yaml_file.write(CONFIG)

In [48]:
!python /kaggle/working/yolov5/train.py --cfg yolov5s.yaml --imgsz 1200 --batch-size 32 --epochs 100 --data dataset.yaml --weights yolov5s.pt

2025-03-29 03:48:19.264321: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-29 03:48:19.286403: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-29 03:48:19.292893: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled 

In [None]:
%cd /kaggle/working

In [None]:
# import glob
# from IPython.display import Image, display

# for image_path in glob.glob('yolov5/runs/detect/exp/*.jpg'):
#       display(Image(filename=image_path, width=1024))
#       print("\n")