## Verifying Dataset


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip /content/project-3-at-2025-05-20-14-35-1549ca81.zip -d /content

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv

In [7]:
# Load one sample image to inspect its dimensions before drawing any boxes.
img = cv.imread("/content/images/049a7787-image-4.jpg")

# Bare last expression so the notebook displays the array shape
# (rows, columns, channels).
img.shape

(1792, 1792, 3)

In [3]:
# Folders of the unzipped YOLO export, plus the folder that receives the
# box-annotated preview images.
img_root_dir, lbl_root_dir = "/content/images", "/content/labels"
output_dir = "/content/output/"

# Every entry of the image folder is treated as one example.
path_list = os.listdir(img_root_dir)
os.makedirs(output_dir, exist_ok=True)
print(f"Dataset Size : {len(path_list)}")

Dataset Size : 60


In [8]:
def xywm_to_xyxy(yolo_bbox, img_width=1792, img_height=1792):
    """Convert a normalized YOLO box into integer pixel corner coordinates.

    Parameters:
        yolo_bbox: Iterable of exactly four values (numbers or numeric
            strings): x_center, y_center, width, height, each expressed as a
            fraction of the image size.
        img_width: Image width in pixels used to scale the x coordinates.
        img_height: Image height in pixels used to scale the y coordinates.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of ints: top-left and bottom-right corners,
        truncated towards zero.
    """
    cx, cy, box_w, box_h = (float(value) for value in yolo_bbox)
    half_w = box_w / 2
    half_h = box_h / 2

    corners = (
        (cx - half_w) * img_width,
        (cy - half_h) * img_height,
        (cx + half_w) * img_width,
        (cy + half_h) * img_height,
    )
    return tuple(int(corner) for corner in corners)

In [None]:
# subset = np.random.choice(path_list, 10)

# Draw every labelled box on its image and save the result to output_dir so
# the annotations can be checked by eye.
for path in path_list:
  ## images and label paths
  img_path = os.path.join(img_root_dir, path)
  # splitext handles extensions of any length (".jpeg", ".png", ...),
  # unlike slicing off the last four characters.
  lbl_path = os.path.join(lbl_root_dir, os.path.splitext(path)[0] + ".txt")

  ## read image; cv.imread returns None instead of raising on failure
  image = cv.imread(img_path)
  if image is None:
    print(f"skipping {img_path}: not a readable image")
    continue
  if not os.path.exists(lbl_path):
    print(f"skipping {img_path}: no label file at {lbl_path}")
    continue

  ## scale boxes with the real image size, not the 1792x1792 defaults
  img_height, img_width = image.shape[:2]

  ## parse labels (blank lines are ignored)
  with open(lbl_path, 'r') as f:
    rows = [line.split() for line in f if line.strip()]

  for row in rows:
    # row[0] is the class id; the next four fields are the normalized box
    x1, y1, x2, y2 = xywm_to_xyxy(row[1:5], img_width, img_height)
    cv.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

  ## save the image
  output_img_path = os.path.join(output_dir, os.path.basename(img_path))
  cv.imwrite(output_img_path, image)

  print(f"image save at {output_img_path}")

In [None]:
!zip /content/output_3.zip -r /content/output/

In [None]:
!rm -rf /content/output/

## Preparing the Detection Dataset

In [None]:
from google.colab import drive
drive.mount("/content/drive")

In [10]:
!pip install -q albumentations tqdm

In [11]:
!mkdir /content/dataset_v2/

In [None]:
## Unzip the yolo format images and labels
## Structure: -dataset
#                - images
#                - labels


!unzip /content/project-3-at-2025-05-17-19-58-99bd2d3f.zip -d /content/

In [12]:
import os
import cv2
import numpy as np
import albumentations as A

# Input & output directories
image_dir = "/content/images/"  # Folder with original images
label_dir = "/content/labels/"  # Folder with YOLO labels
# Absolute paths: the later cells read "/content/rotated_images/" and
# "/content/rotated_labels/", so the outputs must not depend on the current
# working directory.
output_image_dir = "/content/rotated_images/"  # Save rotated images
output_label_dir = "/content/rotated_labels/"  # Save updated labels

# Ensure output directories exist
for out_dir in (output_image_dir, output_label_dir):
    os.makedirs(out_dir, exist_ok=True)

# Define augmentation: limit=[90, 90] pins the sampled angle to exactly 90
# degrees and p=1 applies it to every image.
# NOTE(review): a positive angle is counter-clockwise in OpenCV's convention;
# confirm the installed albumentations version rotates the same way.
transform = A.Compose([
    A.Rotate(limit=[90, 90], p=1)
])

def correct_yolo_labels_after_rotation(label_path, img_width, img_height):
    """Return the YOLO label lines of a file remapped for a 90-degree rotation.

    Each box centre is mapped in pixel space as ``(x, y) -> (y, img_width - x)``
    and the box width and height are swapped. With the image origin at the
    top-left and y pointing down, that is a 90-degree *counter-clockwise* turn.
    The result is re-normalized against the rotated canvas, which is
    ``img_height`` wide and ``img_width`` tall.

    Parameters:
        label_path (str): Path to a YOLO label file with one
            ``class x_center y_center width height`` entry per line, all box
            values normalized to the 0-1 range.
        img_width (int): Width in pixels of the image *before* rotation.
        img_height (int): Height in pixels of the image *before* rotation.

    Returns:
        list[str]: One newline-terminated YOLO line per input box, in the
        original order. Blank lines in the file are skipped.

    Raises:
        ValueError: If a non-blank line does not hold exactly five numbers.
    """
    new_labels = []

    with open(label_path, "r") as file:
        for line_no, line in enumerate(file, start=1):
            data = line.split()
            if not data:
                # Trailing or blank lines are common in exported label files.
                continue
            if len(data) != 5:
                raise ValueError(
                    f"{label_path}:{line_no}: expected 5 fields, got {len(data)}"
                )
            class_id, x_center, y_center, bbox_w, bbox_h = map(float, data)

            # Convert to absolute values
            x_center_abs = x_center * img_width
            y_center_abs = y_center * img_height
            bbox_w_abs = bbox_w * img_width
            bbox_h_abs = bbox_h * img_height

            # Transform coordinates for the 90-degree counter-clockwise turn
            new_x_center_abs = y_center_abs
            new_y_center_abs = img_width - x_center_abs
            new_bbox_w_abs, new_bbox_h_abs = bbox_h_abs, bbox_w_abs  # Swap width & height

            # Normalize back to YOLO format; the axes have swapped, so x is
            # divided by the old height and y by the old width.
            new_x_center = new_x_center_abs / img_height
            new_y_center = new_y_center_abs / img_width
            new_bbox_w = new_bbox_w_abs / img_height
            new_bbox_h = new_bbox_h_abs / img_width

            new_labels.append(
                f"{int(class_id)} {new_x_center} {new_y_center} {new_bbox_w} {new_bbox_h}\n"
            )

    return new_labels



# Process all images: write a rotated copy of each image and, when a label
# file exists, the matching rotated labels, both under an "aug_0_" prefix.
for img_file in os.listdir(image_dir):
    if not img_file.endswith((".jpg", ".png")):
        continue

    img_path = os.path.join(image_dir, img_file)
    # splitext only swaps the real extension; str.replace would also rewrite
    # a ".jpg"/".png" that happens to appear in the middle of the name.
    label_name = os.path.splitext(img_file)[0] + ".txt"
    label_path = os.path.join(label_dir, label_name)

    # Load image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(img_path)
    if image is None:
        print(f"Skipped (unreadable image): {img_file}")
        continue
    img_height, img_width = image.shape[:2]

    # Rotate image
    # NOTE(review): the label remapping assumes the rotated canvas has its
    # width and height swapped; confirm this holds for non-square inputs with
    # the configured transform.
    rotated_image = transform(image=image)["image"]

    # Save rotated image
    rotated_img_path = os.path.join(output_image_dir, f"aug_0_{img_file}")
    cv2.imwrite(rotated_img_path, rotated_image)

    # Adjust & save YOLO labels (images without a label file get no labels)
    if os.path.exists(label_path):
        new_labels = correct_yolo_labels_after_rotation(label_path, img_width, img_height)
        rotated_label_path = os.path.join(output_label_dir, f"aug_0_{label_name}")

        with open(rotated_label_path, "w") as file:
            file.writelines(new_labels)

    print(f"Processed: {img_file}")

print("Augmentation completed for all images.")

  check_for_updates()


Processed: e17ce386-image-7.jpg
Processed: 6c656325-image-8.jpg
Processed: 7354a685-image-2.jpg
Processed: 9ebe8e73-image-13.jpg
Processed: 4c0438f6-image-5.jpg
Processed: db4e9248-image-13.jpg
Processed: 2588bcad-image-12.jpg
Processed: c6b3eda5-image-0.jpg
Processed: 949ee203-image-5.jpg
Processed: 36fab787-image-2.jpg
Processed: 24a7144f-image-0.jpg
Processed: b3d4b7e2-image-2.jpg
Processed: 1fe06538-image-3.jpg
Processed: 5f11dd54-image-6.jpg
Processed: a94f9b41-image-9.jpg
Processed: d8e116fe-image-10.jpg
Processed: b013a0cc-image-14.jpg
Processed: 0ddc6c33-image-6.jpg
Processed: 96c11d3f-image-1.jpg
Processed: 94c21519-image-0.jpg
Processed: d615e566-image-6.jpg
Processed: d4a77215-image-10.jpg
Processed: bafccc6a-image-4.jpg
Processed: 1b22a18b-image-4.jpg
Processed: 8abf181c-image-9.jpg
Processed: 57e061ef-image-10.jpg
Processed: 5a6f57f2-image-14.jpg
Processed: 559dd430-image-7.jpg
Processed: 934f75b7-image-10.jpg
Processed: a5b54ee1-image-5.jpg
Processed: 0ad6ea7e-image-11.jp

In [None]:
import cv2
import os


def draw_yolo_bboxes(image_path, label_path, output_dir="/content/output/"):
    """
    Draw the YOLO boxes of a label file onto its image and save the result.

    Parameters:
        image_path (str): Path to the image to annotate.
        label_path (str): Path to the YOLO label file
            (``class x_center y_center width height`` per line, normalized).
        output_dir (str): Folder that receives the annotated copy. The copy
            keeps the file name of ``image_path``; the folder is created if
            it does not exist.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.

    Saves:
        The annotated image at ``output_dir/<file name of image_path>`` and
        prints that path.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"could not read image: {image_path}")
    h, w = image.shape[:2]

    with open(label_path, "r") as file:
        # Skip blank lines so a trailing newline does not break the unpack.
        rows = [line.split() for line in file if line.strip()]

    for data in rows:
        class_id, x_center, y_center, bbox_w, bbox_h = map(float, data)

        # Convert YOLO format (normalized) to pixel values
        x_center, y_center = int(x_center * w), int(y_center * h)
        bbox_w, bbox_h = int(bbox_w * w), int(bbox_h * h)

        # Get bounding box coordinates
        x1 = int(x_center - bbox_w / 2)
        y1 = int(y_center - bbox_h / 2)
        x2 = int(x_center + bbox_w / 2)
        y2 = int(y_center + bbox_h / 2)

        # Draw rectangle and class label (text sits just above the box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, f"Class {int(class_id)}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Save the annotated image under its original file name
    os.makedirs(output_dir, exist_ok=True)
    save_img_path = os.path.join(output_dir, os.path.basename(image_path))
    cv2.imwrite(save_img_path, image)
    print("img saved at ", save_img_path)

# Run the function on every rotated image.
# makedirs(exist_ok=True) instead of mkdir: the folder may be left over from
# an earlier run of the notebook.
os.makedirs("/content/output/", exist_ok=True)
for img_file in os.listdir("/content/rotated_images/"):
  img_path = "/content/rotated_images/" + img_file
  # splitext swaps the extension whatever its length
  label_path = "/content/rotated_labels/" + os.path.splitext(img_file)[0] + ".txt"
  if not os.path.exists(label_path):
    # The rotation step saves an image even when it had no label file.
    print("no labels for ", img_path)
    continue
  draw_yolo_bboxes(img_path, label_path)

In [16]:
# Create the merged dataset layout (images + labels) if it is not there yet.
for dataset_dir in ("/content/dataset_v2/images/", "/content/dataset_v2/labels/"):
    os.makedirs(dataset_dir, exist_ok=True)

In [17]:
!cp -r /content/images/* /content/dataset_v2/images/
!cp -r /content/rotated_images/* /content/dataset_v2/images/
!cp -r /content/labels/* /content/dataset_v2/labels/
!cp -r /content/rotated_labels/* /content/dataset_v2/labels/

In [None]:
!rm -rf /content/images
!rm -rf /content/labels
!rm -rf /content/rotated_images/
!rm -rf /content/rotated_labels/
!rm -rf /content/output/

In [None]:
import cv2
import albumentations as A
import os
from tqdm import tqdm

# Define augmentation pipeline (no geometric changes).
# Only pixel-value transforms are listed, which is why the processing loop
# below copies each label file unchanged. This rebinds the name `transform`
# that the rotation cell also uses.
transform = A.Compose([
    # Mild brightness/contrast shift, applied to roughly 30% of images.
    A.RandomBrightnessContrast(
     brightness_limit=0.2,
     contrast_limit=0.2,
     brightness_by_max=False,
     p=0.3
    ),

    A.GaussianBlur(blur_limit=3, p=0.3),  # Light Gaussian blur
    A.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1, p=0.4),


    # NOTE(review): RandomGamma appears twice: here with p=1 (always applied,
    # library-default limits) and again below with p=0.5. Confirm both are intended.
    A.RandomGamma(p=1),               # Adjust gamma
    # NOTE(review): both fog coefficients are 1, presumably the strongest
    # setting. Verify against the installed albumentations version, where
    # these argument names may be deprecated.
    A.RandomFog(fog_coef_lower=1, fog_coef_upper=1, p=0.3),
    A.RandomGamma(gamma_limit=(80, 120), p=0.5),

    # A.Blur(blur_limit=(5, 7), p=0.2),
])
# bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']),



# Paths
input_images = "/content/dataset_v2/images/"     # Original images folder
input_labels = "/content/dataset_v2/labels/"     # Original Labels folder
output_images = "/content/dataset_v2/images_1/"   # Augmented images folder
output_labels = "/content/dataset_v2/labels_1/"   # Labels copied for the augmented images


os.makedirs(output_images, exist_ok=True)
os.makedirs(output_labels, exist_ok=True)


# Pick the first "aug_<n>_" index not already used in the input folder.
# The rotated copies are already named "aug_0_<file>", so writing
# "aug_0_<file>" here would overwrite them (image and label) once the
# augmented files are moved into the dataset folders.
existing_files = os.listdir(input_images)
augment = 0
while any(name.startswith(f"aug_{augment}_") for name in existing_files):
    augment += 1

# Process images
for image_file in tqdm(existing_files):
    if not image_file.endswith(('.png', '.jpg', '.jpeg')):
        continue

    # splitext handles ".jpeg" too; slicing off three characters does not.
    stem = os.path.splitext(image_file)[0]
    img_path = os.path.join(input_images, image_file)
    lbl_path = os.path.join(input_labels, stem + ".txt")

    # Check for None before cvtColor, which raises on a missing image.
    bgr = cv2.imread(img_path)
    if bgr is None:
        print(f"skipping {img_path}: not a readable image")
        continue
    if not os.path.exists(lbl_path):
        print(f"skipping {img_path}: no label file at {lbl_path}")
        continue

    # Augment in RGB (the pipeline includes colour transforms) ...
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    augmented = transform(image=img)["image"]

    # Output names carry the augmentation index as a prefix
    output_img_path = os.path.join(output_images, f"aug_{augment}_{image_file}")
    output_lbl_path = os.path.join(output_labels, f"aug_{augment}_{stem}.txt")

    ## read the label.txt file (boxes are unchanged by these transforms)
    with open(lbl_path, 'r') as infile:
        content = infile.read()

    ## save augmented image and label.txt
    # ... and convert back to BGR, the channel order cv2.imwrite expects;
    # otherwise red and blue are swapped in every saved file.
    cv2.imwrite(output_img_path, cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR))
    with open(output_lbl_path, "w") as outfile:
        outfile.write(content)


print(f"original examples ---> {len(os.listdir(input_images))}")
print(f"augmented examples ---> {len(os.listdir(output_images))}")
print(f"total examples ---> {len(os.listdir(input_images)) + len(os.listdir(output_images))}")

In [None]:
!mv  /content/dataset_v2/images_1/* /content/dataset_v2/images/
!mv /content/dataset_v2/labels_1/* /content/dataset_v2/labels/

In [None]:
!rm -rf /content/dataset_v2/images_1
!rm -rf /content/dataset_v2/labels_1

In [None]:
import zipfile
import os

# Source folder containing files
source_folder = "/content/dataset_v2/"  # Replace with your folder path
zip_filename = "/content/custom-dataset-fractiion-imporovement.zip"  # Destination zip file path

# os.walk yields nothing for a missing folder, which would silently produce
# an empty archive; fail loudly instead.
if not os.path.isdir(source_folder):
    raise FileNotFoundError(f"Source folder '{source_folder}' does not exist; nothing to zip.")

# Create a new zip file (an existing file of the same name is overwritten)
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Walk through all directories and files
    for root, _, files in os.walk(source_folder):
        for file in files:
            file_path = os.path.join(root, file)
            # Store paths relative to the source folder so the archive
            # unpacks to images/... and labels/... rather than /content/...
            arcname = os.path.relpath(file_path, source_folder)
            zipf.write(file_path, arcname=arcname)



print(f"All files in '{source_folder}' have been zipped into '{zip_filename}'.")
print(f"Size of zip file is {os.path.getsize(zip_filename)} bytes")

In [None]:
# copy augmented zip file into drive
!cp /content/custom-dataset-fractiion-imporovement.zip /content/drive/MyDrive/OCR-Custom-Dataset/

## Preparing the Dataset for the Recognition Model

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [19]:
import cv2 as cv
import cv2
import json
import os
import matplotlib.pyplot as plt

In [None]:
# unzip the yolo images into directory

!unzip /content/project-3-at-2025-05-17-19-58-99bd2d3f.zip -d /content/

In [20]:
## Json data file

# Load the annotation export: a list of per-image records whose "ocr",
# "bbox" and "transcription" entries are read by the crop cells below.
# The encoding is given explicitly because JSON is UTF-8 and the
# transcriptions may contain non-ASCII text.
with open("/content/project-3-at-2025-05-20-14-35-1549ca81 (1).json", 'r', encoding="utf-8") as file:

  labels = json.load(file)

In [21]:
# Source folders (unzipped YOLO export) and destination folders for the
# word crops used by the recognition model.
img_root_dir, lbl_root_dir = "/content/images/", "/content/labels/"
output_dir_train, output_dir_test = "./dataset/train/", "./dataset/val/"

for crop_dir in (output_dir_train, output_dir_test):
  os.makedirs(crop_dir, exist_ok=True)

# Records of the form {"image_path": ..., "text": ...}, appended by the crop cells.
data_1 = []
img_index = 0
path_list = os.listdir(img_root_dir)
print(f"Dataset Size : {len(path_list)}")

Dataset Size : 60


In [None]:
## it can be used for both train and test directory and lables generation

# Input:
  # - images
  # - labels.json

# output:
  # dataset
  #   - train
  #   - label.json  [img_path, transcription]



## Crop every transcribed box out of its page image into output_dir_train
## and record one {"image_path", "text"} entry per crop.

# Start from an empty list so re-running this cell, or running the val cell
# as well, never duplicates or mixes entries.
data_1 = []

for label in labels:
  ## manipulate image path and read image
  img_name = label['ocr'].split('/')[-1]
  img_path = os.path.join(img_root_dir, img_name)
  image = cv.imread(img_path)
  if image is None:
    # cv.imread returns None on failure; slicing None would raise below.
    print(f"skipping {img_path}: not a readable image")
    continue

  try:
    ## boxes and transcriptions are parallel lists
    bbox = label['bbox']
    transcription = label['transcription']

  except KeyError:
    # records without annotations carry no crops
    continue

  saved = 0
  # zip stops at the shorter list, so a missing transcription cannot raise
  for i, (box, text) in enumerate(zip(bbox, transcription)):
    if not isinstance(text, str):
      continue

    ## box values are percentages of the original image size
    x_min = int(box['x'] / 100 * box['original_width'])
    y_min = int(box['y'] / 100 * box['original_height'])
    x_max = int(x_min + (box['width'] / 100 * box['original_width']))
    y_max = int(y_min + (box['height'] / 100 * box['original_height']))

    ## cut the patch (word) from the image
    cropped_img = image[y_min:y_max, x_min:x_max]
    if cropped_img.size == 0:
      # a zero-area box cannot be written as an image
      continue

    ## save image
    output_img_path = os.path.join(output_dir_train, f"crop_{i}_{img_name}")
    cv2.imwrite(output_img_path, cropped_img)

    ## record the crop and its text
    data_1.append({"image_path" : f"{output_img_path}", "text" : text})
    saved += 1

  # Report per source image. The old message read the last crop path, which
  # is unset or stale when an image yields no crops.
  print(f"{saved} crops saved for {img_path}")



with open("test_01.json", "w") as file:
  file.write(json.dumps(data_1))

In [None]:
## Optional; only to be used whenever need test images from
# the same labeled images
## it can be used for test directory and lables generation

# Input:
  # - images
  # - labels.json

# output:
  # dataset
  #   - train
  #   - label.json  [img_path, transcription]



## Crop every transcribed box out of its page image into output_dir_test
## and record one {"image_path", "text"} entry per crop.

# Start from an empty list so this file holds only the crops written by this
# cell, not entries left over from an earlier cell or run.
data_1 = []

for label in labels:
  ## manipulate image path and read image
  img_name = label['ocr'].split('/')[-1]
  img_path = os.path.join(img_root_dir, img_name)
  image = cv.imread(img_path)
  if image is None:
    # cv.imread returns None on failure; slicing None would raise below.
    print(f"skipping {img_path}: not a readable image")
    continue

  try:
    ## boxes and transcriptions are parallel lists
    bbox = label['bbox']
    transcription = label['transcription']

  except KeyError:
    # records without annotations carry no crops
    continue

  saved = 0
  # zip stops at the shorter list, so a missing transcription cannot raise
  for i, (box, text) in enumerate(zip(bbox, transcription)):
    if not isinstance(text, str):
      continue

    ## box values are percentages of the original image size
    x_min = int(box['x'] / 100 * box['original_width'])
    y_min = int(box['y'] / 100 * box['original_height'])
    x_max = int(x_min + (box['width'] / 100 * box['original_width']))
    y_max = int(y_min + (box['height'] / 100 * box['original_height']))

    ## cut the patch (word) from the image
    cropped_img = image[y_min:y_max, x_min:x_max]
    if cropped_img.size == 0:
      # a zero-area box cannot be written as an image
      continue

    ## save image
    output_img_path = os.path.join(output_dir_test, f"crop_{i}_{img_name}")
    cv2.imwrite(output_img_path, cropped_img)

    ## record the crop and its text
    data_1.append({"image_path" : f"{output_img_path}", "text" : text})
    saved += 1

  # Report per source image. The old message read the last crop path, which
  # is unset or stale when an image yields no crops.
  print(f"{saved} crops saved for {img_path}")



with open("02_custom-dataset-fractiion-imporovement_test.json", "w") as file:
  file.write(json.dumps(data_1))

In [None]:
import zipfile
import os

# Source folder containing files
source_folder = "/content/dataset"  # Replace with your folder path
zip_filename = "/content/02_custom-dataset-fractiion-imporovement.zip"  # Destination zip file path

# os.walk yields nothing for a missing folder, which would silently produce
# an empty archive; fail loudly instead.
# NOTE(review): the crop cells write to "./dataset/..." and save their label
# JSON files under bare relative names, so this path only matches when the
# working directory is /content, and the JSON files are not inside this
# folder. Confirm that leaving them out of the archive is intended.
if not os.path.isdir(source_folder):
    raise FileNotFoundError(f"Source folder '{source_folder}' does not exist; nothing to zip.")

# Create a new zip file (an existing file of the same name is overwritten)
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Walk through all directories and files
    for root, _, files in os.walk(source_folder):
        for file in files:
            file_path = os.path.join(root, file)
            # Store paths relative to the source folder so the archive
            # unpacks to train/... and val/... rather than /content/...
            arcname = os.path.relpath(file_path, source_folder)
            zipf.write(file_path, arcname=arcname)



print(f"All files in '{source_folder}' have been zipped into '{zip_filename}'.")
print(f"Size of zip file is {os.path.getsize(zip_filename)} bytes")

In [None]:
!cp /content/02_custom-dataset-fractiion-imporovement.zip /content/drive/MyDrive/OCR-Custom-Dataset/