In [1]:
import os
import cv2
import argparse
import numpy as np
from skimage import io
from google.colab.patches import cv2_imshow
from PIL import Image


In [None]:
# Mount Google Drive under /content/gdrive/ (Colab only); the dataset, ground-truth
# and output folders used later in this notebook all live below that mount point.
from google.colab import drive
drive.mount('/content/gdrive/')

## Criando os Bboxes

In [None]:
# Caption texts recognised in the ground-truth boxes. crop_images() does not crop
# a box carrying one of these labels; it uses the label to pick the class of the
# boxes that follow (only the first 'PERMISSÃO' is treated that way).
list_ignored_labels = [
    'NOME',
    'CPF',
    'PERMISSÃO',
    'ACC',
    'CAT. HAB.',
    '1ª HABILITAÇÃO',
    'VALIDADE',
    'Nº REGISTRO',
    'VÁLIDA EM TODO',
    'DATA NASCIMENTO',
    # Three spellings of the same caption appear in the annotations.
    'DOC. IDENTIDADE/ÓRG EMISSOR/UF',
    'DOC. IDENTIDADE/ÓRG EMISSOR UF',
    'DOC IDENTIDADE/ÓRG EMISSOR/UF',
    'FILIAÇÃO',
]

def checkClass(name):
    """Map a field caption to its numeric class id.

    Args:
        name: Caption text to look up (compared by exact equality).

    Returns:
        The class id (0-11) of the caption, or -1 when ``name`` matches none
        of the known captions (this includes ``None``).
    """
    # (accepted spellings, class id) — variants of one caption share an id.
    caption_classes = (
        (('1ª HABILITAÇÃO',), 0),
        (('CAT. HAB.',), 1),
        (('CPF',), 2),
        (('DATA NASCIMENTO',), 3),
        ((
            'DOC. IDENTIDADE/ÓRG EMISSOR/UF',
            'DOC. IDENTIDADE/ÓRG EMISSOR UF',
            'DOC IDENTIDADE/ÓRG EMISSOR/UF',
        ), 4),
        (('FILIAÇÃO',), 5),
        (('Nº REGISTRO',), 6),
        (('NOME',), 7),
        (('VALIDADE',), 8),
        (('VÁLIDA EM TODO',), 9),
        (('PERMISSÃO',), 10),
        (('ACC',), 11),
    )

    for spellings, class_id in caption_classes:
        if name in spellings:
            return class_id
    return -1

def read_coordinates_gt(filename):
    """Parse a ground-truth text file into a list of labelled boxes.

    Every usable line has the form ``x,y,width,height,label``. Only the first
    four commas act as separators, so the label itself may contain commas.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ``(x, y, width, height, label)`` tuples in file order; the
        first four items are ``int`` and ``label`` is a stripped ``str``.
        Lines with fewer than five fields are skipped silently. Lines whose
        first four fields are not integers are reported on stdout and skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    coordinates = []

    # 'utf-8-sig' reads plain UTF-8 as well, but also drops a leading byte-order
    # mark. With plain 'utf-8' the BOM stays attached to the first field, int()
    # rejects it and the first box of the file is lost.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        for line in file:
            parts = line.strip().split(',', 4)
            if len(parts) != 5:
                continue
            try:
                x, y, width, height = map(int, parts[:4])
            except ValueError:
                print(f"Error processing line: {line.strip()}")
                continue
            coordinates.append((x, y, width, height, parts[4].strip()))

    return coordinates

def crop_images(image_path, coordinates, code, output_dir):
    """Crop the value boxes of one image and save them named by field class.

    ``coordinates`` is walked in order. A box whose label is in
    ``list_ignored_labels`` is treated as a field caption: it is not cropped,
    but it selects (through ``checkClass``) the class of the boxes that follow.
    'PERMISSÃO' is special-cased: only its first occurrence acts as a caption,
    later occurrences are cropped like any other box. Boxes seen before any
    caption are skipped because their class is unknown.

    Crops are written to ``<output_dir>/<code>/<class_id>.<n>.jpg`` where
    ``n`` counts from 1 separately for every class id.

    Args:
        image_path: Path of the image to crop from.
        coordinates: Iterable of ``(x, y, width, height, label)`` tuples.
        code: Identifier of the image; used as the output sub-folder name.
        output_dir: Root folder under which the sub-folder is created.

    Returns:
        None. The results are the files written to disk.
    """
    print(code)

    last_ignored_label = None
    label_counters = {}
    permission_quantity = 0

    # Context manager so the underlying file is released even when no box is
    # cropped or an exception interrupts the loop.
    with Image.open(image_path) as image:
        code_output_dir = os.path.join(output_dir, code)
        os.makedirs(code_output_dir, exist_ok=True)
        image_width, image_height = image.size

        for x, y, w, h, label in coordinates:
            if label in list_ignored_labels:
                if label == 'PERMISSÃO':
                    permission_quantity += 1
                # Every caption updates the current class, except a repeated
                # 'PERMISSÃO', which falls through and is cropped as a value.
                if label != 'PERMISSÃO' or permission_quantity == 1:
                    last_ignored_label = label
                    continue

            label_id = checkClass(last_ignored_label)
            if label_id == -1:
                continue

            # Small margin around the box, clamped on all four sides so the
            # crop never extends past the image (Pillow would otherwise pad
            # the out-of-range area instead of failing).
            left = max(x - 5, 0)
            top = max(y - 2, 0)
            right = min(x + w + 2, image_width)
            bottom = min(y + h + 2, image_height)
            if right <= left or bottom <= top:
                # Box is empty or lies entirely outside the image. Checked
                # before the counter is bumped so file numbering stays gapless.
                print(f"Skipping empty box: {(x, y, w, h)}")
                continue

            label_counters[label_id] = label_counters.get(label_id, 0) + 1
            file_name = f'{label_id}.{label_counters[label_id]}.jpg'

            cropped_image = image.crop((left, top, right, bottom))
            if label_id == 9:
                # Class 9 is 'VÁLIDA EM TODO'; presumably printed vertically
                # on the document, hence the rotation — TODO confirm.
                cropped_image = cropped_image.rotate(270, expand=True)
            print(f"Save image: {file_name}")
            cropped_image.save(os.path.join(code_output_dir, file_name))

# Locations on the mounted Google Drive: input images, ground-truth text files
# and the root folder that receives the crops.
folder_path = '/content/gdrive/MyDrive/BD/BID Dataset/CNH_Frente'

new_gt = '/content/gdrive/MyDrive/news_gt'

output_dir = '/content/gdrive/MyDrive/cropped_images'

# Keep only .jpg files whose name does not contain '_gt_segmentation'.
# os.listdir() returns entries in arbitrary order, so sort for reproducible runs.
image_files = sorted(
    f for f in os.listdir(folder_path)
    if f.endswith('.jpg') and '_gt_segmentation' not in f
)

for image_file in image_files:
    image_path = os.path.join(folder_path, image_file)
    # NOTE(review): replace() removes every '_in' in the name, not only a
    # trailing one; fine as long as image names only carry it as a suffix.
    image_code = os.path.splitext(image_file)[0].replace('_in', '')
    gt_file = os.path.join(new_gt, f"{image_code}_gt_ocr.txt")
    if not os.path.isfile(gt_file):
        # One missing annotation file should not abort the whole batch.
        print(f"Ground-truth file not found, skipping: {gt_file}")
        continue
    coordinates = read_coordinates_gt(gt_file)
    crop_images(image_path, coordinates, image_code, output_dir)