In [None]:
# Mount Google Drive at /content/drive; the input and output paths used
# below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Create hypothesis

In [None]:
import os
import json
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Base folder whose subfolders contain the images to run OCR on.
# Only subfolders named in `image_list_file` are processed.
base_folder = '/content/drive/MyDrive/cropped_images'

# Path to the text file holding the comma-separated names of the
# subfolders (of `base_folder`) to be processed.
image_list_file = '/content/drive/MyDrive/dataset/test_files.txt'

# Function to perform text recognition in an image
def perform_ocr(image_path):
    """Recognize the text contained in a single image.

    Uses the module-level ``processor`` and ``model`` objects, which must
    be initialized before this function is called.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    # Open the file in a context manager so its handle is released
    # deterministically; convert() returns a separate copy that stays
    # usable after the source image is closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text

# Load the TrOCR processor and model used by perform_ocr.
# Change the checkpoint name to switch between the small, base and large models.
trocr_checkpoint = 'microsoft/trocr-large-printed'
processor = TrOCRProcessor.from_pretrained(trocr_checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(trocr_checkpoint)

# Read the comma-separated folder names and trim the whitespace around each one.
with open(image_list_file, 'r') as list_file:
    raw_folder_list = list_file.read().strip()

folders_to_process = [entry.strip() for entry in raw_folder_list.split(',')]


# Run OCR on every image of each selected folder and write one hypothesis
# file per folder, with one line per image in the form: <image_id> "<text>".
selected_folders = set(folders_to_process)  # O(1) membership checks
image_extensions = ('.png', '.jpg', '.jpeg')

# os.listdir returns entries in arbitrary order; sorting makes the
# processing order (and the resulting files) reproducible between runs.
for folder_name in sorted(os.listdir(base_folder)):
    if folder_name not in selected_folders:
        continue

    folder_path = os.path.join(base_folder, folder_name)
    if not os.path.isdir(folder_path):
        continue

    output_file = f'/content/drive/MyDrive/TR OCR Large/resultados/{folder_name}/hipotese.txt'
    output_folder = os.path.dirname(output_file)
    # exist_ok=True creates the folder only when it is missing.
    os.makedirs(output_folder, exist_ok=True)

    ocr_results = []

    for filename in sorted(os.listdir(folder_path)):
        # Compare the extension case-insensitively so files such as
        # "IMG.PNG" are not silently skipped.
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(folder_path, filename)
        ocr_text = perform_ocr(image_path)
        # splitext removes only the final extension, so IDs that contain
        # dots themselves (e.g. "page.01.png") are kept intact.
        image_id = os.path.splitext(filename)[0]
        ocr_results.append((image_id, ocr_text))
        print(f'OCR for {filename}: {ocr_text}')

    # Explicit UTF-8 so recognized non-ASCII characters are written the
    # same way regardless of the platform's default encoding.
    with open(output_file, 'w', encoding='utf-8') as txt_file:
        txt_file.writelines(
            f'{image_id} "{ocr_text}"\n' for image_id, ocr_text in ocr_results
        )

    print(f'All results were saved in {output_file}')
