In [1]:
pip install azure-ai-vision-imageanalysis

Collecting azure-ai-vision-imageanalysis
  Downloading azure_ai_vision_imageanalysis-1.0.0-py3-none-any.whl.metadata (22 kB)
Collecting isodate>=0.6.1 (from azure-ai-vision-imageanalysis)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting azure-core>=1.30.0 (from azure-ai-vision-imageanalysis)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Downloading azure_ai_vision_imageanalysis-1.0.0-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading azure_core-1.32.0-py3-none-any.whl (198 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.9/198.9 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading isodate-0.7.2-py3-none-any.whl (22 kB)
Installing collected packages: isodate, azure-core, azure-ai-vision-imageanalysis
Successfully installed azure-ai-vision-imageanalysis-1.0.0 azure-core-1.32.0 isodate-0.7.2
N

In [None]:
import os
import json
import re
import time
from random import randint
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential
import io

In [None]:
# Azure AI Vision connection settings.
# Secrets must not live in the notebook: both values are read from the
# environment (VISION_ENDPOINT / VISION_KEY) and default to an empty string
# when the variable is not set.
endpoint = os.environ.get("VISION_ENDPOINT", "")
key = os.environ.get("VISION_KEY", "")

In [None]:
# Build the shared Image Analysis client from the module-level endpoint and key.
client = ImageAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# Feature list sent with every analyze call: only READ is requested.
visual_features = [VisualFeatures.READ]

def ocr_result(file_path):
    """Analyze one image file with the module-level client and return its READ result.

    Args:
        file_path: Path of the image file; its full contents are read as bytes.

    Returns:
        The ``read`` attribute of the object returned by ``client.analyze``.
    """
    with open(file_path, "rb") as image_file:
        payload = image_file.read()

    # Same request options as before, gathered in one place before the call.
    request_options = dict(
        image_data=payload,
        visual_features=visual_features,
        smart_crops_aspect_ratios=[0.9, 1.33],
        gender_neutral_caption=True,
        language="en",
    )
    return client.analyze(**request_options).read

In [5]:
def convert_bounding_polygon(polygon):
    """Convert a bounding polygon into a list of ``[x, y]`` pairs.

    Args:
        polygon: Iterable of points, each supporting ``point['x']`` and
            ``point['y']`` lookups.

    Returns:
        A new list with one ``[x, y]`` list per point, in the input order.
    """
    pairs = []
    for vertex in polygon:
        pairs.append([vertex['x'], vertex['y']])
    return pairs

def process_azure_vision_result(result):
    """Rewrite every bounding polygon in an Azure Vision read result as ``[x, y]`` lists.

    Walks ``result.blocks`` -> ``block.lines`` -> ``line.words`` and replaces
    each ``bounding_polygon`` attribute with the output of
    ``convert_bounding_polygon``. The conversion is done in place.

    Args:
        result: Object exposing ``blocks``; each block exposes ``lines`` and
            each line exposes ``words`` and ``bounding_polygon``.

    Returns:
        The same ``result`` object that was passed in, after modification.
    """
    every_line = (text_line for text_block in result.blocks for text_line in text_block.lines)
    for text_line in every_line:
        # The line's own polygon is converted first, then each of its words.
        text_line.bounding_polygon = convert_bounding_polygon(text_line.bounding_polygon)
        for token in text_line.words:
            token.bounding_polygon = convert_bounding_polygon(token.bounding_polygon)
    return result

In [6]:
def save_json(results, output_json):
    """Append ``results`` to the JSON list stored in ``output_json``.

    If the file already exists and holds a JSON list, the new items are added
    after the existing ones. If it holds anything else, or cannot be decoded
    as JSON, a message is printed and the old content is discarded. The file
    is then rewritten with the combined list.

    Args:
        results: Iterable of JSON-serializable items to append.
        output_json: Path of the JSON file to create or update.
    """
    merged = []

    if os.path.exists(output_json):
        try:
            with open(output_json, "r", encoding="utf-8") as source:
                loaded = json.load(source)
        except json.JSONDecodeError:
            print(f"Error decoding JSON from {output_json}. Starting with an empty list.")
        else:
            if isinstance(loaded, list):
                merged = loaded
            else:
                print(f"Invalid JSON format in {output_json}")

    merged.extend(results)

    # Rewrite the whole file with old and new entries together.
    with open(output_json, "w", encoding="utf-8") as target:
        json.dump(merged, target, ensure_ascii=False, indent=2)

In [None]:
def _append_checkpoint(record, checkpoint_path):
    """Best-effort append of one result record to the crash-recovery checkpoint file."""
    if not checkpoint_path:
        return
    try:
        # Serialize first so a failure cannot leave a half-written record behind.
        payload = json.dumps(record, ensure_ascii=False, indent=2)
        with open(checkpoint_path, "a", encoding="utf-8") as f:
            # The trailing newline keeps consecutive records from fusing as "}{".
            f.write(payload + "\n")
    except (OSError, TypeError, ValueError) as e:
        # A failed checkpoint must neither discard nor repeat a successful OCR call.
        print(f"Could not write checkpoint {checkpoint_path}: {e}")


def process_images(list_path, output_json, max_retries=5,
                   checkpoint_path="/kaggle/working/tmp.json"):
    """Run OCR on every image in ``list_path`` and append the results to ``output_json``.

    File names are expected to look like ``<label>_<label index>_<page index>``
    (extra ``_``-separated parts are ignored). Images whose name does not
    match are reported and skipped before any OCR request is made.

    For each image, ``ocr_result`` and ``process_azure_vision_result`` are
    called; on any exception the attempt is repeated up to ``max_retries``
    times, waiting 10 seconds after a rate-limit (429) error and a random
    3-5 seconds otherwise. Only the first block of the OCR result is stored.

    Args:
        list_path: Iterable of image file paths.
        output_json: Path handed to ``save_json`` once all images are done.
        max_retries: Maximum number of attempts per image.
        checkpoint_path: File that receives each record as soon as it is
            produced; pass a falsy value to disable checkpointing.
    """
    results = []

    for image_path in list_path:
        # Validate the name first: a malformed name can never succeed, so it
        # should not cost an OCR request.
        base_name = os.path.basename(image_path)
        file_name, _ext = os.path.splitext(base_name)
        splitter = file_name.split('_')
        if len(splitter) < 3:
            print(f"Invalid file name format: {file_name}")
            continue
        label, label_index, page_index = (part.strip() for part in splitter[:3])

        retries = 0
        while retries < max_retries:
            try:
                print(f"Processing: {image_path}")

                ocr = ocr_result(image_path)
                # Retrying cannot produce a first block that is not there, so
                # skip instead of failing on blocks[0] on every attempt.
                if ocr is None or not ocr.blocks:
                    print(f"No text blocks returned for {image_path}. Skipping...")
                    break
                process_ocr = process_azure_vision_result(ocr)

                result = {
                    "image_name": file_name,
                    "label_name": label,
                    "page_index": page_index,
                    "label_index": label_index,
                    "result": process_ocr.blocks[0].as_dict()
                }
            except Exception as e:
                retries += 1
                print(f"Error processing {image_path} (attempt {retries}/{max_retries}): {e}")

                # Prefer the HTTP status code when the exception carries one;
                # fall back to the message text as before.
                rate_limited = getattr(e, "status_code", None) == 429 or "429" in str(e)

                if retries >= max_retries:
                    # Checked first so the final failed attempt does not sleep for nothing.
                    print(f"Max retries reached for {image_path}. Skipping...")
                elif rate_limited:
                    wait_time = 10
                    print(f"Rate limit exceeded. Waiting {wait_time} seconds...")
                    time.sleep(wait_time)
                else:
                    wait_time = randint(3, 5)  # random delay before the next attempt
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
            else:
                # Recorded outside the retried section, so a checkpoint failure
                # cannot trigger a second OCR call or a duplicate entry.
                results.append(result)
                _append_checkpoint(result, checkpoint_path)
                break

    save_json(results, output_json)
    print(f"Processing completed. Results saved to {output_json}.")

In [8]:
def get_sorted_image_list(folder_path):
    """List the image files in ``folder_path`` ordered by (label index, page index).

    Only entries ending in ``.jpg``, ``.png``, ``.jpeg`` or ``.bmp``
    (case-insensitive) are kept. File names must look like
    ``<label>_<label index>_<page index>``; the two indices are compared as
    integers.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        List of ``folder_path``-joined file paths in sorted order.

    Raises:
        ValueError: If an image name has fewer than three ``_``-separated
            parts, or if an index part is not an integer.
    """
    image_extensions = ('.jpg', '.png', '.jpeg', '.bmp')

    def sort_key(path):
        # Work on the bare file name, without directory or extension.
        bare_name = os.path.basename(path)
        fields = os.path.splitext(bare_name)[0].split('_')
        if len(fields) < 3:
            raise ValueError(f"Invalid file name format: {bare_name}")
        return (int(fields[1].strip()), int(fields[2].strip()))

    candidates = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(image_extensions):
            candidates.append(os.path.join(folder_path, entry))

    candidates.sort(key=sort_key)
    return candidates


In [None]:
# Driver: list the images in IMAGE_DIR in sorted order, OCR each one and
# append the results to OUTPUT_JSON.
# NOTE(review): "IMAGE DIDDY" looks like a placeholder. The recorded output of
# this cell shows files under /kaggle/input/han-viet/LABELED; confirm and set
# the real directory before running.
IMAGE_DIR = "IMAGE DIDDY"
OUTPUT_JSON = "ocr_results.json"  # relative path, resolved against the working directory
image_list = get_sorted_image_list(IMAGE_DIR)  # raises ValueError on a malformed image name
process_images(image_list, OUTPUT_JSON)

Processing: /kaggle/input/han-viet/LABELED/ORI_1_39.png
OCR: /kaggle/input/han-viet/LABELED/ORI_1_39.png
Processing: /kaggle/input/han-viet/LABELED/ORI_1_40.png
OCR: /kaggle/input/han-viet/LABELED/ORI_1_40.png
Processing: /kaggle/input/han-viet/LABELED/ORI_2_41.png
OCR: /kaggle/input/han-viet/LABELED/ORI_2_41.png
Processing: /kaggle/input/han-viet/LABELED/ORI_2_42.png
OCR: /kaggle/input/han-viet/LABELED/ORI_2_42.png
Processing: /kaggle/input/han-viet/LABELED/ORI_3_43.png
OCR: /kaggle/input/han-viet/LABELED/ORI_3_43.png
Processing: /kaggle/input/han-viet/LABELED/ORI_3_44.png
OCR: /kaggle/input/han-viet/LABELED/ORI_3_44.png
Processing: /kaggle/input/han-viet/LABELED/ORI_4_47.png
OCR: /kaggle/input/han-viet/LABELED/ORI_4_47.png
Processing: /kaggle/input/han-viet/LABELED/ORI_4_48.png
OCR: /kaggle/input/han-viet/LABELED/ORI_4_48.png
Processing: /kaggle/input/han-viet/LABELED/ORI_5_49.png
OCR: /kaggle/input/han-viet/LABELED/ORI_5_49.png
Processing: /kaggle/input/han-viet/LABELED/ORI_5_50.png