In [None]:
"""[Adjusted] Sample Code from Azure"""
"""This code takes a root directory containing folders of images/frames and applies OCR to extract the text from each image, keyed by the frame's file name."""

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials

import os
import time
import json

# Authentication: the key and endpoint are read from environment variables.
# Indexing os.environ raises KeyError if either variable is not set.
subscription_key = os.environ["AI_SERVICES_KEY"]
endpoint = os.environ["AI_SERVICES_ENDPOINT"]

# Create the Computer Vision client used for all OCR requests below
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))

# Root folder that is walked recursively; images are grouped per (sub)folder
main_folder = "frames" # YOUR FOLDER PATH HERE

def process_image(image_path, poll_interval=1, timeout=120):
    """Run the Read (OCR) API on one local image and return its text lines.

    Args:
        image_path: Path to a local image file.
        poll_interval: Seconds to sleep between two status polls.
        timeout: Maximum number of seconds to wait for the Read operation
            to leave the 'notStarted'/'running' states. Pass None to wait
            indefinitely.

    Returns:
        A list with the text of every recognized line, in page order (an
        empty list when the image contains no text), or None when the
        operation did not succeed or did not finish within ``timeout``.
    """
    # Upload the local file as a binary stream and start the async Read call
    with open(image_path, "rb") as image_stream:
        read_response = computervision_client.read_in_stream(image_stream, raw=True)

    # The operation location is a URL whose last path segment is the operation ID
    read_operation_location = read_response.headers["Operation-Location"]
    operation_id = read_operation_location.split("/")[-1]

    # Poll the "GET" API until the operation finishes, bounded by a deadline so
    # that a stuck operation cannot hang the whole batch forever.
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        read_result = computervision_client.get_read_result(operation_id)
        if read_result.status not in ['notStarted', 'running']:
            break
        if deadline is not None and time.monotonic() >= deadline:
            return None
        time.sleep(poll_interval)

    if read_result.status != OperationStatusCodes.succeeded:
        return None

    # Flatten all pages into a single list of line texts
    return [
        line.text
        for page in read_result.analyze_result.read_results
        for line in page.lines
    ]

# Traverse the main folder and its subfolders; each folder that contains
# images gets its own pair of JSON files under "ocr/<folder name>/".
for root, dirs, files in os.walk(main_folder):
    # Collect image paths in the current folder. The extension check is
    # case-insensitive (so "frame.JPG" is picked up too) and the paths are
    # sorted so the processing order and JSON key order are deterministic.
    image_paths = sorted(
        os.path.join(root, file)
        for file in files
        if file.lower().endswith((".jpg", ".png", ".jpeg"))
    )

    if not image_paths:
        continue  # Skip if no images in the folder

    # OCR text per image name, and a success flag per image name
    results = {}
    status = {}

    # Process all images in the current folder
    for image_path in image_paths:
        print(f"Processing image: {image_path}")
        text = process_image(image_path)
        image_name = os.path.basename(image_path)  # Extract the file name
        # process_image returns None on failure and a (possibly empty) list on
        # success, so an image that simply contains no text is not a failure.
        if text is not None:
            results[image_name] = text
            status[image_name] = True
        else:
            status[image_name] = False
            print(f"Failed to process image: {image_path}")

    # Save results for the current folder.
    # NOTE: only the last path component is used, so folders that share the
    # same name at different depths write to the same output directory.
    folder_name = os.path.basename(root)
    output_folder = os.path.join("ocr", folder_name)
    os.makedirs(output_folder, exist_ok=True)

    # Save results to JSON files (ensure_ascii=False keeps non-ASCII text readable)
    result_file = os.path.join(output_folder, "ocr_results.json")
    status_file = os.path.join(output_folder, "status.json")

    with open(result_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4, ensure_ascii=False)

    with open(status_file, "w", encoding="utf-8") as f:
        json.dump(status, f, indent=4, ensure_ascii=False)

    print(f"Results for folder '{folder_name}' saved to {output_folder}")

Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_360.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_570.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_210.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_600.jpg
Failed to process image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_600.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_510.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_300.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_270.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_450.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_240.jpg
Processing image: frames/天之驕女│最新台語八點檔大戲即將上映！天之蕉子續集 Proud of You│ Vidol.tv/frame_480.jpg
Processing image: frames/