In [None]:
import json
import os
from urllib.parse import unquote

def extract_ls_data(input_file_path, output_file_path):
    """Convert a Label Studio JSON export into a per-image annotation mapping.

    The export is expected to be a list of tasks. For every task the image
    file name is taken from ``data['ocr']`` (falling back to ``data['images']``)
    and the annotation results are grouped by their shared ``id`` so that a
    polygon, its transcription and its labels end up in a single entry.

    The result written to ``output_file_path`` has the shape::

        {image_name: [{"coordinates": [...], "text": "...",
                       "additional_data": {"id", "labels", "width", "height"}}]}

    Coordinates are copied as stored in the export (no unit conversion).
    Regions without polygon points are dropped.

    Args:
        input_file_path: Path of the Label Studio export (JSON).
        output_file_path: Path of the JSON file to write. Missing parent
            directories are created.

    Returns:
        None. Progress and errors are reported with ``print``; errors are not
        re-raised.
    """
    # 1. Read the input JSON file. Handled separately so that a missing
    #    *output* directory can never be reported as a missing input file.
    try:
        with open(input_file_path, 'r', encoding='utf-8') as f:
            input_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: The file '{input_file_path}' was not found.")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    try:
        final_output = {}

        # 2. Process each task in the Label Studio export
        for task in input_data:
            # Image reference usually looks like:
            # "/data/local-files/?d=folder/images/filename.jpg"
            task_data = task.get('data') or {}
            image_path_raw = task_data.get('ocr', '') or task_data.get('images', '') or ''

            if 'd=' in image_path_raw:
                # Keep everything after the FIRST 'd=' (maxsplit=1) so a later
                # 'd=' inside the path (e.g. ".../grid=1/x.jpg") does not cut
                # the path short, then decode URI escapes such as %20.
                clean_path = unquote(image_path_raw.split('d=', 1)[1])
                image_name = os.path.basename(clean_path)
            else:
                image_name = os.path.basename(image_path_raw)

            img_entries = []

            # Usually one annotation per task, but the export stores a list.
            for annotation in task.get('annotations', []):
                # Polygon, text and labels of one region share the same 'id'.
                grouped_items = {}

                for item in annotation.get('result', []):
                    item_id = item.get('id')
                    if not item_id:
                        continue

                    group = grouped_items.setdefault(item_id, {
                        "coordinates": [],
                        "text": "",
                        "labels": [],
                        # Image size is taken from the first result seen for this id.
                        "original_width": item.get('original_width'),
                        "original_height": item.get('original_height')
                    })

                    value = item.get('value') or {}
                    item_type = item.get('type')

                    if item_type == 'polygon':
                        group['coordinates'] = value.get('points', [])
                    elif item_type == 'textarea':
                        # Label Studio stores text as a list ["Text here"];
                        # tolerate a bare string so it is not joined per character.
                        text_value = value.get('text', [])
                        if isinstance(text_value, str):
                            group['text'] = text_value
                        else:
                            group['text'] = " ".join(text_value) if text_value else ""
                    elif item_type == 'labels':
                        group['labels'] = value.get('labels', [])

                # Emit only the regions that actually carry coordinates.
                for unique_id, data in grouped_items.items():
                    if data['coordinates']:
                        img_entries.append({
                            "coordinates": data['coordinates'],
                            "text": data['text'],
                            "additional_data": {
                                "id": unique_id,
                                "labels": data['labels'],
                                "width": data['original_width'],
                                "height": data['original_height']
                            }
                        })

            # A later task with the same image name replaces the earlier one.
            final_output[image_name] = img_entries

        # 3. Write to the output JSON file, creating its folder when needed.
        output_dir = os.path.dirname(output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_file_path, 'w', encoding='utf-8') as f:
            json.dump(final_output, f, indent=4, ensure_ascii=False)

        print(f"Successfully processed {len(input_data)} tasks.")
        print(f"Output saved to: {output_file_path}")

    except Exception as e:
        print(f"An error occurred: {e}")

# --- Configuration ---
# Entry point: converts one Label Studio export when this code runs as the
# main module (which is the case when the cell is executed in a notebook).
if __name__ == "__main__":
    # Change these filenames as needed.
    # INPUT_FILE is the Label Studio export to read; OUTPUT_FILE is the
    # converted JSON that extract_ls_data writes.
    INPUT_FILE = 'label-studio_exports/project-7-at-2025-12-15-15-15-f68c6772.json'
    OUTPUT_FILE = 'converted_jsons/project-7-at-2025-12-15-15-15-f68c6772_converted.json'
    
    extract_ls_data(INPUT_FILE, OUTPUT_FILE)

Successfully processed 3 tasks.
Output saved to: converted_jsons/project-7-at-2025-12-15-15-15-f68c6772_converted.json


In [None]:
import json
import cv2
import numpy as np
import os

# --- CONFIGURATION ---
# JSON_FILE:     converted annotation file (the output of the conversion step).
# IMAGE_FOLDER:  folder holding the raw image files named in that JSON.
# OUTPUT_FOLDER: folder that receives the images with polygons drawn on them.
JSON_FILE = 'converted_jsons/project-7-at-2025-12-15-15-15-f68c6772_converted.json'
IMAGE_FOLDER = 'images'
OUTPUT_FOLDER = 'annotated_images'
# ---------------------

def draw_boxes_on_images():
    """Draw every annotated polygon onto its image and save the result.

    Reads the converted annotation mapping from ``JSON_FILE``
    (``{file_name: [{"coordinates": [[x, y], ...], ...}, ...]}``), loads each
    image from ``IMAGE_FOLDER``, draws the polygons and writes the annotated
    copy under the same file name into ``OUTPUT_FOLDER``.

    Coordinates are treated as percentages (0-100) of the image size and are
    converted to pixels using the dimensions of the image actually loaded.

    Returns:
        None. Progress, warnings and errors are reported with ``print``.
    """
    # 1. Create output folder if it doesn't exist
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # 2. Load the JSON data
    try:
        with open(JSON_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: Could not find {JSON_FILE}")
        return

    # 3. Iterate through each image in the JSON
    for filename, annotations in data.items():
        image_path = os.path.join(IMAGE_FOLDER, filename)

        if not os.path.exists(image_path):
            print(f"Warning: Image file not found: {image_path}")
            continue

        # cv2.imread with default flags returns a 3-channel BGR image,
        # or None when the file cannot be decoded.
        img = cv2.imread(image_path)
        if img is None:
            print(f"Error: Could not read image {filename}")
            continue

        # Actual image dimensions, used to convert percentages to pixels
        height, width = img.shape[:2]

        print(f"Processing: {filename} ({len(annotations)} boxes)")

        # 4. Iterate through each box/polygon in the list
        for item in annotations:
            points = item.get('coordinates', [])
            if not points:
                continue

            # Percentage coordinates [[x1, y1], [x2, y2], ...] -> integer pixels
            pixel_points = [
                [int((pt[0] / 100.0) * width), int((pt[1] / 100.0) * height)]
                for pt in points
            ]

            # cv2.polylines expects int32 points shaped (N, 1, 2)
            pts = np.array(pixel_points, np.int32).reshape((-1, 1, 2))

            # Closed outline, 1 px thick. Colour is given in BGR order,
            # so (255, 0, 0) is blue.
            cv2.polylines(img, [pts], True, (255, 0, 0), 1)

        # 5. Save the annotated image; imwrite signals failure by returning False
        output_path = os.path.join(OUTPUT_FOLDER, filename)
        if not cv2.imwrite(output_path, img):
            print(f"Error: Could not write image {output_path}")

    print(f"\nDone! Check the '{OUTPUT_FOLDER}' directory.")

# Run the visual check when this code is executed as the main module
# (which is the case when the cell is executed in a notebook).
if __name__ == "__main__":
    draw_boxes_on_images()

Processing: IMG-20251127-WA0004.jpg (24 boxes)
Processing: IMG-20251127-WA0003.jpg (19 boxes)
Processing: 019_2.jpg (19 boxes)

Done! Check the 'annotated_images' directory.


In [None]:
import json
import cv2
import numpy as np
import os
import csv
import math

# --- CONFIGURATION ---
# INPUT_JSON:       converted annotation file to build the dataset from.
# NOTE(review): this points at a "project-12" export, while the conversion and
# drawing cells above use "project-7" -- confirm this is the intended input.
# INPUT_IMAGE_DIR:  folder holding the source images named in INPUT_JSON.
# BASE_OUTPUT_DIR:  root folder of the generated dataset split.
# OUTPUT_IMAGE_DIR: sub-folder of BASE_OUTPUT_DIR that receives the crops.
# METADATA_FILE:    CSV in BASE_OUTPUT_DIR listing each crop with its text.
INPUT_JSON = 'converted_jsons/project-12-at-2025-12-15-14-48-2d2ece96_converted.json'
INPUT_IMAGE_DIR = 'images'
BASE_OUTPUT_DIR = 'dataset/train'
OUTPUT_IMAGE_DIR = os.path.join(BASE_OUTPUT_DIR, 'images')
METADATA_FILE = os.path.join(BASE_OUTPUT_DIR, 'metadata.csv')
# ---------------------

def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The primary rule picks the corners by coordinate sums and differences:
    smallest ``x + y`` is top-left, largest ``x + y`` is bottom-right,
    smallest ``y - x`` is top-right and largest ``y - x`` is bottom-left.

    For rotated boxes (for example a square turned by 45 degrees) that rule
    can select the same input point for two corners, which silently drops one
    corner and yields a degenerate perspective transform. When that happens
    for a four-point input, the points are instead sorted by angle around
    their centroid (clockwise on screen, since image y grows downwards) and
    the sequence is rotated to start at the point with the smallest ``x + y``.

    Args:
        pts: Array-like of shape (N, 2) holding ``[x, y]`` points; the caller
            in this file passes the four corners of a rotated rectangle.

    Returns:
        ``float32`` array of shape (4, 2) in TL, TR, BR, BL order.
    """
    pts = np.asarray(pts)
    rect = np.zeros((4, 2), dtype="float32")

    # Sum / difference heuristic (unchanged result whenever it finds four
    # different points).
    s = pts.sum(axis=1)
    diff = np.diff(pts, axis=1)
    idx_tl, idx_br = int(np.argmin(s)), int(np.argmax(s))
    idx_tr, idx_bl = int(np.argmin(diff)), int(np.argmax(diff))

    if len(pts) == 4 and len({idx_tl, idx_tr, idx_br, idx_bl}) < 4:
        # Heuristic reused a point: fall back to an angular sort, which
        # always keeps all four corners in a consistent cyclic order.
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        ring = pts[np.argsort(angles, kind="stable")]
        start = int(np.argmin(ring.sum(axis=1)))
        rect[:] = np.roll(ring, -start, axis=0)
        return rect

    rect[0] = pts[idx_tl]
    rect[1] = pts[idx_tr]
    rect[2] = pts[idx_br]
    rect[3] = pts[idx_bl]

    return rect

def four_point_transform(image, pts):
    """
    Warp the quadrilateral described by ``pts`` into an upright rectangle.

    The corners are first put into TL, TR, BR, BL order with ``order_points``.
    The output width is the longer of the top and bottom edges and the output
    height the longer of the left and right edges (both truncated to int).
    Returns the warped image produced by ``cv2.warpPerspective``.
    """
    def _edge_length(p, q):
        # Euclidean distance between two [x, y] corners
        delta = p - q
        return np.sqrt((delta[0] ** 2) + (delta[1] ** 2))

    # Source corners in a consistent order
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output size: longest horizontal edge x longest vertical edge
    bottom_edge = int(_edge_length(bottom_right, bottom_left))
    top_edge = int(_edge_length(top_right, top_left))
    maxWidth = max(bottom_edge, top_edge)

    right_edge = int(_edge_length(top_right, bottom_right))
    left_edge = int(_edge_length(top_left, bottom_left))
    maxHeight = max(right_edge, left_edge)

    # Destination corners of the straightened view, again TL, TR, BR, BL
    last_col = maxWidth - 1
    last_row = maxHeight - 1
    target = np.array(
        [[0, 0], [last_col, 0], [last_col, last_row], [0, last_row]],
        dtype="float32",
    )

    # Map source quadrilateral onto the destination rectangle
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (maxWidth, maxHeight))

def process_dataset():
    """Cut every annotated region out of its image and build ``metadata.csv``.

    Reads the converted annotation mapping from ``INPUT_JSON``, loads each
    image from ``INPUT_IMAGE_DIR`` and, for every annotation with coordinates:

    1. converts the percentage coordinates (0-100) to pixels,
    2. fits the minimum-area rotated rectangle around the points,
    3. straightens that rectangle with ``four_point_transform``,
    4. saves the crop as ``{image_stem}_crop_{index}.jpg`` in
       ``OUTPUT_IMAGE_DIR`` and appends ``file_name, text`` to
       ``METADATA_FILE``.

    A CSV row is written only when the crop was actually saved. Annotations
    that cannot be cropped are reported and skipped.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Create directories
    os.makedirs(OUTPUT_IMAGE_DIR, exist_ok=True)

    # Load JSON
    try:
        with open(INPUT_JSON, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {INPUT_JSON} not found.")
        return

    # Open CSV for writing
    with open(METADATA_FILE, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['file_name', 'text'])
        writer.writeheader()

        # Iterate over images
        for filename, annotations in data.items():
            img_path = os.path.join(INPUT_IMAGE_DIR, filename)

            if not os.path.exists(img_path):
                print(f"Skipping missing image: {filename}")
                continue

            # cv2.imread returns None when the file cannot be decoded
            original_img = cv2.imread(img_path)
            if original_img is None:
                print(f"Skipping unreadable image: {filename}")
                continue

            h, w = original_img.shape[:2]

            # Remove extension from filename for crop naming
            base_name = os.path.splitext(filename)[0]

            for idx, item in enumerate(annotations):
                points = item.get('coordinates', [])
                text_content = (item.get('text') or '').strip()

                # Only annotations without coordinates are skipped; entries
                # with empty text are still cropped and listed.
                if not points:
                    continue

                try:
                    # 1. Convert percentage coordinates to pixels
                    pixel_points = [
                        [int((pt[0] / 100.0) * w), int((pt[1] / 100.0) * h)]
                        for pt in points
                    ]
                    np_points = np.array(pixel_points, dtype="float32")

                    # 2. The perspective warp needs exactly 4 points, so any
                    #    polygon is reduced to the 4 corners of its
                    #    minimum-area rotated rectangle.
                    rect = cv2.minAreaRect(np_points)
                    # Truncate corners to whole pixels. np.intp replaces the
                    # np.int0 alias, which NumPy deprecated and later removed.
                    box = cv2.boxPoints(rect).astype(np.intp)

                    # Perform the perspective warp (crop & straighten)
                    cropped_img = four_point_transform(original_img, box.astype("float32"))
                except Exception as e:
                    print(f"Error cropping {filename} item {idx}: {e}")
                    continue

                # 3. Save the crop; skip the CSV row if nothing was written
                crop_filename = f"{base_name}_crop_{idx}.jpg"
                save_path = os.path.join(OUTPUT_IMAGE_DIR, crop_filename)

                if not cv2.imwrite(save_path, cropped_img):
                    print(f"Error saving crop {crop_filename}")
                    continue

                # 4. Write to CSV
                # Requirement: file_name should be ./image/{image_crop_name}
                # NOTE(review): crops are saved under OUTPUT_IMAGE_DIR, whose
                # folder name is 'images' (plural) -- confirm './image/' is
                # really the prefix the consumer of metadata.csv expects.
                relative_path = f"./image/{crop_filename}"

                writer.writerow({
                    'file_name': relative_path,
                    'text': text_content
                })

    print(f"Processing complete.")
    print(f"Images saved to: {OUTPUT_IMAGE_DIR}")
    print(f"Metadata saved to: {METADATA_FILE}")

# Build the cropped dataset when this code is executed as the main module
# (which is the case when the cell is executed in a notebook).
if __name__ == "__main__":
    process_dataset()

Processing complete.
Images saved to: dataset/train\images
Metadata saved to: dataset/train\metadata.csv


  box = np.int0(box)
