## Add extracted keywords to image_description



In [25]:
import json
import re
from typing import List

def update_image_description_with_keywords(
    extracted_keywords_path: str,
    image_description_path: str,
    output_path: str,
    category: str
) -> None:
    """
    Copy the valid extracted keywords into one entry of the image description JSON.

    The keyword file is read as a JSON object whose values are iterables of
    entries; the element at index 1 of every entry is taken as the keyword text.
    Each keyword is stripped of surrounding whitespace. A keyword is discarded
    (not repaired) when it is empty after stripping or contains any character
    other than ASCII letters, digits, German umlauts/ß, whitespace or a hyphen.
    The remaining keywords are de-duplicated and sorted.

    The first item in ``image_data["details"]`` whose ``"category"`` equals
    ``category`` receives the list under the key
    ``"extracted_keywords_from_image"``. The (possibly updated) document is
    always written to ``output_path``; if no item matches, a warning is printed
    and the document is written without the keywords.

    Parameters:
    - extracted_keywords_path (str): Path to the extracted keywords JSON file.
    - image_description_path (str): Path to the image description JSON file.
    - output_path (str): Path to save the updated image description JSON file.
    - category (str): The category in the image description JSON file to update.

    Raises:
    - OSError: If one of the files cannot be opened.
    - json.JSONDecodeError: If an input file is not valid JSON.
    - KeyError: If the image description JSON has no "details" key.
    """

    # Matches any character that is NOT allowed in a keyword
    # (allowed: ASCII alphanumerics, German umlauts/ß, whitespace, hyphen).
    unwanted_pattern = re.compile(r"[^a-zA-Z0-9äöüÄÖÜß\s\-]")

    # Load extracted keywords
    with open(extracted_keywords_path, "r", encoding="utf-8") as f:
        extracted_data = json.load(f)

    # Strip every keyword exactly once, then keep only non-empty keywords made
    # up entirely of allowed characters. The set removes duplicates.
    stripped_keywords = (
        value[1].strip() for values in extracted_data.values() for value in values
    )
    extracted_keywords = sorted({
        keyword for keyword in stripped_keywords
        if keyword and not unwanted_pattern.search(keyword)
    })

    # Load image description JSON
    with open(image_description_path, "r", encoding="utf-8") as f:
        image_data = json.load(f)

    # Update the first entry of the requested category
    for detail in image_data["details"]:
        if detail.get("category") == category:
            detail["extracted_keywords_from_image"] = extracted_keywords
            break  # Only the first matching entry is updated
    else:
        # No break happened: nothing matched, so make the no-op visible
        # instead of silently reporting success.
        print(
            f"Warning: no entry with category {category!r} found; "
            "the saved JSON will not contain the extracted keywords."
        )

    # Save the updated JSON to the output path
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(image_data, f, indent=2, ensure_ascii=False)

    print(f"Updated JSON saved successfully to {output_path}.")

# Example usage: merge the keywords from Extracted_Keywords.json into the
# "ECG_GCG" entry of image_description.json and write the result to
# image_description_updated.json.
# NOTE(review): the paths are raw strings (r"...") that also use doubled
# backslashes, so every separator is two literal backslash characters. The
# recorded cell output shows the call still succeeded, but single backslashes
# in a raw string (or pathlib.Path) would be the conventional spelling.
update_image_description_with_keywords(
    extracted_keywords_path=r"C:\\Users\\AndréHerrera\\OneDrive - Archipel Generalplanung AG\\Desktop\\Projekt IA-IFKL\\Hackathon 2025\\Extracted_Keywords.json",
    image_description_path=r"C:\\Users\\AndréHerrera\\OneDrive - Archipel Generalplanung AG\\Desktop\\Projekt IA-IFKL\\Hackathon 2025\\image_description.json",
    output_path=r"C:\\Users\\AndréHerrera\\OneDrive - Archipel Generalplanung AG\\Desktop\\Projekt IA-IFKL\\Hackathon 2025\\image_description_updated.json",
    category="ECG_GCG"
)


Updated JSON saved successfully to C:\\Users\\AndréHerrera\\OneDrive - Archipel Generalplanung AG\\Desktop\\Projekt IA-IFKL\\Hackathon 2025\\image_description_updated.json.
