# In [None]:
import os
import shutil
import re
import json
import requests
import time

# Culture/Target Language Captioning Script
# - First rephrases each English caption so that it is grammatically correct, then
#   translates the rephrased caption into the target language.
# - Saves a copy of each image, named after its translated caption, in one folder and
#   the corresponding caption data as a JSON file in another folder.
# - Prerequisite: the English-captioned images (file name = caption) must already be
#   in a single folder, e.g. yourlanguage_images.

def sanitize_filename(filename):
    """Return *filename* made safe for use as a file name.

    ASCII control characters (NUL, tab, newline, ...) are replaced by a
    single space, the reserved characters ``< > : " / \\ | ? *`` are removed,
    and surrounding whitespace is trimmed.

    The result may be an empty string (for example when the input consists
    only of reserved characters); callers are expected to check for that.
    """
    # Control characters are invalid in Windows file names and NUL is
    # rejected on every platform.  Replace each run with a space rather than
    # deleting it so words separated by a newline/tab do not get fused.
    without_controls = re.sub(r'[\x00-\x1f]+', ' ', filename)
    # Remove invalid characters from filename
    return re.sub(r'[<>:"/\\|?*]', '', without_controls).strip()

def translate_to_target_language(text, target_language, api_key, timeout=30):
    """Rephrase *text* and translate it to *target_language* via the Gemini API.

    Sends a single ``generateContent`` request whose prompt asks the model to
    first fix the English text and then return only the translation.

    Args:
        text: English caption to translate.
        target_language: Name of the language to translate into.
        api_key: Gemini API key.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The translated text with surrounding whitespace and trailing full
        stops removed, or ``None`` if the request failed or the response
        contained no text.  Errors are printed, never raised.
    """
    try:
        # Gemini API endpoint
        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" # can switch between models see doc https://ai.google.dev/gemini-api/docs/rate-limits

        headers = {
            "Content-Type": "application/json",
            # Send the key as a header instead of a `?key=` query parameter:
            # requests includes the full URL in its exception messages, so a
            # key in the URL would be printed by the error handler below.
            "x-goog-api-key": api_key,
        }
        # Prompt for accurate translation
        prompt = (
            f"First, rephrase the following English text to ensure it is grammatically correct and clear, if needed: '{text}'. "
            f"Then, translate the rephrased text to {target_language}, ensuring proper phrasing and natural language. "
            f"Provide only the translated {target_language} text in your response, without a trailing full stop."
        )
        data = {
            "contents": [{
                "parts": [{
                    "text": prompt
                }]
            }]
        }
        # Make API request; without a timeout a stalled connection would
        # block the whole batch indefinitely.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
        result = response.json()

        # Extract the first text part.  `or [{}]` also covers keys that are
        # present but hold an empty list (e.g. a blocked response), which
        # would otherwise raise IndexError.
        candidates = result.get("candidates") or [{}]
        parts = candidates[0].get("content", {}).get("parts") or [{}]
        translated_text = parts[0].get("text", "").strip()
        # Remove trailing full stop(s) and any whitespace that preceded them
        translated_text = translated_text.rstrip('.').rstrip()
        return translated_text if translated_text else None
    except requests.exceptions.RequestException as e:
        print(f"API error during translation: {str(e)}")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: a malformed response must not
        # abort the caller's batch, so report it and signal failure.
        print(f"Unexpected error during translation: {str(e)}")
        return None

def _dedupe_name(stem, ext, used_names):
    """Return ``stem + ext``, suffixed with `` (n)`` if already used in this run."""
    candidate = f"{stem}{ext}"
    counter = 2
    # Compare case-insensitively so the result is also unique on
    # case-insensitive file systems.
    while candidate.casefold() in used_names:
        candidate = f"{stem} ({counter}){ext}"
        counter += 1
    used_names.add(candidate.casefold())
    return candidate


def process_captions(target_language="Your Language",
                     source_dir='yourlanguage_images',
                     api_key=None,
                     delay_seconds=5):
    """Translate the captions of all images in *source_dir* and save the results.

    Each file name (without extension) in *source_dir* is treated as an
    English caption.  For every file the caption is translated, the image is
    copied to ``<language>_captioned_images`` under the translated caption,
    and a ``caption_data_image_<id>.json`` file is written to
    ``<language>_captioned_jsonData``.  Files that cannot be translated or
    written are reported and skipped.

    Args:
        target_language: Language to translate into; its lower-cased form
            names the output folders and the JSON caption key.
        source_dir: Folder containing the English-captioned images.
        api_key: Gemini API key.  When omitted, the ``GEMINI_API_KEY``
            environment variable is used.
        delay_seconds: Pause between consecutive API requests, to stay
            within the API rate limit.

    Returns:
        None.  Progress and errors are printed.
    """
    # Source and destination directories
    image_dest_dir = f'{target_language.lower()}_captioned_images'
    json_dest_dir = f'{target_language.lower()}_captioned_jsonData'

    # Get API key: explicit argument first, then the environment.  The
    # unedited template placeholder is rejected up front because every
    # request made with it would fail anyway.
    placeholder_key = "YOUR_GEMINI_API_KEY"
    api_key = api_key or os.environ.get("GEMINI_API_KEY", "")
    if not api_key or api_key == placeholder_key:
        print("Error: GEMINI_API_KEY environment variable not set!")
        return

    # Get list of files in source directory (sorted so that image ids are
    # reproducible; os.listdir order is arbitrary)
    try:
        files = sorted(os.listdir(source_dir))
    except (FileNotFoundError, NotADirectoryError):
        print(f"Error: {source_dir} directory not found!")
        return

    # Create destination directories only once the inputs are known to be
    # valid, so a misconfigured run leaves no empty folders behind.
    for dest_dir in (image_dest_dir, json_dest_dir):
        os.makedirs(dest_dir, exist_ok=True)

    # Process each file with an image ID
    image_id = 1
    used_names = set()
    request_made = False
    for filename in files:
        source_path = os.path.join(source_dir, filename)
        # Skip sub-directories etc. before spending an API call on them.
        if not os.path.isfile(source_path):
            print(f"Skipping {filename}: not a regular file.")
            continue

        # Get file extension and English caption (filename without extension)
        english_caption, ext = os.path.splitext(filename)

        # Delay to avoid rate limit 429 error.  Waiting *before* every
        # request after the first keeps the spacing even when the previous
        # file failed (which is exactly when the limit may have been hit).
        # Adjust according to your rate limit, see doc
        # https://ai.google.dev/gemini-api/docs/rate-limits
        if request_made and delay_seconds > 0:
            time.sleep(delay_seconds)
        request_made = True

        # Translate caption to target language
        translated_caption = translate_to_target_language(english_caption, target_language, api_key)
        if not translated_caption:
            print(f"Skipping {filename} due to translation failure.")
            continue

        # Sanitize translated caption for filename
        cleaned_translated_caption = sanitize_filename(translated_caption)
        if not cleaned_translated_caption:
            print(f"Skipping {filename}: Sanitized {target_language} caption is empty.")
            continue

        # Create new filename with translated caption; two images whose
        # captions translate identically must not overwrite each other.
        new_filename = _dedupe_name(cleaned_translated_caption, ext, used_names)
        image_dest_path = os.path.join(image_dest_dir, new_filename)

        # Create JSON data
        caption_data = {
            "image_id": image_id,
            "english_caption": english_caption,
            f"{target_language.lower()}_caption": translated_caption
        }
        json_filename = f"caption_data_image_{image_id}.json"
        json_path = os.path.join(json_dest_dir, json_filename)

        try:
            # Copy file with new name
            shutil.copy2(source_path, image_dest_path)
            # Save JSON file
            with open(json_path, 'w', encoding='utf-8') as json_file:
                json.dump(caption_data, json_file, ensure_ascii=False, indent=4)
        except (OSError, ValueError) as e:
            # ValueError covers paths the OS layer rejects outright
            # (e.g. an embedded NUL byte); keep going with the next file.
            print(f"Error processing {filename}: {str(e)}")
            continue

        print(f"Processed: {filename} -> {new_filename} (JSON: {json_filename})")
        image_id += 1

    print("\nTranslation and captioning complete!")
    print(f"Original images are in: {source_dir}")
    print(f"Translated images are in: {image_dest_dir}")
    print(f"JSON files are in: {json_dest_dir}")

# Run the captioning pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    process_captions()