In [None]:
import os
from dotenv import load_dotenv
from google import genai
from google.genai import types
from diffusers import StableDiffusionPipeline
import torch
from gtts import gTTS
from deep_translator import GoogleTranslator
from PIL import Image

# -------------------------------
# Load environment variables
# -------------------------------
# Read the local .env file first so the credential lookups below can see
# whatever it defines.
load_dotenv()

API_KEY = os.getenv("API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# Both credentials are mandatory: an unset or empty value aborts start-up.
if not (API_KEY and HF_TOKEN):
    raise ValueError("Please set API_KEY and HUGGINGFACE_TOKEN in your .env file!")

# -------------------------------
# Google API Setup
# -------------------------------
# Model id passed to the generate_content calls further down.
MODEL_NAME = "gemini-2.0-flash"
genai_client = genai.Client(api_key=API_KEY)

# -------------------------------
# Story Generator
# -------------------------------
def generate_story_for_letter(letter: str) -> str:
    """Request a short toddler-friendly story tied to *letter* from the model.

    Args:
        letter: The alphabet letter interpolated into the prompt.

    Returns:
        The generated story text with surrounding whitespace removed.
    """
    instructions = "".join(
        [
            f"Write a simple story or scene that begins with the letter '{letter}'.\n",
            "Make it engaging, simple and suitable for kids (2-3 years). ",
            "Do not include extra introductions, only the story.",
        ]
    )
    generation_config = types.GenerateContentConfig(
        max_output_tokens=4000,
        temperature=0.8,
    )
    reply = genai_client.models.generate_content(
        model=MODEL_NAME,
        contents=instructions,
        config=generation_config,
    )
    try:
        return reply.text.strip()
    except AttributeError:
        # Reached when `.text` is absent or has no `.strip()` (e.g. it is
        # None); read the first part of the first candidate directly instead.
        return reply.candidates[0].content.parts[0].text.strip()

# -------------------------------
# Story Generator from Image
# -------------------------------
def generate_story_from_image(image_path: str) -> str:
    """Generate a short toddler-friendly story describing an image.

    The picture itself is sent to the model together with the text
    instructions, so the story is based on the image content rather than on
    its file name.

    Args:
        image_path: Path to an image file that Pillow can open.

    Returns:
        The generated story text with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If *image_path* does not exist.
        PIL.UnidentifiedImageError: If the file cannot be identified as an
            image.
    """
    prompt = (
        "Write a simple kids' story (2-3 years) based only on the attached image.\n"
        "Do not include extra information, describe only the picture."
    )

    # The image must be part of `contents`; merely mentioning its path in the
    # prompt gives the model no access to the pixels.
    with Image.open(image_path) as source_image:
        # Convert to plain RGB while the file is still open. This yields an
        # in-memory copy and avoids palette/alpha modes that some encoders
        # reject. NOTE(review): the SDK chooses the upload encoding itself --
        # confirm against the installed google-genai version.
        rgb_image = source_image.convert("RGB")

    response = genai_client.models.generate_content(
        model=MODEL_NAME,
        contents=[rgb_image, prompt],
        config=types.GenerateContentConfig(
            max_output_tokens=4000,
            temperature=0.8,
        ),
    )
    try:
        story = response.text.strip()
    except AttributeError:
        # `.text` was absent or not a string; fall back to the first part of
        # the first candidate.
        story = response.candidates[0].content.parts[0].text.strip()
    return story

# -------------------------------
# Image Prompt Generator
# -------------------------------
def generate_image_prompt_from_story(story: str) -> str:
    """Turn *story* into a single-line illustration prompt via the model.

    Args:
        story: The story text embedded in the request.

    Returns:
        The model's reply with surrounding whitespace and leading/trailing
        double quotes stripped, and every newline replaced by a space.
    """
    request_text = (
        "Here is a short kids' story:\n"
        f"{story}\n\n"
        "Now create a vivid illustration prompt for Stable Diffusion. "
        "Return only the final descriptive prompt."
    )
    reply = genai_client.models.generate_content(
        model=MODEL_NAME,
        contents=request_text,
        config=types.GenerateContentConfig(max_output_tokens=150, temperature=0.9),
    )
    try:
        raw_prompt = reply.text.strip()
    except AttributeError:
        # `.text` was absent or not a string; use the first candidate's
        # first part instead.
        raw_prompt = reply.candidates[0].content.parts[0].text.strip()

    unquoted = raw_prompt.strip('"')
    return unquoted.replace("\n", " ")

# -------------------------------
# Stable Diffusion
# -------------------------------
# Pick the device first and derive the dtype from it: half precision is only
# appropriate on a GPU, and a float16 pipeline moved to the CPU fails at
# inference time, so fall back to float32 there.
_SD_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
_SD_DTYPE = torch.float16 if _SD_DEVICE == "cuda" else torch.float32

# NOTE(review): newer diffusers / huggingface_hub releases name the auth
# argument `token` instead of `use_auth_token` -- confirm against the
# installed version before renaming.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    use_auth_token=HF_TOKEN,
    torch_dtype=_SD_DTYPE,
)
pipe = pipe.to(_SD_DEVICE)

def generate_image_from_prompt(prompt: str, filename: str):
    """Render *prompt* with the module-level pipeline and save the result.

    Args:
        prompt: Text prompt handed to the Stable Diffusion pipeline.
        filename: Destination path for the first image the pipeline returns.
    """
    result = pipe(prompt)
    first_image = result.images[0]
    first_image.save(filename)

# -------------------------------
# Multilingual Audio Generator
# -------------------------------
def generate_audio(story: str, filename_prefix: str, languages=("en", "ta", "fr")):
    """Save one spoken MP3 of *story* per requested language.

    For the language code ``"en"`` the story is spoken as given; for every
    other code it is first translated with ``GoogleTranslator`` (source
    language auto-detected). Each file is written to
    ``f"{filename_prefix}_{lang}.mp3"``.

    Args:
        story: The text to speak. Must contain non-whitespace characters.
        filename_prefix: Prefix for the output file names.
        languages: Iterable of language codes to produce, in order. Defaults
            to English, Tamil and French.

    Raises:
        ValueError: If *story* is empty or only whitespace.
    """
    # Fail early with a clear message instead of letting the translator or
    # the TTS engine reject empty text further down.
    if not story or not story.strip():
        raise ValueError("story must contain text to translate and speak")

    for lang in languages:
        # Translate lazily, one language at a time, so audio already written
        # for earlier languages is kept if a later translation fails.
        if lang == "en":
            text = story
        else:
            text = GoogleTranslator(source="auto", target=lang).translate(story)

        filename = f"{filename_prefix}_{lang}.mp3"
        gTTS(text=text, lang=lang).save(filename)
        print(f"✅ Saved {lang} audio as {filename}")

# -------------------------------
# Main
# -------------------------------
def _produce_story_assets(story: str, heading: str, image_filename: str, audio_prefix: str) -> None:
    """Print *story*, then create its illustration and its audio files."""
    print(f"\n===== {heading} =====\n{story}\n")

    image_prompt = generate_image_prompt_from_story(story)
    print(f"🎨 Image Prompt: {image_prompt}")
    generate_image_from_prompt(image_prompt, image_filename)

    generate_audio(story, audio_prefix)


def main() -> None:
    """Interactive entry point: build a story from a letter or from an image.

    Asks the user to choose between a letter ('L') and an image ('I'),
    validates the follow-up input, and then generates the story, its
    illustration and its audio files. Invalid input prints a message and
    returns without generating anything.
    """
    choice = input("Type 'L' to provide a letter or 'I' to provide an image: ").strip().upper()

    if choice == 'L':
        letter = input("Enter a single alphabet letter (A–Z): ").strip().upper()
        # Explicit A-Z range check: str.isalpha() would also accept
        # non-ASCII letters, which the prompt does not allow.
        if not (len(letter) == 1 and "A" <= letter <= "Z"):
            print("❌ Please enter exactly one alphabet letter (A–Z).")
            return
        story = generate_story_for_letter(letter)
        _produce_story_assets(
            story,
            f"{letter} Story",
            f"{letter}_story_image.png",
            f"{letter}_story_audio",
        )

    elif choice == 'I':
        image_path = input("Enter the image file path: ").strip()
        # isfile() rather than exists(): a directory path is not an image.
        if not os.path.isfile(image_path):
            print("❌ Image file not found.")
            return
        story = generate_story_from_image(image_path)
        _produce_story_assets(
            story,
            "Story from Image",
            "image_story.png",
            "image_story_audio",
        )

    else:
        print("❌ Invalid choice. Please type 'L' or 'I'.")


if __name__ == "__main__":
    main()
