In [None]:
"""
EchoVerse - Generative AI Audiobook Creation System
Uses IBM Granite 3.2 2B Instruct model and gTTS for audio generation
"""

# Install required packages
!pip install -q gradio transformers torch gtts accelerate sentencepiece

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from gtts import gTTS
import os
from datetime import datetime

# Initialize the IBM Granite model
print("Loading IBM Granite model...")
MODEL_NAME = "ibm-granite/granite-3.2-2b-instruct"

# NOTE: `trust_remote_code=True` is deliberately not passed. That flag lets
# Python code shipped inside the model repository run on this machine, and the
# checkpoint is expected to load with the model/tokenizer classes that ship
# with transformers itself.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # Let the library decide where to place the weights.
        device_map="auto",
        # Load the weights in half precision. Newer transformers releases warn
        # that `torch_dtype` is deprecated in favour of `dtype`; the older
        # spelling is kept so earlier releases keep working.
        torch_dtype=torch.float16,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Nothing below can work without the model: report the cause, then let the
    # original exception abort the cell.
    print(f"Error loading model: {e}")
    raise

# Tone-specific prompts
# Every prompt shares the same tail: a `{text}` placeholder for the passage,
# followed by the cue after which the model is expected to continue.
_PROMPT_TAIL = "\n\n{text}\n\nRewritten text:"

# Per-tone instructions (the only part that differs between prompts).
_TONE_INSTRUCTIONS = {
    "Neutral": (
        "Rewrite the following text in a clear, professional, and neutral tone.\n"
        "Maintain all key information and meaning while making it suitable for audiobook narration.\n"
        "Keep the same structure and length:"
    ),
    "Suspenseful": (
        "Rewrite the following text with a suspenseful and thrilling tone.\n"
        "Add dramatic tension, vivid imagery, and engaging pacing while preserving the core message.\n"
        "Make it captivating for audiobook listeners:"
    ),
    "Inspiring": (
        "Rewrite the following text with an inspiring and motivational tone.\n"
        "Add uplifting language, powerful imagery, and encouraging elements while keeping the original meaning.\n"
        "Make it energizing and impactful for audiobook listeners:"
    ),
}

# Tone name -> full prompt template, filled in with `.format(text=...)`.
TONE_PROMPTS = {
    tone_name: instructions + _PROMPT_TAIL
    for tone_name, instructions in _TONE_INSTRUCTIONS.items()
}

def generate_rewritten_text(text, tone, temperature=0.7, max_tokens=1024):
    """
    Rewrite text using IBM Granite model with specified tone

    Args:
        text: Passage to rewrite.
        tone: Key into ``TONE_PROMPTS`` selecting the rewrite instructions.
        temperature: Sampling temperature. A missing or non-positive value
            switches to greedy decoding instead of sampling.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The rewritten passage. Failures are not raised: any exception, and an
        empty completion, are reported as a string that starts with
        ``"Error during text generation:"``.
    """
    try:
        # Get the appropriate prompt
        prompt = TONE_PROMPTS[tone].format(text=text)

        # Tokenize input
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        # Number of prompt tokens actually fed to the model (after truncation).
        prompt_length = inputs["input_ids"].shape[-1]

        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "pad_token_id": tokenizer.eos_token_id,
        }
        if temperature and temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
        else:
            # Sampling needs a positive temperature; fall back to greedy decoding.
            generation_kwargs["do_sample"] = False

        # Generate rewritten text
        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_kwargs)

        # The returned sequence is the prompt followed by the continuation.
        # Slice by token count rather than searching the decoded string, so the
        # result does not depend on the decoded prompt matching the original
        # text or on the cue phrase appearing only once.
        new_tokens = outputs[0][prompt_length:]
        rewritten = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        if not rewritten:
            # Never hand the prompt itself back as if it were the rewrite.
            return "Error during text generation: the model returned no text"

        return rewritten

    except Exception as e:
        return f"Error during text generation: {str(e)}"

def text_to_speech(text, lang='en', slow=False):
    """
    Convert text to speech using gTTS

    Args:
        text: Text to narrate.
        lang: Language code passed to gTTS.
        slow: Passed to gTTS to request slower speech.

    Returns:
        The name of the MP3 file written to the current working directory, or
        a string starting with ``"Error generating audio:"`` on failure
        (exceptions are reported this way rather than raised).
    """
    import uuid  # local import: only needed to make file names unique

    audio_file = None
    try:
        # A timestamp alone has one-second resolution, so two requests in the
        # same second would write to the same file; the random suffix keeps
        # every output distinct.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        audio_file = f"audiobook_{timestamp}_{uuid.uuid4().hex[:8]}.mp3"

        # Generate speech
        tts = gTTS(text=text, lang=lang, slow=slow)
        tts.save(audio_file)

        return audio_file
    except Exception as e:
        # save() may have created the file before failing; remove any leftover
        # so failed requests do not accumulate files on disk.
        if audio_file and os.path.exists(audio_file):
            try:
                os.remove(audio_file)
            except OSError:
                pass  # best-effort cleanup; the original error is what matters
        return f"Error generating audio: {str(e)}"

def process_audiobook(input_text, file_input, tone, speed):
    """
    Main processing function for the EchoVerse system

    Reads the source text (an uploaded file takes priority over pasted text),
    validates its length, rewrites it in the requested tone and converts the
    rewrite to speech.

    Args:
        input_text: Text pasted into the UI; may be empty or None.
        file_input: Uploaded file, given either as a path string or as an
            object whose ``.name`` attribute is the path; None if no upload.
        tone: Tone name forwarded to ``generate_rewritten_text``.
        speed: ``"Slow"`` requests slow narration; any other value is normal.

    Returns:
        A 3-tuple ``(original_text, rewritten_text_or_message, audio_path)``.
        ``audio_path`` is None whenever validation, rewriting or audio
        generation failed; in the first two cases the second element carries
        the message to show.
    """
    # Determine text source
    if file_input is not None:
        # Accept a plain path string as well as an object exposing `.name`.
        file_path = getattr(file_input, "name", file_input)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                original_text = f.read()
        except Exception as e:
            return "Error reading file", f"Error: {str(e)}", None
    elif input_text and input_text.strip():
        original_text = input_text
    else:
        return "No input provided", "Please provide text or upload a file", None

    # Validate text length
    if len(original_text.strip()) < 10:
        return original_text, "Text is too short. Please provide at least 10 characters.", None

    if len(original_text) > 5000:
        return original_text, "Text is too long. Please provide less than 5000 characters.", None

    # Step 1: Rewrite text with selected tone
    status_update = f"🔄 Rewriting text with {tone} tone..."
    print(status_update)

    rewritten_text = generate_rewritten_text(original_text, tone)

    # Match the exact failure prefix: a legitimate rewrite may itself begin
    # with the word "Error".
    if rewritten_text.startswith("Error during text generation:"):
        return original_text, rewritten_text, None

    # Step 2: Generate audio
    status_update = "🎙️ Generating audio..."
    print(status_update)

    slow_speech = (speed == "Slow")
    audio_file = text_to_speech(rewritten_text, slow=slow_speech)

    if isinstance(audio_file, str) and audio_file.startswith("Error generating audio:"):
        # Keep the rewrite visible in the UI, but log why no audio was produced
        # instead of dropping the message.
        print(audio_file)
        return original_text, rewritten_text, None

    return original_text, rewritten_text, audio_file

def clear_all():
    """
    Produce the reset values for the UI: one entry per component wired to the
    "Clear All" button, in that order (text box, file upload, original text,
    rewritten text, audio player).
    """
    empty_text = ""
    nothing = None
    reset_values = (empty_text, nothing, empty_text, empty_text, nothing)
    return reset_values

# Create Gradio Interface
# Layout, top to bottom: intro text, input column (text box, file upload,
# tone/speed selectors, action buttons), side-by-side original/rewritten text,
# the audio player, and a usage guide. Event wiring is at the end of the block.
with gr.Blocks(title="EchoVerse - AI Audiobook Generator", theme=gr.themes.Soft()) as app:

    gr.Markdown("""
    # 🎧 EchoVerse - AI Audiobook Generator

    Transform your text into expressive, downloadable audio content with customizable tone and voice.
    Powered by **IBM Granite 3.2 2B Instruct** model.

    ### Features:
    - 📝 Paste text or upload .txt files
    - 🎭 Choose from Neutral, Suspenseful, or Inspiring tones
    - 🎵 Generate natural-sounding narrations
    - ⬇️ Download audio in MP3 format
    """)

    # --- Inputs ---
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Input")

            input_text = gr.Textbox(
                label="Paste Your Text",
                placeholder="Enter the text you want to convert to audiobook...",
                lines=8,
                max_lines=15
            )

            # When a file is uploaded, process_audiobook uses it in preference
            # to the pasted text.
            file_input = gr.File(
                label="Or Upload .txt File",
                file_types=[".txt"],
                type="filepath"
            )

            with gr.Row():
                # Choices must match the keys of TONE_PROMPTS.
                tone_selector = gr.Radio(
                    choices=["Neutral", "Suspenseful", "Inspiring"],
                    value="Neutral",
                    label="Select Tone"
                )

                # process_audiobook compares this value against "Slow".
                speed_selector = gr.Radio(
                    choices=["Normal", "Slow"],
                    value="Normal",
                    label="Speech Speed"
                )

            with gr.Row():
                generate_btn = gr.Button("🎙️ Generate Audiobook", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear All", size="lg")

    # --- Outputs: original and rewritten text side by side ---
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Original Text")
            original_output = gr.Textbox(
                label="Your Original Input",
                lines=6,
                interactive=False
            )

        with gr.Column(scale=1):
            gr.Markdown("### Rewritten Text")
            rewritten_output = gr.Textbox(
                label="Tone-Enhanced Version",
                lines=6,
                interactive=False
            )

    # --- Output: generated narration ---
    with gr.Row():
        audio_output = gr.Audio(
            label="Generated Audiobook",
            type="filepath"
        )

    gr.Markdown("""
    ---
    ### 📚 Usage Instructions:
    1. **Input**: Paste text or upload a .txt file (max 5000 characters)
    2. **Tone**: Select desired tone (Neutral, Suspenseful, or Inspiring)
    3. **Speed**: Choose narration speed (Normal or Slow)
    4. **Generate**: Click the button to create your audiobook
    5. **Download**: Use the download button in the audio player

    ### 💡 Tips:
    - For best results, use well-formatted text with proper punctuation
    - Shorter texts (200-1000 words) work best
    - The Inspiring tone is great for motivational content
    - The Suspenseful tone works well for stories and narratives
    - The Neutral tone is perfect for educational or professional content
    """)

    # Event handlers
    # process_audiobook returns (original, rewritten-or-message, audio path),
    # matching the three output components in order.
    generate_btn.click(
        fn=process_audiobook,
        inputs=[input_text, file_input, tone_selector, speed_selector],
        outputs=[original_output, rewritten_output, audio_output]
    )

    # clear_all returns five reset values, one per component listed here; the
    # tone and speed selectors keep their current choice.
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[input_text, file_input, original_output, rewritten_output, audio_output]
    )

# Start the web UI
print("\n🚀 Launching EchoVerse...")
app.launch(
    share=True,  # also serve the app through a public share link
    debug=True,  # keep the cell running so errors and logs stay visible
)

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.2/98.2 kB 4.5 MB/s eta 0:00:00
Loading IBM Granite model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/786 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Model loaded successfully!

🚀 Launching EchoVerse...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://b98c2baaa69d263f70.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


🔄 Rewriting text with Neutral tone...
🎙️ Generating audio...
