In [1]:
# Install the Python packages used by this notebook: transformers (model
# pipeline), gradio (web UI) and gtts (text-to-speech), plus accelerate and pydub.
!pip install -q transformers accelerate gradio gtts pydub
# Install system packages.
# NOTE(review): pydub, espeak and ffmpeg are not referenced by any code visible
# in this notebook — confirm they are still needed.
!apt-get install -y espeak ffmpeg

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
The following additional packages will be installed:
  espeak-data libespeak1 libportaudio2 libsonic0
The following NEW packages will be installed:
  espeak espeak-data libespeak1 libportaudio2 libsonic0
0 upgraded, 5 newly installed, 0 to remove and 41 not upgraded.
Need to get 1,382 kB of archives.
After this operation, 3,178 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudio2 amd64 19.6.0-1.1 [65.3 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libsonic0 amd64 0.2.0-11build1 [10.3 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 espeak-data amd64 1.48.15+dfsg-3 [1,085 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/u

In [2]:
import gradio as gr
from transformers import pipeline
import torch
from gtts import gTTS
import os
from datetime import datetime
import re

In [None]:
# Load the text-generation pipeline once at start-up; every rewrite request
# reuses this module-level `granite_pipe` object.
print("Loading IBM Granite 3.3 2b Instruct model...")
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
granite_pipe = pipeline(
    "text-generation",
    model="ibm-granite/granite-3.3-2b-instruct",
    device=device,
    max_new_tokens=2048,
    # `temperature` is a sampling parameter: it only takes effect when sampling
    # is enabled, so request it explicitly instead of relying on the
    # checkpoint's default generation config.
    do_sample=True,
    temperature=0.7
)
print(f"Model loaded successfully on {device}!")

# Tone-specific prompt templates for text rewriting, keyed by tone name.
# The keys must match the choices offered by the tone selector in the UI.
# Each template has a single `{text}` placeholder that rewrite_text_with_tone
# fills with str.format, and ends with the "Rewritten Text:" marker that the
# same function uses to cut the prompt off plain-string model output.
TONE_PROMPTS = {
    "Neutral": """Rewrite the following text in a clear, neutral, and professional tone.
Maintain all key information and meaning while making it suitable for straightforward narration.
Keep the same length and structure.

Original Text:
{text}

Rewritten Text:""",

    "Suspenseful": """Rewrite the following text in a suspenseful and dramatic tone.
Add tension, mystery, and engaging language while preserving the core message and facts.
Use vivid descriptions and compelling pacing.

Original Text:
{text}

Rewritten Text:""",

    "Inspiring": """Rewrite the following text in an inspiring and motivational tone.
Make it uplifting, encouraging, and energizing while maintaining factual accuracy.
Use empowering language and positive emphasis.

Original Text:
{text}

Rewritten Text:"""
}

# Voice options mapping: UI label -> language/accent code.
# The keys populate the voice dropdown and are passed to generate_audio as
# `voice_name`.
# NOTE(review): the gTTS call in generate_audio uses lang='en' and picks the
# accent from the label text ("UK"/"AU"), so the codes on the right do not
# reach the TTS engine, and nothing distinguishes the "Male" and "Female"
# labels — confirm whether this mapping is still meant to drive synthesis.
VOICE_OPTIONS = {
    "Lisa (Female - US)": "en-us",
    "Michael (Male - US)": "en-us",
    "Allison (Female - UK)": "en-uk"
}

def rewrite_text_with_tone(original_text, tone):
    """Ask the Granite pipeline to restate ``original_text`` in ``tone``.

    Args:
        original_text: Text to rewrite. ``None``, empty or whitespace-only
            input is rejected with a prompt message instead of calling the model.
        tone: Key into ``TONE_PROMPTS`` selecting the prompt template.

    Returns:
        The rewritten text with surrounding whitespace removed. Failures are
        reported in-band: either "Please provide text to rewrite." for empty
        input or a string starting with "Error during text rewriting: ".
    """
    if not (original_text and original_text.strip()):
        return "Please provide text to rewrite."

    try:
        # Single-turn chat: the filled-in template is the only user message.
        chat = [
            {"role": "user", "content": TONE_PROMPTS[tone].format(text=original_text)}
        ]
        generated = granite_pipe(chat)[0]['generated_text']

        if isinstance(generated, list):
            # Chat-style output: the reply is the last message in the list.
            generated = generated[-1]['content']
        elif isinstance(generated, str) and "Rewritten Text:" in generated:
            # Plain-string output: keep what follows the last prompt marker.
            generated = generated.rpartition("Rewritten Text:")[2].strip()

        return generated.strip()

    except Exception as e:
        # Any failure (unknown tone, model error, unexpected output shape)
        # is turned into a message rather than raised to the UI.
        return f"Error during text rewriting: {str(e)}"

def generate_audio(text, voice_name):
    """Generate an MP3 narration of ``text`` using gTTS.

    Only the accent is derived from ``voice_name`` (through the Google
    Translate top-level domain passed as ``tld``); nothing in the gTTS call
    selects a male or female voice.

    Args:
        text: Text to narrate. ``None``, empty or whitespace-only text is
            rejected without calling gTTS.
        voice_name: Voice label from the UI. Labels containing "UK" use the
            ``co.uk`` domain, labels containing "AU" use ``com.au``, and
            anything else (including ``None``) uses ``com``.

    Returns:
        ``(audio_path, status_message)``. ``audio_path`` is the name of the
        MP3 written to the current working directory, or ``None`` when there
        is no text or synthesis fails.
    """
    import uuid  # local import: only needed to make the file name unique

    if not text or not text.strip():
        return None, "No text to convert to audio."

    try:
        # Determine TLD based on the accent named in the label
        label = voice_name or ""
        if "UK" in label:
            tld = "co.uk"
        elif "AU" in label:
            tld = "com.au"
        else:
            tld = "com"

        # Generate audio
        tts = gTTS(text=text, lang='en', tld=tld, slow=False)

        # Save to file. The timestamp alone has one-second resolution, so two
        # requests in the same second would overwrite each other's output;
        # the random suffix keeps every result in its own file.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        audio_filename = f"echoverse_audio_{timestamp}_{uuid.uuid4().hex[:8]}.mp3"
        tts.save(audio_filename)

        return audio_filename, f"✅ Audio generated successfully! ({len(text)} characters)"

    except Exception as e:
        # Network or synthesis failures are reported as a status message.
        return None, f"❌ Error generating audio: {str(e)}"

def process_audiobook(original_text, tone, voice):
    """Rewrite ``original_text`` in the chosen tone, then narrate the result.

    Args:
        original_text: Text entered or loaded by the user.
        tone: Tone name, passed through to ``rewrite_text_with_tone``.
        voice: Voice label, passed through to ``generate_audio``.

    Returns:
        A ``(rewritten_text, audio_path, status)`` tuple. ``audio_path`` is
        ``None`` whenever the input is empty or either step fails; in the
        rewrite-failure case ``rewritten_text`` carries the failure message.
    """
    if not original_text or not original_text.strip():
        return "", None, "⚠️ Please provide text input."

    # Step 1: Rewrite text
    rewritten_text = rewrite_text_with_tone(original_text, tone)

    # rewrite_text_with_tone reports failure in-band as a message string.
    # Match its exact messages: a looser check for any text starting with
    # "Error" or "Please" would reject legitimate rewrites such as
    # "Please remain seated..." or "Errors are inevitable...".
    rewrite_failed = (
        rewritten_text == "Please provide text to rewrite."
        or rewritten_text.startswith("Error during text rewriting:")
    )
    if rewrite_failed:
        return rewritten_text, None, "❌ Failed to rewrite text."

    # Step 2: Generate audio
    audio_file, audio_status = generate_audio(rewritten_text, voice)

    if audio_file:
        final_status = f"""✨ Processing Complete!

📝 Original length: {len(original_text)} characters
🎨 Rewritten length: {len(rewritten_text)} characters
🎙️ Voice: {voice}
🎵 Audio: Ready for playback and download
"""
        return rewritten_text, audio_file, final_status
    else:
        # Keep the rewritten text visible even though narration failed.
        return rewritten_text, None, audio_status

def load_file(file):
    """Load text from an uploaded file.

    Args:
        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), or an
            upload object exposing the path of the stored file as ``.name``.

    Returns:
        The decoded file contents, ``""`` when ``file`` is ``None``, or a
        string starting with "Error reading file: " if the file cannot be
        read or decoded.
    """
    if file is None:
        return ""
    try:
        # Path-like values are used directly; anything else is expected to
        # carry its path in `.name`.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        # utf-8-sig reads plain UTF-8 as well, but drops a leading byte-order
        # mark so it does not end up at the start of the text.
        with open(path, 'r', encoding='utf-8-sig') as f:
            return f.read()
    except Exception as e:
        return f"Error reading file: {str(e)}"

# Custom CSS for professional styling, passed to gr.Blocks(css=...).
# Selectors and where they are attached:
#   #title, #subtitle  -> ids on the <div> wrappers in the header HTML/Markdown
#   .output-box        -> elem_classes of the rewritten-text Textbox
#   #generate-btn      -> elem_id of the "Generate Audiobook" button
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 30px;
    border-radius: 10px;
    margin-bottom: 20px;
}

#subtitle {
    text-align: center;
    color: #666;
    margin-bottom: 30px;
}

.output-box {
    border: 2px solid #667eea;
    border-radius: 8px;
    padding: 15px;
}

#generate-btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-size: 18px !important;
    font-weight: bold !important;
    padding: 15px !important;
}
"""

# Create Gradio interface: a header, a two-column body (inputs on the left,
# results on the right), an examples section, the event wiring and a footer.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    # Header (styled by the #title rule in custom_css)
    gr.HTML("""
        <div id="title">
            <h1>🎙️ EchoVerse</h1>
            <h3>AI-Powered Audiobook Creation System</h3>
        </div>
    """)

    # Tagline (styled by the #subtitle rule in custom_css)
    gr.Markdown("""
        <div id="subtitle">
            Transform your text into expressive, professional audiobooks with customizable tones and voices.
            Powered by IBM Granite 3.3 2b Instruct Model.
        </div>
    """)

    with gr.Row():
        # Left column: text source, tone/voice choices and the generate button
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Input Text")

            # File upload option
            file_input = gr.File(
                label="Upload .txt file (optional)",
                file_types=[".txt"]
            )

            # Text input
            input_text = gr.Textbox(
                label="Or paste your text here",
                placeholder="Enter or paste the text you want to convert into an audiobook...",
                lines=10,
                max_lines=15
            )

            # Load file button: copies the uploaded file's text into the
            # textbox (wired to load_file in the event handlers below)
            load_btn = gr.Button("📂 Load from File", size="sm")

            gr.Markdown("### 🎨 Customize Your Audiobook")

            # Tone selection; the choices must be keys of TONE_PROMPTS
            tone_radio = gr.Radio(
                choices=["Neutral", "Suspenseful", "Inspiring"],
                value="Neutral",
                label="Select Tone",
                info="Choose how you want the text to be narrated"
            )

            # Voice selection; the choices are the keys of VOICE_OPTIONS
            voice_dropdown = gr.Dropdown(
                choices=list(VOICE_OPTIONS.keys()),
                value="Lisa (Female - US)",
                label="Select Voice",
                info="Choose the narrator's voice"
            )

            # Generate button (styled by the #generate-btn rule in custom_css)
            generate_btn = gr.Button(
                "🎬 Generate Audiobook",
                variant="primary",
                size="lg",
                elem_id="generate-btn"
            )

        # Right column: read-only outputs filled by process_audiobook
        with gr.Column(scale=1):
            gr.Markdown("### ✨ Results")

            # Status message
            status_output = gr.Textbox(
                label="Status",
                lines=6,
                interactive=False
            )

            # Rewritten text output (styled by the .output-box rule)
            rewritten_output = gr.Textbox(
                label="Tone-Adapted Text",
                lines=10,
                max_lines=15,
                interactive=False,
                elem_classes="output-box"
            )

            # Audio output; receives the path of the generated audio file
            audio_output = gr.Audio(
                label="🎵 Generated Audiobook",
                type="filepath",
                interactive=False
            )

            gr.Markdown("""
                <div style="text-align: center; margin-top: 20px; padding: 15px; background-color: #f0f0f0; border-radius: 8px;">
                    💡 <b>Tip:</b> Click the download button on the audio player to save your audiobook!
                </div>
            """)

    # Examples section. Each row is [text, tone, voice] in the same order as
    # `inputs`. No fn/outputs are given here, so this only wires the three
    # input components; generation is triggered by the generate button.
    gr.Markdown("### 📚 Try These Examples")
    gr.Examples(
        examples=[
            ["The quantum computer represents a paradigm shift in computational capability. Unlike classical computers that use bits, quantum computers use qubits that can exist in multiple states simultaneously.", "Inspiring", "Lisa (Female - US)"],
            ["The old mansion stood at the end of the winding road. No one had entered its doors for decades, yet lights were seen flickering in the windows at midnight.", "Suspenseful", "Michael (Male - US)"],
            ["Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without explicit programming.", "Neutral", "Allison (Female - UK)"]
        ],
        inputs=[input_text, tone_radio, voice_dropdown],
        label="Click to load example"
    )

    # Event handlers
    # The only handler attached to the upload is this button click, so the
    # file's text reaches the textbox when "Load from File" is pressed.
    load_btn.click(
        fn=load_file,
        inputs=[file_input],
        outputs=[input_text]
    )

    # The outputs list follows the order of process_audiobook's return tuple:
    # (rewritten text, audio path, status).
    generate_btn.click(
        fn=process_audiobook,
        inputs=[input_text, tone_radio, voice_dropdown],
        outputs=[rewritten_output, audio_output, status_output]
    )

    # Footer
    gr.Markdown("""
        ---
        <div style="text-align: center; color: #666; padding: 20px;">
            <p>🚀 Built with IBM Granite 3.3 2b Instruct | Powered by Hugging Face Transformers</p>
            <p>Perfect for students, professionals, and accessibility needs</p>
        </div>
    """)

# Launch the app
# share=True requests a temporary public gradio.live URL for the app.
# debug=True keeps this cell running so errors and logs stay visible in the
# notebook; interrupt the cell to stop the server.
# show_error=True asks Gradio to surface handler errors in the browser.
if __name__ == "__main__":
    demo.launch(
        share=True,
        debug=True,
        show_error=True
    )

Loading IBM Granite 3.3 2b Instruct model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

Device set to use cuda


Model loaded successfully on cuda!
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://889a4a7ac0802d281c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
