In [None]:
# Install Required Libraries
!pip install python-pptx==0.6.23 edge-tts==6.1.12 pydub==0.25.1 fastapi==0.109.2 pyngrok==7.0.0 uvicorn nest-asyncio

# System Dependencies for Audio Processing
!apt-get install -y ffmpeg -qqq

import os
import uvicorn
import nest_asyncio
import re
import zipfile
import asyncio
import shutil
from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from pptx import Presentation
from edge_tts import Communicate
from pydub import AudioSegment
from pyngrok import ngrok

# Apply nest_asyncio to allow running in environments with existing event loops
nest_asyncio.apply()

# Ngrok authentication token.
# SECURITY: never hard-code the token in the notebook. It is read from the
# NGROK_AUTH_TOKEN environment variable, falling back to an interactive prompt.
# Any token that was previously committed in this cell must be treated as
# leaked and revoked in the ngrok dashboard.
import getpass

NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass.getpass("ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# FastAPI App
app = FastAPI()

print("🔊 Fast Audio Service Using Edge TTS (PrabhatNeural)")

def clean_text(text):
    """Strip bullet glyphs (U+2022, U+25E6) from *text* and trim surrounding whitespace."""
    without_bullets = re.sub(r'[•\u2022\u25E6]', '', text)
    return without_bullets.strip()

async def process_slide(slide, index, output_folder):
    """Generate a narrated WAV file for a single slide.

    The text of every shape that exposes a ``text`` attribute is cleaned with
    ``clean_text``, joined with spaces, synthesized to MP3 with Edge TTS and
    converted to a mono 44.1 kHz WAV file.

    Args:
        slide: Slide object whose ``shapes`` are scanned for text.
        index: Slide number, used in the output file names and log messages.
        output_folder: Existing directory that receives the audio files.

    Returns:
        Path of ``slide_<index>.wav``, or ``None`` when the slide has no text
        or any step fails (the error is printed, not raised, so one bad slide
        does not abort the others).
    """
    temp_mp3 = f"{output_folder}/temp_{index}.mp3"
    output_wav = f"{output_folder}/slide_{index}.wav"
    try:
        # Filter on the *cleaned* text so a shape holding only bullet glyphs
        # does not end up sending an empty string to the TTS service.
        cleaned = (clean_text(shape.text) for shape in slide.shapes if hasattr(shape, "text"))
        slide_text = [text for text in cleaned if text]
        if not slide_text:
            return None  # Skip empty slides

        full_text = " ".join(slide_text)

        # Generate TTS audio, spoken 20% faster than the default rate
        communicate = Communicate(full_text, "en-IN-PrabhatNeural", rate="+20%")
        await communicate.save(temp_mp3)

        def _convert_to_wav():
            segment = AudioSegment.from_mp3(temp_mp3).set_frame_rate(44100).set_channels(1)
            # export() returns the open output file; close it explicitly.
            # No bitrate is passed: it has no meaning for uncompressed PCM WAV.
            segment.export(output_wav, format="wav").close()

        # The conversion is blocking (ffmpeg + file I/O); run it in a worker
        # thread so the other slides' TTS requests keep making progress.
        await asyncio.get_running_loop().run_in_executor(None, _convert_to_wav)
        return output_wav
    except Exception as e:
        print(f"Error processing slide {index}: {str(e)}")
        return None
    finally:
        # Always drop the intermediate MP3, including when a step above failed.
        try:
            os.remove(temp_mp3)
        except OSError:
            pass  # never created, or already gone

async def process_pptx(file_path):
    """Generate audio for every slide of the presentation at *file_path*.

    All slides are handed to ``process_slide`` and awaited together. The audio
    is written to the fixed folder ``fast_audio_output``.

    Returns:
        List of generated WAV paths in slide order (slides that produced no
        audio are dropped), or an empty list if anything raises.
    """
    output_folder = "fast_audio_output"
    try:
        presentation = Presentation(file_path)
        os.makedirs(output_folder, exist_ok=True)

        slide_jobs = []
        for number, slide in enumerate(presentation.slides, start=1):
            slide_jobs.append(process_slide(slide, number, output_folder))
        results = await asyncio.gather(*slide_jobs)

        # process_slide returns None for skipped/failed slides
        return list(filter(None, results))
    except Exception as e:
        print(f"Error processing PPTX: {str(e)}")
        return []

def create_zip(audio_files):
    """Bundle *audio_files* into ``fast_audio_output.zip`` in the working directory.

    Each file is stored under its base name. Returns the archive path; any
    failure is printed and re-raised as an HTTP 500 ``HTTPException``.
    """
    zip_path = "fast_audio_output.zip"
    try:
        with zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
            for audio_path in audio_files:
                archive.write(audio_path, arcname=os.path.basename(audio_path))
    except Exception as e:
        print(f"Error creating ZIP: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to create ZIP file")
    return zip_path

@app.post("/generate-audio/")
async def create_upload_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Accept a PPTX upload, narrate its slides and return the audio as a ZIP.

    Args:
        background_tasks: Used to delete the upload, the audio folder and the
            ZIP once the response has been sent.
        file: The uploaded presentation; its name must end in ``.pptx``
            (case-insensitive).

    Returns:
        A ``FileResponse`` streaming ``fast_audio_output.zip``.

    Raises:
        HTTPException: 400 for a non-PPTX upload; 500 when no audio could be
            generated or any other step fails.

    NOTE: the working paths (``fast_audio_output``, the ZIP name) are fixed, so
    overlapping requests share them.
    """
    # The filename is client-controlled: keep only its last path component so
    # it cannot point outside the working directory.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not safe_name.lower().endswith('.pptx'):
        raise HTTPException(status_code=400, detail="Invalid file type. Only .pptx accepted")

    file_path = f"uploaded_{safe_name}"

    def _discard_partial_work():
        """Best-effort removal of the upload and audio folder after a failure."""
        shutil.rmtree("fast_audio_output", ignore_errors=True)
        try:
            os.remove(file_path)
        except OSError:
            pass

    try:
        with open(file_path, "wb") as buffer:
            buffer.write(await file.read())

        audio_files = await process_pptx(file_path)
        if not audio_files:
            raise HTTPException(status_code=500, detail="No audio generated from the presentation")

        zip_path = create_zip(audio_files)
    except HTTPException:
        # Deliberate HTTP errors keep their own status code and detail
        # instead of being re-wrapped as a generic 500.
        _discard_partial_work()
        raise
    except Exception as e:
        _discard_partial_work()
        print(f"API Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Server error: {str(e)}") from e

    # Schedule cleanup to run after the response has been sent
    background_tasks.add_task(shutil.rmtree, "fast_audio_output", ignore_errors=True)
    background_tasks.add_task(os.remove, file_path)
    background_tasks.add_task(os.remove, zip_path)

    return FileResponse(zip_path, media_type='application/zip', filename="fast_audio_output.zip")

# Start Ngrok
def start_ngrok():
    """Open an ngrok tunnel to local port 8000 and print the public endpoint.

    Failures are reported on stdout and swallowed; nothing is returned.
    """
    try:
        public_url = ngrok.connect(8000).public_url
        print(f"🚀 Fast Audio Service Ready at: {public_url}/generate-audio/")
    except Exception as tunnel_error:
        print("❌ Error starting Ngrok:", str(tunnel_error))

# Run the FastAPI Server with Ngrok
def run_server():
    """Open the ngrok tunnel, then serve the FastAPI app on 0.0.0.0:8000."""
    start_ngrok()
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)


# Start the service when this cell/script is executed directly
if __name__ == "__main__":
    run_server()

Collecting fastapi==0.109.2
  Using cached fastapi-0.109.2-py3-none-any.whl.metadata (25 kB)
Collecting pyngrok==7.0.0
  Using cached pyngrok-7.0.0-py3-none-any.whl
Collecting starlette<0.37.0,>=0.36.3 (from fastapi==0.109.2)
  Using cached starlette-0.36.3-py3-none-any.whl.metadata (5.9 kB)
Using cached fastapi-0.109.2-py3-none-any.whl (92 kB)
Using cached starlette-0.36.3-py3-none-any.whl (71 kB)
Installing collected packages: pyngrok, starlette, fastapi
  Attempting uninstall: pyngrok
    Found existing installation: pyngrok 7.2.3
    Uninstalling pyngrok-7.2.3:
      Successfully uninstalled pyngrok-7.2.3
  Attempting uninstall: starlette
    Found existing installation: starlette 0.46.0
    Uninstalling starlette-0.46.0:
      Successfully uninstalled starlette-0.46.0
  Attempting uninstall: fastapi
    Found existing installation: fastapi 0.115.11
    Uninstalling fastapi-0.115.11:
      Successfully uninstalled fastapi-0.115.11
Successfully installed fastapi-0.109.2 pyngrok-7.0.



🔊 Fast Audio Service Using Edge TTS (PrabhatNeural)


INFO:     Started server process [672]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


🚀 Fast Audio Service Ready at: https://6643-34-106-25-186.ngrok-free.app/generate-audio/
INFO:     203.192.225.140:0 - "POST /generate-audio/ HTTP/1.1" 200 OK
INFO:     203.192.225.140:0 - "POST /generate-audio/ HTTP/1.1" 200 OK
INFO:     203.192.225.140:0 - "POST /generate-audio/ HTTP/1.1" 200 OK
INFO:     203.192.225.140:0 - "POST /generate-audio/ HTTP/1.1" 200 OK
INFO:     203.192.225.140:0 - "POST /generate-audio/ HTTP/1.1" 200 OK


In [None]:
pip install --upgrade fastapi uvicorn pyngrok


Collecting fastapi
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.0-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.11-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading starlette-0.46.0-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, starlette, fastapi
  Attempting uninstall: pyngrok
    Found existing installation: pyngrok 7.0.0
    Uninstalling pyngrok-7.0.0:
      Successfully uninstalled pyngrok-7.0.0
  Attempting uninstall: starlette
    Found existing installation: starlette 0.36.3
    Uninst

In [None]:
!pip install uvicorn



In [None]:
!pip install python-multipart

Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Installing collected packages: python-multipart
Successfully installed python-multipart-0.0.20


In [None]:
import getpass
import os

from google import genai

# SECURITY: the API key is read from the GEMINI_API_KEY environment variable
# (or prompted for) instead of being hard-coded. Any key that was previously
# committed in this cell must be treated as leaked and revoked.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY") or getpass.getpass("Gemini API key: ")
)

# Single request to confirm the key and model name work
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents="Explain how AI works",
)

print(response.text)

Okay, let's break down how AI works, aiming for clarity and avoiding overly technical jargon.  We'll cover the core concepts and some common techniques.

**The Core Idea:  Making Machines "Smart"**

At its heart, Artificial Intelligence (AI) is about creating machines that can perform tasks that typically require human intelligence.  This includes things like:

*   **Learning:**  Improving performance based on data.
*   **Problem-solving:**  Finding solutions to complex issues.
*   **Decision-making:**  Choosing the best course of action.
*   **Understanding Language:** Processing and interpreting human language.
*   **Recognizing Patterns:** Identifying trends and relationships in data.
*   **Perception:**  Interpreting sensory input (like images or sound).

**Two Main Approaches to AI:**

While the lines are blurring, we can broadly categorize AI approaches into two main camps:

1.  **Rule-Based AI (Symbolic AI or "Good Old-Fashioned AI" - GOFAI):**

    *   **How it Works:**  This a

In [None]:
# Install Required Libraries in Colab
!pip install python-pptx edge-tts pydub google-generativeai
!apt-get install -y ffmpeg -qqq  # For pydub audio processing

import os
import re
import zipfile
import asyncio
from google.colab import files
from pptx import Presentation
from pptx.enum.shapes import PP_PLACEHOLDER
from edge_tts import Communicate
from pydub import AudioSegment
import google.generativeai as genai

# User Configuration
USER_FIELD_OF_STUDY = "Computer Science"  # Change this to your field of study

# Google Gemini API Configuration
# SECURITY: the key is read from the GEMINI_API_KEY environment variable (or
# prompted for) instead of being hard-coded. Any key that was previously
# committed in this cell must be treated as leaked and revoked.
import getpass

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or getpass.getpass("Gemini API key: ")
genai.configure(api_key=GEMINI_API_KEY)

def clean_text(text):
    """Strip bullet glyphs (U+2022, U+25E6) from *text* and trim surrounding whitespace."""
    without_bullets = re.sub(r'[•\u2022\u25E6]', '', text)
    return without_bullets.strip()

def create_embedding(text):
    """Placeholder embedding step: log a 50-character preview and return *text* unchanged."""
    preview = text[:50]  # keep the log line short
    print(f"Creating embedding for: {preview}...")
    # No real embedding is computed; the input is passed straight through.
    return text

def generate_narration(slide_text, overall_topic, slide_title=None):
    """Generate a personalized plain-text narration for one slide via the Gemini API.

    Args:
        slide_text: Text content of the slide.
        overall_topic: Topic of the whole presentation, included in the prompt.
        slide_title: Optional slide title; when given, the prompt mentions it.

    Returns:
        The model's narration with ``*`` and ``_`` characters removed. If the
        API call fails or returns no text, the original ``slide_text`` is
        returned so the slide can still be narrated.
    """
    if slide_title:
        prompt = (
            f"As a student studying {USER_FIELD_OF_STUDY}, for my presentation on '{overall_topic}', "
            f"specifically on the slide titled '{slide_title}', create an engaging narration in plain text "
            f"to explain the following content to my audience: {slide_text}"
        )
    else:
        prompt = (
            f"As a student studying {USER_FIELD_OF_STUDY}, for my presentation on '{overall_topic}', "
            f"create an engaging narration in plain text to explain the following slide content to my audience: {slide_text}"
        )
    try:
        # 'gemini-pro' is rejected with a 404 by the v1beta API; use a model
        # name that the API's model listing actually reports.
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        # Strip markdown emphasis markers so they are not read aloud
        narration_text = re.sub(r'[*_]', '', response.text.strip())
        if narration_text:
            return narration_text
        print("Gemini API returned an empty narration; using the slide text instead.")
    except Exception as e:
        print(f"Error calling Gemini API: {str(e)}")
    # Fall back to the raw slide text. No label is prepended: the return value
    # is fed to the text-to-speech step and a label would be spoken.
    return slide_text

async def process_slide(slide, index, output_folder, overall_topic):
    """Narrate one slide: extract its text, generate a narration, synthesize audio.

    Args:
        slide: Slide object whose ``shapes`` are scanned for a title and text.
        index: Slide number, used in the output file names and log messages.
        output_folder: Existing directory that receives the audio files.
        overall_topic: Presentation topic forwarded to ``generate_narration``.

    Returns:
        Path of ``slide_<index>.wav``, or ``None`` when the slide has no text
        or any step fails (the error is printed, not raised, so one bad slide
        does not abort the others).
    """
    # Define audio file paths
    temp_mp3 = f"{output_folder}/temp_{index}.mp3"
    output_wav = f"{output_folder}/slide_{index}.wav"
    try:
        # Extract slide title; title slides use a CENTER_TITLE placeholder
        slide_title = None
        for shape in slide.shapes:
            if shape.is_placeholder and shape.placeholder_format.type in (
                PP_PLACEHOLDER.TITLE,
                PP_PLACEHOLDER.CENTER_TITLE,
            ):
                slide_title = clean_text(shape.text)
                break

        # Filter on the *cleaned* text so a shape holding only bullet glyphs
        # does not contribute an empty entry.
        cleaned = (clean_text(shape.text) for shape in slide.shapes if hasattr(shape, "text"))
        slide_text = [text for text in cleaned if text]
        if not slide_text:
            print(f"Slide {index}: No text found, skipping.")
            return None

        full_text = " ".join(slide_text)

        # Create embedding (mocked)
        embedded_text = create_embedding(full_text)

        # generate_narration performs a blocking network call; run it in a
        # worker thread so the slides are processed concurrently instead of
        # one after another on the event loop.
        loop = asyncio.get_running_loop()
        narration_text = await loop.run_in_executor(
            None, generate_narration, embedded_text, overall_topic, slide_title
        )
        print(f"Slide {index} narration: {narration_text[:50]}...")  # Preview narration

        # Generate audio with Edge TTS
        communicate = Communicate(narration_text, "en-IN-PrabhatNeural", rate="+20%")
        await communicate.save(temp_mp3)

        def _convert_to_wav():
            segment = AudioSegment.from_mp3(temp_mp3).set_frame_rate(44100).set_channels(1)
            # export() returns the open output file; close it explicitly.
            # No bitrate is passed: it has no meaning for uncompressed PCM WAV.
            segment.export(output_wav, format="wav").close()

        # Convert MP3 to WAV off the event loop (ffmpeg + file I/O is blocking)
        await loop.run_in_executor(None, _convert_to_wav)
        print(f"Slide {index}: Audio generated at {output_wav}")
        return output_wav
    except Exception as e:
        print(f"Error processing slide {index}: {str(e)}")
        return None
    finally:
        # Always drop the intermediate MP3, including when a step above failed.
        try:
            os.remove(temp_mp3)
        except OSError:
            pass  # never created, or already gone

async def process_pptx(file_path):
    """Generate narration audio for every slide of the presentation at *file_path*.

    The presentation topic is derived from the file name (extension removed,
    underscores turned into spaces). Audio is written to ``audio_output``.

    Returns:
        List of generated WAV paths in slide order (slides that produced no
        audio are dropped), or an empty list if anything raises.
    """
    output_folder = "audio_output"
    try:
        presentation = Presentation(file_path)
        os.makedirs(output_folder, exist_ok=True)

        # Topic = file name without directory or extension
        base_name = os.path.basename(file_path)
        stem, _extension = os.path.splitext(base_name)
        overall_topic = stem.replace('_', ' ')
        print(f"Overall topic: {overall_topic}")

        # One coroutine per slide, numbered from 1, awaited together
        slide_jobs = []
        for number, slide in enumerate(presentation.slides, start=1):
            slide_jobs.append(process_slide(slide, number, output_folder, overall_topic))
        results = await asyncio.gather(*slide_jobs)

        # process_slide returns None for skipped/failed slides
        return list(filter(None, results))
    except Exception as e:
        print(f"Error processing PPTX: {str(e)}")
        return []

def create_zip(audio_files):
    """Bundle *audio_files* into ``audio_output.zip`` and return the archive path.

    Each file is stored under its base name, using DEFLATE compression.
    """
    zip_path = "audio_output.zip"
    archive = zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED)
    with archive:
        for audio_path in audio_files:
            archive.write(audio_path, arcname=os.path.basename(audio_path))
    return zip_path

import shutil

# Upload PPTX file in Colab
print("Please upload your PPTX file (e.g., 'ChatGPT3.pptx'):")
uploaded = files.upload()

# Process the uploaded file
for filename in uploaded.keys():
    if not filename.lower().endswith('.pptx'):
        print("Error: Only .pptx files are supported!")
        continue

    pptx_file_path = filename
    print(f"Processing {pptx_file_path}...")

    # Run async processing.
    # NOTE(review): inside a notebook an event loop is already running, so
    # asyncio.run() presumably only works here because nest_asyncio.apply()
    # was executed earlier in the session - confirm before running this cell
    # in a fresh runtime.
    audio_files = asyncio.run(process_pptx(pptx_file_path))

    if not audio_files:
        print("No audio files generated!")
    else:
        # Create ZIP file
        zip_path = create_zip(audio_files)
        print(f"ZIP file created at: {zip_path}")

        # Download the ZIP file in Colab with explicit confirmation
        try:
            files.download(zip_path)
            print(f"Downloaded: {zip_path}")
        except Exception as e:
            print(f"Error downloading ZIP file: {str(e)}")

        # Clean up. rmtree removes the folder even if stray files (e.g. a
        # temp MP3 from a failed slide) are still inside, where os.rmdir would
        # raise and abort the remaining cleanup.
        shutil.rmtree("audio_output", ignore_errors=True)
        for leftover in (pptx_file_path, zip_path):
            try:
                os.remove(leftover)
            except OSError as e:
                print(f"Could not remove {leftover}: {str(e)}")
        print("Cleanup completed.")

Please upload your PPTX file (e.g., 'ChatGPT3.pptx'):


Saving chatgpt3.pptx to chatgpt3 (1).pptx
Processing chatgpt3 (1).pptx...
Overall topic: chatgpt3 (1)
Creating embedding for: ChatGPT: Unraveling user Challenges & Proposing Ta...




Error calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Slide 1 narration: Fallback narration: ChatGPT: Unraveling user Chall...
Creating embedding for: Introduction ChatGPT: An advanced AI language mode...




Error calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Slide 2 narration: Fallback narration: Introduction ChatGPT: An advan...
Creating embedding for: NLP  APPLICATION IN CHATGPT Understanding Meaning:...




Error calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Slide 3 narration: Fallback narration: NLP  APPLICATION IN CHATGPT Un...
Creating embedding for: EVOLUTION OF  CHATGPT GPT-1 (2018):
First iteratio...




Error calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Slide 4 narration: Fallback narration: EVOLUTION OF  CHATGPT GPT-1 (2...
Creating embedding for: CONCLUSION Enhanced Decision-Making:
Integrating a...




Error calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Slide 5 narration: Fallback narration: CONCLUSION Enhanced Decision-M...
Creating embedding for: THANK YOU...




Error calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Slide 6 narration: Fallback narration: THANK YOU...
Slide 1: Audio generated at audio_output/slide_1.wav
Slide 6: Audio generated at audio_output/slide_6.wav
Slide 2: Audio generated at audio_output/slide_2.wav
Slide 4: Audio generated at audio_output/slide_4.wav
Slide 5: Audio generated at audio_output/slide_5.wav
Slide 3: Audio generated at audio_output/slide_3.wav
ZIP file created at: audio_output.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded: audio_output.zip
Cleanup completed.


In [None]:
# Print the name of every model the configured Gemini API key can see.
# Relies on `genai` having been imported and configured by an earlier cell.
models = genai.list_models()
for model in models:
    print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experim

In [None]:


        # Clean up after download
        for file in audio_files:
            os.remove(file)
        os.rmdir("audio_output")
        os.remove(pptx_file_path)
        os.remove(zip_path)
        print("Cleanup completed.")

Cleanup completed.


In [None]:
# Install Required Libraries in Colab
!pip install python-pptx edge-tts pydub google-generativeai
!apt-get install -y ffmpeg -qqq  # For pydub audio processing

import os
import re
import zipfile
import asyncio
from google.colab import files
from pptx import Presentation
from pptx.enum.shapes import PP_PLACEHOLDER
from edge_tts import Communicate
from pydub import AudioSegment
import google.generativeai as genai

# User Configuration
USER_FIELD_OF_STUDY = "Computer Science"  # Change this to your field of study

# Google Gemini API Configuration
# SECURITY: the key is read from the GEMINI_API_KEY environment variable (or
# prompted for) instead of being hard-coded. Any key that was previously
# committed in this cell must be treated as leaked and revoked.
import getpass

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or getpass.getpass("Gemini API key: ")
genai.configure(api_key=GEMINI_API_KEY)

def clean_text(text):
    """Strip bullet glyphs (U+2022, U+25E6) from *text* and trim surrounding whitespace."""
    without_bullets = re.sub(r'[•\u2022\u25E6]', '', text)
    return without_bullets.strip()

def create_embedding(text):
    """Placeholder embedding step: log a 50-character preview and return *text* unchanged."""
    preview = text[:50]  # keep the log line short
    print(f"Creating embedding for: {preview}...")
    # No real embedding is computed; the input is passed straight through.
    return text

def generate_narration(slide_text, overall_topic, slide_title=None):
    """Generate a short (40-50 word) narration for one slide via the Gemini API.

    Args:
        slide_text: Text content of the slide.
        overall_topic: Topic of the whole presentation, included in the prompt.
        slide_title: Optional slide title; when given, the prompt mentions it.

    Returns:
        The model's narration with ``*`` and ``_`` characters removed. If the
        API call fails or returns no text, the original ``slide_text`` is
        returned so the slide can still be narrated.
    """
    if slide_title:
        prompt = (
            f"Generate a concise (40-50 words), engaging, and attractive narration that impressively explains "
            f"the slide titled '{slide_title}' in a presentation on '{overall_topic}'. "
            f"The slide content is: {slide_text}. Speak as a student in {USER_FIELD_OF_STUDY}."
        )
    else:
        prompt = (
            f"Generate a concise (40-50 words), engaging, and attractive narration that impressively explains "
            f"the slide content in a presentation on '{overall_topic}'. "
            f"The slide content is: {slide_text}. Speak as a student in {USER_FIELD_OF_STUDY}."
        )
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')  # Valid model
        response = model.generate_content(prompt)
        # Strip markdown emphasis markers so they are not read aloud
        narration_text = re.sub(r'[*_]', '', response.text.strip())
        if narration_text:
            return narration_text
        print("Gemini API returned an empty narration; using the slide text instead.")
    except Exception as e:
        print(f"Error calling Gemini API: {str(e)}")
    # Fall back to the raw slide text. No label is prepended: the return value
    # is fed to the text-to-speech step and a label would be spoken.
    return slide_text

async def process_slide(slide, index, output_folder, overall_topic):
    """Narrate one slide: extract its text, generate a narration, synthesize audio.

    Args:
        slide: Slide object whose ``shapes`` are scanned for a title and text.
        index: Slide number, used in the output file names and log messages.
        output_folder: Existing directory that receives the audio files.
        overall_topic: Presentation topic forwarded to ``generate_narration``.

    Returns:
        Path of ``slide_<index>.wav``, or ``None`` when the slide has no text
        or any step fails (the error is printed, not raised, so one bad slide
        does not abort the others).
    """
    # Define audio file paths
    temp_mp3 = f"{output_folder}/temp_{index}.mp3"
    output_wav = f"{output_folder}/slide_{index}.wav"
    try:
        # Extract slide title; title slides use a CENTER_TITLE placeholder
        slide_title = None
        for shape in slide.shapes:
            if shape.is_placeholder and shape.placeholder_format.type in (
                PP_PLACEHOLDER.TITLE,
                PP_PLACEHOLDER.CENTER_TITLE,
            ):
                slide_title = clean_text(shape.text)
                break

        # Filter on the *cleaned* text so a shape holding only bullet glyphs
        # does not contribute an empty entry.
        cleaned = (clean_text(shape.text) for shape in slide.shapes if hasattr(shape, "text"))
        slide_text = [text for text in cleaned if text]
        if not slide_text:
            print(f"Slide {index}: No text found, skipping.")
            return None

        full_text = " ".join(slide_text)

        # Create embedding (mocked)
        embedded_text = create_embedding(full_text)

        # generate_narration performs a blocking network call; run it in a
        # worker thread so the slides are processed concurrently instead of
        # one after another on the event loop.
        loop = asyncio.get_running_loop()
        narration_text = await loop.run_in_executor(
            None, generate_narration, embedded_text, overall_topic, slide_title
        )
        print(f"Slide {index} narration: {narration_text[:50]}...")  # Preview narration

        # Generate audio with Edge TTS using Indian male voice
        communicate = Communicate(narration_text, "en-IN-PrabhatNeural", rate="+10%")
        await communicate.save(temp_mp3)

        def _convert_to_wav():
            segment = AudioSegment.from_mp3(temp_mp3).set_frame_rate(44100).set_channels(1)
            # export() returns the open output file; close it explicitly.
            segment.export(output_wav, format="wav").close()

        # Convert MP3 to WAV off the event loop (ffmpeg + file I/O is blocking)
        await loop.run_in_executor(None, _convert_to_wav)
        print(f"Slide {index}: Audio generated at {output_wav}")
        return output_wav
    except Exception as e:
        print(f"Error processing slide {index}: {str(e)}")
        return None
    finally:
        # Always drop the intermediate MP3, including when a step above failed.
        try:
            os.remove(temp_mp3)
        except OSError:
            pass  # never created, or already gone

async def process_pptx(file_path):
    """Generate narration audio for every slide of the presentation at *file_path*.

    The presentation topic is derived from the file name (extension removed,
    underscores turned into spaces). Audio is written to ``audio_output``.

    Returns:
        List of generated WAV paths in slide order (slides that produced no
        audio are dropped), or an empty list if anything raises.
    """
    output_folder = "audio_output"
    try:
        presentation = Presentation(file_path)
        os.makedirs(output_folder, exist_ok=True)

        # Topic = file name without directory or extension
        base_name = os.path.basename(file_path)
        stem, _extension = os.path.splitext(base_name)
        overall_topic = stem.replace('_', ' ')
        print(f"Overall topic: {overall_topic}")

        # One coroutine per slide, numbered from 1, awaited together
        slide_jobs = []
        for number, slide in enumerate(presentation.slides, start=1):
            slide_jobs.append(process_slide(slide, number, output_folder, overall_topic))
        results = await asyncio.gather(*slide_jobs)

        # process_slide returns None for skipped/failed slides
        return list(filter(None, results))
    except Exception as e:
        print(f"Error processing PPTX: {str(e)}")
        return []

def create_zip(audio_files):
    """Bundle *audio_files* into ``audio_output.zip`` and return the archive path.

    Each file is stored under its base name, using DEFLATE compression.
    """
    zip_path = "audio_output.zip"
    archive = zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED)
    with archive:
        for audio_path in audio_files:
            archive.write(audio_path, arcname=os.path.basename(audio_path))
    return zip_path

# Ask the user for a PPTX file through the Colab upload widget
print("Please upload your PPTX file (e.g., 'ChatGPT3.pptx'):")
uploaded = files.upload()

# Narrate each uploaded presentation and offer the result as a ZIP download.
# The generated files are left in place; this cell performs no cleanup.
for filename in uploaded:
    # Guard: anything that is not a .pptx is reported and skipped
    if not filename.lower().endswith('.pptx'):
        print("Error: Only .pptx files are supported!")
        continue

    pptx_file_path = filename
    print(f"Processing {pptx_file_path}...")

    # Drive the async pipeline to completion
    audio_files = asyncio.run(process_pptx(pptx_file_path))

    # Guard: nothing to package
    if not audio_files:
        print("No audio files generated!")
        continue

    # Package the audio
    zip_path = create_zip(audio_files)
    print(f"ZIP file created at: {zip_path}")

    # Hand the archive to the browser; on failure point to the file explorer
    try:
        files.download(zip_path)
        print(f"Successfully downloaded: {zip_path}")
    except Exception as download_error:
        print(f"Error downloading ZIP file: {str(download_error)}")
        print("Please manually download 'audio_output.zip' from the Colab file explorer (/content/).")

Please upload your PPTX file (e.g., 'ChatGPT3.pptx'):


Saving chatgpt3.pptx to chatgpt3 (1).pptx
Processing chatgpt3 (1).pptx...
Overall topic: chatgpt3 (1)
Creating embedding for: ChatGPT: Unraveling user Challenges & Proposing Ta...
Slide 1 narration: Hey everyone!  Harsh Bande and I, Aniket Gaikwad, ...
Creating embedding for: Introduction ChatGPT: An advanced AI language mode...
Slide 2 narration: Hey everyone,  meet ChatGPT-3!  This amazing OpenA...
Creating embedding for: NLP  APPLICATION IN CHATGPT Understanding Meaning:...
Slide 3 narration: Hey everyone, so ChatGPT3's magic lies in its NLP ...
Creating embedding for: EVOLUTION OF  CHATGPT GPT-1 (2018):
First iteratio...
Slide 4 narration: Hey everyone!  So, ChatGPT's journey's been amazin...
Creating embedding for: CONCLUSION Enhanced Decision-Making:
Integrating a...
Slide 5 narration: ChatGPT-3's future is bright!  We're not just gett...
Creating embedding for: THANK YOU...
Slide 6 narration: That's all for my ChatGPT-3 overview!  We've just ...
Slide 6: Audio generated at audio

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Successfully downloaded: audio_output.zip
