In [None]:
!pip install requests PyPDF2 transformers pydub

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, PyPDF2
Successfully installed PyPDF2-3.0.1 pydub-0.25.1


In [21]:
import requests
import os
import json
from PyPDF2 import PdfReader
from transformers import pipeline
from pydub import AudioSegment
from google.colab import files

In [22]:
from google.colab import userdata
ELEVENLABS_API_KEY = userdata.get('eleven_lab')

# ElevenLabs API config
ELEVENLABS_URL = "https://api.elevenlabs.io/v1/text-to-speech"

if ELEVENLABS_API_KEY:
    print("API key loaded successfully.")
else:
    print("Failed to load API key.")

API key loaded successfully.


In [24]:

#Extracting the text from the pdf
def extract_text_from_pdf(pdf_path):
    try:
        reader = PdfReader(pdf_path)
        text = "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
        return text
    except Exception as e:
        print(f"Error extracting  text from PDF: {e}")
        return ""

# Summarizing text using an Hugging Face Transformers model
def summarize_text(text, max_length=150):
    try:
        summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
        chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
        summaries = [
            summarizer(chunk, max_length=min(max_length, len(chunk)//2), min_length=50, do_sample=False)[0]["summary_text"]
            for chunk in chunks
        ]
        return " ".join(summaries)
    except Exception as e:
        print(f"Error summarizing text: {e}")
        return ""

# Now we generate the speech using ElevenLabs API
def generate_speech(text, voice_id):
    headers = {"xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json"}
    payload = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.8},
    }
    response = requests.post(f"{ELEVENLABS_URL}/{voice_id}", headers=headers, data=json.dumps(payload))
    if response.status_code == 200:
        return response.content
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None

#Saving audio
def save_audio(file_name, audio_content):
    with open(file_name, "wb") as audio_file:
        audio_file.write(audio_content)

#Using conversation script. Taking hints from how google nootbook lm model generates the podcasts
def create_conversational_script(summary):
    phrases_speaker1 = [
        "Hmm, that's an interesting point. What do you think about that?",
        "I’ve been thinking about this—how does it relate to what we discussed earlier?",
        "You know, this reminds me of something crucial. The author mentions...",
        "That’s a great observation. I think the author is also trying to say...",
        "How do you feel about that? I’d love to hear your thoughts.",
        "This really brings up an important aspect. The author really wants us to focus on...",
        "I find this perspective intriguing. It adds a whole new layer to the topic.",
        "You know, that’s a thought-provoking idea. How does it change our view of the issue?",
        "What are your thoughts on this? I'm curious to hear how you see it.",
        "This really highlights something significant. It's a point worth revisiting."
    ]

    phrases_speaker2 = [
        "Oh, you're spot on there!",
        "Yeah, let me expand on that for a moment.",
        "I think you're right, and it connects really well to our earlier discussion.",
        "Absolutely, it ties into the broader themes we’ve been exploring.",
        "That’s a great point. I really think it deserves more attention.",
        "I love that argument. It makes so much sense.",
        "That’s a great question. How would you approach it from your angle?",
        "I think that really sheds light on a key point we shouldn’t overlook.",
        "You’re absolutely right. Let’s dive deeper into that.",
        "I can totally relate to that. It resonates with something I’ve noticed before.",
        "Oh, I hadn’t considered that! You’ve got me thinking in a new direction.",
        "Can you break that down a bit more? I think there’s more to it."
    ]

    sentences = summary.split(". ")
    conversation = [
        ("Speaker1", "Welcome to the podcast! Let's dive into today's discussion."),
        ("Speaker2", "Thanks for tuning in! This is going to be an engaging session."),
    ]

    for i, sentence in enumerate(sentences):
        if i % 2 == 0:
            speaker_line = f"{phrases_speaker1[i % len(phrases_speaker1)]} {sentence.strip()}"
            conversation.append(("Speaker1", speaker_line))
        else:
            speaker_line = f"{phrases_speaker2[i % len(phrases_speaker2)]} {sentence.strip()}"
            conversation.append(("Speaker2", speaker_line))

    conversation.append(("Speaker1", "That concludes our discussion today. Thanks for joining us!"))
    conversation.append(("Speaker2", "We hope you found this insightful. See you next time!"))

    return conversation


# Generaterating our podcast
def generate_podcast_conversation(text_segments, voices, output_file="podcast_conversation.mp3"):
    podcast_audio = AudioSegment.silent(duration=0)
    for speaker, text in text_segments:
        print(f"Generating speech for {speaker}: {text[:50]}...")
        audio_content = generate_speech(text, voices[speaker])
        if audio_content:
            temp_file = f"{speaker}_temp.mp3"
            save_audio(temp_file, audio_content)
            segment = AudioSegment.from_file(temp_file)
            podcast_audio += segment
    podcast_audio.export(output_file, format="mp3")
    print(f"Podcast saved as {output_file}")


if __name__ == "__main__":
    # Asking user to upload pdf
    print("Upload your PDF file:")
    uploaded = files.upload()
    if uploaded:
        pdf_path = next(iter(uploaded))
        full_text = extract_text_from_pdf(pdf_path)

        if full_text:
            # Summarizing  text
            summary = summarize_text(full_text, max_length=300)

            # Creating a conversational script
            text_segments = create_conversational_script(summary)

            # Using eleven lab voice code
            voices = {
                "Speaker1": "iP95p4xoKVk53GoZ742B",
                "Speaker2": "9BWtsMINqrJLrRacOk9x",
            }

            #Final podcast
            generate_podcast_conversation(text_segments, voices, output_file="podcast_from_pdf.mp3")
        else:
            print("Failed to extract text from PDF.")


Upload your PDF file:


Saving love.pdf to love (2).pdf


Device set to use cpu
Your max_length is set to 300, but your input_length is only 251. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=125)
Your max_length is set to 300, but your input_length is only 262. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=131)
Your max_length is set to 300, but your input_length is only 280. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=140)
Your max_length is set to 86, but your input_length is only 60. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer

Generating speech for Speaker1: Welcome to the podcast! Let's dive into today's di...
Generating speech for Speaker2: Thanks for tuning in! This is going to be an engag...
Generating speech for Speaker1: Hmm, that's an interesting point. What do you thin...
Generating speech for Speaker2: Yeah, let me expand on that for a moment. I might ...
Generating speech for Speaker1: You know, this reminds me of something crucial. Th...
Generating speech for Speaker2: Absolutely, it ties into the broader themes we’ve ...
Generating speech for Speaker1: How do you feel about that? I’d love to hear your ...
Generating speech for Speaker2: I love that argument. It makes so much sense. When...
Generating speech for Speaker1: I find this perspective intriguing. It adds a whol...
Generating speech for Speaker2: I think that really sheds light on a key point we ...
Generating speech for Speaker1: What are your thoughts on this? I'm curious to hea...
Generating speech for Speaker2: I can totally relate t