In [1]:
!pip install pypdf pdfplumber pytesseract openai llama-index langchain transformers torch coqui-tts elevenlabs gtts pydub ffmpeg-python streamlit soundfile


Collecting pypdf
  Downloading pypdf-5.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting llama-index
  Downloading llama_index-0.12.26-py3-none-any.whl.metadata (12 kB)
Collecting coqui-tts
  Downloading coqui_tts-0.26.0-py3-none-any.whl.metadata (19 kB)
Collecting elevenlabs
  Downloading elevenlabs-1.55.0-py3-none-any.whl.metadata (7.3 kB)
Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting streamlit
  Downloading streamlit-1.44.0-py3-none-any.whl.metadata (8.9 kB)
Collecting p

In [2]:
!pip install streamlit pdfplumber pytesseract torch transformers gtts pydub
!apt-get install -y tesseract-ocr
!npm install -g localtunnel
!wget -qO cloudflared https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 && chmod +x cloudflared


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 29 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 2s (3,088 kB/s)
Selecting previously unselected package tesseract-ocr-eng.
(Reading database ... 126209 files and directories currently installed.)
Preparing to unpack .../tesseract-ocr-

In [6]:
%%writefile app.py
import base64
import os
import pdfplumber
import pytesseract
from PIL import Image
import streamlit as st
import torch
from transformers import pipeline
from gtts import gTTS
from pydub import AudioSegment

# ✅ Define the correct image path
background_image_path = "/content/back.jpg"  # Ensure this matches the actual path

# ✅ Function to encode image to Base64 (Google Colab does not support direct file URLs)
def get_base64(file_path):
    with open(file_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode()

# ✅ Apply Background Image
if os.path.exists(background_image_path):
    base64_image = get_base64(background_image_path)
    st.markdown(
        f"""
        <style>
            .stApp {{
                background-image: url("data:image/jpg;base64,{base64_image}");
                background-size: cover;
                background-position: center;
                background-repeat: no-repeat;
            }}
        </style>
        """,
        unsafe_allow_html=True
    )
else:
    st.warning("⚠️ Background image not found. Please upload 'back.jpg' in Google Colab.")

# 🎙️ App Title
st.title("📄 PDF to Podcast Converter 🎙️\n Created By Jaydip Maskar\n At AIS Solutions Pvt. Ltd.")
st.write("Upload a research paper or any PDF, and this app will generate an audio podcast.")

# 📂 File Upload
uploaded_file = st.file_uploader("📂 Upload a PDF file", type=["pdf"])



# Load Hugging Face summarization model
device = "cuda" if torch.cuda.is_available() else "cpu"
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", truncation=True, device=0 if device == "cuda" else -1)

def extract_text_from_pdf(pdf_file):
    """Extract text from PDF using pdfplumber and OCR for images."""
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            extracted_text = page.extract_text()
            if extracted_text:
                text += extracted_text + "\n"
            else:
                # OCR for image-based PDFs
                image = page.to_image()
                img_path = "temp_page.png"
                image.save(img_path, format="PNG")
                ocr_text = pytesseract.image_to_string(Image.open(img_path))
                text += ocr_text + "\n"

    if not text.strip():
        return "❌ No extractable text found. The PDF might be empty or contain unsupported formats."

    return text.strip()

def chunk_text(text, max_tokens=1024):
    """Split text into smaller chunks to fit model limits."""
    words = text.split()
    chunks = [" ".join(words[i:i+max_tokens]) for i in range(0, len(words), max_tokens)]
    return chunks

def summarize_text(text):
    """Summarize extracted text while handling errors."""
    try:
        text_chunks = chunk_text(text)
        summaries = []
        for chunk in text_chunks:
            if len(chunk.split()) > 50:  # Ensure chunk has enough words for summarization
                summaries.append(summarizer(chunk, max_length=512, min_length=100, do_sample=False)[0]['summary_text'])
            else:
                summaries.append(chunk)  # Keep short chunks as-is
        return " ".join(summaries)
    except Exception as e:
        return f"❌ Error during summarization: {str(e)}"

def text_to_speech(text, output_audio):
    """Convert text to speech and save as MP3."""
    tts = gTTS(text=text, lang="en")
    tts.save(output_audio)

def process_pdf(uploaded_file):
    """Process the uploaded PDF."""
    if uploaded_file is not None:
        temp_pdf_path = "temp_uploaded.pdf"
        with open(temp_pdf_path, "wb") as f:
            f.write(uploaded_file.read())

        st.write("🔍 Extracting text from PDF...")
        text = extract_text_from_pdf(temp_pdf_path)

        if "❌" in text:
            st.error(text)
            return

        st.write("✍️ Summarizing text...")
        summary = summarize_text(text)
        st.text_area("📌 Summary:", summary, height=400)

        if "❌" in summary:
            st.error(summary)
            return

        st.write("🔊 Converting to speech...")
        output_audio = "podcast.mp3"
        text_to_speech(summary, output_audio)

        st.audio(output_audio, format="audio/mp3", start_time=0)
        st.success("✅ Podcast generated successfully!")

if uploaded_file:
    process_pdf(uploaded_file)


Overwriting app.py


In [None]:
!streamlit run app.py &>/dev/null &
!./cloudflared tunnel --url http://localhost:8501 --no-autoupdate


[90m2025-03-29T10:47:02Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2025-03-29T10:47:02Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
[90m2025-03-29T10:47:06Z[0m [32mINF[0m +--------------------------------------------------------------------------------------------+
[90m2025-03-29T10:47:06Z[0m [32mINF[0m |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
[90m2025