In [1]:
# Install the third-party packages imported below: pdfplumber, transformers,
# googletrans (pinned to version 3.1.0a0) and ipywidgets.
# The leading "!" is notebook syntax that runs the line as a shell command.
!pip install pdfplumber transformers googletrans==3.1.0a0 ipywidgets

import io
import textwrap

import ipywidgets as widgets
import pdfplumber
from google.colab import files
from googletrans import Translator
from IPython.display import display, clear_output
from transformers import pipeline

# Initialize models and translator.
# These are created once at module level and shared by the helper functions
# below, so each callback reuses the same objects.

# Summarization pipeline using the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Question-answering pipeline using the deepset/roberta-base-squad2 checkpoint.
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
# googletrans client used by translate_summary().
translator = Translator()

def extract_pdf_text(pdf_file):
    """Return the concatenated text of every page in ``pdf_file``.

    Pages that yield no text are skipped; each remaining page's text is
    followed by a single space. Failures are reported as a string rather
    than raised: the result starts with ``"Error"`` when nothing could be
    extracted or when opening/parsing the PDF raised an exception.
    """
    try:
        with pdfplumber.open(pdf_file) as document:
            per_page = [page.extract_text() for page in document.pages]
        # Keep the trailing space after every non-empty page.
        combined = "".join(f"{content} " for content in per_page if content)
        if combined.strip():
            return combined
        return "Error: No text extracted from the PDF."
    except Exception as exc:
        return f"Error extracting text: {exc!s}"

def summarize_text(text, max_chunk_length=1000):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split into pieces of at most ``max_chunk_length`` characters
    so each piece stays within the summarization model's input limit. The
    split happens on whitespace, so words are not cut in half at chunk
    boundaries, and text that is empty or whitespace-only produces no chunks
    at all.

    Args:
        text: The text to summarize.
        max_chunk_length: Maximum number of characters per chunk.

    Returns:
        The chunk summaries joined with single spaces, ``""`` when there is
        nothing to summarize, or a string starting with
        ``"Error summarizing text:"`` if summarization raised an exception.
    """
    try:
        # textwrap.wrap breaks on whitespace (only splitting a word when it
        # alone exceeds the width) and returns [] for blank input.
        chunks = textwrap.wrap(text, width=max_chunk_length)
        summaries = [
            summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
            for chunk in chunks
        ]
        return " ".join(summaries)
    except Exception as e:
        return f"Error summarizing text: {str(e)}"

def translate_summary(summary):
    """Translate ``summary`` into Hindi, Spanish, and French.

    Returns a dict mapping each language name to the translated text. If any
    translation raises, a dict with the single key ``"Error"`` describing the
    failure is returned instead.
    """
    # Display name -> language code passed to the translator as ``dest``.
    targets = {"Hindi": "hi", "Spanish": "es", "French": "fr"}
    try:
        return {
            language: translator.translate(summary, dest=code).text
            for language, code in targets.items()
        }
    except Exception as exc:
        return {"Error": f"Translation failed: {exc!s}"}

def answer_question(context, question):
    """Return the QA model's answer to ``question`` given ``context``.

    Any exception raised while running the model or reading its result is
    converted into a string starting with ``"Error answering question:"``.
    """
    try:
        prediction = qa_pipeline(context=context, question=question)
        answer = prediction["answer"]
    except Exception as exc:
        reason = str(exc)
        return f"Error answering question: {reason}"
    return answer

# Global variable to store PDF text.
# Written by process_pdf(); submit_question() treats an empty string as
# "no PDF has been processed yet".
pdf_text = ""

# Create widgets for the interface
# Single-file upload control restricted to .pdf files.
upload_button = widgets.FileUpload(accept=".pdf", multiple=False, description="Upload PDF")
# Free-text box for the user's question.
question_input = widgets.Text(placeholder="Enter your question about the PDF", description="Question:", layout={'width': '600px'})
# Buttons wired to submit_question() and stop_asking() further down.
submit_button = widgets.Button(description="Submit Question", button_style="success")
stop_button = widgets.Button(description="Stop Asking", button_style="danger")
# Output area that the callbacks print into via "with output:".
output = widgets.Output()

def process_pdf(_):
    """Process the uploaded PDF and display its summary and translations.

    Registered as an observer on the upload widget's ``value``; the change
    argument is ignored. On success the extracted text is stored in the
    module-level ``pdf_text`` for later questions. If extraction fails,
    ``pdf_text`` is reset to ``""`` so an error message is never used as
    question-answering context.
    """
    global pdf_text
    with output:
        clear_output()
        uploaded = upload_button.value
        if not uploaded:
            print("Please upload a PDF file.")
            return

        # ipywidgets 7 exposes the upload as a dict keyed by file name,
        # ipywidgets 8 as a tuple of per-file dicts; accept either shape.
        entries = list(uploaded.values()) if isinstance(uploaded, dict) else list(uploaded)
        pdf_file = io.BytesIO(entries[0]["content"])

        # Extract text. Failures come back as strings with a known prefix;
        # match that prefix instead of searching for "Error" anywhere, which
        # would reject any document that merely contains the word.
        extracted = extract_pdf_text(pdf_file)
        if extracted.startswith(("Error: No text extracted", "Error extracting text:")):
            pdf_text = ""
            print(extracted)
            return
        pdf_text = extracted

        # Summarize text (same prefix-based failure detection as above).
        summary = summarize_text(pdf_text)
        if summary.startswith("Error summarizing text:"):
            print(summary)
            return
        print("Summary:")
        print(summary)

        # Translate summary; a failure is reported as a dict with an "Error" key.
        translations = translate_summary(summary)
        if "Error" in translations:
            print(translations["Error"])
            return
        print("\nTranslations:")
        for lang, trans in translations.items():
            print(f"{lang}: {trans}")

        print("\nEnter a question below and click 'Submit Question' to get an answer.")

# Summary and translations of the most recently summarized PDF text, kept so
# that asking several questions about one document does not re-run the
# summarization model and the three translations every time.
_reference_cache = {"text": None, "summary": "", "translations": {}}


def submit_question(_):
    """Answer the user's question and redisplay the summary and translations.

    Registered as the click handler of the submit button; the button argument
    is ignored. The summary and translations are computed at most once per
    distinct ``pdf_text`` and reused for later questions. Failed results are
    not cached, so they are retried on the next question.
    """
    with output:
        clear_output()
        if not pdf_text:
            print("Please upload and process a PDF first.")
            return
        question = question_input.value.strip()
        if not question:
            print("Please enter a question.")
            return
        answer = answer_question(pdf_text, question)
        print(f"Question: {question}")
        print(f"Answer: {answer}")

        # Reprint summary and translations for reference.
        if _reference_cache["text"] == pdf_text:
            summary = _reference_cache["summary"]
            translations = _reference_cache["translations"]
        else:
            summary = summarize_text(pdf_text)
            if summary.startswith("Error summarizing text:"):
                # Nothing meaningful to translate; show the failure and stop.
                print("\nSummary:")
                print(summary)
                return
            translations = translate_summary(summary)
            if "Error" not in translations:
                _reference_cache.update(
                    text=pdf_text, summary=summary, translations=translations
                )
        print("\nSummary:")
        print(summary)
        print("\nTranslations:")
        for lang, trans in translations.items():
            print(f"{lang}: {trans}")

def stop_asking(_):
    """Stop the question-answering session and reset the interface state.

    Registered as the click handler of the stop button; the button argument
    is ignored. Clears the stored PDF text so no further questions are
    answered against the old document, empties the question box, and clears
    the upload widget's value.
    """
    global pdf_text
    # Forget the processed document first, so the reset holds even if
    # clearing the widgets below fails.
    pdf_text = ""
    question_input.value = ""

    uploaded = upload_button.value
    if isinstance(uploaded, dict):
        # ipywidgets 7: value is a dict, emptied in place.
        uploaded.clear()
    elif uploaded:
        # ipywidgets 8: value is a tuple and must be reassigned. This is done
        # before writing to the output area because the assignment may
        # trigger the upload observer, which prints there itself.
        upload_button.value = ()

    with output:
        clear_output()
        print("Stopped. Upload a new PDF to start again.")

# Connect widget events to their handlers.
# process_pdf runs whenever the upload widget's "value" trait changes (not on
# a click), while the two buttons call their handlers when clicked.
upload_button.observe(process_pdf, names='value')
submit_button.on_click(submit_question)
stop_button.on_click(stop_asking)

# Display the interface: all controls stacked vertically, output area last.
display(widgets.VBox([upload_button, question_input, submit_button, stop_button, output]))



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
Device set to use cpu


VBox(children=(FileUpload(value={}, accept='.pdf', description='Upload PDF'), Text(value='', description='Ques…