# In [1]:
import fitz
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import GPT4All
import gradio as gr


# Initialize GPT model
def load_gpt4all_model(model_path=r"C:\Users\Omaid\AppData\Local\nomic.ai\GPT4All\Meta-Llama-3-8B-Instruct.Q4_0.gguf"):
    """Create the GPT4All LLM object used by the summarizer.

    Args:
        model_path: Filesystem path of the model file handed to ``GPT4All``
            as ``model``. The default points at a user-specific local
            install, so callers on another machine must pass their own path.

    Returns:
        The ``GPT4All`` instance, constructed with ``verbose=False``.
    """
    gpt4all_llm = GPT4All(model=model_path, verbose=False)
    return gpt4all_llm

# Build the model once at module level; `chain` below (and therefore every
# summarize_text call) reuses this single instance.
llm = load_gpt4all_model()


# Prompt template
# One input variable, {text}, is substituted into the template; the prompt
# ends with "Summary:".
prompt_template = PromptTemplate(
    input_variables=["text"],
    template="Summarize the following text:\n\n{text}\n\nSummary:",
)


# LangChain
# Combines prompt_template with llm; summarize_text invokes it via
# chain.run({"text": ...}).
# NOTE(review): the notebook output at the end of this file echoes this line,
# presumably from a LangChain deprecation warning for LLMChain -- confirm
# before upgrading LangChain.
chain = LLMChain(llm=llm, prompt=prompt_template)


# Extract text from PDF
def extractPDF(pdf, maxPages=5):
    """Return the plain text of the first *maxPages* pages of a PDF.

    Args:
        pdf: Path of the PDF to read, or an uploaded-file object that exposes
            its path as ``.name`` (depending on the Gradio version, the File
            component hands over such a wrapper instead of a path string).
        maxPages: Upper bound on the number of leading pages to read.

    Returns:
        The concatenated page text with surrounding whitespace stripped.
        Failures are reported as strings instead of being raised:
        ``"Error reading PDF: <reason>"`` when the document cannot be opened
        or read, and ``"No extractable text found in PDF."`` when the pages
        yield no text.
    """
    import os  # local import: only needed to recognise path-like arguments

    # Real paths are passed through untouched (pathlib.Path also has a
    # ``.name``, but it is only the basename). Anything else that carries a
    # ``.name`` is treated as an upload wrapper and unwrapped to its path.
    if isinstance(pdf, (str, os.PathLike)):
        source = pdf
    else:
        source = getattr(pdf, "name", pdf)

    try:
        # The context manager closes the document even when a page fails to
        # load, so the file handle is not leaked.
        with fitz.open(source) as doc:
            page_total = min(maxPages, len(doc))
            page_texts = [
                doc.load_page(index).get_text() or ""
                for index in range(page_total)
            ]
    except Exception as e:  # UI boundary: callers expect a message, not a raise
        return f"Error reading PDF: {e}"
    return "".join(page_texts).strip() or "No extractable text found in PDF."


# Summarize text
def summarize_text(fullText, chunkSize=1000):
    """Summarize *fullText* with the module-level ``chain``.

    Text of at most *chunkSize* characters is summarized in a single call.
    Longer text is cut into consecutive *chunkSize*-character slices (purely
    by character count, so a slice may start or end mid-word); each slice is
    summarized on its own and the partial summaries are joined with a single
    space.

    Args:
        fullText: The text to summarize.
        chunkSize: Maximum number of characters sent to the chain per call.

    Returns:
        The summary string, or the space-joined per-chunk summaries.
    """
    total_chars = len(fullText)

    if total_chars > chunkSize:
        # Slice boundaries: 0, chunkSize, 2*chunkSize, ...
        offsets = range(0, total_chars, chunkSize)
        partial_summaries = []
        for start in offsets:
            piece = fullText[start:start + chunkSize]
            partial_summaries.append(chain.run({"text": piece}))
        return " ".join(partial_summaries)

    # Short enough to summarize in one go
    return chain.run({"text": fullText})


# Gradio
def generateSummary(input_text, pdf):
    """Gradio callback: summarize an uploaded PDF or, failing that, typed text.

    An uploaded PDF takes precedence over the text box.

    Args:
        input_text: Text typed into the UI; may be empty or None.
        pdf: The uploaded file as delivered by the File component, or None
            when nothing was uploaded.

    Returns:
        The summary, or a user-facing message when there is nothing to
        summarize or the PDF could not be read.
    """
    if pdf is not None:
        textContent = extractPDF(pdf)

        # extractPDF signals problems by returning a message instead of
        # raising, and never returns an empty string. Show those messages to
        # the user as-is rather than asking the model to "summarize" them.
        # The literals below must stay in sync with extractPDF's return values.
        extraction_failed = (
            textContent.startswith("Error reading PDF:")
            or textContent == "No extractable text found in PDF."
        )
        if extraction_failed:
            return textContent
        if not textContent:
            return "No text could be extracted from the PDF."
        return summarize_text(textContent)

    # Whitespace-only input carries nothing to summarize, so treat it as empty.
    if input_text and input_text.strip():
        return summarize_text(input_text)

    return "Please provide input text or upload a PDF file."

    
# Set up interface
# The two input components are listed in the same order as generateSummary's
# parameters (input_text, pdf); the function's return value is shown in the
# "Summary" textbox.
interface = gr.Interface(
    fn=generateSummary,
    inputs=[
        gr.Textbox(lines=10, label="Input text (Leave blank if uploading PDF)"),
        gr.File(label="Upload PDF file"),
    ],
    outputs=gr.Textbox(label="Summary", lines=15),
    title="PDF & Text Summarizer",
    description="Summarize text from an uploaded PDF or inputted text",
)


# Launch
# Starts the app with default settings; the recorded notebook output shows it
# being served on a local URL.
interface.launch()

# Notebook cell output (not code):
#   chain = LLMChain(llm=llm, prompt=prompt_template)
#
#
# * Running on local URL:  http://127.0.0.1:7867
#
# To create a public link, set `share=True` in `launch()`.


