<a href="https://colab.research.google.com/github/Stefina-11/LLM/blob/main/LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers langchain pypdf gradio langchain-community langchain-text-splitters
!pip show langchain
!pip show langchain-community
!pip show langchain-text-splitters

Name: langchain
Version: 1.2.7
Summary: Building applications with LLMs through composability
Home-page: https://docs.langchain.com/
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.12/dist-packages
Requires: langchain-core, langgraph, pydantic
Required-by: 
Name: langchain-community
Version: 0.4.1
Summary: Community contributed LangChain integrations.
Home-page: 
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.12/dist-packages
Requires: aiohttp, dataclasses-json, httpx-sse, langchain-classic, langchain-core, langsmith, numpy, pydantic-settings, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 
Name: langchain-text-splitters
Version: 1.1.0
Summary: LangChain text splitting utilities
Home-page: https://docs.langchain.com/
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.12/dist-packages
Requires: langchain-core
Required-by: langchain-classic


In [5]:

!pip install -q transformers langchain pypdf gradio

import json
import gradio as gr
from transformers import pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Text-to-text generation pipeline shared by the MCQ generator below.
llm = pipeline(task="text2text-generation", model="google/flan-t5-base", max_length=1024)

# Most recently generated MCQ: written by generate_mcq_from_pdf, read by check_answer.
mcq_data = dict()
def _parse_mcq(raw_output):
    """Extract a validated MCQ from raw model text.

    Returns a dict with keys ``question``, ``options`` (list of four entries
    in A-D order) and ``answer`` (one of ``"A"``-``"D"``), or ``None`` when
    the text does not contain a complete, well-formed MCQ.
    """
    json_start = raw_output.find("{")
    json_end = raw_output.rfind("}") + 1
    if json_start == -1 or json_end <= json_start:
        return None

    try:
        parsed = json.loads(raw_output[json_start:json_end])
        question = parsed["question"]
        choices = [parsed["options"][letter] for letter in "ABCD"]
        answer = str(parsed["answer"]).strip().upper()
    except (ValueError, KeyError, TypeError):
        # Malformed JSON (JSONDecodeError is a ValueError) or a missing /
        # wrongly-shaped field: treat the reply as unusable.
        return None

    # check_answer indexes the options by this letter, so anything outside
    # A-D would break grading later.
    if answer not in ("A", "B", "C", "D"):
        return None

    return {"question": question, "options": choices, "answer": answer}


def generate_mcq_from_pdf(pdf_file):
    """Generate one multiple-choice question from an uploaded PDF.

    The PDF is loaded, split into overlapping chunks, and the module-level
    ``llm`` pipeline is prompted chunk by chunk until one reply parses into a
    complete MCQ. The accepted MCQ is stored in the module-level ``mcq_data``
    so that ``check_answer`` can grade against it.

    Args:
        pdf_file: Value delivered by the file-upload input: an object exposing
            the path as ``.name``, a plain path string, or ``None`` when
            nothing has been uploaded.

    Returns:
        A ``(question_text, radio_update)`` pair. ``radio_update`` is a
        ``gr.update(...)`` that replaces the radio's *choices*; returning a
        bare list would be interpreted as the radio's selected value and the
        options would never appear.
    """
    global mcq_data

    if pdf_file is None:
        return "Please upload a PDF first.", gr.update(choices=[], value=None)

    # Accept both file-like upload objects and plain path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    docs = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100
    )
    chunks = splitter.split_documents(docs)

    for chunk in chunks:
        # Very short chunks carry too little material for a question.
        if len(chunk.page_content.strip()) < 200:
            continue

        prompt = f"""
Generate ONE multiple choice question from the text below.

Return ONLY valid JSON in this exact format:
{{
  "question": "...",
  "options": {{
    "A": "...",
    "B": "...",
    "C": "...",
    "D": "..."
  }},
  "answer": "A"
}}

Text:
{chunk.page_content}
"""

        raw_output = llm(prompt)[0]["generated_text"]

        parsed = _parse_mcq(raw_output)
        if parsed is None:
            # NOTE(review): the model may not reliably emit valid JSON;
            # unusable replies are skipped and the next chunk is tried.
            continue

        # Only commit to the shared state once the MCQ is fully validated.
        mcq_data = parsed

        radio_options = [
            f"{letter}) {text}"
            for letter, text in zip("ABCD", mcq_data["options"])
        ]

        return mcq_data["question"], gr.update(choices=radio_options, value=None)

    return (
        "Unable to generate MCQ. Try another PDF.",
        gr.update(choices=[], value=None),
    )
def check_answer(user_choice):
    """Grade the selected option against the MCQ stored in ``mcq_data``.

    Args:
        user_choice: The selected radio label, formatted as
            ``"<letter>) <option text>"``, or ``None`` when nothing is
            selected.

    Returns:
        A ``(result_message, correct_answer_message)`` pair. The second item
        is empty unless the choice was wrong.
    """
    # No question has been generated yet (mcq_data starts out empty).
    if not mcq_data:
        return "Please generate a question first.", ""

    # The radio delivers None when the user has not picked anything.
    if not user_choice:
        return "Please select an answer.", ""

    letters = "ABCD"
    correct_letter = str(mcq_data.get("answer", "")).strip().upper()
    stored_options = mcq_data.get("options", [])

    # Guard against an answer key that cannot be mapped onto an option.
    if (
        len(correct_letter) != 1
        or correct_letter not in letters
        or letters.index(correct_letter) >= len(stored_options)
    ):
        return "Unable to grade: the stored answer key is invalid.", ""

    correct_text = stored_options[letters.index(correct_letter)]

    if user_choice.startswith(correct_letter):
        return "Correct!", ""
    return "Wrong!", f"Correct Answer: {correct_letter}) {correct_text}"
# Quiz UI: upload a PDF, generate a question, pick an option, grade it.
with gr.Blocks() as app:
    gr.Markdown("PDF-based Interactive MCQ Generator (LLM)")

    # Upload widget plus the question text and its answer choices.
    pdf = gr.File(label="Upload PDF")
    question = gr.Textbox(label="Question")
    options = gr.Radio(choices=[], label="Choose an answer")

    # Action buttons.
    gen_btn = gr.Button("Generate MCQ")
    submit_btn = gr.Button("Submit Answer")

    # Grading feedback.
    result = gr.Textbox(label="Result")
    correct = gr.Textbox(label="Correct Answer")

    # "Generate MCQ" fills the question box and the radio from the PDF.
    gen_btn.click(fn=generate_mcq_from_pdf, inputs=pdf, outputs=[question, options])

    # "Submit Answer" grades the currently selected radio option.
    submit_btn.click(fn=check_answer, inputs=options, outputs=[result, correct])

app.launch()


Device set to use cuda:0


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://216dcafe35d67950e3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [6]:

!pip install -q transformers langchain faiss-cpu sentence-transformers pypdf gradio

import gradio as gr
from transformers import pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Text-to-text generation pipeline that answers questions about the PDF.
llm = pipeline(task="text2text-generation", model="google/flan-t5-base", max_length=512)

# FAISS index of the processed PDF; stays None until load_pdf_and_create_db runs.
vector_db = None

def load_pdf_and_create_db(pdf_file):
    """Index an uploaded PDF into the module-level ``vector_db``.

    The PDF is loaded, split into overlapping chunks, embedded with a
    sentence-transformers model, and stored in a FAISS index that
    ``chat_with_pdf`` searches.

    Args:
        pdf_file: Value delivered by the file-upload input: an object exposing
            the path as ``.name``, a plain path string, or ``None`` when
            nothing has been uploaded.

    Returns:
        A status message for the UI. When the upload is missing or contains
        no extractable text, ``vector_db`` is left unchanged and the message
        explains why.
    """
    global vector_db

    if pdf_file is None:
        return "Please upload a PDF first."

    # Accept both file-like upload objects and plain path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    docs = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100
    )
    # Drop whitespace-only chunks; they add nothing to retrieval.
    chunks = [
        chunk for chunk in splitter.split_documents(docs)
        if chunk.page_content.strip()
    ]

    # Building a FAISS index from zero documents fails, e.g. for scanned
    # PDFs with no text layer, so report it instead of crashing.
    if not chunks:
        return "No extractable text found in this PDF. Try another file."

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    vector_db = FAISS.from_documents(chunks, embeddings)

    return "PDF processed. You can now chat with it!"


def chat_with_pdf(user_query):
    """Answer a question using the chunks of the processed PDF.

    The three chunks most similar to the query are retrieved from the
    module-level ``vector_db`` and passed to the ``llm`` pipeline as context.

    Args:
        user_query: The question typed by the user.

    Returns:
        The generated answer, or a short instruction when no PDF has been
        processed yet or the question is blank.
    """
    if vector_db is None:
        return "Please upload and process a PDF first."

    # A blank question has nothing to retrieve against; skip the model call.
    if not user_query or not user_query.strip():
        return "Please enter a question."

    docs = vector_db.similarity_search(user_query, k=3)
    context = " ".join(doc.page_content for doc in docs)

    prompt = f"""
Use the context below to answer the question.
If the answer is not in the context, say "I don't know."

Context:
{context}

Question:
{user_query}

Answer:
"""

    return llm(prompt)[0]["generated_text"]

# Chat UI: process a PDF into the vector index, then ask questions about it.
with gr.Blocks() as app:
    gr.Markdown("## Mini PDF Chatbot (LLM-Powered)")

    # PDF upload and indexing status.
    pdf = gr.File(label="Upload PDF")
    status = gr.Textbox(label="Status")
    process_btn = gr.Button("Process PDF")

    # Conversation area.
    # NOTE(review): the recorded cell output shows a warning pointing at the
    # gr.Chatbot() line; presumably about the chat message format. Confirm
    # against the installed Gradio version.
    chatbot = gr.Chatbot()
    query = gr.Textbox(label="Ask a question")
    send_btn = gr.Button("Send")

    process_btn.click(fn=load_pdf_and_create_db, inputs=pdf, outputs=status)

    def respond(message, history):
        """Append the (question, answer) turn to the history and clear the input box."""
        history.append((message, chat_with_pdf(message)))
        return history, ""

    send_btn.click(fn=respond, inputs=[query, chatbot], outputs=[chatbot, query])

app.launch()


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m72.5 MB/s[0m eta [36m0:00:00[0m
[?25h

Device set to use cuda:0
  chatbot = gr.Chatbot()
  chatbot = gr.Chatbot()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://39056bf35587fdaf71.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


