In [2]:
pip install torch torchvision faiss-cpu openai sentence-transformers gradio

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)

In [6]:
pip install PyMuPDF langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain_community)
  Downloading langchain_core-0.3.51-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.23 (from langchain_community)
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [None]:
import gradio as gr
import faiss
import numpy as np
import fitz  # PyMuPDF for PDF text extraction
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory  # Simpler memory
from langchain.text_splitter import RecursiveCharacterTextSplitter
import getpass


# ✅ Load Sentence Transformer for Embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Ask the model for its output width rather than hard-coding it, so the FAISS
# index below always matches the encoder even if the model name is changed.
embedding_dim = embedder.get_sentence_embedding_dimension()

# ✅ FAISS Vector Store
# Flat (exact) L2 index; row i of the index corresponds to stored_texts[i].
index = faiss.IndexFlatL2(embedding_dim)
stored_texts = []

# ✅ LLM Model (OpenRouter)
# OpenRouter is reached through the OpenAI-compatible client by overriding the base URL.
BASE_URL = "https://openrouter.ai/api/v1"
# Prompt for the key at run time so it is never stored in the notebook.
API_KEY = getpass.getpass("Enter your OpenRouter API key: ")

llm = ChatOpenAI(
    openai_api_base=BASE_URL,
    openai_api_key=API_KEY,
    model="mistralai/mistral-small",
)

# ✅ Memory for Context
# Stores each exchange under "chat_history"; "query" is the input field that gets recorded.
memory = ConversationBufferMemory(memory_key="chat_history", input_key="query")

# NOTE: a size check placed here would run exactly once, while this cell executes and the
# buffer is still empty, so it could never clear anything. (It also measured the length of
# the rendered history, which appears to be a string, not a number of queries.)
# Any trimming of the history has to happen per request, inside the request handler.


# ✅ Convert Text to Embeddings
def embed_text(text):
    """Encode one string with the module-level ``embedder`` and return its vector.

    The text is wrapped in a one-element batch and the single resulting row is returned.
    """
    batch = embedder.encode([text])
    return batch[0]

# ✅ Add Text to FAISS
def add_to_index(text):
    """Embed ``text``, append the vector to the FAISS ``index`` and record the text.

    The text is appended to ``stored_texts`` only after the vector was added, so list
    position and index row stay aligned.
    """
    # FAISS is given a float32 matrix of shape (1, dim): one row for this text.
    row = np.array(embed_text(text), dtype=np.float32).reshape(1, -1)
    index.add(row)
    stored_texts.append(text)

# ✅ Retrieve Similar Text from PDFs
def retrieve_similar_text(query, top_k=3):
    """Return up to ``top_k`` stored chunks closest to ``query``.

    Args:
        query: The question text to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of stored text chunks, nearest first. When nothing has been indexed
        yet the single placeholder ``"No relevant data found."`` is returned; when
        ``top_k`` is not positive the list is empty.
    """
    if len(stored_texts) == 0:
        return ["No relevant data found."]

    # Never request more neighbours than there are stored chunks: FAISS pads the
    # missing slots with label -1, which would otherwise index from the list's end.
    k = min(top_k, len(stored_texts))
    if k <= 0:
        return []

    query_vector = np.asarray(embed_text(query), dtype=np.float32).reshape(1, -1)
    _distances, indices = index.search(query_vector, k)
    # Keep the lower bound as well: a -1 label must be skipped, not read as stored_texts[-1].
    return [stored_texts[i] for i in indices[0] if 0 <= i < len(stored_texts)]

# ✅ Create Prompt Template
# The template is filled with the retrieved chunks ({context}) and the user's question ({query}).
# NOTE(review): there is no {chat_history} placeholder, so the memory attached to the
# chain below is recorded but does not appear in the text sent to the model — confirm intent.
prompt = PromptTemplate(
    template="""
    You are an AI assistant answering questions based on uploaded PDFs.
    Context: {context}
    Question: {query}
    Answer:
    """,
    input_variables=["context", "query"],
)

# ✅ Create LLM Chain
# Ties the prompt, the OpenRouter model and the conversation memory together.
llm_chain = LLMChain(prompt=prompt, llm=llm, memory=memory)

# ✅ Function to Extract Text from PDFs
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page in the PDF at ``pdf_path``.

    Each page's text is followed by a newline. The document is opened in a
    ``with`` block so the file handle is released even if extraction fails.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated page texts as one string (empty for a PDF with no text layer).
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)

# ✅ Function to Upload PDF and Store Embeddings
def process_pdf(file):
    """Extract, chunk and index an uploaded PDF; return a status message for the UI.

    Args:
        file: Path of the uploaded PDF, or ``None``/empty when the button is
            pressed before anything was uploaded.

    Returns:
        A human-readable status string shown in the status box.
    """
    # The upload widget yields nothing when no file was selected; report that
    # instead of failing while trying to open it.
    if not file:
        return "Please upload a PDF file first."

    pdf_text = extract_text_from_pdf(file)

    # ✅ Improved Chunking: 500-character chunks with a 50-character overlap.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_text(pdf_text)

    # A PDF without a text layer (e.g. scanned pages) produces no chunks; say so
    # rather than claiming success with nothing indexed.
    if not chunks:
        return "No extractable text was found in this PDF."

    for chunk in chunks:
        add_to_index(chunk)

    return "PDF processed successfully! Now you can ask questions."

# ✅ Generate Response
def generate_response(query):
    """Answer ``query`` using the most similar indexed chunks as context.

    Args:
        query: The user's question from the UI.

    Returns:
        The model's answer as plain text, or a short prompt when the question is blank.
    """
    if not query or not query.strip():
        return "Please enter a question."

    # Refresh the conversation memory once 5 exchanges have accumulated. Each
    # exchange is stored as two messages (question and answer).
    max_exchanges = 5
    if len(memory.chat_memory.messages) >= 2 * max_exchanges:
        memory.clear()

    contexts = retrieve_similar_text(query)
    context_str = "\n\n---\n\n".join(contexts) if contexts else "No relevant data found."

    response = llm_chain.invoke({"context": context_str, "query": query})

    # The answer is displayed in a plain Textbox, which shows markup literally,
    # so return the text itself rather than wrapping it in HTML tags.
    return response["text"]




# ✅ Gradio Interface
# Components are created inside the Blocks context, in the order they are written below.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF-Based Chatbot")

    # --- PDF upload and processing status ---
    file_upload = gr.File(type="filepath", label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    output_text = gr.Textbox(interactive=False, label="PDF Processing Status")

    # --- Question and answer ---
    query_input = gr.Textbox(label="Ask a question")
    query_btn = gr.Button("Submit")
    answer_output = gr.Textbox(interactive=False, label="Answer")

    # --- Event wiring: each button feeds one handler and one output box ---
    upload_btn.click(fn=process_pdf, inputs=file_upload, outputs=output_text)
    query_btn.click(fn=generate_response, inputs=query_input, outputs=answer_output)

# debug=True keeps this cell running so errors and logs stay visible in Colab.
demo.launch(debug=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  llm = ChatOpenAI(
  memory = ConversationBufferMemory(memory_key="chat_history", input_key="query")
  llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://e57f2df9c6d1583eff.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
pip install gradio faiss-cpu numpy pymupdf sentence-transformers langchain_community openai

Collecting langchain_community
  Downloading langchain_community-0.3.20-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

**Medical Report**

In [None]:
import getpass

import faiss
import fitz  # PyMuPDF for PDF text extraction
import gradio as gr
import numpy as np
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer

# ✅ Load Sentence Transformer for Medical Embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Read the vector width from the model itself so the FAISS index cannot get out
# of sync with the encoder if a different model is chosen later.
embedding_dim = embedder.get_sentence_embedding_dimension()

# ✅ FAISS Vector Store
# Flat (exact) L2 index; row i of the index corresponds to stored_texts[i].
index = faiss.IndexFlatL2(embedding_dim)
stored_texts = []

# ✅ LLM Model (OpenRouter)
# The key is typed in at run time (hidden input) and is never written into the notebook.
# Surrounding whitespace from copy-paste is removed so it cannot end up in the auth header.
API_KEY = getpass.getpass("Enter your OpenRouter API key: ").strip()
BASE_URL = "https://openrouter.ai/api/v1"


from langchain.chat_models import ChatOpenAI

# Chat model served by OpenRouter through the OpenAI-compatible client.
# NOTE(review): the explicit Authorization header presumably duplicates what
# openai_api_key already sends — confirm before removing it.
llm = ChatOpenAI(
    openai_api_key=API_KEY,
    openai_api_base=BASE_URL,
    model="mistralai/mistral-small",
    default_headers={"Authorization": f"Bearer {API_KEY}"},  # ✅ OpenRouter authentication
    request_timeout=60,
)


# ✅ Memory for Context
# Stores each exchange under "chat_history"; "query" is the input field that gets recorded.
memory = ConversationBufferMemory(input_key="query", memory_key="chat_history")

# ✅ Convert Text to Embeddings
def embed_text(text):
    """Return the embedding vector of a single string.

    The module-level ``embedder`` is called with a one-item list and the only
    row of its result is handed back.
    """
    encoded = embedder.encode([text])
    return encoded[0]

# ✅ Add Text to FAISS
def add_to_index(text):
    """Add one text chunk to the vector store.

    The chunk is embedded, added to the FAISS ``index`` as a single float32 row,
    and then appended to ``stored_texts`` so both stay in the same order.
    """
    row = np.array(embed_text(text), dtype=np.float32).reshape(1, -1)
    index.add(row)
    stored_texts.append(text)

# ✅ Retrieve Similar Text from Medical Reports
def retrieve_similar_text(query, top_k=3):
    """Return up to ``top_k`` indexed report chunks closest to ``query``.

    Args:
        query: The question text to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of stored text chunks, nearest first. When nothing has been indexed
        yet the single placeholder ``"No relevant data found."`` is returned; when
        ``top_k`` is not positive the list is empty.
    """
    if len(stored_texts) == 0:
        return ["No relevant data found."]

    # Cap the request at the number of stored chunks: FAISS fills slots it cannot
    # satisfy with label -1, which would otherwise select the last stored chunk.
    k = min(top_k, len(stored_texts))
    if k <= 0:
        return []

    query_vector = np.asarray(embed_text(query), dtype=np.float32).reshape(1, -1)
    _distances, indices = index.search(query_vector, k)
    # Check the lower bound too, so a -1 label is skipped instead of wrapping around.
    return [stored_texts[i] for i in indices[0] if 0 <= i < len(stored_texts)]

# ✅ Create Medical Chatbot Prompt
# {context} receives the retrieved report excerpts, {query} the user's question.
# NOTE(review): the template has no {chat_history} placeholder, so the memory attached
# to the chain below is recorded but is not part of the text sent to the model — confirm intent.
prompt = PromptTemplate(
    template="""
    You are a medical AI assistant analyzing a patient's medical report.
    Based on the provided medical data, answer the user's query accurately.

    Medical Report Context:
    {context}

    Patient Query:
    {query}

    Medical AI Response:
    """,
    input_variables=["context", "query"],
)

# ✅ Create LLM Chain
# Ties the prompt, the OpenRouter model and the conversation memory together.
llm_chain = LLMChain(prompt=prompt, llm=llm, memory=memory)

# ✅ Function to Extract Text from Medical Reports
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the report at ``pdf_path``.

    Each page's text is followed by a newline. Opening the document in a ``with``
    block guarantees the file is closed again, also when a page fails to extract.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated page texts as one string (empty for a PDF with no text layer).
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)

# ✅ Function to Upload and Process Medical Reports
def process_pdf(file):
    """Extract, chunk and index an uploaded medical report; return a status message.

    Args:
        file: Path of the uploaded PDF, or ``None``/empty when the button is
            pressed before anything was uploaded.

    Returns:
        A human-readable status string shown in the status box.
    """
    # Nothing uploaded yet: tell the user instead of failing on open.
    if not file:
        return "Please upload a medical report (PDF) first."

    pdf_text = extract_text_from_pdf(file)

    # ✅ Chunking for Better Processing: 500-character chunks, 50-character overlap.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_text(pdf_text)

    # A report without a text layer (e.g. a scan) yields no chunks; do not report
    # success when nothing could be indexed.
    if not chunks:
        return "No extractable text was found in this report."

    for chunk in chunks:
        add_to_index(chunk)

    return "Medical report processed successfully! Now you can ask questions."

# ✅ Generate Response
def generate_response(query):
    """Answer a question about the uploaded report using retrieved excerpts as context.

    Args:
        query: The user's question from the UI.

    Returns:
        The model's answer as plain text, or a short prompt when the question is blank.
    """
    if not query or not query.strip():
        return "Please enter a question."

    contexts = retrieve_similar_text(query)  # Retrieve relevant medical info
    context_str = "\n\n---\n\n".join(contexts) if contexts else "No relevant data found."

    response = llm_chain.invoke({"context": context_str, "query": query})
    # invoke() returns a dict of the chain's inputs and outputs; the answer box
    # should show only the generated text, not the whole dict.
    return response["text"]

# ✅ Gradio Interface
# Components are created inside the Blocks context, in the order they are written below.
# NOTE(review): the recorded output shows Colab serving this app on a public share URL;
# consider whether that is acceptable for medical reports.
with gr.Blocks() as demo:
    gr.Markdown("# 🏥 AI Healthcare Chatbot")

    # --- Report upload and processing status ---
    file_upload = gr.File(type="filepath", label="Upload Medical Report (PDF)")
    upload_btn = gr.Button("Process Report")
    output_text = gr.Textbox(interactive=False, label="Processing Status")

    # --- Question and answer ---
    query_input = gr.Textbox(label="Ask a Medical Question")
    query_btn = gr.Button("Submit")
    answer_output = gr.Textbox(interactive=False, label="AI Medical Response")

    # --- Event wiring: each button feeds one handler and one output box ---
    upload_btn.click(fn=process_pdf, inputs=file_upload, outputs=output_text)
    query_btn.click(fn=generate_response, inputs=query_input, outputs=answer_output)

# debug=True keeps this cell running so errors and logs stay visible in Colab.
demo.launch(debug=True)


Enter your OpenRouter API key: ··········
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://6c8fe71a82ec419d8b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://6c8fe71a82ec419d8b.gradio.live


