<a href="https://colab.research.google.com/github/Akhileshberi/agenticAICourse/blob/main/Copy_of_Untitled113.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q \
  langchain==0.1.16 \
  langchain-community==0.0.34 \
  langchain-core==0.1.45 \
  chromadb \
  pypdf \
  gradio \
  langchain-google-genai \
  google-genai


# Resume Screening Application

In [None]:

!pip install -q sentence-transformers

# ==============================
# STEP 1: Imports
# ==============================
import os
import gradio as gr
from google.colab import userdata

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

from langchain_google_genai import ChatGoogleGenerativeAI

# HuggingFace Embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

# Agent
from langchain.agents import initialize_agent, AgentType
from langchain_core.tools import tool

# ==============================
# API Key (Gemini)
# ==============================
# Copy the Gemini key from Colab's secret store (userdata) into the process
# environment. NOTE(review): presumably ChatGoogleGenerativeAI below reads it
# from GOOGLE_API_KEY, since no key is passed to it explicitly — confirm.
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# ==============================
# LLM + Embeddings
# ==============================
# Gemini chat model; handed to the agent through initialize_agent(llm=llm).
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.2
)

# HF embeddings: passed to Chroma.from_documents() inside process_pdfs().
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Shared vector store: stays None until process_pdfs() assigns it, and
# search_docs() checks for None before querying.
vectordb = None  # global

# ==============================
# Process PDFs
# ==============================
def process_pdfs(resume_file, jd_file):
    """Index the uploaded resume and job description into a fresh vector store.

    Args:
        resume_file: Upload from the "Resume PDF" widget. Either an object
            whose ``name`` attribute is the file path, or a plain path string.
        jd_file: Upload from the "Job Description PDF" widget, same forms.

    Returns:
        A status message for the UI: a warning when an upload is missing or
        yields no text, otherwise a confirmation with the chunk count.

    Side effects:
        Rebinds the module-level ``vectordb`` to the newly built store. It is
        left untouched when a warning is returned.
    """
    global vectordb

    if resume_file is None or jd_file is None:
        return "⚠️ Upload both Resume and Job Description PDFs."

    # Imported locally so this cell does not depend on another cell's imports.
    import uuid

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = []

    # Both uploads go through the same load -> tag -> split pipeline; only the
    # metadata tag (used by search_docs to label evidence) differs.
    uploads = (
        ("RESUME", "Resume", resume_file),
        ("JD", "Job Description", jd_file),
    )
    for doc_type, label, upload in uploads:
        path = getattr(upload, "name", upload)
        pages = PyPDFLoader(path).load()
        for page in pages:
            page.metadata["type"] = doc_type
            page.metadata["source"] = path

        chunks = splitter.split_documents(pages)
        if not chunks:
            # E.g. a scanned PDF without a text layer: there is nothing to
            # compare, so stop before building a one-sided index.
            return f"⚠️ No extractable text found in the {label} PDF."
        docs.extend(chunks)

    # Use a unique collection per run. With the default collection name,
    # repeated calls in one session can keep adding to the same in-memory
    # collection, so chunks from earlier uploads would leak into later answers.
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        collection_name=f"resume-jd-{uuid.uuid4().hex}",
    )

    return f" Resume & JD processed. Total chunks: {len(docs)}. Now go to 'Ask' tab."

# ==============================
# Tool
# ==============================
@tool
def search_docs(query: str) -> str:
    """Search resume and JD content and return evidence."""
    # NOTE: the docstring above is the tool description shown to the agent,
    # so it is part of runtime behaviour. Edit it deliberately.

    # Nothing has been indexed yet: tell the agent instead of failing.
    if vectordb is None:
        return "NO_DOCUMENTS: Please upload and process PDFs first."

    # Fetch the six closest chunks for the query.
    retriever = vectordb.as_retriever(search_kwargs={"k": 6})
    hits = retriever.invoke(query)
    if not hits:
        return "NO_MATCHES: Nothing relevant found in the documents."

    # Prefix each chunk with its document tag (RESUME / JD) so the agent can
    # tell which side a piece of evidence comes from.
    labelled = [f"[{hit.metadata.get('type')}] {hit.page_content}" for hit in hits]
    return "\n\n---\n\n".join(labelled)

# ==============================
# STEP 6: Agent
# ==============================
# Instruction text that resume_chat() prepends to every user question. It is
# sent as part of the agent input, not as a separate system message.
SYSTEM_RULES = """
You are a Resume Fit Scoring Agent.

TASK:
- Compare Resume vs Job Description
- Give Fit Score (0–100)
- Verdict (Strong / Moderate / Weak)
- Strengths
- Missing Skills / Gaps
- Resume Improvements
- Interview Prep Topics

RULES:
- Use search_docs BEFORE answering (collect evidence).
- Do NOT hallucinate skills/experience not present in the resume.
- If info is missing, say: "I don't see this information in the documents."
- Keep the output structured and simple.
"""

# ReAct-style agent whose only tool is search_docs.
# handle_parsing_errors=True makes the executor feed a malformed model reply
# back to the model for another attempt, instead of raising an output-parsing
# exception that would surface in the chat UI as a bare error.
agent = initialize_agent(
    tools=[search_docs],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
    handle_parsing_errors=True,
)

# ==============================
# Chat
# ==============================
def resume_chat(message, history):
    """Answer one chat message with the resume-fit agent.

    Args:
        message: The user's latest question from the chat box.
        history: Earlier chat turns supplied by gr.ChatInterface. Unused, so
            each question is answered independently of previous ones.

    Returns:
        The agent's final answer text.
    """
    # The rules travel inline with the question because the agent is created
    # without a custom system prompt.
    prompt = f"{SYSTEM_RULES}\n\nUser question: {message}"

    # invoke() is the replacement for the deprecated Chain.run(); the final
    # answer is stored under the "output" key of the returned dict.
    return agent.invoke({"input": prompt})["output"]

# ==============================
# UI
# ==============================
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Resume Fit Scorer")

    # Tab 1: upload both PDFs and build the vector store.
    with gr.Tab("Upload"):
        resume_input = gr.File(label="Resume PDF", file_types=[".pdf"])
        jd_input = gr.File(label="Job Description PDF", file_types=[".pdf"])
        process_button = gr.Button("Process", variant="primary")
        status_box = gr.Textbox(label="Status", lines=5, interactive=False)

        # The status text returned by process_pdfs is shown in status_box.
        process_button.click(
            fn=process_pdfs,
            inputs=[resume_input, jd_input],
            outputs=status_box,
        )

    # Tab 2: chat with the agent about the indexed documents.
    with gr.Tab("Ask"):
        gr.ChatInterface(
            fn=resume_chat,
            examples=[
                "Give me fit score (0-100) and missing skills.",
                "List strengths and gaps for this JD.",
                "How can I improve my resume for this role?",
                "What interview topics should I prepare based on missing skills?",
            ],
        )

# share=True requests a public gradio.live URL for the app.
demo.launch(share=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  warn_deprecated(
  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9679c6fdf7501b21aa.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Vector DB (Additional Information)

In [None]:
# Debug cell: look inside the Chroma collection built by process_pdfs().
print("vectordb is None?", vectordb is None)

if vectordb is None:
    # Nothing has been indexed yet. Stop here instead of failing with an
    # AttributeError on the lines below.
    print("No vector store yet: upload and process PDFs in the app first.")
else:
    # _collection is the underlying Chroma collection (private attribute of
    # the LangChain wrapper); used here only for inspection.
    collection = vectordb._collection
    print("Total chunks stored:", collection.count())

    # One stored embedding is enough to show the vector dimensionality.
    vector = collection.peek(limit=1)["embeddings"][0]
    print("Vector length:", len(vector))
    print("First 10 numbers:", vector[:10])

    # Show metadata and raw text of the first few chunks.
    peek = collection.peek(limit=5)
    print("Metadatas:", peek["metadatas"])

    for i, doc in enumerate(peek["documents"], 1):
        print(f"\n--- Chunk {i} ---")
        print(doc)


vectordb is None? False
Total chunks stored: 5
Vector length: 384
First 10 numbers: [-0.01521417 -0.01262698 -0.02824473 -0.00053034 -0.05514342 -0.04659703
  0.00857372  0.02665649 -0.0838716  -0.00907371]
Metadatas: [{'type': 'RESUME', 'source': '/tmp/gradio/7dc3ee3005be28c7b03f7b06ac891ccca88e586edc57e18d9614d4dc6963bce6/Resume_Sample 1.pdf', 'page': 0}, {'source': '/tmp/gradio/7dc3ee3005be28c7b03f7b06ac891ccca88e586edc57e18d9614d4dc6963bce6/Resume_Sample 1.pdf', 'type': 'RESUME', 'page': 0}, {'source': '/tmp/gradio/7dc3ee3005be28c7b03f7b06ac891ccca88e586edc57e18d9614d4dc6963bce6/Resume_Sample 1.pdf', 'page': 0, 'type': 'RESUME'}, {'type': 'JD', 'page': 0, 'source': '/tmp/gradio/f05a6dad2bc4136298ab4edf769b6120bca12f13d632968219fbcd5ba2bb864c/Sample_Job_Description_Software_Developer.pdf'}, {'type': 'JD', 'source': '/tmp/gradio/f05a6dad2bc4136298ab4edf769b6120bca12f13d632968219fbcd5ba2bb864c/Sample_Job_Description_Software_Developer.pdf', 'page': 0}]

--- Chunk 1 ---
VAISHNAVI 
 
SK

# Agentic AI Medical Report





In [None]:
!pip install -q sentence-transformers

# ==============================
# Imports
# ==============================
import os
import gradio as gr
from google.colab import userdata

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain_google_genai import ChatGoogleGenerativeAI

from langchain.agents import initialize_agent, AgentType
from langchain_core.tools import tool

# ==============================
# API Key (Gemini)
# ==============================
# Copy the Gemini key from Colab's secret store (userdata) into the process
# environment. NOTE(review): presumably ChatGoogleGenerativeAI below reads it
# from GOOGLE_API_KEY, since no key is passed to it explicitly — confirm.
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# ==============================
# LLM + Embeddings
# ==============================
# Gemini chat model; handed to the agent through initialize_agent(llm=llm).
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.2
)

# Embedding model passed to Chroma.from_documents() inside process_pdfs().
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Shared vector store: stays None until process_pdfs() assigns it, and
# search_docs() checks for None before querying.
vectordb = None  # global

# ==============================
# Process Medical PDFs
# ==============================
def process_pdfs(files):
    """Index the uploaded medical PDFs into a fresh vector store.

    Args:
        files: Uploads from the multi-file widget. Each item is either an
            object whose ``name`` attribute is the file path, or a plain path
            string. ``None`` or an empty list means nothing was uploaded.

    Returns:
        A status message for the UI: a warning when nothing was uploaded or no
        text could be extracted, otherwise a confirmation with the chunk count.

    Side effects:
        Rebinds the module-level ``vectordb`` to the newly built store. It is
        left untouched when a warning is returned.
    """
    global vectordb

    if not files:
        return "⚠️ Upload at least one medical PDF."

    # Imported locally so this cell does not depend on another cell's imports.
    import uuid

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = []

    for file in files:
        path = getattr(file, "name", file)
        pdf_docs = PyPDFLoader(path).load()
        for d in pdf_docs:
            d.metadata["type"] = "MEDICAL_DOC"
        docs.extend(splitter.split_documents(pdf_docs))

    if not docs:
        # E.g. scanned reports without a text layer: nothing can be indexed.
        return "⚠️ No extractable text found in the uploaded PDFs."

    # Use a unique collection per run. With the default collection name,
    # repeated calls in one session can keep adding to the same in-memory
    # collection, so chunks from earlier uploads (possibly another patient's
    # documents) would leak into later answers.
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        collection_name=f"medical-{uuid.uuid4().hex}",
    )

    return f"✅ Medical PDFs processed. Total chunks: {len(docs)}. Go to Ask tab."

# ==============================
# Tool
# ==============================
@tool
def search_docs(query: str) -> str:
    """Search medical documents and return evidence."""
    # NOTE: the docstring above is the tool description shown to the agent,
    # so it is part of runtime behaviour. Edit it deliberately.

    # Nothing has been indexed yet: tell the agent instead of failing.
    if vectordb is None:
        return "NO_DOCUMENTS: Please upload and process medical PDFs first."

    # Fetch the six closest chunks for the query.
    retriever = vectordb.as_retriever(search_kwargs={"k": 6})
    hits = retriever.invoke(query)
    if not hits:
        return "NO_MATCHES: Nothing relevant found in the documents."

    # Return the raw chunk texts separated by blank lines.
    passages = [str(hit.page_content) for hit in hits]
    return "\n\n".join(passages)

# ==============================
# Agent
# ==============================
# Instruction text that medical_chat() prepends to every user question. It is
# sent as part of the agent input, not as a separate system message.
SYSTEM_RULES = """
You are a Medical Document Explainer Agent.

TASK:
- Answer questions only using uploaded medical documents
- Explain in VERY SIMPLE language
- Explain medicines, test results, and precautions
- If info is missing, say: "I don't see this information in the documents."

RULES:
- Use search_docs BEFORE answering
- Do NOT give medical advice
- Always end with:
  "This is NOT a substitute for a doctor's advice. Please consult a doctor."
"""

# ReAct-style agent whose only tool is search_docs.
# handle_parsing_errors=True makes the executor feed a malformed model reply
# back to the model for another attempt, instead of raising an output-parsing
# exception that would surface in the chat UI as a bare error.
agent = initialize_agent(
    tools=[search_docs],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
    handle_parsing_errors=True,
)

# ==============================
# Chat
# ==============================
def medical_chat(message, history):
    """Answer one chat message with the medical-document agent.

    Args:
        message: The user's latest question from the chat box.
        history: Earlier chat turns supplied by gr.ChatInterface. Unused, so
            each question is answered independently of previous ones.

    Returns:
        The agent's final answer text.
    """
    # The rules travel inline with the question because the agent is created
    # without a custom system prompt.
    prompt = f"{SYSTEM_RULES}\n\nUser question: {message}"

    # invoke() is the replacement for the deprecated Chain.run(); the final
    # answer is stored under the "output" key of the returned dict.
    return agent.invoke({"input": prompt})["output"]

# ==============================
# UI
# ==============================
with gr.Blocks() as demo:
    gr.Markdown("# 🩺 Medical Document Explainer")

    # Tab 1: upload one or more PDFs and build the vector store.
    with gr.Tab("Upload"):
        pdf_inputs = gr.File(
            file_count="multiple",
            file_types=[".pdf"],
            label="Upload Medical PDFs",
        )
        process_button = gr.Button("Process", variant="primary")
        status_box = gr.Textbox(label="Status", lines=4, interactive=False)

        # The status text returned by process_pdfs is shown in status_box.
        process_button.click(
            fn=process_pdfs,
            inputs=pdf_inputs,
            outputs=status_box,
        )

    # Tab 2: chat with the agent about the indexed documents.
    with gr.Tab("Ask"):
        gr.ChatInterface(
            fn=medical_chat,
            examples=[
                "What does this medicine do?",
                "What precautions should I take?",
                "Explain the blood test results in simple words.",
                "Summarize the discharge advice.",
            ],
        )

# share=True requests a public gradio.live URL for the app.
demo.launch(share=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  warn_deprecated(
  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://55dc5c57ea35b98eb1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Google File Search


In [None]:
!pip install -q google-genai gradio

from google import genai
from google.genai import types
import gradio as gr
import os, time

# ---- GLOBAL VARIABLES ----
# Both start as None and are assigned by setup_gemini().
client = None  # genai.Client built from the user's API key
store = None   # File Search store used by upload_pdf() and ask_question()


# =========================================================
# Initialize Gemini client with API Key
# =========================================================
def setup_gemini(api_key):
    """Create the Gemini client and a File Search store from an API key.

    Args:
        api_key: Key typed into the UI. Surrounding whitespace is ignored.

    Returns:
        A status message: a prompt to enter a key when it is blank, otherwise
        a confirmation that the client and store are ready.

    Side effects:
        Sets the GOOGLE_API_KEY environment variable and rebinds the
        module-level ``client`` and ``store``.
    """
    global client, store

    api_key = (api_key or "").strip()
    if not api_key:
        return "Please enter a Gemini API key."

    os.environ["GOOGLE_API_KEY"] = api_key

    # Build both objects in locals first, so a failure while creating the
    # store (e.g. an invalid key) cannot leave a new client paired with an
    # old or missing store.
    new_client = genai.Client(api_key=api_key)
    new_store = new_client.file_search_stores.create(
        config={"display_name": "gradio-store"}
    )
    client, store = new_client, new_store

    return "API Key Set! Store created."


# =========================================================
# Upload PDF to Gemini File Search
# =========================================================
def upload_pdf(pdf_file):
    """Upload one PDF into the File Search store and wait until it is indexed.

    Args:
        pdf_file: Upload from the file widget. Either an object whose ``name``
            attribute is the file path, or a plain path string. ``None`` when
            nothing was selected.

    Returns:
        A status message for the "Upload Status" box.
    """
    global client, store

    if pdf_file is None:
        return "Please upload a PDF."

    # setup_gemini() has not run yet, so there is no client/store to use.
    if client is None or store is None:
        return "Please set your Gemini API key first."

    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Upload to File Search Store. The base file name is used as the display
    # name, rather than the full temporary upload path.
    operation = client.file_search_stores.upload_to_file_search_store(
        file=pdf_path,
        file_search_store_name=store.name,
        config={"display_name": os.path.basename(pdf_path)}
    )

    # The upload is a long-running operation: poll until it reports done
    # instead of sleeping for a fixed time and hoping indexing has finished.
    deadline = time.monotonic() + 120
    while not operation.done:
        if time.monotonic() > deadline:
            return "PDF upload is still being processed. Please try asking again in a moment."
        time.sleep(2)
        operation = client.operations.get(operation)

    return "PDF uploaded and processed successfully! Ask your question."


# =========================================================
# Ask question → Gemini answers from the PDF
# =========================================================
def ask_question(question):
    """Answer a question with Gemini, grounded on the File Search store.

    Args:
        question: Text typed by the user. ``None`` or blank input is rejected.

    Returns:
        The model's answer text, or a short message when the input is blank,
        the app has not been set up yet, or the model returned no text.
    """
    global client, store

    if not question or not question.strip():
        return "Please enter a question."

    # setup_gemini() has not run yet, so there is no client/store to query.
    if client is None or store is None:
        return "Please set your Gemini API key and upload a PDF first."

    # Attach the store as a File Search tool so the model can retrieve from
    # the uploaded documents while answering.
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=question,
        config=types.GenerateContentConfig(
            tools=[
                types.Tool(
                    file_search=types.FileSearch(
                        file_search_store_names=[store.name]
                    )
                )
            ]
        )
    )

    # response.text can be empty when the reply carries no text part.
    return response.text or "No answer was returned for this question."


# =========================================================
# Gradio UI
# =========================================================
with gr.Blocks(title="Gemini File Search (Simple)") as demo:

    gr.Markdown("# 📄 Gemini File Search\n### Upload a PDF → Ask a Question → Get Answer")

    # Step 1: API key. Masked input so the secret is not shown in clear text
    # on screen.
    api_key_box = gr.Textbox(label="Enter your Gemini API Key", type="password")
    set_key_button = gr.Button("Set API Key")
    key_output = gr.Textbox(label="Status")

    # Step 2: document upload, limited to PDFs as the labels promise.
    pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
    upload_button = gr.Button("Process PDF")
    upload_status = gr.Textbox(label="Upload Status")

    # Step 3: question and answer.
    question_box = gr.Textbox(label="Ask a Question about the PDF")
    ask_button = gr.Button("Ask")
    answer_box = gr.Textbox(label="Answer", lines=6)

    # Button actions: each handler's return value fills its status/answer box.
    set_key_button.click(fn=setup_gemini, inputs=api_key_box, outputs=key_output)
    upload_button.click(fn=upload_pdf, inputs=pdf_upload, outputs=upload_status)
    ask_button.click(fn=ask_question, inputs=question_box, outputs=answer_box)

# Start the app
demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fbd41117b35111a569.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


