# Import necessary libraries

In [1]:
from pathlib import Path
from langchain_community .document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load the dataset

In [2]:
from pathlib import Path

Data_dir = Path("/workspace/DATA") 
assert Data_dir.exists(), f"Folder not found: {Data_dir}"


# Document Loading Phase in Rag 

In [3]:
docs = []
for pdf in Data_dir.glob("*.pdf"):
    loader = PyMuPDFLoader(str(pdf))
    docs.extend(loader.load())

print(f"Loaded {len(docs)} documents")
print("Sample text:" , docs[0].page_content[:300])

Loaded 9 documents
Sample text: Lina Alami
MLOps Engineer — Amman, Jordan
Email: lina.alami3@example.com | Phone: +971597257890
Professional Summary
Experienced MLOps Engineer with a strong track record of building production-grade machine learning
systems, from data engineering and model training to deployment and monitoring. Ski


# Text Chunking

In [4]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, # maximum size of each chunk
    chunk_overlap=50 # overlap so contecxt is maintained
)

chunks = splitter.split_documents(docs)
print(f"Split into {len(chunks)} chunks")
print("Sample chunk:" , chunks[0].page_content[:300])

Split into 42 chunks
Sample chunk: Lina Alami
MLOps Engineer — Amman, Jordan
Email: lina.alami3@example.com | Phone: +971597257890
Professional Summary
Experienced MLOps Engineer with a strong track record of building production-grade machine learning
systems, from data engineering and model training to deployment and monitoring. Ski


we do here texxt chunking because the documents are too large to be processed in one go by the model. By breaking them into smaller, manageable chunks, we can ensure that the model can effectively analyze and retrieve relevant information from each part of the document. This approach enhances the model's ability to understand and respond accurately to queries related to the content of the documents.

# Embedding Generation

In [5]:
embeddings= HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
# create vector store
db = Chroma.from_documents(
    documents = chunks,
    embedding = embeddings,
    collection_name = "resumes",

)

Each text chunk is transformed into vector embeddings using the `all-MiniLM-L6-v2` model from Sentence Transformers.  
These embeddings capture semantic meaning and are stored in a Chroma vector database under the collection `resumes`.  
This enables efficient similarity search, allowing the assistant to retrieve the most relevant resume sections for any query.

# Semantic Retrieval

In [6]:
query = "Find candidates with Tensorflow and AWS experience"
results = db.similarity_search(query, k=2)

for i, r in enumerate(results, start=1):
    print(f"\n[{i}] {r.metadata}")
    print(r.page_content[:300])


[1] {'author': '(anonymous)', 'creationDate': "D:20250930175541+00'00'", 'creator': '(unspecified)', 'file_path': '/workspace/DATA/ai_resume_realistic_01_Khalid_Nassar.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': "D:20250930175541+00'00'", 'page': 0, 'producer': 'ReportLab PDF Library - www.reportlab.com', 'source': '/workspace/DATA/ai_resume_realistic_01_Khalid_Nassar.pdf', 'subject': '(unspecified)', 'title': '(anonymous)', 'total_pages': 1, 'trapped': ''}
post-processing to extract structured candidate data.
• Optimized model inference using TensorRT and mixed precision; achieved 2.5x throughput improvement
on GPU.
Education
MSc in Computer Science, Machine Learning Track — University of Technology, 2019
Certifications
• AWS Certified Machine Learnin

[2] {'author': '(anonymous)', 'creationDate': "D:20250930175541+00'00'", 'creator': '(unspecified)', 'file_path': '/workspace/DATA/ai_resume_realistic_05_Sara_Karim.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': "D:202

The user query is embedded and compared against all stored resume embeddings using cosine similarity.  
Chroma returns the most semantically similar chunks (k=2), allowing the system to find relevant resumes even if they use different wording.

#Language Model Integration

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "microsoft/phi-2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    temperature=0.2,
    device=0   
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


The Microsoft Phi-2 model is loaded using Hugging Face Transformers for text generation.  
Its tokenizer and model handle converting user queries and retrieved context into coherent, natural-language responses.  
This LLM forms the “Generation” part of the RAG pipeline, enabling the assistant to answer questions based on resume data.

# Connecting Chroma Retrieval with the Phi-2 Generator

In [8]:
def extract_candidate_info(context_text, user_query):
    prompt = f"""
You are an AI recruiter assistant.

Using ONLY the context below, answer the user query.
Output MUST be formatted exactly like this:
Candidate: <candidate placeholder or role if name missing>
Skills/Certifications: <comma-separated list>
Education: <if mentioned>
If nothing matches, say "I don't know".

Context:
{context_text}

Question: {user_query}

Answer:
"""
    result = generator(prompt, max_new_tokens=200, temperature=0.1)
    return result[0]["generated_text"].split("Answer:")[-1].strip()


In this step, we connect the retrieval and generation components of our RAG pipeline.
ChromaDB is used to retrieve the most relevant resume chunks that match the user’s query through semantic similarity search.
The retrieved text is then combined into a single context_text, which is passed — along with the user’s query to the Phi-2 language model via the extract_candidate_info() function.
This function uses a structured prompt to guide the model in generating clear, formatted answers (e.g., candidate name, skills, and education).

# Gradio Interface

In [None]:
import gradio as gr
from langchain.llms import HuggingFacePipeline

# Connect your Phi-2 model
llm = HuggingFacePipeline(pipeline=generator)

# Chat Handler 
def chat_with_assistant(message, history):
    retrieved_docs = db.similarity_search(message, k=3)
    context_text = "\n\n".join([doc.page_content for doc in retrieved_docs])
    answer = extract_candidate_info(context_text, message)
    formatted_answer = (
        f"**{answer}**" if answer else "🤖 I couldn't find matching information."
    )
    history.append((message, formatted_answer))
    return history

#  Gradio UI 
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 RAG Resume Assistant (Phi-2)")
    gr.Markdown(
        "Chat with your AI assistant to find the best AI Engineer candidates. "
        "Ask for specific skills, frameworks, or experiences — the assistant retrieves data from your resumes."
    )

    chatbot = gr.Chatbot(label="💬 Chat with Resume Assistant")
    user_input = gr.Textbox(
        placeholder="Example: Give me one candidate with AWS and TensorFlow experience",
        label="Your Question",
        lines=1
    )
    clear = gr.Button("Clear Chat")

    #  Event bindings (inside the block)
    user_input.submit(chat_with_assistant, [user_input, chatbot], chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch once
demo.launch(share=True)


Running on local URL:  http://127.0.0.1:7862
IMPORTANT: You are using gradio version 4.16.0, however version 4.44.1 is available, please upgrade.
--------
Running on public URL: https://8d68f130d176ce8358.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Done By: Rami Assaf