In [1]:
from langchain.document_loaders import PyMuPDFLoader
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [2]:
genai.configure(api_key="AIzaSyAYy9o2kwX5Y2BI1BLalZJjnnNJ08yiEuY")

In [3]:
# Document loading
data = r"C:\Users\dell\Desktop\ITI_2024\NLP\Rag_Project\docs"

def load_documents():
    documents = []
    for filename in os.listdir(data):
        if filename.endswith(".pdf"):
            path = os.path.join(data, filename)
            loader = PyMuPDFLoader(path)
            documents.extend(loader.load())
    return documents

docs = load_documents()
print(f"Loaded {len(docs)} document(s)")

Loaded 2 document(s)


In [4]:
docs=load_documents()
docs

[Document(metadata={'source': 'C:\\Users\\dell\\Desktop\\ITI_2024\\NLP\\Rag_Project\\docs\\Hajar_Elbehairy_AL&ML.pdf', 'file_path': 'C:\\Users\\dell\\Desktop\\ITI_2024\\NLP\\Rag_Project\\docs\\Hajar_Elbehairy_AL&ML.pdf', 'page': 0, 'total_pages': 2, 'format': 'PDF 1.5', 'title': "John Doe's CV", 'author': 'John Doe', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Word 2016', 'producer': 'www.ilovepdf.com', 'creationDate': "D:20250508161516+00'00'", 'modDate': 'D:20250508161516Z', 'trapped': ''}, page_content='Hajar Elsayed Elbehairy \nhagarelbehairy3@gmail.com| 01003554652 /kafr Elshiekh(easy to relocated) \nlinkedin.com/in/hajar-elbehairy-| github.com/HajarElbehairy \nSummary \n \nAI/ML Engineer with a strong background in deep learning, computer vision, and data analysis. Currently completing the ITI 9-month program, focusing on Artificial Intelligence \nand machine learning applications. Passionate about developing intelligent systems that solve real-world problems. \nEducati

In [36]:
# Document splitting
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500, 
    chunk_overlap=300,
    length_function=len,
    add_start_index=True,
    separators=["\n\n", "\n", " ", ""]
)

split_docs = text_splitter.split_documents(docs)
print(f"Split into {len(split_docs)} chunks")

Split into 6 chunks


In [37]:
for i, chunk in enumerate(split_docs):
    print(f"Chunk {i+1}:\n{chunk.page_content}\n{'-'*40}\n")


Chunk 1:
Hajar Elsayed Elbehairy 
hagarelbehairy3@gmail.com| 01003554652 /kafr Elshiekh(easy to relocated) 
linkedin.com/in/hajar-elbehairy-| github.com/HajarElbehairy 
Summary 
 
AI/ML Engineer with a strong background in deep learning, computer vision, and data analysis. Currently completing the ITI 9-month program, focusing on Artificial Intelligence 
and machine learning applications. Passionate about developing intelligent systems that solve real-world problems. 
Education 
 
 
9-Month Diploma (ITI) Information Technology Institute ,(MCIT) Ministry Of Communications And Information Technology , AI track ,Mansoura 
Branch 
 
 
 
 
 
 
 
 
 
Oct 2024– June 2025 
 
Bsc,Artificial Intelligence,KafrElshiekh University                                                                  
 
                     Oct 2020– June 2024 
Internships 
 
Generative AI Intern | Digital Egypt Pioneers Initiative (DEPI) 
📅 July 2024 – Oct 2024 
 
Built and deployed AI and Generative AI models using 

In [None]:
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key="AIzaSyAYy9o2kwX5Y2BI1BLalZJjnnNJ08yiEuY",
    model="er"
)


# Create and persist vector store
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embeddings,
    persist_directory="chroma__db"
)
vectorstore.persist()

In [39]:
# Initialize the LLM
os.environ["GOOGLE_API_KEY"] = "AIzaSyAYy9o2kwX5Y2BI1BLalZJjnnNJ08yiEuY"
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.2,
    convert_system_message_to_human=True
)

In [40]:
# Create HR prompt template
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an expert HR assistant helping with candidate screening.

Given the resume information below, answer the HR professional's question accurately and professionally.

---
📄 Resume:
{context}
---

❓ HR Question:
{question}

---
🎯 Your Task:
- Provide a clear and direct answer based only on the resume.
- If the answer is not explicitly mentioned, respond with: "This information is not available in the resume."
- Use formal HR language.

Answer:
"""
)


In [55]:

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [56]:
retriever 


VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000019E00395E80>, search_kwargs={'k': 3})

In [42]:
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template}
)

In [52]:
# Set up your question
question = "Is hajar ready to move to work place ?"

# Process the question through your RAG chain
result = rag_chain({"query": question})

# Display the answer
print(f"Question: {question}")
print(f"Answer: {result['result']}")
print("\nSources:")
for i, doc in enumerate(result["source_documents"][:2], 1):
    print(f"Source {i}: {doc.page_content[:150]}...")



Question: Is hajar ready to move to work place ?
Answer: Yes, the resume indicates that Hajar is easily relocated.

Sources:
Source 1: Hajar Elsayed Elbehairy 
hagarelbehairy3@gmail.com| 01003554652 /kafr Elshiekh(easy to relocated) 
linkedin.com/in/hajar-elbehairy-| github.com/HajarE...
Source 2: Hajar Elsayed Elbehairy 
hagarelbehairy3@gmail.com| 01003554652 /kafr Elshiekh(easy to relocated) 
linkedin.com/in/hajar-elbehairy-| github.com/HajarE...
