<a href="https://colab.research.google.com/github/MBilalSharif/RAG-based-CV-Reader/blob/main/CV_Reader_RAG_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the libraries this notebook uses via pip's quiet mode (-q).
# The message below is printed unconditionally; the cell does not inspect
# pip's exit status.
!pip install -q langchain langchain-community chromadb sentence-transformers pypdf google-generativeai
print("Installed!")

In [None]:
# Path of the CV file to process; it must be readable from the notebook's
# working directory under exactly this name.
cv_filename = "M.Bilal Sharif.pdf"
print(f"‚úÖ Using CV file: {cv_filename}")

In [None]:
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_core.documents import Document

# Choose a loader from the file extension. The comparison is done on the
# lower-cased name so that e.g. "CV.PDF" is still parsed as a PDF instead of
# falling through to the plain-text branch, where its binary content would be
# decoded as text.
if cv_filename.lower().endswith('.pdf'):
    loader = PyPDFLoader(cv_filename)
    documents = loader.load()
elif cv_filename.lower().endswith('.txt'):
    loader = TextLoader(cv_filename)
    documents = loader.load()
else:
    # Unknown extension: read the whole file as text into a single Document.
    # The encoding is given explicitly so the result does not depend on the
    # platform's locale default.
    with open(cv_filename, 'r', encoding='utf-8') as f:
        documents = [Document(page_content=f.read())]

# Quick sanity check on what was loaded.
print(f"Loaded {len(documents)} document(s)")
print(f"Total characters: {sum(len(doc.page_content) for doc in documents)}")

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks; the chunks are what gets
# embedded and indexed afterwards.
# chunk_overlap (100) is half of chunk_size (200), so neighbouring chunks may
# share up to half of their text.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function) — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=100
)

chunks = text_splitter.split_documents(documents)
print(f"Created {len(chunks)} chunks")

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding model handed to the vector store below.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

print("Creating vector database...")
# Build a Chroma store from the chunks, with ./cv_vectorstore as its persist
# directory.
# NOTE(review): re-running this cell against an existing ./cv_vectorstore may
# add the same chunks a second time rather than replace them — confirm, and
# clear the directory before rebuilding if so.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./cv_vectorstore"
)
print("Vector database created!")

In [None]:
import google.generativeai as genai
from getpass import getpass


# Ask for the API key interactively (getpass does not echo the input) instead
# of hard-coding it in the notebook, then hand it to the Gemini client.
api_key = getpass("üîë Paste your Google AI API key: ")
genai.configure(api_key=api_key)

# Gemini model handle with a fixed generation config: temperature 0.3 and at
# most 500 output tokens per response.
# NOTE(review): on "thinking" Gemini 2.5 models the reasoning tokens may count
# against max_output_tokens, which could leave little or no room for the
# visible answer — confirm that 500 is enough.
model = genai.GenerativeModel(
    model_name='models/gemini-2.5-flash',
    generation_config={
        'temperature': 0.3,
        'max_output_tokens': 500,
    }
)

print("Gemini ready!")

In [None]:
def ask_cv_question(question, k=5):
    """Answer a question about the CV using retrieval-augmented generation.

    Requests the ``k`` chunks most similar to ``question`` from the
    module-level ``vectorstore``, places their text in a prompt as context and
    asks the module-level ``model`` for a direct answer. Progress messages and
    the final question/answer pair are printed.

    Args:
        question: Question to ask about the CV.
        k: Number of chunks to request from the vector store.

    Returns:
        The model's answer with surrounding whitespace stripped. If no chunk
        is retrieved, a fixed "nothing found" message is returned and the
        model is not called. If the model call fails, a string of the form
        ``"Error: <message>"`` is returned instead of raising.
    """
    print("üîç Searching for relevant info...")
    relevant_docs = vectorstore.similarity_search(question, k=k)

    # Keep each distinct chunk text once, in retrieval order. Identical text
    # can come back more than once (for instance if the same chunks were
    # indexed repeatedly); repeating it in the prompt adds no information.
    unique_texts = list(dict.fromkeys(doc.page_content for doc in relevant_docs))
    context = "\n\n".join(unique_texts)
    print(f"‚úÖ Found {len(unique_texts)} relevant sections\n")

    prompt = f"""You must respond with ONLY the answer, nothing else.

CV Information:
{context}

Question: {question}

Rules:
1. Extract the exact answer from the CV
2. Do NOT add any preamble like "The answer is" or "According to"
3. Do NOT add explanations
4. Just state the fact directly

Example:
Question: What is the person's email?
Bad: "The person's email address is john@example.com"
Good: "john@example.com"

Question: What is the person's name?
Bad: "According to the CV, the person's name is John Doe"
Good: "John Doe"

Now answer the actual question above with just the direct answer:"""

    if not unique_texts:
        # Nothing was retrieved: the prompt would carry no CV content, so
        # skip the model call rather than let it answer without context.
        answer = "No relevant CV content was found for this question."
    else:
        print("ü§ñ Generating answer...\n")

        try:
            response = model.generate_content(prompt)
            answer = response.text.strip()
        except Exception as e:
            # Deliberately broad: this is the interactive boundary of the
            # notebook, so any failure is reported as the answer text
            # instead of interrupting the cell.
            answer = f"Error: {e}"

    # Display result
    separator = "=" * 60
    print(separator)
    print(f"‚ùì Question: {question}")
    print(separator)
    print(f"üí° Answer: {answer}")
    print(separator)

    return answer

print("‚úÖ RAG function ready!")

In [None]:
# Example query: ask for a summary, requesting 10 chunks instead of the
# function's default of 5.
ask_cv_question("Give a short summary of CV?", k=10)