In [1]:
import os
import shutil
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
from langchain.schema.document import Document
from langchain.vectorstores.chroma import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Constants
CHROMA_PATH = "chroma"
DATA_PATH = "Data"


In [2]:
# from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings

In [3]:
def get_embedding_function():
    return OllamaEmbeddings(model="nomic-embed-text")


In [4]:
# def load_documents():
#     document_loader = PyPDFDirectoryLoader(DATA_PATH)
#     return document_loader.load()


# #ADD maybe like o
# documents = load_documents()
# print(f"üìö Loaded {len(documents)} documents")


def load_documents():
    loader = PyPDFDirectoryLoader(DATA_PATH)
    docs = loader.load()
    for doc in docs:
        filename = os.path.basename(doc.metadata.get("source", "unknown.pdf"))
        doc.metadata["resume_id"] = filename  # Tag each document with its filename
    return docs

documents = load_documents()
print(f"üìö Loaded {len(documents)} documents")

üìö Loaded 3 documents


In [5]:
# def split_documents(documents: list[Document]):
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=800,
#         chunk_overlap=80,
#         length_function=len,
#         is_separator_regex=False,
#     )
#     return text_splitter.split_documents(documents)

# chunks = split_documents(documents)
# print(f"‚úÇÔ∏è Total Chunks: {len(chunks)}")

# 2. Split documents and retain metadata
def split_documents(documents: list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=80,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_documents(documents)

    for chunk in chunks:
        # Carry over resume ID and source
        chunk.metadata["resume_id"] = chunk.metadata.get("resume_id") or chunk.metadata.get("source")

    return chunks


chunks = split_documents(documents)
print(f"‚úÇÔ∏è Total Chunks: {len(chunks)}")




‚úÇÔ∏è Total Chunks: 18


In [6]:
def calculate_chunk_ids(chunks):
    last_page_id = None
    current_chunk_index = 0

    # for chunk in chunks:
    #     source = chunk.metadata.get("source")
    #     page = chunk.metadata.get("page")
    #     current_page_id = f"{source}:{page}"

    for chunk in chunks:
        source = chunk.metadata.get("resume_id", "unknown")
        page = chunk.metadata.get("page", 0)
        current_page_id = f"{source}:{page}"

        if current_page_id == last_page_id:
            current_chunk_index += 1
        else:
            current_chunk_index = 0

        chunk_id = f"{current_page_id}:{current_chunk_index}"
        last_page_id = current_page_id
        chunk.metadata["id"] = chunk_id

    return chunks

chunks_with_ids = calculate_chunk_ids(chunks)


In [7]:
embedding_fn = get_embedding_function()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_fn)

existing_items = db.get(include=[])
existing_ids = set(existing_items["ids"])
print(f"üì¶ Existing items in DB: {len(existing_ids)}")

new_chunks = [chunk for chunk in chunks_with_ids if chunk.metadata["id"] not in existing_ids]

if new_chunks:
    print(f"üöÄ Adding {len(new_chunks)} new chunks")
    new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
    db.add_documents(new_chunks, ids=new_chunk_ids)
    db.persist()
else:
    print("‚úÖ No new documents to add")


üì¶ Existing items in DB: 18
‚úÖ No new documents to add


In [8]:
# def format_context_with_resume_ids(results):
#     formatted_chunks = []
#     for doc, score in results:
#         resume_id = doc.metadata.get("resume_id", "unknown")
#         formatted_chunk = f"[RESUME: {resume_id}]\n{doc.page_content}\n[END RESUME: {resume_id}]"
#         formatted_chunks.append(formatted_chunk)
#     return "\n\n".join(formatted_chunks)



In [9]:
def group_chunks_by_resume(results):
    resume_groups = {}
    for doc, score in results:
        resume_id = doc.metadata.get("resume_id", "unknown")
        if resume_id not in resume_groups:
            resume_groups[resume_id] = []
        resume_groups[resume_id].append((doc, score))
    
    # Format the context with resume groups
    formatted_context = []
    for resume_id, chunks in resume_groups.items():
        resume_content = f"===== RESUME: {resume_id} =====\n"
        resume_content += "\n---\n".join([doc.page_content for doc, _ in chunks])
        resume_content += f"\n===== END RESUME: {resume_id} =====\n"
        formatted_context.append(resume_content)
        
    print(formatted_context)
    return "\n\n".join(formatted_context)



In [10]:
# If you can install sentence-transformers
from sentence_transformers.cross_encoder import CrossEncoder

def rerank_results(query, results, top_k=3):
    model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    
    # Prepare pairs for reranking
    pairs = [(query, doc.page_content) for doc, _ in results]
    
    # Get scores
    scores = model.predict(pairs)
    
    # Sort by score
    reranked_results = [(results[i][0], scores[i]) for i in range(len(scores))]
    reranked_results.sort(key=lambda x: x[1], reverse=True)
    
    return reranked_results[:top_k]

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# #RUN IF YOU WANT TO CLEAR CHROMA DB----> RESTART KERNEL BEFORE RUNNING THIS COMMAND

# import shutil
# import os

# CHROMA_PATH = "chroma"  # or wherever your DB is saved

# if os.path.exists(CHROMA_PATH):
#     shutil.rmtree(CHROMA_PATH)
#     print("üßπ Chroma database cleared.")
# else:
#     print("üìÅ Chroma path does not exist.")


GETTING THE QUERY AND EMBEDDING IT AND STUFF

In [12]:
CHROMA_PATH = "chroma"
PROMPT_TEMPLATE = """
You are a helpful assistant analyzing MULTIPLE different resumes. The information provided comes from different candidates with different work histories.

Resume chunks:
{context}

---

Question: {question}

Important instructions:
1. Make sure to identify WHICH specific resume/candidate contains the information requested
2. DO NOT mix information between different resumes
3. Clearly state which resume_id or candidate contains the information in your answer
4. If multiple candidates match the criteria, list all of them separately
5. If the information is not clear from the provided chunks, say so
"""


In [13]:
def query_rag(query_text: str, k: int = 8):  # Increase k for better recall
    # Load vector store
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Retrieve top-k chunks
    results = db(query_text, k=k)
    
    # Group chunks by resume
    context_text = group_chunks_by_resume(results)

    # Create prompt
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=query_text
    )

    # Generate answer with Ollama
    model = Ollama(model="mistral")
    response = model.invoke(prompt)

    # Show sources with resume_id for better debugging
    sources = [f"{doc.metadata.get('resume_id', 'unknown')}:{doc.metadata.get('id', None)}" for doc, _ in results]
    print(f"üß† Response:\n{response}\n\nüìÑ Sources: {sources}")
    return response

In [14]:
# # Main query function
# def query_rag(query_text: str, k: int = 5):
#     # Load vector store
#     embedding_function = get_embedding_function()
#     db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

#     # Retrieve top-k chunks
#     results = db.similarity_search_with_score(query_text, k=k)  ##specify how many chunks
#     context_text = "\n\n---\n\n".join([doc.page_content for doc, _ in results])

#     # Create prompt
#     prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
#         context=context_text, question=query_text
#     )

#     # Generate answer with Ollama
#     model = Ollama(model="mistral") ##change here if i want to try another ollama model, maybe for specific unstructured data!!
#     response = model.invoke(prompt)

#     # Show sources
#     sources = [doc.metadata.get("id", None) for doc, _ in results]
#     print(f"üß† Response:\n{response}\n\nüìÑ Sources: {sources}")
#     return response

This is when 3 different resumes are loaded in the Chroma DB, Sadhana's Nakshatra's and Surabhi's Resume

In [16]:
##these questions are after Ive loaded all resumes
query_rag("How many unique candidate's cvs do you have?")

TypeError: 'Chroma' object is not callable

In [None]:
query_rag("How many resumes do you have?") #Completely wrong response? does candidate need to be mentioned? maybe the phrasing is wrong..try generating mutliple queries for vague ones

üß† Response:
 The information requested is contained in the resume with the ID "Sadhana_Jayakumar_Resume.pdf". This candidate has experience in developing a full-stack productivity application using various technologies and programming languages such as ReactJS, Express.js(Node.js), Bootstrap, BackboneJS, MongoDB, Firebase, C++, C, Java, Python, JavaScript, TypeScript, Selenium, PowerMockito, Mockito, and Jest. They also have a background in Electrical and Electronics Engineering from the National Institute of Technology Karnataka with a CGPA of 8.5/10. Currently, they are working as a Software Engineer II at CISCO.

üìÑ Sources: ['Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:8', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:2', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:1', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:1', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gop

' The information requested is contained in the resume with the ID "Sadhana_Jayakumar_Resume.pdf". This candidate has experience in developing a full-stack productivity application using various technologies and programming languages such as ReactJS, Express.js(Node.js), Bootstrap, BackboneJS, MongoDB, Firebase, C++, C, Java, Python, JavaScript, TypeScript, Selenium, PowerMockito, Mockito, and Jest. They also have a background in Electrical and Electronics Engineering from the National Institute of Technology Karnataka with a CGPA of 8.5/10. Currently, they are working as a Software Engineer II at CISCO.'

In [18]:
query_rag("Who works at Cisco?")

üß† Response:
 The candidate who works at Cisco is Sadhana Jayakumar. This information can be found under the "WORK EXPERIENCE" section in her resume with the id "Sadhana_Jayakumar_Resume.pdf". She is currently a Software Engineer II at CISCO, part of the Certificates Management team within Identity Services Engine (ISE).

üìÑ Sources: ['Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:1', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:0', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:3', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:6', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:1', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:2', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:2']


' The candidate who works at Cisco is Sadhana Jayakumar. This information can be found under the "WORK EXPERIENCE" section in her resume with the id "Sadhana_Jayakumar_Resume.pdf". She is currently a Software Engineer II at CISCO, part of the Certificates Management team within Identity Services Engine (ISE).'

In [23]:
query_rag("Who has switched domains from electrical to business operations?")


üß† Response:
 The candidate who has switched domains from electrical to business operations can be found in the "Resume: Nakshatra_Gopi_2024.pdf". In this resume, Nakshatra Gopi transitioned from an internship in the Electronics Team at TRESA Energy, where they conducted a literature survey on Battery Management technology and designed a current monitor circuit, to a Business Operations Specialist role at o9 Solutions, where they translates complex business requirements into scalable solutions for clients.

üìÑ Sources: ['Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:6', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:1', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:3', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:2', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:5', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:7', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:1']


' The candidate who has switched domains from electrical to business operations can be found in the "Resume: Nakshatra_Gopi_2024.pdf". In this resume, Nakshatra Gopi transitioned from an internship in the Electronics Team at TRESA Energy, where they conducted a literature survey on Battery Management technology and designed a current monitor circuit, to a Business Operations Specialist role at o9 Solutions, where they translates complex business requirements into scalable solutions for clients.'

In [None]:
query_rag("Who has the most experience in software development?")  ##Has hallucinated Surabhi's and Nakshatra's projects are mentioned in the generated answer

üß† Response:
 The resume with the most experience in software development, specifically in coding and web development, is Sadhana Jayakumar (resume_id: Sadhana_Jayakumar_Resume.pdf). This is based on her work experience as a Software Engineer II at CISCO where she developed automated end-to-end systems and utilized technologies like ARIMA for anomaly detection in time series data. Additionally, she has practical coding skills in languages such as Python and SQL, and she has demonstrated expertise in web development by creating projects such as an audio fingerprinting system using Shazam's algorithm and a keypoint detection program using the David Lowe SIFT algorithm for template matching.

üìÑ Sources: ['Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:9', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:0', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:1', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume

" The resume with the most experience in software development, specifically in coding and web development, is Sadhana Jayakumar (resume_id: Sadhana_Jayakumar_Resume.pdf). This is based on her work experience as a Software Engineer II at CISCO where she developed automated end-to-end systems and utilized technologies like ARIMA for anomaly detection in time series data. Additionally, she has practical coding skills in languages such as Python and SQL, and she has demonstrated expertise in web development by creating projects such as an audio fingerprinting system using Shazam's algorithm and a keypoint detection program using the David Lowe SIFT algorithm for template matching."

In [25]:
query_rag("Which candidate is the most suitable for building front end and back end of a website?")

üß† Response:
 The candidate who appears to have experience in both front-end and back-end development is Sadhana Jayakumar (Resume_id: Sadhana_Jayakumar_Resume.pdf). This is based on the following details in her resume:

   - Front-end skills: She has worked with ReactJS, Express.js(Node.js), Bootstrap, BackboneJS, and has a good understanding of JavaScript, TypeScript, and the Spotify Web API.
   - Back-end skills: She has experience in using Node.js for server-side logic, MongoDB for data storage and management, and Google Firebase for handling user information.

   Nakshatra Gopi's resume focuses more on supply chain solutions consulting, and Surabhi's resume contains projects related to optimization models and machine learning, but there is no clear mention of front-end or back-end development experience in these resumes.

üìÑ Sources: ['Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:9', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:6', 'Sadhana_Jayak

" The candidate who appears to have experience in both front-end and back-end development is Sadhana Jayakumar (Resume_id: Sadhana_Jayakumar_Resume.pdf). This is based on the following details in her resume:\n\n   - Front-end skills: She has worked with ReactJS, Express.js(Node.js), Bootstrap, BackboneJS, and has a good understanding of JavaScript, TypeScript, and the Spotify Web API.\n   - Back-end skills: She has experience in using Node.js for server-side logic, MongoDB for data storage and management, and Google Firebase for handling user information.\n\n   Nakshatra Gopi's resume focuses more on supply chain solutions consulting, and Surabhi's resume contains projects related to optimization models and machine learning, but there is no clear mention of front-end or back-end development experience in these resumes."

In [20]:
query_rag("What is the name of individual who works at Cisco?")

üß† Response:
 The name of the individual who works at Cisco can be found in the resume with the ID "Sadhana_Jayakumar_Resume.pdf". The candidate named Sadhana Jayakumar is currently employed as a Software Engineer II at Cisco.

üìÑ Sources: ['Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:0', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:6', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:1', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:3', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:2', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:2', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:5']


' The name of the individual who works at Cisco can be found in the resume with the ID "Sadhana_Jayakumar_Resume.pdf". The candidate named Sadhana Jayakumar is currently employed as a Software Engineer II at Cisco.'

In [26]:
query_rag("Who would be most eligible for a Supply chain role?")

üß† Response:
 Candidate with the information requested for a Supply chain role would be "Sadhana Jayakumar" (Resume_id: Sadhana_Jayakumar_Resume.pdf) as she has experience in developing automated end-to-end testing frameworks using Selenium and Python, which is crucial in ensuring comprehensive test coverage and significantly improving system reliability and performance, attributes essential for supply chain roles that require quality assurance and process optimization. Additionally, her work on developing new features for TCNAC, focusing on introducing a new node when the primary node goes down, highlights her understanding of system resilience and uptime, critical aspects in supply chain operations to ensure minimal disruptions.

üìÑ Sources: ['Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:3', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:1', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:3', 'Surabhi-resume.pdf:S

' Candidate with the information requested for a Supply chain role would be "Sadhana Jayakumar" (Resume_id: Sadhana_Jayakumar_Resume.pdf) as she has experience in developing automated end-to-end testing frameworks using Selenium and Python, which is crucial in ensuring comprehensive test coverage and significantly improving system reliability and performance, attributes essential for supply chain roles that require quality assurance and process optimization. Additionally, her work on developing new features for TCNAC, focusing on introducing a new node when the primary node goes down, highlights her understanding of system resilience and uptime, critical aspects in supply chain operations to ensure minimal disruptions.'

In [27]:
query_rag("Who does supply chain solutions consulting?")

üß† Response:
 The information about supply chain solutions consulting can be found for Candidate Nakshatra Gopi (Resume_id: Nakshatra_Gopi_2024.pdf). In this resume, it's stated that Nakshatra Gopi has work experience as a Business Operations Specialist at o9 Solutions Bangalore where he translated complex business requirements into scalable solutions for supply chain solutions consulting.

üìÑ Sources: ['Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:2', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:1', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:6', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:2', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:3', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:1']


" The information about supply chain solutions consulting can be found for Candidate Nakshatra Gopi (Resume_id: Nakshatra_Gopi_2024.pdf). In this resume, it's stated that Nakshatra Gopi has work experience as a Business Operations Specialist at o9 Solutions Bangalore where he translated complex business requirements into scalable solutions for supply chain solutions consulting."

In [None]:
embedding_function = get_embedding_function()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

def query_rag_v2(query_text: str, k: int = 8):
    # Load vector store
    

    # Step 1: Retrieve top-k chunks (more than we need for reranking)
    results = db.similarity_search_with_score(query_text, k=k)
    
    # Step 4: Rerank results to get more relevant chunks
    # Note: This requires installing sentence-transformers
    try:
        reranked_results = rerank_results(query_text, results, top_k=5)
        print(f"‚úÖ Reranked results from {len(results)} to {len(reranked_results)}")
    except Exception as e:
        print(f"‚ö†Ô∏è Reranking failed: {e}. Using original results.")
        reranked_results = results[:5]  # Fallback to top 5 from original results
    
    # Step 3: Group chunks by resume for better context
    context_text = group_chunks_by_resume(reranked_results)

    # Create prompt
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=query_text
    )

    # Generate answer with Ollama
    model = Ollama(model="mistral")
    response = model.invoke(prompt)

    # Show sources with resume_id for better debugging
    sources = [f"{doc.metadata.get('resume_id', 'unknown')}:{doc.metadata.get('id', None)}" 
               for doc, _ in reranked_results]
    print(f"üß† Response:\n{response}\n\nüìÑ Sources: {sources}")
    return response

In [29]:
query_rag_v2("Who would be most eligible for a Supply chain role?")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


‚úÖ Reranked results from 8 to 5
üß† Response:
 The resume with information that seems most relevant for a Supply chain role is **resumee_id: Nakshatra_Gopi_2024.pdf**. The candidate, Nakshatra Gopi, has experience as a Business Operations Specialist at o9 Solutions Bangalore, where he worked on optimizing operational efficiency of clients in the supply chain sector. This practical experience combined with his education in Electrical and Electronics Engineering might make him a strong fit for a Supply chain role.

üìÑ Sources: ['Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:0', 'Nakshatra_Gopi_2024.pdf:Nakshatra_Gopi_2024.pdf:0:3', 'Sadhana_Jayakumar_Resume.pdf:Sadhana_Jayakumar_Resume.pdf:0:3', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:0', 'Surabhi-resume.pdf:Surabhi-resume.pdf:0:2']


' The resume with information that seems most relevant for a Supply chain role is **resumee_id: Nakshatra_Gopi_2024.pdf**. The candidate, Nakshatra Gopi, has experience as a Business Operations Specialist at o9 Solutions Bangalore, where he worked on optimizing operational efficiency of clients in the supply chain sector. This practical experience combined with his education in Electrical and Electronics Engineering might make him a strong fit for a Supply chain role.'

In [32]:
query_rag_v2("Who works in supply chain?")

‚úÖ Reranked results from 8 to 5
['===== RESUME: Nakshatra_Gopi_2024.pdf =====\nNakshatra Gopi\n‚ôÇ¬∂obile-alt9360205861 ‚Ä¢ /envel‚å¢penakshatragopi2001@gmail.com ‚Ä¢ /gl‚å¢be-americaslinkedin.com/in/nakshatra-gopi\nEducation\nNational Institute of Technology Karnataka Surathkal, India\nBachelor of Technology in Electrical and Electronics Engineering, CGPA: 8.14/10 2019 - 2023\nWork Experience\no9 Solutions Bangalore\nBusiness Operations Specialist, Supply Chain Solutions Consulting May 2023 - Present\n‚óã Translated complex business requirements into scalable solutions, enhancing operational efficiency and data-driven\ndecision-making for the clients.\n‚óã Developed assortment planning and store clustering solutions to optimize operational efficiency of a Fortune 500 Retailer.\n‚óã Fostered seamless data flow and user access control for a Fortune 500 client post an M&A.\n---\n‚óã Developed customized commercial planning workflows, KPIs, and dashboards to enable actionable insights an

' The candidate who works in supply chain is Nakshatra Gopi (resume_id: Nakshatra_Gopi_2024.pdf). He is currently a Business Operations Specialist at o9 Solutions Bangalore where he works on Supply Chain Solutions Consulting.'

In [None]:
query_rag_v2("How many resumes do you have?") #Hallucinating

‚úÖ Reranked results from 8 to 5
['===== RESUME: Sadhana_Jayakumar_Resume.pdf =====\nEDUCATION                Bachelor  of  Technology,  Electrical  and  Electronics  Engineering                                                                                            2019-2023  National  Institute  of  Technology  Karnataka  -   CGPA:  8.5  /10   AISSCE,  CBSE,  Class  XII                                                                                                                                                          2018-2019  Indian  School  Sohar,  Sultanate  of  Oman  -  Percentage:  92%   WORK  EXPERIENCE     Software  Engineer  II  -  CISCO                                                                                                                                      Aug  2023  -  Present  -  Part  of  the  Certificates  Management  team  within\n---\nthe\n \nSpotify\n \nWeb\n \nAPI.\n -  Managed  data  storage  and  user  information  securely  and  efficiently  usin

' The information requested about technical skills, such as languages and tools, can be found in the resume with the ID "Sadhana_Jayakumar_Resume.pdf". Here are the technical skills mentioned for candidate Sadhana Jayakumar:\n- Languages: C++, C, Java, Python, JavaScript, TypeScript\n- Web Development Tools and Frameworks: ReactJS, Express.js(Node.js), Bootstrap, BackboneJS, MongoDB, Firebase\n- Testing Frameworks: Selenium, PowerMockito, Mockito, Jest'

In [None]:
query_rag_v2("How many CVs do you have?") #Hallucinating

‚úÖ Reranked results from 8 to 5
['===== RESUME: Sadhana_Jayakumar_Resume.pdf =====\nEDUCATION                Bachelor  of  Technology,  Electrical  and  Electronics  Engineering                                                                                            2019-2023  National  Institute  of  Technology  Karnataka  -   CGPA:  8.5  /10   AISSCE,  CBSE,  Class  XII                                                                                                                                                          2018-2019  Indian  School  Sohar,  Sultanate  of  Oman  -  Percentage:  92%   WORK  EXPERIENCE     Software  Engineer  II  -  CISCO                                                                                                                                      Aug  2023  -  Present  -  Part  of  the  Certificates  Management  team  within\n===== END RESUME: Sadhana_Jayakumar_Resume.pdf =====\n', '===== RESUME: Surabhi-resume.pdf =====\nSurabhi Pachpande\n‚ôÇ¬∂o

' The information requested was about a candidate who worked as a Data Science Consultant at Wells Fargo. According to the resumes provided, the candidate "Surabhi Pachpande" fits this description. Therefore, the resume containing the information requested is "Surabhi-resume.pdf".'

In [16]:
query_rag_v2("Who has interned in Microsoft?")

‚úÖ Reranked results from 8 to 5
['===== RESUME: Sadhana_Jayakumar_Resume.pdf =====\nnode\n \ngoes\n \ndown,\n \nensuring\n \nenhanced\n \nsystem\n \nresilience\n \nand\n \nuptime.\n  Software  Engineering  Intern-  MICROSOFT  India                                                                                                May  2022-  July  2022  -  Developed  a  React-based  component  to  enhance  toast  notifications,  improving  user  experience  and  interface  consistency.  -  Redesigned  the  Excel-Online  scrollbar  to  align  with  the  new  Office  scrollbar  design,  ensuring  a  cohesive  look  across  \nplatforms.\n -  Designed  and  implemented  a  user  interface  for  the  name  box  component  to  gracefully  manage  and  display  messages  during  \ndata\n \nunavailability.\n---\nprogress\n \nacross\n \nall\n \nnodes\n \nin\n \nISE,\n \nwhile\n \ncollaborating\n \nwith\n \ncross-functional\n \nteams\n \nto\n \nensure\n \nseamless\n \nintegration\n \nand\n \nresolut

' The information about an internship at Microsoft can be found in the resume with the ID "Sadhana_Jayakumar_Resume.pdf". Specifically, the candidate named Sadhana Jayakumar was a Software Engineering Intern at Microsoft India from May 2022 to July 2022.'