### Resume Analyzer

In [1]:
import faiss
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Fetch the Gemini API key
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Check if API key is found
if gemini_api_key:
    print("Gemini API key found.")
else:
    print("Gemini API key not found. Please set the GEMINI_API_KEY environment variable.")

Gemini API key found.


  from .autonotebook import tqdm as notebook_tqdm


In [36]:
# Load and Embed Your Resume
loader = PyPDFLoader("sampleResume.pdf")
documents = loader.load()

In [3]:
from langchain_text_splitters import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=10)
docs = text_splitter.split_documents(documents)

In [4]:
len(docs)

2

In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=gemini_api_key
)

In [6]:
db = FAISS.from_documents(docs, embedding_model)

In [31]:
from langchain.prompts import PromptTemplate
template = """
Identify the technical skills mentioned in the following job description:
{job_description}

Output the skills in a bullet points format along with mentioning whether the skills are present in the retrieved relevant skills from the vector database.
For example sql, python and Power BI are mentioned in the {job_description} but SQL in not present in the vector database and python and Power BI are present the vector database, then your response should be as below:
    1. SQL: missing from the resume  
    2. Python: present in the resume
    3. Power BI: present in the resume
    And don't try to explain the response. Only output the response in the specified format."""

prompt = PromptTemplate(
    input_variables=["job_description"],
    template=template
)

In [32]:
job_description = """What We're Looking For

2+ years of proven experience in product analytics or data analysis, emphasizing SaaS business models.
2+ years of hands-on experience in data analysis tools such as Python, R, SQL, and advanced visualization tools beyond traditional BI
Strong analytical mindset, dedicated to solving business challenges with data-driven insights.
Exceptional communication skills, both written and verbal, for effective presentation of insights.
Self-motivated with leadership capabilities, thriving in fast-paced environments.
Committed to continuous learning and professional growth.
2+ years of hands-on experience with A/B testing and statistical experiments preferred
2+ years of hands-on experience in Either of the BI platforms - Quicksight, Power BI, LookerStudio, Tableau, Excel
Familiarity with DBT and Redshift - ETL/ELT functions and pipelines
2+ years of hands-on experience with product analytics tools like Mixpanel, and Segment.
Ability to recognize data-driven insights and optimize opportunities.
Expert in developing BI models across various platforms to address critical use cases.
Expert in documenting technical requirements according to company standards.
Track record of successful client relationship management, ensuring collaboration and project completion.
Can brainstorm and ideate towards building Proof of Concepts (POCs) for new business exploration and use case hypothesis.
Demonstrated ownership of project delivery metrics and team performance. *."""

In [33]:
from langchain_google_genai import ChatGoogleGenerativeAI # Import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA


qa_chain = RetrievalQA.from_chain_type(
    llm=ChatGoogleGenerativeAI( # Use ChatGoogleGenerativeAI instead of GoogleGenerativeLanguage
        model="gemini-1.5-pro-latest", # Updated model name
        temperature=0,
        max_output_tokens=100,
        google_api_key=gemini_api_key # Use google_api_key instead of api_key
    ),
    retriever=db.as_retriever(),
    return_source_documents=True
)

In [34]:
# Run the query using the RetrievalQA chain
# Construct the query input with the prompt template
query_input = prompt.format(job_description=job_description)

# Run the query using the RetrievalQA chain
result = qa_chain({"query": query_input})

In [35]:
from IPython.display import Markdown, display
Markdown(result["result"])


- Python: present from the resume
- R: missing in the resume
- SQL: present from the resume
- Quicksight: missing in the resume
- Power BI: present from the resume
- LookerStudio: missing in the resume
- Tableau: missing in the resume
- Excel: present from the resume
- DBT: missing in the resume
- Redshift: missing in the resume
- Mixpanel: missing in the resume
- Segment: missing in

In [12]:
response = result.get("result", "No relevant skills found.")

In [34]:
response

'        1. SQL: present in the resume  \n        2. Python: present in the resume\n        3. Power BI: present in the resume\n        4. R: present in the resume\n        5. Quicksight: missing from the resume\n        6. LookerStudio: missing from the resume\n        7. Tableau: missing from the resume\n        8. Excel: present in the resume\n        9. DBT: missing from the resume\n        10.'

In [18]:
import re
missing_skills = []
for line in response.split('\n'):
    if re.search(r"missing", line):
        match = re.search(r"^\d+\.\s(.*?):", line)
        if match:
            skill_name = match.group(1).strip()
            missing_skills.append(skill_name)

print("Missing Skills:",missing_skills)

Missing Skills: []


Thanks 