### Resume Analyzer

In [2]:
import faiss
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from dotenv import load_dotenv

# Pull the variables defined in the .env file into the environment.
load_dotenv()

# Gemini credential as read from the environment (None when it is unset).
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Report up front whether the credential could be read.
print(
    "Gemini API key found."
    if gemini_api_key
    else "Gemini API key not found. Please set the GEMINI_API_KEY environment variable."
)

Gemini API key found.


In [3]:
# Read the resume PDF into LangChain documents (no embedding happens here).
loader = PyPDFLoader("sampleResume.pdf")
documents = loader.load()

# Keep only the raw text content of each loaded document.
texts = [loaded_doc.page_content for loaded_doc in documents]

In [4]:
from langchain_text_splitters import CharacterTextSplitter

# Break the loaded documents into chunks that will be embedded and indexed.
# NOTE(review): the next cell reports only 2 chunks despite chunk_size=100 —
# confirm the splitter is cutting the text as finely as intended.
text_splitter = CharacterTextSplitter(
    chunk_overlap=10,
    chunk_size=100,
)
docs = text_splitter.split_documents(documents)

In [5]:
# Sanity check: number of chunks produced by the text splitter.
len(docs)

2

In [6]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client for the resume chunks; it is given the Gemini key that was
# read from the environment.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=gemini_api_key)

In [7]:
# Build a FAISS vector store from the resume chunks, embedded with embedding_model.
db = FAISS.from_documents(docs, embedding_model)

In [69]:
from langchain.prompts import PromptTemplate

# Instruction text sent to the model. It contains two placeholders:
#   {job_description_text} - the job posting to scan for skills and tools
#   {resume_text}          - the resume content the skills are checked against
# The expected answer is a numbered list, one "<skill>: present|missing" per line.
template = """
Your task is as a helpful assistant is to identify technical skills and tools mentioned in the following Job Description:
{job_description_text}
that are not present in the retrieved context from vector database:
{resume_text}
Please output the skills in a structured format.

For example sql is mentioned in the resume but python and Power BI are not mentioned, then your response should be as below:
1. SQL: present  
2. Python: missing
3. Power BI: missing
And don't try to explain the response. Only output the response in the specified format.

"""

# input_variables must name exactly the placeholders used in `template`;
# the job-description placeholder is {job_description_text}.
prompt = PromptTemplate(
    input_variables=["job_description_text", "resume_text"],
    template=template
)

In [70]:
# Requirements text of the job posting; it fills the {job_description_text}
# placeholder of the prompt template.
job_description = """What We're Looking For

2+ years of proven experience in product analytics or data analysis, emphasizing SaaS business models.
2+ years of hands-on experience in data analysis tools such as Python, R, SQL, and advanced visualization tools beyond traditional BI
Strong analytical mindset, dedicated to solving business challenges with data-driven insights.
Exceptional communication skills, both written and verbal, for effective presentation of insights.
Self-motivated with leadership capabilities, thriving in fast-paced environments.
Committed to continuous learning and professional growth.
2+ years of hands-on experience with A/B testing and statistical experiments preferred
2+ years of hands-on experience in Either of the BI platforms - Quicksight, Power BI, LookerStudio, Tableau, Excel
Familiarity with DBT and Redshift - ETL/ELT functions and pipelines
2+ years of hands-on experience with product analytics tools like Mixpanel, and Segment.
Ability to recognize data-driven insights and optimize opportunities.
Expert in developing BI models across various platforms to address critical use cases.
Expert in documenting technical requirements according to company standards.
Track record of successful client relationship management, ensuring collaboration and project completion.
Can brainstorm and ideate towards building Proof of Concepts (POCs) for new business exploration and use case hypothesis.
Demonstrated ownership of project delivery metrics and team performance. *."""
resume_text = "\n".join(texts)  # Full resume text: every extracted document text, joined with newlines

In [71]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA


# Question-answering chain that pairs the Gemini chat model with the FAISS
# retriever built from the resume chunks.
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatGoogleGenerativeAI(
        model="gemini-1.5-pro-latest",
        temperature=0,  # lowest-randomness setting, for repeatable answers
        # The answer is one short line per skill. A 100-token cap left almost
        # no headroom (a 12-skill answer is already close to it), so longer
        # skill lists would be cut off silently. A larger cap does not change
        # answers that already fit.
        max_output_tokens=512,
        google_api_key=gemini_api_key
    ),
    retriever=db.as_retriever(),
    # Ask the chain to also return the retrieved chunks alongside the answer.
    return_source_documents=True
)

In [72]:
# Fill both placeholders of the prompt template to build the query text.
# NOTE(review): the full resume text is inlined here and the same string is
# also handed to the chain's retriever — confirm the retrieval step still adds
# value for this query.
query_input = template.format(job_description_text=job_description, resume_text=resume_text)

# Execute the RetrievalQA chain. `invoke` is the supported entry point; calling
# the chain object directly is deprecated in current LangChain releases.
# The returned mapping holds the answer under the "result" key.
result = qa_chain.invoke({"query": query_input})

In [77]:
from IPython.display import Markdown, display
# Render the chain's answer text as Markdown; as the cell's last expression it
# becomes the cell output.
Markdown(result["result"])


1. SQL: present
2. Python: present
3. R: missing
4. Quicksight: missing
5. Power BI: present
6. LookerStudio: missing
7. Tableau: missing
8. Excel: present
9. DBT: missing
10. Redshift: missing
11. Mixpanel: missing
12. Segment: missing 


In [74]:
# Answer text from the chain result, with a fallback string when the "result"
# key is absent.
response = result.get("result", "No relevant skills found.")

In [61]:
# Show the raw answer string, including its newline characters.
response

'1. SQL: present\n2. Python: present\n3. R: missing\n4. Quicksight: missing\n5. Power BI: present\n6. LookerStudio: missing\n7. Tableau: missing\n8. Excel: present\n9. DBT: missing\n10. Redshift: missing\n11. Mixpanel: missing\n12. Segment: missing \n'

In [68]:
import re


def extract_missing_skills(response_text):
    """Return the skills the model's answer marks as missing.

    Each relevant line is expected to look like ``"<n>. <skill>: <status>"``.
    A skill is reported when the status part (the text after the first colon)
    contains the word "missing", compared case-insensitively. Only the status
    part is inspected, so a skill whose own name contains "missing" is not
    misreported. Lines that do not follow the numbered format are ignored.

    Args:
        response_text: Full answer text, one skill per line.

    Returns:
        List of skill names, in the order they appear in the answer.
    """
    # Group 1: skill name (up to the first colon); group 2: status text.
    numbered_line = re.compile(r"^\s*\d+\.\s*(.+?)\s*:(.*)$")
    found = []
    for line in response_text.splitlines():
        match = numbered_line.match(line)
        if match and "missing" in match.group(2).lower():
            found.append(match.group(1).strip())
    return found


missing_skills = extract_missing_skills(response)

print("Missing Skills:",missing_skills)

Missing Skills: ['R', 'Quicksight', 'LookerStudio', 'Tableau', 'DBT', 'Redshift', 'Mixpanel', 'Segment']


Thanks 