In [2]:
# Read resume
with open("../data/resume.txt", "r", encoding="utf-8") as f:
    resume_text = f.read()

# Read job description
with open("../data/jd.txt", "r", encoding="utf-8") as f:
    jd_text = f.read()

print("RESUME TEXT:\n")
print(resume_text)

print("\n" + "-"*50 + "\n")

print("JOB DESCRIPTION TEXT:\n")
print(jd_text)


RESUME TEXT:

I am a Data Scientist with 3.5 years of experience.
I have worked as Data Analyst and Data Engineer.
Skills include Python, SQL, Machine Learning, Deep Learning, AWS.
I have experience with data pipelines, model building and deployment.


--------------------------------------------------

JOB DESCRIPTION TEXT:

We are looking for a Data Scientist.
Required skills: Python, SQL, Machine Learning.
Experience in Deep Learning and cloud platforms like AWS is preferred.
Candidate should have experience in end-to-end model deployment.



In [3]:
import re

def clean_text(text):
    # 1. Convert to lowercase
    text = text.lower()
    
    # 2. Remove punctuation and special characters
    text = re.sub(r"[^a-z0-9\s]", "", text)
    
    # 3. Remove extra spaces
    text = re.sub(r"\s+", " ", text).strip()
    
    return text

clean_resume = clean_text(resume_text)
clean_jd = clean_text(jd_text)

print("CLEAN RESUME:\n")
print(clean_resume)

print("\n" + "-"*50 + "\n")

print("CLEAN JD:\n")
print(clean_jd)


CLEAN RESUME:

i am a data scientist with 35 years of experience i have worked as data analyst and data engineer skills include python sql machine learning deep learning aws i have experience with data pipelines model building and deployment

--------------------------------------------------

CLEAN JD:

we are looking for a data scientist required skills python sql machine learning experience in deep learning and cloud platforms like aws is preferred candidate should have experience in endtoend model deployment


In [4]:
from sentence_transformers import SentenceTransformer

# Load model (downloads once)
model = SentenceTransformer("all-MiniLM-L6-v2")

# Generate embeddings
resume_embedding = model.encode(clean_resume)
jd_embedding = model.encode(clean_jd)

print("Resume embedding length:", len(resume_embedding))
print("JD embedding length:", len(jd_embedding))

print("\nFirst 10 numbers of resume embedding:\n", resume_embedding[:10])


Resume embedding length: 384
JD embedding length: 384

First 10 numbers of resume embedding:
 [-0.02466562 -0.04261731  0.04265124  0.0743174  -0.06062206 -0.10230157
 -0.01304876 -0.02319215 -0.09969875 -0.03157195]


In [6]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Reshape embeddings (required by sklearn)
resume_vec = resume_embedding.reshape(1, -1)
jd_vec = jd_embedding.reshape(1, -1)

# Calculate similarity
similarity_score = cosine_similarity(resume_vec, jd_vec)[0][0]

# Convert to percentage
match_percentage = round(similarity_score * 100, 2)

print("Resume–JD Match Percentage:", match_percentage, "%")


Resume–JD Match Percentage: 75.83 %


In [7]:
def generate_explanation_prompt(resume_text, jd_text, match_percentage):
    prompt = f"""
You are an AI hiring assistant.

Resume:
{resume_text}

Job Description:
{jd_text}

The resume matches the job description with a score of {match_percentage}%.

Explain the match in simple bullet points:
1. Key matching skills
2. Missing or weak skills
3. Overall summary in 2 lines
"""
    return prompt

match_percentage_rounded = round(match_percentage, 2)

prompt_text = generate_explanation_prompt(clean_resume, clean_jd, match_percentage_rounded)
print(prompt_text)



You are an AI hiring assistant.

Resume:
i am a data scientist with 35 years of experience i have worked as data analyst and data engineer skills include python sql machine learning deep learning aws i have experience with data pipelines model building and deployment

Job Description:
we are looking for a data scientist required skills python sql machine learning experience in deep learning and cloud platforms like aws is preferred candidate should have experience in endtoend model deployment

The resume matches the job description with a score of 75.83000183105469%.

Explain the match in simple bullet points:
1. Key matching skills
2. Missing or weak skills
3. Overall summary in 2 lines



In [8]:
from openai import OpenAI
import os

# Set API key (temporary method)
# os.environ["OPENAI_API_KEY"] = "your_api_key_here"

client = OpenAI()

def get_llm_explanation(prompt):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful AI hiring assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3
    )
    return response.choices[0].message.content

# Call LLM
# explanation = get_llm_explanation(prompt_text)
# print(explanation)


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [9]:
def generate_interview_questions_prompt(resume_text, jd_text):
    prompt = f"""
You are an experienced technical interviewer.

Resume:
{resume_text}

Job Description:
{jd_text}

Generate interview questions in the following format:

Technical Questions:
- Question 1
- Question 2
- Question 3

Scenario-Based Questions:
- Question 1
- Question 2

Skill Gap Questions:
- Question 1
- Question 2
"""
    return prompt

question_prompt = generate_interview_questions_prompt(clean_resume, clean_jd)
print(question_prompt)



You are an experienced technical interviewer.

Resume:
i am a data scientist with 35 years of experience i have worked as data analyst and data engineer skills include python sql machine learning deep learning aws i have experience with data pipelines model building and deployment

Job Description:
we are looking for a data scientist required skills python sql machine learning experience in deep learning and cloud platforms like aws is preferred candidate should have experience in endtoend model deployment

Generate interview questions in the following format:

Technical Questions:
- Question 1
- Question 2
- Question 3

Scenario-Based Questions:
- Question 1
- Question 2

Skill Gap Questions:
- Question 1
- Question 2



In [None]:
def get_llm_response(prompt):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.4
    )
    return response.choices[0].message.content


In [None]:
# interview_questions = get_llm_response(question_prompt)
# print(interview_questions)
