In [2]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment;
# a later cell reads HF_TOKEN from there via os.getenv.
load_dotenv()

True

In [None]:
from langchain_community.document_loaders import PyPDFLoader
# Resume PDF to parse; the path is relative to the notebook's working directory.
loader = PyPDFLoader(file_path="./CVs/Curriculum_vitae.pdf")
# Later cells read `.page_content` from each item in `docs`.
docs = loader.load()

In [3]:
# Flatten the text of every loaded document into one string, joined by single spaces.
resume_text = " ".join(doc.page_content for doc in docs)

In [4]:
from langchain_huggingface import HuggingFaceEndpoint

# Model repository passed to the Hugging Face endpoint.
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    task="text-generation",
    max_new_tokens=1024,
    temperature=0.2,
    # The API token is read from the environment rather than hard-coded.
    huggingfacehub_api_token=os.getenv("HF_TOKEN"),
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from langchain import PromptTemplate, LLMChain

# Base parsing prompt. Literal JSON braces are doubled ({{ and }}) so the text
# is a valid format-style template; {resume_text} is the only placeholder.
template = """
You are an expert resume parser.Return only the JSON block and nothing else. Do not add commentary or labels like 'JSON Output:'.
Extract structured data from the following resume text and return it in the exact JSON format below:

{{
  "extractedData": {{
    "name": "Full Name",
    "email": "email@example.com",
    "phone": "1234567890",
    "skills": ["Skill1", "Skill2"],
    "cgpa": "8.5",
    "shortlisted": false,
    "education": [
      {{
        "level": "Degree",
        "institution": "Institute Name",
        "board": "Board/University",
        "year": "Year",
        "percentage": "Percentage"
      }}
    ],
    "workExperience": [
      {{
        "position": "Intern",
        "company": "Company",
        "duration": "Duration",
        "description": "What the candidate did."
      }}
    ]
  }}
}}

Resume Text:
{resume_text}
"""

# Variant of the parsing prompt that additionally asks the model for a
# 'suggestions' object (commonSkills / uniqueSkills). This is the template
# handed to PromptTemplate further down in this cell. Literal JSON braces are
# doubled ({{ and }}); {resume_text} is the only placeholder.
template2 = """
You are an expert resume parser. Extract structured data from the following resume text and return it in the exact JSON format below. Additionally, based on the skills listed, add a 'suggestions' field that contains:
- 'commonSkills': widely used industry skills (e.g., Python, SQL)
- 'uniqueSkills': specialized, niche, or rare skills (e.g., Langchain, Huggingface)

Respond only with valid JSON in the exact format shown:

{{
  "extractedData": {{
    "name": "Full Name",
    "email": "email@example.com",
    "phone": "1234567890",
    "skills": ["Skill1", "Skill2"],
    "cgpa": "8.5",
    "shortlisted": false,
    "education": [
      {{
        "level": "Degree",
        "institution": "Institute Name",
        "board": "Board/University",
        "year": "Year",
        "percentage": "Percentage"
      }}
    ],
    "workExperience": [
      {{
        "position": "Intern",
        "company": "Company",
        "duration": "Duration",
        "description": "What the candidate did."
      }}
    ],
    "suggestions": {{
      "commonSkills": ["CommonSkill1", "CommonSkill2"],
      "uniqueSkills": ["UniqueSkill1", "UniqueSkill2"]
    }}
  }}
}}

Resume Text:
{resume_text}
"""

# Compose prompt -> model and run the pipeline once on the resume text.
prompt = PromptTemplate(input_variables=["resume_text"], template=template2)
llm_chain = prompt | llm
# The single template variable is supplied by name.
output = llm_chain.invoke({"resume_text": resume_text})

In [6]:
# Show the raw completion; print() renders its newlines. The same `output`
# value is parsed for JSON in a later cell.
print(output)

• TensorFlow Developer Certificate View Certificate
• Kaggle Competitions: 5 Top 10 Performances

JSON Response:

{
  "extractedData": {
    "name": "AADITYA RAJ",
    "email": "helloaadityahere@gmail.com",
    "phone": "1234567890",
    "skills": ["Python", "C/C++", "SQL", "Tensorflow", "PyTorch", "Keras", "Huggingface", "Langchain", "Git", "Docker", "Jupyter-Notebook", "VS Code", "PyCharm", "Pandas", "NumPy", "Matplotlib"],
    "cgpa": "8.28",
    "shortlisted": false,
    "education": [
      {
        "level": "Bachelor of Technology",
        "institution": "Birla Institute of Technology, Mesra",
        "board": "Not Mentioned",
        "year": "Present",
        "percentage": "8.28"
      },
      {
        "level": "12th",
        "institution": "Delhi Public School, Ranchi",
        "board": "Not Mentioned",
        "year": "2023",
        "percentage": "96.6"
      }
    ],
    "workExperience": [
      {
        "position": "Research Assistant",
        "company": "Birla Ins

In [7]:
import json
import re
from json_repair import repair_json

# Accept either a dict carrying the completion under 'text' or the completion itself.
if isinstance(output, dict) and 'text' in output:
    raw_output = output['text']
else:
    raw_output = output

# Greedy match: everything from the first '{' through the last '}'.
match = re.search(r'\{[\s\S]*\}', raw_output)
if match is None:
    raise ValueError("could not find a json block in the output.")

json_str = match.group(0)
# Let json_repair patch up malformed JSON before strict parsing.
good_json_str = repair_json(json_str)

try:
    parsed_data = json.loads(good_json_str)
except json.JSONDecodeError as e:
    # Repair was not enough: report the error and show the start of the text.
    print("still broken:", e)
    print(good_json_str[:500])
else:
    with open("parsed_resume.json", "w") as f:
        json.dump(parsed_data, f, indent=2)
    print("fixed and saved to parsed_resume.json")

fixed and saved to parsed_resume.json


In [None]:
"""
weightage given 
1. skill match : 40%
2. education : 15%
3. cgpa_score : 10%
4. exp_score : 25%
5. cert_score : 10%
"""

def _normalize_skills(skills):
    """Return skills as a set of stripped, case-folded strings; blank and None entries are dropped."""
    normalized = set()
    for skill in skills or []:
        if skill is None:
            continue
        text = str(skill).strip().casefold()
        if text:
            normalized.add(text)
    return normalized


def _cgpa_fraction(raw_cgpa):
    """Map a CGPA value to [0, 1] by dividing by 10; unparseable values give 0.0."""
    try:
        cgpa = float(raw_cgpa)
    except (TypeError, ValueError):
        # e.g. None, "" or free text such as "Not Mentioned"
        return 0.0
    if cgpa != cgpa:  # NaN compares unequal to itself
        return 0.0
    return max(0.0, min(cgpa / 10, 1.0))


def compute_score(parsed_json, job_description):
    """Score a parsed resume against a job description, as a percentage.

    The score is a weighted sum of five components, each in [0, 1]:
    skill match (0.4), education (0.15), CGPA (0.1), work experience (0.25)
    and certifications (0.1).

    Args:
        parsed_json: dict with an "extractedData" dict. The optional keys
            "skills", "education", "cgpa", "workExperience" and
            "certifications" are read from it; missing or null values are
            treated as empty.
        job_description: dict whose "skills" entry lists the required skills.

    Returns:
        The score on a 0-100 scale, rounded to two decimals.

    Raises:
        KeyError: if "extractedData" or job_description["skills"] is missing.
    """
    extracted = parsed_json["extractedData"]

    # Compare skills case-insensitively so "Tensorflow" matches "TensorFlow".
    required_skills = _normalize_skills(job_description["skills"])
    candidate_skills = _normalize_skills(extracted.get("skills"))
    skill_match = len(required_skills & candidate_skills) / len(required_skills) if required_skills else 0

    # Full credit when any education entry mentions a bachelor's degree, half otherwise.
    education_entries = extracted.get("education") or []
    has_required_degree = any(
        "bachelor" in str(entry.get("level") or "").lower()
        for entry in education_entries
        if isinstance(entry, dict)
    )
    education_score = 1.0 if has_required_degree else 0.5

    cgpa_score = _cgpa_fraction(extracted.get("cgpa", 0))

    # Experience and certifications are presence checks only.
    exp_score = 1.0 if extracted.get("workExperience") else 0.0
    cert_score = 1.0 if extracted.get("certifications") else 0.0

    final_score = (
        skill_match * 0.4 +
        education_score * 0.15 +
        cgpa_score * 0.1 +
        exp_score * 0.25 +
        cert_score * 0.1
    ) * 100

    return round(final_score, 2)