In [60]:
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
load_dotenv()

True

In [61]:
model = ChatGroq(
    model='meta-llama/llama-4-maverick-17b-128e-instruct',
    temperature=0.1,
    max_tokens=4096
)

In [62]:
loader = PyPDFLoader('./AbdullahSiddiqui.pdf')
result = loader.load()

In [None]:
from pydantic import BaseModel 
from typing import Optional, List


class ContactInfo(BaseModel):
    email: Optional[str]
    phone: Optional[str]
    address: Optional[str]
    linkedin: Optional[str]
    github: Optional[str]
    website: Optional[str]

class Skills(BaseModel):
    technical_skills: Optional[List[str]]
    soft_skills: Optional[List[str]]

class Education(BaseModel):
    institution: str
    degree: Optional[str]
    field_of_study: Optional[str]
    start_date: Optional[str]
    end_date: Optional[str]
    grade: Optional[str]

class Experience(BaseModel):
    job_title: str
    company: str
    start_date: Optional[str]
    end_date: Optional[str]
    location: Optional[str]
    description: Optional[str]

class Project(BaseModel):
    name: str
    description: Optional[str]
    technologies: Optional[List[str]]
    link: Optional[str]

class Certification(BaseModel):
    name: str
    issuing_organization: Optional[str]
    issue_date: Optional[str]
    expiration_date: Optional[str]
    credential_id: Optional[str]
    credential_url: Optional[str]

class Resume(BaseModel):
    name: str
    contact: Optional[ContactInfo]
    summary: Optional[str]
    skills: Optional[Skills]
    education: Optional[List[Education]]
    experience: Optional[List[Experience]]
    projects: Optional[List[Project]]
    certifications: Optional[List[Certification]]
    languages: Optional[List[str]]
    interests: Optional[List[str]]

In [64]:
from langchain_core.output_parsers import PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=Resume)

In [65]:
prompt = PromptTemplate(
    template = """You are a resume parser. Extract information from the resume text and return ONLY a valid JSON object. Do not include any explanations, code, or additional text.

Resume text:
{resume_text}

Extract the information and format it as JSON according to this schema:
{format_instructions}

Return only the JSON object, nothing else:""",
    input_variables=['resume_text'],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [66]:
chain = prompt | model | parser
result = chain.invoke({'resume_text':result[0].page_content})
print(result)

name='M. ABDULLAH SIDDIQUI' contact=ContactInfo(email='abdullahsidzz333@gmail.com', phone='+923160116389', address='Karachi, Pakistan', linkedin='linkedin.com/in/muhammad-abdullah-siddiqui-587877280/', github='https://github.com/Abdullah786Siddiqui', website=None) summary='I am a motivated Full Stack Web Developer with expertise in JavaScript, React.js, PHP, Laravel, MySQL, Bootstrap, and GSAP. I specialize in creating responsive, user-friendly web applications with efficient backend systems and smooth UI animations. With strong problem-solving skills and a passion for learning new technologies,' skills=Skills(technical_skills=['JavaScript', 'PHP', 'C', 'C++', 'HTML5', 'CSS3', 'Bootstrap', 'Tailwind', 'Laravel', 'React', 'Redux', 'GSAP', 'MySQL', 'Git', 'Github'], soft_skills=['Explorer', 'Working in Collaborative Environment', 'Lifelong learner']) education=[Education(institution='ILMA University Karachi, PK', degree='Bachelor of Science (BS)', field_of_study='Software Engineering', s

In [67]:
# Display the parsed resume in a more readable format
import json

# Save to JSON file
with open(f"{result.name or "user"}_resume.json", 'w', encoding='utf-8') as f:
    json.dump(result.model_dump(), f, indent=2, ensure_ascii=False)
    
print("\n✅ Resume data saved to 'parsed_resume.json'")


✅ Resume data saved to 'parsed_resume.json'
