In [9]:
!pip install langchain langchain-core langchain-community groq pypdf2 python-docx

Collecting pypdf2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ---------------------------------------- 0.0/232.6 kB ? eta -:--:--
   - -------------------------------------- 10.2/232.6 kB ? eta -:--:--
   --------------------- ------------------ 122.9/232.6 kB 2.4 MB/s eta 0:00:01
   -------------------------------------- - 225.3/232.6 kB 2.8 MB/s eta 0:00:01
   ---------------------------------------- 232.6/232.6 kB 2.4 MB/s eta 0:00:00
Installing collected packages: pypdf2
Successfully installed pypdf2-3.0.1


## Set Up LangChain with Groq

In [10]:
import os
from langchain_groq import ChatGroq

# Set your Groq API key
os.environ["GROQ_API_KEY"] = "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"

llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile" 
)


## PDF & DOCX Text Extraction

In [11]:
def extract_text_from_pdf(file_path):
    from PyPDF2 import PdfReader
    reader = PdfReader(file_path)
    return "\n".join(page.extract_text() for page in reader.pages if page.extract_text())

def extract_text_from_docx(file_path):
    from docx import Document
    doc = Document(file_path)
    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])


## LangChain Prompt Template

In [12]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt = PromptTemplate.from_template("""
You are an intelligent resume parser.

Extract these fields from the resume text:
- skills: List of programming languages, tools, technologies.
- experience: List of roles with job_title, company, duration, description.
- projects: List of projects with title, description, and link (if any).

Resume Text:
```{text}```

Respond ONLY in this JSON format:
{{
  "skills": [...],
  "experience": [
    {{
      "job_title": "...",
      "company": "...",
      "duration": "...",
      "description": "..."
    }}
  ],
  "projects": [
    {{
      "title": "...",
      "description": "...",
      "link": "..."
    }}
  ]
}}
""")


## Run LangChain Pipeline

In [15]:
# Resume path
resume_path = input("Enter resume path (.pdf/.docx): ")

if resume_path.endswith(".pdf"):
    resume_text = extract_text_from_pdf(resume_path)
elif resume_path.endswith(".docx"):
    resume_text = extract_text_from_docx(resume_path)
else:
    raise ValueError("Unsupported file type")

# Use the new RunnableSequence format
chain = prompt | llm

# Invoke the chain
response = chain.invoke({"text": resume_text})

# Optional: If JSON output expected
import json
try:
    parsed_data = json.loads(response.content if hasattr(response, 'content') else response)
    print("Parsed Data:", parsed_data)
except json.JSONDecodeError:
    print("Raw LLM Response:\n", response)


Enter resume path (.pdf/.docx):  D:\\OneDrive\\Desktop\\AbhinavGuptaResume2025.pdf


Raw LLM Response:
 content='```\n{\n  "skills": [\n    "C",\n    "Java",\n    "C++",\n    "Python",\n    "TypeScript",\n    "React.js",\n    "Next.js",\n    "React Native",\n    "Node.js",\n    "Express.js",\n    "Django",\n    "Flask",\n    "MySQL",\n    "MongoDB",\n    "Git/GitHub",\n    "Vercel",\n    "Render",\n    "AppWrite",\n    "Cloudinary",\n    "Postman",\n    "Firebase",\n    "Pandas",\n    "NumPy",\n    "Scikit-learn",\n    "OpenCV",\n    "TensorFlow",\n    "Keras",\n    "PyTorch",\n    "Seaborn",\n    "Matplotlib",\n    "SpaCy",\n    "NLTK"\n  ],\n  "experience": [\n    {\n      "job_title": "Full Stack Developer Intern",\n      "company": "Nextup Robotics Pvt. Ltd",\n      "duration": "Dec 2024 - Feb 2025",\n      "description": "Developed a React.js-based control interface integrated with ROS 2 Humble for real-time robot communication. Implemented REST APIs WebSockets for seamless robot commands and telemetry data visualization."\n    },\n    {\n      "job_title": "Front

## Parse JSON Output

In [22]:
import json
import re

try:
    
    raw = response.content if hasattr(response, 'content') else str(response)

    
    json_match = re.search(r'\{.*\}', raw, re.DOTALL)
    if not json_match:
        raise ValueError("No JSON object found in the response.")
    
    json_str = json_match.group()

    
    parsed_data = json.loads(json_str)

   
    skills = parsed_data.get("skills", [])
    experience = parsed_data.get("experience", [])
    projects = parsed_data.get("projects", [])

    
    print("\n🔧 Skills:")
    print("\n".join(skills))

    print("\n💼 Experience:")
    for exp in experience:
        print(f"{exp.get('job_title', '')} at {exp.get('company', '')} ({exp.get('duration', '')})")
        print(f"{exp.get('description', '')}\n")

    print("\n🚀 Projects:")
    for proj in projects:
        print(f"{proj.get('title', '')} - {proj.get('description', '')}")
        print(f"Link: {proj.get('link', 'N/A')}\n")

except Exception as e:
    print("❌ LLM response was not valid JSON. Here's the raw output:\n")
    print(raw)
    print("\nError:", e)


print(type(raw))



🔧 Skills:
C
Java
C++
Python
TypeScript
React.js
Next.js
React Native
Node.js
Express.js
Django
Flask
MySQL
MongoDB
Git/GitHub
Vercel
Render
AppWrite
Cloudinary
Postman
Firebase
Pandas
NumPy
Scikit-learn
OpenCV
TensorFlow
Keras
PyTorch
Seaborn
Matplotlib
SpaCy
NLTK

💼 Experience:
Full Stack Developer Intern at Nextup Robotics Pvt. Ltd (Dec 2024 - Feb 2025)
Developed a React.js-based control interface integrated with ROS 2 Humble for real-time robot communication. Implemented REST APIs WebSockets for seamless robot commands and telemetry data visualization.

Frontend Developer Intern at Abhiwan Technology Pvt. Ltd (Jan 2024 - Apr 2024)
Built responsive web interfaces with React.js, ensuring smooth UX and accessibility. Converted Figma wireframes into interactive components and integrated REST APIs.


🚀 Projects:
CODE-CAP - Built a platform for students to form hackathon teams using filters (year, gender, college, branch). Optimized search filters and provided real-time hackathon updates