<a href="https://colab.research.google.com/github/Emmanuel10701/LangChain-Superstation-A-Comprehensive-Guide-to-Modern-LLM-Frameworks/blob/main/ResumeAIHrfiltering_systemWithLangGraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
!pip install -qU langchain-google-genai langchain PyPDF2 python-docx requests langgraph

# Import required modules
import os
import json
from datetime import datetime
from typing import TypedDict, List
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langgraph.graph import StateGraph, END
from google.colab import userdata
from google.colab import files
import PyPDF2
import docx
import io
import requests
import re

# --- 1. Set up your Google API Key and LLM ---
# Read the key from the Colab secret named "geminisecrete" and export it as
# GOOGLE_API_KEY (presumably the variable ChatGoogleGenerativeAI reads —
# confirm against the langchain-google-genai docs).
os.environ["GOOGLE_API_KEY"] = userdata.get("geminisecrete")
# Shared chat model; extract_info_node uses it for structured extraction.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")

# --- 2. Define the State for our Graph ---
class ApplicantState(TypedDict):
    """State dictionary threaded through the screening graph.

    The caller seeds every key (see chat_with_system); each node then fills
    in the keys it is responsible for and returns the same dictionary.
    """
    resume_text: str  # Raw resume text to be screened
    skills: List[str]  # Skills extracted from the resume by extract_info_node
    experience_level: str  # 'Beginner/Entry', 'Mid-level', 'Senior', or 'Unknown' on extraction failure
    skill_match: bool  # True only when every required skill was matched
    skill_match_percentage: float  # Percentage of required skills matched
    location: str  # Detected location (from resume text, else IP lookup)
    is_from_kenya: bool  # Whether the detected location is recognised as Kenyan
    final_decision: str  # "REJECT", "HR_REVIEW", or "RECRUITER_ESCALATE"
    conversation_id: str  # Unique ID assigned per screening run
    timestamp: str  # ISO-8601 time at which the decision was made

# --- 3. Memory Storage for Last 5 Chats ---
class ConversationMemory:
    """Bounded, in-memory history of the most recent screening records.

    Records are kept oldest-first in ``self.conversations``. Once more than
    ``max_size`` records have been added, the oldest ones are discarded.
    """

    def __init__(self, max_size=5):
        """Create an empty history holding at most ``max_size`` records."""
        self.max_size = max_size
        self.conversations = []

    def add_conversation(self, conversation_data):
        """Append a record, evicting the oldest entries beyond ``max_size``.

        A non-positive ``max_size`` stores nothing. (Previously this case
        tried to pop from an empty list and raised ``IndexError``.)
        """
        if self.max_size <= 0:
            return
        self.conversations.append(conversation_data)
        # Trim from the front so only the newest ``max_size`` records remain.
        overflow = len(self.conversations) - self.max_size
        if overflow > 0:
            del self.conversations[:overflow]

    def get_recent_conversations(self):
        """Return the stored records, oldest first.

        A shallow copy is returned so callers cannot grow or reorder the
        internal history by mutating the result.
        """
        return list(self.conversations)

    def clear_memory(self):
        """Discard every stored record."""
        self.conversations = []

# Module-level history shared across the notebook: decide_outcome_node appends
# to it and chat_with_system reads/clears it. Holds at most 5 records.
memory = ConversationMemory(max_size=5)

# --- 4. Location Detection Functions ---
def detect_location_from_ip():
    """Look up a "City, Country" string for the current public IP address.

    Sends a GET request to https://ipapi.co/json/ with a 5-second timeout.
    The lookup is best-effort: any network error, non-200 status, or
    malformed response yields the failure string rather than an exception.

    NOTE(review): the service geolocates whichever machine issues the
    request, so on a hosted runtime this is presumably the server's
    location rather than the applicant's — confirm this is intended.

    Returns:
        "<city>, <country>" on success (missing or null fields become
        "Unknown"), otherwise "Location detection failed".
    """
    try:
        response = requests.get('https://ipapi.co/json/', timeout=5)
        if response.status_code == 200:
            data = response.json()
            # `or` also covers keys that are present but null in the JSON.
            country = data.get('country_name') or 'Unknown'
            city = data.get('city') or 'Unknown'
            return f"{city}, {country}"
    except (requests.RequestException, ValueError, AttributeError) as e:
        # RequestException: connection/timeout problems.
        # ValueError: body is not valid JSON.
        # AttributeError: JSON payload is not an object.
        # Other exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        print(f"⚠️ IP location lookup failed: {e}")
    return "Location detection failed"

def detect_location_from_resume(resume_text):
    """Heuristically extract a location string from resume text.

    Strategy:
      1. Look for an explicit label ("Location:", "Address:", "City:",
         "Country:", "Based in:"), in that priority order, and return the
         rest of that line.
      2. Otherwise return the first "Word, Word" or "Word, Word, Word"
         sequence in the text, on the assumption that contact details
         appear before comma-separated lists such as skills.

    Args:
        resume_text: Full resume text; may be empty or None.

    Returns:
        The detected location, or the sentinel string
        "Location not specified in resume" when nothing matches.
    """
    not_found = "Location not specified in resume"
    if not resume_text:
        return not_found

    # Labelled fields, highest priority first.
    #  * \b stops "Location:" from matching inside e.g. "Relocation:".
    #  * [ \t]* and \S keep the captured value on the label's own line, so an
    #    empty "Location:" line does not swallow the next section heading.
    labels = ('Location', 'Address', 'City', 'Country', 'Based in')
    for label in labels:
        match = re.search(rf'\b{label}:[ \t]*(\S.*)', resume_text, re.IGNORECASE)
        if match:
            return match.group(1).strip()

    # Unlabelled fallback: take the earliest comma-separated pair or triple.
    # Searching for triples across the whole text first (the previous
    # behaviour) let a later skills list win over a header location.
    match = re.search(r'\w+(?:,\s*\w+){1,2}', resume_text)
    if match:
        return match.group(0).strip()

    return not_found

def is_from_kenya(location):
    """Return True when ``location`` names Kenya or a known Kenyan town.

    Matching is case-insensitive and done on whole words, so a place that
    merely contains an indicator as a substring (e.g. "Kameruka" contains
    "meru") is not mistaken for a Kenyan location.

    Args:
        location: Free-form location text; empty or None yields False.

    Returns:
        bool: whether any word of ``location`` is a Kenyan indicator.
    """
    kenyan_indicators = {
        'kenya', 'kenyan', 'nairobi', 'mombasa', 'kisumu', 'nakuru',
        'eldoret', 'ruiru', 'kikuyu', 'thika', 'kakamega',
        'malindi', 'garissa', 'lamu', 'nyeri', 'embu', 'meru',
        'machakos', 'naivasha', 'kitale', 'kericho', 'bungoma',
    }

    if not location:
        return False

    # Split on anything that is not a letter so punctuation, digits and
    # separators such as "," or "|" never glue words together.
    words = re.findall(r'[a-z]+', location.lower())
    return any(word in kenyan_indicators for word in words)

# --- 5. Document Upload and Processing Functions ---
def upload_and_read_file():
    """Prompt for a file upload in Colab and return the text extracted from it.

    Only the first uploaded file is processed. PDF and DOCX files are
    supported, selected by (case-insensitive) file extension.

    Returns:
        The extracted text, or None when nothing was uploaded, the format is
        unsupported, extraction failed, or the document contains no readable
        text (e.g. a scanned, image-only PDF). A message explaining the
        failure is printed in every None case.
    """
    print("📁 Please upload your resume file (PDF or DOCX format)")

    uploaded = files.upload()

    if not uploaded:
        print("❌ No file uploaded. Please try again.")
        return None

    # Only the first uploaded file is considered.
    file_name, file_content = next(iter(uploaded.items()))
    lowered_name = file_name.lower()

    # Process based on file type
    try:
        if lowered_name.endswith('.pdf'):
            text = extract_text_from_pdf(io.BytesIO(file_content))
        elif lowered_name.endswith('.docx'):
            text = extract_text_from_docx(io.BytesIO(file_content))
        else:
            print(f"❌ Unsupported file format: {file_name}")
            return None
    except Exception as e:
        print(f"❌ Error reading file: {e}")
        return None

    if text is None:
        # The extractor already printed the reason.
        return None

    # Whitespace-only output is truthy, so without this check an unreadable
    # document would be screened as if it were an (empty) resume.
    if not text.strip():
        print(f"❌ No readable text found in {file_name}. Scanned or image-only documents are not supported.")
        return None

    return text

def extract_text_from_pdf(file_stream):
    """Return the text of every page of a PDF, one newline after each page.

    Args:
        file_stream: Binary file-like object containing the PDF.

    Returns:
        The concatenated page text, or None if reading or extraction
        raised (the error is printed).
    """
    try:
        reader = PyPDF2.PdfReader(file_stream)
        # Pages are visited in document order; each contributes its text
        # followed by a single newline.
        return "".join(page.extract_text() + "\n" for page in reader.pages)
    except Exception as err:
        print(f"❌ Error extracting text from PDF: {err}")
        return None

def extract_text_from_docx(file_stream):
    """Return the text of every paragraph of a DOCX, one per line.

    Args:
        file_stream: Binary file-like object containing the DOCX.

    Returns:
        The paragraph texts, each followed by a newline, or None if opening
        or reading the document raised (the error is printed).
    """
    try:
        document = docx.Document(file_stream)
        # Only body paragraphs are read, in document order.
        return "".join(para.text + "\n" for para in document.paragraphs)
    except Exception as err:
        print(f"❌ Error extracting text from DOCX: {err}")
        return None

# --- 6. Define the Nodes/Functions (Agents) with Verbose Logging ---
def extract_info_node(state: ApplicantState) -> ApplicantState:
    """Graph node: ask the LLM for the applicant's skills and experience level.

    Reads ``state['resume_text']`` and writes ``state['skills']`` and
    ``state['experience_level']``. If the model call or parsing fails, the
    error is printed and the fields fall back to ``[]`` and ``"Unknown"``.

    Returns:
        The same ``state`` dictionary, updated in place.
    """
    print("🔍 Extracting information from resume...")
    resume_text = state['resume_text']

    # Structured-output schema handed to the model. It is deliberately left
    # without a docstring so the schema the model receives stays unchanged.
    class ApplicantDetails(BaseModel):
        skills: List[str] = Field(description="List of skills extracted from the resume.")
        experience_level: str = Field(description="The determined experience level: 'Beginner/Entry', 'Mid-level', or 'Senior'.")

    # Instructions for the model; {resume} is filled in at invoke time.
    extraction_prompt = PromptTemplate(
        template="""
        Analyze the following resume text and provide the applicant's skills and experience level.
        Experience level should be one of: 'Beginner/Entry', 'Mid-level', or 'Senior'.
        Format the output as a JSON object with 'skills' (list of strings) and 'experience_level' (string).

        Resume Text:
        {resume}
        """,
        input_variables=["resume"]
    )

    structured_llm = llm.with_structured_output(ApplicantDetails)
    pipeline = extraction_prompt | structured_llm

    try:
        details = pipeline.invoke({"resume": resume_text})
        # Copy the parsed fields into the shared state.
        state['skills'] = details.skills
        state['experience_level'] = details.experience_level
        print(f"✅ Extracted skills: {state['skills']}")
        print(f"✅ Extracted level: {state['experience_level']}")
    except Exception as err:
        print(f"❌ Error extracting information: {err}")
        # Neutral defaults let the downstream nodes still run.
        state['skills'] = []
        state['experience_level'] = "Unknown"

    return state

def match_skills_node(state: ApplicantState) -> ApplicantState:
    """Graph node: score the applicant's skills against the required list.

    A required skill counts as matched when its lowercase form is a
    substring of any lowercase applicant skill. Writes
    ``state['skill_match_percentage']`` (0-100) and ``state['skill_match']``
    (True only at exactly 100%).

    Returns:
        The same ``state`` dictionary, updated in place.
    """
    print("🔍 Matching skills...")
    required_skills = ["Python", "Machine Learning", "Cloud Computing", "Data Analysis"]  # Required skills
    applicant_skills = state['skills']

    # Compare case-insensitively.
    candidate_lower = [entry.lower() for entry in applicant_skills]

    # Keep each required skill that appears inside at least one applicant skill.
    matched_skills = [
        needed
        for needed in (entry.lower() for entry in required_skills)
        if any(needed in offered for offered in candidate_lower)
    ]

    match_percentage = (len(matched_skills) / len(required_skills)) * 100

    # Anything short of a full match is not considered.
    state['skill_match'] = (match_percentage == 100)
    state['skill_match_percentage'] = match_percentage

    verdict = '✅ Yes' if state['skill_match'] else '❌ No'
    print(f"📋 Required skills: {required_skills}")
    print(f"👤 Applicant skills: {applicant_skills}")
    print(f"✅ Skills match percentage: {match_percentage:.1f}%")
    print(f"✅ 100% match required: {verdict}")

    return state

def detect_location_node(state: ApplicantState) -> ApplicantState:
    """Graph node: determine the applicant's location and Kenya eligibility.

    The resume text is consulted first; only when it yields no location is
    the IP-based lookup used. Writes ``state['location']`` and
    ``state['is_from_kenya']``.

    NOTE(review): the IP fallback appears to describe the machine running
    this code rather than the applicant — confirm that is acceptable for
    the Kenya-only policy.

    Returns:
        The same ``state`` dictionary, updated in place.
    """
    print("🔍 Detecting applicant location...")

    detected = detect_location_from_resume(state['resume_text'])

    if detected == "Location not specified in resume":
        # Nothing usable in the resume: fall back to IP-based detection.
        detected = detect_location_from_ip()
        state['location'] = detected
        print(f"📍 Location from IP: {detected}")
    else:
        state['location'] = detected
        print(f"📍 Location from resume: {detected}")

    # Flag whether the resolved location counts as Kenyan.
    kenyan = is_from_kenya(state['location'])
    state['is_from_kenya'] = kenyan
    print(f"🇰🇪 Is from Kenya: {'✅ Yes' if state['is_from_kenya'] else '❌ No'}")

    return state

def decide_outcome_node(state: ApplicantState) -> ApplicantState:
    """Graph node: turn location, skill match and seniority into a decision.

    Rules, in order:
      * not from Kenya                      -> REJECT
      * skills below 100%                   -> REJECT
      * full match, "Senior"                -> RECRUITER_ESCALATE
      * full match, "Mid-level"/"Beginner/Entry" -> HR_REVIEW
      * full match, any other level         -> REJECT

    Writes ``state['final_decision']`` and ``state['timestamp']`` and
    appends a summary record to the module-level ``memory``.

    Returns:
        The same ``state`` dictionary, updated in place.
    """
    print("🔍 Deciding final outcome...")
    level = state['experience_level']
    skills_ok = state['skill_match']
    from_kenya = state['is_from_kenya']
    pct = state['skill_match_percentage']

    # Eligibility gates come first; seniority only matters once both pass.
    if not from_kenya:
        decision = "REJECT"
        reasoning = "Applicant is not from Kenya - organization policy restricts to Kenyan candidates only"
    elif not skills_ok:
        decision = "REJECT"
        reasoning = f"Kenyan candidate but only {pct:.1f}% skills match (100% required)"
    elif level == "Senior":
        decision = "RECRUITER_ESCALATE"
        reasoning = "Senior Kenyan candidate with 100% skills match - escalate to recruiter"
    elif level in ["Mid-level", "Beginner/Entry"]:
        decision = "HR_REVIEW"
        reasoning = "Mid-level or Entry Kenyan candidate with 100% skills match - send to HR review"
    else:
        # Full match but an unrecognised experience level.
        decision = "REJECT"
        reasoning = "Unknown experience level or other issue - reject"

    state['final_decision'] = decision
    state['timestamp'] = datetime.now().isoformat()
    print(f"🤔 Reasoning: {reasoning}")
    print(f"✅ Final decision: {decision}")

    # History keeps only the first 100 characters of the resume.
    full_text = state['resume_text']
    if len(full_text) > 100:
        preview = full_text[:100] + "..."
    else:
        preview = full_text

    # Record this run in the shared history.
    memory.add_conversation({
        "conversation_id": state['conversation_id'],
        "timestamp": state['timestamp'],
        "resume_preview": preview,
        "skills": state['skills'],
        "experience_level": state['experience_level'],
        "skill_match_percentage": state['skill_match_percentage'],
        "location": state['location'],
        "is_from_kenya": state['is_from_kenya'],
        "final_decision": state['final_decision'],
    })

    return state

# --- 7. Define the Graph and Workflow ---
workflow = StateGraph(ApplicantState)

# The screening pipeline is strictly linear; list the steps once, in
# execution order, and derive both nodes and edges from that list.
_PIPELINE_STEPS = [
    ("extract_info", extract_info_node),
    ("match_skills", match_skills_node),
    ("detect_location", detect_location_node),
    ("decide_outcome", decide_outcome_node),
]

# Register every step as a node.
for _step_name, _step_fn in _PIPELINE_STEPS:
    workflow.add_node(_step_name, _step_fn)

# Chain each step to the next one, and the last step to END.
_step_order = [step_name for step_name, _ in _PIPELINE_STEPS] + [END]
for _source, _target in zip(_step_order, _step_order[1:]):
    workflow.add_edge(_source, _target)

# Execution starts at the first step.
workflow.set_entry_point("extract_info")

# Compile into the runnable application used by chat_with_system.
app = workflow.compile()

# --- 8. Resume Builder Function ---
def build_resume_from_input():
    """Interactively collect resume details and render them as plain text.

    Prompts on stdin for contact details, professional background,
    education, skills, experience and certifications (in that order), then
    fills a fixed resume template.

    Returns:
        The rendered resume text.
    """
    def ask(prompt):
        # Every answer is stored with surrounding whitespace removed.
        return input(prompt).strip()

    print("\n📝 Let's build your resume for screening!")
    print("Please provide the following information:\n")

    # Contact details
    name = ask("Your full name: ")
    email = ask("Your email: ")
    phone = ask("Your phone number: ")
    location = ask("Your location (City, Country): ")

    # Professional background
    years_exp = ask("Years of experience: ")
    field = ask("Your field/industry: ")
    current_role = ask("Current/most recent role: ")

    # Education
    education = ask("Highest education degree: ")

    # Skills: comma-separated, each entry trimmed
    print("\n💻 Enter your skills (comma separated): ")
    raw_skills = ask("e.g., Python, Machine Learning, Cloud Computing, Data Analysis: ")
    skills = [entry.strip() for entry in raw_skills.split(",")]
    skills_line = ', '.join(skills)

    # Experience narrative (appears in both the summary and its own section)
    print("\n💼 Describe your experience and projects: ")
    experience = ask("e.g., Led a team of 5 developers, Built a machine learning model: ")

    # Certifications are optional
    certifications = ask("\n🏆 Certifications (comma separated, or press Enter if none): ")
    certifications_line = certifications if certifications else 'None'

    # Render the template
    return f"""
RESUME

Personal Information:
Name: {name}
Email: {email}
Phone: {phone}
Location: {location}

Professional Summary:
{current_role} with {years_exp} years of experience in the {field} field.
{experience}

Education:
{education}

Skills:
{skills_line}

Experience:
{experience}

Certifications:
{certifications_line}
"""

# --- 9. Chat Interface Function ---
def chat_with_system():
    """Run the interactive, menu-driven resume screening loop.

    Each iteration shows the menu and reads one choice:
      1 - upload a PDF/DOCX and screen it (after confirmation)
      2 - build a resume from prompted answers and screen it (after confirmation)
      3 - paste resume text directly and screen it
      4 - print the stored screening history
      5 - clear the stored history
      6 - quit

    For options 1-3 the resume text is run through the compiled graph
    (``app``) and the resulting state is printed. Errors raised while
    screening are printed and the loop continues. The function returns only
    when the user quits.
    """
    print("🤖 Welcome to the Resume Screening System!")
    print("🇰🇪 Note: This program only accepts applicants from Kenya")
    print("💯 Note: 100% skills match is required")
    print("=" * 50)

    # Counts screenings started in this call; used to build conversation IDs.
    conversation_count = 0

    while True:
        print("\nOptions:")
        print("1. Upload your resume (PDF/DOCX)")
        print("2. Build a resume from text input")
        print("3. Enter resume text directly")
        print("4. View screening history")
        print("5. Clear history")
        print("6. Quit")

        choice = input("\nChoose an option (1-6): ").strip()

        # Options 4-6 also accept a keyword alias (quit / history / clear).
        if choice == '6' or choice.lower() == 'quit':
            print("Goodbye! 👋")
            break

        if choice == '4' or choice.lower() == 'history':
            print("\n📋 Recent Conversations (Last 5):")
            conversations = memory.get_recent_conversations()
            if not conversations:
                print("   No conversations yet.")
            else:
                # Records are listed in stored order, numbered from 1.
                for i, conv in enumerate(conversations, 1):
                    print(f"\n   {i}. ID: {conv['conversation_id']}")
                    print(f"      Time: {conv['timestamp']}")
                    print(f"      Location: {conv['location']}")
                    print(f"      From Kenya: {'✅ Yes' if conv['is_from_kenya'] else '❌ No'}")
                    print(f"      Skills Match: {conv['skill_match_percentage']:.1f}%")
                    print(f"      Level: {conv['experience_level']}")
                    print(f"      Decision: {conv['final_decision']}")
            continue

        if choice == '5' or choice.lower() == 'clear':
            memory.clear_memory()
            print("🗑️ Memory cleared!")
            continue

        if choice == '1':
            # Upload and process document
            resume_text = upload_and_read_file()
            # None or empty text means the upload/extraction did not succeed.
            if not resume_text:
                continue

            print("\n✅ Resume extracted from document!")
            print("\n📄 Extracted Resume Text (first 200 characters):")
            print("=" * 50)
            print(resume_text[:200] + "..." if len(resume_text) > 200 else resume_text)
            print("=" * 50)

            # Confirm if user wants to screen this resume
            confirm = input("\nScreen this resume? (y/n): ").strip().lower()
            # Only an exact "y" proceeds; anything else returns to the menu.
            if confirm != 'y':
                continue

        elif choice == '2':
            # Build resume from text input
            resume_text = build_resume_from_input()
            print("\n✅ Resume created successfully!")
            print("\n📄 Generated Resume:")
            print("=" * 50)
            print(resume_text)
            print("=" * 50)

            # Confirm if user wants to screen this resume
            confirm = input("\nScreen this resume? (y/n): ").strip().lower()
            if confirm != 'y':
                continue

        elif choice == '3':
            # Get resume text directly
            print("\n📝 Paste your resume text (press Enter twice when finished):")
            lines = []
            # Read until two consecutive empty lines (or EOF). A single empty
            # line is kept as part of the text.
            while True:
                try:
                    line = input()
                    if line == '':
                        if lines and lines[-1] == '': # Second empty line in a row: stop reading
                             break
                        elif not lines: # Empty first line: keep it and keep reading
                             lines.append('')
                             continue
                    lines.append(line)
                except EOFError: # Input stream ended; use what was collected
                    break
            resume_text = "\n".join(lines)

            if not resume_text.strip():
                print("❌ No resume text provided.")
                continue
        else:
            print("❌ Invalid option. Please try again.")
            continue

        # Process the resume
        conversation_count += 1
        # ID format: conv_<running count>_<HHMMSS of the current local time>.
        conversation_id = f"conv_{conversation_count}_{datetime.now().strftime('%H%M%S')}"

        print(f"\n🔍 Processing resume (ID: {conversation_id})...")
        print("=" * 50)

        try:
            # Seed every state key; the graph nodes overwrite the placeholders.
            result = app.invoke({
                "resume_text": resume_text,
                "conversation_id": conversation_id,
                "timestamp": "",
                "skills": [],
                "experience_level": "",
                "skill_match": False,
                "skill_match_percentage": 0,
                "location": "",
                "is_from_kenya": False,
                "final_decision": ""
            })

            print("=" * 50)
            print(f"📊 SCREENING RESULTS:")
            print(f"   Applicant Level: {result['experience_level']}")
            print(f"   Skills Identified: {', '.join(result['skills'])}")
            print(f"   Skills Match Percentage: {result['skill_match_percentage']:.1f}%")
            print(f"   Location: {result['location']}")
            print(f"   From Kenya: {'✅ Yes' if result['is_from_kenya'] else '❌ No'}")
            print(f"   Final Decision: {result['final_decision']}")

            # Explain the decision
            if result['final_decision'] == "RECRUITER_ESCALATE":
                print("   💼 Excellent! Your application will be escalated to a recruiter for immediate review!")
            elif result['final_decision'] == "HR_REVIEW":
                print("   📋 Good news! Your application will be reviewed by HR in the next round.")
            else:
                # Rejected: report the first failed requirement (location, then skills).
                if not result['is_from_kenya']:
                    print("   ❌ Unfortunately, we only accept applicants from Kenya.")
                elif result['skill_match_percentage'] < 100:
                    print("   ❌ Unfortunately, you need 100% skills match for consideration.")
                else:
                    print("   ❌ Unfortunately, your application doesn't meet our current requirements.")

        except Exception as e:
            # Keep the menu alive if screening fails for any reason.
            print(f"❌ Error processing resume: {e}")

# --- 10. Run the System ---
# Start the interactive menu only when this code runs as the main module
# (e.g. as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    # Start the chat interface
    chat_with_system()

🤖 Welcome to the Resume Screening System!
🇰🇪 Note: This program only accepts applicants from Kenya
💯 Note: 100% skills match is required

Options:
1. Upload your resume (PDF/DOCX)
2. Build a resume from text input
3. Enter resume text directly
4. View screening history
5. Clear history
6. Quit

Choose an option (1-6): 1
📁 Please upload your resume file (PDF or DOCX format)


Saving degree transcript.pdf to degree transcript.pdf

✅ Resume extracted from document!

📄 Extracted Resume Text (first 200 characters):



Screen this resume? (y/n): y

🔍 Processing resume (ID: conv_1_144808)...
🔍 Extracting information from resume...
✅ Extracted skills: []
✅ Extracted level: Beginner/Entry
🔍 Matching skills...
📋 Required skills: ['Python', 'Machine Learning', 'Cloud Computing', 'Data Analysis']
👤 Applicant skills: []
✅ Skills match percentage: 0.0%
✅ 100% match required: ❌ No
🔍 Detecting applicant location...
📍 Location from IP: Washington, United States
🇰🇪 Is from Kenya: ❌ No
🔍 Deciding final outcome...
🤔 Reasoning: Applicant is not from Kenya - organization policy restricts to Kenyan candidates only
✅ Final decision: REJECT
📊 SCREENING RESULTS:
   Applicant Level: Beginner/Entry
   Skills Identified: 
   Skills Match Percentage: 0.0%
   Location: Washington, United States
   From Kenya: ❌ No
   Final Decision: REJECT
   ❌ Unfortunately, we only accept applicants f

Saving curriculum vitae.pdf to curriculum vitae.pdf

✅ Resume extracted from document!

📄 Extracted Resume Text (first 200 characters):
                                            Emmanuel Makau   
 Nairobi, Kenya | emmanuelmakau90@gmail.com  | 0793472960   LinkedI n | GitHu b | Portfoli o   
Dedicated Full -Stack Software Engineer  
...

Screen this resume? (y/n): y

🔍 Processing resume (ID: conv_2_144906)...
🔍 Extracting information from resume...
✅ Extracted skills: ['Next.js 14', 'Django', 'PostgreSQL', 'MySQL', 'MongoDB', 'Python', 'JavaScript', 'REST API development', 'DevOps', 'CI/CD', 'machine learning', 'TensorFlow', 'React', 'Tailwind CSS', 'Web Sockets', 'Docker', 'Azure LLMs', 'Selenium', 'Beautiful Soup', 'NumPy', 'Pandas', 'Node.js', 'JSON', 'HTML', 'TypeScript', 'CSS5', 'AWS', 'Git', 'VS Code', 'Postman', 'Thunder client', 'GitLab', 'Jenkins']
✅ Extracted level: Mid-level
🔍 Matching skills...
📋 Required skills: ['Python', 'Machine Learning', 'Cloud Computing', 'Data Anal