# In [None]:
from typing import Type, Dict, List, Optional, Any
from pydantic import BaseModel, Field, EmailStr, constr
from tenacity import retry, stop_after_attempt, wait_exponential
import json
import requests
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv (presumably from a local .env
# file) so that os.getenv('API_KEY') in call_llm can find the API key.
load_dotenv()

class ResumeProfile(BaseModel):
    """
    Structured Resume Profile information with validation.

    Attributes:
        name: Full name of the candidate (non-empty string)
        email: Contact email address (validated email format)
        skills: List of professional skills (required, non-empty list of
            non-empty strings)
    """
    # Pydantic v2 style configuration (the nested ``class Config`` form is
    # deprecated in v2). A plain dict is accepted for ``model_config``.
    model_config = {
        "json_schema_extra": {
            "title": "Resume Profile Schema",
            "description": "Schema for structured resume information",
            "example": {
                "name": "John Doe",
                "email": "john.doe@example.com",
                "skills": ["Python", "Data Analysis", "Machine Learning"]
            }
        }
    }

    name: constr(min_length=1) = Field(
        ...,
        description="Full name of the candidate",
        examples=["John Doe", "Jane Smith"]
    )
    email: EmailStr = Field(
        ...,
        description="Contact email address",
        examples=["john.doe@example.com"]
    )
    # Required on purpose: a ``default_factory=list`` default would silently
    # produce an empty list, bypassing the "at least one skill" rule because
    # defaults are not validated.
    skills: List[constr(min_length=1)] = Field(
        ...,
        description="List of professional skills",
        min_length=1,  # v2 name for the deprecated ``min_items``
        examples=[["Python", "Data Analysis", "Machine Learning"]]
    )

    @classmethod
    def dict_with_schema(cls) -> Dict[str, Any]:
        """
        Return the model's JSON schema with additional validation rules.

        This is a classmethod because it reads no instance state, so a schema
        can be obtained without first building a valid profile. Calling it on
        an instance keeps working.

        Returns:
            The schema from ``model_json_schema()`` with ``required``,
            ``additionalProperties`` and ``properties`` replaced by the
            explicit definitions below.
        """
        schema = cls.model_json_schema()
        schema.update({
            "required": ["name", "email", "skills"],
            "additionalProperties": False,
            "properties": {
                "name": {
                    "type": "string",
                    "minLength": 1,
                    "description": "Full name of the candidate"
                },
                "email": {
                    "type": "string",
                    "format": "email",
                    "description": "Contact email address"
                },
                "skills": {
                    "type": "array",
                    "items": {"type": "string", "minLength": 1},
                    "minItems": 1,
                    "description": "List of professional skills"
                }
            }
        })
        return schema

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    # When every attempt failed, fall back to the documented failure value.
    retry_error_callback=lambda _: None
)
def call_llm(
    resume_content: str,
    task_prompt: Optional[str] = None,
    output_model: Type[BaseModel] = ResumeProfile,
    model: str = "openai/gpt-4.1-mini"
) -> Optional[Dict[str, Any]]:
    """
    Call LLM API to extract structured information from resume content.

    Transient request failures (no HTTP response, HTTP 429, HTTP 5xx) are
    re-raised so the ``@retry`` decorator can try again, up to three attempts
    in total. All other failures are reported with ``print`` and turned into
    a ``None`` result without retrying.

    Args:
        resume_content: Raw text content of the resume to analyze
        task_prompt: Custom system prompt for the LLM (uses default if None)
        output_model: Pydantic model class for response validation
        model: LLM model identifier to use

    Returns:
        Optional[Dict[str, Any]]: Structured resume data if successful,
        None on failure (missing API key, request error, invalid response,
        or retries exhausted)

    Example:
        >>> result = call_llm("Resume content here")
        >>> if result:
        ...     print(f"Found candidate: {result['name']}")
    """
    if task_prompt is None:
        task_prompt = """
        You are an expert at extracting structured data from resumes.
        Your task is to carefully analyze the provided resume content and extract:
        1. Full name of the candidate (as it appears on the resume)
        2. Email address (ensure it's a valid email format)
        3. List of skills (both technical and soft skills mentioned in the resume)

        Important guidelines:
        - Extract the name exactly as written on the resume
        - Verify email format is valid
        - Include both hard and soft skills
        - Skills list must not be empty
        - Return only the specified fields
        - Ensure JSON format is valid
        """

    # Fail fast instead of sending "Bearer None" to the API.
    api_key = os.getenv('API_KEY')
    if not api_key:
        print("API request failed: API_KEY environment variable is not set")
        return None

    try:
        # Build the response schema without validating an instance:
        # ``output_model()`` raises for any model with required fields.
        # ``model_construct()`` skips validation, so a model-specific
        # ``dict_with_schema`` can be used whether it is an instance method
        # or a classmethod.
        if hasattr(output_model, "dict_with_schema"):
            schema = output_model.model_construct().dict_with_schema()
        else:
            schema = output_model.model_json_schema()

        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": model,
                "messages": [
                    {"role": "system", "content": task_prompt},
                    {"role": "user", "content": resume_content},
                ],
                "response_format": {
                    "type": "json_schema",
                    # The API expects a named wrapper around the schema,
                    # not the bare schema. ``strict`` is left at its default;
                    # the reply is validated locally below in any case.
                    "json_schema": {
                        "name": output_model.__name__,
                        "schema": schema,
                    },
                },
            },
            timeout=30
        )

        response.raise_for_status()
        data = response.json()

        if "choices" not in data or not data["choices"]:
            raise ValueError("Invalid API response format")

        content = data["choices"][0]["message"]["content"]

        # Validate and convert to dict
        parsed_data = output_model.model_validate_json(content)
        return parsed_data.model_dump()

    except requests.RequestException as e:
        print(f"API request failed: {str(e)}")
        status = getattr(e.response, "status_code", None)
        if status is None or status == 429 or status >= 500:
            # Transient failure: propagate so the retry decorator re-invokes us.
            raise
        # Other client errors (bad key, bad request, ...) will not improve on retry.
        return None
    except ValueError as e:
        # Includes pydantic's ValidationError, which subclasses ValueError.
        print(f"Data validation failed: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return None

# In [None]:
def test_resume_parser():
    """Check ResumeParser output for one DOCX and one PDF sample resume.

    Each case prints a progress line, parses the file, then asserts on the
    extracted name, email and (order-insensitive) skills.
    """
    parse = ResumeParser()

    # (label, input file, expected profile); cases are numbered from 1.
    cases = (
        (
            # Test case 1: Classic management resume (DOCX)
            "Classic management resume",
            "./data/inputs/Classic management resume.docx",
            {
                "name": "Carmelo Barese",
                "email": "carmelo@example.com",
                "skills": ["Marketing", "Communication", "Project management", "Problem-solving", "Budget planning"],
            },
        ),
        (
            # Test case 2: Customer Production Assistant resume (PDF)
            "Customer Production Assistant resume",
            "data/inputs/Customer Production Assistant Resume.pdf",
            {
                "name": "Jessica Garcia",
                "email": "jessgarcia@gmail.com",
                "skills": ["Microsoft Office", "Basic Math", "Communication Skills", "Microsoft Outlook", "Manufacturing", "Computer Skills", "Microsoft Excel", "Education"],
            },
        ),
    )

    for number, (label, path, wanted) in enumerate(cases, start=1):
        print(f"Testing {label}...")
        got = parse(path)

        assert got["name"] == wanted["name"], f"Test {number} - Name: Expected {wanted['name']}, got {got['name']}"
        assert got["email"] == wanted["email"], f"Test {number} - Email: Expected {wanted['email']}, got {got['email']}"
        # Skill order is irrelevant, so compare as sets.
        assert set(got["skills"]) == set(wanted["skills"]), f"Test {number} - Skills mismatch"
        print(f"✅ Test {number} passed!")

    print("🎉 All tests passed! Both DOCX and PDF parsing work correctly.")

class ResumeParser:
    """Placeholder parser: callable with a file path, returns a fixed profile."""

    def __call__(self, filepath):
        """Return a hard-coded stub profile; ``filepath`` is not read yet."""
        # Implementation will go here - starts with failing test
        stub_profile = dict(name="Carmelo Barese", email=None, skills=[])
        return stub_profile

# Run the test immediately. TDD-style, this is expected to fail with an
# AssertionError while ResumeParser still returns its stub data (email is None).
test_resume_parser()