<a href="https://colab.research.google.com/github/Jimmynycu/ATS_with_gemini_api/blob/main/Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install all required libraries for the project (-q keeps pip output quiet).
# NOTE(review): both `pyautogen` and `autogen` are requested; they look like two
# distributions of the same framework -- confirm that both are really needed.
# NOTE(review): `google.generativeai` is imported by the next cell but is not
# installed here; presumably the Colab runtime already ships it -- confirm.
!pip install pyautogen pymupdf pandas xlsxwriter python-dotenv unidecode autogen -q

In [None]:
import os
import json
import google.generativeai as genai # Import the genai library

# Set your Google Gemini API key.
# Prefer the GOOGLE_API_KEY environment variable so the secret does not have to
# live in the notebook itself; the placeholder literal is only a fallback.
api_key = os.environ.get("GOOGLE_API_KEY") or "*******************" # Replace with your actual key

# Single source of truth for the model name: the availability check and the
# AutoGen configuration below must talk about the same model.
gemini_model = "gemini-1.5-pro-latest" # Using a robust and common model

try:
    # Configure the genai client with the API key
    genai.configure(api_key=api_key)

    # Listing the models is a lightweight way to prove the key is accepted
    models = genai.list_models()

    # Confirm that the model AutoGen will be configured with is actually offered
    if any(gemini_model in m.name for m in models):
        print("✅ API Key is valid and working.")
    else:
        print("⚠️ API Key is valid, but the expected models were not found. Please check your key's permissions.")

except Exception as e:
    # Notebook-level boundary: report the problem and disable the later steps
    # instead of aborting the cell with a traceback.
    print(f"❌ API Key is NOT valid. Error: {e}")
    api_key = None # Set key to None to prevent further errors

# --- Create Configuration File if Key is Valid ---
if api_key:
    # Define the configuration for AutoGen to use the Gemini model
    config_list = [
        {
            "model": gemini_model,
            "api_key": api_key,
            "api_type": "google",
        }
    ]

    # Create the OAI_CONFIG_LIST.json file that a later cell loads with
    # autogen.config_list_from_json.
    # NOTE: the file contains the API key in plain text -- do not commit or share it.
    with open("OAI_CONFIG_LIST.json", "w", encoding="utf-8") as f:
        json.dump(config_list, f)

    print("Configuration file for Gemini created successfully.")
else:
    print("Configuration file was not created due to an invalid API key.")

In [None]:
import fitz  # PyMuPDF
from unidecode import unidecode

def parse_resume_with_pymupdf(pdf_path: str) -> str:
    """Extract the text of a PDF resume in reading order using PyMuPDF.

    Every page is read as a list of blocks. The blocks are ordered
    top-to-bottom and then left-to-right, stripped of surrounding whitespace
    and transliterated to ASCII with ``unidecode``. Blocks of a page are joined
    with newlines; consecutive pages are separated by an
    ``--- End of Page N ---`` marker.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text, or an empty string when the file cannot be opened
        or parsed (the error is printed, not raised).
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            page_count = len(doc)
            pieces = []
            for page_num in range(page_count):
                page = doc.load_page(page_num)

                # The "blocks" option is crucial for maintaining reading order.
                # Each block is a tuple (x0, y0, x1, y1, text, block_no, block_type).
                blocks = page.get_text("blocks")
                # Sort by the top edge (y0), then by the left edge (x0).
                blocks.sort(key=lambda b: (b[1], b[0]))

                # b[4] holds the block text; strip it and fold unicode to ASCII.
                pieces.append("\n".join(unidecode(b[4].strip()) for b in blocks))

                # Page marker between pages, but not after the last one.
                if page_num < page_count - 1:
                    pieces.append("\n\n--- End of Page {} ---\n\n".format(page_num + 1))
        finally:
            # Release the file handle even if a page failed to parse.
            doc.close()

        return "".join(pieces)
    except Exception as e:
        # Best-effort contract: callers test the result for emptiness.
        print(f"Error processing PDF file {pdf_path}: {e}")
        return ""

print("PDF parsing function is defined.")

In [None]:
import pandas as pd
from typing import Dict, Any

def write_summary_to_excel(resume_data: str, output_path: str = "resume_summary.xlsx") -> str:
    """Write structured resume data to a formatted Excel workbook.

    The workbook gets a 'Summary' sheet (contact fields plus the summary text)
    and, for every non-empty section, a 'Work Experience', 'Education' and
    'Skills' sheet. Column widths are sized to the longest value in each column.

    Args:
        resume_data: The resume as a JSON object encoded in a string. An
            already-decoded dict is accepted as well.
        output_path: Destination path of the ``.xlsx`` file.

    Returns:
        A human-readable status message. Failures are reported through the
        returned message (prefixed with "Error writing to Excel file:") rather
        than raised.
    """
    # Excel does not allow columns wider than 255 characters.
    max_column_width = 255

    try:
        # The LLM often returns a JSON string, so we parse it first
        data_dict = json.loads(resume_data) if isinstance(resume_data, str) else resume_data
        if not isinstance(data_dict, dict):
            raise ValueError("resume data must be a JSON object")

        # Build every sheet in memory first: a malformed payload then fails
        # before the workbook is created instead of leaving a partial file.
        frames = {}

        # --- Contact Info and Summary Sheet ---
        contact_info = data_dict.get('contact_info') or {}
        summary = data_dict.get('summary', '')
        frames['Summary'] = pd.DataFrame({
            'Field': list(contact_info.keys()) + ['Summary'],
            'Value': list(contact_info.values()) + [summary],
        })

        # --- Experience, Education, and Skills Sheets ---
        for section in ['work_experience', 'education', 'skills']:
            data = data_dict.get(section)
            if data:
                frames[section.replace('_', ' ').title()] = pd.DataFrame(data)

        with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
            for sheet_name, df in frames.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False)

                # --- Auto-format columns for readability ---
                # Widths come from the in-memory frame: the workbook is only
                # written to disk when the writer closes, so it cannot be read
                # back at this point.
                worksheet = writer.sheets[sheet_name]
                for idx, col in enumerate(df.columns):
                    cell_lengths = df.iloc[:, idx].astype(str).map(len)
                    longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                    width = min(max(longest_cell, len(str(col))) + 2, max_column_width)
                    worksheet.set_column(idx, idx, width)

        return f"Successfully created resume summary at {output_path}"
    except Exception as e:
        return f"Error writing to Excel file: {e}"

print("Excel writer tool function is defined.")

In [None]:
import autogen
from autogen.coding import LocalCommandLineCodeExecutor
from pathlib import Path

# --- Prompts -----------------------------------------------------------------
# Kept as named constants so the agent definitions below stay compact.
PARSER_SYSTEM_MESSAGE = """You are an expert HR analyst. Your task is to read resume text and extract key information into a structured JSON format.
    You must call the 'write_summary_to_excel' function with the resulting JSON data.
    Do not add any commentary or explanation outside of the final JSON object."""

ANALYZER_SYSTEM_MESSAGE = """You are an expert Data Analyst. Your function is to answer questions by writing and executing Python code.
    Use the `pandas` library to analyze data from the provided Excel file.
    First, formulate a plan. Then, write the Python code to answer the question.
    The code will be executed for you, and you will receive the result. Use this result to formulate your final answer."""

USER_PROXY_SYSTEM_MESSAGE = "A human admin who will execute function calls and code after reviewing them."

# --- LLM configuration -------------------------------------------------------
# The model settings come from the JSON file written by the API-key cell.
config_list = autogen.config_list_from_json(env_or_file="OAI_CONFIG_LIST.json")
llm_config = dict(config_list=config_list)

# --- Code execution sandbox --------------------------------------------------
# Dedicated directory handed to the command-line executor.
work_dir = Path("coding")
work_dir.mkdir(exist_ok=True)
executor = LocalCommandLineCodeExecutor(work_dir=work_dir)

# --- Agents ------------------------------------------------------------------
# Parser: turns raw resume text into structured JSON.
parser_agent = autogen.AssistantAgent(
    name="ParserAgent",
    system_message=PARSER_SYSTEM_MESSAGE,
    llm_config=llm_config,
)

# Analyzer: answers questions about the Excel summary by writing pandas code.
analyzer_agent = autogen.AssistantAgent(
    name="AnalyzerAgent",
    system_message=ANALYZER_SYSTEM_MESSAGE,
    llm_config=llm_config,
)

# User proxy: stands in for the human operator and runs code via the executor.
# human_input_mode="ALWAYS" means every step waits for human approval.
user_proxy = autogen.UserProxyAgent(
    name="UserProxy",
    system_message=USER_PROXY_SYSTEM_MESSAGE,
    human_input_mode="ALWAYS",
    code_execution_config={"executor": executor},
)

print("All agents are defined.")

In [None]:
from google.colab import files

# Ask the user for a resume PDF through the Colab upload widget.
print("Please upload your resume PDF file.")
uploaded = files.upload()

# `uploaded` is keyed by filename; only the first uploaded file is used.
if not uploaded:
    print("\nNo file uploaded. Please run this cell again to upload a file.")
    resume_pdf_path = None
else:
    resume_pdf_path = next(iter(uploaded))
    print(f"\nUploaded '{resume_pdf_path}' successfully.")

In [None]:
# Step 1: turn the uploaded PDF into text, then let the parser agent extract
# structured data and store it through the write_summary_to_excel tool.
if resume_pdf_path:
    print("--- Starting Step 1: Parsing Resume ---")

    # 1. Parse the resume PDF to get clean text
    resume_text = parse_resume_with_pymupdf(resume_pdf_path)

    if resume_text:
        # 2. Define the schema for extraction
        json_schema = """
        {
          "contact_info": {"name": "string", "email": "string", "phone": "string", "location": "string"},
          "summary": "string",
          "work_experience": [{"title": "string", "company": "string", "start_date": "string", "end_date": "string", "description": "string"}],
          "education": [{"degree": "string", "institution": "string", "graduation_date": "string"}],
          "skills": ["string"]
        }
        """

        # 3. Formulate the task for the parser agent
        parsing_task = f"""
        Please parse the following resume text, extract the information according to the provided JSON schema, and then call the 'write_summary_to_excel' function with the extracted data.

        JSON Schema:
        {json_schema}

        Resume Text:
        ---
        {resume_text}
        ---
        """

        # 4. Register the tool on BOTH sides of the conversation.
        # The parser agent (caller) must receive the tool's schema, otherwise
        # its LLM has no way to emit the function call the prompt asks for;
        # the user proxy (executor) is the agent that actually runs it.
        autogen.register_function(
            write_summary_to_excel,
            caller=parser_agent,
            executor=user_proxy,
            name="write_summary_to_excel",
            description=(
                "Write the extracted resume data, passed as a JSON string that "
                "follows the requested schema, to a formatted Excel workbook "
                "and return a status message."
            ),
        )

        # 5. Initiate the chat for parsing
        user_proxy.initiate_chat(
            recipient=parser_agent,
            message=parsing_task
        )
    else:
        print("Failed to extract text from PDF. Aborting.")
else:
    print("Please upload a resume in the previous step before running this cell.")

In [None]:
# Step 2: let the analyzer agent answer questions about the Excel summary
# produced by write_summary_to_excel.
output_excel_path = "resume_summary.xlsx"

if Path(output_excel_path).exists():
    print(f"\n--- Step 1 Complete. Summary saved to {output_excel_path} ---")
    print("--- Starting Step 2: Analyzing Data ---")

    # The generated code is run by the command-line executor, which was given
    # its own working directory ("coding"); a relative path would be resolved
    # against that directory, so hand the agent an absolute path instead.
    excel_abs_path = Path(output_excel_path).resolve()

    # Formulate the task for the analyzer agent.
    # The sheet names in the hints must match what write_summary_to_excel
    # creates: the 'work_experience' section becomes the 'Work Experience' sheet.
    analysis_task = f"""
    Read the candidate data from the Excel file located at '{excel_abs_path}'.
    Then, please answer the following questions:
    1. What is the candidate's most recent job title? (Hint: Look at the 'Work Experience' sheet).
    2. How many unique skills are listed? (Hint: Look at the 'Skills' sheet).
    """

    # Initiate the chat for analysis
    user_proxy.initiate_chat(
        recipient=analyzer_agent,
        message=analysis_task
    )

    print("\n--- Step 2 Complete. Analysis finished. ---")
else:
    print("Excel summary file was not created. Cannot proceed with analysis.")