In [1]:
from smolagents import (
    CodeAgent,
    HfApiModel,
    tool,
    Tool,
    ManagedAgent,
    ToolCallingAgent,
)

import os
from dotenv import load_dotenv
import requests
import bs4
from markdownify import markdownify as md
from textwrap import dedent

# Load key/value pairs from a local .env file (if any) into the process
# environment so the os.getenv lookups in later cells can find them.
load_dotenv()

  from pandas.core import (
  warn(
2025-01-25 14:55:58.942120: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


True

In [2]:
# --- Credentials -----------------------------------------------------------
# Read from the environment; the value is None when the variable is not set.
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY")

# --- Inputs ----------------------------------------------------------------
# Markdown file that lists every portfolio project.
all_projects_path = "./all_projects.md"

# LinkedIn job postings to process.
job_url = "https://www.linkedin.com/jobs/view/4072151400"
job_url_ya = "https://www.linkedin.com/jobs/view/4132249179"

In [3]:
@tool
def scrape_linkedin_job_page(job_url: str) -> str:
    """
    Scrapes a LinkedIn job page and returns the job title and job description in markdown format.

    Args:
        job_url: The LinkedIn job page URL.

    Returns:
        str: The job title and job description in markdown format.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        ValueError: If the page does not contain a job description block.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    }

    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would freeze the whole agent run.
    res = requests.get(job_url, headers=headers, timeout=30)
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.text, "html.parser")

    # The page title looks like "<position> | <site>"; keep only the first part.
    # select_one returns None when the tag is absent, so guard before using it.
    title_tag = soup.select_one("title")
    if title_tag is not None:
        title = title_tag.text.split("|")[0].strip()
    else:
        title = "Unknown job title"

    jd_tag = soup.select_one("div.show-more-less-html__markup")
    if jd_tag is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise ValueError(
            f"No job description found at {job_url}. "
            "The page may require a login or its layout may have changed."
        )
    jd = md(jd_tag.prettify())

    final = f"# {title}\n\n## Job Description\n{jd}"

    return final

In [4]:
class FileReadTool(Tool):
    """Agent tool that returns the full contents of a text file as a string."""

    name = "file_read_tool"
    description = "Use this tool to read a simple text file."
    inputs = {
        "file_path": {"type": "string", "description": "The path to the file to read."}
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, file_path: str) -> str:
        """
        Read a text file and return its contents.

        Args:
            file_path: The path to the file to read.

        Returns:
            str: The complete text of the file, decoded as UTF-8.

        Raises:
            ValueError: If the parent directory of ``file_path`` does not exist.
            FileNotFoundError: If ``file_path`` itself does not exist.
        """
        directory = os.path.dirname(file_path)

        # An empty dirname means the path is relative to the current directory.
        if directory and not os.path.exists(directory):
            raise ValueError(f"Directory {directory} does not exist.")

        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File {file_path} does not exist.")

        # Pin the encoding so the result does not depend on the platform's
        # locale default (e.g. cp1252 on Windows), which can fail on non-ASCII text.
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()


class FileWriteTool(Tool):
    """Agent tool that writes a string to a .txt or .md file."""

    name = "file_write_tool"
    description = "Use this tool to write a string to a file. Write the provided data to the file at the provided path. Returns a confirmation message that includes the path to the file."
    inputs = {
        "data": {"type": "string", "description": "The data to write to the file."},
        "file_name": {
            "type": "string",
            "description": "The path to the file to write. Must have a .txt or .md extension.",
        },
        "directory": {
            "type": "string",
            "description": "The directory to write the file to. If not provided, will write to the current directory.",
            # Boolean flag, not the string "true".
            "nullable": True,
        },
        "overwrite": {
            "type": "boolean",
            # Keep this text in sync with the default in forward()'s signature.
            "description": "Whether to overwrite the file if it already exists. Default is True.",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(
        self, data: str, file_name: str, directory: str = None, overwrite: bool = True
    ) -> str:
        """
        Write ``data`` to a text or markdown file.

        Args:
            data: The text to write.
            file_name: Name of the target file; must end with .txt or .md.
            directory: Existing directory to write into. When None, ``file_name``
                is used as the path as-is.
            overwrite: Whether an existing file may be replaced.

        Returns:
            str: A confirmation message containing the path that was written.

        Raises:
            ValueError: If the extension is not .txt/.md or ``directory`` does not exist.
            FileExistsError: If the file exists and ``overwrite`` is False.
        """
        if not file_name.endswith((".txt", ".md")):
            raise ValueError("File name must end with .txt or .md.")

        if directory is not None:
            if not os.path.exists(directory):
                raise ValueError(f"Directory {directory} does not exist.")
            file_path = os.path.join(directory, file_name)
        else:
            file_path = file_name

        if os.path.exists(file_path) and not overwrite:
            raise FileExistsError(
                f"File {file_path} already exists. Set overwrite to True to overwrite it."
            )

        # Pin the encoding so non-ASCII characters produced by the model are
        # written consistently regardless of the platform's locale default.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(data)

        return f"File written to {file_path}."

In [5]:
# Two interchangeable inference clients are prepared here; pass whichever one
# you prefer as `model=` when building the agents.

# DeepSeek R1 distilled onto Qwen 32B.
model_id_deepseek = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
deepseek_model = HfApiModel(
    token=HUGGINGFACE_API_KEY,
    model_id=model_id_deepseek,
)

# Qwen 2.5 Coder 32B Instruct.
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
model = HfApiModel(
    token=HUGGINGFACE_API_KEY,
    model_id=model_id,
)

In [6]:
# `job_description_agent`: a tool-calling agent that can scrape a LinkedIn job
# page and summarize the job description, wrapped as a managed agent so the
# manager can delegate to it by name.
job_description_agent = ManagedAgent(
    ToolCallingAgent(
        model=model,
        tools=[scrape_linkedin_job_page],
        add_base_tools=False,
    ),
    name="job_description_agent",
    description="Have a knack for finding the relevant information from a job description. Can scrape the job description from a LinkedIn job posting and summarize its contents.",
)

# `project_filter_agent`: a tool-calling agent that can read the markdown file
# containing all your projects and pick the most relevant ones for the
# application, also wrapped as a managed agent.
project_filter_agent = ManagedAgent(
    ToolCallingAgent(
        model=model,
        tools=[FileReadTool()],
        add_base_tools=False,
    ),
    name="project_filter_agent",
    description="Good at creating CVs that are tailored to the job description and are ATS friendly by selecting the most relevant projects with the right keywords from your portfolio.",
)

# `manager_agent`: the code agent that coordinates the two managed agents
# above; the file-writing tool is the only tool it is given directly.
manager_agent = CodeAgent(
    model=model,
    tools=[FileWriteTool()],
    managed_agents=[job_description_agent, project_filter_agent],
    add_base_tools=False,
)

# Task template handed to the manager agent. It carries two `str.format`
# placeholders that are filled in per job:
#   {job_url}           - URL of the LinkedIn job posting
#   {all_projects_path} - path to the markdown file listing all projects
# `dedent` strips the common leading indentation of the literal below.
start_prompt = dedent("""
    You are an experienced data scientist looking for a new job. You come across a job posting on LinkedIn that you are interested in. You need to save the job title and a brief summary of the job description, mostly about 5-10 bullet points, to a markdown file for later reference. The file must have a format: `<position_name>_<company_name>.md`. If position_name or company_name is not found, use the format `<job_id>.md`. Here, job_id is the last part of the job posting url. The job posting is at the following URL:
    {job_url}
    
    Once the summary is stored, you need to select which projects from your portfolio to include in your job application. You have a markdown file that lists all your projects and their descriptions. You want to select at most five projects that are most relevant to the job posting and save the selected projects to the same markdown file where you saved the job description. You need to just mention the name of the projects that you want to include in the application. No need to write the project descriptions.

    The markdown file with all your projects is located at `{all_projects_path}`. You MUST NOT modify this file.
    """)

## Job 1

In [7]:
# Fill the prompt template for the first job posting and hand the task to the
# manager agent; the agent's final answer is shown as the cell output.
manager_agent.run(
    start_prompt.format(
        all_projects_path=all_projects_path,
        job_url=job_url,
    )
)

'The job description and selected projects have been saved to the markdown file.'