In [None]:
from aixplain.factories import AgentFactory, TeamAgentFactory

# Agent that scrapes a job posting from a user-supplied link and summarises
# its requirements. It is created without tools; the `tools=` argument below
# is intentionally left commented out.
web_scrape_agent = AgentFactory.create(
    name="scrape and summarize Job description",
    description=(
        "scrapes a Job lising from a user provided link "
        "and provides summary of requirements"
    ),
    # The instructions name the tool parameter (`url`) and the three fields
    # the answer must contain: 'company', 'title' and 'summary'.
    instructions=(
        "When the user provides a URL or says ‘scrape this link: X’, "
        "call the scrape_tool function with the parameter url=X "
        "and provide the company name, job title, and a detailed set of "
        "requirments for the recruiter, "
        "format: 'company', 'title', and 'summary'."
    ),
    # tools=[AgentFactory.create_model_tool("66f423426eb563fa213a3531")],
)

INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/669a63646eb56306647e1091 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 669a63646eb56306647e1091 instantiated.


In [2]:
def scrape_and_clean_link(url: str):
    """
    Navigates to the given URL using Selenium (headless Chrome),
    grabs the rendered HTML, removes <script> and <style> tags,
    and returns a cleaned text version of the page content.

    Every import lives inside the function body, so the function does not
    rely on any module-level names.

    :param url: str, the webpage URL
    :return: str with the stripped, non-empty lines of page text joined by
        newlines, or None when a missing dependency cannot be installed or
        the scraping/cleaning step fails (the error is printed)
    """

    import importlib
    import importlib.util
    import subprocess
    import sys

    # 1) Install dependencies at runtime (only the ones that are missing).
    #    Keys are import names, values are the matching pip package names.
    required = {
        "selenium": "selenium",
        "webdriver_manager": "webdriver-manager",
        "bs4": "beautifulsoup4",
    }
    missing = [
        package
        for module, package in required.items()
        if importlib.util.find_spec(module) is None
    ]
    if missing:
        try:
            # Use the running interpreter's pip so the packages land in the
            # environment this code imports from. check=True turns a failed
            # install into an exception instead of an ignored exit status.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", *missing],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as e:
            print("Could not install dependencies. Error:", e)
            return None
        # Let the import system see the packages installed a moment ago.
        importlib.invalidate_caches()

    try:
        # 2) Import libraries after install
        import time

        from bs4 import BeautifulSoup
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.chrome.service import Service
        from webdriver_manager.chrome import ChromeDriverManager

        # 3) Configure Selenium in headless mode
        chrome_options = Options()
        chrome_options.add_argument("--headless=new")

        driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=chrome_options
        )

        try:
            # 4) Actually scrape the page
            wait_time = 5
            driver.get(url)
            time.sleep(wait_time)  # Adjust as needed for heavy JS sites

            page_source = driver.page_source

        finally:
            # Always shut the browser down, even if loading the page failed.
            driver.quit()

        # 5) Clean the HTML with BeautifulSoup
        soup = BeautifulSoup(page_source, "html.parser")

        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator="\n")

        # 6) Strip each line and drop the ones that end up empty
        lines = [line.strip() for line in text.splitlines()]
        cleaned_text = "\n".join(line for line in lines if line)

        return cleaned_text

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print("Scraping/cleaning process failed. Error:", e)
        return None

In [3]:
from aixplain.factories import AgentFactory

# Wrap the scraper function as a custom Python code tool for the agent.
scrape_tool = AgentFactory.create_custom_python_code_tool(
    code=scrape_and_clean_link,
    description="takes a link to a web page, and returnes a cleaned up string containing the scraped page content",
)
# Assign instead of append so that re-running this cell does not stack
# duplicate copies of the tool on the agent (the agent is created with no
# tools of its own).
web_scrape_agent.tools = [scrape_tool]
# Persist the updated tool list.
web_scrape_agent.save()

INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/669a63646eb56306647e1091 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 669a63646eb56306647e1091 instantiated.


In [None]:
from aixplain.factories import ModelFactory

# Register the scraper function as a utility model and deploy it.
scrape_utility = ModelFactory.create_utility_model(
    name="scrape link",
    description="takes a link to a web page, and returnes a cleaned up string containing the scraped page content",
    code=scrape_and_clean_link,
)
scrape_utility.deploy()

# Expose the deployed utility model to the agent as a model tool.
scrape_tool = AgentFactory.create_model_tool(model=scrape_utility.id)
# Assign instead of append: appending would leave the agent holding this tool
# alongside any scrape tool attached earlier (and one more copy per re-run of
# this cell), all doing the same job.
web_scrape_agent.tools = [scrape_tool]
web_scrape_agent.save()

INFO:root:Start service for POST Utility Model - https://platform-api.aixplain.com/sdk/utilities - {'x-api-key': '2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'} - {'name': 'scrape link', 'description': 'takes a link to a web page, and returnes a cleaned up string containing the scraped page content', 'inputs': [{'name': 'url', 'description': 'The url input is a text', 'type': 'text'}], 'code': 's3://aixplain-platform-backend-temp/23681/sdk/1744931121657-e901c942-4628-46cd-bafb-5f380c5cba8f', 'function': 'utilities', 'outputDescription': '', 'status': 'draft'}
INFO:root:Utility Model Creation: Model 68018932c315cb3fcdaea033 instantiated.
INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/68018932c315cb3fcdaea033 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Utility Model Already Exists, skipping code vali

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from bs4 import BeautifulSoup
import time
import re


def scrape_and_clean_page(url: str, wait_time: int = 5):
    """
    Render a web page in headless Chrome and return its visible text.

    <script> and <style> elements are removed from the rendered HTML, every
    remaining line of text is stripped, and lines that end up empty are
    discarded.

    :param url: str, address of the page to load
    :param wait_time: int, seconds to sleep after the page is requested so
        JS-heavy pages have time to render
    :return: str, the non-empty lines of page text joined by newlines
    """
    # Headless browser session; the chromedriver path comes from webdriver-manager.
    options = Options()
    options.add_argument("--headless=new")
    service = Service(ChromeDriverManager().install())
    browser = webdriver.Chrome(service=service, options=options)

    try:
        browser.get(url)
        time.sleep(wait_time)  # fixed pause; nothing polls for readiness
        rendered_html = browser.page_source
    finally:
        # Close the browser whether or not the page load succeeded.
        browser.quit()

    soup = BeautifulSoup(rendered_html, "html.parser")

    # Drop elements whose contents are code or styling rather than page text.
    for node in soup.find_all(["script", "style"]):
        node.decompose()

    # Strip every line, then keep only the ones that still have content.
    stripped_lines = (raw.strip() for raw in soup.get_text(separator="\n").splitlines())
    return "\n".join(piece for piece in stripped_lines if piece)


if __name__ == "__main__":
    # Manual smoke test: scrape one job posting and print the cleaned text
    # between two marker lines.
    test_url = "https://aixplain.com/careers/senior-backend-engineer-48/"
    page_text = scrape_and_clean_page(test_url, wait_time=5)

    for chunk in (
        "===== Cleaned Page Text =====",
        page_text,
        "===== End of Text =====",
    ):
        print(chunk)

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/ramiibrahimi/.wdm/drivers/chromedriver/mac64/135.0.7049.95/chromedriver-mac-arm64/chromedriver] found in cache


===== Cleaned Page Text =====
Senior Backend Engineer
Senior Backend Engineer - aiXplain
🧑‍💻 Join our live AI agent training
Blog
Events
Company
Careers
Contact us
FAQ
Login
Platform
Overview
Studio
Bel Esprit
Chat
SDK
Company
About
Careers
Pricing
Resources
Blog
Videos
Research
Case studies
Whitepapers
Release notes
All resources
Contact us
FAQ
Sign up now
Senior Backend Engineer
Remote · Riyadh, SA
Apply now
Come join a team of industry and science leaders to achieve a vision of empowering innovation through state-of-the-art artificial intelligence leveraging multiple cloud technologies. We are addressing exciting challenges for our customers, at the intersection of AI/ML and cutting-edge cloud infrastructure.
We are in search of a Backend Software Engineer who will bring a depth of knowledge on basic algorithms and data structures. As a Backend Software Engineer, on any given day you might be architecting new features for our customers, refactoring existing code to be more scalable,

In [4]:
# URL of the job posting to scrape; change this to target another listing.
job_url = "https://aixplain.com/careers/senior-backend-engineer-48/"

# "scrape this link: X" is the phrasing the agent's instructions tell it to
# react to, so the URL is interpolated into that exact form.
response = web_scrape_agent.run(query=f"scrape this link: {job_url}")

INFO:root:Polling for Model: Start polling for model_process


In [5]:
# Print the agent response object for inspection.
print(response)

AgentResponse(status=SUCCESS, data='AgentResponseData(input={'input': 'scrape this link: https://aixplain.com/careers/senior-backend-engineer-48/', 'chat_history': [], 'outputFormat': 'text'}, output={'query': {'company': 'Aixplain', 'title': 'Senior Backend Engineer', 'summary': 'The role requires strong experience in backend development, proficiency in programming languages such as Python, and familiarity with cloud services. Candidates should have a solid understanding of database management and API design, along with the ability to work collaboratively in a team environment.'}}, session_id='67140f03-2a8a-47fd-b28b-2ce13eac6691', intermediate_steps=[{'agent': 'scrape and summarize Job description', 'input': "{'input': 'scrape this link: https://aixplain.com/careers/senior-backend-engineer-48/', 'chat_history': [], 'outputFormat': 'text'}", 'output': "{'query': {'company': 'Aixplain', 'title': 'Senior Backend Engineer', 'summary': 'The role requires strong experience in backend devel

In [6]:
import ast

def _parse_agent_output(raw_output):
    """Convert the agent's raw output into a Python object.

    Accepts three shapes so the cell keeps working if the output format
    varies: an already-parsed dict (returned unchanged), a Python-literal
    string such as "{'query': {...}}", or a JSON string.

    :param raw_output: dict or str holding the agent output
    :return: the parsed object (a dict for the outputs handled here)
    :raises ValueError: if a string is neither a Python literal nor JSON
        (json.JSONDecodeError is a ValueError subclass)
    """
    import json

    if isinstance(raw_output, dict):
        return raw_output
    try:
        # literal_eval only evaluates literals, so it is safe on model output.
        return ast.literal_eval(raw_output)
    except (ValueError, SyntaxError):
        if not isinstance(raw_output, (str, bytes, bytearray)):
            raise
        # JSON uses true/false/null, which are not Python literals.
        return json.loads(raw_output)


# Just get the full output — no [0]
api_response = response.data.output

# Parse it safely
job__response = _parse_agent_output(api_response)["query"]

# Now access the individual fields
job_description = job__response["summary"]
job_title = job__response["title"]
company_name = job__response["company"]

job_info = {
    "company_name": company_name,
    "job_title": job_title,
    "job_description": job_description,
}

In [7]:
# Display the job fields as the cell's output.
job_info

{'company_name': 'Aixplain',
 'job_title': 'Senior Backend Engineer',
 'job_description': 'The role requires strong experience in backend development, proficiency in programming languages such as Python, and familiarity with cloud services. Candidates should have a solid understanding of database management and API design, along with the ability to work collaboratively in a team environment.'}

In [4]:
from pipelines.jd_search import search_background

# Pass the job description to the project's `search_background` helper.
# NOTE(review): presumably this returns the parts of my background that match
# the description; its implementation and return type are not visible here.
relevant_background = search_background(job_description)

In [5]:
# Bundle the job details and the matching background into one payload that
# is interpolated into the agent prompts.
content = dict(job_info=job_info, my_background_info=relevant_background)

In [None]:
from aixplain.factories import AgentFactory


# Model tool handed to the resume agent (the id below is gpt-4o).
model_tool_4o = AgentFactory.create_model_tool(model="6646261c6eb563165658bbb1")

# Agent that turns a job description plus background info into a tailored
# resume of a requested length.
resume_agent = AgentFactory.create(
    tools=[model_tool_4o],
    name="resume generation agent",
    description="Agent that takes relevant background info and generates resume",
    instructions="""
    you are a career advisor tasked with creating a job specific custom resume
    accept a job description, my relevant background info, and desired number of pages, and create a custom resume with that number of pages.
    you do not have to include everything from my background, priorotize what is most relevant
    """,
)

INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/6646261c6eb563165658bbb1 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 6646261c6eb563165658bbb1 instantiated.
INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/669a63646eb56306647e1091 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 669a63646eb56306647e1091 instantiated.


In [7]:
import json

# Pretty-print the payload as indented JSON to check what the agents receive.
print(json.dumps(content, indent=2))

{
  "job_info": {
    "company_name": "aiXplain",
    "job_title": "Senior Applied AI/ML Scientist",
    "job_description": "The role requires an advanced degree in a relevant field and 7+ years of industry experience in machine learning. Candidates should have expertise in designing and implementing machine learning models, particularly in NLP, and be proficient in Python and machine learning libraries. Experience with large language models, conversational AI, and cloud infrastructure is preferred. The position involves collaboration with scientists and engineers, presenting complex solutions, and working in an Agile environment."
  },
  "my_background_info": {
    "professional_experience": "2018-Present\tGoogle LLC\nProduct Operations T/ Program Manager \u2013 Phones (2019-Present) Mountain View, CA\nManaged oversees third party team of data scientists to deliver machine learning predictive models for return rates & manufacturing cell qualification resulting in reduction of 15% in q

In [None]:
# Desired resume length, in pages.
page_no = 2

# The prompt must say "<n>-page resume": without the word "page" the number
# reads as a count of resumes rather than a length.
# (`response_rusume` keeps its original spelling so existing references work.)
response_rusume = resume_agent.run(
    query=f"use this job_info, and background_info {content} to follow the instructions and create a {page_no}-page resume"
)
response_rusume

INFO:root:Polling for Model: Start polling for model_process


AgentResponse(status=SUCCESS, data='AgentResponseData(input={'input': "use the job_info, and background_info to follow the instructions{'job_info': {'company_name': 'aiXplain', 'job_title': 'Senior Applied AI/ML Scientist', 'job_description': 'The role requires an advanced degree in a relevant field and 7+ years of industry experience in machine learning. Candidates should have expertise in designing and implementing machine learning models, particularly in NLP, and be proficient in Python and machine learning libraries. Experience with large language models, conversational AI, and cloud infrastructure is preferred. The position involves collaboration with scientists and engineers, presenting complex solutions, and working in an Agile environment.'}, 'my_background_info': {'professional_experience': '2018-Present\\tGoogle LLC\\nProduct Operations T/ Program Manager – Phones (2019-Present) Mountain View, CA\\nManaged oversees third party team of data scientists to deliver machine learni

In [27]:
from aixplain.factories import AgentFactory


model_tool_4o = AgentFactory.create_model_tool(
    model="6646261c6eb563165658bbb1",  # gpt-4o
)

# Agent for cover letters, essays and answers to application questions.
# Its name and description are distinct from the resume agent's so the two
# agents cannot be confused with each other.
free_form_agent = AgentFactory.create(
    name="free form writing agent",
    description="Agent that takes job info and relevant background info and writes a cover letter, essay, or answers to specific questions",
    instructions=(
        """
    Create an free form writing
    accept info on a job openning and my relevant background info and create a custom essay.
    essay can be a cover letter, a specific essay, or answers to specific questions
    you do not have to include everything from my background, only what is relevant
    """
    ),
    tools=[model_tool_4o],
)

INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/6646261c6eb563165658bbb1 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 6646261c6eb563165658bbb1 instantiated.
INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/669a63646eb56306647e1091 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 669a63646eb56306647e1091 instantiated.


In [28]:
# Kind of document to request from the free-form agent.
essay_type = "cover letter"

# The prompt names the document type and carries the whole payload inline.
response = free_form_agent.run(
    query=(
        "use the job_info, and background_info to creat a/an "
        f"{essay_type}, content: {content}"
    )
)
response

INFO:root:Polling for Model: Start polling for model_process


AgentResponse(status=SUCCESS, data='AgentResponseData(input={'input': "use the job_info, and background_info to creat a/an cover letter, content: {'job_info': {'company_name': 'aiXplain', 'job_title': 'Senior Applied AI/ML Scientist', 'job_description': 'The role requires an advanced degree in a relevant field and 7+ years of industry experience in machine learning. Candidates should have expertise in designing and implementing machine learning models, particularly in NLP, and be proficient in programming languages like Python, C, C++, or C#. Experience with state-of-the-art models, cloud infrastructure, and Agile methodologies is essential. The position involves collaboration with scientists and engineers to develop AI/ML solutions and present complex ideas to diverse audiences.'}, 'my_background_info': {'professional_experience': '2018-Present\\tGoogle LLC\\nProduct Operations T/ Program Manager – Phones (2019-Present) Mountain View, CA\\nManaged oversees third party team of data sci

In [10]:
from aixplain.factories import AgentFactory


# Model tool handed to the formatting agent (the id below is gpt-4o).
model_tool_4o = AgentFactory.create_model_tool(model="6646261c6eb563165658bbb1")

# Agent that reformats resume or cover-letter content for pasting into docx.
formatting_agent = AgentFactory.create(
    tools=[model_tool_4o],
    name="Formats job documents",
    description="Agent that takes content for resume or free form and formats it for docx",
    instructions="""
    your job is to take an input representing the content of a resume or a cover letter and formatting it such that it is ready to be copied and pasted onto a docx document
    """,
)

INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/6646261c6eb563165658bbb1 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 6646261c6eb563165658bbb1 instantiated.
INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/669a63646eb56306647e1091 - {'Authorization': 'Token 2fa34265d7ae3236f75962bef0cd4f4b5bb267b947354c80a60ed0ff40fd79e6', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 669a63646eb56306647e1091 instantiated.


In [11]:
# The query carries no content of its own; the recorded output shows the
# content arriving through the chat history of the referenced session.
# NOTE(review): the session id is hard-coded — presumably copied from an
# earlier resume-agent run; confirm it before re-running on a fresh kernel.
response = formatting_agent.run(
    session_id="221c3785-27be-45e2-b3eb-aa517e0c9716",
    query="take this content and format it in a suitable impressive way",
)
response

INFO:root:Polling for Model: Start polling for model_process


AgentResponse(status=SUCCESS, data='AgentResponseData(input={'input': 'take this content and format it in a suitable impressive way', 'chat_history': [HumanMessage(content="use the job_info, and background_info to follow the instructions{'job_info': {'company_name': 'aiXplain', 'job_title': 'Senior Applied AI/ML Scientist', 'job_description': 'The role requires an advanced degree in a relevant field and 7+ years of industry experience in machine learning. Candidates should have expertise in designing and implementing machine learning models, particularly in NLP, and be proficient in Python and machine learning libraries. Experience with large language models, conversational AI, and cloud infrastructure is preferred. The position involves collaboration with scientists and engineers, presenting complex solutions, and working in an Agile environment.'}, 'my_background_info': {'professional_experience': '2018-Present\\tGoogle LLC\\nProduct Operations T/ Program Manager – Phones (2019-Prese