In [None]:
!pip install -qq nest_asyncio pydantic pydantic_ai rich html2text python-dotenv

In [None]:
import nest_asyncio

# Patch asyncio so that nested event loops are allowed.
# NOTE(review): presumably needed so the agents' run_sync() calls work inside
# the notebook's already-running event loop — confirm.
nest_asyncio.apply()

In [None]:
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one exists.
# NOTE(review): presumably this is where the API key for the "groq:" models
# used below comes from — confirm.
load_dotenv()

In [None]:
from rich import print as pprint

In [None]:
from pydantic_ai import Agent

# Minimal agent with no tools and no structured result type.
# NOTE(review): presumably a smoke test that the model is reachable before the
# real agents are built — confirm.
test_agent = Agent(
    model="groq:llama-3.2-3b-preview",
    system_prompt="You are a commercial international Pilot."
)

response = test_agent.run_sync("Guide me and give me a path to become a commercial pilot like you")
# Display the reply, as the other run cells do; without this the cell performs
# the request but shows nothing.
pprint(response.data)

In [None]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import html2text
import re

def clean_text(text):
    """Normalise blank-line runs in extracted text.

    Leading and trailing whitespace is stripped first; after that, any two
    newlines separated only by whitespace (including further newlines) are
    replaced by a single blank line.
    """
    trimmed = text.strip()
    collapsed = re.sub(r'\n\s*\n', '\n\n', trimmed)
    return collapsed

def scrape_website(url, selector=None, timeout=30):
    """
    Scrapes data from a website and converts HTML to Markdown.

    The Markdown is also written to ``scraped_content_<timestamp>.md`` in the
    current working directory. A failure to write that file is reported but
    does not discard the scraped content.

    Parameters:
    url (str): The URL of the website to scrape
    selector (str, optional): CSS selector to target specific elements
    timeout (float or tuple, optional): Seconds to wait for the server,
        passed straight to ``requests.get``. Defaults to 30.

    Returns:
    str: Markdown formatted text, or an empty string if the page could not be
    fetched or converted (the error is printed, not raised)
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        # requests applies no timeout by default, so a stalled server would
        # otherwise block the caller indefinitely.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements
        for element in soup.select('script, style, nav, footer, header'):
            element.decompose()

        # Convert to Markdown
        h = html2text.HTML2Text()
        h.ignore_links = False
        h.ignore_images = False
        h.body_width = 0  # No wrapping

        if selector:
            elements = soup.select(selector)
            content = '\n\n'.join(h.handle(str(element)) for element in elements)
        else:
            # A document without a <body> tag leaves soup.body as None, and
            # str(None) would be converted to the literal text "None".
            root = soup.body if soup.body is not None else soup
            content = h.handle(str(root))

        content = clean_text(content)

    except Exception as e:
        # Deliberately best-effort: callers rely on getting "" rather than an
        # exception when the page cannot be fetched or converted.
        print(f"Error: {e}")
        return ""

    # Saving a copy is a convenience only, so a write failure is reported
    # without throwing away content that was scraped successfully.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    try:
        with open(f'scraped_content_{timestamp}.md', 'w', encoding='utf-8') as file:
            file.write(content)
    except OSError as e:
        print(f"Error saving scraped content: {e}")

    return content

# Example run of the scraper on a sample page, with no CSS selector.
website_url = "https://www.cs.cmu.edu/~fgandon/miscellaneous/murphy/"
content = scrape_website(url=website_url)

In [None]:
from dataclasses import dataclass
from typing import List

from pydantic import BaseModel, Field
from pydantic_ai import RunContext


# Tool Input
@dataclass
class JobInformationFetchDeps:
    """Run dependencies for the job-description parser agent.

    Declared as that agent's ``deps_type`` and read by the ``get_job_details``
    tool through ``ctx.deps``.
    """

    job_post_url: str  # URL of the job posting page to scrape


# Tool Output - Agent Input
@dataclass
class JobDescriptionAgentDependecies:
    """Value returned by the ``get_job_details`` tool.

    NOTE(review): "Dependecies" is a misspelling of "Dependencies"; the name is
    left unchanged because ``get_job_details`` references it.
    """

    job_posting_information: str  # text returned by scrape_website() for the posting


# Structured result of the job-description parser agent (declared as its
# ``result_type``). Each field carries its own ``description`` string; those
# strings are runtime text and are intentionally left exactly as written.
class JobDescriptionAgentResult(BaseModel):
    # Job title of the posting.
    role: str = Field(
        description="The job title or role position being described (e.g., 'Senior Software Engineer', 'Product Manager')"
    )
    # Name of the hiring company.
    company_name: str = Field(description="The Company which posted job")
    # Free-text experience requirement (a string, not a number of years).
    experience: str = Field(
        description="Required years and type of experience for the position (e.g., '5+ years of software development')"
    )
    # One entry per required skill.
    skills: List[str] = Field(
        description="List of specific technical skills, tools, or competencies required for the role (e.g., ['Python', 'AWS', 'Machine Learning'])"
    )
    # Long-form summary of the role.
    description: str = Field(
        description="Detailed overview of the job responsibilities, requirements, and expectations"
    )


# Instructions for the parser agent, held in a named constant so the Agent(...)
# call below stays compact. The text itself is unchanged.
JOB_DESCRIPTION_PARSER_SYSTEM_PROMPT = """You are a specialized HR assistant focused on analyzing and structuring job descriptions. Your primary responsibilities are:
1. Use the get_job_details tool to retrieve job posting information
2. Extract and categorize key components including:
   - Core role/position title
   - Required experience level
   - Essential skills and qualifications
   - Detailed role description and responsibilities
Format all outputs according to the JobDescription schema. Be precise and consistent in your categorization. When analyzing skills:
- Focus on specific technical and professional competencies
- Separate distinct skills into individual items
- Standardize skill names (e.g., "Python" not "python programming")
If job details are ambiguous or incomplete, make reasonable inferences based on industry standards while maintaining accuracy."""

# Agent that takes a JobInformationFetchDeps (the posting URL) as run
# dependencies and returns a JobDescriptionAgentResult.
job_description_parser_agent = Agent(
    model="groq:llama-3.2-3b-preview",
    system_prompt=JOB_DESCRIPTION_PARSER_SYSTEM_PROMPT,
    deps_type=JobInformationFetchDeps,
    result_type=JobDescriptionAgentResult,
)

@job_description_parser_agent.tool
def get_job_details(
    ctx: RunContext[JobInformationFetchDeps],
) -> JobDescriptionAgentDependecies:
    """
    Retrieves and extracts job posting information
    """
    # NOTE(review): the docstring above is presumably what the model sees as
    # this tool's description, so its wording is left untouched — confirm.
    #
    # The URL comes from the run's deps, not from a model-supplied argument.
    # scrape_website() returns "" when it fails, so the wrapped text may be empty.
    scraped_markdown = scrape_website(url=ctx.deps.job_post_url)
    return JobDescriptionAgentDependecies(job_posting_information=scraped_markdown)

In [None]:
# Run the parser agent against one job posting and show the structured result.
job_post_deps = JobInformationFetchDeps(
    job_post_url="https://openai.com/careers/ai-abuse-and-threat-intelligence-analyst/"
)
job_description_agent_result = job_description_parser_agent.run_sync(
    "Please extract job description for the provided URL",
    deps=job_post_deps,
)
pprint(job_description_agent_result.data)

In [None]:
# Run dependencies for the cold-email writer agent (declared as its
# ``deps_type``): wraps the structured job description produced by the parser
# agent.
class ColdEmailWriterAgentInput(BaseModel):
    # Parsed posting the email should be written about.
    job_description: JobDescriptionAgentResult = Field(
        description="Parsed job posting details including role, company, required experience, skills, and full description"
    )

# Structured result of the cold-email writer agent (declared as its
# ``result_type``): one email, split into subject line and body. The Field
# ``description`` strings are runtime text and are left exactly as written.
class ColdEmailWriterAgentResponse(BaseModel):
    # Email subject line.
    subject: str = Field(
        description="A catchy and personalised subject line that captures attention and highlights key value proposition"
    )
    # Full email text.
    body: str = Field(
        description="Professional email body that matches job requirements with portfolio expertise, includes introduction, value proposition, relevant project examples, and call-to-action"
    )


# Agent that drafts a recruiter-style cold email for a parsed job posting and
# returns it as a ColdEmailWriterAgentResponse (subject + body).
cold_email_writer_agent = Agent(
    model='groq:llama-3.2-3b-preview',
    deps_type=ColdEmailWriterAgentInput,
    result_type=ColdEmailWriterAgentResponse,
    system_prompt="""
You are Ria, a tech recruitment specialist at TopTal.io, reaching out to hiring managers about your firm's pre-vetted engineering talent pool. Using the provided job description:

1. Analyze role requirements and highlight relevant TopTal portfolio projects
2. Create concise, compelling subject lines highlighting available talent
3. Write brief, impactful email body (3-4 paragraphs max) that:
   - Opens with specific reference to company's hiring needs
   - Showcases relevant TopTal's portfolio projects matching required tech stack
   - Emphasizes that TopTal has pre-vetted engineers ready to interview
   - Includes clear call-to-action to discuss available candidates

Keep tone professional yet conversational. Focus on Toptal's talent pool and proven project experience.
"""
)


@cold_email_writer_agent.system_prompt
def _job_description_system_prompt(ctx: RunContext[ColdEmailWriterAgentInput]) -> str:
    """Return an extra system-prompt section containing the parsed job posting.

    The static prompt above tells the model to use "the provided job
    description", but run dependencies are only visible to code that receives
    a RunContext. Without this function nothing would put the job description
    in front of the model.

    Parameters:
    ctx: Run context whose ``deps`` is the ColdEmailWriterAgentInput passed to
        the run.

    Returns:
    str: The job description serialized as indented JSON, with a short header.
    """
    job_description = ctx.deps.job_description
    return "Job description (JSON):\n" + job_description.model_dump_json(indent=2)

In [None]:
# Draft a cold email for the job description parsed earlier and show it.
email_writer_input = ColdEmailWriterAgentInput(
    job_description=job_description_agent_result.data
)
result = cold_email_writer_agent.run_sync("Write a cold email", deps=email_writer_input)
pprint(result.data)