In [15]:
import json
import re
from typing import List, Optional, Any
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel, Extra

In [16]:
# Collect job-posting links from a paginated search listing
def get_job_links(base_url, timeout=30):
    """Walk a paginated job listing and collect every job link.

    Starting at ``base_url``, each page is downloaded and every
    ``<a class="job-link">`` anchor is recorded.  The
    ``<a class="pagination-next">`` anchor is then followed until it is
    missing, carries the ``disabled`` class, has no ``href``, or points at
    a page that was already visited.

    Args:
        base_url: URL of the first results page.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of ``{'title': str, 'url': str}`` dicts in listing order.

    Raises:
        requests.RequestException: If a page cannot be downloaded, the
            request times out, or the server answers with an HTTP error
            status.
    """
    jobs = []
    visited = set()
    page_url = base_url

    # The visited set guards against a "next" link that cycles back to an
    # earlier page, which would otherwise loop forever.
    while page_url and page_url not in visited:
        visited.add(page_url)

        response = requests.get(page_url, timeout=timeout)
        # Fail loudly instead of parsing an error page as an empty listing.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find every job link on the page
        for job_element in soup.find_all('a', class_='job-link'):
            href = job_element.get('href')
            if not href:
                # An anchor without a target cannot be followed later.
                continue
            jobs.append({
                'title': job_element.get_text(strip=True),
                # urljoin handles root-relative, page-relative and absolute
                # hrefs; plain concatenation only worked for the first kind.
                'url': urljoin(response.url, href),
            })

        # Pagination handling
        next_page = soup.find('a', class_='pagination-next')
        next_href = next_page.get('href') if next_page is not None else None
        if next_href and 'disabled' not in next_page.get('class', []):
            page_url = urljoin(response.url, next_href)
        else:
            page_url = None

    return jobs

# Base URL
base_url = 'https://jobs.apple.com/en-us/search?location=mexico-MEXC'

# Collect the job links
job_links = get_job_links(base_url)

# NOTE: the per-job detail loop that used to live here called
# get_job_details() before its definition further down, which raises
# NameError when the file is run top to bottom.  job_details_list is built
# after that definition instead.

In [17]:
# Helpers and entry point for scraping the details of a single job posting

# Texts that mean "no" when a boolean field is scraped as text.
_NEGATIVE_FLAG_TEXTS = frozenset({'no', 'false', '0', 'n', 'off'})


def _find_text(soup, tag, css_class):
    """Return the stripped text of the first matching element, or None."""
    element = soup.find(tag, class_=css_class)
    return element.get_text(strip=True) if element is not None else None


def _parse_int(text):
    """Convert scraped text to an int, or None when no number is present."""
    if text is None:
        return None
    try:
        return int(text)
    except ValueError:
        pass
    # Tolerate decorated values such as "$25,000" or "3 years".
    match = re.search(r'-?\d[\d,]*', text)
    if match is None:
        return None
    return int(match.group().replace(',', ''))


def _parse_float(text):
    """Convert scraped text to a float, or None when it is not numeric."""
    if text is None:
        return None
    try:
        return float(text)
    except ValueError:
        return None


def _parse_flag(text):
    """Convert scraped text to a bool; explicit negatives and '' are False."""
    if text is None:
        return None
    # bool(text) alone would report "No" or "false" as True.
    return bool(text) and text.casefold() not in _NEGATIVE_FLAG_TEXTS


def _collect_items(soup, css_class, build):
    """Build one object per ``<li class=css_class>``; None when there are none."""
    items = [
        build(element.get_text(strip=True))
        for element in soup.find_all('li', class_=css_class)
    ]
    return items or None


def get_job_details(job, timeout=30):
    """Download one job page and extract its fields into a plain dict.

    The page is parsed with BeautifulSoup and each field is looked up by
    CSS class.  A field whose element is absent is set to ``None``; numeric
    fields whose text holds no number are also ``None``.  The values are
    passed to ``JobPost`` (with ``Skill``, ``Aptitude``, ``Tool``,
    ``Language`` and ``Benefit`` for the list fields), which are expected to
    be defined elsewhere in this notebook.

    Args:
        job: Dict with the keys ``'title'`` and ``'url'``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The result of ``JobPost(...).dict()``.

    Raises:
        requests.RequestException: If the page cannot be downloaded, the
            request times out, or the server answers with an HTTP error
            status.
    """
    response = requests.get(job['url'], timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Most fields live in a <span>; these shortcuts keep the call below flat.
    def text(css_class):
        return _find_text(soup, 'span', css_class)

    def integer(css_class):
        return _parse_int(text(css_class))

    def number(css_class):
        return _parse_float(text(css_class))

    def flag(css_class):
        return _parse_flag(text(css_class))

    company_img = soup.find('img', class_='company-img')

    job_details = JobPost(
        name=job['title'],
        externalLink=job['url'],
        description=_find_text(soup, 'div', 'description'),
        createdAt=text('posted-date'),
        availableSlots=integer('available-slots'),
        skills=_collect_items(
            soup, 'skill',
            lambda name: Skill(name=name, level="", experience=0),
        ),
        aptitudes=_collect_items(
            soup, 'aptitude', lambda name: Aptitude(name=name)
        ),
        tools=_collect_items(soup, 'tool', lambda name: Tool(name=name)),
        languages=_collect_items(
            soup, 'language',
            lambda name: Language(name=name, proficiency=""),
        ),
        benefits=_collect_items(
            soup, 'benefit', lambda name: Benefit(name=name)
        ),
        scholarity=text('scholarity'),
        workhours=text('workhours'),
        locationConditions=text('location'),
        nationalRemote=flag('national-remote'),
        minSalary=integer('min-salary'),
        maxSalary=integer('max-salary'),
        minAge=integer('min-age'),
        maxAge=integer('max-age'),
        sex=text('sex'),
        yearsOfExperience=integer('years-of-experience'),
        status=text('status'),
        country=text('country'),
        updatedAt=text('updated-at'),
        driversLicense=flag('drivers-license'),
        degree=flag('degree'),
        validPassport=flag('valid-passport'),
        validVisa=flag('valid-visa'),
        nationalRelocation=flag('national-relocation'),
        internationalRelocation=flag('international-relocation'),
        availabilityToTravel=flag('availability-to-travel'),
        seniority=text('seniority'),
        showSalaryRange=flag('show-salary-range'),
        state=text('state'),
        city=text('city'),
        postalCode=text('postal-code'),
        slug=text('slug'),
        latitude=number('latitude'),
        longitude=number('longitude'),
        companyName=text('company-name'),
        # .get avoids a KeyError on an <img> that has no src attribute.
        companyImg=company_img.get('src') if company_img is not None else None,
    )
    return job_details.dict()

# Search URL of the Apple Mexico job board
base_url = "https://jobs.apple.com/en-us/search?location=mexico-MEXC"

# Collect the job links
job_links = get_job_links(base_url)

# Fetch the details of every job; extend() appends one result at a time,
# so results gathered before a failure are kept in the list.
job_details_list = []
job_details_list.extend(get_job_details(job) for job in job_links)

In [18]:
# Save the results to a JSON file (UTF-8, non-ASCII characters kept as-is)
with open('apple_jobs.json', mode='w', encoding='utf-8') as f:
    f.write(json.dumps(job_details_list, ensure_ascii=False, indent=4))