### Instalar dependencias

In [1]:
! pip install pandas
! pip install requests
! pip install beautifulsoup4



### Obtener data

In [2]:
import requests
import json

try:
    # Reuse the locally cached listings when they exist, so re-running the
    # notebook does not query the API again.
    with open("jobs.json", 'r', encoding='utf-8') as file:
        jobs = json.load(file)

except (FileNotFoundError, json.JSONDecodeError):
    # No usable cache (file missing or not valid JSON): fetch from the API.
    response = requests.get(
        "https://www.getonbrd.com/api/v0/search/jobs",
        params={
            "query": "Backend",
            "per_page": 100,
            "page": 1,
        },
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=30,
    )
    # Surface HTTP errors (4xx/5xx) here instead of failing later with a
    # confusing KeyError / JSON error on an error payload.
    response.raise_for_status()

    jobs = response.json()["data"]

    # Write the cache only after a successful fetch.
    with open("jobs.json", 'w', encoding='utf-8') as file:
        json.dump(jobs, file, ensure_ascii=False, indent=4)
        



### Guardar data

In [3]:
import json

def guardar_en_json(array_de_objetos, nombre_archivo):
    """Write ``array_de_objetos`` as JSON to the file ``nombre_archivo``.

    The file is opened in write mode (replacing any previous content) with
    UTF-8 encoding. Non-ASCII characters are written as-is rather than as
    ``\\uXXXX`` escapes, and the output is indented with 4 spaces.

    Args:
        array_de_objetos: JSON-serializable object to store.
        nombre_archivo: Path of the destination file.
    """
    dump_options = {"ensure_ascii": False, "indent": 4}
    with open(nombre_archivo, mode='w', encoding='utf-8') as output_file:
        json.dump(array_de_objetos, output_file, **dump_options)


# Persist the in-memory job listings to jobs.json.
guardar_en_json(array_de_objetos=jobs, nombre_archivo='jobs.json')

### Construir data frame

In [9]:
import pandas as pd
from bs4 import BeautifulSoup

# Attributes that arrive as HTML fragments and are flattened to plain text.
_HTML_FIELDS = ("description", "projects", "functions", "benefits", "desirable")

# Column order of the resulting DataFrame. Passing it explicitly keeps the
# schema stable even when there are no jobs to convert. Any key added to the
# record in _job_to_record must also be listed here, or it will be dropped.
_JOB_COLUMNS = [
    "id", "title", *_HTML_FIELDS,
    "seniority", "remote", "remote_modality", "remote_zone",
    "countries", "lang", "category_name", "perks",
    "min_salary", "max_salary", "published_at",
    "response_time_min", "response_time_max",
    "applications_count", "public_url",
]


def _html_to_text(fragment):
    """Return the text of an HTML fragment; missing, None or empty input gives ""."""
    # dict.get(key, "") still yields None when the key is present with a null
    # value, and BeautifulSoup cannot parse None, so guard against it here.
    if not fragment:
        return ""
    return BeautifulSoup(fragment, 'html.parser').get_text()


def _job_to_record(job):
    """Flatten one job dict (``id``, ``attributes``, ``links``) into a flat record."""
    attrs = job["attributes"]
    # Tolerate a missing or null response-time entry instead of raising.
    response_time = attrs.get("response_time_in_days") or {}
    return {
        "id": job["id"],
        "title": attrs["title"],
        **{field: _html_to_text(attrs.get(field)) for field in _HTML_FIELDS},
        "seniority": attrs.get("seniority", ""),
        "remote": attrs["remote"],
        "remote_modality": attrs.get("remote_modality", ""),
        "remote_zone": attrs.get("remote_zone", ""),
        "countries": attrs["countries"],
        "lang": attrs["lang"],
        "category_name": attrs["category_name"],
        "perks": attrs["perks"],
        "min_salary": attrs.get("min_salary"),
        "max_salary": attrs.get("max_salary"),
        "published_at": attrs["published_at"],
        "response_time_min": response_time.get("min"),
        "response_time_max": response_time.get("max"),
        "applications_count": attrs["applications_count"],
        "public_url": job["links"]["public_url"],
    }


def transform_jobs_to_dataframe(jobs):
    """Convert job dicts into a flat pandas DataFrame, one row per job.

    Args:
        jobs: Iterable of dicts, each with the keys ``id``, ``attributes``
            and ``links`` (with ``links["public_url"]``).

    Returns:
        pandas.DataFrame with the columns listed in ``_JOB_COLUMNS``. The
        HTML attributes (description, projects, functions, benefits,
        desirable) are reduced to plain text. For an empty ``jobs`` the
        frame has zero rows but still carries all columns, so column
        lookups on the result do not raise.

    Raises:
        KeyError: If a job lacks a required key (e.g. ``title``, ``remote``,
            ``countries``, ``lang``, ``category_name``, ``perks``,
            ``published_at``, ``applications_count``).
    """
    records = [_job_to_record(job) for job in jobs]
    return pd.DataFrame(records, columns=_JOB_COLUMNS)

# Build the tabular view of the loaded job listings.
df_jobs = transform_jobs_to_dataframe(jobs)
# Bare trailing expression so the notebook renders the frame as the cell output.
df_jobs

Unnamed: 0,id,title,description,projects,functions,benefits,desirable,seniority,remote,remote_modality,...,lang,category_name,perks,min_salary,max_salary,published_at,response_time_min,response_time_max,applications_count,public_url
0,desarrollador-a-backend-javase-spring-boot-aws...,Desarrollador/a Back-end Javase Spring Boot AWS,"¿Qué requerimos?Técnico, Tecnólogo o Profesion...",Acerca del empleoSophos Solutions es una compa...,Tendrás la oportunidad de:Desarrollar en Java ...,Nosotros te ofrecemos 💜Contrato a término inde...,Es un Plus:Experiencia/Conocimientos en Mongo ...,"{'data': {'id': 3, 'type': 'seniority'}}",True,remote_local,...,lang_not_specified,SysAdmin / DevOps / QA,"[wellness, digital_library, health_coverage, c...",,,1716475444,,,15,https://www.getonbrd.com/jobs/desarrollador-a-...
1,tech-lead-arkho-santiago-70da,Data Tech Lead,¿Cuáles son los requerimientos del cargo? Expe...,ARKHO es una consultora experta en tecnologías...,Estamos en busca de un profesional con experie...,Día administrativo semestral hasta los 12 mese...,"Back: Microservicios, API devolopmentManejo de...","{'data': {'id': 3, 'type': 'seniority'}}",True,fully_remote,...,lang_not_specified,Programming,"[remote_full, flexible_hours, informal_dressco...",,,1716436734,5.0,9.0,172,https://www.getonbrd.com/jobs/tech-lead-arkho-...
2,desarrollador-a-backend-springboot-empresas-sb...,Desarrollador/a Back-end (Spring Boot),Experiencia en desarrollo de soluciones inform...,"Somos un grupo de empresas de retail, especial...",¿Cuál será tu desafío en este cargo?Crear o ma...,,,"{'data': {'id': 4, 'type': 'seniority'}}",False,hybrid,...,lang_not_specified,Programming,"[wellness, accessible, meals_provided, commuti...",,,1716413884,,,34,https://www.getonbrd.com/jobs/desarrollador-a-...
3,machine-learning-engineer-neuralworks-santiago...,Machine Learning Engineer,Ingeniería Civil en Computación o similarAl me...,NeuralWorks es una compañía de alto crecimient...,El equipo de analytics trabaja en diferentes p...,MacBook Air M1 o similar (con opción de compra...,"Experiencia en Infraestructura as code, observ...","{'data': {'id': 3, 'type': 'seniority'}}",False,hybrid,...,es,Machine Learning & AI,"[library, accessible, flexible_hours, internal...",,,1716241209,,,55,https://www.getonbrd.com/jobs/machine-learning...
4,arquitecto-java-bilingue-kibernum-bogota,Arquitecto Java Bilingüe,RequisitosProfesional en ingeniería de sistema...,"Kibernum, es una de las empresas líderes en te...","Responsable del diseño, arquitectura y documen...",Tipo de Trabajo: Remoto Tipo de Contrato: Inde...,,"{'data': {'id': 4, 'type': 'seniority'}}",True,remote_local,...,en,Programming,[computer_provided],,,1715899810,,,9,https://www.getonbrd.com/jobs/arquitecto-java-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,analista-qa-automatizador-iconstruye-remote,Analista QA Automatizador,Conocimientos de Ingeniería del Software.Conoc...,¡Súmate a nuestro equipo! Estamos buscando a n...,"Analizar Requerimientos, Historias de Usuario,...",5 días extras de descanso.Tarjeta amipass para...,,"{'data': {'id': 3, 'type': 'seniority'}}",False,hybrid,...,lang_not_specified,SysAdmin / DevOps / QA,"[flexible_hours, computer_provided, informal_d...",,,1715187574,,,16,https://www.getonbrd.com/jobs/analista-qa-auto...
96,full-stack-web-developer-remote-integrated-com...,Senior JavaScript Full-Stack Web Developer,A 4 or 5-year traditional Bachelor's degree in...,About ICS & the rolePowering Innovation for Hi...,"Utilize expert knowledge of JavaScript, includ...","Opportunity to work on complex, creative chall...",Experience with Client-side Database managemen...,"{'data': {'id': 4, 'type': 'seniority'}}",True,remote_local,...,en,Programming,"[flexible_hours, informal_dresscode]",3800.0,4800.0,1715187249,,,91,https://www.getonbrd.com/jobs/full-stack-web-d...
97,back-end-developer-remote-integrated-computer-...,Senior Back-end Web Developer,A 4 or 5-year traditional Bachelor's degree in...,About ICS & the PositionPowering Innovation fo...,Utilize expert JavaScript skills to develop an...,"Opportunity to work on complex, creative chall...","Experience with gRPC, MQTT or similar","{'data': {'id': 4, 'type': 'seniority'}}",True,remote_local,...,en,Programming,"[flexible_hours, informal_dresscode]",3800.0,4800.0,1715184233,,,74,https://www.getonbrd.com/jobs/back-end-develop...
98,desarollador-fullstack-iconstruye-remote-c104,Desarrollador Full-Stack,"Lenguaje de Programación Angular, JavaScript, ...",¡Súmate a nuestro equipo! Estamos buscando a n...,Colaborar en definiciones arquitectónicas TI.E...,5 días extras de descanso.Tarjeta amipass para...,,"{'data': {'id': 3, 'type': 'seniority'}}",False,hybrid,...,es,Programming,"[flexible_hours, health_coverage, computer_pro...",,,1715181455,,,118,https://www.getonbrd.com/jobs/desarollador-ful...


¿Cuántas categorías hay en el dataset?

In [5]:
# Distinct values found in the category column.
unique_categories = df_jobs['category_name'].unique()

# Report the categories themselves and how many there are.
print(f"Categorias: {unique_categories}")
print(f"Numero de categorias categorias: {len(unique_categories)}")


Categorias: ['SysAdmin / DevOps / QA' 'Programming' 'Machine Learning & AI'
 'Mobile Development' 'Data Science / Analytics'
 'Product, Innovation & Agile' 'Customer Support' 'Operations / Admin']
Numero de categorias categorias: 8


Total de trabajos en el dataset

In [6]:
# One row per job, so the row count is the number of jobs in the dataset.
total_jobs = df_jobs.shape[0]
print(total_jobs)

100


In [10]:
# Dimensions of the jobs table as a (rows, columns) tuple.
df_jobs.shape

(100, 22)

¿Cuál es la categoría con más y menos ofertas de trabajo?

In [7]:
# Number of job postings per category.
categorias_frecuencia = (
    df_jobs
    .loc[:, 'category_name']
    .value_counts()
)
print(categorias_frecuencia)

category_name
Programming                    78
SysAdmin / DevOps / QA          9
Data Science / Analytics        4
Machine Learning & AI           2
Product, Innovation & Agile     2
Customer Support                2
Operations / Admin              2
Mobile Development              1
Name: count, dtype: int64


In [8]:
# Export the jobs table to try.csv; index=False leaves the row index out of the file.
df_jobs.to_csv("try.csv", index=False)