In [1]:
! pip install pandas
! pip install requests
! pip install beautifulsoup4
! pip install matplotlib
! pip install mlxtend



In [2]:
import requests
import json
import json

try:
    with open("jobs.json", 'r', encoding='utf-8') as file:
        jobs = json.load(file)

except FileNotFoundError:
    response = requests.get("https://www.getonbrd.com/api/v0/search/jobs", params={
        "query": "Software engineering",
        "per_page": 100,
        "page": 1,
    })

    jobs = response.json()["data"]

    with open("jobs.json", 'w', encoding='utf-8') as file:
        json.dump(jobs, file, ensure_ascii=False, indent=4)


def guardar_en_json(array_de_objetos, nombre_archivo):
    
    with open(nombre_archivo, 'w', encoding='utf-8') as archivo_json:
        json.dump(array_de_objetos, archivo_json, ensure_ascii=False, indent=4)


guardar_en_json(jobs, 'jobs.json')

In [3]:
import pandas as pd
from bs4 import BeautifulSoup

def transform_jobs_to_dataframe(jobs):
    
    structured_data = []
    for job in jobs:
        job_data = {
            "id": job["id"],
            "title": job["attributes"]["title"],
            "description": BeautifulSoup(job["attributes"].get("description", ""), 'html.parser').get_text(),
            "projects": BeautifulSoup(job["attributes"].get("projects", ""), 'html.parser').get_text(),
            "functions": BeautifulSoup(job["attributes"].get("functions", ""), 'html.parser').get_text(),
            "benefits": BeautifulSoup(job["attributes"].get("benefits", ""), 'html.parser').get_text(),
            "desirable": BeautifulSoup(job["attributes"].get("desirable", ""), 'html.parser').get_text(),
            "seniority": job["attributes"].get("seniority", ""),
            "remote": job["attributes"]["remote"],
            "remote_modality": job["attributes"].get("remote_modality", ""),
            "remote_zone": job["attributes"].get("remote_zone", ""),
            "countries": job["attributes"]["countries"],
            "lang": job["attributes"]["lang"],
            "category_name": job["attributes"]["category_name"],
            "perks": job["attributes"]["perks"],
            "min_salary": job["attributes"].get("min_salary"),
            "max_salary": job["attributes"].get("max_salary"),
            "published_at": job["attributes"]["published_at"],
            "response_time_min": job["attributes"]["response_time_in_days"].get("min"),
            "response_time_max": job["attributes"]["response_time_in_days"].get("max"),
            "applications_count": job["attributes"]["applications_count"],
            "public_url": job["links"]["public_url"]
        }
        structured_data.append(job_data)
    
    return pd.DataFrame(structured_data)

df_jobs = transform_jobs_to_dataframe(jobs)

def get_seniority_type(seniority):
    return seniority['data']['type']

df_jobs['seniority_type'] = df_jobs['seniority'].apply(get_seniority_type)

seniority_index = df_jobs.columns.get_loc('seniority')

df_jobs.insert(seniority_index + 1, 'seniority_type', df_jobs.pop('seniority_type'))

df_jobs


Unnamed: 0,id,title,description,projects,functions,benefits,desirable,seniority,seniority_type,remote,...,lang,category_name,perks,min_salary,max_salary,published_at,response_time_min,response_time_max,applications_count,public_url
0,android-engineer-haystack-news-lima-3ad4,Android Engineer,Strong written and spoken English is a must!B....,Haystack News is the leading local & world new...,You'll be responsible for developing the Hayst...,Unlimited vacations :)Travel to team's offsite...,,"{'data': {'id': 3, 'type': 'seniority'}}",seniority,False,...,en,Mobile Development,"[accessible, relocation, pet_friendly, flexibl...",,,1717080915,,,93,https://www.getonbrd.com/jobs/android-engineer...
1,senior-qa-qc-automation-softserve-remote,Senior QA/QC (Manual),Possessing a Bachelor's degree in Computer Sci...,WE ARESoftServe is a IT & global digital solut...,Collaborate closely with software engineers re...,Gain certifications from leading providers (Go...,,"{'data': {'id': 4, 'type': 'seniority'}}",seniority,True,...,en,SysAdmin / DevOps / QA,"[pet_friendly, flexible_hours, health_coverage...",,,1717000763,,,41,https://www.getonbrd.com/jobs/senior-qa-qc-aut...
2,machine-learning-engineer-neuralworks-santiago...,Machine Learning Engineer,Ingeniería Civil en Computación o similarAl me...,NeuralWorks es una compañía de alto crecimient...,El equipo de analytics trabaja en diferentes p...,MacBook Air M1 o similar (con opción de compra...,"Experiencia en Infraestructura as code, observ...","{'data': {'id': 3, 'type': 'seniority'}}",seniority,False,...,es,Machine Learning & AI,"[library, accessible, flexible_hours, internal...",,,1716241209,,,86,https://www.getonbrd.com/jobs/machine-learning...
3,software-engineer-flutter-humanforest-santiago,Software Engineer Flutter,Essential Experience:2+ years of experience as...,HumanForest is a forward-thinking company that...,Join the development of Forest’s mobile applic...,,Knowledge of a state management system (BLOC d...,"{'data': {'id': 3, 'type': 'seniority'}}",seniority,False,...,en,Mobile Development,"[pet_friendly, flexible_hours, remote_partial,...",3500.0,5100.0,1715886739,,,48,https://www.getonbrd.com/jobs/software-enginee...
4,solutions-engineer-openloop-remote,Solutions Engineer,3+ years experience building production-ready ...,OpenLoop is looking for a Solutions Engineer t...,Take complex screen designs and quickly render...,"In addition to competitive salaries, this role...",Experience working with Zoho and Healthie is c...,"{'data': {'id': 4, 'type': 'seniority'}}",seniority,False,...,en,Programming,"[life_insurance, paid_sick_days, bicycle_parki...",,,1715884417,11.0,21.0,79,https://www.getonbrd.com/jobs/solutions-engine...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,junior-ios-software-engineer-kti-hunter-remote,Junior iOS Software Engineer,ExperienciaExperienced for 2+ years in program...,"Somos una Empresa de Tecnología innovadora, co...","DeberesDevelop scalable, robust, fault-toleran...",BeneficiosSeguro ComplementarioProgramas de Bi...,,"{'data': {'id': 2, 'type': 'seniority'}}",seniority,True,...,lang_not_specified,Mobile Development,"[flexible_hours, health_coverage, computer_pro...",1400.0,2100.0,1707345234,,,85,https://www.getonbrd.com/jobs/junior-ios-softw...
66,software-engineer-full-stack-apply-digital-lat...,Software Engineer (Full-Stack),Strong working knowledge of JavaScript ES6 and...,If you'd like to learn more about Apply Digita...,We are seeking a dedicated and innovative Full...,Flexibility - work where you work bestCompetit...,AWSPostgreSQLKubernetesGraphQLJAMstackNetlifyC...,"{'data': {'id': 4, 'type': 'seniority'}}",seniority,True,...,lang_not_specified,Programming,"[remote_full, flexible_hours, health_coverage,...",,,1704808682,,,582,https://www.getonbrd.com/jobs/software-enginee...
67,intermediate-qa-automation-engineer-apply-digi...,Intermediate QA Automation Engineer,You have a bachelor’s degree in Computer Scien...,If you'd like to learn more about Apply Digita...,As an Intermediate QA Automation Engineer with...,Who we are: We’re a global digital transformat...,,"{'data': {'id': 3, 'type': 'seniority'}}",seniority,True,...,lang_not_specified,SysAdmin / DevOps / QA,"[remote_full, flexible_hours, computer_provide...",,,1704393832,,,268,https://www.getonbrd.com/jobs/intermediate-qa-...
68,software-engineer-react-native-apply-digital-l...,Software Engineer React Native,Qualifications:Bachelor's degree in Computer S...,Apply Digital is hybrid/remote-friendly. The p...,"Design, develop, and maintain high-quality mob...",Flexibility — work where you work bestOpportun...,,"{'data': {'id': 3, 'type': 'seniority'}}",seniority,True,...,en,Mobile Development,"[remote_full, flexible_hours, informal_dressco...",,,1704393605,24.0,32.0,393,https://www.getonbrd.com/jobs/software-enginee...


In [8]:
def salary_interval(min_salary):
    if pd.isna(min_salary):
        return 'no indica'
    elif min_salary < 1000:
        return 'menor que 1000'
    elif 1000 <= min_salary < 1500:
        return 'entre 1000 y 1500'
    elif 1500 <= min_salary < 2000:
        return 'entre 1500 y 2000'
    elif 2000 <= min_salary < 2500:
        return 'entre 2000 y 2500'
    elif 2500 <= min_salary < 3000:
        return 'entre 2500 y 3000'
    else:
        return 'mayor que 3000'

df_jobs['salary_interval'] = df_jobs['min_salary'].apply(salary_interval)
print(df_jobs)





                                                   id  \
0            android-engineer-haystack-news-lima-3ad4   
1            senior-qa-qc-automation-softserve-remote   
2   machine-learning-engineer-neuralworks-santiago...   
3      software-engineer-flutter-humanforest-santiago   
4                  solutions-engineer-openloop-remote   
..                                                ...   
65     junior-ios-software-engineer-kti-hunter-remote   
66  software-engineer-full-stack-apply-digital-lat...   
67  intermediate-qa-automation-engineer-apply-digi...   
68  software-engineer-react-native-apply-digital-l...   
69  senior-frontend-developer-mediastream-santiago...   

                                  title  \
0                      Android Engineer   
1                 Senior QA/QC (Manual)   
2             Machine Learning Engineer   
3             Software Engineer Flutter   
4                    Solutions Engineer   
..                                  ...   
65         Jun

[['Mobile Development', 'no indica'],
 ['SysAdmin / DevOps / QA', 'no indica'],
 ['Machine Learning & AI', 'no indica'],
 ['Mobile Development', 'mayor que 3000'],
 ['Programming', 'no indica'],
 ['SysAdmin / DevOps / QA', 'entre 1500 y 2000'],
 ['Programming', 'entre 1500 y 2000'],
 ['Programming', 'entre 2500 y 3000'],
 ['Programming', 'no indica'],
 ['Programming', 'no indica'],
 ['Programming', 'no indica'],
 ['Programming', 'no indica'],
 ['Programming', 'no indica'],
 ['SysAdmin / DevOps / QA', 'no indica'],
 ['SysAdmin / DevOps / QA', 'no indica'],
 ['Data Science / Analytics', 'entre 2500 y 3000'],
 ['Programming', 'no indica'],
 ['Programming', 'entre 1500 y 2000'],
 ['SysAdmin / DevOps / QA', 'entre 1000 y 1500'],
 ['Programming', 'no indica'],
 ['Programming', 'entre 1500 y 2000'],
 ['Programming', 'mayor que 3000'],
 ['SysAdmin / DevOps / QA', 'no indica'],
 ['SysAdmin / DevOps / QA', 'no indica'],
 ['Programming', 'no indica'],
 ['Programming', 'no indica'],
 ['Machine Lea

In [19]:
## Los trabajos de backend tienen salarios mas altos.
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

array = df_jobs[['category_name','salary_interval' ]].values.tolist()
array

te = TransactionEncoder()
te_array = te.fit_transform(array)
te_array

df = pd.DataFrame(te_array, columns=te.columns_)


frequent_categories_salary = apriori(df, min_support=0.001, use_colnames=True)
frequent_categories_salary

rules = association_rules(frequent_categories_salary, metric="confidence", min_threshold=0.5)


rules = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
rules
df.shape

(70, 14)

In [None]:
for idx, rule in rules.iterrows():
    antecedents = ', '.join(list(rule['antecedents']))
    consequents = ', '.join(list(rule['consequents']))
    support = rule['support']
    confidence = rule['confidence']
    lift = rule['lift']
    
    print(f"Regla: {antecedents} -> {consequents}")
    print(f"  Soporte: {support:.2f}")
    print(f"  Confianza: {confidence:.2f}")
    print(f"  Lift: {lift:.2f}")
    print(f"  Interpretación: Si un cliente compra {antecedents}, hay una confianza del {confidence:.2%} de que también comprará {consequents}. El lift de {lift:.2f} indica que esta relación es {lift:.2f} veces más probable que si los productos fueran independientes.\n")