In [None]:
!pip install psycopg2-binary

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[?25l[K     |                                | 10 kB 19.7 MB/s eta 0:00:01[K     |▏                               | 20 kB 27.9 MB/s eta 0:00:01[K     |▎                               | 30 kB 31.9 MB/s eta 0:00:01[K     |▍                               | 40 kB 35.4 MB/s eta 0:00:01[K     |▌                               | 51 kB 26.6 MB/s eta 0:00:01[K     |▋                               | 61 kB 22.8 MB/s eta 0:00:01[K     |▊                               | 71 kB 20.1 MB/s eta 0:00:01[K     |▉                               | 81 kB 21.5 MB/s eta 0:00:01[K     |█                               | 92 kB 23.1 MB/s eta 0:00:01[K     |█                               | 102 kB 20.2 MB/s eta 0:00:01[K     |█▏                              | 112 kB 20.2 MB/s eta 0:00:01[K     |█▎                              | 122 kB 20.2 MB/s eta 0:00:01[K     

In [None]:
# Python
import json
import re

# Request
import requests

# Pandas
import pandas as pd 

# BeautifulSoup
from bs4 import BeautifulSoup

# Drive mount
from google.colab import drive
drive.mount('/content/drive/')

# SQL Alchemy
from sqlalchemy import create_engine

Mounted at /content/drive/


In [None]:
def get_data(page_number: int):
  """Fetch one page of programming job postings from the Get on Board API.

  Args:
    page_number: Page index sent as the ``page`` query parameter.

  Returns:
    The list stored under the response's ``"data"`` key, or ``None`` when
    that list is empty (used by callers as the "no more pages" signal).

  Raises:
    requests.HTTPError: The API answered with a 4xx/5xx status.
    requests.RequestException: Network failure or timeout.
    ValueError: The response body is not valid JSON.
    KeyError: The JSON payload has no ``"data"`` key.
  """
  print(page_number)  # progress trace, one line per requested page
  url = 'https://www.getonbrd.com/api/v0/categories/programming/jobs'
  # Let requests build and percent-encode the query string; the raw
  # brackets, quotes and spaces of ``expand`` are not valid in a URL as-is.
  params = {
      'per_page': 100,
      'page': page_number,
      'expand': '["company", "modality", "seniority"]',
  }
  # This is a header, not a request body: passing it through ``data=``
  # would send it as a form-encoded payload on a GET request.
  headers = {'content-type': 'application/json'}

  response = requests.get(url, params=params, headers=headers, timeout=30)
  # Fail loudly on HTTP errors instead of trying to parse an error page.
  response.raise_for_status()
  content = response.json()

  jobs = content['data']
  if not jobs:
    return None
  return jobs

In [None]:
def remove_emojis(text: str):
    """Return ``text`` with every run of emoji code points deleted.

    Only the four Unicode ranges listed below are treated as emoji; any
    other character (including other symbols) is left untouched.
    """
    emoji_ranges = (
        u"\U0001F600-\U0001F64F",  # emoticons
        u"\U0001F300-\U0001F5FF",  # symbols & pictographs
        u"\U0001F680-\U0001F6FF",  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF",  # flags (iOS)
    )
    # One character class covering all ranges; "+" removes whole runs at once.
    emoji_run = re.compile("[" + "".join(emoji_ranges) + "]+", flags=re.UNICODE)
    return emoji_run.sub(r'', text)

In [None]:
# Highest page number requested; the loop stops earlier as soon as the API
# returns an empty page.
MAX_PAGES = 9

# Collect one frame per page and concatenate once at the end: calling
# pd.concat inside the loop re-copies every previously fetched row on each
# iteration and starts from an empty frame, which newer pandas warns about.
page_frames = []
rows_collected = 0

for page in range(1, MAX_PAGES + 1):
  new_data = get_data(page)
  if new_data is None:
    break
  df_temp = pd.json_normalize(new_data, max_level=None)
  page_frames.append(df_temp)
  rows_collected += len(df_temp)
  print(f'{rows_collected} rows collected')

# ignore_index gives the combined frame unique 0..n-1 labels instead of
# repeating each page's own 0..99 index.
if page_frames:
  df = pd.concat(page_frames, ignore_index=True)
else:
  df = pd.DataFrame()


1
(100, 54)
2
(200, 54)
3
(300, 54)
4
(400, 54)
5
(496, 54)
6


In [None]:
# Interpret the numeric publication timestamps as seconds since the Unix
# epoch and store them back in the same column as pandas datetimes.
published_raw = df['attributes.published_at']
df['attributes.published_at'] = pd.to_datetime(published_raw, unit='s')

In [None]:
# Flattened source column name -> short column name used after renaming.
# Declaration order matters: it is also the column order of the selection.
columns_to_rename = {
    'attributes.title': 'position',
    'attributes.company.data.attributes.name': 'company_name',
    'attributes.category_name': 'category_name',
    'attributes.modality.data.attributes.name': 'modality',
    'attributes.projects': 'projects',
    'attributes.description': 'description',
    'attributes.functions': 'functions',
    'attributes.remote': 'remote',
    'attributes.remote_modality': 'remote_modality',
    'attributes.country': 'country',
    'attributes.min_salary': 'salary_min',
    'attributes.max_salary': 'salary_max',
    'attributes.seniority.data.attributes.name': 'seniority',
    'attributes.published_at': 'date_position',
    'links.public_url': 'public_url',
}

# Source columns to keep: exactly the mapping's keys, in declaration order.
columns_job = [*columns_to_rename]

# Source columns that receive text clean-up (still under their source names).
columns_to_clean = [
    'attributes.description',
    'attributes.projects',
    'attributes.functions',
]

In [None]:
# Keep only the columns listed in columns_job, in that order; all rows are kept.
df_data = df.loc[:, columns_job]

In [None]:
def get_text_t(x):
  """Strip HTML markup from ``x`` and return the remaining text.

  Args:
    x: An HTML fragment as ``str`` (or ``bytes``). Any other value, such as
      ``None`` or NaN for a missing field, is returned unchanged instead of
      raising inside BeautifulSoup.

  Returns:
    The text content of the parsed markup, or ``x`` itself when it is not
    text.
  """
  if not isinstance(x, (str, bytes)):
    return x
  # Name the parser explicitly: without it BeautifulSoup picks whichever
  # parser happens to be installed (and warns), so results can differ
  # between environments. 'html.parser' ships with Python.
  text = BeautifulSoup(x, 'html.parser').get_text()
  return text

In [None]:
# Apply get_text_t to every cell of the columns listed in columns_to_clean
# and write the results back into those same columns.
cells_to_clean = df_data.loc[:, columns_to_clean]
df_data.loc[:, columns_to_clean] = cells_to_clean.applymap(get_text_t)

In [None]:
# Switch to the short column names, renumber rows from 0, let pandas pick
# nullable dtypes, then add the two constant columns.
df_data = (
    df_data
    .rename(columns=columns_to_rename)
    .reset_index(drop=True)
    .convert_dtypes()
    .assign(salary_type='USD', activate=True)
)

# Remove emoji characters from the free-text columns.
text_columns = ['projects','description', 'functions']
df_data[text_columns] = df_data[text_columns].applymap(remove_emojis)

In [None]:
# Bare last expression: the notebook renders df_data as the cell output.
df_data

Unnamed: 0,position,company_name,category_name,modality,projects,description,functions,remote,remote_modality,country,salary_min,salary_max,seniority,date_position,public_url,salary_type,activate
0,BI Data Streaming Engineer,Cornershop Inc.,Programming,Full time,Cornershop by Uber es una app de servicio de e...,Experience in software development with Python...,How many engineers does it take to build an ap...,False,temporarily_remote,Chile,,,Semi Senior,2022-01-06 13:32:17,https://www.getonbrd.com/jobs/bi-data-streamin...,USD,True
1,Desarrollador Full-Stack Banco Ripley,Ripley,Programming,Full time,Quieres ser parte de una de las empresas líder...,Los conocimientos requeridos para cargo son la...,La misión de esta persona es participar del pr...,False,temporarily_remote,Chile,,,Semi Senior,2022-01-05 21:01:39,https://www.getonbrd.com/jobs/desarrollador-fu...,USD,True
2,Data Engineer,Brain Food,Programming,Full time,"Somos Brain Food, una consultora especializada...","2+ years of data delivery, ETL (extract, trans...",El cargo tendrá como responsabilidad desarroll...,True,fully_remote,Remote,,,Junior,2022-01-04 21:45:24,https://www.getonbrd.com/jobs/data-engineer-br...,USD,True
3,React Native Typescript Developer,The Highlights App,Programming,Full time,The Highlights App. The new way to watch and e...,We are looking for an Engineer who is capable ...,Develop Typescript / React-Native main applica...,True,fully_remote,Remote,2900,3200,Semi Senior,2022-01-04 20:45:02,https://www.getonbrd.com/jobs/frontend-develop...,USD,True
4,Front-end Engineer,Cornershop Inc.,Programming,Full time,Cornershop by Uber es una app de servicio de e...,"Have experience with JavaScript, HTML, CSS, Re...",How many people are needed to create an applic...,False,temporarily_remote,Chile,,,Semi Senior,2022-01-04 16:53:20,https://www.getonbrd.com/jobs/frontend-enginee...,USD,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
491,Data Engineer,Intelimétrica,Programming,Full time,Los servicios de Intelimétrica se enfocan en e...,¿Qué necesitas para trabajar con nosotros?✓ Ha...,Actualmente estamos en la búsqueda de un(a) ca...,False,no_remote,Mexico,1100,1400,Sin experiencia,2021-03-11 13:11:08,https://www.getonbrd.com/jobs/data-engineer-in...,USD,True
492,Back-end Engineer (Python/aPIs),Belvo,Programming,Full time,"A little bit about us:We are Belvo, a financia...",Degree in Computer Science or equivalent worki...,The experience team is building applications a...,True,fully_remote,Remote,3400,4800,Semi Senior,2020-10-16 17:10:02,https://www.getonbrd.com/jobs/backend-engineer...,USD,True
493,Desarrollador/a Ruby on Rails,Lexgo,Programming,Full time,En Lexgo llevamos más de 3 años ayudando a cie...,"Proactividad. Queremos que pienses y opines, n...","En una primera instancia, serás responsable de...",True,remote_local,Remote,2000,3000,Semi Senior,2020-10-14 13:06:43,https://www.getonbrd.com/jobs/desarrollador-ra...,USD,True
494,Full-Stack Developer RoR,Garage Labs,Programming,Full time,En Garagelabs somos consultores tecnológicos. ...,En Garage Labs tenemos sólo dos requisitos exc...,"En una primera instancia, serás responsable de...",True,remote_local,Remote,1800,2500,Junior,2019-12-27 19:52:58,https://www.getonbrd.com/jobs/full-stack-devel...,USD,True
