### Import packages

In [63]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import pandas as pd

### Get webpage

In [3]:
# get webpage
def simple_get(url, timeout=30):
    """Download ``url`` with an HTTP GET and return its body if it is HTML.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Passed straight to ``requests.get`` so that an unresponsive server
        cannot block the notebook indefinitely. Defaults to 30 seconds.

    Returns
    -------
    bytes or None
        ``resp.content`` when ``is_good_resp`` accepts the response.
        ``None`` when the response is rejected or when the request raises a
        ``RequestException`` (the error is reported through ``log_error``).
    """
    try:
        # closing() guarantees the streamed connection is released even if
        # reading the body fails part-way through.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_resp(resp):
                return resp.content
            return None

    except RequestException as e:
        log_error(f'Error during requests to {url}: {str(e)}')
        # Explicit so the "failed request" result is visible to readers.
        return None
    
def is_good_resp(resp):
    """Tell whether ``resp`` looks like a successful HTML response.

    Parameters
    ----------
    resp : object
        Anything exposing ``status_code`` and a mapping-like ``headers``
        attribute.

    Returns
    -------
    bool
        True only when the status code is 200 and the ``Content-Type``
        header contains ``html`` (case-insensitive). A missing or empty
        ``Content-Type`` header yields False.
    """
    # A response may omit Content-Type altogether; treat that as "not HTML"
    # instead of letting a KeyError escape from the header lookup.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type

def log_error(e):
    """Report an error by printing its string form to standard output."""
    message = str(e)
    print(message)
    

In [4]:
# Search parameters: "data scientist" jobs in the Los Angeles Metropolitan Area.
raw_html = simple_get('https://www.linkedin.com/jobs/search/?alertAction=viewjobs&geoId=90000049&keywords=data%20scientist&location=Los%20Angeles%20Metropolitan%20Area')

# simple_get returns None when the request fails or the response is rejected;
# stop here with a clear message rather than a TypeError from len(None).
if raw_html is None:
    raise RuntimeError('Could not download the LinkedIn job search page')

# Size of the downloaded body, as a quick sanity check.
len(raw_html)

107981

### Parse and filter using BeautifulSoup

In [5]:
# Build a searchable BeautifulSoup tree from the downloaded markup using the
# 'html.parser' backend. Call html.prettify() to inspect the parsed document.
html = BeautifulSoup(raw_html, features='html.parser')

In [69]:
# Find every job-listing card. The string passed to class_ is compared against
# the element's full class attribute, so it must match the card markup exactly.
job_list = html.find_all(class_='result-card job-result-card result-card--with-hover-state')


def _text_of(tag):
    """Return the text of ``tag``, or None when the element was not found."""
    return tag.get_text() if tag is not None else None


sieved_list = []

for job in job_list:
    # find() returns None for a missing element; record None for that field
    # instead of aborting the whole scrape with an AttributeError.
    title = _text_of(job.find('h3'))
    company = _text_of(job.find('h4'))
    location = _text_of(job.find(class_='job-result-card__location'))
    description = _text_of(job.find('p'))
    anchor = job.find('a')
    link = anchor.get('href') if anchor is not None else None

    # Field order here defines the column order used when tabulating later.
    sieved_list.append([title, company, location, description, link])

print(sieved_list)



[['Data Scientist', 'Jobot', 'Long Beach, CA, US', 'What can we do for you? We are looking for…. We combine the technical expertise with the experience gained over time, to produce the ...', 'https://www.linkedin.com/jobs/view/data-scientist-at-jobot-1561452494?refId=6afc739f-2481-446c-9510-e3c1066b931b&position=1&pageNum=0&trk=guest_job_search_job-result-card_result-card_full-click'], ['Senior Data Scientist', 'Jobspring Partners', 'Los Angeles, CA, US', 'What You Will Be Doing. What we are really looking for is someone ready to make an impact in their current role, a candidate to deliver ...', 'https://www.linkedin.com/jobs/view/senior-data-scientist-at-jobspring-partners-1541345160?refId=6afc739f-2481-446c-9510-e3c1066b931b&position=2&pageNum=0&trk=guest_job_search_job-result-card_result-card_full-click'], ['Data Scientist, Analytics', 'NEXT Trucking', 'Los Angeles, CA, US', 'Armed with experienced professionals from Amazon, Google, Facebook, Snap, and Salesforce, NEXT is seeking ch

In [None]:
# TODO: this only fetches the first page of results; the request and parsing steps need to be repeated for every results page.


In [71]:
# Tabulate the scraped rows. Each row is a positional list, so these labels
# must stay in the same order as the fields were collected.
column_names = ['title', 'company', 'location', 'description', 'link']
df = pd.DataFrame(data=sieved_list, columns=column_names)

# Preview the first few rows.
df.head()

Unnamed: 0,title,company,location,description,link
0,Data Scientist,Jobot,"Long Beach, CA, US",What can we do for you? We are looking for…. W...,https://www.linkedin.com/jobs/view/data-scient...
1,Senior Data Scientist,Jobspring Partners,"Los Angeles, CA, US",What You Will Be Doing. What we are really loo...,https://www.linkedin.com/jobs/view/senior-data...
2,"Data Scientist, Analytics",NEXT Trucking,"Los Angeles, CA, US",Armed with experienced professionals from Amaz...,https://www.linkedin.com/jobs/view/data-scient...
3,Data Scientist - Operations Research,Beyond Limits,"Glendale, California",Our technical team is looking for a specialist...,https://www.linkedin.com/jobs/view/data-scient...
4,Data Scientist - Legends of Runeterra,Riot Games,"Los Angeles, CA, US",E.g. you may lead the design and implementatio...,https://www.linkedin.com/jobs/view/data-scient...
