In [81]:
import logging
import json
import pandas as pd
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters, RemoteFilters

# Raise the root logger to INFO (the default level is WARN).
logging.basicConfig(level=logging.INFO)

# Column-oriented accumulator for scraped postings: one independent list per
# field, appended to by the on_data callback and dumped to JSON afterwards.
job_data = {
    column: []
    for column in ('title', 'company', 'date_posted', 'job_desc', 'link')
}

def on_data(data: EventData):
    """Report one scraped posting and record its fields in ``job_data``.

    Prints a one-line summary (title, company, date, link and the length of
    the description), then appends each field of ``data`` to the matching
    column list of the module-level ``job_data`` dict.
    """
    print('[ON_DATA]', data.title, data.company, data.date, data.link, len(data.description))

    # (column in job_data, attribute on the event) in the order they are stored.
    column_sources = (
        ('title', 'title'),
        ('company', 'company'),
        ('date_posted', 'date'),
        ('job_desc', 'description'),
        ('link', 'link'),
    )
    for column, attribute in column_sources:
        job_data[column].append(getattr(data, attribute))

def on_error(error):
    """Print ``error`` to standard output, prefixed with ``[ON_ERROR]``."""
    prefix = '[ON_ERROR]'
    print(prefix, error)


def on_end():
    """Print the ``[ON_END]`` marker to standard output."""
    marker = '[ON_END]'
    print(marker)


# Build the scraper. The per-argument notes restate the library's option
# descriptions.
scraper = LinkedinScraper(
    # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver)
    chrome_executable_path='C:\\Users\\manho\\Downloads\\chromedriver_win32\\chromedriver',
    # Custom Chrome options here
    chrome_options=None,
    # Overrides headless mode only if chrome_options is None
    headless=True,
    # How many threads will be spawned to run queries concurrently
    # (one Chrome driver for each thread)
    max_workers=4,
    # Slow down the scraper to avoid 'Too many requests (429)' errors
    slow_mo=1.3,
)

# Attach the callbacks defined above to the scraper's events.
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

# Describe the single search step by step: filters -> options -> query.
search_filters = QueryFilters(
    relevance=RelevanceFilters.RELEVANT,
    time=TimeFilters.MONTH,
    type=[TypeFilters.FULL_TIME, TypeFilters.INTERNSHIP],
    experience=None,
)
search_options = QueryOptions(
    locations=['United States'],
    optimize=False,
    limit=100,
    filters=search_filters,
)
queries = [
    Query(query='Data Scientist', options=search_options),
]

# Run the search, then persist whatever on_data accumulated in job_data.
scraper.run(queries)
with open('job_data.json', 'w') as fp:
    json.dump(job_data, fp)

erp-result_search-card 3883
INFO:li:scraper:('[Data Scientist][United States][43]', 'Processed')
[ON_DATA] Data Scientist, Analytics Facebook 2021-05-13 https://www.linkedin.com/jobs/view/data-scientist-analytics-at-facebook-2540548093?refId=lyEmVZz6mYDMWJSSg5yiiw%3D%3D&trackingId=cBjHSdErg1wtpLDffHswtw%3D%3D&position=19&pageNum=1&trk=public_jobs_jserp-result_search-card 4433
INFO:li:scraper:('[Data Scientist][United States][44]', 'Processed')
[ON_DATA] Data Scientist Deloitte 2021-05-28 https://www.linkedin.com/jobs/view/data-scientist-at-deloitte-2564928187?refId=lyEmVZz6mYDMWJSSg5yiiw%3D%3D&trackingId=nZgH%2F%2BGwszJe1laDJOwQrw%3D%3D&position=20&pageNum=1&trk=public_jobs_jserp-result_search-card 4955
INFO:li:scraper:('[Data Scientist][United States][45]', 'Processed')
[ON_DATA] Data Scientist Microsoft 2021-05-28 https://www.linkedin.com/jobs/view/data-scientist-at-microsoft-2560757140?refId=lyEmVZz6mYDMWJSSg5yiiw%3D%3D&trackingId=M03JAfinVwR5mnpm5nVvWg%3D%3D&position=21&pageNum=1&t

In [91]:
# Load the results written to 'job_data.json' into a pandas object.
# NOTE(review): this rebinds `job_data`, replacing the dict of lists that
# on_data appends to; presumably the scraper cell is re-run (recreating the
# dict) before scraping again — confirm.
job_data = pd.read_json('job_data.json')
# Trailing expression so the notebook displays the first rows.
job_data.head()

Unnamed: 0,title,company,date_posted,job_desc,link
0,Data Scientist,Hulu,2021-05-28,Summary\n\nThe Data Science team at Disney Str...,https://www.linkedin.com/jobs/view/data-scient...
1,Data Scientist,Deloitte,2021-05-28,"Are you an analytical, data-driven professiona...",https://www.linkedin.com/jobs/view/data-scient...
2,Data Scientist,TheLoops,2021-05-29,We fundamentally believe that today’s product ...,https://www.linkedin.com/jobs/view/data-scient...
3,Data Scientist,Noom Inc.,2021-05-29,"At Noom, we use scientifically proven methods ...",https://www.linkedin.com/jobs/view/data-scient...
4,Data Scientist - Podcasts,Spotify,2021-05-28,Spotify’s goal is to become the world’s leadin...,https://www.linkedin.com/jobs/view/data-scient...
