In [4]:
from jobspy import scrape_jobs
from utils import Logger
import time, os
# Search terms for the scraping run: the driver loop iterates this list and
# passes each entry to scrape_it_jobs as `search_term`.
# NOTE(review): each title is also embedded in the output filename (only spaces
# are replaced with underscores), so "UX UI Designer" is presumably written
# without a slash on purpose — confirm before adding titles that contain
# path separators.
job_titles = [
    "Software Engineer",
    "Data Scientist",
    "Systems Administrator",
    "Database Administrator",
    "Network Engineer",
    "Cybersecurity Analyst",
    "IT Project Manager",
    "DevOps Engineer",
    "UX UI Designer",
    "Cloud Engineer",
    "Technical Support Specialist",
    "Business Intelligence Analyst"
]

In [2]:
def scrape_it_jobs(logger, site_name, search_term, location, results_wanted, hours_old):
    """Scrape one job board for one search term and save the result as JSON.

    The scraped jobs are written to
    ``./downloaded_files/<site>_<term>_<location>_<YYYYMMDD>.json`` (spaces in
    the term and location are replaced by underscores). The output directory
    is created if it does not exist.

    Args:
        logger: Object exposing ``log(message, level)``. This function logs
            progress with level 4 and errors with level 1.
        site_name: Single job-board identifier; forwarded to ``scrape_jobs``
            as a one-element list and used as the filename prefix.
        search_term: Job title to search for.
        location: Forwarded to ``scrape_jobs`` as both ``location`` and
            ``country_indeed``.
        results_wanted: Forwarded to ``scrape_jobs`` unchanged.
        hours_old: Forwarded to ``scrape_jobs`` unchanged.

    Returns:
        None. Exceptions raised while scraping or saving are caught and
        reported through ``logger`` instead of being propagated.
    """
    try:
        jobs = scrape_jobs(
            site_name=[site_name],
            search_term=search_term,
            location=location,
            results_wanted=results_wanted,
            hours_old=hours_old,  # (only Linkedin/Indeed is hour specific, others round up to days old)
            country_indeed=location,  # only needed for indeed / glassdoor
            # linkedin_fetch_description=True  # get full description and direct job url for linkedin (slower)
            # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
        )
        logger.log(f"Found {len(jobs)} jobs", 4)
    except Exception as e:
        logger.log(f"Error during scrape jobs: {e}", 1)
        # Nothing was scraped, so there is nothing to save. Returning here
        # avoids a second, misleading "jsonizing" error caused by `jobs`
        # being unbound.
        return

    try:
        # Dynamically create the JSON filename
        curdate = time.strftime("%Y%m%d")
        resultpath = "./downloaded_files"
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(resultpath, exist_ok=True)
        filename = f"{resultpath}/{site_name}_{search_term.replace(' ', '_')}_{location.replace(' ', '_')}_{curdate}.json"
        jobs.to_json(filename, orient='records', force_ascii=False)
        logger.log(f"JSON saved as {filename}", 4)
    except Exception as e:
        logger.log(f"Error during jsonizing results: {e}", 1)

In [3]:
# Run configuration shared by every search term below.
logger = Logger()
site_name, location = "linkedin", "South Korea"
results_wanted, hours_old = 100, 72

# One scrape (and one output file) per job title.
for job_title in job_titles:
    logger.log(f"scraping for {job_title}...", 4)
    scrape_it_jobs(logger, site_name, job_title, location, results_wanted, hours_old)
    # Pause between consecutive searches (presumably to avoid hammering the site).
    time.sleep(5)

2024-08-12 16:53:26,659 - JobSpy - INFO - LinkedIn search page: 1
2024-08-12 16:53:30,428 - JobSpy - INFO - LinkedIn search page: 2
2024-08-12 16:53:34,907 - JobSpy - INFO - LinkedIn search page: 3
2024-08-12 16:53:41,943 - JobSpy - INFO - LinkedIn search page: 4
2024-08-12 16:53:45,574 - JobSpy - INFO - LinkedIn search page: 5
2024-08-12 16:53:45,972 - JobSpy - INFO - Linkedin finished scraping
2024-08-12 16:53:51,056 - JobSpy - INFO - LinkedIn search page: 1
2024-08-12 16:53:56,141 - JobSpy - INFO - LinkedIn search page: 2
2024-08-12 16:53:59,811 - JobSpy - INFO - LinkedIn search page: 3
2024-08-12 16:54:06,447 - JobSpy - INFO - LinkedIn search page: 4
2024-08-12 16:54:12,277 - JobSpy - INFO - LinkedIn search page: 5
2024-08-12 16:54:13,131 - JobSpy - INFO - Linkedin finished scraping
2024-08-12 16:54:18,224 - JobSpy - INFO - LinkedIn search page: 1
2024-08-12 16:54:21,878 - JobSpy - INFO - LinkedIn search page: 2
2024-08-12 16:54:22,484 - JobSpy - INFO - Linkedin finished scraping
2