In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pickle

# Function to extract job details
def extract_job_details(job_soup):
    """Pull the headline fields out of a parsed job-posting page.

    Args:
        job_soup: Parsed posting HTML. Any object whose ``find(tag, attrs)``
            returns an element exposing ``.text`` (or ``None`` when nothing
            matches) works, e.g. a ``BeautifulSoup`` document.

    Returns:
        A dict with the keys ``job_title``, ``company_name``,
        ``time_posted``, ``num_applicants`` and ``job_description``. Each
        value is the whitespace-stripped text of the matching element, or
        ``None`` when that element is absent.
    """
    # Field name -> (tag, CSS class) of the element that holds the field.
    selectors = {
        "job_title": ("h2", "top-card-layout__title"),
        "company_name": ("a", "topcard__org-name-link"),
        "time_posted": ("span", "posted-time-ago__text"),
        "num_applicants": ("span", "num-applicants__caption"),
        "job_description": ("div", "description__text description__text--rich"),
    }

    job_post = {}
    for field, (tag, css_class) in selectors.items():
        try:
            job_post[field] = job_soup.find(tag, {"class": css_class}).text.strip()
        except AttributeError:
            # find() yielded None (element missing), so there is no `.text`.
            # Only this case is treated as "field unavailable"; a bare
            # `except` would also swallow KeyboardInterrupt and real bugs.
            job_post[field] = None

    return job_post

# Example variables
title = "Data Scientist"

# Guest search endpoint; it answers with an HTML fragment of job cards.
list_url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"

# Hand the keywords to `params` so requests URL-encodes spaces and reserved
# characters (&, #, +) in the title. The timeout keeps a stalled connection
# from hanging the script indefinitely.
response = requests.get(list_url, params={"keywords": title}, timeout=30)

# Fail loudly on an HTTP error (e.g. rate limiting) instead of parsing an
# error page and silently ending up with zero jobs.
response.raise_for_status()

# Get the HTML, parse the response and find all list items (job postings)
list_data = response.text
list_soup = BeautifulSoup(list_data, "html.parser")
page_jobs = list_soup.find_all("li")

# Create an empty list to store the job IDs
id_list = []

# Iterate through job postings to find job IDs
for job in page_jobs:
    base_card_div = job.find("div", {"class": "base-card"})
    if base_card_div is None:
        # Not every <li> is guaranteed to wrap a job card; skip the others.
        continue

    urn = base_card_div.get("data-entity-urn")
    if not urn:
        continue

    # The ID is the fourth colon-separated field of the URN (presumably
    # "urn:li:jobPosting:<id>" -- confirm against a live response).
    urn_parts = urn.split(":")
    if len(urn_parts) < 4:
        continue

    job_id = urn_parts[3]
    id_list.append(job_id)

# Initialize an empty list to store job information
job_list = []

# Loop through the list of job IDs and fetch each posting
for job_id in id_list:
    # Construct the URL for each job using the job ID
    job_url = f"https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/{job_id}"

    # One failed or stalled request must not hang the run or discard the
    # postings already collected, so bound the wait and skip failures.
    try:
        job_response = requests.get(job_url, timeout=30)
        # An error page would otherwise be parsed into an all-None row.
        job_response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping job {job_id}: {exc}")
        continue

    job_soup = BeautifulSoup(job_response.text, "html.parser")

    # Extract job details
    job_post = extract_job_details(job_soup)

    # Append the job details to the job_list
    job_list.append(job_post)

# Persist the scraped postings, then read them straight back so the
# DataFrame below is built from exactly what is stored on disk.
pickle_path = 'DS_Linkedin.pkl'

with open(pickle_path, 'wb') as sink:
    pickle.dump(job_list, sink)

with open(pickle_path, 'rb') as source:
    job_list_loaded = pickle.load(source)

# One row per posting, one column per extracted field.
jobs_df = pd.DataFrame(job_list_loaded)

# Show at most the first 50 postings.
preview = jobs_df.head(50)
print(preview)

                                       job_title       company_name  \
0                                 Data Scientist                Glo   
1                        Data Scientist, Product             Notion   
2                          Junior Data Scientist  Team Remotely Inc   
3  Data Scientist (L5) - Content & Marketing DSE            Netflix   
4                                 Data Scientist    Rue Gilt Groupe   
5              Data Scientist, Product Analytics               Etsy   
6                                 Data Scientist    Rue Gilt Groupe   
7              Data Scientist, Product Analytics               Etsy   

    time_posted  num_applicants  \
0  14 hours ago  177 applicants   
1    4 days ago            None   
2  13 hours ago   41 applicants   
3    4 days ago            None   
4    5 days ago            None   
5    1 week ago            None   
6    1 week ago            None   
7    1 week ago  176 applicants   

                                     job_des