In [None]:
#Import dependencies
import requests
from bs4 import BeautifulSoup
import random
import pandas as pd
import logging
from tqdm import tqdm
import time

In [2]:
title = "Software Developer"  
location = "Linköping"  

In [3]:
from collections import defaultdict
programming_languages = pd.read_csv('./skills.csv')['skills'].to_list()
programming_languages = set(programming_languages)

users_of_languages = defaultdict(int)
for l in programming_languages:
    users_of_languages[l] = 0

In [4]:
job_list = []

In [None]:
for start in tqdm(range(0, 100, 25)):
    list_url = f"https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search?keywords={title}&location={location}&start={start}"

    response = requests.get(list_url)

    list_data = response.text
    list_soup = BeautifulSoup(list_data, "html.parser")
    page_jobs = list_soup.find_all("li")
        
    id_list = []

    for job in page_jobs:
        base_card_div = job.find("div", {"class": "base-card"})
        job_id = base_card_div.get("data-entity-urn").split(":")[3]
        id_list.append(job_id)
    
    for job_id in id_list:
        job_url = f"https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/{job_id}"
        
        job_response = requests.get(job_url)
        if job_response.status_code != 200:
            logging.warning(f"Failed to retrieve job posting {job_id}: Status code {job_response.status_code}")
            continue
        job_soup = BeautifulSoup(job_response.text, "html.parser")
        
        job_post = {}
        
        try:
            job_post["job_title"] = job_soup.find("h2", {"class":"top-card-layout__title font-sans text-lg papabear:text-xl font-bold leading-open text-color-text mb-0 topcard__title"}).text.strip()
        except:
            job_post["job_title"] = None
            
        try:
            job_post["company_name"] = job_soup.find("a", {"class": "topcard__org-name-link topcard__flavor--black-link"}).text.strip()
        except:
            job_post["company_name"] = None
            
        try:
            job_post["time_posted"] = job_soup.find("span", {"class": "posted-time-ago__text topcard__flavor--metadata"}).text.strip()
        except:
            job_post["time_posted"] = None
            
        try:
            job_post["num_applicants"] = job_soup.find("span", {"class": "num-applicants__caption topcard__flavor--metadata topcard__flavor--bullet"}).text.strip()
        except:
            job_post["num_applicants"] = None

        for lang in programming_languages:
            # description is in show-more-less-html__markup show-more-less-html__markup--clamp-after-5 relative overflow-hidden
            if f" {lang.lower()}" in job_soup.find("div", {"class": "show-more-less-html__markup show-more-less-html__markup--clamp-after-5 relative overflow-hidden"}).text.lower():  
                users_of_languages[lang] += 1 
        job_list.append(job_post)
        time.sleep(random.uniform(1, 5))  

100%|██████████| 4/4 [00:18<00:00,  4.62s/it]


In [6]:
# Create a pandas DataFrame using the list of job dictionaries 'job_list'
jobs_df = pd.DataFrame(job_list)
jobs_df

Unnamed: 0,job_title,company_name,time_posted,num_applicants
0,Algoritmutveckling / Reglerteknik / Modellbase...,Combine,2 weeks ago,
1,SOFTWARE DEVELOPER,Atos,2 days ago,136 applicants
2,Software Engineer,Medius,2 months ago,
3,System developer to Linköping!,TMC Sweden,6 days ago,26 applicants
4,Embedded-utvecklare,Consid,1 year ago,
5,Fullstack Developer,Cambio Group,4 days ago,37 applicants
6,Testare inom mjukvara,Professional Galaxy AB,8 months ago,39 applicants
7,C/C++-utvecklare,Deploja,2 days ago,
8,"Software Developer, Backend - Sectra Medical",Sectra,2 weeks ago,192 applicants
9,Embedded-utvecklare,Consid,1 year ago,


In [7]:
lang_usage = pd.DataFrame.from_dict(users_of_languages, orient='index', columns=['Number of Job Postings']).reset_index().rename(columns={'index': 'Programming Language'})
lang_usage.drop(lang_usage[lang_usage['Number of Job Postings'] == 0].index, inplace=True)
lang_usage = lang_usage.sort_values(by='Number of Job Postings', ascending=False)
lang_usage = lang_usage.reset_index(drop=True)
lang_usage

Unnamed: 0,Programming Language,Number of Job Postings
0,C,36
1,Python,15
2,Linux,12
3,C#,12
4,C++,10
5,Azure,10
6,C/C++,8
7,SQL,7
8,Git,6
9,Java,5
