In [1]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:
    
    # Define the two lists to concatenate
    DataScience = ['Data Science', 'Big data', 'Machine learning','ML', 'Data mining', 'Artificial intelligence', 'AI', 'Predictive modeling',
               'Statistical analysis', 'Data visualization', 'Deep learning', 'Natural language processing', 'Business intelligence',
               'Data warehousing', 'Data management', 'Data cleaning', 'Feature engineering', 'Time series analysis', 'Text analytics',
               'Database', 'SQL', 'NoSQL', 'Neural networks', 'Regression analysis', 'Clustering', 'Dimensionality reduction',
               'Anomaly detection', 'Recommender systems', 'Data integration', 'Data governance']
    
    MachineLearning = ['Machine learning', 'Data preprocessing', 'Feature selection', 'Feature engineering', 'Data visualization',
                   'Model selection', 'Hyperparameter tuning', 'Cross-validation', 'Ensemble methods', 'Neural networks', 'Deep learning',
                   'Convolutional neural networks', 'Recurrent neural networks', 'Natural language processing', 'Computer vision',
                   'Reinforcement learning', 'Unsupervised learning', 'Clustering', 'Dimensionality reduction', 'Bayesian methods',
                   'Time series analysis', 'Random forest', 'Gradient boosting', 'Support vector machines', 'Decision trees', 'Regression analysis']

    # Concatenate the two lists
    keywords = DataScience + MachineLearning
    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


## Scrappigng data for Sweden

In [6]:
country_name= "Sweden"
geoId="105117694"
companies=['Klarna', 'Spotify', ' Scania', 'Ericsson', 'Volvo Group', 'Electrolux', 'H&M', 'Nordea', 'Handelsbanken', 'Swedbank', 'SAS', 'Atlas Copco', 'Com Hem', 'Net Insight', 'Tacton', 'Peltarion', 'Fidesmo', 'Sup46', 'Kognity', 'Karma', 'Mavenoid', 'Qualcomm', 'Northvolt', 'King', 'TDA Creative', 'Ark Kapital', 'Sinch', 'AFRY', 'TELUS International AI Data Solutions', 'Chalmers University of Technology', 'Linköping University', 'Arm', 'Frontiers', 'AstraZeneca', 'Amoria Bond', 'CGI', 'TELUS International']

print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("DS_jobs_sweden.json")

Country:  Sweden

Company 1: Klarna
         Job 1: Product Manager - Platform Products downloaded!
         Job 2: Sanctions & AML Governance downloaded!
         Job 3: Director, Global Vendor Operations downloaded!
         Job 4: Senior Product Manager - Platform Products downloaded!
         Job 5: Portfolio Credit Risk Senior Analyst downloaded!
         Job 6: Senior Software Engineer - Functional Programming downloaded!
         Job 7: Sanctions & AML Governance downloaded!
         Job 8: Senior Product Manager - Platform Products downloaded!
         Job 9: Portfolio Credit Risk Senior Analyst downloaded!
         Job 10: Senior Software Engineer - Java downloaded!
         Job 11: Frontend Developer / Sweden downloaded!
Company 2: Spotify
         Job 1: Digital Operations Specialist downloaded!
         Job 2: Digital Operations Specialist downloaded!
         Job 3: Python Software Developer downloaded!
         Job 4: C++ Software Developer downloaded!
         Job 5: Ass

         Job 1: Data Engineer Trade AI Team downloaded!
Company 35: Ark Kapital
         No jobs found for Ark Kapital
Company 36: Sinch
         No jobs found for Sinch
Company 37: AFRY
         No jobs found for AFRY
Company 38: TELUS International AI Data Solutions
         No jobs found for TELUS International AI Data Solutions
Company 39: Chalmers University of Technology
         No jobs found for Chalmers University of Technology
Company 40: Linköping University
         Job 1: Doktorand inom design downloaded!
Company 41: Arm
         Job 1: Senior Software Engineer, High-Performance Embedded Software (SE) downloaded!
         Job 2: Dynamic Documentation Lead (SE) downloaded!
         Job 3: Cloud Frontend Engineer (SE) downloaded!
Company 42: Frontiers
         No jobs found for Frontiers
Company 43: AstraZeneca
         Job 1: Head of Respiratory & Immunology, Neuroscience, Vaccines & Immune Therapies (RINVI) Safety downloaded!
         Job 2: Director Product Management (Da

# full stalk Sweden

In [1]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:    

    # Concatenate the two lists
    keywords = ['Front-end development', 'HTML', 'CSS', 'JavaScript', 'React', 'Angular', 'Vue.js', 'Bootstrap', 'jQuery', 'responsive design',      'Back-end development', 'Node.js', 'Python', 'Ruby', 'PHP', 'Java', '.NET', 'SQL', 'NoSQL', 'RESTful APIs', 'web servers',      'Database management', 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', 'Cassandra', 'Oracle', 'SQL Server',      'DevOps', 'AWS', 'Azure', 'Google Cloud', 'Docker', 'Kubernetes', 'Git', 'Jenkins', 'Travis CI', 'CircleCI', 'monitoring and logging tools',      'Project management', 'Agile', 'Scrum', 'Kanban', 'JIRA', 'Trello', 'Asana', 'project planning', 'team collaboration', 'communication skills']

    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


In [2]:
country_name= "Sweden"
geoId="105117694"
companies = ['SimpleSign', 'InterEx Group', 'Iver Sverige', 'HCLTech', 'Fortnox', 'Orbis', 'Lynk & Co', 'Swedbank','Computer Futures', 'HiQ', 'Canonical', 'Nexer Group','Enfo', 'tretton37', 'Tietoevry', 'Fortnox', 'HiQ', 'Boeing', 'Bluestep Bank', 'Telavox', 'Klarna', 'Scania', 'Volvo Group', 'Nordea', 'Handelsbanken', 'Swedbank', 'Northvolt', 'King', 'TDA Creative', 'AFRY', 'CGI',  'IBM', 'Amazon', 'Google', 'Microsoft']

print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("FS_jobs_sweden.json")

Country:  Sweden

Company 1: SimpleSign
         Job 1: Backend-utvecklare downloaded!
         Job 2: Fullstack Engineer downloaded!
         Job 3: CTO downloaded!
         Job 4: Full Stack Engineer downloaded!
         Job 5: Backend-utvecklare downloaded!
         Job 6: Fullstack Engineer downloaded!
         Job 7: CTO downloaded!
         Job 8: Full Stack Engineer downloaded!
         Job 9: Back End-utvecklare downloaded!
         Job 10: Backend-utvecklare downloaded!
         Job 11: CTO downloaded!
         Job 12: Backend-utvecklare downloaded!
         Job 13: Fullstack Engineer downloaded!
         Job 14: Backend-utvecklare downloaded!
         Job 15: Fullstack Engineer downloaded!
         Job 16: CTO downloaded!
         Job 17: Backend-utvecklare downloaded!
         Job 18: Fullstack Engineer downloaded!
         Job 19: CTO downloaded!
         Job 20: Full Stack Engineer downloaded!
         Job 21: Back End-utvecklare downloaded!
Company 2: InterEx Group
      

         Job 104: Fullstack Developer downloaded!
         Job 105: Fullstack Developer downloaded!
         Job 106: Fullstack Developer downloaded!
         Job 107: Optimizely Developer downloaded!
         Job 108: Embedded Developer downloaded!
         Job 109: Front-end Developer downloaded!
         Job 110: Junior Project Manager downloaded!
         Job 111: Great coworker wanted! downloaded!
         Job 112: Data Engineer downloaded!
         Job 113: Projektledare IT downloaded!
         Job 114: SCRUM Master downloaded!
         Job 115: Embedded Developer downloaded!
         Job 116: Project Manager downloaded!
         Job 117: Fullstack Developer downloaded!
         Job 118: Fullstack Developer downloaded!
         Job 119: Fullstack Developer downloaded!
         Job 120: Optimizely Developer downloaded!
         Job 121: Data Engineer downloaded!
         Job 122: Embedded Developer downloaded!
         Job 123: Front-end Developer downloaded!
         Job 124: Pro

         Job 73: Web Developer downloaded!
         Job 74: Graduate Product Marketing Manager downloaded!
         Job 75: Chief Information Security Officer downloaded!
         Job 76: SDR team manager downloaded!
         Job 77: Performance Marketing Manager downloaded!
         Job 78: Talent Analytics Lead downloaded!
         Job 79: Product Marketing Lead downloaded!
         Job 80: Community Engineer downloaded!
         Job 81: Technical Author - Ubuntu and Canonical products downloaded!
         Job 82: Executive Assistant downloaded!
         Job 83: Marketing operations specialist downloaded!
         Job 84: Manager - Web and Design Project Management downloaded!
         Job 85: Web Developer downloaded!
         Job 86: Community Engineer downloaded!
         Job 87: Web Developer downloaded!
         Job 88: Graduate Product Marketing Manager downloaded!
         Job 89: Manager, Talent Science downloaded!
         Job 90: Performance Marketing Manager downloaded!
  

         Job 38: Experienced Front-End Developer downloaded!
         Job 39: Senior BI Developer downloaded!
         Job 40: DevOps Engineer downloaded!
         Job 41: Software Developer (Lead) downloaded!
         Job 42: Fullstack Software Developer downloaded!
         Job 43: Engineering Manager downloaded!
         Job 44: Software Developer (Lead) downloaded!
         Job 45: Experienced Front-end Developer downloaded!
         Job 46: Engineering Manager to Telavox downloaded!
         Job 47: IT Technician downloaded!
         Job 48: Fullstack Software Developer downloaded!
         Job 49: Backend Developer to successful SaaS company downloaded!
         Job 50: Experienced Front-End Developer downloaded!
         Job 51: Senior BI Developer downloaded!
         Job 52: DevOps Engineer downloaded!
         Job 53: Software Developer (Lead) downloaded!
         Job 54: Fullstack Software Developer downloaded!
         Job 55: Engineering Manager downloaded!
         Job 56

         Job 56: Customer Service Representative - German Speaking downloaded!
         Job 57: Senior Software Engineer - Java downloaded!
         Job 58: Senior Software Engineer - JavaScript downloaded!
         Job 59: Senior Developer - JavaScript - Search & Compare downloaded!
         Job 60: Senior Software Engineer - Functional Programming downloaded!
         Job 61: Senior Software Engineer - DevOps - Public API Gateway downloaded!
         Job 62: Engineering Manager - iOS / Android downloaded!
         Job 63: Product Manager - Platform Products downloaded!
         Job 64: Product Manager - Consumer Products downloaded!
         Job 65: Product Manager - AdTech Products downloaded!
         Job 66: Software Engineer - Java downloaded!
         Job 67: Director, Global Vendor Operations downloaded!
         Job 68: Vendor Governance Manager downloaded!
         Job 69: Senior Product Manager - Platform Products downloaded!
         Job 70: Portfolio Credit Risk Senior Ana

         Job 59: Backendutvecklare till Handelsbanken Stockholm downloaded!
         Job 60: Senior Data/ML Engineer to Advanced Analytics and AI Department downloaded!
         Job 61: SAS-utvecklare inom Financial Crime Prevention till Handelsbanken downloaded!
         Job 62: Javautvecklare inom integrationsområdet till Handelsbanken downloaded!
         Job 63: IT-arkitekt till Handelsbankens betalningsområde downloaded!
         Job 64: Senior Javautvecklare downloaded!
         Job 65: Javautvecklare till Handelsbanken i Stockholm downloaded!
         Job 66: Androidutvecklare till Handelsbanken i Malmö downloaded!
         Job 67: Senior Data/ML Engineer to Advanced Analytics and AI Department downloaded!
         Job 68: SAS-utvecklare inom Financial Crime Prevention till Handelsbanken downloaded!
         Job 69: Javautvecklare inom integrationsområdet till Handelsbanken downloaded!
Company 26: Swedbank
         No jobs found for Swedbank
Company 27: Northvolt
         Job 1: