In [1]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:
    
    # Define the two lists to concatenate
    DataScience = ['Data Science', 'Big data', 'Machine learning','ML', 'Data mining', 'Artificial intelligence', 'AI', 'Predictive modeling',
               'Statistical analysis', 'Data visualization', 'Deep learning', 'Natural language processing', 'Business intelligence',
               'Data warehousing', 'Data management', 'Data cleaning', 'Feature engineering', 'Time series analysis', 'Text analytics',
               'Database', 'SQL', 'NoSQL', 'Neural networks', 'Regression analysis', 'Clustering', 'Dimensionality reduction',
               'Anomaly detection', 'Recommender systems', 'Data integration', 'Data governance']
    
    MachineLearning = ['Machine learning', 'Data preprocessing', 'Feature selection', 'Feature engineering', 'Data visualization',
                   'Model selection', 'Hyperparameter tuning', 'Cross-validation', 'Ensemble methods', 'Neural networks', 'Deep learning',
                   'Convolutional neural networks', 'Recurrent neural networks', 'Natural language processing', 'Computer vision',
                   'Reinforcement learning', 'Unsupervised learning', 'Clustering', 'Dimensionality reduction', 'Bayesian methods',
                   'Time series analysis', 'Random forest', 'Gradient boosting', 'Support vector machines', 'Decision trees', 'Regression analysis']

    # Concatenate the two lists
    keywords = DataScience + MachineLearning
    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


## Scrappigng data for Denmark

In [None]:
country_name= "Denmark"
geoId="104514075"
companies=['Maersk', 'Danske Bank', 'William Demant Holding', 'Nets', 'Ambu A/S', 'NNIT', 'Bang & Olufsen', 'DFDS', 'FLSmidth', 'Workday', 'Boston Consulting Group (BCG)', 'the LEGO Group', 'Antler', 'DTU - Technical University of Denmark', 'ATLANT 3D', 'Silo AI', 'ROCKWOOL Group', 'Corti', 'Accenture Nordics', 'TELUS International AI Data Solutions', 'Appen', 'IT-Universitetet i København', 'Nigel Frank International', 'Capgemini', 'TELUS International', 'Mindway AI']


print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("DS_jobs_denmark.json")

Country:  Denmark

Company 1: Maersk
         Job 1: Maersk Supply Service A/S - Procurement Project Manager - Renewables downloaded!
         Job 2: Lead Infrastructure Architect downloaded!
         Job 3: Analytics Engineer downloaded!
         Job 4: Lead Infrastructure Architect downloaded!
         Job 5: Analytics Engineer downloaded!
         Job 6: Analytics Engineer downloaded!
Company 2: Danske Bank
         Job 1: Skilled Business/Data Analyst in Impairment Data Analytics downloaded!
         Job 2: Software Engineer downloaded!
         Job 3: Skilled Business/Data Analyst in Impairment Data Analytics downloaded!
         Job 4: Senior Analyst in Group Treasury, Liquidity Steering downloaded!
Company 3: William Demant Holding
         No jobs found for William Demant Holding
Company 4: Nets
         No jobs found for Nets
Company 5: Ambu A/S
         No jobs found for Ambu A/S
Company 6: NNIT
         Job 1: Application Manager downloaded!
         Job 2: Senior Business I

# full stalk Denmark

In [2]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:    

    # Concatenate the two lists
    keywords = ['Front-end development', 'HTML', 'CSS', 'JavaScript', 'React', 'Angular', 'Vue.js', 'Bootstrap', 'jQuery', 'responsive design',      'Back-end development', 'Node.js', 'Python', 'Ruby', 'PHP', 'Java', '.NET', 'SQL', 'NoSQL', 'RESTful APIs', 'web servers',      'Database management', 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', 'Cassandra', 'Oracle', 'SQL Server',      'DevOps', 'AWS', 'Azure', 'Google Cloud', 'Docker', 'Kubernetes', 'Git', 'Jenkins', 'Travis CI', 'CircleCI', 'monitoring and logging tools',      'Project management', 'Agile', 'Scrum', 'Kanban', 'JIRA', 'Trello', 'Asana', 'project planning', 'team collaboration', 'communication skills']

    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


In [3]:
country_name= "Denmark"
geoId="104514075"
companies = ['Trustpilot', 'BESTSELLER', 'Canonical', 'HCLTech', 'Randstad Danmark', 'the LEGO Group', 'Mover Systems', 'Netcompany', 'Schibsted', 'Danfoss', 'Nigel Frank International', 'Tryg', 'Too Good To Go', 'Mastercard', 'Maersk', 'Danske Bank', 'William Demant Holding', 'Nets', 'Ambu A/S', 'NNIT', 'Bang & Olufsen', 'DFDS', 'FLSmidth', 'Workday', 'Boston Consulting Group (BCG)', 'Antler', 'DTU - Technical University of Denmark', 'ATLANT 3D', 'Silo AI', 'ROCKWOOL Group', 'Corti', 'Accenture Nordics', 'TELUS International AI Data Solutions', 'Appen', 'IT-Universitetet i København', 'Capgemini', 'TELUS International', 'Mindway AI']


print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("FS_jobs_denmark.json")

Country:  Denmark

Company 1: Trustpilot
         Job 1: Engineering Manager - Business Systems downloaded!
         Job 2: Software Engineering Lead - Marketing downloaded!
         Job 3: Business Development Manager, FinTech downloaded!
         Job 4: Engineering Manager - Business Systems downloaded!
         Job 5: Engineering Manager - Business Systems downloaded!
         Job 6: Software Engineering Lead - Marketing downloaded!
         Job 7: Engineering Manager - Business Systems downloaded!
         Job 8: Software Engineering Lead - Marketing downloaded!
         Job 9: Business Development Manager, FinTech downloaded!
Company 2: BESTSELLER
         Job 1: User Experience Intern downloaded!
         Job 2: Communication Student Worker downloaded!
         Job 3: Backend Developer downloaded!
         Job 4: Backend Developer - Brand Tech Department downloaded!
         Job 5: Server Infrastructure Specialist downloaded!
         Job 6: Student Worker for Network Service Aut

Company 19: Ambu A/S
         No jobs found for Ambu A/S
Company 20: NNIT
         Job 1: SDA/LAN/Wireless Network Service Architect downloaded!
         Job 2: Technical Compliance Coordinator downloaded!
         Job 3: SDA/LAN/Wireless Network Service Architect downloaded!
         Job 4: Datacenter Networking Specialist downloaded!
         Job 5: IT Infrastructure Program Manager downloaded!
         Job 6: Cloud Architect downloaded!
         Job 7: Advanced IT Infrastructure Project Manager downloaded!
         Job 8: Technical Compliance Coordinator downloaded!
         Job 9: Senior Business Intelligence Developer to join our exciting data journey downloaded!
         Job 10: Datacenter Networking Specialist downloaded!
         Job 11: Cloud Transformation Lead downloaded!
Company 21: Bang & Olufsen
         Job 1: Student Assistant, Producer downloaded!
Company 22: DFDS
         Job 1: Integration Architect downloaded!
         Job 2: Business Integration Analyst downloaded!