In [1]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:
    
    # Define the two lists to concatenate
    DataScience = ['Data Science', 'Big data', 'Machine learning','ML', 'Data mining', 'Artificial intelligence', 'AI', 'Predictive modeling',
               'Statistical analysis', 'Data visualization', 'Deep learning', 'Natural language processing', 'Business intelligence',
               'Data warehousing', 'Data management', 'Data cleaning', 'Feature engineering', 'Time series analysis', 'Text analytics',
               'Database', 'SQL', 'NoSQL', 'Neural networks', 'Regression analysis', 'Clustering', 'Dimensionality reduction',
               'Anomaly detection', 'Recommender systems', 'Data integration', 'Data governance']
    
    MachineLearning = ['Machine learning', 'Data preprocessing', 'Feature selection', 'Feature engineering', 'Data visualization',
                   'Model selection', 'Hyperparameter tuning', 'Cross-validation', 'Ensemble methods', 'Neural networks', 'Deep learning',
                   'Convolutional neural networks', 'Recurrent neural networks', 'Natural language processing', 'Computer vision',
                   'Reinforcement learning', 'Unsupervised learning', 'Clustering', 'Dimensionality reduction', 'Bayesian methods',
                   'Time series analysis', 'Random forest', 'Gradient boosting', 'Support vector machines', 'Decision trees', 'Regression analysis']

    # Concatenate the two lists
    keywords = DataScience + MachineLearning
    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


## Scrappigng data for Netherlands

In [6]:
country_name= "Netherlands"
geoId="102890719"
companies =['Booking.com', 'ASML',  'NXP Semiconductors',  'Coolblue', 'Exact','Deloitte', 'Capgemini', 'IBM', 'Amazon', 'Google', 'Microsoft', 'Uber', 'Xccelerated | Part of Xebia','Agoda', 'Qualcomm', 'Eindhoven University of Technology', 'ABN AMRO Bank N.V.', 'Kadaster', 'Cooder', 'Barrington James', 'Harnham', 'Creative Fabrica', 'University of Amsterdam', 'Lely', 'TELUS International AI Data Solutions', 'StarApple', 'Albert Heijn', 'TELUS International', 'Delft University of Technology', 'Orange Quarter']

print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("DS_jobs_netherlands.json")

Country:  Netherlands

Company 1: Booking.com
         Job 1: Instructional Designer downloaded!
         Job 2: Director of Engineering DMLP downloaded!
         Job 3: Principal Software Engineer - Marketing Tech downloaded!
         Job 4: Jr. Compliance Operations Analyst downloaded!
         Job 5: Financial Analyst downloaded!
         Job 6: Instructional Designer downloaded!
         Job 7: Principal Software Engineer - Marketing Tech downloaded!
Company 2: ASML
         Job 1: Senior Financial Controller downloaded!
         Job 2: Talent & Learning Sector Lead downloaded!
         Job 3: Strategic Sourcing Project Manager downloaded!
         Job 4: Senior Finance Controller downloaded!
         Job 5: Talent & Learning Manager – HR NL & EU downloaded!
         Job 6: Senior Financial Controller – Financial Services – General accounting downloaded!
         Job 7: Senior Business Program Manager downloaded!
         Job 8: Talent Acquisition Team Lead downloaded!
         Job

         Job 2: Data & Risk Analist downloaded!
Company 6: Deloitte
         Job 1: Consultant Data & Responsible Insight downloaded!
         Job 2: Consultant Data Engineering and Analytics downloaded!
         Job 3: Consultant Data & Responsible Insight downloaded!
Company 7: Capgemini
         No jobs found for Capgemini
Company 8: IBM
         Job 1: Tax Accounting & Compliance Analyst downloaded!
Company 9: Amazon
         No jobs found for Amazon
Company 10: Google
         Job 1: Student Researcher, 2023 downloaded!
         Job 2: Student Researcher, 2023 downloaded!
         Job 3: Junior Back-end Developer downloaded!
         Job 4: Data Analist downloaded!
         Job 5: Student Researcher, 2023 downloaded!
         Job 6: Junior Back-end Developer downloaded!
         Job 7: Student Researcher, 2023 downloaded!
         Job 8: Data Analist downloaded!
         Job 9: Student Researcher, 2023 downloaded!
         Job 10: Junior Back-end Developer downloaded!
         Job

# full stalk netherlands

In [10]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:    

    # Concatenate the two lists
    keywords = ['Front-end development', 'HTML', 'CSS', 'JavaScript', 'React', 'Angular', 'Vue.js', 'Bootstrap', 'jQuery', 'responsive design',      'Back-end development', 'Node.js', 'Python', 'Ruby', 'PHP', 'Java', '.NET', 'SQL', 'NoSQL', 'RESTful APIs', 'web servers',      'Database management', 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', 'Cassandra', 'Oracle', 'SQL Server',      'DevOps', 'AWS', 'Azure', 'Google Cloud', 'Docker', 'Kubernetes', 'Git', 'Jenkins', 'Travis CI', 'CircleCI', 'monitoring and logging tools',      'Project management', 'Agile', 'Scrum', 'Kanban', 'JIRA', 'Trello', 'Asana', 'project planning', 'team collaboration', 'communication skills']

    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


In [11]:
country_name= "Netherlands"
geoId="102890719"
companies =['NAVARA', 'Buro5 Recruitment', 'StarApple', 'CGI Nederland', 'Good Company', 'typ', 'StackBird', 'DEXTER', ' Sparkling People', 'Cooder', 'TNO', 'Ministerie van Defensie', 'ALTEN Nederland', 'Kadaster', 'Opus Recruitment Solutions', 'Capgemini', 'IBM', 'Amazon', 'Google', 'Microsoft', 'Agoda', 'Qualcomm', 'Eindhoven University of Technology', 'Trinamics', 'Cegeka', 'Barrington James', 'CodeGuild', 'Creative Fabrica', 'Centric', 'Leap29', 'Picnic Technologies', 'Albert Heijn', 'Frontend Professionals', 'Orange Quarter', 'HiQ', 'Darwin Recruitment']


print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("FS_jobs_netherlands.json")

Country:  Netherlands

Company 1: NAVARA
         Job 1: Data Scientist | Navara Enterprise Analytics B.V. downloaded!
         Job 2: Data Science Consultant | Navara downloaded!
         Job 3: Data Engineer | Navara Enterprise Analytics B.V. downloaded!
         Job 4: Mobile Engineer | Navara downloaded!
         Job 5: Junior Frontend Developer in Rotterdam downloaded!
         Job 6: React Native Developer | Navara downloaded!
         Job 7: Full-stack Software Engineer downloaded!
         Job 8: Senior Full-stack Software Engineer downloaded!
         Job 9: Medior Frontend Developer in Rotterdam downloaded!
         Job 10: Senior Frontend Developer in Rotterdam downloaded!
         Job 11: Senior Tech Lead in Rotterdam downloaded!
         Job 12: Data Science Consultant | Navara downloaded!
         Job 13: Mobile Engineer | Navara downloaded!
         Job 14: Junior Frontend Developer in Rotterdam downloaded!
         Job 15: Frontend Engineer | Navara downloaded!
        

         Job 75: Netwerk Engineer | ANWB downloaded!
         Job 76: Data Analist | Scamander Solutions downloaded!
         Job 77: Data Scientist | Interfood Group downloaded!
         Job 78: Junior C# Front-end Developer | PalmSens downloaded!
         Job 79: Junior .NET developer | Rotterdam | Betabit downloaded!
         Job 80: Data Analyst | Valcon NL downloaded!
         Job 81: Junior Backend Developer C# .NET | Gamebasics downloaded!
         Job 82: Back-End Developer | ANWB downloaded!
         Job 83: Support engineer | IT Performance Group downloaded!
         Job 84: Business Intelligence Specialist | CleanLease downloaded!
         Job 85: Senior Back-end Developer | ANWB downloaded!
         Job 86: Business Analist | Meant2B downloaded!
         Job 87: Informatie Consultant | NORISK Visionair downloaded!
         Job 88: QA Engineer | Gaiyo downloaded!
         Job 89: Data Analist | Scamander Solutions downloaded!
         Job 90: Data Scientist | Interfood Group

         Job 202: Data Scientist | Brink downloaded!
         Job 203: Business Analist | Meant2B downloaded!
         Job 204: Data Scientist / Analist | Infiniot downloaded!
         Job 205: Informatie Consultant | NORISK Visionair downloaded!
         Job 206: Netwerk Engineer | ANWB downloaded!
         Job 207: Data Analist | Scamander Solutions downloaded!
         Job 208: Junior C# Front-end Developer | PalmSens downloaded!
         Job 209: Junior .NET developer | Rotterdam | Betabit downloaded!
         Job 210: Data Analyst | Valcon NL downloaded!
         Job 211: Junior Backend Developer C# .NET | Gamebasics downloaded!
         Job 212: Back-End Developer | ANWB downloaded!
         Job 213: Support engineer | IT Performance Group downloaded!
         Job 214: Business Analist | Meant2B downloaded!
         Job 215: Data Scientist / Analist | Infiniot downloaded!
         Job 216: Informatie Consultant | NORISK Visionair downloaded!
         Job 217: Netwerk Engineer | A

         Job 4: Business Analist - Banking downloaded!
         Job 5: Senior IT Project Manager downloaded!
Company 5: Good Company
         No jobs found for Good Company
Company 6: typ
         No jobs found for typ
Company 7: StackBird
         No jobs found for StackBird
Company 8: DEXTER
         No jobs found for DEXTER
Company 9:  Sparkling People
         Job 1: Immigration Associate downloaded!
         Job 2: Immigration Associate downloaded!
         Job 3: Immigration Associate downloaded!
         Job 4: Immigration Associate downloaded!
Company 10: Cooder
         No jobs found for Cooder
Company 11: TNO
         Job 1: Internship | Recurrent deep learning applied to radar data downloaded!
         Job 2: Internship | Photothermal design for light-driven sustainable chemical processes downloaded!
         Job 3: Internship | Metallic nanowires for improved photo/electrocatalysis downloaded!
         Job 4: Internship | Recurrent deep learning applied to radar data downlo

         Job 12: Python Developer - Utrecht (Hybrid) downloaded!
Company 16: Capgemini
         No jobs found for Capgemini
Company 17: IBM
         No jobs found for IBM
Company 18: Amazon
         Job 1: Data Scientist downloaded!
         Job 2: Global Head of Procurement downloaded!
         Job 3: E-commerce Specialist downloaded!
         Job 4: Global Head of Procurement downloaded!
         Job 5: International Business Director (Amsterdam) downloaded!
         Job 6: Data Scientist downloaded!
         Job 7: Global Head of Procurement downloaded!
         Job 8: Scrum Master downloaded!
         Job 9: Marketing Analytics & Operations Intern downloaded!
         Job 10: E-commerce Specialist downloaded!
         Job 11: Global Head of Procurement downloaded!
         Job 12: E-commerce Specialist downloaded!
         Job 13: Director, International Business (Amsterdam) downloaded!
         Job 14: International Business Director (Amsterdam) downloaded!
         Job 15: Data S

         Job 25: Office 365 Consultant downloaded!
         Job 26: Agile Coach downloaded!
         Job 27: Junior Software Architect downloaded!
         Job 28: Productivity Consultant downloaded!
         Job 29: Agile Projectmanager downloaded!
         Job 30: Product Owner downloaded!
         Job 31: Java Developer downloaded!
         Job 32: Junior Digital Workplace Consultant downloaded!
         Job 33: Office 365 Consultant downloaded!
         Job 34: Project Manager ERP downloaded!
         Job 35: Office 365 Consultant downloaded!
         Job 36: Traineeship Business & IT downloaded!
         Job 37: Junior IT Proces Consultant downloaded!
         Job 38: Agile Coach downloaded!
         Job 39: Junior Software Architect downloaded!
         Job 40: Productivity Consultant downloaded!
         Job 41: Agile Projectmanager downloaded!
         Job 42: Junior Digital Workplace Consultant downloaded!
         Job 43: Office 365 Consultant downloaded!
         Job 44: Scr

         Job 82: Java Developer | €70.000 downloaded!
         Job 83: Senior Java Developer | €90,000 downloaded!
         Job 84: Senior Full-Stack JavaScript Developer | €85.000 + 10% bonus + remote possibilities downloaded!
         Job 85: Front end Developer | €78.000 obv 36 hours + bonus downloaded!
         Job 86: Medior Java Developer| €70.000 downloaded!
         Job 87: Java Developer | eHealth downloaded!
         Job 88: Java Developer downloaded!
         Job 89: Java Developer | Salaris €77.800 + bonus downloaded!
         Job 90: Senior Backend Engineer | €115.000 + 100% Remote downloaded!
         Job 91: Senior Backend Node.JS Engineer | €85.000 + Shares downloaded!
         Job 92: Junior DevOps Engineer | Salaris tot € 55.000 + 6K bonus downloaded!
         Job 93: Senior Java Software Engineer | Tot € 125.000,- + aandelenopties + remote mogelijkheden downloaded!
         Job 94: Java Developer - AI Product leert Honderden Miljoenen Kinderen Leren! downloaded!
    

         Job 26: Software Engineer JS downloaded!
         Job 27: Front-end Software Engineer downloaded!
         Job 28: Front End Ontwikkelaar | React downloaded!
         Job 29: Internship/stage - IT downloaded!
         Job 30: Frontend Developer bij Stoffenmanager downloaded!
         Job 31: Stagiair(e) Development downloaded!
         Job 32: Front-end Developer downloaded!
         Job 33: Junior Web Developer downloaded!
         Job 34: Junior Support Web Developer downloaded!
         Job 35: Junior Software Engineer Utrecht Max EUR50K downloaded!
         Job 36: Javascript Developer downloaded!
         Job 37: Junior Front-end Developer downloaded!
         Job 38: Front-end Developer bij Picturae downloaded!
         Job 39: Front-End Developer and Analyst downloaded!
         Job 40: Front-end Developer downloaded!
         Job 41: Frontend Developer downloaded!
         Job 42: Junior Front-end Developer, Utrecht downloaded!
         Job 43: Front-End Developer down

Company 35: HiQ
         No jobs found for HiQ
Company 36: Darwin Recruitment
         Job 1: Business Analyst downloaded!
         Job 2: Office 365 Workplace Specialist downloaded!
         Job 3: Data analyst downloaded!
         Job 4: Data quality analyst downloaded!
         Job 5: System Administrator downloaded!
         Job 6: System Administrator downloaded!
         Job 7: IT Operations Specialist downloaded!
         Job 8: Jr. Systeembeheerder downloaded!
         Job 9: Jr. Systeembeheerder downloaded!
         Job 10: Data modeler downloaded!
         Job 11: Field Engineer downloaded!
         Job 12: Operations Manager (Cloud) downloaded!
         Job 13: Microsoft 365 Administrator downloaded!
         Job 14: System Administrator downloaded!
         Job 15: IAM Analyst downloaded!
         Job 16: Atlassian Consultant downloaded!
         Job 17: Cloud Operations Manager downloaded!
         Job 18: FullStack Dev. (React/NodeJs) Healthcare downloaded!
         Job 1