In [1]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:
    
    # Define the two lists to concatenate
    DataScience = ['Data Science', 'Big data', 'Machine learning','ML', 'Data mining', 'Artificial intelligence', 'AI', 'Predictive modeling',
               'Statistical analysis', 'Data visualization', 'Deep learning', 'Natural language processing', 'Business intelligence',
               'Data warehousing', 'Data management', 'Data cleaning', 'Feature engineering', 'Time series analysis', 'Text analytics',
               'Database', 'SQL', 'NoSQL', 'Neural networks', 'Regression analysis', 'Clustering', 'Dimensionality reduction',
               'Anomaly detection', 'Recommender systems', 'Data integration', 'Data governance']
    
    MachineLearning = ['Machine learning', 'Data preprocessing', 'Feature selection', 'Feature engineering', 'Data visualization',
                   'Model selection', 'Hyperparameter tuning', 'Cross-validation', 'Ensemble methods', 'Neural networks', 'Deep learning',
                   'Convolutional neural networks', 'Recurrent neural networks', 'Natural language processing', 'Computer vision',
                   'Reinforcement learning', 'Unsupervised learning', 'Clustering', 'Dimensionality reduction', 'Bayesian methods',
                   'Time series analysis', 'Random forest', 'Gradient boosting', 'Support vector machines', 'Decision trees', 'Regression analysis']

    # Concatenate the two lists
    keywords = DataScience + MachineLearning
    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


## Scrappigng data for Germany

In [2]:
country_name= "Germany"
geoId="101282230"
companies= ['Parsionate', 'Sopra Steria', 'talentsconnect AG', 'E.ON', 'JAAI | JUST ADD AI GmbH', 'Datasumi','Kärcher', 'Oraylis GmbH','Allianz','ACONEXT', 'Bayer', 'BASF', 'BMW', 'Daimler','Continental', 'Fresenius Medical Care', 'Lufthansa', 'Merck', 'Munich Re', 'Volkswagen', 'Wirecard', 'Zalando', 'Delivery Hero', 'Scalable Capital', 'Flixbus Europe', 'Wefox', 'Solarisbank', 'Agoda', 'adesso SE', 'Amazon', 'BCG Platinion', 'Fraunhofer Karriere', 'Luxoft', 'sennder', 'Capgemini']
print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("DS_jobs_germany.json")

Country:  Germany

Company 1: Parsionate
         Job 1: Data Engineer / Machine Learning (all genders) downloaded!
         Job 2: Senior Presales Consultant (all genders) downloaded!
         Job 3: Senior Presales Consultant (all genders) downloaded!
         Job 4: Data Engineer / Machine Learning (all genders) downloaded!
         Job 5: Senior Presales Consultant (all genders) downloaded!
         Job 6: Senior Presales Consultant (all genders) downloaded!
         Job 7: Data Engineer / Machine Learning (all genders) downloaded!
         Job 8: Senior Presales Consultant (all genders) downloaded!
         Job 9: Senior Presales Consultant (all genders) downloaded!
         Job 10: Data Engineer / Machine Learning (all genders) downloaded!
         Job 11: Senior Presales Consultant (all genders) downloaded!
         Job 12: Senior Presales Consultant (all genders) downloaded!
         Job 13: Data Engineer / Machine Learning (all genders) downloaded!
         Job 14: Senior Pres

         Job 10: Digital Designer (m/w/d) downloaded!
         Job 11: Data Analyst (f/m/d) downloaded!
         Job 12: Senior PHP Developer (f/m/d) downloaded!
         Job 13: Data Engineer (m/f/d) downloaded!
         Job 14: Future Job Opening - Senior Java Developer (m/f/d) downloaded!
Company 4: E.ON
         No jobs found for E.ON
Company 5: JAAI | JUST ADD AI GmbH
         Job 1: Software Engineer Telefonie (m/f/x) downloaded!
         Job 2: Initiative Bewerbung (m/f/x) downloaded!
         Job 3: Sales Manager US (m/f/x) downloaded!
         Job 4: Werkstudent (m/f/x) downloaded!
         Job 5: Werkstudent (m/f/x) downloaded!
         Job 6: Technical Project Manager // Tech Consultant (m/f/x) downloaded!
         Job 7: Sales Manager US (m/f/x) downloaded!
         Job 8: Software & AI Engineer in Health (m/f/x) downloaded!
         Job 9: Sales Manager US (m/f/x) downloaded!
         Job 10: Software Engineer Telefonie (m/f/x) downloaded!
         Job 11: Rasa Chatbot Eng

         Job 2: Fluggerätmechaniker / Fluggeräteelektroniker (A320 Certifying Staff CAT A) (m/w/divers) downloaded!
         Job 3: Dispatcher/Flugdienstberater (Flight Operations Officer) (m/w/divers) downloaded!
         Job 4: Fluggerätmechaniker / Fluggeräteelektroniker (A320 Familiy Certifying Staff CAT A) (m/w/divers) downloaded!
         Job 5: Fluggerätmechaniker (m/w/d) - A330/A340/A350/B747/B777/B787 Certifying Staff CAT B1 (mehrere Stellen) downloaded!
         Job 6: Dispatcher/Flugdienstberater (Flight Operations Officer) (m/w/divers) downloaded!
         Job 7: Fluggeräteelektroniker (m/w/divers) - A320 Family Certifying Staff CAT B2 downloaded!
         Job 8: Fluggerätmechaniker / Fluggeräteelektroniker (A320 Certifying Staff CAT A) (m/w/divers) downloaded!
         Job 9: Dispatcher/Flugdienstberater (Flight Operations Officer) (m/w/divers) downloaded!
         Job 10: Fluggerätmechaniker / Fluggeräteelektroniker (A320 Certifying Staff CAT A) (m/w/divers) downloaded!
 

         Job 3: Information Security Officer (all genders) downloaded!
         Job 4: AML Intern - Voluntary Fulltime Internship (d/f/m) at Solaris downloaded!
         Job 5: Information Security Officer (all genders) downloaded!
         Job 6: Internal IT Auditor (f/m/div) at Solaris downloaded!
         Job 7: Information Security Officer (all genders) downloaded!
Company 28: Agoda
         Job 1: Senior Manager, Corporate Strategy (Bangkok Based) downloaded!
         Job 2: Senior Manager, Corporate Strategy (Bangkok Based) downloaded!
         Job 3: People BI Manager (Bangkok Based, Relocation Provided) downloaded!
         Job 4: Analyst (Supply Analytics team, Bangkok-based, Relocation provided)) downloaded!
Company 29: adesso SE
         Job 1: Data Scientist Insurance (all genders) downloaded!
Company 30: Amazon
         No jobs found for Amazon
Company 31: BCG Platinion
         No jobs found for BCG Platinion
Company 32: Fraunhofer Karriere
         Job 1: Speculative App

# full stalk Germany

In [3]:
import requests
from bs4 import BeautifulSoup
import json


class LinkedinScraper:    

    # Concatenate the two lists
    keywords = ['Front-end development', 'HTML', 'CSS', 'JavaScript', 'React', 'Angular', 'Vue.js', 'Bootstrap', 'jQuery', 'responsive design',      'Back-end development', 'Node.js', 'Python', 'Ruby', 'PHP', 'Java', '.NET', 'SQL', 'NoSQL', 'RESTful APIs', 'web servers',      'Database management', 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', 'Cassandra', 'Oracle', 'SQL Server',      'DevOps', 'AWS', 'Azure', 'Google Cloud', 'Docker', 'Kubernetes', 'Git', 'Jenkins', 'Travis CI', 'CircleCI', 'monitoring and logging tools',      'Project management', 'Agile', 'Scrum', 'Kanban', 'JIRA', 'Trello', 'Asana', 'project planning', 'team collaboration', 'communication skills']

    
    def __init__(self, country_name, geoId, companies):
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    
    
    def scrape_jobs(self, output_file_path):
        start_index = 0
        jobs_per_page = 25
        total_jobs_downloaded = 0        
        
        
        # Iterate over each company and search for matching jobs
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            for i, company in enumerate(self.companies):
                print(f"Company {i+1}: {company}")
                jobs_downloaded = 0
                # Iterate until no matching jobs are found
                while True:
                    # Construct the search URL
                    search_url = self.search_url_pattern.format(start_index, self.geoId, company, start_index)

                    # Send a GET request to the search URL
                    response = requests.get(search_url)

                    # Parse the HTML content of the page using BeautifulSoup
                    soup = BeautifulSoup(response.content, 'html.parser')

                    # Find all the job posting URLs on the page
                    job_urls = [a['href'] for a in soup.find_all('a', href=True) if '/jobs/view/' in a['href']]

                    # If no matching jobs are found, break the loop
                    if len(job_urls) == 0:
                        break

                    # Iterate over each job URL and extract the job information
                    for job_url in job_urls:               

                        # Send a GET request to the job URL

                        response = requests.get(job_url)

                        # Parse the HTML content of the page using BeautifulSoup
                        soup = BeautifulSoup(response.content, 'html.parser')

                        # Find the job title and company name
                        title = soup.find('h1', class_='topcard__title')
                        if title is not None:
                            title = title.text.strip()
                        else:
                            continue
                        company_name = soup.find('a', class_='topcard__org-name-link')
                        if company_name is not None:
                            company_name = company_name.text.strip()
                        else:
                            continue

                        # Find the div tag that contains the job description
                        description_div = soup.find('div', class_='description__text')

                        if description_div is not None:
                            # Extract the job description text
                            job_description = description_div.get_text().strip()
                            job_description = job_description.replace('Show more', '').replace('Show less', '')

                            # Remove any extra spaces from the job description
                            job_description = ' '.join(job_description.split())


                            found_keywords = [keyword for keyword in self.keywords if keyword in job_description]                      

                            if found_keywords:
                                job_info = {
                                    'Country_name': self.country_name,
                                    'company_name': company_name,
                                    'keywords': found_keywords,
                                    'description': job_description 
                                } 
                                
                                with open(output_file_path, 'a', encoding='utf-8') as json_file:
                                    json.dump(job_info, json_file, ensure_ascii=False)
                                    json_file.write('\n')
                                
                                jobs_downloaded += 1
                                total_jobs_downloaded += 1                       
                        
                    
                                print(f"         Job {jobs_downloaded}: {title} downloaded!") 
                            
                
                if jobs_downloaded == 0:
                    print(f"         No jobs found for {company}")  
                            
                        
        print()
        print("Total {} jobs downloaded!".format(total_jobs_downloaded ))                                  


In [6]:
country_name= "Germany"
geoId="101282230"
companies = ['OBI next', 'Koelnmesse GmbH', 'JustWatch', 'Atos', 'ROSEN', 'GR4', 'Safety io', 'Expleo Group', 'JustWatch', 'FERCHAU', 'Nordex Group', 'XIBIX Solutions GmbH', 'Raisin', 'valantic', '360T', 'Vinted', 'Sopra Steria', 'Allianz', 'Bayer', 'Lufthansa', 'Munich Re', 'Volkswagen', 'Zalando', 'Delivery Hero', 'Agoda', 'adesso SE', 'BCG Platinion', 'Fraunhofer Karriere', 'sennder']
print("Country: ", country_name)
print()

scraper = LinkedinScraper(country_name, geoId, companies )
scraper.scrape_jobs("FS_jobs_germany.json")

Country:  Germany

Company 1: OBI next
         Job 1: Data Analyst – Web & App (m/w/d) downloaded!
         Job 2: Fullstack Developer (m/w/d) downloaded!
         Job 3: Backend Developer (m/w/d) downloaded!
         Job 4: Security Engineer (m/w/d) downloaded!
         Job 5: (Senior) Software Engineer Java (m/w/d) downloaded!
         Job 6: Junior Cloud Security Specialist / DevSecOps (m/w/d) downloaded!
         Job 7: (Senior) Software Engineer TypeScript (m/w/d) downloaded!
         Job 8: (Senior) Full Stack Software Engineer TypeScript (m/w/d) downloaded!
         Job 9: Data Analyst – Web & App (m/w/d) downloaded!
         Job 10: Fullstack Developer (m/w/d) downloaded!
         Job 11: Security Engineer (m/w/d) downloaded!
         Job 12: Junior Cloud Security Specialist / DevSecOps (m/w/d) downloaded!
         Job 13: (Senior) Software Engineer TypeScript (m/w/d) downloaded!
         Job 14: (Senior) Full Stack Software Engineer TypeScript (m/w/d) downloaded!
         Job

         Job 10: Global Sourcing Manager (m/f/d) Nacelle Sourcing downloaded!
         Job 11: Wind & Site Engineer (m/f/d) downloaded!
         Job 12: Electrical Engineer (m/f/d) Hybrid Power Plants downloaded!
         Job 13: Electrical Engineer (m/f/d) Hybrid Power Plants downloaded!
         Job 14: Group Lead Product Integrity (m/f/d) downloaded!
         Job 15: Teamlead (m/f/d) Global Supply Chain Planning downloaded!
         Job 16: Projectmanager Blade Service (m/w/d) downloaded!
         Job 17: (Senior) Global Sourcing Manager (m/f/d) Nacelle Sourcing downloaded!
         Job 18: Construction Services Sourcing Manager (m/f/d) downloaded!
         Job 19: Project Manager (w/d/m) Technology downloaded!
         Job 20: Electrical Engineer (m/f/d) Hybrid Power Plants downloaded!
         Job 21: Projectmanager (m/f/d) Blade Service downloaded!
         Job 22: (Junior) Controller / Data Analyst Group Controlling (m/w/d) downloaded!
         Job 23: Group Accountant (m/f/d) d

Company 16: Vinted
         Job 1: Analytics Engineer, Vinted Go downloaded!
         Job 2: Business Continuity Manager downloaded!
         Job 3: Business Continuity Manager downloaded!
Company 17: Sopra Steria
         Job 1: Graduate Program Data & Analytics downloaded!
         Job 2: (Junior) Angular Frontend Developer (m/w/d) downloaded!
         Job 3: (Junior) Angular Frontend Developer (m/w/d) downloaded!
         Job 4: (Junior) Cloud Business Analyst (m/w/d) downloaded!
         Job 5: (Junior) Consultant Data & Analytics (m/w/d) downloaded!
         Job 6: Graduate Program Data & Analytics downloaded!
         Job 7: Data Scientist (m/w/d) downloaded!
         Job 8: (Junior) Content Management Consultant (m/w/d) downloaded!
         Job 9: Werkstudent Data Science & Künstliche Intelligenz (m/w/d) downloaded!
         Job 10: Data Scientist Anti Financial Crime & Compliance (m/w/d) downloaded!
         Job 11: Graduate Program Data & Analytics downloaded!
         Job 12:

         Job 37: Head of Paid Search (Bangkok Based, Relocation Provided) downloaded!
         Job 38: Manager, Strategic Partnerships – Strategy and Analytics (Bangkok based, relocation provided) downloaded!
         Job 39: Head of Paid Search (Bangkok Based, Relocation Provided) downloaded!
         Job 40: People BI Manager (Bangkok Based, Relocation Provided) downloaded!
         Job 41: Manager, Strategic Partnerships – Strategy and Analytics (Bangkok based, relocation provided) downloaded!
         Job 42: People Operations Partner (Bangkok-based, Relocation Provided) downloaded!
         Job 43: Data Analyst (Bangkok Based, Relocation Provided) downloaded!
         Job 44: Associate Director, Head of Business Intelligence (Bangkok based, Relocation provided)) downloaded!
         Job 45: Senior Manager, Corporate Strategy (Bangkok Based) downloaded!
         Job 46: Head of Paid Search (Bangkok Based, Relocation Provided) downloaded!
         Job 47: Head of Paid Search (Bangko

         Job 126: Senior Analyst/Lead, Corporate Strategy (Bangkok Based) downloaded!
         Job 127: Head of Paid Search (Bangkok Based, Relocation Provided) downloaded!
         Job 128: People BI Manager (Bangkok Based, Relocation Provided) downloaded!
         Job 129: Associate Director, Corporate Strategy (Bangkok Based) downloaded!
         Job 130: Head of Paid Search (Bangkok Based, Relocation Provided) downloaded!
         Job 131: Data Analyst (Bangkok Based, Relocation Provided) downloaded!
         Job 132: Senior Manager, Corporate Strategy (Bangkok Based) downloaded!
         Job 133: Associate Director, Corporate Strategy (Bangkok Based) downloaded!
         Job 134: Senior Manager, Corporate Strategy (Bangkok Based) downloaded!
         Job 135: Senior Marketing Manager, Social Media Team (Bangkok-based, relocation provided) downloaded!
         Job 136: Head of Paid Search (Bangkok Based, Relocation Provided) downloaded!
         Job 137: Head of Paid Search (Bangko