Importing necessary libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Scraping the pages

In [81]:
def scrape_job_openings(page_num,query):
    # query = "Data Scientist"
    # page_num = 1

    url = f"https://www.jobs.ch/en/vacancies/?page={page_num}&term={query.replace(' ', '%20')}"
    print(url)

    page = requests.get(url, timeout=2)
    
    soup = BeautifulSoup(page.content, "html.parser")

    job_ads = soup.find_all('article')
    job_dicts = []

    for job in job_ads:
        
        job_link_tag = job.find('a',{'data-cy' : 'job-link'})
        # title = job_link_tag.get('title')
        url = job_link_tag.get('href')
        
        try:
            location = job.find_all('p')[1].get_text()
        except AttributeError:
            location = None

        try:
            date = job.find_all('p')[0].get_text(';').split(';')[0]
            # print(date)
        except AttributeError:
            date = None
        try:
            company_elem = job.find('div', {'class': 'd_grid ai_center gap_s12 grid-tc_[auto_1fr] mt_auto pt_s16'}).find_all('p', {'class':'textStyle_p2'})
            company = company_elem[0].get_text() if company_elem else ''
        except AttributeError:
            company = None
        try:
            salary_range = job.find_all('p')[2].get_text()
            # print(salary_range)
        except AttributeError:
            salary_range = None
        try:
            title = job.find('div', {'class': 'mb_s8'}).get_text()
            title_parts = title.split()
            new_title_parts = []
            for part in title_parts:
                if not any(char.isdigit() for char in part):
                    new_title_parts.append(part)
                else:
                    break
            title = ' '.join(new_title_parts)
            if '(' in title or ')' in title:
                title = title.split('(')[0].strip()
            if not title:
                title = None
            # print(title)
        except AttributeError:
            title = None
        
        job_dict = {
                    "title": title,
                    'salary_range': salary_range,
                    "company": company,
                    "location": location,
                    "date": date,
                    "query": query,
                    "url": url
                }
        
        job_dicts.append(job_dict)
    return job_dicts


## Scrapping pages based on the number page and the keyWords

In [82]:
keywords = ["Data Scientist", "Data Analyst", "Python Developer", "Data Engineer", "Data Manager", "Data Architect", "Big Data Analyst", "Data Python"]
df_data = {}

for keyword in keywords:
    data = scrape_job_openings(1, keyword)
    df_data[keyword] = data


https://www.jobs.ch/en/vacancies/?page=1&term=Data%20Scientist
https://www.jobs.ch/en/vacancies/?page=1&term=Data%20Analyst
https://www.jobs.ch/en/vacancies/?page=1&term=Python%20Developer
https://www.jobs.ch/en/vacancies/?page=1&term=Data%20Engineer
https://www.jobs.ch/en/vacancies/?page=1&term=Data%20Manager
https://www.jobs.ch/en/vacancies/?page=1&term=Data%20Architect
https://www.jobs.ch/en/vacancies/?page=1&term=Big%20Data%20Analyst
https://www.jobs.ch/en/vacancies/?page=1&term=Data%20Python


## Creating a Dataframe out of the data returned in the dictionary based on keywords

In [83]:
df = pd.concat([pd.DataFrame(df_data[keyword]) for keyword in keywords])
df.head()

Unnamed: 0,title,salary_range,company,location,date,query,url
0,Data Scientist - Fachspezialist/in Geoinformation,60 – 100%,Kanton Luzern,Luzern,Published: 18 September 2024,Data Scientist,/en/vacancies/detail/5cbf0471-0dba-495f-a9ed-b...
1,Data Scientist / Analyst,100%,EMS-CHEMIE AG,Domat/Ems,Published: 26 September 2024,Data Scientist,/en/vacancies/detail/a047031f-cf5b-4713-b3b1-1...
2,Junior Data Scientist,80 – 100%,7Days Media Services GmbH,Egerkingen,Published: 11 September 2024,Data Scientist,/en/vacancies/detail/f3f91a19-5d06-4b20-be15-c...
3,Data Engineer / Data Analyst,80 – 100%,Swissgrid AG,Aarau,Published: 06 September 2024,Data Scientist,/en/vacancies/detail/76182665-83cb-4de3-b311-d...
4,Senior Data-Analyst,100%,auto-i-dat ag,Zürich,Published: 20 September 2024,Data Scientist,/en/vacancies/detail/9f2b74d5-6af7-474b-b244-0...


* How many jobs are shared between these categories?

In [84]:
duplicates = df[df.duplicated(subset='title', keep=False)]
grouped_data = duplicates.groupby('title').count()
grouped_data.head()

Unnamed: 0_level_0,salary_range,company,location,date,query,url
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Data & AI Platform Engineer,2,2,2,2,2,2
Data Analyst,10,10,10,10,10,10
Data Architect,2,2,2,2,2,2
Data Engineer,12,12,12,12,12,12
Data Engineer / Data Analyst,4,4,4,4,4,4


* How much the keywords: “Data Analyst” and “Big Data Analyst” overlap?


In [85]:
overlap_count = df[df['query'].str.contains('Data Analyst') & df['query'].str.contains('Big Data Analyst')]
print(len(overlap_count))
overlap_count['title']

20


0                          Data Engineer / Data Analyst
1                                   Senior Data-Analyst
2     Datenflüsterer / Analyse-Alchemist / Prognose-...
3            Dentalassistent*in mit Röntgenberechtigung
4                        IT MDM Analyst Data Management
5                                          Data Analyst
6           Datenanalystin/Datenanalyst Betriebsplanung
7     Data Scientist - Fachspezialist/in Geoinformation
8                            IT Data Analytics Engineer
9            Intern - Technology Consulting - AI & Data
10                                        Data Engineer
11                                Junior Data Scientist
12                                         Data Analyst
13                                 Senior Data Engineer
14                             Data Scientist / Analyst
15                                         Data Analyst
16     Senior Informatica Cloud Data Quality Specialist
17    Graduate Specialist Program - Supply Chain

* Are there some companies doing more hires than average?


In [88]:

# Group the DataFrame by the 'company' column and count the number of rows for each group
company_counts = df.groupby('company').count()
# Calculate the average number of hires per company
avg_count = company_counts['title'].mean()
high_hires = company_counts[(company_counts['title'] > avg_count) & (company_counts.index != '')]
highest_company_hiring = high_hires.max()
# print(high_hires)
print(avg_count)
highest_company_hiring.head


1.7142857142857142


<bound method NDFrame.head of title           4
salary_range    4
location        4
date            4
query           4
url             4
dtype: int64>

* How many jobs are there in different Kantons?


In [90]:
duplicates = df[df.duplicated(subset='location', keep=False)]
grouped_data = duplicates.groupby('location').count()
grouped_data

Unnamed: 0_level_0,title,salary_range,company,date,query,url
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1007 Lausanne,2,2,2,2,2,2
1211 GENEVE 11,3,3,3,3,3,3
Aarau,4,4,4,4,4,4
Aarburg,3,3,3,3,3,3
Baar,3,3,3,3,3,3
Basel,5,5,5,5,5,5
Bern,4,4,4,4,4,4
Domat/Ems,4,4,4,4,4,4
Egerkingen,3,3,3,3,3,3
Eschen,4,4,4,4,4,4



* Is “machine learning” keyword more often in data scientist or data analyst jobs?


In [109]:
# Select the rows where the job title contains 'data scientist' or 'data analyst'
#na_counts = df.isna().sum()
#na_counts
# na_counts = 
df.dropna()



Unnamed: 0,title,salary_range,company,location,date,query,url
0,Data Scientist - Fachspezialist/in Geoinformation,60 – 100%,Kanton Luzern,Luzern,Published: 18 September 2024,Data Scientist,/en/vacancies/detail/5cbf0471-0dba-495f-a9ed-b...
1,Data Scientist / Analyst,100%,EMS-CHEMIE AG,Domat/Ems,Published: 26 September 2024,Data Scientist,/en/vacancies/detail/a047031f-cf5b-4713-b3b1-1...
2,Junior Data Scientist,80 – 100%,7Days Media Services GmbH,Egerkingen,Published: 11 September 2024,Data Scientist,/en/vacancies/detail/f3f91a19-5d06-4b20-be15-c...
3,Data Engineer / Data Analyst,80 – 100%,Swissgrid AG,Aarau,Published: 06 September 2024,Data Scientist,/en/vacancies/detail/76182665-83cb-4de3-b311-d...
4,Senior Data-Analyst,100%,auto-i-dat ag,Zürich,Published: 20 September 2024,Data Scientist,/en/vacancies/detail/9f2b74d5-6af7-474b-b244-0...
...,...,...,...,...,...,...,...
15,Data Architect,80 – 100%,ti&m AG,Zürich,Published: 27 September 2024,Data Python,/en/vacancies/detail/85f4af7f-ed15-4199-bc6a-6...
16,"Data Engineer, Product Analytics",100%,MF Group AG,Mendrisio,Published: 27 September 2024,Data Python,/en/vacancies/detail/1a50b0d3-9832-4b27-90d9-2...
17,Data Engineer,60 – 100%,Universal-Job AG,Zentralschweiz,Published: 16 September 2024,Data Python,/en/vacancies/detail/e5a7a46b-ba0e-4358-b341-1...
18,Data Engineer,60 – 100%,Universal-Job AG,Zentralschweiz,Published: 05 September 2024,Data Python,/en/vacancies/detail/06c80e45-d1e0-4a83-95de-8...
