Scrape the Python ITJobsWatch page. Showcase:
1. Data Ingestion
2. Data Wrangling
3. Data Analysis
4. Data Visualisation

In [141]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Download the ITJobsWatch page once and parse it; every helper below reads from `soup`.
# NOTE(review): the notebook intro mentions the "Python" page, but this URL is java.do —
# confirm which page is intended.
# The timeout stops the cell from hanging forever if the site does not respond
# (requests applies no timeout by default).
x = requests.get('https://www.itjobswatch.co.uk/jobs/uk/java.do', timeout=30)
# Fail here on a 4xx/5xx response instead of parsing an error page and hitting
# an IndexError later in the positional table lookups.
x.raise_for_status()
# NOTE(review): no parser is named, so BeautifulSoup picks whichever is installed.
# The positional section/table indexes used below depend on the resulting tree —
# consider pinning the parser once the intended one is confirmed.
soup = BeautifulSoup(x.text)

Parse a single table into pandas

In [142]:
def get_info_from_row(row):
    """Return the text of every short ``<td>`` cell in a table row.

    Args:
        row: An object exposing ``find_all('td')`` whose results expose
            ``get_text()`` (e.g. a BeautifulSoup ``<tr>`` tag).

    Returns:
        list[str]: Cell texts in the order ``find_all`` yields them, keeping
        only cells whose text is shorter than 50 characters.
    """
    cell_texts = []
    for cell in row.find_all('td'):
        text = cell.get_text()
        # Cells with 50 or more characters of text are skipped.
        if len(text) < 50:
            cell_texts.append(text)
    return cell_texts

In [143]:
def get_info_from_section(section):
    """Collect the cell texts of every non-empty row inside ``section``.

    Each ``<tr>`` found under ``section`` is passed to ``get_info_from_row``
    exactly once; rows that yield no cell texts are dropped.

    Args:
        section: An object exposing ``find_all('tr')`` (e.g. a BeautifulSoup
            ``<section>`` or ``<table>`` tag).

    Returns:
        list[list[str]]: One list of cell texts per retained row, in the
        order ``find_all`` yields the rows.
    """
    rows = []
    for row in section.find_all('tr'):
        # Parse the row once and reuse the result for both the emptiness
        # check and the output.
        cells = get_info_from_row(row)
        if cells:
            rows.append(cells)
    return rows

In [144]:
def get_job_stats_df(soup):
    """Build a DataFrame from the first ``<section>`` of the page.

    The first cell of each row becomes the index label; cells 1, 2 and 3
    become the '2024', '2023' and '2022' columns respectively. Values are
    kept as the raw cell text.

    Args:
        soup: Parsed page exposing ``find_all('section')``.

    Returns:
        pandas.DataFrame: One row per table row returned by
        ``get_info_from_section``.

    Raises:
        IndexError: If the page has no ``<section>`` or a row has fewer than
            four retained cells.
    """
    info = get_info_from_section(soup.find_all('section')[0])

    # Column label -> position of the cell within each row.
    # NOTE(review): the year labels are hard-coded; presumably they mirror the
    # table header at the time of writing — verify against the live page.
    cell_positions = {'2024': 1, '2023': 2, '2022': 3}
    columns = {
        label: [row[position] for row in info]
        for label, position in cell_positions.items()
    }
    row_labels = [row[0] for row in info]

    return pd.DataFrame(columns, index=row_labels)

In [145]:
def get_skill_stats_df(soup):
    """Build a DataFrame from the sixth ``<section>`` of the page.

    The first cell of each row becomes the index label; cell 1 goes to the
    'Percentage' column and cell 2 to the 'Skill' column. Values are kept as
    the raw cell text.

    Args:
        soup: Parsed page exposing ``find_all('section')``.

    Returns:
        pandas.DataFrame: One row per table row returned by
        ``get_info_from_section``.

    Raises:
        IndexError: If the page has fewer than six ``<section>`` elements or
            a row has fewer than three retained cells.
    """
    info = get_info_from_section(soup.find_all('section')[5])

    # Column label -> position of the cell within each row.
    cell_positions = {'Percentage': 1, 'Skill': 2}
    columns = {
        label: [row[position] for row in info]
        for label, position in cell_positions.items()
    }
    row_labels = [row[0] for row in info]

    return pd.DataFrame(columns, index=row_labels)

In [146]:
def get_category_stats_df(soup, i, name):
    """Build a DataFrame for one nested table, tagged with a category name.

    The table is located positionally: the ``i``-th ``<table>`` nested inside
    the seventh ``<table>`` of the page.

    Args:
        soup: Parsed page exposing ``find_all('table')``.
        i: Zero-based position of the nested table to read.
        name: Value stored in the 'Category' column for every row.

    Returns:
        pandas.DataFrame: Columns 'Category', 'Ranking', 'Percentage' and
        'Skill' (in that order) with a default integer index; cell values are
        kept as raw text.

    Raises:
        IndexError: If either positional table lookup is out of range or a
            row has fewer than three retained cells.
    """
    outer_table = soup.find_all('table')[6]
    info = get_info_from_section(outer_table.find_all('table')[i])

    # 'Category' is a scalar, so pandas repeats it for every row.
    columns = {'Category': name}
    # Column label -> position of the cell within each row.
    cell_positions = {'Ranking': 0, 'Percentage': 1, 'Skill': 2}
    for label, position in cell_positions.items():
        columns[label] = [row[position] for row in info]

    return pd.DataFrame(columns)

In [147]:
# One frame per skill category; `i` is the position of that category's table
# among the tables nested inside the page's seventh <table>.
df_genr = get_category_stats_df(soup, i=7, name="General")
df_jobs = get_category_stats_df(soup, i=8, name="Job")
df_libs = get_category_stats_df(soup, i=9, name="Library")
df_lang = get_category_stats_df(soup, i=13, name="Language")

# Stack the four frames. Each keeps its own 0-based index, so index labels
# repeat across categories in the combined frame.
df = pd.concat(
    [
        df_genr,
        df_jobs,
        df_libs,
        df_lang,
    ]
)
print(df)

    Category Ranking      Percentage                    Skill
0    General       1  2,144 (28.06%)                  Finance
1    General       2  1,549 (20.27%)            Social Skills
2    General       3    823 (10.77%)  Inclusion and Diversity
3    General       4    789 (10.33%)        Analytical Skills
4    General       5     687 (8.99%)                  Banking
..       ...     ...             ...                      ...
15  Language      16     165 (2.16%)                   MATLAB
16  Language      17     138 (1.81%)                       VB
17  Language      18     137 (1.79%)                     Rust
18  Language      19     136 (1.78%)             Shell Script
19  Language      20     110 (1.44%)                    T-SQL

[80 rows x 4 columns]
