In [1]:
import tqdm
import re
import requests

import pandas as pd

from bs4 import BeautifulSoup
from unicodedata import normalize
from datetime import datetime

# ESN Automatic Section Counter

## Functions

In [2]:
def get_soup_from_url(url, timeout=30):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.get``. Defaults to 30.

    Returns
    -------
    BeautifulSoup
        The response text parsed with the ``lxml`` parser.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        On connection problems or when the timeout expires.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    page = requests.get(url, timeout=timeout)
    # Fail here instead of parsing an error page and crashing later on a missing tag.
    page.raise_for_status()
    return BeautifulSoup(page.text, 'lxml')

def get_global_counts(content):
    """Extract the announced section and country totals from the page intro.

    Parameters
    ----------
    content : object
        Parsed HTML node exposing ``find('p')``; the text of the first
        paragraph found is searched for the sentence announcing the totals.

    Returns
    -------
    list of int
        ``[section_count, country_count]`` in that order.

    Raises
    ------
    ValueError
        If no ``<p>`` element is found, or if its text does not contain the
        expected sentence.
    """
    # Raw string so that ``\d`` is a regex class and not a (deprecated) string
    # escape; the final period is escaped so it only matches a literal dot.
    regex = r"The ESN network consists at this moment of (\d+) local sections in (\d+) countries\."

    paragraph = content.find('p')
    if paragraph is None:
        raise ValueError("No <p> element found in the provided content.")

    # NFKD folds compatibility characters (e.g. non-breaking spaces) into
    # their plain equivalents so the literal spaces in the pattern can match.
    text = normalize("NFKD", paragraph.get_text())

    match = re.search(regex, text)
    if match is None:
        raise ValueError(
            "Could not find the global counts sentence in: {!r}".format(text)
        )
    return [int(elem) for elem in match.groups()]

def get_country_section_count(country_url):
    """Scrape one country page for its organisation name and section count.

    Parameters
    ----------
    country_url : str
        Address of the country page to download.

    Returns
    -------
    tuple
        ``(national_org_name, section_count)`` where the name is the text of
        the ``h1.page-header`` element and the count is the integer parsed
        from the ``div.num_sections_country`` element.

    Raises
    ------
    ValueError
        If the header or counter element is missing, or if the counter text
        does not contain ``Number of sections: <digits>``.
    """
    soup = get_soup_from_url(country_url)

    header = soup.find('h1', {'class': 'page-header'})
    if header is None:
        raise ValueError(
            "No 'page-header' <h1> found at {}".format(country_url)
        )
    national_org_name = header.text

    count_div = soup.find('div', {'class': 'num_sections_country'})
    if count_div is None:
        raise ValueError(
            "No 'num_sections_country' <div> found at {}".format(country_url)
        )
    section_count_paragraph = count_div.text

    # Raw string so that ``\d`` is interpreted by the regex engine only.
    section_count_regex = r"Number of sections: (\d+)"
    match = re.search(section_count_regex, section_count_paragraph)
    if match is None:
        raise ValueError(
            "Could not parse the section count at {}: {!r}".format(
                country_url, section_count_paragraph
            )
        )
    section_count = int(match.group(1))

    return national_org_name, section_count

## Data scraping

### Global counts

In [3]:
# Page that is scraped for both the announced totals and the country links.
main_url = "https://www.esn.org/sections"
soup = get_soup_from_url(main_url)

# Descend four nested <div> levels below the element with id "content-block".
content_block = soup.find(id='content-block')
content = content_block.find('div').find('div').find('div').find('div')

In [4]:
# Totals as announced in the first paragraph of the scraped content.
global_counts = get_global_counts(content)
global_section_count, global_country_count = global_counts
(global_section_count, global_country_count)

(519, 45)

### Counts per country

In [5]:
# Every <div> under the first child <div> of `content` is expected to hold
# a link to a country page.
country_divs = content.find('div').find_all('div')
country_urls = [div.find('a')['href'] for div in country_divs]

# One request per country page; tqdm reports progress while scraping.
country_counts = pd.Series({
    name: count
    for name, count in map(get_country_section_count, tqdm.tqdm(country_urls))
})
country_counts

100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [00:06<00:00,  7.39it/s]


ESN Albania                    1
ESN Armenia                    1
ESN Austria                   15
ESN Azerbaijan                 1
ESN Belgium                   18
ESN Bosnia and Herzegovina     2
ESN Bulgaria                  11
ESN Croatia                    7
ESN Cyprus                     1
ESN Czech Republic            19
ESN Denmark                    6
ESN Estonia                    5
ESN Finland                   15
ESN France                    34
ESN Georgia                    1
ESN Germany                   45
ESN Greece                    20
ESN Hungary                   14
ESN Ireland                   10
ESN Italy                     53
ESN Jordan                     1
ESN Kazakhstan                 1
ESN Latvia                     3
ESN Liechtenstein              1
ESN Lithuania                 11
ESN Luxembourg                 1
ESN Malta                      1
ESN Moldova                    1
ESN Montenegro                 1
ESN North Macedonia            1
ESN Norway

In [6]:
# Totals recomputed from the per-country figures scraped above.
agg_section_count = country_counts.sum()
agg_country_count = country_counts.count()
(agg_section_count, agg_country_count)

(521, 45)

## Results

In [7]:
# Record when the notebook was executed, since the scraped figures change over time.
run_timestamp = datetime.today().strftime("%B %d, %Y at %H:%M:%S")
print("Results as of", run_timestamp)

Results as of September 26, 2024 at 22:15:28


In [8]:
# Report the totals announced on the website itself.
print(
    "The official website currently indicates ESN comprises",
    global_section_count,
    "sections in",
    global_country_count,
    "countries.",
)

The official website currently indicates ESN comprises 519 sections in 45 countries.


In [9]:
# Compare the announced section total with the sum over the country pages.
if global_section_count != agg_section_count:
    print("The aggregate section count is however different at", agg_section_count)
else:
    print("The aggregate and global section counts are the same.")

The aggregate section count is however different at 521


In [10]:
# Compare the announced country total with the number of country pages scraped.
if global_country_count != agg_country_count:
    print("The aggregate country count is however different at", agg_country_count)
else:
    print("The aggregate and global country counts are the same.")

The aggregate and global country counts are the same.
