In [1]:
import cloudscraper
from bs4 import BeautifulSoup
import pandas as pd
import geopandas as gpd

In [2]:
url = 'https://infrabel.be/fr/contact?page={page}'

In [3]:
scraper = cloudscraper.create_scraper()

In [4]:
r = scraper.get(url.format(page=0))
r.status_code

200

In [5]:
html_doc = r.text
soup = BeautifulSoup(html_doc)

In [6]:
# building a set using {} to avoid duplicates
pages = sorted(list({
    int(item['href'].replace('?page=', ''))
    for item in soup.select('.pager.pager__items a.pager__link')
    if item['href'].startswith('?page=')
}))

In [7]:
def get_lcis(soup):
    """
    Returns list of LCIs from soup
    """
    return [
        {
            'name': item.select_one('h4').get_text().strip(),
            'address': item.select_one('.m-t-2.m-b-1.font-weight-bold').get_text().strip(),
            'lng': float(item.select_one('meta[property="longitude"]')['content']),
            'lat': float(item.select_one('meta[property="latitude"]')['content']),
        }
        for item in soup.select('.teaser-block')
    ]

In [8]:
lcis = []
for page in pages:
    r = scraper.get(url.format(page=0))
    html_doc = r.text
    soup = BeautifulSoup(html_doc)
    lcis += get_lcis(soup)

In [9]:
df = pd.DataFrame(lcis)
gdf = gpd.GeoDataFrame(df[[col for col in df.columns if col not in ['lng', 'lat']]], geometry=gpd.points_from_xy(df.lng, df.lat, crs='EPSG:4326'))
gdf

Unnamed: 0,name,address,geometry
0,Werkplaats Roeselare,Beverensteenweg 177 BE- 8800 Roeselare,POINT (3.13041 50.95698)
1,South City,Place Marcel Broodthaers 2 1060 Bruxelles,POINT (4.33766 50.833)
2,Ringstation,"Rue des Deux Gares, 82 1070 Anderlecht",POINT (4.32696 50.83222)
3,Recycling Office,"Place Marcel Broodthaers, 2 - 1060 Bruxelles",POINT (4.33822 50.83578)
4,LCI–Antenne Antwerpen-Oost,Draakplaats BE- 2018 Antwerpen,POINT (4.43222 51.20576)
...,...,...,...
76,LCI–Antenne Antwerpen-Oost,Draakplaats BE- 2018 Antwerpen,POINT (4.43222 51.20576)
77,LCI-Antenne Zottegem,Broeder Mareslaan BE- 9620 Zottegem,POINT (3.8161 50.87123)
78,LCI-Antenne Zeebrugge,New Yorklaan BE- 8380 Zeebrugge,POINT (3.18605 51.33036)
79,LCI-Antenne Zaventem,Heldenplein BE- 1930 Zaventem,POINT (4.47145 50.88495)


In [10]:
gdf.to_file("clis.json", driver="GeoJSON")