![Basic-Fit logo](https://www.philipsstadion.nl/wp-content/uploads/2020/12/Basic-Fit-logo.jpg)

In [1]:
# Imports
import pandas as pd
import requests
import geocoder
import folium
from bs4 import BeautifulSoup
from tqdm import tqdm
from folium.plugins import MarkerCluster

### Scrape gym data

In [2]:
# Get gym URLs
# Fetch the club overview page and collect the absolute URL of every Dutch gym page.
URL = 'https://www.basic-fit.com/nl-nl/resultaat?landing=true'
# A timeout prevents the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed into an empty list.
response = requests.get(URL, timeout=30)
response.raise_for_status()
SOUP = BeautifulSoup(response.content, 'lxml')
links = SOUP.find_all('a', {'class': 'club-tile', 'href': True})
# Keep only links that point at an individual gym page; hrefs are site-relative.
gym_url = [
    'https://www.basic-fit.com' + link['href']
    for link in links
    if link['href'].startswith('/nl-nl/sportschool/')
]

In [3]:
# Get gym addresses
# Visit every gym page and extract street, postal code and city from its
# 'location' list item. Exactly one entry is appended per URL so the three lists
# always stay aligned with gym_url (a page with zero or several location items
# would otherwise shift every following row).
street = []
postal_code = []
city = []
for url in tqdm(gym_url):
    # Timeout so one stalled page cannot hang the whole scrape.
    soup = BeautifulSoup(requests.get(url, timeout=30).content, 'lxml')
    addr = soup.find('li', {'class': 'location'})
    if addr is None or addr.a is None or not addr.a.has_attr('href'):
        # No usable address on this page: record a gap instead of skipping the row.
        street.append(None)
        postal_code.append(None)
        city.append(None)
        continue
    parts = addr.text.split(',')
    street.append(parts[0].strip())
    # Normalise the postal code to upper case without spaces.
    postal_code.append(parts[-1].strip().upper().replace(' ', ''))
    # The city is taken from the last comma-separated part of the link's query value.
    city.append(addr.a['href'].split('=')[-1].split(',')[-1].strip().title())

100%|█████████████████████████████████████████| 231/231 [01:08<00:00,  3.39it/s]


In [4]:
# Save data
# Assemble the scraped lists into one frame and checkpoint it as a ';'-separated CSV.
df = pd.DataFrame(
    dict(
        address=street,
        postal_code=postal_code,
        city=city,
        url=gym_url,
    )
)
df.to_csv(path_or_buf='BasicFit_raw.csv', index=False, sep=';')

### Clean address data

In [5]:
# Read data
# Reload the raw scrape checkpoint so cleaning can start without re-scraping.
df = pd.read_csv(filepath_or_buffer='BasicFit_raw.csv', delimiter=';')

In [6]:
# Clean data
# Manual corrections for gyms whose scraped address or city is wrong, keyed by
# postal code. Each entry maps column name -> corrected value.
# City names are written in title case to match the scraped 'city' column.
CORRECTIONS = {
    '5616RX': {'address': 'Stadionplein 2'},
    '7825SB': {'address': 'Bislett 11'},
    '9714JT': {'address': 'Antillenstraat 7E'},
    '1689PC': {'city': 'Zwaag'},
    '1972AT': {'address': 'C. van der Doesstraat 22', 'city': 'IJmuiden'},
    '1504DG': {'address': 'Dominee Martin Luther Kingweg 205'},
    '4615PB': {'address': 'Burgemeester Van de Laarstraat 9'},
    '5461XL': {'address': 'Prins Willem Alexander Sportpark 2'},
    '1101DL': {'address': 'Johan Cruijff Boulevard 55'},
}
for code, fixes in CORRECTIONS.items():
    for column, value in fixes.items():
        # Rows are matched on postal code; a code that is absent is simply a no-op.
        df.loc[df['postal_code'] == code, column] = value

In [7]:
# Save data
# Checkpoint the cleaned addresses as a ';'-separated CSV without the index column.
df.to_csv(path_or_buf='BasicFit_cln.csv', index=False, sep=';')

### Add geo data

In [8]:
# Read data
# Reload the cleaned checkpoint as the starting point for geocoding.
df = pd.read_csv(filepath_or_buffer='BasicFit_cln.csv', delimiter=';')

In [9]:
# Add columns for latitude and longitude
# Both coordinate columns start out empty and are filled in by the geocoding step.
for coord in ('x', 'y'):
    df[coord] = None

In [10]:
# Get latitude and longitude
# Geocode every gym through OpenStreetMap using "<address>, <city>" as the query.
# This stays best-effort: a failed lookup leaves x/y empty for that row so the
# validation step can list it, but the failure is reported instead of silently
# swallowed, and Ctrl-C is no longer caught.
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    query = '{}, {}'.format(row.address, row.city)
    try:
        g = geocoder.osm(query).osm
        # Read both values before writing so a row never gets x without y.
        lon, lat = g['x'], g['y']
    except Exception as exc:
        # tqdm.write prints without breaking the progress bar.
        tqdm.write(f'Geocoding failed for {query!r}: {exc!r}')
        continue
    df.loc[index, 'x'] = lon
    df.loc[index, 'y'] = lat

100%|█████████████████████████████████████████| 231/231 [01:54<00:00,  2.01it/s]


In [11]:
# Validate data
# Flag every row that is still missing at least one coordinate.
missing_geo = df['x'].isna() | df['y'].isna()
if missing_geo.any():
    # Show the affected gyms so they can be fixed by hand.
    print(df.loc[missing_geo, ['address', 'postal_code', 'city', 'x', 'y']])
else:
    print('No missing geolocations')

                   address postal_code    city     x     y
132  Van Riemsdijkplein 34      7606ZA  Almelo  None  None


In [12]:
# Add missing latitude and longitude manually
# Coordinates for the gym with postal code 7606ZA, which the geocoder could not resolve.
# x holds the longitude and y the latitude.
lon, lat = 6.6483422, 52.3381424
df.loc[df['postal_code'] == '7606ZA', ['x', 'y']] = [lon, lat]

In [13]:
# Save data
# Checkpoint the geocoded gyms as a ';'-separated CSV without the index column.
df.to_csv(path_or_buf='BasicFit_geo.csv', index=False, sep=';')

### Visualise gym data

In [14]:
# Read data
# Reload the geocoded checkpoint as input for the map.
df = pd.read_csv(filepath_or_buffer='BasicFit_geo.csv', delimiter=';')

In [15]:
# Create map
# The map is centred on the given coordinates with an initial zoom level of 9.
# NOTE(review): newer folium releases may no longer bundle the 'Stamen Terrain'
# tile set — confirm against the installed folium version.
m = folium.Map(location=[52.143, 5.589], zoom_start=9, tiles='Stamen Terrain')

# Add markers
# One marker per gym; folium expects [latitude, longitude], i.e. [y, x].
for _, row in df.iterrows():
    folium.Marker(
        [row.y, row.x],
        icon = folium.features.CustomIcon(
            'http://res.cloudinary.com/brinkhuis/image/upload/v1512746206/basicfit_wexzjg.png',
            icon_size = (84, 28)
        ),
        # Popup HTML: city, street address, postal code split as "1234 AB", and a
        # link to the gym page. The anchor is well-formed (quoted href, closed
        # opening tag, link text inside the element) so "website" is clickable.
        popup = f'''<b>{row.city}</b><br>
        <i>{row.address}</i><br>
        <i>{row.postal_code[:4]} {row.postal_code[4:]}</i><br>
        <a href="{row.url}" target="_blank">website</a>
        '''
    ).add_to(m)

# Save map
m.save('BasicFit.html')

In [16]:
# Show map
# A bare expression on the last line of a cell makes Jupyter render the folium
# map inline as the cell's output.
m