In [1]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
from shapely.geometry import Point

# Import the interactive maps module and the geocoding library

In [2]:
from geopandas.tools import geocode
import folium
from folium import Marker

In [3]:
# Function for displaying the map

def embed_map(m, file_name):
    """Save a map to an HTML file and return an IFrame that displays it.

    Parameters
    ----------
    m : object
        Map-like object exposing ``save(path)`` (a ``folium.Map`` in this
        notebook).
    file_name : str
        Path of the HTML file to write. Missing parent directories are
        created before saving.

    Returns
    -------
    IPython.display.IFrame
        Frame pointing at ``file_name``, 100% wide and 500px high.
    """
    # Imported lazily so IPython is only required when the helper is called.
    from IPython.display import IFrame

    # Create the target folder up front so saving does not fail on a
    # missing directory; a bare file name has no parent to create.
    parent_dir = os.path.dirname(file_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    m.save(file_name)
    return IFrame(file_name, width='100%', height='500px')

In [4]:
# Function for encoding geolocation from name

def encode(name):
    """Geocode a place name with the Nominatim provider.

    Parameters
    ----------
    name : str
        Free-text query passed to ``geocode``.

    Returns
    -------
    The ``geometry`` attribute of the ``geocode`` result, or ``None`` when
    the lookup raised an exception.
    """
    print("Encoding '{}'".format(name))
    try:
        return geocode(name, provider='nominatim').geometry
    except Exception as err:
        # Best-effort lookup: the caller skips None results, but the cause is
        # reported so that failures (network, rate limit, bad query) can be
        # told apart.
        print("Error while encoding '{}'! ({}: {})".format(
            name, type(err).__name__, err))
        return None

# Encode locations of metro stations

In [5]:
# Download the page containing a table with names of all metro stations in Warsaw

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(
    'https://pl.wikipedia.org/wiki/Lista_stacji_metra_w_Warszawie',
    timeout=30,
)
# Stop here on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [41]:
# Encode locations of metro stations and create a DataFrame

# Rows are gathered in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
records = []

# [1:-1] skips the first and the last row of the first table on the page.
for row in soup.find("table").find_all("tr")[1:-1]:
    name = ("Metro " + row.find_all("td")[1].text).replace('\n','')
    loc = encode(name + " Warsaw")
    if loc is not None:
        # loc.x / loc.y are series; read the single element explicitly, since
        # float() on a one-element Series is deprecated in recent pandas.
        records.append({
            'Nazwa': name,
            'φ': float(loc.y.iloc[0]),
            'λ': float(loc.x.iloc[0]),
        })

# Passing columns keeps the expected layout even when no station was encoded.
df = pd.DataFrame(records, columns=['Nazwa', 'φ', 'λ'])

Encoding 'Metro Kabaty Warsaw'
Encoding 'Metro Natolin Warsaw'
Encoding 'Metro Imielin Warsaw'
Encoding 'Metro Stokłosy Warsaw'
Encoding 'Metro Ursynów Warsaw'
Encoding 'Metro Służew Warsaw'
Encoding 'Metro Wilanowska Warsaw'
Encoding 'Metro Wierzbno Warsaw'
Encoding 'Metro Racławicka Warsaw'
Encoding 'Metro Pole Mokotowskie Warsaw'
Encoding 'Metro Politechnika Warsaw'
Encoding 'Metro Centrum Warsaw'
Encoding 'Metro Świętokrzyska Warsaw'
Encoding 'Metro Ratusz Arsenał Warsaw'
Encoding 'Metro Dworzec Gdański Warsaw'
Encoding 'Metro Plac Wilsona Warsaw'
Encoding 'Metro Marymont Warsaw'
Encoding 'Metro Słodowiec Warsaw'
Encoding 'Metro Stare Bielany Warsaw'
Encoding 'Metro Wawrzyszew Warsaw'
Encoding 'Metro Młociny Warsaw'
Encoding 'Metro Rondo Daszyńskiego Warsaw'
Encoding 'Metro Rondo ONZ Warsaw'
Encoding 'Metro Świętokrzyska Warsaw'
Encoding 'Metro Nowy Świat-Uniwersytet Warsaw'
Encoding 'Metro Centrum Nauki Kopernik Warsaw'
Encoding 'Metro Stadion Narodowy Warsaw'
Encoding 'Metro Dwor

In [42]:
# Repair badly encoded values

# Replacement (φ, λ) pairs for the stations whose encoded position was wrong.
corrections = {
    'Metro Ratusz Arsenał': (52.2447, 21.0005),
    'Metro Ursynów': (52.1619, 21.0278),
}

for station_name, (phi, lam) in corrections.items():
    matches = df.index[df['Nazwa'] == station_name]
    if len(matches) == 0:
        # The station can be absent (e.g. its lookup failed earlier); report
        # it instead of failing with a bare IndexError.
        print("Station '{}' not found - nothing to repair".format(station_name))
        continue
    # Only the first matching row is patched.
    df.at[matches[0], 'φ'] = phi
    df.at[matches[0], 'λ'] = lam

# Export to .csv for future analysis

In [44]:
# Write the station table to metro.csv, leaving out the index column.
df.to_csv(path_or_buf="metro.csv", index=False)

In [45]:
# Preview the first five rows of the station table.
df.head(n=5)

Unnamed: 0,Nazwa,φ,λ
0,Metro Kabaty,52.131047,21.065518
1,Metro Natolin,52.140341,21.056685
2,Metro Imielin,52.149295,21.045788
3,Metro Stokłosy,52.156834,21.03382
4,Metro Ursynów,52.1619,21.0278


# Plot locations of metro stations (as benchmark)
There were a few errors: `Metro Ratusz Arsenał` and `Metro Ursynów` weren't encoded properly, so their coordinates were corrected manually before plotting.

In [46]:
# Create a map
m_1 = folium.Map(location=[52.2323, 21.0000], tiles='cartodbpositron', zoom_start=11)

# Add points to the map: one marker per station at (φ, λ), with the station
# name as popup. The row index is not needed, hence the underscore.
for _, station in df.iterrows():
    Marker((station['φ'], station['λ']), popup=station['Nazwa']).add_to(m_1)

# Display the map
# makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: a single call
# with no gap between the check and the creation.
os.makedirs("maps", exist_ok=True)
embed_map(m_1, 'maps/m_1.html')