# Population <-> Cities

In [8]:
# To hide the warnings
import warnings
warnings.filterwarnings('ignore')

import pandas as pd 
import networkx as nx
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [9]:
# Load the pickled population data (presumably a city -> inhabitants mapping;
# verify against the file's producer).
# NOTE: unpickling can execute arbitrary code -- only load a file you trust.
with open('swiss_population_pickle.pkl', 'rb') as population_file:
    population = pickle.load(population_file)

First, we check how many cities count as large, where a large city is one with more than `POP_LIMIT_LARGE_CITY` inhabitants.

In [10]:
# Population threshold: a city is "large" when it has strictly more
# inhabitants than this.
POP_LIMIT_LARGE_CITY = 24000

# Names of all cities whose population exceeds the threshold.
large_cities = [
    name
    for name, inhabitants in population.items()
    if inhabitants > POP_LIMIT_LARGE_CITY
]
print("There are {} large cities in Switzerland".format(len(large_cities)))

There are 31 large cities in Switzerland


The following small function returns a list of possible names for a city's main station, as they might be found in the stop dataset. For example, when given Zürich, it will return `[Zürich HB, Zürich Hbf, Zürich SBB, Zürich CFF]`

In [11]:
def alt_city_names(city_orig_name):
    """Build the candidate station names for a city.

    Each candidate is the city name, a single space, and one of the station
    suffixes 'HB', 'Hbf', 'SBB', 'CFF'.

    Parameters
    ----------
    city_orig_name : str
        Name of the city.

    Returns
    -------
    list of str
        The four candidates, in the suffix order given above.
    """
    candidates = []
    for suffix in ('HB', 'Hbf', 'SBB', 'CFF'):
        candidates.append("{} {}".format(city_orig_name, suffix))
    return candidates

This function goes through the given stop names and tries to find the stop that matches each of the provided cities. Cities for which no matching stop is found are left out of the result.

In [12]:
def real_stop_names(cities, stop_names):
    """Map city names to the names of their stops in the stop dataset.

    For each city, in order, the first rule that applies decides the stop:

    1. the city name itself, if it is a stop name;
    2. the first name from ``alt_city_names(city)`` that is a stop name;
    3. a hand-written override for "Fribourg" and "Rapperswil-Jona".

    Parameters
    ----------
    cities : iterable of str
        City names to look up.
    stop_names : pandas.Series
        Stop names of the dataset; only its ``.values`` are read.

    Returns
    -------
    list of str
        One stop name per matched city, in the order of ``cities``. A city
        that matches none of the rules is left out, so the result can be
        shorter than ``cities``.
    """
    # Cities whose stop is named differently from every generated candidate.
    # These are appended as-is, without checking them against `stop_names`.
    manual_overrides = {
        "Fribourg": "Fribourg/Freiburg",
        "Rapperswil-Jona": "Rapperswil",
    }

    # Build the lookup once: testing membership against the raw array would
    # rescan every stop name for each candidate of each city.
    known_stops = set(stop_names.values)

    city_stop_names = []
    for city in cities:
        if city in known_stops:
            city_stop_names.append(city)
            continue

        for alt_name in alt_city_names(city):
            if alt_name in known_stops:
                city_stop_names.append(alt_name)
                break
        else:
            # No candidate matched: fall back to the manual overrides, if any.
            if city in manual_overrides:
                city_stop_names.append(manual_overrides[city])

    return city_stop_names

In [37]:
# Read the stop table and discard the 'Unnamed: 0' column.
df = pd.read_csv('stop_data_complete.csv', sep=',', encoding='latin-1')
df = df.drop('Unnamed: 0', axis=1)

# Resolve each large city to the name of its stop in the dataset.
large_city_stop_names = real_stop_names(cities=large_cities, stop_names=df['stop_name'])

In [65]:
# Map each matched stop name to the (latitude, longitude) of that stop.
large_cities_centers = {}
for stop in large_city_stop_names:
    # Several rows can carry the same stop name, usually because each
    # platform of a bigger station counts as its own stop. We arbitrarily
    # use the first row; this makes very little difference.
    is_this_stop = df['stop_name'] == stop
    stop_line = df[is_this_stop].iloc[0]
    large_cities_centers[stop] = (stop_line['stop_lat'], stop_line['stop_lon'])

print("We now have the following:")
print("{} : {}, etc.".format('Basel SBB', large_cities_centers['Basel SBB']))

We now have the following:
Basel SBB : (47.547649085507601, 7.5895514262328696), etc.


### Distance calculation

In [62]:
!pip install geopy

Collecting geopy
  Downloading geopy-1.11.0-py2.py3-none-any.whl (66kB)
[K    100% |████████████████████████████████| 71kB 1.6MB/s ta 0:00:01
[?25hInstalling collected packages: geopy
Successfully installed geopy-1.11.0


In [64]:
from geopy.distance import vincenty as geo_dist

Using the [GeoPy](http://geopy.readthedocs.io/en/1.10.0/) library and its implementation of Vincenty's formula (imported above as `geo_dist`), we can now calculate the distance between two stops. Here is an example for Lausanne and Zürich:

In [70]:
print("Distance between Lausanne and Zürich as the crow flies:")
# Distance between the two stations' coordinates, reported in kilometres.
lausanne_to_zurich = geo_dist(large_cities_centers['Lausanne'], large_cities_centers['Zürich HB'])
print('{:.2f} km'.format(lausanne_to_zurich.km))

Distance between Lausanne and Zürich as the crow flies:
174.17 km


With this function we now construct a function that returns the set of stops (train or bus, not yet filtered) that are within a given distance (`max_dist`, in km) of a given city's train station. The city's own stop is not included in the result.

In [140]:
def stops_in_range(df, city_stop_name, city_stop_coords, max_dist):
    """Return the names of all stops closer than ``max_dist`` km to a point.

    Parameters
    ----------
    df : pandas.DataFrame
        Stop table; the 'stop_name', 'stop_lat' and 'stop_lon' columns are read.
    city_stop_name : str
        Name of the reference stop. Rows carrying this name are never part of
        the result.
    city_stop_coords : tuple
        (latitude, longitude) of the reference point.
    max_dist : number
        Distance limit in kilometres; a stop qualifies when its distance is
        strictly smaller.

    Returns
    -------
    set of str
        Names of the qualifying stops. A name carried by several rows
        qualifies as soon as one of those rows is in range.
    """
    in_range = set()
    # Walk the three columns in lockstep instead of `iterrows`, which builds
    # a Series object for every row.
    for name, lat, lon in zip(df['stop_name'], df['stop_lat'], df['stop_lon']):
        # Skip the reference stop itself, and names already accepted: another
        # row with the same name cannot change the resulting set, so the
        # distance computation is not needed for it.
        if name == city_stop_name or name in in_range:
            continue
        if geo_dist(city_stop_coords, (lat, lon)).km < max_dist:
            in_range.add(name)
    return in_range

Now, for every large city, we find all stops that are less than 5 km from its train station.

In [145]:
# For every large-city station, collect the stops less than 5 km away.
stops_to_close = {}
print("Processing all stops for:")
print()
for city in large_cities_centers:
    print(city, end='')
    stops_to_close[city] = stops_in_range(df, city, large_cities_centers[city], max_dist=5)
    print(" .... done")

Processing all stops for:

Dübendorf .... done
Uster .... done
Winterthur .... done
Dietikon .... done
Zürich HB .... done
Biel/Bienne .... done
Bern .... done
Köniz .... done
Thun .... done
Luzern .... done
Zug .... done
Fribourg/Freiburg .... done
Basel SBB .... done
Schaffhausen .... done
St. Gallen .... done
Rapperswil .... done
Chur .... done
Frauenfeld .... done
Lugano .... done
Yverdon-les-Bains .... done
Lausanne .... done
Montreux .... done
Sion .... done
La Chaux-de-Fonds .... done
Neuchâtel .... done
Genève .... done
Vernier .... done


Here is the example for Lausanne:

In [147]:
# Show the set of stop names found in range of the 'Lausanne' stop.
print(stops_to_close['Lausanne'])

{'Lausanne, Rovéréaz', 'Pully, Trois-Chasseurs', 'Cery-Fleur-de-Lys', 'St-Sulpice VD, Pâqueret', 'Epalinges, Croisettes', 'Lausanne, St-Etienne', 'Lausanne, Valmont', 'Lausanne-Chauderon', 'Prilly-Malley', 'Pully-Nord', 'Ecublens VD, Champagne', 'Ecublens VD, EPFL (bus)', 'Renens VD', 'Ecublens VD, Blévallaire', 'Lausanne, Sallaz', 'Union-Prilly', 'Prilly-Chasseur', 'Lausanne, Le Foyer', 'Pully', 'La Conversion', 'Lausanne, Bourdonnette', 'Jouxtens-Mézery', 'Lausanne-Flon', 'Montblesson, Centenaire', 'Ecublens VD, Dorigny', 'Lutry', 'Ecublens VD, EPFL Piccard', 'Le Lussex', 'Montétan'}


And now we save the result to a file, so it can be used elsewhere:

In [149]:
# Persist the stop -> nearby-stops mapping as a pickle file.
with open('stops_too_close.pkl', 'wb') as output_file:
    pickle.dump(stops_to_close, output_file)

Here is an example of how to open and use the file:

In [150]:
# Reload the saved mapping. Only unpickle a file you trust: unpickling can
# execute arbitrary code.
with open('stops_too_close.pkl', 'rb') as saved_file:
    stops_too_close = pickle.load(saved_file)

print(stops_too_close['Lausanne'])

{'Lausanne, Rovéréaz', 'Pully, Trois-Chasseurs', 'Cery-Fleur-de-Lys', 'St-Sulpice VD, Pâqueret', 'Epalinges, Croisettes', 'Lausanne, St-Etienne', 'Lausanne, Valmont', 'Lausanne-Chauderon', 'Prilly-Malley', 'Pully-Nord', 'Ecublens VD, Champagne', 'Ecublens VD, EPFL (bus)', 'Renens VD', 'Ecublens VD, Blévallaire', 'Lausanne, Sallaz', 'Union-Prilly', 'Prilly-Chasseur', 'Lausanne, Le Foyer', 'Pully', 'La Conversion', 'Lausanne, Bourdonnette', 'Jouxtens-Mézery', 'Lausanne-Flon', 'Montblesson, Centenaire', 'Ecublens VD, Dorigny', 'Lutry', 'Ecublens VD, EPFL Piccard', 'Le Lussex', 'Montétan'}
