In [68]:
import pathlib
import os
import requests
from urllib.parse import urlparse, parse_qs
from io import BytesIO
import pandas as pd
import json

import math


# city api https://public.opendatasoft.com/

In [34]:
df_metadata = pd.read_csv("counting_stations_germany_metadata.csv", index_col = 0)
df_metadata[:5]

Unnamed: 0,name,number,latitude,longitude,id_list,fetched data ?
0,Sommerda,1,51.287374,11.060599,100055269,0
1,Stuttgart,1,48.78424,9.147031,100063203,1
2,Stuttgart,2,48.826,9.21488,100063205,0
3,Stuttgart,3,48.716494,9.08652,100061257,0
4,Stuttgart,4,48.739821,9.152228,100061633,1


In [61]:
city_names = list(df_metadata["name"])
city_names = list(dict.fromkeys(city_names))
if "Region Hanover" in city_names: 
    city_names.remove("Region Hanover")
city_names

['Sommerda',
 'Stuttgart',
 'Arnsberg',
 'Heilbronn',
 'Detmold',
 'Böblingen',
 'Heidelberg',
 'Lörrach',
 'Kirchheim u. Teck',
 'Rostock',
 'Düsseldorf',
 'Göttingen',
 'Nuremberg',
 'Freiburg',
 'Karlsruhe',
 'Tübingen',
 'Erkelenz',
 'Hanover',
 'Offenbach',
 'Bonn',
 'Münster',
 'Hamburg',
 'Munich',
 'Erlangen',
 'Ludwigsburg',
 'Mülheim',
 'Oberhausen',
 'Dortmund',
 'Göppingen',
 'Aschaffenburg',
 'Konstanz',
 'Erftstadt',
 'Osnabrück',
 'Darmstadt',
 'Litzendorf']

In [62]:
BASE_URL = "https://nominatim.openstreetmap.org/search.php?format=jsonv2&city="

In [63]:
columns = ["city", "latitude", "longitude"]
latitute = []
longitude = []
for city in city_names:
    print(city)
    url_endpoint = BASE_URL + city
    response = requests.get(url_endpoint)
    data_json = json.loads(response.content)  
    lat = data_json[0]["lat"]
    long = data_json[0]["lon"]
    latitute.append(lat)
    longitude.append(long)    
    


Sommerda
Stuttgart
Arnsberg
Heilbronn
Detmold
Böblingen
Heidelberg
Lörrach
Kirchheim u. Teck
Rostock
Düsseldorf
Göttingen
Nuremberg
Freiburg
Karlsruhe
Tübingen
Erkelenz
Hanover
Offenbach
Bonn
Münster
Hamburg
Munich
Erlangen
Ludwigsburg
Mülheim
Oberhausen
Dortmund
Göppingen
Aschaffenburg
Konstanz
Erftstadt
Osnabrück
Darmstadt
Litzendorf


In [65]:
lat

'49.9123064'

In [66]:
df = pd.DataFrame(list(zip(city_names, latitute,longitude)),
               columns =columns)  

df

Unnamed: 0,city,latitude,longitude
0,Sommerda,51.1618258,11.1174874
1,Stuttgart,48.7784485,9.1800132
2,Arnsberg,51.4002384,8.0605908
3,Heilbronn,49.142291,9.218655
4,Detmold,51.936284,8.8791526
5,Böblingen,48.684969,9.0113444
6,Heidelberg,49.4093582,8.694724
7,Lörrach,47.6120896,7.6607218
8,Kirchheim u. Teck,48.6480545,9.4510227
9,Rostock,54.0924445,12.1286127


In [67]:
file_name = "geolocation_cities" + ".csv"

path = os.path.join("metadata",file_name )

df.to_csv(path)

# Calculate distance from center to bike counting station

In [116]:
def calculate_distance(lat1, lon1, lat2, lon2): # in km
    R = 6371 # km
    φ1 = lat1 * math.pi/180 # φ, λ in radians
    φ2 = lat2 * math.pi/180
    Δφ = (lat2-lat1) * math.pi/180
    Δλ = (lon2-lon1) * math.pi/180

    a = math.sin(Δφ/2) * math.sin(Δφ/2) + math.cos(φ1) * math.cos(φ2) * math.sin(Δλ/2) * math.sin(Δλ/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))

    d = R * c # in metres
    return round(d,4)
    
lat1 = 49.872775
lon1 = 8.651177
lat2 = 49.9123064
lon2 = 11.0096505
calculate_distance(lat1, lon1, lat2, lon2)

168.9979

In [117]:
# read data from metadata

df_metadata = pd.read_csv("counting_stations_germany_metadata.csv", index_col = 0)
df_metadata[:5]

Unnamed: 0,name,number,latitude,longitude,id_list,fetched data ?
0,Sommerda,1,51.287374,11.060599,100055269,0
1,Stuttgart,1,48.78424,9.147031,100063203,1
2,Stuttgart,2,48.826,9.21488,100063205,0
3,Stuttgart,3,48.716494,9.08652,100061257,0
4,Stuttgart,4,48.739821,9.152228,100061633,1


In [118]:
df_metadata.loc[df_metadata["name"] == "Region Hanover", "name"] = "Hanover"

In [111]:
df_cities_geolocation = pd.read_csv("metadata/geolocation_cities.csv", index_col = 0)
df_cities_geolocation[:5]

Unnamed: 0,city,latitude,longitude
0,Sommerda,51.161826,11.117487
1,Stuttgart,48.778449,9.180013
2,Arnsberg,51.400238,8.060591
3,Heilbronn,49.142291,9.218655
4,Detmold,51.936284,8.879153


In [112]:
# calcuate distance between city centre and bike counting station
distances = []
for index, row in df_metadata.iterrows():
    # counting station data
    counting_station_lat = row['latitude']
    counting_station_long = row['longitude']
    counting_station_city = row["name"]
    # city data
    city_series = df_cities_geolocation.loc[df_cities_geolocation['city'] == counting_station_city]
    city_lat = city_series["latitude"].values[0]
    city_long = city_series["longitude"].values[0]

    distance = calculate_distance(counting_station_lat, counting_station_long, city_lat, city_long)
    distances.append(distance)

In [114]:
# add distances to dataframe

df_metadata["distance_to_city_centre"] = distances

In [115]:
df_metadata

Unnamed: 0,name,number,latitude,longitude,id_list,fetched data ?,distance
0,Sommerda,1,51.287374,11.060599,100055269,0,14.5115
1,Stuttgart,1,48.78424,9.147031,100063203,1,2.5009
2,Stuttgart,2,48.826,9.21488,100063205,0,5.8718
3,Stuttgart,3,48.716494,9.08652,100061257,0,9.7184
4,Stuttgart,4,48.739821,9.152228,100061633,1,4.7536
5,Stuttgart,5,48.740003,9.226692,100061648,1,5.4757
6,Stuttgart,6,48.811509,9.167497,100062943,1,3.7888
7,Stuttgart,7,48.809354,9.105381,100062945,1,6.4573
8,Stuttgart,8,48.779637,9.248607,100063204,1,5.0278
9,Arnsberg,1,51.440702,7.964296,100063831,0,8.0516
