# Travelling Times

I love travelling by train. In the future, I want to move to a different city and enjoy frequent weekend trips from there.

But which city in Europe is best connected to other European cities?


## Google OR

Ref: https://developers.google.com/optimization/routing

Basically, a tourist visiting sites in a city is not too different from a delivery person dropping off packages.
And a tourist walking through a city on two separate days is similar to finding the optimal route for two delivery trucks.

## !!! Idea: Best city to live given criteria !!!
* I want to travel to Paris frequently
* I need to be well connected to Freiburg
* I occasionally need to travel to Berlin
* -> Where should I live?

## Deploy github pages

https://stackoverflow.com/questions/38136637/leaflet-with-github-pages-not-rendering
https://leaflet-extras.github.io/leaflet-providers/preview/


## Folium

https://www.earthdatascience.org/tutorials/introduction-to-leaflet-animated-maps/

In [1]:
import pandas as pd
import numpy as np
import inspect

# General preparations
import json
from datetime import datetime, timedelta

# Google Direction API
import googlemaps # Ref: https://www.tomordonez.com/google-maps-api-python/
# https://googlemaps.github.io/google-maps-services-python/docs/index.html#googlemaps.Client.geocode

from mySecrets import GOOGLE_API_DIR

# Geographic distance
import geopy.distance

# Google OR
# from ortools.constraint_solver import routing_enums_pb2
# from ortools.constraint_solver import pywrapcp

ModuleNotFoundError: No module named 'mySecrets'

In [2]:
# Set up the Google Maps API client that the geocoding and directions
# requests in this notebook go through. The API key is the GOOGLE_API_DIR
# value imported from the `mySecrets` module.
gmaps = googlemaps.Client(key=GOOGLE_API_DIR)

## Parameters

In [None]:
# List of cities with other properties: https://ec.europa.eu/eurostat/web/cities/data/database
# df_cities = pd.rea

# -> keep city IDs -> will use them later on for mapping and products!

# df_countries: [country_code][country]
# df_cities: [country_code][city_code][city][pop][lng][lat]

# -> pop -> last available pop from eurostat

In [220]:
# Define list of cities of interest: large European metropolitan areas.
# Each entry is a "City, Country" string.
# NOTE(review): this comment used to say "Central Europe", but the list also
# contains e.g. Istanbul, Lisbon and Athens.

# Ref: https://en.wikipedia.org/wiki/List_of_metropolitan_areas_in_Europe



cities = [
    "Istanbul, Turkey",
    "Paris, France",
    "London, UK",
    "Madrid, Spain",
    "Berlin, Germany",
    "Barcelona, Spain",
    "Milan, Italy",
    "Rome, Italy",
    "Athens, Greece",
    # "Manchester, UK",
    "Naples, Italy",
    "Hamburg, Germany",
    "Warsaw, Poland",
    "Lisbon, Portugal",
    "Budapest, Hungary",
    "Vienna, Austria",
    "Munich, Germany"
]

In [221]:
# One row per city of interest; the city string lives in the "dep" column.
# TODO: Assign city IDs!!!
df_cities = pd.DataFrame({"dep": cities})
df_cities.head()


Unnamed: 0,dep
0,"Istanbul, Turkey"
1,"Paris, France"
2,"London, UK"
3,"Madrid, Spain"
4,"Berlin, Germany"


In [222]:
from itertools import product

# Build every ordered (departure, destination) combination of the cities and
# drop the pairs where both ends are the same city.
# The trailing .copy() makes df_dist an independent frame: columns are added
# to it later, and writing into the filtered result of .query() would
# otherwise raise pandas' SettingWithCopyWarning.
df_dist = (
    pd.DataFrame(list(product(cities, cities)), columns=['dep', 'dest'])
    .query("dep != dest")
    .copy()
)

# Inspect
df_dist.head()

Unnamed: 0,dep,dest
1,"Istanbul, Turkey","Paris, France"
2,"Istanbul, Turkey","London, UK"
3,"Istanbul, Turkey","Madrid, Spain"
4,"Istanbul, Turkey","Berlin, Germany"
5,"Istanbul, Turkey","Barcelona, Spain"


## Translate to google codes

In [224]:
# Translate the list of cities to geocodes and store them on df_cities.
#
# Each city is looked up as "Central Station, <city>" and only the first
# match returned by the geocoder is kept. Columns added to df_cities:
#   address  - formatted address of the match
#   lat, lng - coordinates of the match
#   place_id - Google place id of the match
GEOCODE_COLUMNS = ["address", "lat", "lng", "place_id"]

geocoded = []
for city in df_cities['dep']:
    print(city)

    matches = gmaps.geocode(address="Central Station, " + city)
    if not matches:
        # Fail with the offending city name instead of an anonymous IndexError
        raise ValueError(f"Geocoding returned no result for {city!r}")

    first_match = matches[0]
    coordinates = first_match['geometry']['location']
    geocoded.append({
        "address": first_match['formatted_address'],
        "lat": coordinates['lat'],
        "lng": coordinates['lng'],
        "place_id": first_match['place_id'],
    })

# Build the columns in one go so that lat/lng get a numeric dtype; filling
# ""-initialised columns row by row would leave them as generic objects.
df_geo = pd.DataFrame(geocoded, columns=GEOCODE_COLUMNS, index=df_cities.index)
for column in GEOCODE_COLUMNS:
    df_cities[column] = df_geo[column]





Istanbul, Turkey
Paris, France
London, UK
Madrid, Spain
Berlin, Germany
Barcelona, Spain
Milan, Italy
Rome, Italy
Athens, Greece
Naples, Italy
Hamburg, Germany
Warsaw, Poland
Lisbon, Portugal
Budapest, Hungary
Vienna, Austria
Munich, Germany


In [225]:
# Geodesic distance in km between departure and destination of every pair.

# Look up each city's coordinates once instead of filtering df_cities for
# every single pair. setdefault keeps the first row if a city appears twice.
coords_by_city = {}
for city, lat, lng in zip(df_cities['dep'], df_cities['lat'], df_cities['lng']):
    coords_by_city.setdefault(city, (lat, lng))

# assign() returns a new frame, so nothing is written into a filtered slice
# and the cell can be re-run safely; dtype=float keeps `dist` numeric.
df_dist = df_dist.assign(
    dist=pd.Series(
        [
            geopy.distance.geodesic(coords_by_city[dep], coords_by_city[dest]).km
            for dep, dest in zip(df_dist['dep'], df_dist['dest'])
        ],
        index=df_dist.index,
        dtype=float,
    )
)

In [226]:
# All city pairs ordered by distance, closest first
df_dist.sort_values(by="dist")

Unnamed: 0,dep,dest,dist
151,"Naples, Italy","Rome, Italy",188.931696
121,"Rome, Italy","Naples, Italy",188.931696
222,"Budapest, Hungary","Vienna, Austria",213.51063
237,"Vienna, Austria","Budapest, Hungary",213.51063
74,"Berlin, Germany","Hamburg, Germany",252.876427
...,...,...,...
188,"Warsaw, Poland","Lisbon, Portugal",2763.503369
200,"Lisbon, Portugal","Athens, Greece",2857.774694
140,"Athens, Greece","Lisbon, Portugal",2857.774694
192,"Lisbon, Portugal","Istanbul, Turkey",3245.360049


# !! Check with distance matrix which city combinations make sense !!
 Max train speed: 300 km/h
 -> so for a 3-hour trip, only distances < 900 km make sense

With intermediate stops, something like <= 600km is probably best

In [262]:
# Keep only the pairs that are at most 600 km apart, closest first
within_range = df_dist["dist"] <= 600
df_dist_sel = df_dist[within_range].sort_values(by="dist")
df_dist_sel.head()

Unnamed: 0,dep,dest,dist
151,"Naples, Italy","Rome, Italy",188.931696
121,"Rome, Italy","Naples, Italy",188.931696
237,"Vienna, Austria","Budapest, Hungary",213.51063
222,"Budapest, Hungary","Vienna, Austria",213.51063
74,"Berlin, Germany","Hamburg, Germany",252.876427


In [263]:
# Add the geocoded address of the departure and of the destination city.
# (Addresses, not place ids: only the `address` column is looked up.)
address_by_city = df_cities.set_index('dep')['address']

# map() + assign() overwrite the two columns instead of appending new ones,
# so re-running this cell does not create duplicate *_address columns.
df_dist_sel = df_dist_sel.assign(
    dep_address=df_dist_sel['dep'].map(address_by_city),
    dest_address=df_dist_sel['dest'].map(address_by_city),
)



In [264]:
# Inspect the first rows of the selected city pairs
df_dist_sel.head()

Unnamed: 0,dep,dest,dist,dep_address,dest_address
151,"Naples, Italy","Rome, Italy",188.931696,"Stazione di Napoli Centrale, 80142 Napoli NA, ...","Rome, Metropolitan City of Rome Capital, Italy"
121,"Rome, Italy","Naples, Italy",188.931696,"Rome, Metropolitan City of Rome Capital, Italy","Stazione di Napoli Centrale, 80142 Napoli NA, ..."
237,"Vienna, Austria","Budapest, Hungary",213.51063,"Vienna Central T, Am Hbf 1, 1100 Wien, Austria","Budapest, Hungary"
222,"Budapest, Hungary","Vienna, Austria",213.51063,"Budapest, Hungary","Vienna Central T, Am Hbf 1, 1100 Wien, Austria"
74,"Berlin, Germany","Hamburg, Germany",252.876427,"Berlin Central Station, Hauptbahnhof, Europapl...","Hamburg Central Station, Hachmannplatz 16, 200..."


## Travel to cities nearby a selected city

In [223]:
# Define list of departure times: one every 30 minutes between 08:00 and
# 20:00 (both ends included) on the chosen travel day.
departure_times = pd.date_range(
    start = "2023-05-18 08:00:00",
    end = "2023-05-18 20:00:00",
    # "min" instead of the alias "T", which newer pandas releases deprecate
    freq = "30min"
)



In [307]:
# Estimate number of requests: one per selected city pair and departure time
len(df_dist_sel) * len(departure_times)

800

In [277]:
def _transit_type(connections):
    """Return the vehicle type of the second step of the first route.

    Reads ``steps[1]`` of the first leg of the first route in ``connections``
    (the list returned by ``gmaps.directions``). Returns "" when the response
    does not contain that entry.

    NOTE(review): presumably index 1 is used because the first step is the
    walk to the station -- confirm.
    """
    try:
        return connections[0]['legs'][0]['steps'][1]['transit_details']['line']['vehicle']['type']
    except (IndexError, KeyError, TypeError):
        # Only "entry is missing" is tolerated; a bare except would also
        # swallow KeyboardInterrupt and genuine programming errors.
        return ""


def _duration_value(connections):
    """Return the duration value of the first leg of the first route.

    Returns "" when the response does not contain a duration.
    The value is presumably in seconds -- confirm against the API docs.
    """
    try:
        return connections[0]['legs'][0]['duration']['value']
    except (IndexError, KeyError, TypeError):
        return ""


# One result record per (city pair, departure time) request
lst = []

for index, row in df_dist_sel.iterrows():
    dep_place = row['dep']
    dest_place = row['dest']

    for dep_time in departure_times:

        # Query directions
        connections = gmaps.directions(
            dep_place,
            dest_place,
            mode = "transit",
            transit_mode = "train",
            transit_routing_preference = "fewer_transfers",
            departure_time = dep_time
        )

        # Append to collection ("" marks values missing from the response)
        lst.append({
            'dep': dep_place,
            'dest': dest_place,
            'dep_time': dep_time,
            'type': _transit_type(connections),
            'duration': _duration_value(connections)
        })


In [301]:
# Number of requests! (one collected record per request sent)
len(lst)

800

In [292]:
# Collect results. Naming the columns keeps the frame well-formed even when
# `lst` is empty.
df_raw = pd.DataFrame(lst, columns=['dep', 'dest', 'dep_time', 'type', 'duration'])

# Filter obs without duration ("" marks a missing duration) and convert the
# duration to hours.
# Building a new frame avoids assigning into a filtered slice
# (SettingWithCopyWarning); to_numeric turns the mixed object column into a
# numeric one.
df = (
    df_raw
    .loc[df_raw['duration'] != ""]
    .assign(duration=lambda d: pd.to_numeric(d['duration']) / 60 / 60)
)

# Find fastest time per dep dest
df.groupby(['dep', 'dest']).agg({'duration':'min'}).sort_values('duration')

Unnamed: 0_level_0,Unnamed: 1_level_0,duration
dep,dest,Unnamed: 2_level_1
"Naples, Italy","Rome, Italy",1.198056
"Rome, Italy","Naples, Italy",1.280556
"Hamburg, Germany","Berlin, Germany",2.05
"Berlin, Germany","Hamburg, Germany",2.183333
"Vienna, Austria","Budapest, Hungary",2.333333
"Budapest, Hungary","Vienna, Austria",2.433333
"Barcelona, Spain","Madrid, Spain",2.483333
"Madrid, Spain","Barcelona, Spain",2.483333
"Paris, France","London, UK",2.766667
"Milan, Italy","Rome, Italy",2.983333


In [70]:
# Find minimum duration by dep / dist


 

adsf