In [85]:
import pandas as pd 
import networkx as nx
import numpy as np
from functools import reduce
import datetime
import tqdm
import ast
import json
import sys
import time
import pickle
import re

In [2]:
timetable_intercity = pd.read_csv("./data/export-intercites-gtfs-last/timetable_intercity.csv")
timetable_ter = pd.read_csv("./data/ter/timetable_ter.csv")

In [3]:
timetable = pd.concat([timetable_intercity, timetable_ter], sort=True)

In [4]:
timetable["after"] = timetable["arrival_time"].apply(
    lambda x: datetime.datetime.strptime(ast.literal_eval(x)[0], "%H:%M:%S"))
timetable["before"] = timetable["arrival_time"].apply(
    lambda x: datetime.datetime.strptime(ast.literal_eval(x)[-1], "%H:%M:%S"))

In [35]:
def build_graph_within_timeframe(start_time, timeframe, timetable) : 
    stop_2_id = {}
    start_time = datetime.datetime.strptime(start_time, "%H:%M:%S")

    timetable = timetable[timetable["before"] >= start_time]
    timetable = timetable[timetable["after" ] <= start_time + datetime.timedelta(0,timeframe*60)]
    G = nx.Graph()

    frame_indices = []
    start = time.time()
    for i in timetable.index : 
        try :           
            arrival_times = ast.literal_eval(timetable["arrival_time"][i])
            times = [datetime.datetime.strptime(arrival_times[j], "%H:%M:%S") for j in range(len(arrival_times))]
            filtered_times = [time_ for time_ in times if (time_ - start_time).seconds <= (timeframe * 60)]

            indices = np.array([times.index(time) for time in filtered_times])

            if len(indices) == 0: continue

            frame_indices.append(i)

            departure_times = list(np.array(ast.literal_eval(timetable["departure_time"][i]))[indices])
            arrival_times = list(np.array(ast.literal_eval(timetable["arrival_time"][i]))[indices])

            stop_names = list(np.array(ast.literal_eval(timetable["stop_name"][i]))[indices])
        except ValueError : 
            #print(timetable["stop_name"][i])
            continue

        longitudes = list(np.array(ast.literal_eval(timetable["stop_lon"][i]))[indices])
        latitudes = list(np.array(ast.literal_eval(timetable["stop_lat"][i]))[indices])
        durations = [0.] + ast.literal_eval(timetable["durations"][i])

        durations = list(np.array(durations)[indices])
        for k in range(len(stop_names)-1) :  
            curr_stop = stop_names[k]
            next_stop = stop_names[k+1]

            if curr_stop not in stop_2_id.keys() : 
                stop_2_id[curr_stop] = len(stop_2_id.keys())
            if next_stop not in stop_2_id.keys(): 
                stop_2_id[next_stop] = len(stop_2_id.keys())

            G.add_edge(stop_2_id[curr_stop], stop_2_id[next_stop], weight=durations[k+1],
                       departure_time=departure_times[k], arrival_time=arrival_times[k+1], 
                       lon_lat=(longitudes[k+1], latitudes[k+1]))
    paths = nx.shortest_path(G)
    print("Execution time {:.2f} seconds".format(time.time() - start))
    return G, paths, stop_2_id

In [36]:
graph, paths, stop_2_id = build_graph_within_timeframe("07:30:00", 8 * 60, timetable)

Execution time 27.57 seconds


In [37]:
graph.edges[(0, 1)]

{'weight': 165.0,
 'departure_time': '08:21:00',
 'arrival_time': '11:06:00',
 'lon_lat': (6.17427169, 48.68978225)}

In [38]:
paths[1][882]

[1, 447, 902, 877, 882]

In [39]:
graph.edges[(1, 0)]

{'weight': 165.0,
 'departure_time': '08:21:00',
 'arrival_time': '11:06:00',
 'lon_lat': (6.17427169, 48.68978225)}

In [40]:
# graph.edges[(0, 882)]

In [41]:
stops_name_by_id = {v: k for k, v in stop_2_id.items()}

In [42]:
print(stops_name_by_id[1])
print(stops_name_by_id[0])
print(stops_name_by_id[882])

Nancy-Ville
Paris-Est
Les Laumes-Alésia


In [43]:
stops_name_by_id_df = pd.DataFrame.from_dict(stops_name_by_id, orient='index', columns=['name'])

In [44]:
with open('./data/_routing_stops_name_by_id.json', 'w') as outfile:
    json.dump(stops_name_by_id, outfile, sort_keys=True)

In [45]:
with open('./data/_historic_cities.json') as infile:
    HISTORIC_CITIES = json.load(infile)
with open('./data/_art_history_cities.json') as infile:
    ART_HISTORY_CITIES = json.load(infile)

l = list(HISTORIC_CITIES.keys())
l.extend([item for item in l for l in ART_HISTORY_CITIES.values()])
l = set(l)
# l - set(stops_name_by_id_df[stops_name_by_id_df.name.isin(l)].name.values)
len(set(stops_name_by_id_df[stops_name_by_id_df.name.isin(l)].name.values)), len(l)

(140, 174)

In [26]:
stops_name_by_id_df[stops_name_by_id_df.name.str.contains('Villeneuve')]

Unnamed: 0,name
469,Villeneuve-d'Aveyron
1252,Villeneuve-Loubet-Plage
1339,Villeneuve-sur-Yonne
1343,Villeneuve-la-Guyard
2068,Villeneuve-la-Comtesse
2231,Villeneuve-sur-Allier


In [86]:
with open('./data/_routing_paths.pkl', 'wb') as f:
    pickle.dump(paths, f, pickle.HIGHEST_PROTOCOL)

In [87]:
with open('./data/_routing_graph.pkl', 'wb') as f:
    pickle.dump(graph, f, pickle.HIGHEST_PROTOCOL)

In [82]:
#for stop_id, stop_name in stops_name_by_id.items():
#    print(stop_id, stop_name)
#paths[1][2]
for dest_id, dest_val in stops_name_by_id_df[stops_name_by_id_df.name.isin(l)].iterrows():
    dest_name = dest_val.values[0]
    for source_id in range(len(stop_2_id)):
        if source_id == dest_id: continue
        source_name = stops_name_by_id[source_id]
        if dest_id not in paths[source_id]:
            pass # print('No path from {} to {}'.format(source_name, dest_name))
        else:
            print(paths[source_id][dest_id])
    break

[0, 1]
[2, 1]
[3, 2, 1]
[4, 3, 2, 1]
[5, 15, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[6, 5, 15, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[7, 8, 1737, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[8, 1737, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[9, 8, 1737, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[10, 9, 8, 1737, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[11, 10, 9, 8, 1737, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[12, 9, 8, 1737, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[15, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[17, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[18, 17, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[19, 15, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[20, 16, 23, 1730, 1023, 1591, 1594, 907, 906, 905, 904, 0, 1]
[21, 22, 16, 23, 1730, 1023, 15

[1327, 89, 109, 110, 877, 902, 447, 1]
[1328, 1327, 89, 109, 110, 877, 902, 447, 1]
[1329, 89, 109, 110, 877, 902, 447, 1]
[1330, 1329, 89, 109, 110, 877, 902, 447, 1]
[1331, 1325, 1326, 110, 877, 902, 447, 1]
[1332, 89, 109, 110, 877, 902, 447, 1]
[1333, 119, 89, 109, 110, 877, 902, 447, 1]
[1334, 118, 119, 89, 109, 110, 877, 902, 447, 1]
[1335, 1334, 118, 119, 89, 109, 110, 877, 902, 447, 1]
[1336, 1334, 118, 119, 89, 109, 110, 877, 902, 447, 1]
[1337, 117, 118, 119, 89, 109, 110, 877, 902, 447, 1]
[1338, 1128, 356, 877, 902, 447, 1]
[1339, 1129, 356, 877, 902, 447, 1]
[1340, 1129, 356, 877, 902, 447, 1]
[1341, 1129, 356, 877, 902, 447, 1]
[1342, 1341, 1129, 356, 877, 902, 447, 1]
[1343, 1344, 1129, 356, 877, 902, 447, 1]
[1344, 1129, 356, 877, 902, 447, 1]
[1345, 111, 110, 877, 902, 447, 1]
[1346, 1126, 111, 110, 877, 902, 447, 1]
[1347, 112, 111, 110, 877, 902, 447, 1]
[1348, 877, 902, 447, 1]
[1349, 1348, 877, 902, 447, 1]
[1350, 1349, 1348, 877, 902, 447, 1]
[1351, 1350, 1349, 13