In [1]:
import json
import pandas as pd
import numpy as np
import folium
import re

The raw data is a list of disconnected points (GeoJSON 'features'), each carrying properties that relate it to a train line/track.  
We want to extract a list of lines from this data.

In [2]:
DATA_DIR = 'data/'
# Read the GeoJSON explicitly as UTF-8 (the platform default encoding could otherwise garble
# the accented names it contains) and make sure the file handle is closed afterwards.
with open(DATA_DIR + 'courbe-des-voies.geojson', encoding='utf-8') as geojson_file:
    data = json.load(geojson_file)

First, we group the points by train line/track while retaining all the GeoJSON features:

In [3]:
# Bucket every GeoJSON feature under an identifier built from its track and line properties.
lines_by_features = {}
for feature in data['features']:
    props = feature['properties']
    # Together, these four properties identify one track of one line.
    key = '{},{},{},{}'.format(props['nom_voie'], props['code_ligne'],
                               props['libelle_voie'], props['libelle_ligne'])
    # Start a new bucket on first sight of the key, then add the feature to it.
    lines_by_features.setdefault(key, []).append(feature)

Then, for each line/track's points, we only keep the point's code ("pk_debut" -- most likely the starting *point kilométrique*, i.e. the kilometre marker along the line, rather than a primary key) and coordinates. We will then sort the points by their code, which gives us the ordered points of each line/track.

In [4]:
lines_by_points = []
for key, line in lines_by_features.items():
    # Maps a zero-padded start code to the coordinates of the point carrying it.
    points = {}
    for feature in line:
        geometry = feature['geometry']
        if not geometry:
            # A feature without geometry has no position to contribute.
            continue
        coord = geometry['coordinates']
        raw_code = feature['properties']['pk_debut']
        # The code's two numeric parts are separated by '+' or, failing that, by '-'.
        separator = '+' if '+' in raw_code else '-'
        parts = raw_code.split(separator)
        # Zero-pad both parts to three digits so that a plain string sort orders the codes
        # numerically (e.g. "97+123" becomes "097+123", which sorts before "100+345").
        padded_code = '{:03}+{:03}'.format(int(parts[0]), int(parts[1]))
        points[padded_code] = coord
    lines_by_points.append({'name': key, 'points': points})

In [5]:
# Reorder each line's points by ascending start code (the dict keys).
for line_entry in lines_by_points:
    line_entry['points'] = dict(sorted(line_entry['points'].items()))

Now that we have the ordered points for each line/track, we simply build a geometrical line by connecting them. We also add some additional GeoJSON properties that will be shown as tooltips, so we can debug/visualize things better on the Folium map.

In [6]:
lines_geojson_features = []
for line_by_points in lines_by_points:
    # The points dict was sorted by start code beforehand, so its value order is the order
    # in which the coordinates have to be connected.
    coordinates = list(line_by_points['points'].values())
    # 'name' is the comma-joined key "nom_voie,code_ligne,libelle_voie,libelle_ligne".
    # Split at most three times so that commas inside the last field (the line name) stay
    # part of it instead of cutting it short.
    track_label, line_code, track_name, line_name = line_by_points['name'].split(',', 3)
    # Every line yields exactly one feature, even with few or no points: later cells
    # address features by their position in this list, so positions must not shift.
    lines_geojson_features.append({
        'type': 'Feature',
        'properties': {'track_label': track_label, 'line_code': line_code,
                       'track_name': track_name, 'line_name': line_name},
        'geometry': {
            'type': 'LineString',
            'coordinates': coordinates,
        },
    })

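Before plotting the map, we manually correct the line names of a number of tracks (identified by their position in the feature list):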

In [7]:
# Manual corrections of the 'line_name' property: corrected name -> positions of the affected
# features in `lines_geojson_features`.
#
# The positions are only meaningful for the feature order produced from the current data
# file and must be re-checked whenever that file changes.
#
# Names have to start with "Ligne", "Raccordement" or "Voies" and must not carry stray
# whitespace or punctuation; otherwise the route lookup cannot match their end points.
LINE_NAME_OVERRIDES = {
    'Ligne de Tours à Le Mans': range(27, 29),
    'Ligne de Saint-Cyr à Surdon': range(76, 78),
    'Ligne de Culoz à Modane': [106],
    'Ligne de Metz-Sablon à Woippy': range(431, 435),
    'Ligne de Villers-les-Pots à Petit-Croix': range(444, 450),
    'Ligne de Tours à Saint-Pierre-des-Corps': [463],
    'Ligne de Bordeaux-Saint-Jean à Ravezies': [473, 474],
    'Ligne de Sartrouville à Villeneuve-Saint-Georges': [908, 909],
    'Ligne de Gagny à Strasbourg-Ville': [912],
    'Ligne de Coubert à gare de Massy TGV': [*range(913, 919), 2206, 2207],
    "Ligne de L'Estaque à Marseille St-Charles": range(992, 996),
    'Ligne de Pont-de-Veyle à Savoie': [1036, 1037],
    'Ligne de Bobigny à Sucy-Bonneuil': [1049, 1554, 2116, *range(2150, 2155), 2204],
    'Ligne de St-Amour à Gevrey-Chambertin': [1065],
    'Ligne de Nanterre-La Folie à La Garenne-Bezons': [1091, 1092],
    'Ligne de Lyon-Guillotière à Lyon-Perrache': [1230, 1231, 1286, 1287],
    'Ligne de Dijon à Gevrey-Chambertin': [1358, 1359],
    'Ligne de Grenoble à Montmélian': [*range(1408, 1411), 2784],
    'Ligne de Paris à Versailles': [1503, 1504],
    'Ligne de Mantes-la-Jolie à Cherbourg': [1612, 1613],
    'Ligne de Mans à la Plumasserie': [1704],
    'Ligne de Mans à la Duboisière': [1705],
    'Ligne de Mans à la St-Georges': [1706],
    'Ligne de La Clarté à Mans': [1948, 1949],
    'Ligne de Lyon-St-Clair à Sathonay': [*range(2239, 2242), 2459],
    'Ligne de Avignon-Centre à Avignon TGV': [2587],
    'Ligne de Mans à Pontlieue': [2673, 2674],
}

for line_name, indices in LINE_NAME_OVERRIDES.items():
    for i in indices:
        lines_geojson_features[i]['properties']['line_name'] = line_name

In [8]:
# Transformations to apply for station names. Keys are regular expressions (hence the
# escaped parentheses); values are their replacements.
NORMALIZATIONS = {'ç': 'c',
                  'é': 'e',
                  'è': 'e',
                  'ë': 'e',
                  'ô': 'o',
                  'â': 'a',
                  'î': 'i',
                  'ê': 'e',
                  '\\(': '',
                  '\\)': '',
                  'û': 'u',
                  '-': ' '}

def get_fixed_name(name):
    """Return a normalized form of a station name, suitable for comparisons.

    The name is lower-cased, every pattern in NORMALIZATIONS is substituted (listed accents
    dropped, parentheses removed, hyphens turned into spaces), and whitespace is collapsed
    to single spaces with none at either end.

    Normalizing an already normalized name returns it unchanged.

    Args:
        name: The station name as a string.

    Returns:
        The normalized name as a string.
    """
    search = name.lower()
    for pat, repl in NORMALIZATIONS.items():
        # The keys are regex patterns: a literal str.replace would search for a backslash
        # followed by a parenthesis and therefore never remove the parentheses.
        search = re.sub(pat, repl, search)
    # Trailing or doubled spaces (also those left behind by removed characters) would make
    # otherwise equal names compare unequal.
    return ' '.join(search.split())

In [10]:
def get_route(departure, destination, features=None):
    """Return the track features whose line name connects the two given stations.

    A line name such as "Ligne de X à Y" is split into its two end points; a feature is
    selected when the normalized end points equal the normalized `departure` and
    `destination`, in either direction. Line names that do not follow this shape are
    skipped.

    Args:
        departure: Name of one end point of the line.
        destination: Name of the other end point of the line.
        features: Optional list of GeoJSON features (each with a 'line_name' property) to
            search. Defaults to the notebook-wide `lines_geojson_features`.

    Returns:
        The list of matching features, in their original order (empty if none match).
    """
    if features is None:
        features = lines_geojson_features

    # Both the pattern and the normalized inputs are the same for every feature, so they
    # are prepared once instead of on every iteration.
    pattern = re.compile(r'(Ligne|Raccordement|Voies) d(\'|e|u) ?(.*) (à|au|vers|aux|à) (.*)')
    departure = get_fixed_name(departure)
    destination = get_fixed_name(destination)
    wanted = ((departure, destination), (destination, departure))

    mylist = []
    for feature in features:
        match = pattern.search(feature['properties']['line_name'])
        if match is None:
            continue
        # Groups 3 and 5 hold the two end points named in the line name.
        dep = get_fixed_name(match.group(3))
        dest = get_fixed_name(match.group(5))
        if (dep, dest) in wanted:
            mylist.append(feature)
    return mylist

In [12]:
# Example lookup. The recorded output below is an empty list, i.e. no line name matched
# this pair of stations.
get_route('Lièpvre','Sélestat')

[]

In [11]:
# Wrap the tracks of the requested route in a FeatureCollection and draw them on a map.
lines_geojson = {'type': 'FeatureCollection', 'features': get_route('Évreux-Embranchement','Quetteville')}
map_ = folium.Map([48.8566, 2.3522], tiles='cartodbpositron', zoom_start=5)
if lines_geojson['features']:
    # The tooltip shows the properties attached to each line feature.
    folium.GeoJson(lines_geojson,
                   tooltip=folium.GeoJsonTooltip(fields=['track_label', 'track_name', 'line_code', 'line_name'],
                                                 aliases=['Track label', 'Track name', 'Line code', 'Line name'])
                  ).add_to(map_)
else:
    # Skip the layer when nothing matched: the tooltip has no feature to look its fields up
    # in. NOTE(review): folium appears to fail on an empty collection here -- confirm with
    # the installed version.
    print('No track found for this route; showing the base map only.')
map_