In [125]:
import pandas as pd
from collections import defaultdict
from stop import Stop

## Chargement des données

In [126]:
routes = pd.read_csv('../IDFM-gtfs/routes.txt')
trips = pd.read_csv('../IDFM-gtfs/trips.txt')
stop_times = pd.read_csv('../IDFM-gtfs/stop_times.txt')
stops = pd.read_csv('../IDFM-gtfs/stops.txt')

  trips = pd.read_csv('../IDFM-gtfs/trips.txt')


In [127]:
routes.head()

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_sort_order
0,IDFM:C01371,IDFM:Operator_100,1,1,,1,,FFBE00,000000,
1,IDFM:C01286,IDFM:Operator_100,322,322,,3,,82C8E6,000000,
2,IDFM:C01153,IDFM:Operator_100,124,124,,3,,FF82B4,000000,
3,IDFM:C00173,IDFM:1081,18,18,,3,,DB006B,FFFFFF,
4,IDFM:C00391,IDFM:1058,4334,4334,,3,,E2001A,FFFFFF,


## Filtrage des données

### Lignes et parcours

In [128]:
trips = trips.merge(routes[['route_id', 'route_short_name']], on='route_id', how='left')

In [129]:
# Sélection des lignes et parcours de metro uniquement
metro_routes = routes[routes['route_type'] == 1]['route_id']
metro_trips = trips[trips['route_id'].isin(metro_routes)]

In [131]:
# On sélectionne les 16 premiers parcours de chaque ligne afin d'inclure tout les chemins possibles
selected_trips = metro_trips.groupby(['route_short_name', 'direction_id']).head(16).reset_index()

selected_trips.head()

Unnamed: 0,index,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed,route_short_name
0,49891,IDFM:C01371,IDFM:1,IDFM:RATP:107769-C01371-COU_RATP_4864461_95549...,La Défense (Grande Arche),,1,,,2,0,1
1,49892,IDFM:C01371,IDFM:1,IDFM:RATP:107769-C01371-COU_RATP_4864461_955496_3,La Défense (Grande Arche),,1,,,2,0,1
2,49894,IDFM:C01371,IDFM:1,IDFM:RATP:107769-C01371-COU_RATP_4864461_95549...,La Défense (Grande Arche),,1,,,2,0,1
3,49900,IDFM:C01371,IDFM:1,IDFM:RATP:107769-C01371-COU_RATP_4864461_955496_5,La Défense (Grande Arche),,1,,,2,0,1
4,49901,IDFM:C01371,IDFM:1,IDFM:RATP:107769-C01371-COU_RATP_4864461_955496_7,La Défense (Grande Arche),,1,,,2,0,1


### Arrêts

In [132]:
# On filtre les stop_times pour ne garder que ceux correspondants aux parcours sélectionnés
stop_times = stop_times[stop_times['trip_id'].isin(selected_trips['trip_id'])]
stop_times = stop_times.sort_values(['trip_id', 'stop_sequence'])
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,local_zone_id,stop_headsign,timepoint
1432168,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:30:00,07:30:00,IDFM:22175,0,0,1,,,1
1432169,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:31:00,07:31:00,IDFM:22173,1,0,0,,,1
1432170,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:32:00,07:32:00,IDFM:22160,2,0,0,,,1
1432171,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:34:00,07:34:00,IDFM:22158,3,0,0,,,1
1432172,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:35:00,07:35:00,IDFM:22155,4,0,0,,,1


In [133]:
def to_seconds(t):
    h, m, s = map(int, t.split(":"))
    return h * 3600 + m * 60 + s

# Expression des heures en secondes
stop_times['arr_sec'] = stop_times['arrival_time'].apply(to_seconds)
stop_times['dep_sec'] = stop_times['departure_time'].apply(to_seconds)

In [134]:
# Sélection des arrêts correspondants aux stop_times 
stops_metro = stops[stops['stop_id'].isin(stop_times['stop_id'])]
stops_metro.count()

stop_id                752
stop_code                0
stop_name              752
stop_desc                0
stop_lon               752
stop_lat               752
zone_id                752
stop_url                 0
location_type          752
parent_station         752
stop_timezone          752
level_id                 0
wheelchair_boarding    752
platform_code            0
dtype: int64

In [135]:
# On récupère les stops commerciaux 
stops_commercial = stops[stops['location_type'] == 1][['stop_id', 'stop_name']]
# On crée un dictionnaire {stop_id_commercial : nom_station}
parent_dict = stops_commercial.set_index('stop_id')['stop_name'].to_dict()

In [136]:
# Filtrage des arrêrts physiques
physical_stops = stops_metro[stops_metro["location_type"] == 0]

physical_stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lon,stop_lat,zone_id,stop_url,location_type,parent_station,stop_timezone,level_id,wheelchair_boarding,platform_code
60,IDFM:22009,,Campo-Formio,,2.358469,48.835595,1.0,,0,IDFM:71063,Europe/Paris,,2,
65,IDFM:22392,,Crimée,,2.376936,48.890886,1.0,,0,IDFM:72013,Europe/Paris,,2,
80,IDFM:21940,,Marcel Sembat,,2.243468,48.833808,2.0,,0,IDFM:71054,Europe/Paris,,2,
119,IDFM:22133,,Simplon,,2.347795,48.893815,1.0,,0,IDFM:71556,Europe/Paris,,2,
153,IDFM:22057,,Concorde,,2.323181,48.86615,1.0,,0,IDFM:71298,Europe/Paris,,2,


In [137]:
stop_dict = {} # {stop_id: Stop()}

for row in physical_stops.itertuples(index=False):
    name = parent_dict.get(row.parent_station, row.stop_name)  # nom de la station commerciale si dispo
    key = row.parent_station if pd.notna(row.parent_station) else row.stop_id # id de la station parent si elle existe
    if key not in stop_dict:
        stop_dict[row.stop_id] = Stop(key, name, row.stop_lat, row.stop_lon)

In [138]:
# On ajoute les infos des stops à stop_times
stop_times = stop_times.merge(physical_stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']], on='stop_id', how='left')
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,local_zone_id,stop_headsign,timepoint,arr_sec,dep_sec,stop_name,stop_lat,stop_lon
0,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:30:00,07:30:00,IDFM:22175,0,0,1,,,1,27000,27000,Corvisart,48.82986,2.350611
1,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:31:00,07:31:00,IDFM:22173,1,0,0,,,1,27060,27060,Glacière,48.83117,2.343497
2,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:32:00,07:32:00,IDFM:22160,2,0,0,,,1,27120,27120,Saint-Jacques,48.832916,2.337154
3,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:34:00,07:34:00,IDFM:22158,3,0,0,,,1,27240,27240,Denfert-Rochereau,48.834313,2.332872
4,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:35:00,07:35:00,IDFM:22155,4,0,0,,,1,27300,27300,Raspail,48.839085,2.33055


In [139]:
stop_times = stop_times.sort_values(['trip_id', 'stop_sequence'])
stop_times['next_stop_id'] = stop_times.groupby('trip_id')['stop_id'].shift(-1) # On ajoute une colonne avec le prochain arrêt
stop_times['next_arr_sec'] = stop_times.groupby('trip_id')['arr_sec'].shift(-1) # On ajoute l'heure d'arrivé au prochain arrêt
stop_times['duration'] = stop_times['next_arr_sec'] - stop_times['dep_sec'] # Durée entre 2 arrêts consécutifs
stop_times = stop_times.merge(trips[['trip_id', 'route_short_name']], on='trip_id', how='left')

In [140]:
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,local_zone_id,stop_headsign,timepoint,arr_sec,dep_sec,stop_name,stop_lat,stop_lon,next_stop_id,next_arr_sec,duration,route_short_name
0,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:30:00,07:30:00,IDFM:22175,0,0,1,,,1,27000,27000,Corvisart,48.82986,2.350611,IDFM:22173,27060.0,60.0,6
1,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:31:00,07:31:00,IDFM:22173,1,0,0,,,1,27060,27060,Glacière,48.83117,2.343497,IDFM:22160,27120.0,60.0,6
2,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:32:00,07:32:00,IDFM:22160,2,0,0,,,1,27120,27120,Saint-Jacques,48.832916,2.337154,IDFM:22158,27240.0,120.0,6
3,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:34:00,07:34:00,IDFM:22158,3,0,0,,,1,27240,27240,Denfert-Rochereau,48.834313,2.332872,IDFM:22155,27300.0,60.0,6
4,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:35:00,07:35:00,IDFM:22155,4,0,0,,,1,27300,27300,Raspail,48.839085,2.33055,IDFM:22172,27360.0,60.0,6


## Construction du graphe

In [141]:
edges = stop_times.dropna(subset=['next_stop_id']).copy()

In [142]:
# u et v sont les objets Stop correspondant aux identifiants
edges['u'] = edges['stop_id'].map(stop_dict)
edges['v'] = edges['next_stop_id'].map(stop_dict)
edges.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,local_zone_id,stop_headsign,timepoint,...,dep_sec,stop_name,stop_lat,stop_lon,next_stop_id,next_arr_sec,duration,route_short_name,u,v
0,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:30:00,07:30:00,IDFM:22175,0,0,1,,,1,...,27000,Corvisart,48.82986,2.350611,IDFM:22173,27060.0,60.0,6,Corvisart (IDFM:71013),Glacière (IDFM:71026)
1,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:31:00,07:31:00,IDFM:22173,1,0,0,,,1,...,27060,Glacière,48.83117,2.343497,IDFM:22160,27120.0,60.0,6,Glacière (IDFM:71026),Saint-Jacques (IDFM:71041)
2,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:32:00,07:32:00,IDFM:22160,2,0,0,,,1,...,27120,Saint-Jacques,48.832916,2.337154,IDFM:22158,27240.0,120.0,6,Saint-Jacques (IDFM:71041),Denfert-Rochereau (IDFM:71056)
3,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:34:00,07:34:00,IDFM:22158,3,0,0,,,1,...,27240,Denfert-Rochereau,48.834313,2.332872,IDFM:22155,27300.0,60.0,6,Denfert-Rochereau (IDFM:71056),Raspail (IDFM:71088)
4,IDFM:RATP:107494-C01376-COU_RATP_5074966_11992...,07:35:00,07:35:00,IDFM:22155,4,0,0,,,1,...,27300,Raspail,48.839085,2.33055,IDFM:22172,27360.0,60.0,6,Raspail (IDFM:71088),Edgar Quinet (IDFM:71108)


In [143]:
# {Stop: {Stop : durée}}
graph = defaultdict(dict)
for row in edges.itertuples(index=False):
    u, v, w, route = row.u, row.v, row.duration, row.route_short_name

    if pd.notnull(u) and pd.notnull(v):
        if v not in graph[u] or graph[u][v]["duration"] > w:
            # Le graphe est non-orienté
            graph[u][v] = {"duration": w, "routes": set([route])}
        else:
            graph[u][v]["routes"].add(route)
        
        if u not in graph[v] or graph[v][u]["duration"] > w:
            graph[v][u] = {"duration": w, "routes": set([route])}
        else:
            graph[v][u]["routes"].add(route)

        

In [153]:
graph

defaultdict(dict,
            {Corvisart (IDFM:71013): {Glacière (IDFM:71026): {'duration': 60.0,
               'routes': {'6'}},
              Place d'Italie (IDFM:71033): {'duration': 60.0,
               'routes': {'6'}}},
             Glacière (IDFM:71026): {Corvisart (IDFM:71013): {'duration': 60.0,
               'routes': {'6'}},
              Saint-Jacques (IDFM:71041): {'duration': 60.0, 'routes': {'6'}}},
             Saint-Jacques (IDFM:71041): {Glacière (IDFM:71026): {'duration': 60.0,
               'routes': {'6'}},
              Denfert-Rochereau (IDFM:71056): {'duration': 60.0,
               'routes': {'6'}}},
             Denfert-Rochereau (IDFM:71056): {Saint-Jacques (IDFM:71041): {'duration': 60.0,
               'routes': {'6'}},
              Raspail (IDFM:71088): {'duration': 60.0, 'routes': {'4', '6'}},
              Mouton-Duvernet (IDFM:73653): {'duration': 60.0,
               'routes': {'4'}}},
             Raspail (IDFM:71088): {Denfert-Rochereau (IDFM:710

### Tests du graphe

In [144]:
def is_connected(graph):
    if not graph:
        return True  # An empty graph is trivially connected

    visited = set()
    start = next(iter(graph))  # Start from any node
    
    def dfs(node):
        visited.add(node)
        for neighbor in graph[node]:
            if neighbor not in visited:
                dfs(neighbor)

    dfs(start)
    return (len(visited) == len(graph), visited)


In [145]:
def test_connexite(graph):
    est_connnexe, visited = is_connected(graph)
    if not est_connnexe:
        for g in graph:
            if g not in visited:
                print(g)
    else:
        print('Le graphe est connexe.')

test_connexite(graph)

Le graphe est connexe.


In [146]:
len(graph)

308

### Test classe MetroGraph

In [147]:
from MetroGraph import MetroGraph

In [148]:
g = MetroGraph(stop_dict, edges)

In [149]:
odeon_stops = g.get_stops_by_name("Odéon")
for stop in odeon_stops:
    print(stop, "→", g.voisins(stop))

Odéon (IDFM:73618) → {Mabillon (IDFM:73639): 60.0, Cluny - La Sorbonne (IDFM:73619): 60.0, Saint-Michel Notre-Dame (IDFM:73620): 60.0, Saint-Germain-des-Prés (IDFM:71216): 60.0}


In [150]:
est_connexe, visited = g.is_connected()
print("Graphe connexe :", est_connexe)

Graphe connexe : True


## Sauvegarde du graphe

In [151]:
import pickle

with open("metro_graph.pkl", "wb") as f:
    pickle.dump(g, f)