## Imports Python

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta




## Classe AnalyzerGTFS

Cette classe a pour but de fouiller des données GTFS et d'en retourner les destinations possible en partant d'une ville, avec les différents trajets réalisables

In [32]:
# Exemple de ville : StopArea:OCE87586545

class AnalyzerGTFS:
    stop_time_trip = [] # Contient StopPoints, Trajets et horaires des trajets
    trip = [] # Contient les trip_id, route_id et les services_id
    tripDates = []  # Contient les trajets des services fonctionnant sur les dates données
    stops_trip_id = [] # Contient tous les stops des trajets corrects
    date = [] # Contient tous les services et toutes les dates comprises entre date_min et date_max, (Utile pour afficher à l'utilisateur toutes les dates possibles pour le trajet)
    destinations = [] # Contient les stops après la ville (StopSequence > StopSequence de la ville)
    stops = []
    calendar_dates = []
    routes = []
    stop_times = []
    trips = []

    def __init__(self, lat, lon, date_min,date_max, path):
        self.calendar_dates = pd.read_csv('Data/'+ path +'/calendar_dates.txt')
        self.routes = pd.read_csv('Data/'+ path +'/routes.txt')
        self.stop_times = pd.read_csv('Data/'+ path +'/stop_times.txt')
        self.stops = pd.read_csv('Data/'+ path +'/stops.txt')
        self.trips = pd.read_csv('Data/'+ path +'/trips.txt')
        self.id_ville = self.villes_proches(lat,lon)
        self.calendar_dates.date = pd.to_datetime(self.calendar_dates["date"], format='%Y%m%d').dt.date
        self.date_min = datetime.strptime(date_min, '%Y%m%d').date()
        self.date_max = datetime.strptime(date_max, '%Y%m%d').date()
        self.stops['parent_station'] = self.stops['parent_station'].fillna('')
    
    # Etape 1 : Récupérer les stops de la ville / StopArea
    def get_stops(self):
        villeSet = set(self.id_ville['stop_id'].array)
        return self.stops[[set([l]).issubset(villeSet) for l in self.stops.parent_station.values.tolist()]]['stop_id']
    
    # Etape 2 : Récupérer tous les trajets des différents StopPoints, peut importe la date
    def get_trips(self):
        stops_ids = self.get_stops()
        self.stop_time_trip = pd.merge(self.stop_times, stops_ids, on='stop_id')
        self.stop_times_duplicates = self.stop_time_trip
        self.stop_time_trip = self.stop_times_duplicates.drop_duplicates(subset='trip_id') #Doublons possibles si deux StopPoints différents sont sur un même trajet
        return self.stop_time_trip['trip_id']

    # Etape 3 : Récupérer les services_id des Trajets
    def get_service_id(self):
        trip_ids = self.get_trips()
        self.trip = pd.merge(trip_ids, self.trips, on='trip_id')
        return self.trip['service_id']
    
    # Etape 4 : Récupérer les services fonctionnant sur les dates données
    def get_dates(self):
        service_id = self.get_service_id()
        dates = pd.merge(service_id, self.calendar_dates, on='service_id')
        return dates[(dates['date'] >= self.date_min) & (dates['date'] <= self.date_max)]
    
    # Etape 5 : Récupérer les trajets des services fonctionnant sur les dates données
    def get_trajets(self):
        services_uniques = self.get_dates().drop_duplicates(subset='service_id')
        return pd.merge(services_uniques, self.trip, on='service_id')
    
    # Etape 6 : Récupérer les stops des trajets corrects
    def get_stops_trajets(self):
        trajets = self.get_trajets()
        return pd.merge(trajets, self.stop_times, on='trip_id')
    
    # Etape 7 : Récupérer les ids des destinations, c'est à dire les stops après la ville (StopSequence > StopSequence de la ville)
    def get_stops_destinations(self):
        stops_trip_id = self.get_stops_trajets().assign(TempsVille = "")
        Time = self.stop_time_trip['departure_time'] #StopTime trip
        Time.index = self.stop_time_trip['trip_id']


        value=Time.loc[stops_trip_id['trip_id']]

        stops_trip_id['TempsVille'] = value.array

        return stops_trip_id[stops_trip_id['departure_time'] > stops_trip_id['TempsVille']]
    
    # Etape 8 : Récupérer les destinations
    def get_destinations(self):
        destinations_uniques = self.get_stops_destinations()
        destinations_StopPoint = pd.merge(destinations_uniques, self.stops, on='stop_id')
        destinationSet = set(destinations_StopPoint['parent_station'].array)
        destinations_StopArea = self.stops[[set([l]).issubset(destinationSet) for l in self.stops.stop_id.values.tolist()]]
        destinations_StopArea.drop_duplicates(subset='stop_id')
        return destinations_StopArea
    
    def villes_proches(self,lat,long):
        return self.stops[(self.stops['stop_lat'] > lat-0.03) & (self.stops['stop_lat'] < lat+0.03) & (self.stops['stop_lon'] > long-0.03) & (self.stops['stop_lon'] < long+0.03) & self.stops['stop_id'].str.contains('StopArea')]
    
        

In [33]:
villeEx = 'StopArea:OCE87547000'

Analyzer = AnalyzerGTFS(47.9,1.9,'20240601','20240731','INTERCITE')
Analyzer.get_destinations()

#Analyzer.villes_proches(47.9,1.9)

Unnamed: 0,stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station
8,StopArea:OCE87481002,Nantes,,47.217491,-1.542077,,,1,
13,StopArea:OCE87484006,Angers Saint-Laud,,47.464649,-0.55677,,,1,
24,StopArea:OCE87487603,Saumur,,47.268994,-0.071403,,,1,
35,StopArea:OCE87545244,Juvisy,,48.68917,2.38267,,,1,
37,StopArea:OCE87547000,Paris Austerlitz,,48.842285,2.364891,,,1,
41,StopArea:OCE87571240,Saint-Pierre-des-Corps,,47.38614,0.723539,,,1,
44,StopArea:OCE87574004,Blois - Chambord,,47.585449,1.323759,,,1,
46,StopArea:OCE87576009,Vierzon,,47.226324,2.059092,,,1,
56,StopArea:OCE87592006,Limoges Bénédictins,,45.835871,1.268513,,,1,
58,StopArea:OCE87592378,La Souterraine,,46.239782,1.492207,,,1,


In [16]:
Test = Analyzer.get_stops_trajets()
Test

Unnamed: 0,service_id,date,exception_type,trip_id,route_id,trip_headsign,direction_id,block_id,shape_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,26,2024-06-26,1,OCESN3605F3066149:2024-06-26T00:35:36Z,FR:Line::276892b7-b6aa-4a9b-a989-62f4abea526b:,3605,0.0,34,,06:28:00,06:28:00,StopPoint:OCEINTERCITES-87547000,0,,0,1,
1,26,2024-06-26,1,OCESN3605F3066149:2024-06-26T00:35:36Z,FR:Line::276892b7-b6aa-4a9b-a989-62f4abea526b:,3605,0.0,34,,07:30:00,07:32:00,StopPoint:OCEINTERCITES-87543017,1,,0,0,
2,26,2024-06-26,1,OCESN3605F3066149:2024-06-26T00:35:36Z,FR:Line::276892b7-b6aa-4a9b-a989-62f4abea526b:,3605,0.0,34,,08:07:00,08:09:00,StopPoint:OCEINTERCITES-87576009,2,,0,0,
3,26,2024-06-26,1,OCESN3605F3066149:2024-06-26T00:35:36Z,FR:Line::276892b7-b6aa-4a9b-a989-62f4abea526b:,3605,0.0,34,,08:41:00,08:43:00,StopPoint:OCEINTERCITES-87597005,3,,0,0,
4,26,2024-06-26,1,OCESN3605F3066149:2024-06-26T00:35:36Z,FR:Line::276892b7-b6aa-4a9b-a989-62f4abea526b:,3605,0.0,34,,09:19:00,09:21:00,StopPoint:OCEINTERCITES-87592378,4,,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1748,215,2024-07-01,1,OCESN4085F3066626:2024-05-25T00:28:24Z,FR:Line::2381a54a-1f59-4452-88c7-303179dd472b:,4085,0.0,564,,19:52:00,19:55:00,StopPoint:OCEOUIGO-87574004,3,,0,0,
1749,215,2024-07-01,1,OCESN4085F3066626:2024-05-25T00:28:24Z,FR:Line::2381a54a-1f59-4452-88c7-303179dd472b:,4085,0.0,564,,20:24:00,20:36:00,StopPoint:OCEOUIGO-87571240,4,,0,0,
1750,215,2024-07-01,1,OCESN4085F3066626:2024-05-25T00:28:24Z,FR:Line::2381a54a-1f59-4452-88c7-303179dd472b:,4085,0.0,564,,21:07:00,21:10:00,StopPoint:OCEOUIGO-87487603,5,,0,0,
1751,215,2024-07-01,1,OCESN4085F3066626:2024-05-25T00:28:24Z,FR:Line::2381a54a-1f59-4452-88c7-303179dd472b:,4085,0.0,564,,21:34:00,21:37:00,StopPoint:OCEOUIGO-87484006,6,,0,0,


In [17]:
Analyzer.stop_time_trip

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,OCESN3605F3066149:2024-06-26T00:35:36Z,07:30:00,07:32:00,StopPoint:OCEINTERCITES-87543017,1,,0,0,
1,OCESN3605F3165346:2024-06-26T00:35:36Z,07:28:00,07:30:00,StopPoint:OCEINTERCITES-87543017,1,,0,0,
2,OCESN3619F3064142:2024-06-26T00:35:36Z,09:22:00,09:24:00,StopPoint:OCEINTERCITES-87543017,1,,0,1,
3,OCESN3619F3064143:2024-05-15T00:29:57Z,09:22:00,09:24:00,StopPoint:OCEINTERCITES-87543017,1,,0,1,
4,OCESN3619F3066039:2024-04-16T00:31:58Z,09:27:00,09:29:00,StopPoint:OCEINTERCITES-87543017,1,,0,1,
...,...,...,...,...,...,...,...,...,...
264,OCESN4080F3330216:2024-06-19T00:38:12Z,09:31:00,09:34:00,StopPoint:OCEOUIGO-87543017,5,,0,0,
265,OCESN4082F3247560:2024-06-03T00:27:37Z,15:47:00,15:50:00,StopPoint:OCEOUIGO-87543017,5,,0,0,
266,OCESN4085F3021194:2024-06-26T00:35:36Z,19:25:00,19:28:00,StopPoint:OCEOUIGO-87543017,2,,0,0,
267,OCESN4085F3066626:2024-05-25T00:28:24Z,19:19:00,19:22:00,StopPoint:OCEOUIGO-87543017,2,,0,0,


In [63]:
'''
import folium

#city = Analyzer.stops.loc[Analyzer.stops['stop_id'] == villeEx]

map = folium.Map(location=[47.9,1.9], zoom_start=7)
#folium.CircleMarker([float(city.loc[:,['stop_lat']].values[0][0]), float(city.loc[:,['stop_lon']].values[0][0])], radius = 2, popup=city['stop_name'], color = "red").add_to(map)

for row in Destinations.itertuples():
    folium.CircleMarker([float(row.stop_lat), float(row.stop_lon)], radius = 2, popup=row.stop_name).add_to(map)
map
'''

In [64]:
Time = Analyzer.stop_time_trip['departure_time']
Time.index = Analyzer.stop_time_trip['trip_id']

Analyzer.stops_trip_id = Analyzer.stops_trip_id.assign(TempsVille = "")
value=Time.loc[Analyzer.stops_trip_id['trip_id']]

Analyzer.stops_trip_id['TempsVille'] = value.array

Superieur = Analyzer.stops_trip_id[Analyzer.stops_trip_id['departure_time'] > Analyzer.stops_trip_id['TempsVille']]
Superieur

Unnamed: 0,service_id,date,exception_type,trip_id,route_id,trip_headsign,direction_id,block_id,shape_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,TempsVille
2,227,2024-07-01,1,OCESN4657F3063601:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4657,1.0,592,,10:08:00,10:10:00,StopPoint:OCEINTERCITES-87611244,2,,0,0,,09:06:00
3,227,2024-07-01,1,OCESN4657F3063601:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4657,1.0,592,,10:38:00,10:43:00,StopPoint:OCEINTERCITES-87611004,3,,0,0,,09:06:00
4,227,2024-07-01,1,OCESN4657F3063601:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4657,1.0,592,,11:28:00,11:31:00,StopPoint:OCEINTERCITES-87615286,4,,0,0,,09:06:00
5,227,2024-07-01,1,OCESN4657F3063601:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4657,1.0,592,,12:00:00,12:04:00,StopPoint:OCEINTERCITES-87781104,5,,0,0,,09:06:00
6,227,2024-07-01,1,OCESN4657F3063601:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4657,1.0,592,,12:37:00,12:39:00,StopPoint:OCEINTERCITES-87773200,6,,0,0,,09:06:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,263,2024-07-12,1,OCESN4758F3124374:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4758,0.0,656,,15:35:00,15:35:00,StopPoint:OCEINTERCITES-87581009,9,,1,0,,14:58:00
197,264,2024-07-07,1,OCESN4758F3124375:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4758,0.0,657,,15:35:00,15:35:00,StopPoint:OCEINTERCITES-87581009,9,,1,0,,14:58:00
207,94,2024-07-09,1,OCESN4758F3218347:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4758,0.0,662,,15:35:00,15:35:00,StopPoint:OCEINTERCITES-87581009,9,,1,0,,14:58:00
217,275,2024-07-06,1,OCESN4764F3064109:2024-06-26T00:35:36Z,FR:Line::3418b67d-a81f-4dde-b3b0-60cc96ed86f4:,4764,0.0,682,,21:36:00,21:36:00,StopPoint:OCEINTERCITES-87581009,9,,1,0,,20:59:00


In [44]:
'''
class AnalyzerGTFS2:
    stop_ids = [] #Identifiants des StopsPoints de la ville
    trip_ids = [] #Identifiants des Trajets liés à tous les StopPoints de la ville
    service_id = [] #Identifiants des services faisant des trajets à des dates comprises entre date_min et date_max
    stop_time_trip = [] # Contient StopPoints, Trajets et horaires des trajets
    trip = [] # Contient les trip_id, route_id et les services_id
    tripDates = []  # Contient les trajets des services fonctionnant sur les dates données
    stops_trip_id = [] # Contient tous les stops des trajets corrects
    date = [] # Contient tous les services et toutes les dates comprises entre date_min et date_max, (Utile pour afficher à l'utilisateur toutes les dates possibles pour le trajet)
    destinations = [] # Contient les stops après la ville (StopSequence > StopSequence de la ville)
    agency = pd.read_csv('Data/TER/agency.txt')
    feed_info = pd.read_csv('Data/TER/feed_info.txt')
    transfers = pd.read_csv('Data/TER/transfers.txt')
    calendar_dates = pd.read_csv('Data/TER/calendar_dates.txt')
    routes = pd.read_csv('Data/TER/routes.txt')
    stop_times = pd.read_csv('Data/TER/stop_times.txt')
    stops = pd.read_csv('Data/TER/stops.txt')
    trips = pd.read_csv('Data/TER/trips.txt')
    def __init__(self, id_ville, date_min,date_max):
        self.id_ville = id_ville
        self.date_min = datetime.strptime(date_min, '%Y%m%d').date()
        self.date_max = datetime.strptime(date_max, '%Y%m%d').date()
        self.stops.fillna('', inplace=True)
        self.calendar_dates.date = pd.to_datetime(self.calendar_dates["date"], format='%Y%m%d').dt.date
    
    # Etape 1 : Récupérer les stops de la ville / StopArea
    def get_stops(self):
        self.stop_ids = self.stops[self.stops['parent_station'].str.contains(self.id_ville)]['stop_id']
        return self.stop_ids
    
    # Etape 2 : Récupérer les trajets des StopPoints, peut importe la date
    def get_trips(self):
        self.stop_time_trip = pd.merge(self.stop_times, self.stop_ids, on='stop_id')
        self.trip_ids = self.stop_time_trip['trip_id']
        return self.trip_ids   

    # Etape 3 : Récupérer les services_id des Trajets
    def get_service_id(self):
        self.trip = pd.merge(self.trip_ids, self.trips, on='trip_id')
        self.service_id = self.trip['service_id']
        return self.service_id 
    
    # Etape 4 : Récupérer les services fonctionnant sur les dates données
    def get_dates(self):
        service_id = self.get_service_id()
        dates = pd.merge(service_id, self.calendar_dates, on='service_id')
        self.date = dates[(dates['date'] >= self.date_min) & (dates['date'] <= self.date_max)]
        return self.date
    
    # Etape 5 : Récupérer les trajets des services fonctionnant sur les dates données
    def get_trajets(self):
        services_uniques = self.date.drop_duplicates(subset='service_id')
        self.trajets = pd.merge(services_uniques, self.trip, on='service_id')
        return self.trajets
    
    # Etape 6 : Récupérer les stops des trajets corrects
    def get_stops_trajets(self):
        self.stops_trip_id = pd.merge(self.trajets, self.stop_times, on='trip_id')
        return self.stops_trip_id
    
    # Etape 7 : Récupérer les ids des destinations, c'est à dire les stops après la ville (StopSequence > StopSequence de la ville)
    def get_stops_destinations(self):
        Time = self.stop_time_trip['departure_time'] #StopTime trip
        Time.index = self.stop_time_trip['trip_id']

        self.stops_trip_id = self.stops_trip_id.assign(TempsVille = "")
        value=Time.loc[self.stops_trip_id['trip_id']]

        self.stops_trip_id['TempsVille'] = value.array

        self.destinations = self.stops_trip_id[self.stops_trip_id['departure_time'] > self.stops_trip_id['TempsVille']]
        return self.destinations
    
    # Etape 8 : Récupérer les destinations
    def get_destinations(self):
        #destinations_uniques = self.destinations.drop_duplicates(subset='stop_id')
        destinations_StopPoint = pd.merge(self.destinations, self.stops, on='stop_id')
        destinationSet = set(destinations_StopPoint['parent_station'].array)
        destinations_StopArea = self.stops[[set([l]).issubset(destinationSet) for l in self.stops.stop_id.values.tolist()]]
        destinations_StopArea.drop_duplicates(subset='stop_id')
        return destinations_StopArea
    
    def villes_proches(self,lat,long):
        return self.stops[(self.stops['stop_lat'] > lat-0.1) & (self.stops['stop_lat'] < lat+0.1) & (self.stops['stop_lon'] > long-0.1) & (self.stops['stop_lon'] < long+0.1) & self.stops['stop_id'].str.contains('StopArea')]
'''

In [45]:
'''
villeEx = 'StopArea:OCE87547000'

Analyzer2 = AnalyzerGTFS2(villeEx,'20240601','20240731')
Analyzer2.get_stops()
Analyzer2.get_trips()
Analyzer2.get_service_id()
Analyzer2.get_dates()
Analyzer2.get_trajets()
Analyzer2.get_stops_trajets()
Analyzer2.get_stops_destinations()
Analyzer2.get_destinations()
'''

  self.stops.fillna('', inplace=True)


Unnamed: 0,stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station
4221,StopArea:OCE87543009,Orléans,,47.907891,1.904242,,,1,
4223,StopArea:OCE87543017,Les Aubrais,,47.926801,1.907129,,,1,
4226,StopArea:OCE87543033,Cercottes,,47.986155,1.884981,,,1,
4229,StopArea:OCE87543041,Chevilly,,48.026672,1.879113,,,1,
4232,StopArea:OCE87543058,Artenay,,48.081057,1.883226,,,1,
4235,StopArea:OCE87543066,Château-Gaillard,,48.14169,1.912815,,,1,
4238,StopArea:OCE87543074,Toury,,48.193606,1.939565,,,1,
4241,StopArea:OCE87543082,Boisseaux,,48.256418,1.973296,,,1,
4243,StopArea:OCE87543090,Angerville,,48.311632,2.003526,,,1,
4257,StopArea:OCE87543165,Salbris,,47.425186,2.047751,,,1,
