In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt

import warnings
warnings.filterwarnings("ignore")

# Raw inputs, read once and never modified afterwards (later cells work on copies).
# The first three files use the default comma separator.
df_plan_metro, df_position_gps, df_passagers = (
    pd.read_csv(csv_path)
    for csv_path in (
        'plan du métro.csv',
        'position gps des stations de métro.csv',
        'passagers.csv',
    )
)
# Hand-made complement for the stations without GPS coordinates; semicolon-separated.
df_sup_stations = pd.read_csv('position gps manquantes.csv', sep=';')

# Data preparation

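### station_coords
One row per metro station, with the columns:
- Station: str, station name
- latitude: float
- longitude: float
- lines: list(int), metro lines going through the station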

In [4]:
# Station table: the 'GPS' text column ("<latitude>,<longitude>") is split once
# into two columns, then dropped; only the first row of each station name is kept.
gps_parts = df_position_gps['GPS'].str.split(',')
station_coords = (
    df_position_gps
    .assign(latitude=gps_parts.str[0], longitude=gps_parts.str[1])
    .drop(columns='GPS')
    .drop_duplicates(subset="Station", keep="first")
)

# 46 stations have no GPS coordinates in the source file; they come from a
# hand-made CSV whose 'ligne' column is not needed here.
sup_stations = df_sup_stations.drop(columns='ligne')
station_coords = pd.concat([station_coords, sup_stations], ignore_index=True)

# Coordinates were parsed from text, so convert them to numbers.
station_coords = station_coords.astype({'latitude': float, 'longitude': float})

In [5]:
# Network size: the station count comes from the data, the line count is fixed by hand.
num_lines = 14
num_stations = len(station_coords)
print(f"There is {num_stations} metro stations and {num_lines} lines.")

There is 307 metro stations and 14 lines.


In [6]:
def get_index(station_name):
    """Return the index label of the first ``station_coords`` row named ``station_name``.

    Args:
        station_name: value looked up in the 'Station' column of the
            module-level ``station_coords`` frame.

    Returns:
        The index label of the first matching row.

    Raises:
        IndexError: if no row has that station name. The message names the
            missing station instead of the bare "index 0 is out of bounds".
    """
    matches = station_coords[station_coords['Station'] == station_name]
    if matches.empty:
        raise IndexError(f"Unknown station name: {station_name!r}")
    return matches.index[0]

def get_station_name(index):
    """Return the value stored in the first column of ``station_coords`` at row position ``index``."""
    first_column = station_coords.iloc[:, 0]
    return first_column.iloc[index]

# Custom converter for metro line labels
def convert_metro_line(val):
    """Convert a metro line label to its integer line number.

    A 'bis' marker is dropped, so '7bis' maps to the same number as '7'.

    Args:
        val: line label, either text such as '7', '7bis' or '3 bis', or a
            value that is already numeric.

    Returns:
        int: the line number.

    Raises:
        ValueError: if the label does not contain a valid integer once 'bis'
            is removed.
    """
    if isinstance(val, str):
        # Remove the 'bis' marker and surrounding blanks before parsing.
        return int(val.replace('bis', '').strip())
    # Already numeric (e.g. a column parsed as integers): nothing to strip.
    return int(val)

In [7]:
# Get the lines going through each station.
# Each row of the plan is a link from ('de Ligne', 'de Station') to
# ('vers Ligne', 'vers Station'); both endpoints are stacked into one
# (Ligne, Station) table.
metro_de = df_plan_metro.iloc[:, 0:2].rename(columns={'de Ligne': 'Ligne', 'de Station': 'Station'})
metro_vers = df_plan_metro.iloc[:, 2:4].rename(columns={'vers Ligne': 'Ligne', 'vers Station': 'Station'})
metro_line = pd.concat([metro_de, metro_vers], axis=0, ignore_index=True)

# Normalise the labels BEFORE de-duplicating: '7' and '7bis' both become 7, so
# de-duplicating first would leave the same line twice for a station served by both.
metro_line['Ligne'] = metro_line['Ligne'].apply(convert_metro_line)
metro_line = metro_line.drop_duplicates(subset=['Ligne', 'Station'], keep='first', ignore_index=True)

# One list of line numbers per station, addressed by the station's index in station_coords.
station_line_list = [[] for _ in range(num_stations)]
for line, station in zip(metro_line['Ligne'], metro_line['Station']):
    station_line_list[get_index(station)].append(line)

# Add this information to station_coords
station_coords['lines'] = station_line_list
station_coords.head()

Unnamed: 0,Station,latitude,longitude,lines
0,Boucicaut,48.841094,2.287946,[8]
1,Bourse,48.868654,2.341376,[3]
2,Bir-Hakeim,48.853943,2.289335,[6]
3,Argentine,48.875337,2.290128,[1]
4,Daumesnil,48.83955,2.395703,"[6, 8]"


# Class MetroNetwork

In [43]:
class MetroNetwork:
    """Metro graph with a best-first route search between two stations.

    Stations are referred to by their integer row position in
    ``station_coords``; the same position indexes ``station_line_list`` and
    both axes of ``adjacency_matrix``.
    """

    # Cost added to a hop that requires a line change. The distance between two
    # neighbouring stations is around 0.01 (about one minute) and a change is
    # assumed to take about ten minutes, hence 0.1.
    LINE_CHANGE_PENALTY = 0.1

    # Columns of the frontier tables exchanged by get_adjacent_stations and
    # trouver_trajets. 'sation_name' is misspelled and never filled; it is kept
    # only so that the returned schema stays unchanged.
    _FRONTIER_COLUMNS = ['station', 'traveled_distance', 'distance_to_station2',
                         'cout', 'route', 'sation_name']

    def __init__(self, station_coords, station_line_list, plan_metro):
        """Store the inputs and build the adjacency matrix.

        Args:
            station_coords: DataFrame with a 'Station' name column in position
                0 and the two coordinate columns in positions 1 and 2.
            station_line_list: list of lists; entry i holds the lines of the
                station at row position i.
            plan_metro: DataFrame of links with 'de Station' and
                'vers Station' columns.
        """
        self.station_coords = station_coords
        self.station_line_list = station_line_list
        self.plan_metro = plan_metro
        self.adjacency_matrix = self.build_adjacency_matrix()

    def _station_positions(self):
        """Map each station name to the row position of its first occurrence."""
        positions = {}
        for position, name in enumerate(self.station_coords['Station']):
            positions.setdefault(name, position)
        return positions

    def _station_index(self, station_name, positions=None):
        """Return the row position of ``station_name``; IndexError if it is unknown."""
        if positions is None:
            positions = self._station_positions()
        if station_name not in positions:
            raise IndexError(f"Unknown station name: {station_name!r}")
        return positions[station_name]

    def _station_name(self, station_index):
        """Return the name stored in column 0 for the given row position."""
        return self.station_coords.iloc[station_index, 0]

    def build_adjacency_matrix(self):
        """Build the symmetric station-by-station adjacency matrix.

        Rows of the plan whose two endpoints are the same station (a line
        change inside one station) are ignored.

        Returns:
            numpy array of shape (number of stations, number of stations)
            holding 1 where two stations are directly linked and 0 elsewhere.

        Raises:
            IndexError: if the plan mentions a station missing from
                ``station_coords``.
        """
        plan = self.plan_metro
        links = plan[plan['de Station'] != plan['vers Station']]

        # One row/column per station (not per link), so every station
        # position is a valid index whatever the number of links.
        station_count = self.station_coords.shape[0]
        adjacency_matrix = np.zeros((station_count, station_count))

        positions = self._station_positions()
        for name_depart, name_arrivee in zip(links['de Station'], links['vers Station']):
            index_depart = self._station_index(name_depart, positions)
            index_arrivee = self._station_index(name_arrivee, positions)
            adjacency_matrix[index_depart, index_arrivee] = 1
            adjacency_matrix[index_arrivee, index_depart] = 1

        return adjacency_matrix

    def distance(self, station_index1, station_index2):
        """Return the Euclidean distance between two stations.

        The coordinates are read from columns 1 and 2 of ``station_coords``
        and used as-is, so the result is in raw coordinate units.
        """
        lat1 = self.station_coords.iloc[station_index1, 1]
        lon1 = self.station_coords.iloc[station_index1, 2]
        lat2 = self.station_coords.iloc[station_index2, 1]
        lon2 = self.station_coords.iloc[station_index2, 2]
        return sqrt((lat1 - lat2) ** 2 + (lon1 - lon2) ** 2)

    def shared_line(self, station1, stationi):
        """Return the first line of ``station1`` that also serves ``stationi``, or None."""
        lines_i = self.station_line_list[stationi]
        for line in self.station_line_list[station1]:
            if line in lines_i:
                return line
        return None

    def get_station_line_of(self, station_index):
        """return: list(int), lines of the station"""
        return self.station_line_list[station_index]

    def get_adjacent_stations(self, station1, traveled_distance, station2, route):
        """List the stations still reachable in one hop from ``station1``.

        Side effect: every link followed here is removed from
        ``self.adjacency_matrix`` so that the search never takes it again.

        Args:
            station1: int, departure station index.
            traveled_distance: float, distance travelled to reach station1.
            station2: int, arrival station index.
            route: DataFrame with 'station' and 'line' columns (in that
                order), the stations visited to reach station1.

        Returns:
            DataFrame with one row per neighbour and the columns 'station',
            'traveled_distance', 'distance_to_station2', 'cout' (travelled
            distance plus straight-line distance to station2), 'route' (the
            route extended with the neighbour) and the unused 'sation_name'.
        """
        stations, traveled, remaining, costs, routes = [], [], [], [], []

        # Select the stations adjacent to station1.
        for station_i in np.flatnonzero(self.adjacency_matrix[station1]):
            station_i = int(station_i)

            # Distances from station1 to station_i, and from station_i to station2.
            new_traveled_distance = traveled_distance + self.distance(station1, station_i)
            distance_to_station2 = self.distance(station_i, station2)

            # Stay on the current line when possible, otherwise change line
            # and pay the penalty.
            current_line = route.iloc[-1, 1]
            if current_line in self.get_station_line_of(station_i):
                new_line = current_line
            else:
                new_line = self.shared_line(station1, station_i)
                new_traveled_distance += self.LINE_CHANGE_PENALTY

            # Extend a copy of the route so each candidate owns its own path.
            route_i = route.copy()
            route_i.loc[len(route_i)] = [station_i, new_line]

            self.adjacency_matrix[station1, station_i] = 0
            self.adjacency_matrix[station_i, station1] = 0

            stations.append(station_i)
            traveled.append(new_traveled_distance)
            remaining.append(distance_to_station2)
            costs.append(new_traveled_distance + distance_to_station2)
            routes.append(route_i)

        if not stations:
            return pd.DataFrame(columns=self._FRONTIER_COLUMNS)

        # Built in one go instead of one concat per neighbour.
        return pd.DataFrame({
            'station': stations,
            'traveled_distance': traveled,
            'distance_to_station2': remaining,
            'cout': costs,
            'route': routes,
            'sation_name': [np.nan] * len(stations),
        })

    def trouver_trajets(self, station1, station2, traveled_distance, route, next_stations, verbose=True):
        """Search a route from ``station1`` to ``station2``.

        At every step the neighbours of the current station are added to the
        stations to visit, and the candidate with the smallest 'cout' becomes
        the new current station. The search consumes links of
        ``self.adjacency_matrix`` (see ``get_adjacent_stations``).

        Args:
            station1: int, departure station index.
            station2: int, arrival station index.
            traveled_distance: float, distance travelled to reach station1.
            route: DataFrame with 'station' and 'line' columns, the stations
                already visited.
            next_stations: DataFrame of stations to visit, with the columns
                returned by ``get_adjacent_stations``.
            verbose: when True (default), print the progress of the search.

        Returns:
            DataFrame with 'station' (station names) and 'line' columns.

        Raises:
            ValueError: if no station is left to visit before reaching station2.
        """
        # Iterative rather than recursive, so a long search cannot hit the
        # interpreter's recursion limit.
        while True:
            adjacent_stations = self.get_adjacent_stations(station1, traveled_distance, station2, route)

            # Add the adjacent stations to the stations to visit; for a station
            # present several times only the cheapest entry is kept.
            frontier = (
                pd.concat([next_stations, adjacent_stations], ignore_index=True)
                .sort_values(by='cout', ascending=True, ignore_index=True)
                .drop_duplicates('station', keep="first", ignore_index=True)
            )
            if frontier.empty:
                raise ValueError(
                    f"No route found to {self._station_name(station2)!r}: "
                    f"the search ran out of stations at {self._station_name(station1)!r}"
                )

            # The cheapest candidate becomes the new current station. int():
            # concatenations with empty frames may change the column dtype.
            new_station1 = int(frontier['station'].iloc[0])
            new_traveled_distance = frontier['traveled_distance'].iloc[0]
            new_route = frontier['route'].iloc[0]

            if verbose:
                print('STATION')
                print(self._station_name(station1) + ' ----> ' + self._station_name(new_station1))
                print('ROUTE')
                print(new_route['station'].apply(self._station_name))
                print('LIGNE')
                print(new_route['line'])
                print('-----------------------------------------------')

            # Drop the chosen station from the stations still to visit.
            next_stations = frontier.drop(index=0)

            if new_station1 == station2:
                result = new_route.copy()
                result['station'] = result['station'].apply(self._station_name)
                return result

            station1, traveled_distance, route = new_station1, new_traveled_distance, new_route

    def predict_route(self, station1_name, station2_name, verbose=True):
        """Return the route found between two stations given by name.

        Args:
            station1_name: name of the departure station.
            station2_name: name of the arrival station.
            verbose: when True (default), print the progress of the search.

        Returns:
            DataFrame with 'station' (station names) and 'line' columns,
            starting at the departure station.

        Raises:
            IndexError: if a station name is unknown.
            ValueError: if no route is found.
        """
        station_index1 = self._station_index(station1_name)
        station_index2 = self._station_index(station2_name)

        # The route starts on the first line listed for the departure station.
        start_lines = self.get_station_line_of(station_index1)
        start_line = start_lines[0] if start_lines else None
        initial_route = pd.DataFrame({'station': station_index1, 'line': start_line}, index=[0])

        if station_index1 == station_index2:
            # Already at destination: nothing to search.
            initial_route['station'] = initial_route['station'].apply(self._station_name)
            return initial_route

        next_stations_init = pd.DataFrame(
            columns=['station', 'traveled_distance', 'distance_to_station2', 'cout', 'route'])

        # The search removes links from the matrix: run it on a scratch copy
        # and always put an intact matrix back, even if the search fails.
        pristine_matrix = self.build_adjacency_matrix()
        self.adjacency_matrix = pristine_matrix.copy()
        try:
            return self.trouver_trajets(station_index1, station_index2, 0,
                                        initial_route, next_stations_init, verbose=verbose)
        finally:
            self.adjacency_matrix = pristine_matrix

# Test

In [44]:
# Network built from the prepared station table, the per-station line lists and the raw plan.
MetroParis = MetroNetwork(
    station_coords=station_coords,
    station_line_list=station_line_list,
    plan_metro=df_plan_metro,
)

In [45]:
# Example query; the returned route table is the cell's displayed result.
departure_name = 'La Défense'
arrival_name = 'Pont de Levallois - Bécon'
MetroParis.predict_route(departure_name, arrival_name)

STATION
La Défense ----> Esplanade de La Défense
ROUTE
0                 La Défense
1    Esplanade de La Défense
Name: station, dtype: object
LIGNE
0    1
1    1
Name: line, dtype: int64
-----------------------------------------------
STATION
Esplanade de La Défense ----> Pont de Neuilly
ROUTE
0                 La Défense
1    Esplanade de La Défense
2            Pont de Neuilly
Name: station, dtype: object
LIGNE
0    1
1    1
2    1
Name: line, dtype: int64
-----------------------------------------------
STATION
Pont de Neuilly ----> Les Sablons
ROUTE
0                 La Défense
1    Esplanade de La Défense
2            Pont de Neuilly
3                Les Sablons
Name: station, dtype: object
LIGNE
0    1
1    1
2    1
3    1
Name: line, dtype: int64
-----------------------------------------------
STATION
Les Sablons ----> Porte Maillot
ROUTE
0                 La Défense
1    Esplanade de La Défense
2            Pont de Neuilly
3                Les Sablons
4              Porte Maillo

Unnamed: 0,station,line
0,La Défense,1
1,Esplanade de La Défense,1
2,Pont de Neuilly,1
3,Les Sablons,1
4,Porte Maillot,1
5,Argentine,1
6,Charles de Gaulle - Étoile,1
7,George V,1
8,Franklin D. Roosevelt,1
9,Champs-Élysées - Clemenceau,1


In [63]:
# Sanity check: adjacency-matrix entry for a pair of stations (non-zero means directly linked).
# NOTE(review): the recorded output is 1.0, i.e. the plan data links these two
# stations directly - confirm that this is intended.
i1, i2 = (get_index(name) for name in ('Pont de Levallois - Bécon', 'Nation'))
MetroParis.adjacency_matrix[i1, i2]

1.0

In [59]:
# Display the station table; the rendered output elides the middle rows.
station_coords

Unnamed: 0,Station,latitude,longitude,lines
0,Boucicaut,48.841094,2.287946,[8]
1,Bourse,48.868654,2.341376,[3]
2,Bir-Hakeim,48.853943,2.289335,[6]
3,Argentine,48.875337,2.290128,[1]
4,Daumesnil,48.839550,2.395703,"[6, 8]"
...,...,...,...,...
302,Sully - Morland,48.851271,2.361853,[7]
303,Barbès - Rochechouart,48.883776,2.350607,"[2, 4]"
304,Pré-Saint-Gervais,48.880160,2.398581,"[7, 8]"
305,Quatre-Septembre,48.869659,2.336319,[3]


In [61]:
# Rows of station_coords whose 'lines' list contains line 6.
serves_line_6 = [6 in lines for lines in station_coords['lines']]
station_coords[serves_line_6]

Unnamed: 0,Station,latitude,longitude,lines
2,Bir-Hakeim,48.853943,2.289335,[6]
4,Daumesnil,48.83955,2.395703,"[6, 8]"
16,Nationale,48.833217,2.362856,[6]
35,Trocadéro,48.863611,2.28758,"[6, 9]"
46,Bercy,48.840389,2.379911,"[6, 14]"
47,Edgar Quinet,48.840663,2.326385,[6]
65,Denfert-Rochereau,48.833073,2.333204,"[4, 6]"
67,Nation,48.848945,2.395879,"[1, 2, 6, 9, 3, 7]"
70,Corvisart,48.829791,2.350415,[6]
86,Pasteur,48.842677,2.312813,"[6, 12]"


1.0