## 🚉 Get the nearest RER, TRAIN, METRO, TRAMWAY & VAL Stations

#### I. Importing libraries

In [1]:
from geopy.distance import geodesic
from dotenv import load_dotenv
import pandas as pd
import boto3
import json
import ast
import os

In [3]:
# Load the variables defined in a local .env file into the process environment,
# so the AWS settings read with os.getenv in the next cell are available.
load_dotenv()

True

In [4]:
# AWS settings are taken from the environment; each value is None when the
# corresponding variable is not set.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
AWS_BUCKET_NAME = os.environ.get('AWS_BUCKET_NAME')

Helper function to calculate distance between two latitude and longitude coordinates.

In [5]:
def calculate_distance(lat1, lon1, lat2, lon2):
    """
    Returns the geodesic distance, in kilometers, between the point
    (lat1, lon1) and the point (lat2, lon2).
    """
    first_point = (lat1, lon1)
    second_point = (lat2, lon2)

    separation = geodesic(first_point, second_point)
    return separation.kilometers

#### II. 💾 Importing data 

Opening the JSON file containing [data](https://prim.iledefrance-mobilites.fr/fr/donnees-statiques/emplacement-des-gares-idf) about the stations of the Île-de-France rail network: train and RER stations, as well as metro, tram, VAL and funicular stations open to travelers in Île-de-France.

In [6]:
# Source of the data: https://prim.iledefrance-mobilites.fr/fr/donnees-statiques/emplacement-des-gares-idf

# Read the raw export. The encoding is passed explicitly because the station
# names contain accented characters and the platform default encoding is not
# guaranteed to be UTF-8. The file is closed as soon as it has been parsed.
with open('data/emplacement-des-gares-idf.json', encoding='utf-8') as f:
    data = json.load(f)

# Flatten the nested records into columns ("fields.xxx") and give the columns
# of interest readable names.
df = pd.json_normalize(data)
df = df.rename(columns={
    "fields.geo_point_2d": "station_coordinates",
    "fields.nom_gares": "station_name",
    "fields.nom_iv": "station_slug",
    "fields.mode": "station_type",
    "fields.exploitant": "station_exploitant",
    "fields.res_com": "transport_type",
    "fields.indice_lig": "transport_index"
})

# Keep only the columns used downstream. The explicit copy makes the column
# assignment below operate on an independent frame (no SettingWithCopyWarning).
df = df[["station_coordinates", "station_name", "station_slug", "station_type", "transport_type", "transport_index"]].copy()

# Rename values in "transport_type" column that contain "TRAIN" to "TRANSILIEN".
# regex=False: plain substring replacement, independent of the pandas default.
df['transport_type'] = df['transport_type'].str.replace('TRAIN', 'TRANSILIEN', regex=False)

# Persist the cleaned table; it is re-read from disk by the cells below.
df.to_csv('data/transport_stations_paris.csv', index=False)

In [7]:
# Reload the cleaned station table from the CSV written by the previous cell
# and preview its first rows.
stations = pd.read_csv("data/transport_stations_paris.csv")
stations.head()

Unnamed: 0,station_coordinates,station_name,station_slug,station_type,transport_type,transport_index
0,"[48.68101661540746, 2.533130656571037]",Boussy-Saint-Antoine,Boussy-Saint-Antoine,RER,RER D,D
1,"[48.60674494801255, 2.3020596915826386]",Brétigny,Brétigny,RER,RER C,C
2,"[49.00975645120457, 2.5612326952251054]",Aéroport Charles De Gaulle 1,Aéroport Charles De Gaulle 1,RER,RER B,B
3,"[48.798892499145914, 2.3281369230647506]",Arcueil-Cachan,Arcueil-Cachan,RER,RER B,B
4,"[48.932196005027805, 2.495513142584313]",Aulnay-sous-Bois,Aulnay-sous-Bois,TRAIN,TRANSILIEN K,K


In [8]:
# Distinct station categories; find_nearest_ratp_station compares its
# station_type argument against this column.
stations['station_type'].unique()

array(['RER', 'TRAIN', 'METRO', 'TRAMWAY', 'VAL'], dtype=object)

In [9]:
# Distinct line labels (after the TRAIN -> TRANSILIEN rename); this is the
# column joined against the traffic-status data in find_nearest_ratp_station.
stations['transport_type'].unique()

array(['RER D', 'RER C', 'RER B', 'TRANSILIEN K', 'TRANSILIEN H',
       'TRANSILIEN L', 'RER A', 'TRANSILIEN J', 'TRANSILIEN R',
       'TRANSILIEN P', 'RER E', 'TRANSILIEN N', 'TRANSILIEN U', 'GL',
       'METRO 10', 'METRO 4', 'METRO 3', 'METRO 9', 'METRO 2', 'TRAM 3a',
       'TRAM 1', 'TRAM 7', 'TRAM 6', 'TRAM 3b', 'METRO 7bis', 'METRO 5',
       'METRO 1', 'METRO 14', 'METRO 8', 'METRO 6', 'METRO 11',
       'METRO 13', 'METRO 12', 'METRO 7', 'METRO 3bis', 'TRAM 4',
       'TRAM 2', 'TRAM 8', 'TRAM 5', 'TRAM 13', 'TRAM 10', 'ORLYVAL',
       'CDGVAL', 'TRAM 11', 'TRAM 9', 'FUNICULAIRE MONTMARTRE'],
      dtype=object)

In [14]:
# Distinct line indices, taken from the source field "indice_lig".
stations['transport_index'].unique()

array(['D', 'C', 'B', 'K', 'H', 'L', 'A', 'J', 'R', 'P', 'E', 'N', 'U',
       'GL', '10', '4', '3', '9', '2', '3A', '1', '7', '6', '3B', '7b',
       '5', '14', '8', '11', '13', '12', '3bis', 'ORL', 'CDG', 'FUN'],
      dtype=object)

#### III. Scripting 

In [30]:
def load_station_status_data():
    """
    Fetches 'ratp_stations_traffic_status.csv' from the S3 bucket named by
    AWS_BUCKET_NAME and returns its content as a pandas DataFrame.
    """
    s3_resource = boto3.resource(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    status_object = s3_resource.Bucket(AWS_BUCKET_NAME).Object('ratp_stations_traffic_status.csv')

    # The object's 'Body' is passed directly to read_csv.
    return pd.read_csv(status_object.get()['Body'])

In [31]:
def find_nearest_ratp_station(start_lat, start_long, dest_lat, dest_long, station_type):
    """
    Finds a departure and an arrival station of the given type on the same line.

    The arrival station is selected first: among the 10 stations of
    `station_type` closest to the destination, the closest one whose line
    status contains 'trafic normal'. The departure station is then the station
    of that same line (same 'transport_type') closest to the start point.

    Args:
        start_lat, start_long: Latitude and longitude of the starting point.
        dest_lat, dest_long: Latitude and longitude of the destination.
        station_type: Value matched against the 'station_type' column
            (RER, TRAIN, METRO, TRAMWAY or VAL).

    Returns:
        On success, the tuple
        (nearest_station_coords, closest_dest_station_coords,
        closest_dest_station_name, nearest_station_name,
        nearest_station_type, transport_type, transport_type_status),
        where both coordinate entries are (latitude, longitude) tuples.
        Otherwise one of the strings "No station found.",
        "No station with normal traffic found." or
        "No station found with the specified transport type.".
    """

    # Get station information
    stations = pd.read_csv("data/transport_stations_paris.csv")
    stations_status = load_station_status_data()

    # Keep only the requested station type before doing any per-row work
    filtered_stations = stations[stations['station_type'] == station_type]

    if filtered_stations.empty:
        return "No station found."

    # Attach the traffic status of each line: the station's "transport_type" is matched
    # against the status table's "name". The left join keeps stations whose line has no
    # status row; their "status" is then NaN.
    filtered_stations = pd.merge(filtered_stations, stations_status, left_on='transport_type', right_on='name', how='left')
    filtered_stations = filtered_stations.drop(columns='name')

    # Converts the station_coordinates from their CSV string form to a list
    filtered_stations['station_coordinates'] = filtered_stations['station_coordinates'].apply(ast.literal_eval)

    # Create a column with the distance between each station and the input GPS coordinates
    filtered_stations['distance_from_input'] = filtered_stations['station_coordinates'].apply(
        lambda coords: calculate_distance(start_lat, start_long, coords[0], coords[1])
    )

    # Create a column with the distance between each station and the destination GPS coordinates
    filtered_stations['distance_from_destination'] = filtered_stations['station_coordinates'].apply(
        lambda coords: calculate_distance(dest_lat, dest_long, coords[0], coords[1])
    )

    # Step 1: Get the 10 stations closest to the destination
    closest_dest_stations = filtered_stations.sort_values(by='distance_from_destination').head(10)

    # Step 2: Take the closest of them whose line status contains "trafic normal"
    closest_dest_station = None
    for _, station in closest_dest_stations.iterrows():
        status = station['status']
        # A missing status is NaN (a float), on which the "in" test would raise
        # TypeError; such stations are skipped.
        if isinstance(status, str) and 'trafic normal' in status:
            closest_dest_station = station
            break  # Exit the loop once the row is found

    if closest_dest_station is None:
        return "No station with normal traffic found."

    closest_dest_station_coords = (closest_dest_station['station_coordinates'][0], closest_dest_station['station_coordinates'][1])
    closest_dest_station_name = closest_dest_station['station_name']

    # Sort the stations by input distance in ascending order
    sorted_input_stations = filtered_stations.sort_values(by='distance_from_input')

    # Step 3: Keep only the stations on the same line (transport_type) as the closest_dest_station
    same_line_stations = sorted_input_stations[sorted_input_stations['transport_type'] == closest_dest_station['transport_type']]

    if same_line_stations.empty:
        return "No station found with the specified transport type."

    # Retrieve the nearest station coordinates, name, and type
    nearest_station = same_line_stations.iloc[0]
    nearest_station_coords = (nearest_station['station_coordinates'][0], nearest_station['station_coordinates'][1])
    nearest_station_name = nearest_station['station_name']
    nearest_station_type = nearest_station['station_type']

    transport_type = nearest_station['transport_type']
    transport_type_status = nearest_station['status']

    # Return the nearest station coordinates, name, and type (train, metro, tramway etc...)
    return nearest_station_coords, closest_dest_station_coords, closest_dest_station_name, nearest_station_name, nearest_station_type, transport_type, transport_type_status

Let's try to find the nearest Metro station

In [32]:
# Same lookup, with keyword arguments so the role of each coordinate is explicit.
find_nearest_ratp_station(
    start_lat=48.846756,
    start_long=2.316227,
    dest_lat=48.922499,
    dest_long=2.3703,
    station_type="METRO",
)

((48.84447014036132, 2.317852783041331),
 (48.913710544615356, 2.3807356063950937),
 "Mairie d'Aubervilliers",
 'Falguière',
 'METRO',
 'METRO 12',
 'trafic normal')