### Load Packages

In [25]:
import io
import json
import os
import zipfile

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import LineString, MultiLineString

### Set your own LTA DataMall API Key

In [22]:
# Read the key from the environment so it is never saved inside the notebook.
# Export LTA_DATAMALL_KEY before starting Jupyter; when the variable is unset
# the key falls back to the empty string.
LTA_KEY = os.environ.get("LTA_DATAMALL_KEY", "")

### For Bus Routes, Stops, Services

In [23]:
def fetch_bus_data(resource_url, page_size=500, timeout=30):
    """Download every page of a paginated resource and return it as one DataFrame.

    The endpoint is queried repeatedly with an increasing ``$skip`` offset
    until a page comes back with an empty ``value`` list.

    Parameters
    ----------
    resource_url : str
        Endpoint URL without any query string.
    page_size : int, optional
        Amount added to ``$skip`` after each page. Defaults to 500, the step
        this notebook has always used (presumably the API's page size --
        TODO confirm against the DataMall user guide).
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection raises instead of
        blocking the kernel forever.

    Returns
    -------
    pandas.DataFrame
        One row per record collected from the ``value`` lists of all pages.

    Raises
    ------
    Exception
        If any page responds with a status code other than 200.
    """
    # The headers do not change between pages, so build them once.
    headers = {
        'AccountKey': LTA_KEY,
        'accept': 'application/json'
    }

    all_data = []
    skip_value = 0
    while True:
        url = f"{resource_url}?$skip={skip_value}"
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            raise Exception(f"Failed to retrieve data. Status code: {response.status_code}")

        records = response.json()['value']
        if len(records) == 0:
            # An empty page marks the end of the data set.
            break

        all_data.extend(records)
        skip_value += page_size

    return pd.DataFrame(all_data)


# Folder that receives the raw bus routes / stops / services CSV files.
data_dir_Bus_RoutesStopsServices = "Bus_RoutesStopsServices"
# exist_ok=True replaces the separate "exists?" check and is safe to re-run.
os.makedirs(data_dir_Bus_RoutesStopsServices, exist_ok=True)

# NOTE(review): these endpoints are plain http, so the AccountKey header travels
# unencrypted -- confirm whether the https form of the host can be used instead.
bus_routes_url = "http://datamall2.mytransport.sg/ltaodataservice/BusRoutes"
bus_stops_url = "http://datamall2.mytransport.sg/ltaodataservice/BusStops"
bus_services_url = "http://datamall2.mytransport.sg/ltaodataservice/BusServices"

# Each data set is downloaded in full and then saved next to the others.
bus_routes_df = fetch_bus_data(bus_routes_url)
bus_routes_df.to_csv(os.path.join(data_dir_Bus_RoutesStopsServices, "bus_routes.csv"), index=False)

bus_stops_df = fetch_bus_data(bus_stops_url)
bus_stops_df.to_csv(os.path.join(data_dir_Bus_RoutesStopsServices, "bus_stops.csv"), index=False)

bus_services_df = fetch_bus_data(bus_services_url)
bus_services_df.to_csv(os.path.join(data_dir_Bus_RoutesStopsServices, "bus_services.csv"), index=False)

In [None]:
# Build one LineString per MRT line by joining its stations in sequence order,
# then combine all lines into a single MultiLineString.
# NOTE(review): no cell shown in this notebook writes mrt_lines_shapefile.shp, and the
# TrainStation_Jul2024 folder is only populated by the download cell further
# down -- confirm where this file comes from before relying on Run All.
train_stations_gdf = gpd.read_file("TrainStation_Jul2024/mrt_lines_shapefile.shp")
grouped_train_lines = train_stations_gdf.groupby('MRT_LINE')

mrt_lines_list = []  # one LineString per MRT line

for MRT_LINE, group in grouped_train_lines:
    # STN_SEQUEN orders the stations along the line.
    group_sorted = group.sort_values('STN_SEQUEN')
    # NOTE(review): vertices are emitted as (y, x). Shapely treats the first
    # value as x, so if the layer is in lon/lat this yields (lat, lon) -- keep
    # only if a downstream consumer expects that order; TODO confirm.
    coordinates = [(point.y, point.x) for point in group_sorted['geometry'].centroid]
    mrt_lines_list.append(LineString(coordinates))

# Merge into one MultiLineString (requires MultiLineString from shapely.geometry).
mrt_multiline = MultiLineString(mrt_lines_list)


  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()

  coordinates = group_sorted['geometry'].centroid.apply(lambda geom: (geom.y, geom.x)).tolist()


### Filter for Trunk Services

In [24]:
# Service numbers whose category is TRUNK.
is_trunk = bus_services_df['Category'] == 'TRUNK'
trunk_services = bus_services_df.loc[is_trunk, 'ServiceNo'].tolist()

# Route rows that belong to one of those trunk services.
on_trunk_service = bus_routes_df['ServiceNo'].isin(trunk_services)
trunk_services_routes = bus_routes_df.loc[on_trunk_service]

# Attach the bus-stop attributes to every route row; the left join keeps
# route rows even when no matching stop record exists.
trunkroutes = trunk_services_routes.merge(bus_stops_df, on='BusStopCode', how='left')
trunkroutes.to_csv(f"{data_dir_Bus_RoutesStopsServices}/trunkroutes.csv", index=False)

### LineString for Bus Routes

In [33]:
# Build one WKT LINESTRING per trunk service from its ordered bus stops.
line_strings = []

unique_services = trunkroutes['ServiceNo'].unique()

# groupby(sort=False) visits services in first-appearance order (the same order
# as unique_services) without rescanning the whole frame for every service.
for service_no, service_stops in trunkroutes.groupby('ServiceNo', sort=False):
    # Direction 1 stops come first, then Direction 2, each in StopSequence
    # order; a loop service has a single direction, so this reduces to a plain
    # StopSequence sort.
    unified_route = service_stops.sort_values(['Direction', 'StopSequence'])

    # WKT is "x y", i.e. longitude first. The previous code zipped
    # (Latitude, Longitude) and therefore wrote the two values swapped.
    bus_coordinates = list(zip(unified_route['Longitude'], unified_route['Latitude']))

    # Create LINESTRING representation
    line_string_representation = f"LINESTRING ({', '.join(f'{lon} {lat}' for lon, lat in bus_coordinates)})"

    line_strings.append({
        'ServiceNo': service_no,
        'LineString': line_string_representation
    })

line_string_df = pd.DataFrame(line_strings)
line_string_df.to_csv(f"{data_dir_Bus_RoutesStopsServices}/bus_linestring.csv", index=False)


### For Passenger Volume (Note: LTA DataMall only serves passenger volume data for the past 3 months)

In [51]:
def fetch_pv_data(resource_url, output_dir, timeout=60):
    """Download the zip archive advertised by an API endpoint and extract it.

    The endpoint is expected to answer with JSON whose ``value`` list holds an
    object with a ``Link`` entry; that link is fetched and the resulting zip
    archive is unpacked into ``output_dir``.

    Parameters
    ----------
    resource_url : str
        API URL that returns the download link.
    output_dir : str
        Folder the archive members are extracted into.
    timeout : float or tuple, optional
        Passed to both ``requests.get`` calls so a stalled connection raises
        instead of blocking the kernel forever.

    Raises
    ------
    Exception
        If the API call or the download does not return status 200, or if the
        response carries no download link.
    """
    headers = {
        'AccountKey': LTA_KEY
    }

    response = requests.get(resource_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to retrieve data. Status code: {response.status_code}")

    # Parse the response JSON to get the signed URL
    data = response.json()
    if not data.get('value'):
        raise Exception("Signed URL not found in the response.")
    signed_url = data['value'][0]['Link']

    # The signed URL is fetched without the AccountKey header.
    zip_response = requests.get(signed_url, timeout=timeout)
    if zip_response.status_code != 200:
        raise Exception(f"Failed to download the file. Status code: {zip_response.status_code}")

    # The archive is unpacked straight from memory; nothing but its members
    # is written to disk.
    with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:
        z.extractall(output_dir)

    print(f"Files extracted to {output_dir}")

# Folder that receives the extracted passenger-volume files.
data_dir_Passenger_Volume_By_Bus_Stop = "Passenger_Volume_By_Bus_Stop"
# exist_ok=True replaces the separate "exists?" check and is safe to re-run.
os.makedirs(data_dir_Passenger_Volume_By_Bus_Stop, exist_ok=True)

# September 2024
pv_sept_url = "http://datamall2.mytransport.sg/ltaodataservice/PV/Bus?Date=202409"
# August 2024
pv_aug_url = "http://datamall2.mytransport.sg/ltaodataservice/PV/Bus?Date=202408"
# July 2024
pv_jul_url = "http://datamall2.mytransport.sg/ltaodataservice/PV/Bus?Date=202407"

# Download the months one after another, newest first.
for pv_url in (pv_sept_url, pv_aug_url, pv_jul_url):
    fetch_pv_data(pv_url, data_dir_Passenger_Volume_By_Bus_Stop)

Files extracted to Passenger_Volume_By_Bus_Stop
Files extracted to Passenger_Volume_By_Bus_Stop
Files extracted to Passenger_Volume_By_Bus_Stop


### For Train Stations and Bus Stops Location

In [67]:
def fetch_shp_data(resource_url, output_folder, timeout=60):
    """Download the zip archive advertised by an API endpoint and extract it.

    The endpoint is expected to answer with JSON whose ``value`` list holds an
    object with a ``Link`` entry; that link is fetched and the resulting zip
    archive is unpacked into ``output_folder``.

    Parameters
    ----------
    resource_url : str
        API URL that returns the download link.
    output_folder : str
        Folder the archive members are extracted into.
    timeout : float or tuple, optional
        Passed to both ``requests.get`` calls so a stalled connection raises
        instead of blocking the kernel forever.

    Returns
    -------
    list of str
        Names of the archive members, as stored in the zip (relative to
        ``output_folder``). Earlier versions returned nothing.

    Raises
    ------
    Exception
        If the API call or the download does not return status 200, or if the
        response carries no download link.
    """
    headers = {
        'AccountKey': LTA_KEY,
    }

    # Initial API call to get the download link
    response = requests.get(resource_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to retrieve data. Status code: {response.status_code}")

    # Fail with a clear message rather than an IndexError/KeyError when the
    # response holds no entries.
    response_json = response.json()
    if not response_json.get("value"):
        raise Exception("Download link not found in the response.")
    download_link = response_json["value"][0]["Link"]

    # Download the ZIP file from the extracted link
    zip_response = requests.get(download_link, timeout=timeout)
    if zip_response.status_code != 200:
        raise Exception(f"Failed to download ZIP file. Status code: {zip_response.status_code}")

    # Unzip straight from memory and report what the archive contained.
    with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:
        z.extractall(output_folder)
        return z.namelist()

# Folders that receive the extracted train-station and bus-stop shapefiles.
# exist_ok=True replaces the separate "exists?" checks and is safe to re-run.
data_dir_TrainStation_Jul2024 = "TrainStation_Jul2024"
os.makedirs(data_dir_TrainStation_Jul2024, exist_ok=True)

data_dir_BusStopLocation_Jul2024 = "BusStopLocation_Jul2024"
os.makedirs(data_dir_BusStopLocation_Jul2024, exist_ok=True)

train_stations_locations_url = "http://datamall2.mytransport.sg/ltaodataservice/GeospatialWholeIsland?ID=TrainStation"
fetch_shp_data(train_stations_locations_url, data_dir_TrainStation_Jul2024)

bus_stops_locations_url = "http://datamall2.mytransport.sg/ltaodataservice/GeospatialWholeIsland?ID=BusStopLocation"
# NOTE(review): this name holds whatever fetch_shp_data returns, not the loaded
# layer; read the extracted shapefile with gpd.read_file if a GeoDataFrame is
# what is wanted here.
BusStopLocation_Jul2024 = fetch_shp_data(bus_stops_locations_url, data_dir_BusStopLocation_Jul2024)
