In [1]:
# Importing Required Packages

# Importing 'pandas' to handle datasets
import pandas as pd
# Importing 'numpy' to handle arrays
import numpy as np

# Importing 're' package - Python Regular Expressions
import re

from datetime import datetime, timedelta
import requests
import json
from collections import OrderedDict
import json
import time

# Importing Matplotlib to visualize data
import matplotlib.pyplot as plt


# Remove Warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Step 1 - Preparing Data Sets

Appending the following columns to the 'All_Routes_Complete.csv' file obtained from the Malta public transport (MPT) website

    1. 'Stop Island' - Defines the island (Malta/Gozo) the corresponding Route Number operates in.
    2. 'Time_Count' - Number of buses operating on a particular Route Number and Route Direction from the start till the end of the bus service.
    3. 'Stops - City Name - Stop Island' - Key column used to compare data in 'All_Routes_Complete.csv' to data in Bus_Stop_Info ('Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx')
    4. 'Longitude_Final' - Longitude value of corresponding Bus Stop entry ('Stops') obtained from Bus_Stop_Info
    5. 'Latitude_Final' - Latitude value of corresponding Bus Stop entry ('Stops') obtained from Bus_Stop_Info
    6. 'Bus_Stop_ID' - Unique identifier for all Bus Stops ('Stops')
    7. 'Next_Bus_Stop_ID' - Since All_Routes ('All_Routes_Complete.csv') was extracted in sequential order from MPT website, the next row in All_Routes corresponds to the upcoming Bus Stop (Given 'Route Number', 'Route Direction' and 'Date' columns remain the same). Hence, 'Next_Bus_Stop_ID' is the unique identifier of the upcoming Bus Stop ('Stops')
    8. 'Bus_Stop_Next_Bus_Stop'- Unique identifier used to define connection between 'Bus_Stop_ID and 'Next_Bus_Stop_ID'

In [2]:
# Step 1.1 - Load Datasets
# Loading 'All_Routes_Complete.csv' (All_Routes) and 'Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx' (Bus_Stop_Info) Datasets
# Recall 'All_Routes_Complete.csv' is the file which was obtained from MPT website consisting of all Bus Schedules in sequential order
# Recall 'Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx' consits of the Longitude and Latitude data of all Bus Stops defined in 'All_Routes_Complete.csv'

All_Routes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Scraping Route Names from MPT Website//Results//All_Routes_Complete.csv", low_memory = False)
Bus_Stop_Info = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Obtaining Longitude and Latitude for all Bus Stops//Results (Checks Done + Manual Adjustment)//Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx")


# At work
#All_Routes = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Files Used for Data Visualisation//All_Routes_Complete.csv", low_memory = False)
#Bus_Stop_Info = pd.read_excel("C://Users//attardan.CBM//Data Visualisation//Files Used for Data Visualisation//Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx")

In [3]:
# Step 1.2 - Create 'Stop Island' column
# To differentiate between stops in Malta and Gozo, stops with correspoding 'Route Number' belonging to the 'Gozitan_Route_Number'
# list will be labelled as 'GOZO STOP' whilst all other stops will be labelled as 'MALTA STOP' using a column entitled 'Stop Island'

Gozitan_Route_Number = ['301', '302', '303',
                        '305', '306', '307',
                        '308', '309', '310',
                        '311', '312', '313',
                        '322', '323', '330',
                        'N301']

All_Routes['Stop Island'] = np.where(All_Routes['Route Number'].isin(Gozitan_Route_Number), 'GOZO STOP', 'MALTA STOP')

In [4]:
# Step 1.3 - Create 'Time_Count' column
# The 'Time_Count' column will be added to count the number of buses operating throughout one day for a particular route.
# 'Time_Count' will be used as another method to identify between different routes having the same 'Route Number', 'Route Direction' and 'Date' column

# Select all columns in 'All_Routes' that start with 'Stop Time'
Stop_Time_Columns = [col for col in All_Routes.columns if col.startswith('Stop Time')]
# Row-wise count all'Stop Time' columns which are filled in and populate the 'Time_Count' column with sum
All_Routes['Time_Count'] = All_Routes[Stop_Time_Columns].notna().sum(axis=1)


In [5]:
# Step 1.4 - Merge 'Longitude_Final' and 'Latitude_Final' columns from 'Bus_Stop_Info' dataframe to 'All_Routes' dataframe

# Create column entitled 'Stops - City Name - Stop Island' (Concatenation of 'Stops', 'City Name' and 'Stop Island' columns)
# in both 'All_Routes' and 'Bus_Stop_Info'
All_Routes['Stops - City Name - Stop Island'] = All_Routes['Stops'] + ' - ' + All_Routes['City Name'] + ' - ' + All_Routes['Stop Island']
Bus_Stop_Info['Stops - City Name - Stop Island'] = Bus_Stop_Info['Stops'] + ' - ' + Bus_Stop_Info['City Name'] + ' - ' + Bus_Stop_Info['Stop_Island']
# Merge 'Bus_Stop_Info' Dataframe to 'All_Routes' such that 'All_Routes' dataframe will have 'Longitude_Final' and 'Latitude_Final' values for all corresponding entries
All_Routes = pd.merge(All_Routes, Bus_Stop_Info[['Stops - City Name - Stop Island', 'Longitude_Final', 'Latitude_Final']], on = 'Stops - City Name - Stop Island', how = 'left', sort = False)

In [6]:
# Step 1.5 - Create a unique identifier for all Bus Stops ('Stops') entitled 'Bus_Stop_ID'

# Obtain Dataframe entitled 'Distinct_Coordinates' consisting only of entries with distinct 'Latitude_Final'
# and 'Longitude_Final' pairs
Distinct_Coordinates = All_Routes.drop_duplicates(subset = ['Latitude_Final','Longitude_Final']).copy()
# In 'Distinct_Coordinates' add column 'Stop ID' with entries with format Stop_X where X is a value from 0 up to length of
# 'Distinct_Coordinates' dataframe
Distinct_Coordinates['Stop ID'] = [f'Stop_{i}' for i in range(len(Distinct_Coordinates))]

# Create Dictionary entitled 'Bus_Stop_ID' consisting of corresponding 'Latitude_Final', 'Longitude_Final' and 'Stop ID' values
Bus_Stop_ID = dict(
    zip(
        zip(Distinct_Coordinates['Latitude_Final'], Distinct_Coordinates['Longitude_Final']),
        Distinct_Coordinates['Stop ID']
    )
)

# Using 'Bus_Stop_ID' dictionary label Bus Stops ('Stops') with their corresponding unique identifier.
# Column is labelled as 'Bus_Stop_ID'
# 4 - Using 'Bus_Stop_ID' dict to label All_Routes
All_Routes['Bus_Stop_ID'] = All_Routes.apply(
    lambda row: Bus_Stop_ID.get((row['Latitude_Final'], row['Longitude_Final']), None), axis=1
)


In [7]:
# Step 1.6 - Create Column entitled 'Next_Bus_Stop_ID' consisting of the unique identifier of the upcoming stop in the route

# Since All_Routes ('All_Routes_Complete.csv') was extracted in sequential order from MPT website, the next row in All_Routes
# corresponds to the upcoming Bus Stop (Given 'Route Number', 'Route Direction' and 'Date' columns remain the same).
# Hence, 'Reset_Conditions' is defined such that if any of 'Route Number', 'Route Direction' or 'Date' are different in
# in the upcoming stop then upcoming stop than it is not considered to be a continuation of the current route.
Reset_Conditions = (
    All_Routes['Route Number'].shift(-1) != All_Routes['Route Number']) | \
    (All_Routes['Route Direction'].shift(-1) != All_Routes['Route Direction']) | \
    (All_Routes['Date'].shift(-1) != All_Routes['Date'])

# In 'All_Routes' create column 'Next_Bus_Stop_ID' consisting of the upcoming 'Bus_Stop_ID'
All_Routes['Next_Bus_Stop_ID'] = All_Routes['Bus_Stop_ID'].shift(-1)
# If conditions defined in 'Reset_Conditions' are met, then 'Next_Bus_Stop_ID' should be blank
All_Routes.loc[Reset_Conditions, 'Next_Bus_Stop_ID'] = None


In [8]:
# Step 1.7 - Create a Unique identifier used to define connection between 'Bus_Stop_ID and 'Next_Bus_Stop_ID.
# This is done by concatinating the 'Bus_Stop_ID' and 'Next_Bus_Stop_ID' columns

All_Routes['Bus_Stop_Next_Bus_Stop'] = All_Routes['Bus_Stop_ID'] + '_to_' + All_Routes['Next_Bus_Stop_ID']

### Step 2 - Adjusting All_Routes DataFrame to facilitate further analysis

In [9]:
# Step 2.1 - To simply our work we will not be considering the following routes:
# Night Routes - Not interested in specific Routes designed to work beyond the scheduled service
# Direct Routes - Not interested in routes which make use of specially designed shorter paths

#Defining list of Night Routes and Tallinja Direct Routes (Obtained from: https://www.publictransport.com.mt/en/timetables)
Night_Direct_Routes = ['N11', 'N13', 'N212', 'N62', 'N82',
                       'N91', 'N48', 'N301', 'TD2', 'TD10',
                       'TD13']

# Removing Night and Direct Routes from All_Routes DataFrame
All_Routes = All_Routes[~All_Routes['Route Number'].isin(Night_Direct_Routes)]

In [10]:
# Step 2.2 - Adjust Date names to ensure we are able to split dates accordingly

# Obtain the Date Names utilised in 'All_Routes'
# This is done since in certain 'Date' entries a hypen is utilised (Ex. 'Monday - Friday' significes 'Monday, Tuesday, Wednesday, Thursday, Friday'
# All day names need to be represented in 'Date' field such that 'All_Routes' can be split into specific dates.
Unique_Dates = All_Routes['Date'].unique()
print(Unique_Dates)

['Monday' 'Tuesday, Wednesday, Thursday, Friday' 'Saturday' 'Sunday'
 'Monday, Tuesday, Wednesday, Thursday, Friday' 'Monday - Friday'
 'Saturday, Sunday' 'Monday - Saturday'
 'Wednesday, Thursday, Friday, Monday, Tuesday'
 'Wednesday, Thursday, Friday, Monday, Tuesday, Sunday'
 'Wednesday, Thursday, Friday, Tuesday' 'Wednesday, Thursday, Tuesday'
 'Friday, Monday']


In [11]:
#Step 2.2.1 - Changes to be made
# 1 - Change 'Monday - Friday' to 'Monday, Tuesday, Wednesday, Thursday, Friday'
# 2 - Change 'Monday - Sunday' to 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday'
# 3 - Change 'Monday - Saturday' to 'Monday, Tuesday, Wednesday, Thursday, Firday, Saturday'
All_Routes_Copy = All_Routes.copy()
All_Routes_Copy['Date'] = All_Routes_Copy['Date'].replace({'Monday - Friday': 'Monday, Tuesday, Wednesday, Thursday, Friday',
                                                           'Monday - Sunday': 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday',
                                                           'Monday - Saturday': 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday'})


In [12]:
# Step 2.2.2 - Check changes have been carried out accordingly
Unique_Dates_FollowingUpdate = All_Routes['Date'].unique()
print(Unique_Dates_FollowingUpdate)

['Monday' 'Tuesday, Wednesday, Thursday, Friday' 'Saturday' 'Sunday'
 'Monday, Tuesday, Wednesday, Thursday, Friday' 'Monday - Friday'
 'Saturday, Sunday' 'Monday - Saturday'
 'Wednesday, Thursday, Friday, Monday, Tuesday'
 'Wednesday, Thursday, Friday, Monday, Tuesday, Sunday'
 'Wednesday, Thursday, Friday, Tuesday' 'Wednesday, Thursday, Tuesday'
 'Friday, Monday']


### Step 3 - Obtaining Travel Time taken to traverse from Origin Bus Stop to Destination Bus Stop (Morning Model)

Using TomTom API, the following travel time metrics between an Origin Bus Stop ('Bus_Stop') and Destination Bus Stop ('Next_Bus_Stop') are obtained:

    1. lengthInMeters - The route length in metres
    2. travelTimeInSeconds - The estimated travel time in seconds (Based on current traffic levels)
    3. noTrafficTravelTimeInSeconds - The estimated travel time in seconds excluding traffic
    4. historicTrafficTravelTimeInSeconds - The estimated travel time in seconds (Based on historical traffic levels)
    5. liveTrafficIncidentsTravelTimeInSeconds - The estimated travel time in seconds (Based on current traffic levels and accidents)
    6. trafficDelayInSeconds - The delay in seconds based on real-time traffic when compared to free-flow conditions
    7. departureTime - The estimated time of departure
    8. arrivalTime - The estimated time of arrival

In [50]:
# Step 3.1 - 'Bus_Stop_ID' dictionary is transformed into a dataframe entitled 'Bus_Stop_ID_Info' so that it may be referenced later on
Bus_Stop_ID_Info = pd.DataFrame(
    [(lat, lon, bus_stop) for (lat, lon), bus_stop in Bus_Stop_ID.items()],
    columns=['Latitude_Final', 'Longitude_Final', 'Bus_Stop_ID']
)

In [51]:
# Step 3.2 - A DataFrame entitled 'Distinct_Edges' is created which considers entries in the 'All_Routes' DataFrame with unique entries in
# 'Bus_Stop_Next_Bus_Stop' column
Distinct_Edges = All_Routes_Copy.drop_duplicates(subset = ['Bus_Stop_Next_Bus_Stop'])
# Index is reset due to removal of entries in 'All_Routes'
Distinct_Edges = Distinct_Edges.reset_index(drop=True)
# Creating Copy of 'Distinct_Edges' DataFrame entitled 'Distinct_Edges_2' (s.t. any changes made in 'Distinct_Edges_2' does not impact the original
# DataFrame
Distinct_Edges_2 = Distinct_Edges.copy()

In [52]:
# Step 3.3 - Merging 'Bus_Stop_ID_Info' with 'Distinct_Edges_2'. Currently 'Bus_Stop_ID_Info' consists of coordinate data related to the origin
# Bus Stop. Hence, coordinate data related to the destination Bus Stop will also be added.

# Change Column Names of 'Bus_Stop_ID_Info' DataFrame such that it is apparent that coordinate values and Bus_Stop_ID values are referring to the
# destination Bus Stop. In addition, 'Bus_Stop_ID_Info' is renamed to 'Bus_Stop_ID_Info_2'
Bus_Stop_ID_Info_2 = Bus_Stop_ID_Info.rename(columns = {'Latitude_Final': 'Latitude_Next', 'Longitude_Final': 'Longitude_Next', 'Bus_Stop_ID': 'Bus_Stop_ID_2'})
# The 'Bus_Stop_ID_2', 'Latitude_Next', 'Longitude_Next' columns from 'Bus_Stop_ID_Info_2' are merged with 'Distinct_Edges_2' DataFrame.
# Corresponding entries should have identical entries in 'Next_Bus_Stop_ID' and 'Bus_Stop_ID_2' columns
Distinct_Edges_2 = pd.merge(Distinct_Edges_2, Bus_Stop_ID_Info_2[['Bus_Stop_ID_2', 'Latitude_Next', 'Longitude_Next']], left_on = 'Next_Bus_Stop_ID', right_on = 'Bus_Stop_ID_2', how = 'left', sort = False)
# Entries in 'Bus_Stop_ID_2' Column are replicated by entries in 'Next_Bus_Stop_ID' hence the former can be removed
Distinct_Edges_2 = Distinct_Edges_2.drop(columns = 'Bus_Stop_ID_2')
# Remove entries if 'Next_Bus_Stop_ID' is blank
Distinct_Edges_2 = Distinct_Edges_2.dropna(subset=['Next_Bus_Stop_ID'])

In [53]:
# Step 3.4 - Free Version of TomTom API only Allows for 2500 Non-Tile Requests per-day. The entire datasets would require 2516 requests to complete.
# As a result, the 'Distinc_Edges_2' Dataset will be split into two using the 'Stop Island' column. Following DataFrames are obtained:
# 1 - Distinct_Edges_MALTA - Includes all entries in 'Distinct_Edges_2' where 'Stop Island' column has entries 'MALTA STOP'
# 2 - Distinct_Edges_GOZO - Includes all entries in 'Distinct_Edges_2' where 'Stop Island' column has entries 'GOZO STOP'
Distinct_Edges_MALTA = Distinct_Edges_2.loc[Distinct_Edges_2["Stop Island"] == "MALTA STOP"]
Distinct_Edges_GOZO = Distinct_Edges_2.loc[Distinct_Edges_2["Stop Island"] == "GOZO STOP"]
# Since file is obtained sequentially (Gozo routes are listed last in the MPT Website (https://www.publictransport.com.mt/en/timetables)
# Then index is reset for 'Distinct_Edges_Gozo' to ensure for loops utilised will work correctly
Distinct_Edges_MALTA = Distinct_Edges_MALTA.reset_index(drop=True)
Distinct_Edges_GOZO = Distinct_Edges_GOZO.reset_index(drop=True)

### Obtaining Travel Time for Malta

#### Step 1 - All Maltese Connections

In [54]:
# Step 3.5 - Obtaining travel time metrics for Malta

# Step 3.5.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.
# TomTom API Key
TOMTOM_API_Key = '4x14GdbcGGsXeen6yUhicscKFbz28iMj'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(Distinct_Edges_MALTA)):
    print(f"Processing request {i+1} of {len(Distinct_Edges_MALTA)}")
    departure_time = "2025-03-10T07:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{Distinct_Edges_MALTA['Latitude_Final'][i]}%2C{Distinct_Edges_MALTA['Longitude_Final'][i]}%3A{Distinct_Edges_MALTA['Latitude_Next'][i]}%2C{Distinct_Edges_MALTA['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


Processing request 1 of 2107
Processing request 2 of 2107
Processing request 3 of 2107
Processing request 4 of 2107
Processing request 5 of 2107
Processing request 6 of 2107
Processing request 7 of 2107
Processing request 8 of 2107
Processing request 9 of 2107
Processing request 10 of 2107
Processing request 11 of 2107
Processing request 12 of 2107
Processing request 13 of 2107
Processing request 14 of 2107
Processing request 15 of 2107
Processing request 16 of 2107
Processing request 17 of 2107
Processing request 18 of 2107
Processing request 19 of 2107
Processing request 20 of 2107
Processing request 21 of 2107
Processing request 22 of 2107
Processing request 23 of 2107
Processing request 24 of 2107
Processing request 25 of 2107
Processing request 26 of 2107
Processing request 27 of 2107
Processing request 28 of 2107
Processing request 29 of 2107
Processing request 30 of 2107
Processing request 31 of 2107
Processing request 32 of 2107
Processing request 33 of 2107
Processing request 

Processing request 269 of 2107
Processing request 270 of 2107
Processing request 271 of 2107
Processing request 272 of 2107
Processing request 273 of 2107
Processing request 274 of 2107
Processing request 275 of 2107
Processing request 276 of 2107
Processing request 277 of 2107
Processing request 278 of 2107
Processing request 279 of 2107
Processing request 280 of 2107
Processing request 281 of 2107
Processing request 282 of 2107
Processing request 283 of 2107
Processing request 284 of 2107
Processing request 285 of 2107
Processing request 286 of 2107
Processing request 287 of 2107
Processing request 288 of 2107
Processing request 289 of 2107
Processing request 290 of 2107
Processing request 291 of 2107
Processing request 292 of 2107
Processing request 293 of 2107
Processing request 294 of 2107
Processing request 295 of 2107
Processing request 296 of 2107
Processing request 297 of 2107
Processing request 298 of 2107
Processing request 299 of 2107
Processing request 300 of 2107
Processi

Processing request 534 of 2107
Processing request 535 of 2107
Processing request 536 of 2107
Processing request 537 of 2107
Processing request 538 of 2107
Processing request 539 of 2107
Processing request 540 of 2107
Processing request 541 of 2107
Processing request 542 of 2107
Processing request 543 of 2107
Processing request 544 of 2107
Processing request 545 of 2107
Processing request 546 of 2107
Processing request 547 of 2107
Processing request 548 of 2107
Processing request 549 of 2107
Processing request 550 of 2107
Processing request 551 of 2107
Processing request 552 of 2107
Processing request 553 of 2107
Processing request 554 of 2107
Processing request 555 of 2107
Processing request 556 of 2107
Processing request 557 of 2107
Processing request 558 of 2107
Processing request 559 of 2107
Processing request 560 of 2107
Processing request 561 of 2107
Processing request 562 of 2107
Processing request 563 of 2107
Processing request 564 of 2107
Processing request 565 of 2107
Processi

Processing request 799 of 2107
Processing request 800 of 2107
Processing request 801 of 2107
Processing request 802 of 2107
Processing request 803 of 2107
Processing request 804 of 2107
Processing request 805 of 2107
Processing request 806 of 2107
Processing request 807 of 2107
Processing request 808 of 2107
Processing request 809 of 2107
Processing request 810 of 2107
Processing request 811 of 2107
Processing request 812 of 2107
Processing request 813 of 2107
Processing request 814 of 2107
Processing request 815 of 2107
Processing request 816 of 2107
Processing request 817 of 2107
Processing request 818 of 2107
Processing request 819 of 2107
Processing request 820 of 2107
Processing request 821 of 2107
Processing request 822 of 2107
Processing request 823 of 2107
Processing request 824 of 2107
Processing request 825 of 2107
Processing request 826 of 2107
Processing request 827 of 2107
Processing request 828 of 2107
Processing request 829 of 2107
Processing request 830 of 2107
Processi

Processing request 1062 of 2107
Processing request 1063 of 2107
Processing request 1064 of 2107
Processing request 1065 of 2107
Processing request 1066 of 2107
Processing request 1067 of 2107
Processing request 1068 of 2107
Processing request 1069 of 2107
Processing request 1070 of 2107
Processing request 1071 of 2107
Processing request 1072 of 2107
Processing request 1073 of 2107
Processing request 1074 of 2107
Processing request 1075 of 2107
Processing request 1076 of 2107
Processing request 1077 of 2107
Processing request 1078 of 2107
Processing request 1079 of 2107
Processing request 1080 of 2107
Processing request 1081 of 2107
Processing request 1082 of 2107
Processing request 1083 of 2107
Processing request 1084 of 2107
Processing request 1085 of 2107
Processing request 1086 of 2107
Processing request 1087 of 2107
Processing request 1088 of 2107
Processing request 1089 of 2107
Processing request 1090 of 2107
Processing request 1091 of 2107
Processing request 1092 of 2107
Processi

Processing request 1319 of 2107
Processing request 1320 of 2107
Processing request 1321 of 2107
Processing request 1322 of 2107
Processing request 1323 of 2107
Processing request 1324 of 2107
Processing request 1325 of 2107
Processing request 1326 of 2107
Processing request 1327 of 2107
Processing request 1328 of 2107
Processing request 1329 of 2107
Processing request 1330 of 2107
Processing request 1331 of 2107
Processing request 1332 of 2107
Processing request 1333 of 2107
Processing request 1334 of 2107
Processing request 1335 of 2107
Processing request 1336 of 2107
Processing request 1337 of 2107
Processing request 1338 of 2107
Processing request 1339 of 2107
Processing request 1340 of 2107
Processing request 1341 of 2107
Processing request 1342 of 2107
Processing request 1343 of 2107
Processing request 1344 of 2107
Processing request 1345 of 2107
Processing request 1346 of 2107
Processing request 1347 of 2107
Processing request 1348 of 2107
Processing request 1349 of 2107
Processi

Processing request 1576 of 2107
Processing request 1577 of 2107
Processing request 1578 of 2107
Processing request 1579 of 2107
Processing request 1580 of 2107
Processing request 1581 of 2107
Processing request 1582 of 2107
Processing request 1583 of 2107
Processing request 1584 of 2107
Processing request 1585 of 2107
Processing request 1586 of 2107
Processing request 1587 of 2107
Processing request 1588 of 2107
Processing request 1589 of 2107
Processing request 1590 of 2107
Processing request 1591 of 2107
Processing request 1592 of 2107
Processing request 1593 of 2107
Processing request 1594 of 2107
Processing request 1595 of 2107
Processing request 1596 of 2107
Processing request 1597 of 2107
Processing request 1598 of 2107
Processing request 1599 of 2107
Processing request 1600 of 2107
Processing request 1601 of 2107
Processing request 1602 of 2107
Processing request 1603 of 2107
Processing request 1604 of 2107
Processing request 1605 of 2107
Processing request 1606 of 2107
Processi

Processing request 1833 of 2107
Processing request 1834 of 2107
Processing request 1835 of 2107
Processing request 1836 of 2107
Processing request 1837 of 2107
Processing request 1838 of 2107
Processing request 1839 of 2107
Processing request 1840 of 2107
Processing request 1841 of 2107
Processing request 1842 of 2107
Processing request 1843 of 2107
Processing request 1844 of 2107
Processing request 1845 of 2107
Processing request 1846 of 2107
Processing request 1847 of 2107
Processing request 1848 of 2107
Processing request 1849 of 2107
Processing request 1850 of 2107
Processing request 1851 of 2107
Processing request 1852 of 2107
Processing request 1853 of 2107
Processing request 1854 of 2107
Processing request 1855 of 2107
Processing request 1856 of 2107
Processing request 1857 of 2107
Processing request 1858 of 2107
Processing request 1859 of 2107
Processing request 1860 of 2107
Processing request 1861 of 2107
Processing request 1862 of 2107
Processing request 1863 of 2107
Processi

Processing request 2090 of 2107
Processing request 2091 of 2107
Processing request 2092 of 2107
Processing request 2093 of 2107
Processing request 2094 of 2107
Processing request 2095 of 2107
Processing request 2096 of 2107
Processing request 2097 of 2107
Processing request 2098 of 2107
Processing request 2099 of 2107
Processing request 2100 of 2107
Processing request 2101 of 2107
Processing request 2102 of 2107
Processing request 2103 of 2107
Processing request 2104 of 2107
Processing request 2105 of 2107
Processing request 2106 of 2107
Processing request 2107 of 2107


In [55]:
# Step 3.5.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
Distinct_Edges_MALTA_IncTravelTimes_Morning = Distinct_Edges_MALTA.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
Distinct_Edges_MALTA_IncTravelTimes_Morning.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_Morning.csv")

#### Step 2 - All Necessary Maltese Connections

In [56]:
# Load Dataset
New_Distinct_Edges_Malta = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_To_Be_Added_Malta.xlsx")

In [59]:
# Step 3.5 - Obtaining travel time metrics for Malta

# Step 3.5.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.
# TomTom API Key
TOMTOM_API_Key = '4x14GdbcGGsXeen6yUhicscKFbz28iMj'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(New_Distinct_Edges_Malta)):
    print(f"Processing request {i+1} of {len(New_Distinct_Edges_Malta)}")
    departure_time = "2025-03-10T07:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{New_Distinct_Edges_Malta['Latitude_Final'][i]}%2C{New_Distinct_Edges_Malta['Longitude_Final'][i]}%3A{New_Distinct_Edges_Malta['Latitude_Next'][i]}%2C{New_Distinct_Edges_Malta['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


Processing request 1 of 61
Processing request 2 of 61
Processing request 3 of 61
Processing request 4 of 61
Processing request 5 of 61
Processing request 6 of 61
Processing request 7 of 61
Processing request 8 of 61
Processing request 9 of 61
Processing request 10 of 61
Processing request 11 of 61
Processing request 12 of 61
Processing request 13 of 61
Processing request 14 of 61
Processing request 15 of 61
Processing request 16 of 61
Processing request 17 of 61
Processing request 18 of 61
Processing request 19 of 61
Processing request 20 of 61
Processing request 21 of 61
Processing request 22 of 61
Processing request 23 of 61
Processing request 24 of 61
Processing request 25 of 61
Processing request 26 of 61
Processing request 27 of 61
Processing request 28 of 61
Processing request 29 of 61
Processing request 30 of 61
Processing request 31 of 61
Processing request 32 of 61
Processing request 33 of 61
Processing request 34 of 61
Processing request 35 of 61
Processing request 36 of 61
P

In [60]:
# Step 3.5.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
New_Distinct_Edges_Malta_IncTravelTimes_Morning = New_Distinct_Edges_Malta.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
New_Distinct_Edges_Malta_IncTravelTimes_Morning.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Malta_IncTravelTimes_Morning.csv")

### Obtaining Travel Time for Gozo

In [44]:
# Step 3.6 - Obtaining travel time metrics for Gozo

# Step 3.6.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.

# TomTom API Key
TOMTOM_API_Key = 'uA2d36BEe5Xby9As7hUgrBmGL34u4n0h'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(Distinct_Edges_GOZO)):
    print(f"Processing request {i+1} of {len(Distinct_Edges_GOZO)}")
    departure_time = "2025-03-10T07:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{Distinct_Edges_GOZO['Latitude_Final'][i]}%2C{Distinct_Edges_GOZO['Longitude_Final'][i]}%3A{Distinct_Edges_GOZO['Latitude_Next'][i]}%2C{Distinct_Edges_GOZO['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


Processing request 1 of 390
Processing request 2 of 390
Processing request 3 of 390
Processing request 4 of 390
Processing request 5 of 390
Processing request 6 of 390
Processing request 7 of 390
Processing request 8 of 390
Processing request 9 of 390
Processing request 10 of 390
Processing request 11 of 390
Processing request 12 of 390
Processing request 13 of 390
Processing request 14 of 390
Processing request 15 of 390
Processing request 16 of 390
Processing request 17 of 390
Processing request 18 of 390
Processing request 19 of 390
Processing request 20 of 390
Processing request 21 of 390
Processing request 22 of 390
Processing request 23 of 390
Processing request 24 of 390
Processing request 25 of 390
Processing request 26 of 390
Processing request 27 of 390
Processing request 28 of 390
Processing request 29 of 390
Processing request 30 of 390
Processing request 31 of 390
Processing request 32 of 390
Processing request 33 of 390
Processing request 34 of 390
Processing request 35 o

Processing request 278 of 390
Processing request 279 of 390
Processing request 280 of 390
Processing request 281 of 390
Processing request 282 of 390
Processing request 283 of 390
Processing request 284 of 390
Processing request 285 of 390
Processing request 286 of 390
Processing request 287 of 390
Processing request 288 of 390
Processing request 289 of 390
Processing request 290 of 390
Processing request 291 of 390
Processing request 292 of 390
Processing request 293 of 390
Processing request 294 of 390
Processing request 295 of 390
Processing request 296 of 390
Processing request 297 of 390
Processing request 298 of 390
Processing request 299 of 390
Processing request 300 of 390
Processing request 301 of 390
Processing request 302 of 390
Processing request 303 of 390
Processing request 304 of 390
Processing request 305 of 390
Processing request 306 of 390
Processing request 307 of 390
Processing request 308 of 390
Processing request 309 of 390
Processing request 310 of 390
Processing

In [45]:
# Step 3.6.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_GOZO' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
Distinct_Edges_GOZO_IncTravelTimes_Morning = Distinct_Edges_GOZO.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
Distinct_Edges_GOZO_IncTravelTimes_Morning.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_Morning.csv")

#### Step 2 - All Necessary Gozitan Connections

In [46]:
# Load Dataset
New_Distinct_Edges_Gozo = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_To_Be_Added_Gozo.xlsx")

In [47]:
# Step 3.5 - Obtaining travel time metrics for Malta

# Step 3.5.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.
# TomTom API Key
TOMTOM_API_Key = '4x14GdbcGGsXeen6yUhicscKFbz28iMj'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(New_Distinct_Edges_Gozo)):
    print(f"Processing request {i+1} of {len(New_Distinct_Edges_Gozo)}")
    departure_time = "2025-03-10T07:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{New_Distinct_Edges_Gozo['Latitude_Final'][i]}%2C{New_Distinct_Edges_Gozo['Longitude_Final'][i]}%3A{New_Distinct_Edges_Gozo['Latitude_Next'][i]}%2C{New_Distinct_Edges_Gozo['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


Processing request 1 of 10
Processing request 2 of 10
Processing request 3 of 10
Processing request 4 of 10
Processing request 5 of 10
Processing request 6 of 10
Processing request 7 of 10
Processing request 8 of 10
Processing request 9 of 10
Processing request 10 of 10


In [48]:
# Step 3.5.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
New_Distinct_Edges_Gozo_IncTravelTimes_Morning = New_Distinct_Edges_Gozo.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
New_Distinct_Edges_Gozo_IncTravelTimes_Morning.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Gozo_IncTravelTimes_Morning.csv")

### Step 3 - Obtaining Travel Time taken to traverse from Origin Bus Stop to Destination Bus Stop (Evening Model)

In [49]:
# Step 3.1 - 'Bus_Stop_ID' dictionary is transformed into a dataframe entitled 'Bus_Stop_ID_Info' so that it may be referenced later on
Bus_Stop_ID_Info = pd.DataFrame(
    [(lat, lon, bus_stop) for (lat, lon), bus_stop in Bus_Stop_ID.items()],
    columns=['Latitude_Final', 'Longitude_Final', 'Bus_Stop_ID']
)

# Step 3.2 - A DataFrame entitled 'Distinct_Edges' is created which considers entries in the 'All_Routes' DataFrame with unique entries in
# 'Bus_Stop_Next_Bus_Stop' column
Distinct_Edges = All_Routes_Copy.drop_duplicates(subset = ['Bus_Stop_Next_Bus_Stop'])
# Index is reset due to removal of entries in 'All_Routes'
Distinct_Edges = Distinct_Edges.reset_index(drop=True)
# Creating Copy of 'Distinct_Edges' DataFrame entitled 'Distinct_Edges_2' (s.t. any changes made in 'Distinct_Edges_2' does not impact the original
# DataFrame
Distinct_Edges_2 = Distinct_Edges.copy()

# Step 3.3 - Merging 'Bus_Stop_ID_Info' with 'Distinct_Edges_2'. Currently 'Bus_Stop_ID_Info' consists of coordinate data related to the origin
# Bus Stop. Hence, coordinate data related to the destination Bus Stop will also be added.

# Change Column Names of 'Bus_Stop_ID_Info' DataFrame such that it is apparent that coordinate values and Bus_Stop_ID values are referring to the
# destination Bus Stop. In addition, 'Bus_Stop_ID_Info' is renamed to 'Bus_Stop_ID_Info_2'
Bus_Stop_ID_Info_2 = Bus_Stop_ID_Info.rename(columns = {'Latitude_Final': 'Latitude_Next', 'Longitude_Final': 'Longitude_Next', 'Bus_Stop_ID': 'Bus_Stop_ID_2'})
# The 'Bus_Stop_ID_2', 'Latitude_Next', 'Longitude_Next' columns from 'Bus_Stop_ID_Info_2' are merged with 'Distinct_Edges_2' DataFrame.
# Corresponding entries should have identical entries in 'Next_Bus_Stop_ID' and 'Bus_Stop_ID_2' columns
Distinct_Edges_2 = pd.merge(Distinct_Edges_2, Bus_Stop_ID_Info_2[['Bus_Stop_ID_2', 'Latitude_Next', 'Longitude_Next']], left_on = 'Next_Bus_Stop_ID', right_on = 'Bus_Stop_ID_2', how = 'left', sort = False)
# Entries in 'Bus_Stop_ID_2' Column are replicated by entries in 'Next_Bus_Stop_ID' hence the former can be removed
Distinct_Edges_2 = Distinct_Edges_2.drop(columns = 'Bus_Stop_ID_2')
# Remove entries if 'Next_Bus_Stop_ID' is blank
Distinct_Edges_2 = Distinct_Edges_2.dropna(subset=['Next_Bus_Stop_ID'])

# Step 3.4 - Free Version of TomTom API only Allows for 2500 Non-Tile Requests per-day. The entire datasets would require 2516 requests to complete.
# As a result, the 'Distinc_Edges_2' Dataset will be split into two using the 'Stop Island' column. Following DataFrames are obtained:
# 1 - Distinct_Edges_MALTA - Includes all entries in 'Distinct_Edges_2' where 'Stop Island' column has entries 'MALTA STOP'
# 2 - Distinct_Edges_GOZO - Includes all entries in 'Distinct_Edges_2' where 'Stop Island' column has entries 'GOZO STOP'
Distinct_Edges_MALTA = Distinct_Edges_2.loc[Distinct_Edges_2["Stop Island"] == "MALTA STOP"]
Distinct_Edges_GOZO = Distinct_Edges_2.loc[Distinct_Edges_2["Stop Island"] == "GOZO STOP"]
# Since file is obtained sequentially (Gozo routes are listed last in the MPT Website (https://www.publictransport.com.mt/en/timetables)
# Then index is reset for 'Distinct_Edges_Gozo' to ensure for loops utilised will work correctly
Distinct_Edges_MALTA = Distinct_Edges_MALTA.reset_index(drop=True)
Distinct_Edges_GOZO = Distinct_Edges_GOZO.reset_index(drop=True)

### Obtaining Travel Time for Malta

#### Step 1 - All Maltese Connections

# Step 3.5 - Obtaining travel time metrics for Malta

# Step 3.5.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.
# TomTom API Key
TOMTOM_API_Key = 'Q3ZiGJ3NOOqTLUjevY067wJa9EUcUNzL'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(Distinct_Edges_MALTA)):
    print(f"Processing request {i+1} of {len(Distinct_Edges_MALTA)}")
    departure_time = "2025-03-10T16:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{Distinct_Edges_MALTA['Latitude_Final'][i]}%2C{Distinct_Edges_MALTA['Longitude_Final'][i]}%3A{Distinct_Edges_MALTA['Latitude_Next'][i]}%2C{Distinct_Edges_MALTA['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


# Step 3.5.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
Distinct_Edges_MALTA_IncTravelTimes_Evening = Distinct_Edges_MALTA.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
Distinct_Edges_MALTA_IncTravelTimes_Evening.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_Evening.csv")

#### Step 2 - All Necessary Maltese Connections

# Load Dataset
New_Distinct_Edges_Malta = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_To_Be_Added_Malta.xlsx")

# Step 3.5 - Obtaining travel time metrics for Malta

# Step 3.5.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.
# TomTom API Key
TOMTOM_API_Key = 'Q3ZiGJ3NOOqTLUjevY067wJa9EUcUNzL'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(New_Distinct_Edges_Malta)):
    print(f"Processing request {i+1} of {len(New_Distinct_Edges_Malta)}")
    departure_time = "2025-03-10T16:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{New_Distinct_Edges_Malta['Latitude_Final'][i]}%2C{New_Distinct_Edges_Malta['Longitude_Final'][i]}%3A{New_Distinct_Edges_Malta['Latitude_Next'][i]}%2C{New_Distinct_Edges_Malta['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


# Step 3.5.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
New_Distinct_Edges_Malta_IncTravelTimes_Evening = New_Distinct_Edges_Malta.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
New_Distinct_Edges_Malta_IncTravelTimes_Evening.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Malta_IncTravelTimes_Evening.csv")

### Obtaining Travel Time for Gozo

# Step 3.6 - Obtaining travel time metrics for Gozo

# Step 3.6.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.

# TomTom API Key
TOMTOM_API_Key = 'uA2d36BEe5Xby9As7hUgrBmGL34u4n0h'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(Distinct_Edges_GOZO)):
    print(f"Processing request {i+1} of {len(Distinct_Edges_GOZO)}")
    departure_time = "2025-03-10T16:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{Distinct_Edges_GOZO['Latitude_Final'][i]}%2C{Distinct_Edges_GOZO['Longitude_Final'][i]}%3A{Distinct_Edges_GOZO['Latitude_Next'][i]}%2C{Distinct_Edges_GOZO['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


# Step 3.6.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_GOZO' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
Distinct_Edges_GOZO_IncTravelTimes_Evening = Distinct_Edges_GOZO.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
Distinct_Edges_GOZO_IncTravelTimes_Evening.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_Evening.csv")

#### Step 2 - All Necessary Gozitan Connections

# Load Dataset
New_Distinct_Edges_Gozo = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_To_Be_Added_Gozo.xlsx")

# Step 3.5 - Obtaining travel time metrics for Malta

# Step 3.5.1 - Utilise TomTom to obtain the edge treversal metrics described in introduction of Step 3.
# TomTom API Key
TOMTOM_API_Key = '4x14GdbcGGsXeen6yUhicscKFbz28iMj'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(New_Distinct_Edges_Gozo)):
    print(f"Processing request {i+1} of {len(New_Distinct_Edges_Gozo)}")
    departure_time = "2025-03-10T16:00:00Z"
    url = f"https://api.tomtom.com/routing/1/calculateRoute/{New_Distinct_Edges_Gozo['Latitude_Final'][i]}%2C{New_Distinct_Edges_Gozo['Longitude_Final'][i]}%3A{New_Distinct_Edges_Gozo['Latitude_Next'][i]}%2C{New_Distinct_Edges_Gozo['Longitude_Next'][i]}/json?maxAlternatives=1&computeTravelTimeFor=all&departAt={departure_time}&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={TOMTOM_API_Key}"
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)

    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")

    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(0.25)


# Step 3.5.2 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
New_Distinct_Edges_Gozo_IncTravelTimes_Evening = New_Distinct_Edges_Gozo.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
New_Distinct_Edges_Gozo_IncTravelTimes_Evening.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Gozo_IncTravelTimes_Evening.csv")

Processing request 1 of 2107
Processing request 2 of 2107
Processing request 3 of 2107
Processing request 4 of 2107
Processing request 5 of 2107
Processing request 6 of 2107
Processing request 7 of 2107
Processing request 8 of 2107
Processing request 9 of 2107
Processing request 10 of 2107
Processing request 11 of 2107
Processing request 12 of 2107
Processing request 13 of 2107
Processing request 14 of 2107
Processing request 15 of 2107
Processing request 16 of 2107
Processing request 17 of 2107
Processing request 18 of 2107
Processing request 19 of 2107
Processing request 20 of 2107
Processing request 21 of 2107
Processing request 22 of 2107
Processing request 23 of 2107
Processing request 24 of 2107
Processing request 25 of 2107
Processing request 26 of 2107
Processing request 27 of 2107
Processing request 28 of 2107
Processing request 29 of 2107
Processing request 30 of 2107
Processing request 31 of 2107
Processing request 32 of 2107
Processing request 33 of 2107
Processing request 

Processing request 269 of 2107
Processing request 270 of 2107
Processing request 271 of 2107
Processing request 272 of 2107
Processing request 273 of 2107
Processing request 274 of 2107
Processing request 275 of 2107
Processing request 276 of 2107
Processing request 277 of 2107
Processing request 278 of 2107
Processing request 279 of 2107
Processing request 280 of 2107
Processing request 281 of 2107
Processing request 282 of 2107
Processing request 283 of 2107
Processing request 284 of 2107
Processing request 285 of 2107
Processing request 286 of 2107
Processing request 287 of 2107
Processing request 288 of 2107
Processing request 289 of 2107
Processing request 290 of 2107
Processing request 291 of 2107
Processing request 292 of 2107
Processing request 293 of 2107
Processing request 294 of 2107
Processing request 295 of 2107
Processing request 296 of 2107
Processing request 297 of 2107
Processing request 298 of 2107
Processing request 299 of 2107
Processing request 300 of 2107
Processi

Processing request 534 of 2107
Processing request 535 of 2107
Processing request 536 of 2107
Processing request 537 of 2107
Processing request 538 of 2107
Processing request 539 of 2107
Processing request 540 of 2107
Processing request 541 of 2107
Processing request 542 of 2107
Processing request 543 of 2107
Processing request 544 of 2107
Processing request 545 of 2107
Processing request 546 of 2107
Processing request 547 of 2107
Processing request 548 of 2107
Processing request 549 of 2107
Processing request 550 of 2107
Processing request 551 of 2107
Processing request 552 of 2107
Processing request 553 of 2107
Processing request 554 of 2107
Processing request 555 of 2107
Processing request 556 of 2107
Processing request 557 of 2107
Processing request 558 of 2107
Processing request 559 of 2107
Processing request 560 of 2107
Processing request 561 of 2107
Processing request 562 of 2107
Processing request 563 of 2107
Processing request 564 of 2107
Processing request 565 of 2107
Processi

Processing request 799 of 2107
Processing request 800 of 2107
Processing request 801 of 2107
Processing request 802 of 2107
Processing request 803 of 2107
Processing request 804 of 2107
Processing request 805 of 2107
Processing request 806 of 2107
Processing request 807 of 2107
Processing request 808 of 2107
Processing request 809 of 2107
Processing request 810 of 2107
Processing request 811 of 2107
Processing request 812 of 2107
Processing request 813 of 2107
Processing request 814 of 2107
Processing request 815 of 2107
Processing request 816 of 2107
Processing request 817 of 2107
Processing request 818 of 2107
Processing request 819 of 2107
Processing request 820 of 2107
Processing request 821 of 2107
Processing request 822 of 2107
Processing request 823 of 2107
Processing request 824 of 2107
Processing request 825 of 2107
Processing request 826 of 2107
Processing request 827 of 2107
Processing request 828 of 2107
Processing request 829 of 2107
Processing request 830 of 2107
Processi

Processing request 1062 of 2107
Processing request 1063 of 2107
Processing request 1064 of 2107
Processing request 1065 of 2107
Processing request 1066 of 2107
Processing request 1067 of 2107
Processing request 1068 of 2107
Processing request 1069 of 2107
Processing request 1070 of 2107
Processing request 1071 of 2107
Processing request 1072 of 2107
Processing request 1073 of 2107
Processing request 1074 of 2107
Processing request 1075 of 2107
Processing request 1076 of 2107
Processing request 1077 of 2107
Processing request 1078 of 2107
Processing request 1079 of 2107
Processing request 1080 of 2107
Processing request 1081 of 2107
Processing request 1082 of 2107
Processing request 1083 of 2107
Processing request 1084 of 2107
Processing request 1085 of 2107
Processing request 1086 of 2107
Processing request 1087 of 2107
Processing request 1088 of 2107
Processing request 1089 of 2107
Processing request 1090 of 2107
Processing request 1091 of 2107
Processing request 1092 of 2107
Processi

Processing request 1319 of 2107
Processing request 1320 of 2107
Processing request 1321 of 2107
Processing request 1322 of 2107
Processing request 1323 of 2107
Processing request 1324 of 2107
Processing request 1325 of 2107
Processing request 1326 of 2107
Processing request 1327 of 2107
Processing request 1328 of 2107
Processing request 1329 of 2107
Processing request 1330 of 2107
Processing request 1331 of 2107
Processing request 1332 of 2107
Processing request 1333 of 2107
Processing request 1334 of 2107
Processing request 1335 of 2107
Processing request 1336 of 2107
Processing request 1337 of 2107
Processing request 1338 of 2107
Processing request 1339 of 2107
Processing request 1340 of 2107
Processing request 1341 of 2107
Processing request 1342 of 2107
Processing request 1343 of 2107
Processing request 1344 of 2107
Processing request 1345 of 2107
Processing request 1346 of 2107
Processing request 1347 of 2107
Processing request 1348 of 2107
Processing request 1349 of 2107
Processi

Processing request 1576 of 2107
Processing request 1577 of 2107
Processing request 1578 of 2107
Processing request 1579 of 2107
Processing request 1580 of 2107
Processing request 1581 of 2107
Processing request 1582 of 2107
Processing request 1583 of 2107
Processing request 1584 of 2107
Processing request 1585 of 2107
Processing request 1586 of 2107
Processing request 1587 of 2107
Processing request 1588 of 2107
Processing request 1589 of 2107
Processing request 1590 of 2107
Processing request 1591 of 2107
Processing request 1592 of 2107
Processing request 1593 of 2107
Processing request 1594 of 2107
Processing request 1595 of 2107
Processing request 1596 of 2107
Processing request 1597 of 2107
Processing request 1598 of 2107
Processing request 1599 of 2107
Processing request 1600 of 2107
Processing request 1601 of 2107
Processing request 1602 of 2107
Processing request 1603 of 2107
Processing request 1604 of 2107
Processing request 1605 of 2107
Processing request 1606 of 2107
Processi

Processing request 1833 of 2107
Processing request 1834 of 2107
Processing request 1835 of 2107
Processing request 1836 of 2107
Processing request 1837 of 2107
Processing request 1838 of 2107
Processing request 1839 of 2107
Processing request 1840 of 2107
Processing request 1841 of 2107
Processing request 1842 of 2107
Processing request 1843 of 2107
Processing request 1844 of 2107
Processing request 1845 of 2107
Processing request 1846 of 2107
Processing request 1847 of 2107
Processing request 1848 of 2107
Processing request 1849 of 2107
Processing request 1850 of 2107
Processing request 1851 of 2107
Processing request 1852 of 2107
Processing request 1853 of 2107
Processing request 1854 of 2107
Processing request 1855 of 2107
Processing request 1856 of 2107
Processing request 1857 of 2107
Processing request 1858 of 2107
Processing request 1859 of 2107
Processing request 1860 of 2107
Processing request 1861 of 2107
Processing request 1862 of 2107
Processing request 1863 of 2107
Processi

Processing request 2090 of 2107
Processing request 2091 of 2107
Processing request 2092 of 2107
Processing request 2093 of 2107
Processing request 2094 of 2107
Processing request 2095 of 2107
Processing request 2096 of 2107
Processing request 2097 of 2107
Processing request 2098 of 2107
Processing request 2099 of 2107
Processing request 2100 of 2107
Processing request 2101 of 2107
Processing request 2102 of 2107
Processing request 2103 of 2107
Processing request 2104 of 2107
Processing request 2105 of 2107
Processing request 2106 of 2107
Processing request 2107 of 2107
Processing request 1 of 61
Processing request 2 of 61
Processing request 3 of 61
Processing request 4 of 61
Processing request 5 of 61
Processing request 6 of 61
Processing request 7 of 61
Processing request 8 of 61
Processing request 9 of 61
Processing request 10 of 61
Processing request 11 of 61
Processing request 12 of 61
Processing request 13 of 61
Processing request 14 of 61
Processing request 15 of 61
Processing re

Processing request 202 of 390
Processing request 203 of 390
Processing request 204 of 390
Processing request 205 of 390
Processing request 206 of 390
Processing request 207 of 390
Processing request 208 of 390
Processing request 209 of 390
Processing request 210 of 390
Processing request 211 of 390
Processing request 212 of 390
Processing request 213 of 390
Processing request 214 of 390
Processing request 215 of 390
Processing request 216 of 390
Processing request 217 of 390
Processing request 218 of 390
Processing request 219 of 390
Processing request 220 of 390
Processing request 221 of 390
Processing request 222 of 390
Processing request 223 of 390
Processing request 224 of 390
Processing request 225 of 390
Processing request 226 of 390
Processing request 227 of 390
Processing request 228 of 390
Processing request 229 of 390
Processing request 230 of 390
Processing request 231 of 390
Processing request 232 of 390
Processing request 233 of 390
Processing request 234 of 390
Processing

### Step 4.1 - Analysing Route, both in Malta and Gozo, according to day of the week.

This is being done, since routes of the Maltese Public Transport Network experiences changes depending on the day being considered. Hence, we will consider the following DataFrames extracted from All_Routes:

    1. Monday_Routes_Malta - Routes operating in Malta on a Monday
    2. Tuesday_Routes_Malta - Routes operating in Malta on a Tuesday
    3. Wednesday_Routes_Malta - Routes operating in Malta on a Wednesday
    4. Thursday_Routes_Malta - Routes operating in Malta on a Thursday
    5. Friday_Routes_Malta - Routes operating in Malta on a Friday
    6. Saturday_Routes_Malta - Routes operating in Malta on a Saturday
    7. Sunday_Routes_Malta - Routes operating in Malta on a Sunday
    8. Monday_Routes_Gozo - Routes operating in Gozo on a Monday
    9. Tuesday_Routes_Gozo - Routes operating in Gozo on a Tuesday
    10. Wednesday_Routes_Gozo - Routes operating in Gozo on a Wendesday
    11. Thursday_Routes_Gozo - Routes operating in Gozo on a Thursday
    12. Friday_Routes_Gozo - Routes operating in Gozo on a Friday
    13. Saturday_Routes_Gozo - Routes operating in Gozo on a Saturday
    14. Sunday_Routes_Gozo - Routes operating in Gozo on a Sunday

In [13]:
# Step 4 - We will proceed by splitting the 'All_Routes_Copy' into the following fourteen separate dataframes described above.

# List of days in which Public Transportation System Functions
List_Dates = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# 'Rows_Date_Dict' - Dictionary to store only instances of the same date
Rows_Date_Dict = {}
# 'Malta_Dict' - Dictionary to store only instances of the same date operating in Malta
Malta_Dict = {}
# 'Gozo_Dict' - Dictionary to store only instances of the same date operating in Gozo
Gozo_Dict = {}

# for loop going over List_Dates defined above
for c in List_Dates:
    # filter rows according to date 'c' currently being considered
    Rows_Date_Dict[c] = All_Routes_Copy[All_Routes_Copy['Date'].astype(str).str.contains(c, na=False)]

    # split entries present in 'Rows_Date_Dict[c]' accordng to 'Stop Island' value
    Malta_Dict[c] = Rows_Date_Dict[c][Rows_Date_Dict[c]['Stop Island'] == 'MALTA STOP'].reset_index(drop=True)
    Gozo_Dict[c] = Rows_Date_Dict[c][Rows_Date_Dict[c]['Stop Island'] == 'GOZO STOP'].reset_index(drop=True)

# Naming Dataframes
Monday_Routes_Malta = Malta_Dict['Monday']
Tuesday_Routes_Malta = Malta_Dict['Tuesday']
Wednesday_Routes_Malta = Malta_Dict['Wednesday']
Thursday_Routes_Malta = Malta_Dict['Thursday']
Friday_Routes_Malta = Malta_Dict['Friday']
Saturday_Routes_Malta = Malta_Dict['Saturday']
Sunday_Routes_Malta = Malta_Dict['Sunday']
Monday_Routes_Gozo = Gozo_Dict['Monday']
Tuesday_Routes_Gozo = Gozo_Dict['Tuesday']
Wednesday_Routes_Gozo = Gozo_Dict['Wednesday']
Thursday_Routes_Gozo = Gozo_Dict['Thursday']
Friday_Routes_Gozo = Gozo_Dict['Friday']
Saturday_Routes_Gozo = Gozo_Dict['Saturday']
Sunday_Routes_Gozo = Gozo_Dict['Sunday']


In [14]:
# Step 4.1 - Obtain Number of routes covered each day both in Malta and Gozo

Malta_Route_DataFrames_List = [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta]
Gozo_Route_DataFrames_List = [Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

# List of days
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Compute number of unique routes per day for Malta
malta_routes = [len(df['Route Number'].unique()) for df in Malta_Route_DataFrames_List]
# Compute number of unique routes per day for Gozo
gozo_routes = [len(df['Route Number'].unique()) for df in Gozo_Route_DataFrames_List]

# Create DataFrame
route_data = pd.DataFrame({
    "Day": days,
    "Malta Routes": malta_routes,
    "Gozo Routes": gozo_routes
})

# Display DataFrame
route_data


Unnamed: 0,Day,Malta Routes,Gozo Routes
0,Monday,97,15
1,Tuesday,97,15
2,Wednesday,97,15
3,Thursday,97,15
4,Friday,97,15
5,Saturday,92,15
6,Sunday,92,15


In [None]:
Monday_Routes_Gozo.to_csv("Routes_In_Gozo.csv")

In [None]:
# Step 4.2 - Obtain table indicating number of bus stops traversed by each route, also taking into account day of the week (To simplify problem,
#for any non-circular stops we will consider only the maximum number of stops in any one direction)

# Initialize an empty dictionary to store result of number of stops of each route per day
Malta_Stops_Per_Route_PerDay = {}
Gozo_Stops_Per_Route_PerDay = {}

j = 0

for i in Malta_Route_DataFrames_List:
    i_copy = i.copy()
    i_copy['Concatenated_Columns'] = (i_copy['Route Number'] + '-' + i_copy['Route Direction'] + '-' + i_copy['Time_Count'].astype(str))
    # Count the number of rows in each group
    group_sizes = i_copy.groupby('Concatenated_Columns').size()
    # Map group sizes back to the original DataFrame
    i_copy['Group'] = i_copy['Concatenated_Columns'].map(group_sizes)
    # Drop temporary columns if not needed
    i_copy.drop(columns=['Concatenated_Columns'], inplace=True)
    # Count the size of each group
    Partition_Count = i_copy.groupby(['Route Number', 'Group']).size()
    #Convert Partition_Count to a DataFrame
    Partition_Count = Partition_Count.reset_index(name='Count')
    # Find the maximum size for each Route Number (Group)
    Malta_Stops_Per_Route = Partition_Count.groupby('Route Number')['Group'].max()
    Malta_Stops_Per_Route_PerDay[List_Dates[j]] = Malta_Stops_Per_Route
    j += 1


k = 0
Gozo_Stops_Per_Route_PerDay = {}  # Ensure dictionary is initialized
for i in Gozo_Route_DataFrames_List:
    i_copy = i.copy()
    i_copy['Concatenated_Columns'] = (i_copy['Route Number'] + '-' + i_copy['Route Direction'] + i_copy['Time_Count'].astype(str))
    # Count the number of rows in each group
    group_sizes = i_copy.groupby('Concatenated_Columns').size()
    # Map group sizes back to the original DataFrame
    i_copy['Group'] = i_copy['Concatenated_Columns'].map(group_sizes)
    # Drop temporary columns if not needed
    i_copy.drop(columns=['Concatenated_Columns'], inplace=True)
    # Count the size of each group
    Partition_Count = i_copy.groupby(['Route Number', 'Group']).size()
    Partition_Count = Partition_Count.reset_index(name='Count')
    # Find the maximum size for each Route Number
    Gozo_Stops_Per_Route = Partition_Count.groupby('Route Number')['Group'].max()
    Gozo_Stops_Per_Route_PerDay[List_Dates[k]] = Gozo_Stops_Per_Route
    k += 1

# Naming Dataframes
Monday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Monday']
Tuesday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Tuesday']
Wednesday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Wednesday']
Thursday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Thursday']
Friday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Friday']
Saturday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Saturday']
Sunday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Sunday']
Monday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Monday']
Tuesday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Tuesday']
Wednesday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Wednesday']
Thursday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Thursday']
Friday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Friday']
Saturday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Saturday']
Sunday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Sunday']

In [None]:
# Step 4.2.1 - Combine Data, related to Malta, in One DataFrame with the following
    # 1. Column - Day of the week
    # 2. Rows - Route Number
    # 3. Entries in DataFrame - Number of Bus Stops traverrsed by each Route

# Combine Data in one DataFrame entitled 'Malta_RoutePresencePerDay__and_BusStopsVisited'
Malta_RoutePresencePerDay__and_BusStopsVisited = pd.concat([Monday_NoStopsPerRoute_Malta, Tuesday_NoStopsPerRoute_Malta, Wednesday_NoStopsPerRoute_Malta,
                                                           Thursday_NoStopsPerRoute_Malta, Friday_NoStopsPerRoute_Malta, Saturday_NoStopsPerRoute_Malta,
                                                           Sunday_NoStopsPerRoute_Malta], axis=1)
# Replace 'N/A' with NaN
Malta_RoutePresencePerDay__and_BusStopsVisited.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Malta_RoutePresencePerDay__and_BusStopsVisited = Malta_RoutePresencePerDay__and_BusStopsVisited.astype('Int64')
# Rename Columns
Malta_RoutePresencePerDay__and_BusStopsVisited.columns = List_Dates
# Display DataFrame
Malta_RoutePresencePerDay__and_BusStopsVisited

In [None]:
# Step 4.2.2 - Combine Data, related to Gozo, in One DataFrame with the following
    # 1. Column - Day of the week
    # 2. Rows - Route Number
    # 3. Entries in DataFrame - Number of Bus Stops traverrsed by each Route


# Combine Data in one DataFrame entitled 'Gozo_RoutePresencePerDay__and_BusStopsVisited'
Gozo_RoutePresencePerDay__and_BusStopsVisited = pd.concat([Monday_NoStopsPerRoute_Gozo, Tuesday_NoStopsPerRoute_Gozo, Wednesday_NoStopsPerRoute_Gozo,
                                                           Thursday_NoStopsPerRoute_Gozo, Friday_NoStopsPerRoute_Gozo, Saturday_NoStopsPerRoute_Gozo,
                                                           Sunday_NoStopsPerRoute_Gozo], axis=1)
# Replace 'N/A' with NaN
Gozo_RoutePresencePerDay__and_BusStopsVisited.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Gozo_RoutePresencePerDay__and_BusStopsVisited = Gozo_RoutePresencePerDay__and_BusStopsVisited.astype('Int64')
# Rename Columns
Gozo_RoutePresencePerDay__and_BusStopsVisited.columns = List_Dates
# Display DataFrame
Gozo_RoutePresencePerDay__and_BusStopsVisited

In [None]:
# Step 4.3 - Obtain table indicating number of times route runs throughout a day, also taking into account day of the week
# (To simplify problem, for any non-circular stops we will consider only the maximum frequency between directions)
# [Frequency is given per working day]
# N.B: It is expected that circular routes will run more frequently compared to routes going in one direction


# Initialize an empty dictionary to store result of number of stops of each route per day
Malta_Frequency_Of_Route_PerDay = {}
Gozo_Frequency_Of_Route_PerDay = {}

j = 0

for i in Malta_Route_DataFrames_List:
    i_copy = i.copy()
    Grouped = i_copy.groupby('Route Number')
    # Find the maximum size for each Route Number
    Malta_Frequency_Of_Route = Grouped['Time_Count'].max()
    Malta_Frequency_Of_Route_PerDay[List_Dates[j]] = Malta_Frequency_Of_Route
    j += 1

k = 0

for i in Gozo_Route_DataFrames_List:
    i_copy = i.copy()
    Grouped = i_copy.groupby('Route Number')
    # Find the maximum size for each Route Number
    Gozo_Frequency_Of_Route = Grouped['Time_Count'].max()
    Gozo_Frequency_Of_Route_PerDay[List_Dates[k]] = Gozo_Frequency_Of_Route
    k += 1


# Naming Dataframes
Monday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Monday']
Tuesday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Tuesday']
Wednesday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Wednesday']
Thursday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Thursday']
Friday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Friday']
Saturday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Saturday']
Sunday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Sunday']
Monday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Monday']
Tuesday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Tuesday']
Wednesday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Wednesday']
Thursday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Thursday']
Friday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Friday']
Saturday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Saturday']
Sunday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Sunday']

In [None]:
# Step 4.3.1 - Combine Data, related to Malta, in One DataFrame with the following
    # 1. Column - Day of the week
    # 2. Rows - Route Number
    # 3. Entries in DataFrame - The Frequency per day of each Route

# Combine Data in one DataFrame entitled 'Malta_FrequencyPerDay'
Malta_FrequencyPerDay = pd.concat([Monday_FrequencyofRoute_Malta, Tuesday_FrequencyofRoute_Malta, Wednesday_FrequencyofRoute_Malta,
                                                           Thursday_FrequencyofRoute_Malta, Friday_FrequencyofRoute_Malta, Saturday_FrequencyofRoute_Malta,
                                                           Sunday_FrequencyofRoute_Malta], axis=1)
# Replace 'N/A' with NaN
Malta_FrequencyPerDay.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Malta_FrequencyPerDay = Malta_FrequencyPerDay.astype('Int64')
# Rename Columns
Malta_FrequencyPerDay.columns = List_Dates
# Display DataFrame
Malta_FrequencyPerDay

In [None]:
# Step 4.3.2 - Combine Data, related to Gozo, in One DataFrame with the following
    # 1. Column - Day of the week
    # 2. Rows - Route Number
    # 3. Entries in DataFrame - The Frequency per day of each Route

# Combine Data in one DataFrame entitled 'Gozo_FrequencyPerDay'
Gozo_FrequencyPerDay = pd.concat([Monday_FrequencyofRoute_Gozo, Tuesday_FrequencyofRoute_Gozo, Wednesday_FrequencyofRoute_Gozo,
                                                           Thursday_FrequencyofRoute_Gozo, Friday_FrequencyofRoute_Gozo, Saturday_FrequencyofRoute_Gozo,
                                                           Sunday_FrequencyofRoute_Gozo], axis=1)
# Replace 'N/A' with NaN
Gozo_FrequencyPerDay.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Gozo_FrequencyPerDay = Gozo_FrequencyPerDay.astype('Int64')
# Rename Columns
Gozo_FrequencyPerDay.columns = List_Dates
# Display DataFrame
Gozo_FrequencyPerDay

In [None]:
# Step 4.4 - Splitting Bus Stops, related to Malta, into three separate
#  1. 'Normal_and_BusTerminals_DataFrame_Malta' - Bus Stops used as both Normal Stops and Bus Terminals
#  2. 'BusTerminals_Only_DataFrame_Malta' - Bus Stops used as Bus Terminals Only
#  3. 'NormalStops_Only_DataFrame_Malta' - Bus Stops used as Normal Stops Only
# All DataFrame outputs are saved in the path specieid


Malta_Route_DataFrames_List = [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta]
Gozo_Route_DataFrames_List = [Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

Day_Names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Save Path Home
#save_path = "C://Users//Owner//ICT5012 - Disseration//Untitled Folder//"
# Save Path Work
save_path = "C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//"

for i, day in zip(Malta_Route_DataFrames_List, Day_Names):

    print(day)

    Reset_Condition_SepDay = (
        i['Route Number'].shift(-1) != i['Route Number']) | \
        (i['Route Direction'].shift(-1) != i['Route Direction'])

    # Initialise all entries in 'Bus_Terminal' as 0
    i['Bus_Terminal'] = 0
    # If Reset_Conditions defined above fails, then Bus Stop considered in 'Bus_Stop_ID' is a Bus Terminal
    i.loc[Reset_Condition_SepDay, 'Bus_Terminal'] = 1
    OG_BUS_Terminal_COLUMN = i['Bus_Terminal'].copy()
    # Iterate through the original column
    for j in range(len(OG_BUS_Terminal_COLUMN) - 1):
        if OG_BUS_Terminal_COLUMN[j] == 1:
            i.loc[j + 1, 'Bus_Terminal'] = 1
    # Initialising First Row as Bus Terminal
    i.loc[0, 'Bus_Terminal'] = 1

    Bus_Terminals_DataFrame = i[i['Bus_Terminal'] == 1]

    # Contain List of traversed Bus_Stop_ID ('bus_stop_ids' - All Bus Stops identified as terminal stops)
    bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Bus_Terminals_DataFrame)):
        bus_stop_id = Bus_Terminals_DataFrame.iloc[c]['Bus_Stop_ID']

        if bus_stop_id not in bus_stop_ids:
            bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs

    # Obtain only entries which correspond to a Bus Terminals.
    # Duplicates will be present since the same Bus Terminals may be utilised for multiple 'Route Numebr' and 'Route Direction'
    Normal_Bus_DataFrame = i[i['Bus_Terminal'] == 0]

    # Contain List of traversed Bus_Stop_ID ('Normal_bus_stop_ids' - All Bus Stops identified as normal stops)
    Normal_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Normal_Bus_DataFrame)):
        Normal_bus_stop_id = Normal_Bus_DataFrame.iloc[c]['Bus_Stop_ID']

        if Normal_bus_stop_id not in Normal_bus_stop_ids:
            Normal_bus_stop_ids.append(Normal_bus_stop_id)  # Add to list of seen IDs


    # Bus Stops used in terminals/terminals used as Bus Stops
    # Find items in common between the two lists produced above 'bus_stop_ids' and 'Normal_bus_stop_ids'
    #Normal_and_BusTerminals_ids = [item for item in Normal_bus_stop_ids if item in bus_stop_ids]
    Normal_and_BusTerminals_ids = list(set(bus_stop_ids) & set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    Normal_and_BusTerminals_DataFrame_Malta = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    compare_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        bus_stop_id = i.iloc[c]['Bus_Stop_ID']

        if bus_stop_id in Normal_and_BusTerminals_ids and bus_stop_id not in compare_bus_stop_ids:
            compare_bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs

            Normal_and_BusTerminals_DataFrame_Malta = pd.concat([Normal_and_BusTerminals_DataFrame_Malta, i.iloc[[c]]],
                                                                ignore_index=True)

    file_name = f"AllNormalBusStopsUsedAsTerminalsMalta_{day}.csv"
    Normal_and_BusTerminals_DataFrame_Malta.to_csv(save_path + file_name, index=False)

    # Obtaining Stops which are only used as Terminals
    BusTerminals_Only_ids = list(set(bus_stop_ids) - set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    BusTerminals_Only_DataFrame_Malta = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Terminal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Terminal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Terminal_stop_id in BusTerminals_Only_ids and Only_Terminal_stop_id not in Only_Terminal_stop_ids:
            Only_Terminal_stop_ids.append(Only_Terminal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            BusTerminals_Only_DataFrame_Malta = pd.concat([BusTerminals_Only_DataFrame_Malta, i.iloc[[c]]],
                                                          ignore_index=True)

    file_name = f"BusTerminalsOnlyMalta_{day}.csv"
    BusTerminals_Only_DataFrame_Malta.to_csv(save_path + file_name, index=False)

    # Obtaining Stops which are only used as Normal Stops
    NormalStops_Only_ids = list(set(Normal_bus_stop_ids) - set(bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    NormalStops_Only_DataFrame_Malta = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Normal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Normal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Normal_stop_id in NormalStops_Only_ids and Only_Normal_stop_id not in Only_Normal_stop_ids:
            Only_Normal_stop_ids.append(Only_Normal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            NormalStops_Only_DataFrame_Malta = pd.concat([NormalStops_Only_DataFrame_Malta, i.iloc[[c]]],
                                                         ignore_index=True)

    file_name = f"NormalStopsOnlyMalta_{day}.csv"
    NormalStops_Only_DataFrame_Malta.to_csv(save_path + file_name, index=False)

In [None]:
# Step 4.4.2 - Splitting Bus Stops, related to Gozo, into three separate DataFrames
#  1. 'Normal_and_BusTerminals_DataFrame_Malta' - Bus Stops used as both Normal Stops and Bus Terminals
#  2. 'BusTerminals_Only_DataFrame_Malta' - Bus Stops used as Bus Terminals Only
#  3. 'NormalStops_Only_DataFrame_Malta' - Bus Stops used as Normal Stops Only
# All DataFrame outputs are saved in the path specied in 'save_path'

Malta_Route_DataFrames_List = [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta]
Gozo_Route_DataFrames_List = [Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

Day_Names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Save Path Home
#save_path = "C://Users//Owner//ICT5012 - Disseration//Untitled Folder//"
# Save Path Work
save_path = "C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//"


for i, day in zip(Gozo_Route_DataFrames_List, Day_Names):

    print(day)

    Reset_Condition_SepDay = (
        i['Route Number'].shift(-1) != i['Route Number']) | \
        (i['Route Direction'].shift(-1) != i['Route Direction'])

    # Initialise all entries in 'Bus_Terminal' as 0
    i['Bus_Terminal'] = 0
    # If Reset_Conditions defined above fails, then Bus Stop considered in 'Bus_Stop_ID' is a Bus Terminal
    i.loc[Reset_Condition_SepDay, 'Bus_Terminal'] = 1
    OG_BUS_Terminal_COLUMN = i['Bus_Terminal'].copy()
    # Iterate through the original column
    for j in range(len(OG_BUS_Terminal_COLUMN) - 1):
        if OG_BUS_Terminal_COLUMN[j] == 1:
            i.loc[j + 1, 'Bus_Terminal'] = 1
    # Initialising First Row as Bus Terminal
    i.loc[0, 'Bus_Terminal'] = 1

    Bus_Terminals_DataFrame = i[i['Bus_Terminal'] == 1]

    # Contain List of traversed Bus_Stop_ID ('bus_stop_ids' - All Bus Stops identified as terminal stops)
    bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Bus_Terminals_DataFrame)):
        bus_stop_id = Bus_Terminals_DataFrame.iloc[c]['Bus_Stop_ID']

        if bus_stop_id not in bus_stop_ids:
            bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs

    # Obtain only entries which correspond to a Bus Terminals.
    # Duplicates will be present since the same Bus Terminals may be utilised for multiple 'Route Numebr' and 'Route Direction'
    Normal_Bus_DataFrame = i[i['Bus_Terminal'] == 0]

    # Contain List of traversed Bus_Stop_ID ('Normal_bus_stop_ids' - All Bus Stops identified as normal stops)
    Normal_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Normal_Bus_DataFrame)):
        Normal_bus_stop_id = Normal_Bus_DataFrame.iloc[c]['Bus_Stop_ID']

        if Normal_bus_stop_id not in Normal_bus_stop_ids:
            Normal_bus_stop_ids.append(Normal_bus_stop_id)  # Add to list of seen IDs


    # Bus Stops used in terminals/terminals used as Bus Stops
    # Find items in common between the two lists produced above 'bus_stop_ids' and 'Normal_bus_stop_ids'
    #Normal_and_BusTerminals_ids = [item for item in Normal_bus_stop_ids if item in bus_stop_ids]
    Normal_and_BusTerminals_ids = list(set(bus_stop_ids) & set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    Normal_and_BusTerminals_DataFrame_Gozo = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    compare_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        bus_stop_id = i.iloc[c]['Bus_Stop_ID']

        if bus_stop_id in Normal_and_BusTerminals_ids and bus_stop_id not in compare_bus_stop_ids:
            compare_bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs

            Normal_and_BusTerminals_DataFrame_Gozo = pd.concat([Normal_and_BusTerminals_DataFrame_Gozo, i.iloc[[c]]],
                                                                ignore_index=True)

    file_name = f"AllNormalBusStopsUsedAsTerminalsGozo_{day}.csv"
    Normal_and_BusTerminals_DataFrame_Gozo.to_csv(save_path + file_name, index=False)

    # Obtaining Stops which are only used as Terminals
    BusTerminals_Only_ids = list(set(bus_stop_ids) - set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    BusTerminals_Only_DataFrame_Gozo = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Terminal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Terminal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Terminal_stop_id in BusTerminals_Only_ids and Only_Terminal_stop_id not in Only_Terminal_stop_ids:
            Only_Terminal_stop_ids.append(Only_Terminal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            BusTerminals_Only_DataFrame_Gozo = pd.concat([BusTerminals_Only_DataFrame_Gozo, i.iloc[[c]]],
                                                          ignore_index=True)

    file_name = f"BusTerminalsOnlyGozo_{day}.csv"
    BusTerminals_Only_DataFrame_Gozo.to_csv(save_path + file_name, index=False)

    # Obtaining Stops which are only used as Normal Stops
    NormalStops_Only_ids = list(set(Normal_bus_stop_ids) - set(bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    NormalStops_Only_DataFrame_Gozo = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Normal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Normal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Normal_stop_id in NormalStops_Only_ids and Only_Normal_stop_id not in Only_Normal_stop_ids:
            Only_Normal_stop_ids.append(Only_Normal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            NormalStops_Only_DataFrame_Gozo = pd.concat([NormalStops_Only_DataFrame_Gozo, i.iloc[[c]]],
                                                         ignore_index=True)

    file_name = f"NormalStopsOnlyGozo_{day}.csv"
    NormalStops_Only_DataFrame_Gozo.to_csv(save_path + file_name, index=False)

In [None]:
# Step 4.4.3 - Loading all datasets created (for Malta)
Malta_NormalStops_Monday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Monday.csv")
Malta_NormalStops_Tuesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Tuesday.csv")
Malta_NormalStops_Wednesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Wednesday.csv")
Malta_NormalStops_Thursday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Thursday.csv")
Malta_NormalStops_Friday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Friday.csv")
Malta_NormalStops_Saturday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Saturday.csv")
Malta_NormalStops_Sunday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Sunday.csv")

Malta_TerminalStops_Monday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Monday.csv")
Malta_TerminalStops_Tuesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Tuesday.csv")
Malta_TerminalStops_Wednesday =pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Wednesday.csv")
Malta_TerminalStops_Thursday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Thursday.csv")
Malta_TerminalStops_Friday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Friday.csv")
Malta_TerminalStops_Saturday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Saturday.csv")
Malta_TerminalStops_Sunday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Sunday.csv")

Malta_NormalandTerminalStops_Monday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Monday.csv")
Malta_NormalandTerminalStops_Tuesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Tuesday.csv")
Malta_NormalandTerminalStops_Wednesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Wednesday.csv")
Malta_NormalandTerminalStops_Thursday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Thursday.csv")
Malta_NormalandTerminalStops_Friday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Friday.csv")
Malta_NormalandTerminalStops_Saturday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Saturday.csv")
Malta_NormalandTerminalStops_Sunday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Sunday.csv")

In [None]:
# Step 4.4.4 - Loading all datasets created (for Gozo)
Gozo_NormalStops_Monday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Monday.csv")
Gozo_NormalStops_Tuesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Tuesday.csv")
Gozo_NormalStops_Wednesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Wednesday.csv")
Gozo_NormalStops_Thursday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Thursday.csv")
Gozo_NormalStops_Friday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Friday.csv")
Gozo_NormalStops_Saturday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Saturday.csv")
Gozo_NormalStops_Sunday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Sunday.csv")

Gozo_TerminalStops_Monday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Monday.csv")
Gozo_TerminalStops_Tuesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Tuesday.csv")
Gozo_TerminalStops_Wednesday =pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Wednesday.csv")
Gozo_TerminalStops_Thursday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Thursday.csv")
Gozo_TerminalStops_Friday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Friday.csv")
Gozo_TerminalStops_Saturday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Saturday.csv")
Gozo_TerminalStops_Sunday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Sunday.csv")

Gozo_NormalandTerminalStops_Monday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Monday.csv")
Gozo_NormalandTerminalStops_Tuesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Tuesday.csv")
Gozo_NormalandTerminalStops_Wednesday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Wednesday.csv")
Gozo_NormalandTerminalStops_Thursday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Thursday.csv")
Gozo_NormalandTerminalStops_Friday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Friday.csv")
Gozo_NormalandTerminalStops_Saturday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Saturday.csv")
Gozo_NormalandTerminalStops_Sunday = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Sunday.csv")

In [None]:
# Step 4.4.5 - Creating Pandas Dataframes to show data, for both Malta and Gozo, in table form

List_Original_DataFrames =  [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta,
                            Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

List_NormalStopsOnly_DataFrames = [Malta_NormalStops_Monday, Malta_NormalStops_Tuesday, Malta_NormalStops_Wednesday, Malta_NormalStops_Thursday, Malta_NormalStops_Friday, Malta_NormalStops_Saturday,
                                   Malta_NormalStops_Sunday, Gozo_NormalStops_Monday, Gozo_NormalStops_Tuesday, Gozo_NormalStops_Wednesday, Gozo_NormalStops_Thursday, Gozo_NormalStops_Friday, Gozo_NormalStops_Saturday,
                                   Gozo_NormalStops_Sunday]

List_TerminalStopsOnly_DataFrames = [Malta_TerminalStops_Monday, Malta_TerminalStops_Tuesday, Malta_TerminalStops_Wednesday, Malta_TerminalStops_Thursday,
                                    Malta_TerminalStops_Friday, Malta_TerminalStops_Saturday, Malta_TerminalStops_Sunday, Gozo_TerminalStops_Monday, Gozo_TerminalStops_Tuesday, Gozo_TerminalStops_Wednesday, Gozo_TerminalStops_Thursday,
                                    Gozo_TerminalStops_Friday, Gozo_TerminalStops_Saturday, Gozo_TerminalStops_Sunday]

List_NormalandTerminalStops_DataFrames = [Malta_NormalandTerminalStops_Monday, Malta_NormalandTerminalStops_Tuesday, Malta_NormalandTerminalStops_Wednesday, Malta_NormalandTerminalStops_Thursday,
                                          Malta_NormalandTerminalStops_Friday, Malta_NormalandTerminalStops_Saturday, Malta_NormalandTerminalStops_Sunday, Gozo_NormalandTerminalStops_Monday, Gozo_NormalandTerminalStops_Tuesday, Gozo_NormalandTerminalStops_Wednesday, Gozo_NormalandTerminalStops_Thursday,
                                          Gozo_NormalandTerminalStops_Friday, Gozo_NormalandTerminalStops_Saturday, Gozo_NormalandTerminalStops_Sunday]

Column_List = ['Malta_Monday', 'Malta_Tuesday', 'Malta_Wednesday', 'Malta_Thursday', 'Malta_Friday',
               'Malta_Saturday', 'Malta_Sunday', 'Gozo_Monday', 'Gozo_Tuesday', 'Gozo_Wednesday', 'Gozo_Thursday', 'Gozo_Friday',
               'Gozo_Saturday', 'Gozo_Sunday']

Total_Nodes = []
for c in List_Original_DataFrames:
    Total_Nodes.append(c['Bus_Stop_ID'].nunique())

Total_NormalStopsOnly = []
for c in List_NormalStopsOnly_DataFrames:
    Total_NormalStopsOnly.append(c['Bus_Stop_ID'].nunique())

Total_TerminalStopsOnly = []
for c in List_TerminalStopsOnly_DataFrames:
    Total_TerminalStopsOnly.append(c['Bus_Stop_ID'].nunique())

Total_NormalandTerminalStops = []
for c in List_NormalandTerminalStops_DataFrames:
    Total_NormalandTerminalStops.append(c['Bus_Stop_ID'].nunique())

NodeInfo_SplitByDay = pd.DataFrame([Total_Nodes, Total_NormalStopsOnly, Total_TerminalStopsOnly, Total_NormalandTerminalStops],
                                   index=['Total Number of unique Nodes', 'Total Number of Unique Normal Stops', 'Total Number of Unique Bus Terminals',
                                          'Total Number of Unique Stops used as both Regular Stops and Terminals'], columns= Column_List)
# Display DataFrame
NodeInfo_SplitByDay

In [None]:
# Saving a copy of all DataFrames Created
Malta_RoutePresencePerDay__and_BusStopsVisited.to_csv('C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Malta_RoutePresencePerDay__and_BusStopsVisited.csv')
Gozo_RoutePresencePerDay__and_BusStopsVisited.to_csv('C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Gozo_RoutePresencePerDay__and_BusStopsVisited.csv')
Malta_FrequencyPerDay.to_csv('C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Malta_FrequencyPerDay.csv')
Gozo_FrequencyPerDay.to_csv('C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Gozo_FrequencyPerDay.csv')
NodeInfo_SplitByDay.to_csv('C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//NodeInfo_SplitByDay.csv')

### Step 5 - Obtaining Information on the travel time beween each Origin and destination Bus Stop

In [None]:
# Step 5.1 - Load data from 'Distinct_Edges_MALTA_IncTravelTimes' (renamed to 'Malta_DistinctEdges_IncTravelTimes') and
# 'Distinct_Edges_GOZO_IncTravelTimes' (renamed to 'Gozo_DistinctEdges_IncTravelTimes')
#Malta_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes.csv")
#Gozo_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes.csv")

# At Work
Malta_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Files Used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes.csv")
Gozo_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Files Used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes.csv")

In [None]:
len(Malta_DistinctEdges_IncTravelTimes)

In [None]:
len(Gozo_DistinctEdges_IncTravelTimes)

In [None]:
# Step 5.2 - Data Cleaning (Take shortest path if available ex. Stop 3 to Stop 819 is 56 seconds long on the otherhand Stop 819 to Stop 3
# is 1019 seconds long (The former is correct))

# Standardize route names to be order-independent
Malta_DistinctEdges_IncTravelTimes['Standardized_Route'] = Malta_DistinctEdges_IncTravelTimes['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Malta_DistinctEdges_IncTravelTimes_Min = Malta_DistinctEdges_IncTravelTimes.loc[Malta_DistinctEdges_IncTravelTimes.groupby('Standardized_Route')['Travel_Time'].idxmin()]
# Drop the helper column if not needed
Malta_DistinctEdges_IncTravelTimes_Min = Malta_DistinctEdges_IncTravelTimes_Min.drop(columns=['Standardized_Route'])


# Standardize route names to be order-independent
Gozo_DistinctEdges_IncTravelTimes['Standardized_Route'] = Gozo_DistinctEdges_IncTravelTimes['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Gozo_DistinctEdges_IncTravelTimes_Min = Gozo_DistinctEdges_IncTravelTimes.loc[Gozo_DistinctEdges_IncTravelTimes.groupby('Standardized_Route')['Travel_Time'].idxmin()]
# Drop the helper column if not needed
Gozo_DistinctEdges_IncTravelTimes_Min = Gozo_DistinctEdges_IncTravelTimes_Min.drop(columns=['Standardized_Route'])

# Saving DataFrames
Malta_DistinctEdges_IncTravelTimes_Min.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_WrongPathFix.csv")
Gozo_DistinctEdges_IncTravelTimes_Min.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_WrongPathFix.csv")

In [None]:
# Step 5.3 - Obtaining Information related to Malta (Time)

# Find edge with maximum travel time
Malta_DistinctEdges_IncTravelTimes_Min[Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'] == Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].max()]
# Find average travelling time throughout all edges
Average_TravelTime = Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].mean()
# Find median travelling time throughout all edges
Median_TravelTime = Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].median()
# Obtain box plot of Travel Time

# Removing Errors by Changing Data Type to numeric
Travel_TimeData = pd.to_numeric(Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'], errors='coerce').dropna()


# Obtaining further statistics
minimum = np.min(Travel_TimeData)
Q1 = np.percentile(Travel_TimeData, 25)
median = np.median(Travel_TimeData)
Q3 = np.percentile(Travel_TimeData, 75)
maximum = np.max(Travel_TimeData)
# Compute interquartile range (IQR)
IQR = Q3 - Q1
# Compute whiskers (last values before outliers)
lower_whisker = np.min(Travel_TimeData[Travel_TimeData >= (Q1 - 1.5 * IQR)].astype(float))
upper_whisker = np.max(Travel_TimeData[Travel_TimeData <= (Q3 + 1.5 * IQR)].astype(float))

# Defining figure size
Figure = plt.figure(figsize =(10, 7))
plt.boxplot(Travel_TimeData, vert = True, patch_artist = True)

# Add title, y-axis and x-axis
plt.xlabel("Edge Travel Time Data", fontsize = 12)
plt.ylabel("Travel Time (seconds)", fontsize = 12)
plt.title("Box Plot of Edge In-Vehicle Travel Time in Malta", fontsize = 14)


plt.annotate(f"Min/Lower Whisker: {minimum:.2f}",
             xy=(1, minimum),
             xytext=(1.1, minimum + 2),
             fontsize=8)

plt.annotate(f"Q1: {Q1:.2f}",
             xy=(1, Q1),
             xytext=(1.1, Q1 + 2),
             fontsize=8)

plt.annotate(f"Median: {median:.2f}",
             xy=(1, median),
             xytext=(1.1, median + 2),
             fontsize=8, fontweight='bold')

plt.annotate(f"Q3: {Q3:.2f}",
             xy=(1, Q3),
             xytext=(1.1, Q3 + 2),
             fontsize=8)

plt.annotate(f"Max: {maximum:.2f}",
             xy=(1, maximum),
             xytext=(1.1, maximum + 2),
             fontsize=8)

plt.annotate(f"Upper Whisker: {upper_whisker:.2f}",
             xy=(1, upper_whisker),
             xytext=(1.1, upper_whisker + 2),
             fontsize=8)

# Show the plot
plt.show()

In [None]:
# Step 5.4 - Obtaining Information related to Gozo (Time)

# Find edge with maximum travel time
Gozo_DistinctEdges_IncTravelTimes_Min[Gozo_DistinctEdges_IncTravelTimes_Min['Travel_Time'] == Gozo_DistinctEdges_IncTravelTimes_Min['Travel_Time'].max()]
# Find average travelling time throughout all edges
Average_TravelTime = Gozo_DistinctEdges_IncTravelTimes_Min['Travel_Time'].mean()
# Find median travelling time throughout all edges
Median_TravelTime = Gozo_DistinctEdges_IncTravelTimes_Min['Travel_Time'].median()

# Obtain box plot of Travel Time
# Example data (replace this with your actual DataFrame column)
Travel_TimeData = pd.to_numeric(Gozo_DistinctEdges_IncTravelTimes_Min['Travel_Time'], errors='coerce').dropna()

# Obtaining further statistics
minimum = np.min(Travel_TimeData)
Q1 = np.percentile(Travel_TimeData, 25)
median = np.median(Travel_TimeData)
Q3 = np.percentile(Travel_TimeData, 75)
maximum = np.max(Travel_TimeData)
# Compute interquartile range (IQR)
IQR = Q3 - Q1
# Compute whiskers (last values before outliers)
lower_whisker = np.min(Travel_TimeData[Travel_TimeData >= (Q1 - 1.5 * IQR)].astype(float))
upper_whisker = np.max(Travel_TimeData[Travel_TimeData <= (Q3 + 1.5 * IQR)].astype(float))

# Defining figure size
Figure = plt.figure(figsize =(10, 7))
plt.boxplot(Travel_TimeData, vert = True, patch_artist = True)

# Add title, y-axis and x-axis
plt.xlabel("Edge Travel Time Data", fontsize = 12)
plt.ylabel("Travel Time (seconds)", fontsize = 12)
plt.title("Box Plot of Edge In-Vehicle Travel Time in Gozo", fontsize = 14)

#Annotate Values to Box Plot
plt.annotate(f"Min/Lower Whisker: {minimum:.2f}",
             xy=(1, minimum),
             xytext=(1.1, minimum + 2),
             fontsize=8)

plt.annotate(f"Q1: {Q1:.2f}",
             xy=(1, Q1),
             xytext=(1.1, Q1 + 2),
             fontsize=8)

plt.annotate(f"Median: {median:.2f}",
             xy=(1, median),
             xytext=(1.1, median + 2),
             fontsize=8, fontweight='bold')

plt.annotate(f"Q3: {Q3:.2f}",
             xy=(1, Q3),
             xytext=(1.1, Q3 + 2),
             fontsize=8)

plt.annotate(f"Max: {maximum:.2f}",
             xy=(1, maximum),
             xytext=(1.1, maximum + 2),
             fontsize=8)

plt.annotate(f"Upper Whisker: {upper_whisker:.2f}",
             xy=(1, upper_whisker),
             xytext=(1.1, upper_whisker + 2),
             fontsize=8)



plt.show()

In [None]:
# Step 5.5 - Data Cleaning (Take shortest path if available ex. Stop 3 to Stop 819 is 56 seconds long on the otherhand Stop 819 to Stop 3
# is 1019 seconds long (The former is correct))

# Standardize route names to be order-independent
Malta_DistinctEdges_IncTravelTimes['Standardized_Route'] = Malta_DistinctEdges_IncTravelTimes['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Malta_DistinctEdges_IncTravelTimes_MinLength = Malta_DistinctEdges_IncTravelTimes.loc[Malta_DistinctEdges_IncTravelTimes.groupby('Standardized_Route')['Length_In_Metres'].idxmin()]
# Drop the helper column if not needed
Malta_DistinctEdges_IncTravelTimes_MinLength = Malta_DistinctEdges_IncTravelTimes_MinLength.drop(columns=['Standardized_Route'])


# Standardize route names to be order-independent
Gozo_DistinctEdges_IncTravelTimes['Standardized_Route'] = Gozo_DistinctEdges_IncTravelTimes['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Gozo_DistinctEdges_IncTravelTimes_MinLength = Gozo_DistinctEdges_IncTravelTimes.loc[Gozo_DistinctEdges_IncTravelTimes.groupby('Standardized_Route')['Length_In_Metres'].idxmin()]
# Drop the helper column if not needed
Gozo_DistinctEdges_IncTravelTimes_MinLength = Gozo_DistinctEdges_IncTravelTimes_MinLength.drop(columns=['Standardized_Route'])

# Saving DataFrames
Malta_DistinctEdges_IncTravelTimes_MinLength.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_WrongPathFixLength.csv")
Gozo_DistinctEdges_IncTravelTimes_MinLength.to_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_WrongPathFixLength.csv")

In [None]:
# Step 5.6 - Obtaining Information related to Malta (Length)

# Find edge with maximum travel time
Malta_DistinctEdges_IncTravelTimes_MinLength[Malta_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'] == Malta_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'].max()]
# Find average travelling time throughout all edges
Average_Length = Malta_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'].mean()
# Find median travelling time throughout all edges
Median_Length = Malta_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'].median()
# Obtain box plot of Travel Time

# Removing Errors by Changing Data Type to numeric
Length_Data = pd.to_numeric(Malta_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'], errors='coerce').dropna()


# Obtaining further statistics
minimum = np.min(Length_Data)
Q1 = np.percentile(Length_Data, 25)
median = np.median(Length_Data)
Q3 = np.percentile(Length_Data, 75)
maximum = np.max(Length_Data)
# Compute interquartile range (IQR)
IQR = Q3 - Q1
# Compute whiskers (last values before outliers)
lower_whisker = np.min(Length_Data[Length_Data >= (Q1 - 1.5 * IQR)].astype(float))
upper_whisker = np.max(Length_Data[Length_Data <= (Q3 + 1.5 * IQR)].astype(float))

# Defining figure size
Figure = plt.figure(figsize =(10, 7))
plt.boxplot(Length_Data, vert = True, patch_artist = True)

# Add title, y-axis and x-axis
plt.xlabel("Edge Length Data", fontsize = 12)
plt.ylabel("Length (metres)", fontsize = 12)
plt.title("Box Plot of Edge Length in Malta", fontsize = 14)


plt.annotate(f"Min/Lower Whisker: {minimum:.2f}",
             xy=(1, minimum),
             xytext=(1.1, minimum + 2),
             fontsize=8)

plt.annotate(f"Q1: {Q1:.2f}",
             xy=(1, Q1),
             xytext=(1.1, Q1 + 2),
             fontsize=8)

plt.annotate(f"Median: {median:.2f}",
             xy=(1, median),
             xytext=(1.1, median + 2),
             fontsize=8, fontweight='bold')

plt.annotate(f"Q3: {Q3:.2f}",
             xy=(1, Q3),
             xytext=(1.1, Q3 + 2),
             fontsize=8)

plt.annotate(f"Max: {maximum:.2f}",
             xy=(1, maximum),
             xytext=(1.1, maximum + 2),
             fontsize=8)

plt.annotate(f"Upper Whisker: {upper_whisker:.2f}",
             xy=(1, upper_whisker),
             xytext=(1.1, upper_whisker + 2),
             fontsize=8)

# Show the plot
plt.show()

In [None]:
# Step 5.6 - Obtaining Information related to Gozo (Length)

# Find edge with maximum travel time
Gozo_DistinctEdges_IncTravelTimes_MinLength[Gozo_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'] == Gozo_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'].max()]
# Find average travelling time throughout all edges
Average_Length = Gozo_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'].mean()
# Find median travelling time throughout all edges
Median_Length = Gozo_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'].median()
# Obtain box plot of Travel Time

# Removing Errors by Changing Data Type to numeric
Length_Data = pd.to_numeric(Gozo_DistinctEdges_IncTravelTimes_MinLength['Length_In_Metres'], errors='coerce').dropna()


# Obtaining further statistics
minimum = np.min(Length_Data)
Q1 = np.percentile(Length_Data, 25)
median = np.median(Length_Data)
Q3 = np.percentile(Length_Data, 75)
maximum = np.max(Length_Data)
# Compute interquartile range (IQR)
IQR = Q3 - Q1
# Compute whiskers (last values before outliers)
lower_whisker = np.min(Length_Data[Length_Data >= (Q1 - 1.5 * IQR)].astype(float))
upper_whisker = np.max(Length_Data[Length_Data <= (Q3 + 1.5 * IQR)].astype(float))

# Defining figure size
Figure = plt.figure(figsize =(10, 7))
plt.boxplot(Length_Data, vert = True, patch_artist = True)

# Add title, y-axis and x-axis
plt.xlabel("Edge Length Data", fontsize = 12)
plt.ylabel("Length (metres)", fontsize = 12)
plt.title("Box Plot of Edge Length in Gozo", fontsize = 14)


plt.annotate(f"Min/Lower Whisker: {minimum:.2f}",
             xy=(1, minimum),
             xytext=(1.1, minimum + 2),
             fontsize=8)

plt.annotate(f"Q1: {Q1:.2f}",
             xy=(1, Q1),
             xytext=(1.1, Q1 + 2),
             fontsize=8)

plt.annotate(f"Median: {median:.2f}",
             xy=(1, median),
             xytext=(1.1, median + 2),
             fontsize=8, fontweight='bold')

plt.annotate(f"Q3: {Q3:.2f}",
             xy=(1, Q3),
             xytext=(1.1, Q3 + 2),
             fontsize=8)

plt.annotate(f"Max: {maximum:.2f}",
             xy=(1, maximum),
             xytext=(1.1, maximum + 2),
             fontsize=8)

plt.annotate(f"Upper Whisker: {upper_whisker:.2f}",
             xy=(1, upper_whisker),
             xytext=(1.1, upper_whisker + 2),
             fontsize=8)

# Show the plot
plt.show()

### Step 6 - Analysing Most Popular Stops

In [None]:
All_Routes_Copy = pd.concat([Monday_Routes_Malta, Monday_Routes_Gozo], ignore_index=True)

In [None]:
# Keeping Only essential data from 'All_Routes_Copy' DataFrame
Trip_Generation_DataFrame = All_Routes_Copy[['Stops','City Name', 'Bus_Stop_ID', 'Longitude_Final', 'Latitude_Final', 'Stop Island']]
Trip_Generation_DataFrame.loc[:, "Index"] = range(1, len(Trip_Generation_DataFrame) + 1)

In [None]:
# Obtaining Unique City Names to check population figures
Unique_CityNames = Trip_Generation_DataFrame['City Name'].unique()
print(Unique_CityNames)

# Checking if City Name Exists in NSO Census Data

- **Hal Luqa** - Yes (7,249)
- **San Vincenz** - No (Long-term care facility in Luqa, 7,249)
- **Il-Marsa** - Yes (5,468)
- **L-Imsida** - Yes (13,587)
- **Mater Dei** - No (Refers to Hospital in Msida, 13,587)
- **Is-Swieqi** - Yes (13,044)
- **Pembroke** - Yes (3,545)
- **Bahar ic-Caghaq** - No (Part of Naxxar according to Postal Code, 16,912)
- **Il-Qawra** - No (Part of San Pawl Il-Baħar according to Postal Code, 32,042)
- **San Pawl il-Bahar** - Yes (32,042)
- **Ix-Xemxija** - No (Part of San Pawl Il-Baħar according to Postal Code, 32,042)
- **Il-Mellieha** - Yes (12,738)
- **Il-Marfa** - No (Part of Mellieha according to Postal Code, 12,738)
- **Ic-Cirkewwa** - No (Part of Mellieha according to Postal Code, 12,738)
- **San Vincenz/Luqa** - No (Long-term care facility in Luqa, 7,249)
- **Paola** - No (Part of Rahal Gdid according to NSO map, 9,339)
- **Il-Gudja** - Yes (3,229)
- **Hal Far** - No (Part of Birzebuga according to Postal Code, 11,844)
- **Birzebbuga** - Yes (11,844)
- **Il-Qajjenza** - No (Part of Birzebuga according to Postal Code, 11,844)
- **Marsaskala** - Yes (16,804)
- **Marsaxlokk** - Yes (3,988)
- **Bir id-Deheb** - No (Part of Zejtun according to Postal Code, 12,409)
- **Iz-Zejtun** - Yes (12,409)
- **Santa Lucija, Malta** - Yes (2,617)
- **Is-Swatar** - No (Part of Birkirkara according to Postal Code, 25,807)
- **San Giljan** - Yes (11,653)
- **Tas-Sliema** - Yes (19,655)
- **Il-Gzira** - Yes (10,331)
- **Santa Venera** - Yes (8,834)
- **Birkirkara** - Yes (25,807)
- **Hal Balzan** - Yes (4,774)
- **H'Attard** - Yes (12,268)
- **Ta' Qali** - No (Part of H'Attard according to Postal Code, 12,268)
- **Ir-Rabat, Malta** - Yes (11,936)
- **L-Imtarfa** - Yes (2,566)
- **Il-Mosta** - Yes (23,482)
- **Bugibba** - No (Part of San Pawl Il-Baħar according to Postal Code, 32,042)
- **Il-Belt Valletta** - Yes (5,157)
- **Floriana** - Yes (1,985)
- **Il-Hamrun** - Yes (10,514)
- **Hal Farrug** - No (Part of Luqa according to Postal Code, 7,249)
- **Il-Fgura** - Yes (13,066)
- **Bormla** - Yes (4,654)
- **L-Isla** - Yes (2,304)
- **Il-Birgu** - Yes (2,261)
- **Il-Kalkara** - Yes (3,105)
- **Ix-Xghajra** - Yes (2,192)
- **Ir-Rinella** - No (Part of Kalkara according to Postal Code, 3,105)
- **Tal-P-età** - Yes (5,892)
- **Ta' Xbiex** - Yes (2,092)
- **Paceville** - No (Part of San Giljan according to Postal Code, 11,653)
- **Il-Madliena** - No (Part of Swieqi, 13,044)
- **Il-Kappara** - No (Part of San Gwann according to Postal Code, 14,244)
- **Ta' Giorni** - No (Part of San Gwann according to Postal Code, 14,244)
- **San Gwann** - Yes (14,244)
- **L-Iklin** - Yes (3,399)
- **In-Naxxar** - Yes (16,912)
- **Burmarrad** - No (Part of San Pawl Il-Baħar according to Postal Code, 32,042)
- **Hal Lija** - Yes (3,162)
- **Fleur-de-Lys** - No (Part of Birkirkara according to Postal Code, 25,807)
- **Iz-Zebbiegh** - No (Part of Mgarr according to Postal Code, 4,840)
- **L-Imgarr, Malta** - Yes (4,840)
- **Ghajn Tuffieha** - No (Part of Mgarr according to Postal Code, 4,840)
- **Il-Manikata** - No (Part of Mellieha according to Postal Code, 12,738)
- **Hal Gharghur** - Yes (3,741)
- **Gwardamangia** - No (Part of Pi-ta according to Postal Code, 5,892)
- **Had-Dingli** - Yes (3,865)
- **Il-Buskett** - No (Part of Had-Dingli according to Postal Code, 3,865)
- **Hal Qormi** - Yes (18,099)
- **Haz-Zebbug** - Yes (13,785)
- **Is-Siggiewi** - Yes (9,318)
- **Hal Kirkop** - Yes (2,527)
- **Hal Safi** - Yes (2,641)
- **Iz-Zurrieq** - Yes (12,295)
- **L-Imqabba** - Yes (3,525)
- **Il-Qrendi** - Yes (3,148)
- **Hal Tarxien** - Yes (9,464)
- **Bulebel** - No (Part of Zejtun according to Postal Code, 12,409)
- **Hal Ghaxaq** - Yes (5,538)
- **Haz-Zabbar** - Yes (17,148)
- **Il-Bidnija** - No (Part of San Pawl Il-Baħar according to Postal Code, 32,042)
- **Ir-Rabat, Ghawdex** - Yes (7,242)
- **Ix-Xewkija** - Yes (3,555)
- **Ghajnsielem** - Yes (3,523)
- **L-Imgarr, Ghawdex** - No (Part of Ghajnsielem according to NSO map, 3,523)
- **In-Nadur** - Yes (4,548)
- **Il-Qala** - Yes (2,300)
- **Ta' Sannat** - Yes (2,186)
- **Il-Munxar** - Yes (1,707)
- **Il-Fontana, Ghawdex** - Yes (1,042)
- **Ix-Xaghra** - Yes (5,161)
- **L-Ghasri** - Yes (518)
- **Iz-Zebbug, Gozo** - Yes (3,303)
- **L-Gharb** - Yes (1,549)
- **San Lawrenz** - Yes (772)
- **Ta' Kercem** - Yes (1,881)


In [None]:
# Evening Model
# Trip Origin will be from Workplaces
# Trip Destination will be to Work

# Time to Consider - 5 PM
# Total Number of trips by bus (Whole day) - 34,679
# Total Number of trips by bus (Whole day - Malta) - 32390.186
# Total Number of trips by bus (Whole day - Gozo) - 2288.814
# Proportion of trips occuring at 5 PM - 45,000/638,456 (7048253912%)



### Step 7 - Obtaining the required txt file

In [77]:
# Step 7.1 - Calling/Loading necessary files
# 1) 'Monday_Routes_Malta'
# 2) 'Monday_Routes_Gozo'
# 2) 'Distinct_Edges_MALTA_IncTravelTimes' (renamed to 'Malta_DistinctEdges_IncTravelTimes')
# 3) 'Distinct_Edges_GOZO_IncTravelTimes' (renamed to 'Gozo_DistinctEdges_IncTravelTimes')

Monday_Routes_Malta
Monday_Routes_Gozo
#Morning
Malta_DistinctEdges_IncTravelTimes_Morning = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_Morning.csv")
Gozo_DistinctEdges_IncTravelTimes_Morning = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_Morning.csv")
New_Distinct_Edges_Malta_IncTravelTimes_Morning = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Malta_IncTravelTimes_Morning.csv")
New_Distinct_Edges_Gozo_IncTravelTimes_Morning = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Gozo_IncTravelTimes_Morning.csv")


#Evening
Malta_DistinctEdges_IncTravelTimes_Evening = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_Evening.csv")
Gozo_DistinctEdges_IncTravelTimes_Evening = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_Evening.csv")
New_Distinct_Edges_Malta_IncTravelTimes_Evening = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Malta_IncTravelTimes_Evening.csv")
New_Distinct_Edges_Gozo_IncTravelTimes_Evening = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//New_Distinct_Edges_Gozo_IncTravelTimes_Evening.csv")



# Change Travel Times to Minutes
Malta_DistinctEdges_IncTravelTimes_Morning['Travel_Time'] = Malta_DistinctEdges_IncTravelTimes_Morning['Travel_Time']/60
Gozo_DistinctEdges_IncTravelTimes_Morning['Travel_Time'] = Gozo_DistinctEdges_IncTravelTimes_Morning['Travel_Time']/60
New_Distinct_Edges_Malta_IncTravelTimes_Morning['Travel_Time'] = New_Distinct_Edges_Malta_IncTravelTimes_Morning['Travel_Time']/60
New_Distinct_Edges_Gozo_IncTravelTimes_Morning['Travel_Time'] = New_Distinct_Edges_Gozo_IncTravelTimes_Morning['Travel_Time']/60

Malta_DistinctEdges_IncTravelTimes_Evening['Travel_Time'] = Malta_DistinctEdges_IncTravelTimes_Evening['Travel_Time']/60
Gozo_DistinctEdges_IncTravelTimes_Evening['Travel_Time'] = Gozo_DistinctEdges_IncTravelTimes_Evening['Travel_Time']/60
New_Distinct_Edges_Malta_IncTravelTimes_Evening['Travel_Time'] = New_Distinct_Edges_Malta_IncTravelTimes_Evening['Travel_Time']/60
New_Distinct_Edges_Gozo_IncTravelTimes_Evening['Travel_Time'] = New_Distinct_Edges_Gozo_IncTravelTimes_Evening['Travel_Time']/60

In [78]:
# Splitting Monday_Routes_Malta into separate route sections according to Malta Public Transport Website

South_Eastern_Routes = ['X4', '71', '72', '73', '74', '80', '82', '84', '85', '88', '91', '92', '93']
Northern_Harbour_Routes = ['13A', '14', '15', '16', '21', '22', '58', '58A', '24', '25', '32', '35', '63', '64', '110', '121', '225']
Southern_Harbour_Routes = ['1', '2', '3', '4', '83', '90', '94', '120', '124', '130', '133', '150']
Western_Routes = ['50', '51', '52', '53', '54', '56', '61', '62', '186', '202', '109', '109A']
Northern_Routes = ['X1A', '13', '31', '41', '42', '43', '44', '45', '46', '47', '48', '101', '103', '203', '212', '221', '222', '223', '238', '260', '250', 'X300']
Mater_Dei_North_Routes = ['106', '181', '182', '209', '233', '280']
Mater_Dei_South_Routes = ['117', '122', '135', '204', '206', '210', '213', '218', '226', '300']
Airport_Routes = ['X1', '119', 'X2', 'X3', '201']

# Defining a Separate DataFrame for each specifiv Route Set
Monday_Routes_Malta_South_Eastern_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(South_Eastern_Routes)]
Monday_Routes_Malta_Northern_Harbour_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Northern_Harbour_Routes)]
Monday_Routes_Malta_Southern_Harbour_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Southern_Harbour_Routes)]
Monday_Routes_Malta_Western_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Western_Routes)]
Monday_Routes_Malta_Northern_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Northern_Routes)]
Monday_Routes_Malta_Mater_Dei_North_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Mater_Dei_North_Routes)]
Monday_Routes_Malta_Mater_Dei_South_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Mater_Dei_South_Routes)]
Monday_Routes_Malta_Mater_Dei_Airport_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Airport_Routes)]


# Create a List of TUples: (name, dataframe)
Monday_Routes_Malta_List = [
    ('South_Eastern_Routes', Monday_Routes_Malta_South_Eastern_Routes),
    ('Northern_Harbour_Routes', Monday_Routes_Malta_Northern_Harbour_Routes),
    ('Southern_Harbour_Routes', Monday_Routes_Malta_Southern_Harbour_Routes),
    ('Western_Routes', Monday_Routes_Malta_Western_Routes),
    ('Northern_Routes', Monday_Routes_Malta_Northern_Routes),
    ('Mater_Dei_North_Routes', Monday_Routes_Malta_Mater_Dei_North_Routes),
    ('Mater_Dei_South_Routes', Monday_Routes_Malta_Mater_Dei_South_Routes),
    ('Airport_Routes', Monday_Routes_Malta_Mater_Dei_Airport_Routes)
]



In [63]:
# Saving Routes Malta
Monday_Routes_Malta_South_Eastern_Routes.to_csv('South_Eastern_Routes.csv')
Monday_Routes_Malta_Northern_Harbour_Routes.to_csv('Northern_Harbour_Routes.csv')
Monday_Routes_Malta_Southern_Harbour_Routes.to_csv('Southern_Harbour_Routes.csv')
Monday_Routes_Malta_Western_Routes.to_csv('Western_Routes.csv')
Monday_Routes_Malta_Northern_Routes.to_csv('Northern_Routes.csv')
Monday_Routes_Malta_Mater_Dei_North_Routes.to_csv('Mater_Dei_North_Routes.csv')
Monday_Routes_Malta_Mater_Dei_South_Routes.to_csv('Mater_Dei_South_Routes.csv')
Monday_Routes_Malta_Mater_Dei_Airport_Routes.to_csv('Airport_Routes.csv')


In [32]:
# Saving Routes Gozo
Monday_Routes_Gozo.to_csv('Gozo_Routes.csv')

In [79]:
# Step 7.1 - Data Cleaning (Take shortest path if available ex. Stop 3 to Stop 819 is 56 seconds long on the otherhand Stop 819 to Stop 3
# is 1019 seconds long (The former is correct))

# Standardize route names to be order-independent
Malta_DistinctEdges_IncTravelTimes_Morning['Standardized_Route'] = Malta_DistinctEdges_IncTravelTimes_Morning['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Malta_DistinctEdges_IncTravelTimes_Morning['Travel_Time'] = Malta_DistinctEdges_IncTravelTimes_Morning.groupby('Standardized_Route')['Travel_Time'].transform('min')
# Drop the helper column if not needed
Malta_DistinctEdges_IncTravelTimes_Min = Malta_DistinctEdges_IncTravelTimes_Morning.drop(columns=['Standardized_Route'])




# Standardize route names to be order-independent
Gozo_DistinctEdges_IncTravelTimes_Morning['Standardized_Route'] = Gozo_DistinctEdges_IncTravelTimes_Morning['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Gozo_DistinctEdges_IncTravelTimes_Morning['Travel_Time'] = Gozo_DistinctEdges_IncTravelTimes_Morning.groupby('Standardized_Route')['Travel_Time'].transform('min')
# Drop the helper column if not needed
Gozo_DistinctEdges_IncTravelTimes_Min = Gozo_DistinctEdges_IncTravelTimes_Morning.drop(columns=['Standardized_Route'])

## Obtaining Text Files for Malta

In [None]:
# Step 7.2.1 - Create Coords text file
for name, df in Monday_Routes_Malta_List:
    # 1 - Obtain List of Unique Stops in Malta
    unique_stops = df.drop_duplicates(subset=['Bus_Stop_ID'], keep='first')
    # 2 - Sort Stops in Descending Order
    unique_stops_sorted  = unique_stops.sort_values(by='Bus_Stop_ID', key=lambda x: x.str.extract(r'(\d+)')[0].astype(int))
    # 3 - Limit 'Latitude_Final' and 'Longitude_Final' columns to 7 Decimal Places
    unique_stops_sorted[['Latitude_Final', 'Longitude_Final']] = unique_stops_sorted[['Latitude_Final', 'Longitude_Final']].round(7)
    # 4 - Obtain 'MaltaCoords.txt' file
    File_Name = f"{name}Coords.txt"
    Columns_Required = ['Latitude_Final', 'Longitude_Final']
    Number_of_Rows = len(unique_stops_sorted)
    
    with open(File_Name, 'w') as File:
        # Write row count at top of csv file
        File.write(f"{Number_of_Rows}\n")
        unique_stops_sorted[Columns_Required].to_csv(File, sep=' ', index=False, header=False)

In [71]:
New_Distinct_Edges_Malta_IncTravelTimes_Morning

Unnamed: 0.1,Unnamed: 0,Stop From,Latitude_Final,Longitude_Final,Stop To,Latitude_Next,Longitude_Next,Travel_Time,Travel_Time_No_Traffic,Historic_Traffic_Travel_Time,Live_Traffic_Travel_Time,Traffic_Delay,Length_In_Metres,Departure_Time,Arrival_Time
0,0,Stop_23,35.987261,14.328296,Stop_24,35.987998,14.328836,98,97,98,98,0,299,2025-03-10T08:00:00+01:00,2025-03-10T08:01:38+01:00
1,1,Stop_0,35.849412,14.495967,Stop_29,35.848856,14.496720,183,172,183,183,0,827,2025-03-10T08:00:00+01:00,2025-03-10T08:03:03+01:00
2,2,Stop_29,35.848856,14.496720,Stop_34,35.849123,14.496371,17,16,17,17,0,43,2025-03-10T08:00:00+01:00,2025-03-10T08:00:17+01:00
3,3,Stop_29,35.848856,14.496720,Stop_0,35.849412,14.495967,35,33,35,35,0,92,2025-03-10T08:00:00+01:00,2025-03-10T08:00:35+01:00
4,4,Stop_144,35.884301,14.401575,Stop_167,35.884732,14.401403,7,7,7,7,0,49,2025-03-10T08:00:00+01:00,2025-03-10T08:00:07+01:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,56,Stop_190,35.895214,14.508006,Stop_727,35.893993,14.509492,278,245,278,278,0,340,2025-03-10T08:00:00+01:00,2025-03-10T08:04:38+01:00
57,57,Stop_157,35.954264,14.419552,Stop_158,35.954660,14.419480,127,121,127,127,0,472,2025-03-10T08:00:00+01:00,2025-03-10T08:02:07+01:00
58,58,Stop_144,35.884301,14.401575,Stop_167,35.884732,14.401403,7,7,7,7,0,49,2025-03-10T08:00:00+01:00,2025-03-10T08:00:07+01:00
59,59,Stop_384,35.908843,14.504340,Stop_232,35.909278,14.504563,213,192,213,213,0,873,2025-03-10T08:00:00+01:00,2025-03-10T08:03:33+01:00


In [84]:
import pandas as pd
import numpy as np

# Assume Monday_Routes_Malta_List is defined as a list of tuples: (route_name, dataframe)
for route_name, df in Monday_Routes_Malta_List:
    # 1 - Obtain list of unique stops for this route
    unique_stops = df.drop_duplicates(subset=['Bus_Stop_ID'], keep='first')
    unique_stops_sorted = unique_stops.sort_values(
        by='Bus_Stop_ID',
        key=lambda x: x.str.extract(r'(\d+)')[0].astype(int)
    )
    
    # Create Bus Stops list
    bus_stops_list = unique_stops_sorted['Bus_Stop_ID'].unique()
    # Create an empty DataFrame with Bus_Stop_IDs as both rows and columns
    travel_time_matrix = pd.DataFrame(index=bus_stops_list, columns=bus_stops_list)
    
    # Set index for faster lookup in malta_edges
    malta_edges = Malta_DistinctEdges_IncTravelTimes_Min.copy()
    malta_edges.set_index(['Bus_Stop_ID', 'Next_Bus_Stop_ID'], inplace=True)
    
    # Initialize the matrix with np.inf (as a default)
    travel_time_matrix[:] = np.inf

    # Set diagonal entries to zero
    np.fill_diagonal(travel_time_matrix.values, 0)

    # Loop through each bus_stop combination specified in the DataFrame
    for index, row in df.iterrows():
        bus_stop_pair = row['Bus_Stop_Next_Bus_Stop']
        if pd.notna(bus_stop_pair):  # Ensure the value is not NaN
            # Convert to string and split
            stop_0, stop_1 = str(bus_stop_pair).split('_to_')
        
            if stop_0 == stop_1:
                travel_time_matrix.at[stop_0, stop_1] = 0
            elif (stop_0, stop_1) in malta_edges.index:
                travel_time_matrix.at[stop_0, stop_1] = malta_edges.at[(stop_0, stop_1), 'Travel_Time']

    
    # Adding Required Connections
    if route_name == 'Airport_Routes':
        AR1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_23') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_24')]
        travel_time_AR1 = AR1['Travel_Time'].values 
        travel_time_matrix.at['Stop_23', 'Stop_24'] = travel_time_AR1[0]
        AR2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_0') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_29')]
        travel_time_AR2 = AR2['Travel_Time'].values 
        travel_time_matrix.at['Stop_0', 'Stop_29'] = travel_time_AR2[0]
        AR3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_29') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_34')]
        travel_time_AR3 = AR3['Travel_Time'].values 
        travel_time_matrix.at['Stop_29', 'Stop_34'] = travel_time_AR3[0]   
        AR4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_29') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_0')]
        travel_time_AR4 = AR4['Travel_Time'].values 
        travel_time_matrix.at['Stop_29', 'Stop_0'] = travel_time_AR4[0]
        AR5 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_144') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_167')]
        travel_time_AR5 = AR5['Travel_Time'].values 
        travel_time_matrix.at['Stop_144', 'Stop_167'] = travel_time_AR5[0]
        AR6 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_9') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_112')]
        travel_time_AR6 = AR6['Travel_Time'].values 
        travel_time_matrix.at['Stop_9', 'Stop_112'] = travel_time_AR6[0]
        AR7 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_82') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_83')]
        travel_time_AR7 = AR7['Travel_Time'].values 
        travel_time_matrix.at['Stop_82', 'Stop_83'] = travel_time_AR7[0]
        AR8 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_158')]
        travel_time_AR8 = AR8['Travel_Time'].values 
        travel_time_matrix.at['Stop_157', 'Stop_158'] = travel_time_AR8[0]

    if route_name == 'Mater_Dei_North_Routes':
        MDN1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_7') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_8')]
        travel_time_MDN1 = MDN1['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_7', 'Stop_8'] = travel_time_MDN1  # Ensure the indices are correct as per your matrix setup
        MDN2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_456')]
        travel_time_MDN2 = MDN2['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_157', 'Stop_456'] = travel_time_MDN2  # Ensure the indices are correct as per your matrix setup

        
    if route_name == 'Mater_Dei_South_Routes':
        MDS1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_7') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_27')]
        travel_time_MDS1 = MDS1['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_7', 'Stop_27'] = travel_time_MDS1
        MDS2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_223')]
        travel_time_MDS2 = MDS2['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_223'] = travel_time_MDS2
        MDS3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_82') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_83')]
        travel_time_MDS3 = MDS3['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_82', 'Stop_83'] = travel_time_MDS3
        MDS4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_7') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_8')]
        travel_time_MDS4 = MDS4['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_7', 'Stop_8'] = travel_time_MDS4

    if route_name == 'Northern_Harbour_Routes':
        NH1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_347')]
        travel_time_NH1 = NH1['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_347'] = travel_time_NH1
        NH2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_349') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_329')]
        travel_time_NH2 = NH2['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_349', 'Stop_329'] = travel_time_NH2
        NH3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_355')]
        travel_time_NH3 = NH3['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_355'] = travel_time_NH3
        NH4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_232') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_233')]
        travel_time_NH4 = NH4['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_232', 'Stop_233'] = travel_time_NH4
        NH5 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_171')]
        travel_time_NH5 = NH5['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_171'] = travel_time_NH5
        NH6 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_412')]
        travel_time_NH6 = NH6['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_412'] = travel_time_NH6
        NH7 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_575')]
        travel_time_NH7 = NH7['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_575'] = travel_time_NH7
        NH8 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_752')]
        travel_time_NH8 = NH8['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_752'] = travel_time_NH8
        NH9 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_244') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_232')]
        travel_time_NH9 = NH9['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_244', 'Stop_232'] = travel_time_NH9
        NH10 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_114') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_112')]
        travel_time_NH10 = NH10['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_114', 'Stop_112'] = travel_time_NH10
        NH11 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_327')]
        travel_time_NH11 = NH11['Travel_Time'].values[0]
        travel_time_matrix.at['Stop_190', 'Stop_327'] = travel_time_NH11
        
        
    # Adding Required Connections
    if route_name == 'Northern_Routes':
        N1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_327')]
        travel_time_matrix.at['Stop_190', 'Stop_327'] = N1['Travel_Time'].values[0]
        N2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_456')]
        travel_time_matrix.at['Stop_157', 'Stop_456'] = N2['Travel_Time'].values[0]
        N3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_424')]
        travel_time_matrix.at['Stop_190', 'Stop_424'] = N3['Travel_Time'].values[0]
        N4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_23') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_24')]
        travel_time_matrix.at['Stop_23', 'Stop_24'] = N4['Travel_Time'].values[0]
        N5 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_471')]
        travel_time_matrix.at['Stop_190', 'Stop_471'] = N5['Travel_Time'].values[0]
        N6 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_506')]
        travel_time_matrix.at['Stop_190', 'Stop_506'] = N6['Travel_Time'].values[0]
        N7 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_509')]
        travel_time_matrix.at['Stop_190', 'Stop_509'] = N7['Travel_Time'].values[0]
        N8 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_528') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_525')]
        travel_time_matrix.at['Stop_528', 'Stop_525'] = N8['Travel_Time'].values[0]
        N9 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_540')]
        travel_time_matrix.at['Stop_157', 'Stop_540'] = N9['Travel_Time'].values[0]
        N10 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_349') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_329')]
        travel_time_matrix.at['Stop_349', 'Stop_329'] = N10['Travel_Time'].values[0]
        N11 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_195')]
        travel_time_matrix.at['Stop_157', 'Stop_195'] = N11['Travel_Time'].values[0]
        N12 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_232') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_384')]
        travel_time_matrix.at['Stop_232', 'Stop_384'] = N12['Travel_Time'].values[0]
        N13 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_244') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_232')]
        travel_time_matrix.at['Stop_244', 'Stop_232'] = N13['Travel_Time'].values[0]
        N14 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_1120')]
        travel_time_matrix.at['Stop_157', 'Stop_1120'] = N14['Travel_Time'].values[0]
        N15 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_1122')]
        travel_time_matrix.at['Stop_157', 'Stop_1122'] = N15['Travel_Time'].values[0]


    # Adding Required Connections
    if route_name == 'South_Eastern_Routes':
        SE1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_772')]
        travel_time_matrix.at['Stop_190', 'Stop_772'] = SE1['Travel_Time'].values[0]
        SE2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_805')]
        travel_time_matrix.at['Stop_190', 'Stop_805'] = SE2['Travel_Time'].values[0]
        SE3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_44') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_43')]
        travel_time_matrix.at['Stop_44', 'Stop_43'] = SE3['Travel_Time'].values[0]
        SE4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_853')]
        travel_time_matrix.at['Stop_190', 'Stop_853'] = SE4['Travel_Time'].values[0]
        SE5 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_878')]
        travel_time_matrix.at['Stop_190', 'Stop_878'] = SE5['Travel_Time'].values[0]
        SE6 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_927') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_928')]
        travel_time_matrix.at['Stop_927', 'Stop_928'] = SE6['Travel_Time'].values[0]
        SE7 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_895')]
        travel_time_matrix.at['Stop_190', 'Stop_895'] = SE7['Travel_Time'].values[0]
        SE8 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_245')]
        travel_time_matrix.at['Stop_190', 'Stop_245'] = SE8['Travel_Time'].values[0]
        SE9 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_933')]
        travel_time_matrix.at['Stop_190', 'Stop_933'] = SE9['Travel_Time'].values[0]
        SE10 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_171')]
        travel_time_matrix.at['Stop_190', 'Stop_171'] = SE10['Travel_Time'].values[0]
        SE11 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_57') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_190')]
        travel_time_matrix.at['Stop_57', 'Stop_190'] = SE11['Travel_Time'].values[0]
        SE12 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_914')]
        travel_time_matrix.at['Stop_190', 'Stop_914'] = SE12['Travel_Time'].values[0]

        
    # Adding Required Connections
    if route_name == 'Southern_Harbour_Routes':
        SH1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_280')]
        travel_time_matrix.at['Stop_190', 'Stop_280'] = SH1['Travel_Time'].values[0]
        SH2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_319')]
        travel_time_matrix.at['Stop_190', 'Stop_319'] = SH2['Travel_Time'].values[0]
        SH3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_895')]
        travel_time_matrix.at['Stop_190', 'Stop_895'] = SH3['Travel_Time'].values[0]
        SH4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_932')]
        travel_time_matrix.at['Stop_190', 'Stop_932'] = SH4['Travel_Time'].values[0]
        SH5 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_349') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_329')]
        travel_time_matrix.at['Stop_349', 'Stop_329'] = SH5['Travel_Time'].values[0]
        SH6 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_271') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_949')]
        travel_time_matrix.at['Stop_271', 'Stop_949'] = SH6['Travel_Time'].values[0]
        SH7 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_223')]
        travel_time_matrix.at['Stop_190', 'Stop_223'] = SH7['Travel_Time'].values[0]

    # Adding Required Connections
    if route_name == 'Western_Routes':
        W1 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_575')]
        travel_time_matrix.at['Stop_190', 'Stop_575'] = W1['Travel_Time'].values[0]
        W2 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_590')]
        travel_time_matrix.at['Stop_190', 'Stop_590'] = W2['Travel_Time'].values[0]
        W3 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_633') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_634')]
        travel_time_matrix.at['Stop_633', 'Stop_634'] = W3['Travel_Time'].values[0]
        W4 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_581') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_587')]
        travel_time_matrix.at['Stop_581', 'Stop_587'] = W4['Travel_Time'].values[0]
        W5 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_642')]
        travel_time_matrix.at['Stop_190', 'Stop_642'] = W5['Travel_Time'].values[0]
        W6 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_689')]
        travel_time_matrix.at['Stop_190', 'Stop_689'] = W6['Travel_Time'].values[0]
        W7 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_190') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_727')]
        travel_time_matrix.at['Stop_190', 'Stop_727'] = W7['Travel_Time'].values[0]
        W8 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_157') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_158')]
        travel_time_matrix.at['Stop_157', 'Stop_158'] = W8['Travel_Time'].values[0]
        W9 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_144') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_167')]
        travel_time_matrix.at['Stop_144', 'Stop_167'] = W9['Travel_Time'].values[0]
        W10 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_384') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_232')]
        travel_time_matrix.at['Stop_384', 'Stop_232'] = W10['Travel_Time'].values[0]
        W11 = New_Distinct_Edges_Malta_IncTravelTimes_Morning[(New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop From'] == 'Stop_581') & (New_Distinct_Edges_Malta_IncTravelTimes_Morning['Stop To'] == 'Stop_167')]
        travel_time_matrix.at['Stop_581', 'Stop_167'] = W11['Travel_Time'].values[0]
        
        
    print(f"Matrix for {route_name} populated successfully.")
    
    # Replace infinite values with the string 'Inf'
    travel_time_matrix.replace({np.inf: 'Inf'}, inplace=True)
    
    # Save to txt file in append mode
    file_name = f"{route_name}TravelTimes.txt"
    travel_time_matrix.to_csv(file_name, sep='\t', index=False, header=False)


Matrix for South_Eastern_Routes populated successfully.
Matrix for Northern_Harbour_Routes populated successfully.
Matrix for Southern_Harbour_Routes populated successfully.
Matrix for Western_Routes populated successfully.
Matrix for Northern_Routes populated successfully.
Matrix for Mater_Dei_North_Routes populated successfully.
Matrix for Mater_Dei_South_Routes populated successfully.
Matrix for Airport_Routes populated successfully.


In [None]:
# Step 7.2.3 - Creating Passenger-Demand Matrix
All_Routes_Copy = pd.concat([Monday_Routes_Malta, Monday_Routes_Gozo], ignore_index=True)

# Keeping Only essential data from 'All_Routes_Copy' DataFrame
Trip_Generation_DataFrame = All_Routes_Copy[['Stops','City Name', 'Bus_Stop_ID', 'Longitude_Final', 'Latitude_Final', 'Stop Island']]
Trip_Generation_DataFrame.loc[:, "Index"] = range(1, len(Trip_Generation_DataFrame) + 1)


# Step 1 - Loading Census data
CENSUS_DATA = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//CENSUS_DATA.xlsx")

# Step 2 - Append 'Proper Name', 'Stop Island' and 'Population' columns from 'CENSUS_DATA' DataFrame to 'Trip_Generation_DataFrame' DataFrame
# Rename DataFrame as 'Stop_Info'
# Create Column 'City Name - Stop Island' in 'Trip_Generation_DataFrame'
Trip_Generation_DataFrame['City Name - Stop Island'] =  Trip_Generation_DataFrame['City Name'] + '.' + Trip_Generation_DataFrame['Stop Island']
# Create column 'City Name - Stop Island' in 'CENSUS_DATA'
CENSUS_DATA['City Name - Stop Island'] = CENSUS_DATA['Name in Sheet'] + '.' + CENSUS_DATA['Stop Island']
# Utilise newly created column to concatenate 'Trip_Generation_DataFrame' and 'CENSUS_DATA'
Stop_Info = pd.merge(Trip_Generation_DataFrame[['Stops', 'Bus_Stop_ID', 'Longitude_Final', 'Latitude_Final', 'City Name - Stop Island', "Index"]], CENSUS_DATA[['City Name - Stop Island', 'Stop Island', 'Proper Name', 'Population']], on="City Name - Stop Island", how="inner",  sort=False)
Stop_Info = Stop_Info.sort_values("Index").drop(columns=["Index"])

# Step 3 - Setting estimates for individuals making use of bus in Malta and Gozo at 8 AM
# Morning Model
# Trip Origin will be from Home
# Trip Destination will be to Work

# Time to Consider - 8 AM
# Total Number of trips by bus (Whole day) - 34,679
# Total Number of trips by bus (Whole day - Malta) - 32390.186
# Total Number of trips by bus (Whole day - Gozo) - 2288.814
# Proportion of trips occuring at 8 AM - 82,000/638,456 (12.843484907%)
Bus_Travel_Morning_Malta = 1000000*(82000/638456)
Bus_Travel_Morning_Gozo = 100000*(82000/638456)



# Step 4 - Calculate Number of Individuals leaving each stop
# 'Unique_Stops' Dataframe - Remove duplicate 'Bus_Stop_ID' entries from 'Stop_Info' DataFrame
Unique_Stops = Stop_Info.drop_duplicates(subset=['Bus_Stop_ID'])
# Count Number of distinct stops in each city (cities denoted by 'Proper Name' column)
BusStopPerCity = Unique_Stops.groupby('Proper Name')['Bus_Stop_ID'].count().reset_index()
# 'Unique_City' Dataframe - Remove duplicate 'Proper Name' entries from 'Stop_Info' DataFrame
Unique_City = Stop_Info.drop_duplicates(subset=['Proper Name'])
# 'BusStopPerCity' DataFrame - Append 'Population' and 'Stop Island' columns to BusStopPerCity
BusStopPerCity = pd.merge(BusStopPerCity, Unique_City[['Proper Name','Population', 'Stop Island']], on="Proper Name", how="inner",  sort=False)
# Calculate Total Population per Island
total_population_per_island = BusStopPerCity.groupby('Stop Island')['Population'].sum().rename('Total_population_in_Island')
# Merge Total Population per Island to BusStopPerCity DataFrame
BusStopPerCity = BusStopPerCity.merge(total_population_per_island, on='Stop Island')
# Calculate Population Proportion in each city
BusStopPerCity['Pop_Proportion'] = BusStopPerCity['Population'] / BusStopPerCity['Total_population_in_Island']
# Calculate 'POP_LEAVING_Per_City' by multiplying Population proportion in city and total people travelling using bus
BusStopPerCity['POP_LEAVING_Per_City'] = np.where(
    BusStopPerCity['Stop Island'] == 'MALTA STOP',
    np.ceil(BusStopPerCity['Pop_Proportion'] * Bus_Travel_Morning_Malta),
    np.ceil(BusStopPerCity['Pop_Proportion'] * Bus_Travel_Morning_Gozo)
)
# Calculate 'POP_LEAVING_Per_Stop' by dividing POP_LEAVING_Per_City by number of stops in city
BusStopPerCity['POP_Leaving_Per_Stop'] = np.ceil(BusStopPerCity['POP_LEAVING_Per_City']/BusStopPerCity['Bus_Stop_ID'])

# Step 5 - Calculate Number of Individuals arriving at each stop
#Get total stops per island
total_times_stops_appears = Stop_Info.groupby('Bus_Stop_ID').size().reset_index(name='total_times_stops_appears')
#Merge with original 'Stop_Info' DataFrame
Stop_Info_WOccofStop = Stop_Info.merge(total_times_stops_appears, on='Bus_Stop_ID', how = 'left')
#Consider only one instance of each distinc 'Bus_Stop_ID'
Unique_Stops_Updated = Stop_Info_WOccofStop.drop_duplicates(subset=['Bus_Stop_ID'])
# Get total stops per island
total_stops_in_island = Unique_Stops_Updated.groupby('Stop Island')['total_times_stops_appears'].sum().rename('Total_stops_in_Island')
# Merge with original DataFrame
Unique_Stops_Updated = Unique_Stops_Updated.merge(total_stops_in_island, on='Stop Island')
# Calculate proportion of stops per city
Unique_Stops_Updated['Stop_Proportion'] = Unique_Stops_Updated['total_times_stops_appears'] / Unique_Stops_Updated['Total_stops_in_Island']
# Calculate 'POP_COMING' by multiplying Population proportion in city and total people travelling using bus
Unique_Stops_Updated['POP_COMING'] = np.where(
    Unique_Stops_Updated['Stop Island'] == 'MALTA STOP',
    np.ceil(Unique_Stops_Updated['Stop_Proportion'] * Bus_Travel_Morning_Malta),
    np.ceil(Unique_Stops_Updated['Stop_Proportion'] * Bus_Travel_Morning_Gozo)
)

# Obtain Final_Dataset representing Individuals exiting and entering each stop
Final_Dataset = pd.merge(Unique_Stops_Updated, BusStopPerCity[['Proper Name','POP_Leaving_Per_Stop']], on="Proper Name", how="inner",  sort=False)


# Summarise Data Set
Morning_Dataset = Final_Dataset[['Stops', 'Bus_Stop_ID', 'Proper Name', 'Longitude_Final', 'Latitude_Final', 'Stop Island', 'POP_COMING', 'POP_Leaving_Per_Stop']]

# Sort DataFrame
Morning_Dataset_Sorted = Morning_Dataset.sort_values(by="Bus_Stop_ID", key=lambda x: x.str.extract(r'(\d+)')[0].astype(int))

Morning_Dataset_Sorted_Malta = Morning_Dataset_Sorted[Morning_Dataset_Sorted["Stop Island"] == "MALTA STOP"]
Morning_Dataset_Sorted_Gozo = Morning_Dataset_Sorted[Morning_Dataset_Sorted["Stop Island"] == "GOZO STOP"]

In [None]:
# Calculate Distance Matrix (In KM using Haversine distance)

# Earth's radius in kilometers
R = 6371

# Function to compute Haversine distance
def haversine(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])  # Convert to radians
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c  # Distance in kilometers

# Extract unique Bus_Stop_IDs
bus_stops = Morning_Dataset_Sorted_Malta['Bus_Stop_ID'].unique()

# Create an empty DataFrame with Bus_Stop_IDs as both rows and columns
distance_matrix_malta = pd.DataFrame(index=bus_stops, columns=bus_stops)

# Convert latitude and longitude to NumPy arrays for fast lookup
latitudes = Morning_Dataset_Sorted_Malta.set_index('Bus_Stop_ID')['Latitude_Final']
longitudes = Morning_Dataset_Sorted_Malta.set_index('Bus_Stop_ID')['Longitude_Final']

# Compute the Haversine distance for each pair
for i in bus_stops:
    print(i)
    for j in bus_stops:
        distance_matrix_malta.at[i, j] = haversine(latitudes[i], longitudes[i], latitudes[j], longitudes[j])

# Convert to numeric
distance_matrix_malta = distance_matrix_malta.apply(pd.to_numeric)


In [None]:
beta = -0.5
bus_stops = Morning_Dataset_Sorted_Malta['Bus_Stop_ID'].unique()  # Get unique bus stops

# Initialize DataFrame with Bus_Stop_IDs as both rows and columns
nij_df = pd.DataFrame(index=bus_stops, columns=bus_stops)

# Convert distance_matrix to NumPy array for speed
distance_matrix_np = distance_matrix_malta.loc[bus_stops, bus_stops].to_numpy()

# Replace 0 distances with np.inf (to avoid division errors)
distance_matrix_np[distance_matrix_np == 0] = np.inf

# Precompute d_ij^beta
d_matrix_beta = distance_matrix_np ** beta  # Apply power directly

# Extract relevant columns as NumPy arrays for fast access
pop_leaving = Morning_Dataset_Sorted_Malta.set_index('Bus_Stop_ID')['POP_Leaving_Per_Stop'].to_dict()
pop_coming = Morning_Dataset_Sorted_Malta.set_index('Bus_Stop_ID')['POP_COMING'].to_dict()

for i_idx, i in enumerate(bus_stops):  # Loop through focus stops
    print(i)
    gi = pop_leaving[i]  # Get g_i

    # Compute denominator in vectorized form
    a2 = np.array([pop_coming[m] for m in bus_stops])
    sum_denominator = np.sum(a2 * d_matrix_beta[i_idx, :])  # Vectorized sum

    # Compute nij values
    a = np.array([pop_coming[j] for j in bus_stops])
    d = d_matrix_beta[i_idx, :]  # Get row i from precomputed d^beta
    nij_df.iloc[i_idx, :] = (gi * a * d) / sum_denominator  # Vectorized division

# Convert to numeric type for calculations
nij_df_malta = nij_df.apply(pd.to_numeric)

nij_df_Malta_ROUNDED = nij_df_malta.round(0).astype(int)


In [None]:
# Assume Monday_Routes_Malta_List is defined as a list of tuples: (route_name, dataframe)
for route_name, df in Monday_Routes_Malta_List:
    # 1 - Obtain list of unique stops for this route
    unique_stops = df.drop_duplicates(subset=['Bus_Stop_ID'], keep='first')
    unique_stops_sorted = unique_stops.sort_values(
        by='Bus_Stop_ID',
        key=lambda x: x.str.extract(r'(\d+)')[0].astype(int)
    )
    # Create Bus Stops list
    bus_stops_list = unique_stops_sorted['Bus_Stop_ID'].unique()
    nij_df_Malta_ROUNDED_df = nij_df_Malta_ROUNDED.loc[bus_stops_list, bus_stops_list]
    
    # Save to txt file in append mode
    file_name = f"{route_name}Demand.txt"
    nij_df_Malta_ROUNDED_df.to_csv(file_name, sep='\t', index=False, header=False)
    
    print(f"Demand Matrix for {route_name} Saved successfully.")

## Obtaining Text Files for Gozo

In [15]:
# Step 7.2.1 - Create Coords text file

# 1 - Obtain List of Unique Stops in Malta
Unique_Stops_Monday_Routes_Gozo = Monday_Routes_Gozo.drop_duplicates(subset=['Bus_Stop_ID'], keep='first')
# 2 - Sort Stops in Descending Order
Unique_Stops_Monday_Routes_Gozo_Sorted = Unique_Stops_Monday_Routes_Gozo.sort_values(by='Bus_Stop_ID', key=lambda x: x.str.extract(r'(\d+)')[0].astype(int))
# 3 - Limit 'Latitude_Final' and 'Longitude_Final' columns to 7 Decimal Places
Unique_Stops_Monday_Routes_Gozo_Sorted[['Latitude_Final', 'Longitude_Final']] = Unique_Stops_Monday_Routes_Gozo_Sorted[['Latitude_Final', 'Longitude_Final']].round(7)

# 4 - Obtain 'GozoCoords.txt' file
File_Name = 'GozoCoords.txt'
Columns_Required = ['Latitude_Final', 'Longitude_Final']
Number_of_Rows = len(Unique_Stops_Monday_Routes_Gozo_Sorted)

with open(File_Name, 'w') as File:
    # Write row count at top of csv file
    File.write(f"{Number_of_Rows}\n")
    Unique_Stops_Monday_Routes_Gozo_Sorted[Columns_Required].to_csv(File, sep=' ', index=False, header=False)


In [23]:
# Step 7.2.2 - Obtaining Travel Time Matrix

Bus_Stops_List = Unique_Stops_Monday_Routes_Gozo_Sorted['Bus_Stop_ID'].unique()

# Create an empty DataFrame with Bus_Stop_IDs as both rows and columns
Travel_Time_Matrix_Monday_Routes_Gozo = pd.DataFrame(index=Bus_Stops_List, columns=Bus_Stops_List)

# Set 'Bus_Stop_ID' and 'Next_Bus_Stop_ID' as index for faster lookup
Gozo_DistinctEdges_IncTravelTimes_Copy =  Gozo_DistinctEdges_IncTravelTimes_Min.copy()
Gozo_DistinctEdges_IncTravelTimes_Copy.set_index(['Bus_Stop_ID', 'Next_Bus_Stop_ID'], inplace=True)

# Initialize the matrix with np.inf (as a default)
Travel_Time_Matrix_Monday_Routes_Gozo[:] = np.inf

# Set diagonal entries to zero
np.fill_diagonal(Travel_Time_Matrix_Monday_Routes_Gozo.values, 0)

# Loop through each pair of bus stops in the list
for index, row in Monday_Routes_Gozo.iterrows():
    bus_stop_pair = row['Bus_Stop_Next_Bus_Stop']
    if pd.notna(bus_stop_pair):  # Ensure the value is not NaN
        # Convert to string and split
        stop_0, stop_1 = str(bus_stop_pair).split('_to_')
        if stop_0 == stop_1:
            Travel_Time_Matrix_Monday_Routes_Gozo[stop_0, stop_1] = 0
        elif (stop_0, stop_1) in Gozo_DistinctEdges_IncTravelTimes_Copy.index:
            Travel_Time_Matrix_Monday_Routes_Gozo.at[stop_0, stop_1] = Gozo_DistinctEdges_IncTravelTimes_Copy.at[(stop_0, stop_1), 'Travel_Time']
            
print(f"Matrix for Gozo populated successfully.")

# Replace infinite values with the string 'Inf'
Travel_Time_Matrix_Monday_Routes_Gozo.replace({np.inf: 'Inf'}, inplace=True)

# Save to txt file in append mode
File_Name = 'GozoTravelTimes.txt'
Travel_Time_Matrix_Monday_Routes_Gozo.to_csv(File_Name, sep='\t', index=False, header=False)

Matrix for Gozo populated successfully.


In [24]:
# Step 7.2.3 - Creating Passenger-Demand Matrix

All_Routes_Copy = pd.concat([Monday_Routes_Malta, Monday_Routes_Gozo], ignore_index=True)

# Keeping Only essential data from 'All_Routes_Copy' DataFrame
Trip_Generation_DataFrame = All_Routes_Copy[['Stops','City Name', 'Bus_Stop_ID', 'Longitude_Final', 'Latitude_Final', 'Stop Island']]
Trip_Generation_DataFrame.loc[:, "Index"] = range(1, len(Trip_Generation_DataFrame) + 1)


# Step 1 - Loading Census data
CENSUS_DATA = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Files used for Data Visualisation//CENSUS_DATA.xlsx")

# Step 2 - Append 'Proper Name', 'Stop Island' and 'Population' columns from 'CENSUS_DATA' DataFrame to 'Trip_Generation_DataFrame' DataFrame
# Rename DataFrame as 'Stop_Info'
# Create Column 'City Name - Stop Island' in 'Trip_Generation_DataFrame'
Trip_Generation_DataFrame['City Name - Stop Island'] =  Trip_Generation_DataFrame['City Name'] + '.' + Trip_Generation_DataFrame['Stop Island']
# Create column 'City Name - Stop Island' in 'CENSUS_DATA'
CENSUS_DATA['City Name - Stop Island'] = CENSUS_DATA['Name in Sheet'] + '.' + CENSUS_DATA['Stop Island']
# Utilise newly created column to concatenate 'Trip_Generation_DataFrame' and 'CENSUS_DATA'
Stop_Info = pd.merge(Trip_Generation_DataFrame[['Stops', 'Bus_Stop_ID', 'Longitude_Final', 'Latitude_Final', 'City Name - Stop Island', "Index"]], CENSUS_DATA[['City Name - Stop Island', 'Stop Island', 'Proper Name', 'Population']], on="City Name - Stop Island", how="inner",  sort=False)
Stop_Info = Stop_Info.sort_values("Index").drop(columns=["Index"])

# Step 3 - Setting estimates for individuals making use of bus in Malta and Gozo at 8 AM
# Morning Model
# Trip Origin will be from Home
# Trip Destination will be to Work

# Time to Consider - 8 AM
# Total Number of trips by bus (Whole day) - 34,679
# Total Number of trips by bus (Whole day - Malta) - 32390.186
# Total Number of trips by bus (Whole day - Gozo) - 2288.814
# Proportion of trips occuring at 8 AM - 82,000/638,456 (12.843484907%)
Bus_Travel_Morning_Malta = 1000000*(82000/638456)
Bus_Travel_Morning_Gozo = 100000*(82000/638456)



# Step 4 - Calculate Number of Individuals leaving each stop
# 'Unique_Stops' Dataframe - Remove duplicate 'Bus_Stop_ID' entries from 'Stop_Info' DataFrame
Unique_Stops = Stop_Info.drop_duplicates(subset=['Bus_Stop_ID'])
# Count Number of distinct stops in each city (cities denoted by 'Proper Name' column)
BusStopPerCity = Unique_Stops.groupby('Proper Name')['Bus_Stop_ID'].count().reset_index()
# 'Unique_City' Dataframe - Remove duplicate 'Proper Name' entries from 'Stop_Info' DataFrame
Unique_City = Stop_Info.drop_duplicates(subset=['Proper Name'])
# 'BusStopPerCity' DataFrame - Append 'Population' and 'Stop Island' columns to BusStopPerCity
BusStopPerCity = pd.merge(BusStopPerCity, Unique_City[['Proper Name','Population', 'Stop Island']], on="Proper Name", how="inner",  sort=False)
# Calculate Total Population per Island
total_population_per_island = BusStopPerCity.groupby('Stop Island')['Population'].sum().rename('Total_population_in_Island')
# Merge Total Population per Island to BusStopPerCity DataFrame
BusStopPerCity = BusStopPerCity.merge(total_population_per_island, on='Stop Island')
# Calculate Population Proportion in each city
BusStopPerCity['Pop_Proportion'] = BusStopPerCity['Population'] / BusStopPerCity['Total_population_in_Island']
# Calculate 'POP_LEAVING_Per_City' by multiplying Population proportion in city and total people travelling using bus
BusStopPerCity['POP_LEAVING_Per_City'] = np.where(
    BusStopPerCity['Stop Island'] == 'MALTA STOP',
    np.ceil(BusStopPerCity['Pop_Proportion'] * Bus_Travel_Morning_Malta),
    np.ceil(BusStopPerCity['Pop_Proportion'] * Bus_Travel_Morning_Gozo)
)
# Calculate 'POP_LEAVING_Per_Stop' by dividing POP_LEAVING_Per_City by number of stops in city
BusStopPerCity['POP_Leaving_Per_Stop'] = np.ceil(BusStopPerCity['POP_LEAVING_Per_City']/BusStopPerCity['Bus_Stop_ID'])

# Step 5 - Calculate Number of Individuals arriving at each stop
#Get total stops per island
total_times_stops_appears = Stop_Info.groupby('Bus_Stop_ID').size().reset_index(name='total_times_stops_appears')
#Merge with original 'Stop_Info' DataFrame
Stop_Info_WOccofStop = Stop_Info.merge(total_times_stops_appears, on='Bus_Stop_ID', how = 'left')
#Consider only one instance of each distinc 'Bus_Stop_ID'
Unique_Stops_Updated = Stop_Info_WOccofStop.drop_duplicates(subset=['Bus_Stop_ID'])
# Get total stops per island
total_stops_in_island = Unique_Stops_Updated.groupby('Stop Island')['total_times_stops_appears'].sum().rename('Total_stops_in_Island')
# Merge with original DataFrame
Unique_Stops_Updated = Unique_Stops_Updated.merge(total_stops_in_island, on='Stop Island')
# Calculate proportion of stops per city
Unique_Stops_Updated['Stop_Proportion'] = Unique_Stops_Updated['total_times_stops_appears'] / Unique_Stops_Updated['Total_stops_in_Island']
# Calculate 'POP_COMING' by multiplying Population proportion in city and total people travelling using bus
Unique_Stops_Updated['POP_COMING'] = np.where(
    Unique_Stops_Updated['Stop Island'] == 'MALTA STOP',
    np.ceil(Unique_Stops_Updated['Stop_Proportion'] * Bus_Travel_Morning_Malta),
    np.ceil(Unique_Stops_Updated['Stop_Proportion'] * Bus_Travel_Morning_Gozo)
)

# Obtain Final_Dataset representing Individuals exiting and entering each stop
Final_Dataset = pd.merge(Unique_Stops_Updated, BusStopPerCity[['Proper Name','POP_Leaving_Per_Stop']], on="Proper Name", how="inner",  sort=False)


# Summarise Data Set
Morning_Dataset = Final_Dataset[['Stops', 'Bus_Stop_ID', 'Proper Name', 'Longitude_Final', 'Latitude_Final', 'Stop Island', 'POP_COMING', 'POP_Leaving_Per_Stop']]

# Sort DataFrame
Morning_Dataset_Sorted = Morning_Dataset.sort_values(by="Bus_Stop_ID", key=lambda x: x.str.extract(r'(\d+)')[0].astype(int))

Morning_Dataset_Sorted_Malta = Morning_Dataset_Sorted[Morning_Dataset_Sorted["Stop Island"] == "MALTA STOP"]
Morning_Dataset_Sorted_Gozo = Morning_Dataset_Sorted[Morning_Dataset_Sorted["Stop Island"] == "GOZO STOP"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Trip_Generation_DataFrame.loc[:, "Index"] = range(1, len(Trip_Generation_DataFrame) + 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Trip_Generation_DataFrame['City Name - Stop Island'] =  Trip_Generation_DataFrame['City Name'] + '.' + Trip_Generation_DataFrame['Stop Island']


In [25]:
# Calculate Distance Matrix (In KM using Haversine distance)

# Earth's radius in kilometers
R = 6371

# Function to compute Haversine distance
def haversine(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])  # Convert to radians
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c  # Distance in kilometers

# Extract unique Bus_Stop_IDs
bus_stops = Morning_Dataset_Sorted_Gozo['Bus_Stop_ID'].unique()

# Create an empty DataFrame with Bus_Stop_IDs as both rows and columns
distance_matrix_gozo = pd.DataFrame(index=bus_stops, columns=bus_stops)

# Convert latitude and longitude to NumPy arrays for fast lookup
latitudes = Morning_Dataset_Sorted_Gozo.set_index('Bus_Stop_ID')['Latitude_Final']
longitudes = Morning_Dataset_Sorted_Gozo.set_index('Bus_Stop_ID')['Longitude_Final']

# Compute the Haversine distance for each pair
for i in bus_stops:
    print(i)
    for j in bus_stops:
        distance_matrix_gozo.at[i, j] = haversine(latitudes[i], longitudes[i], latitudes[j], longitudes[j])

# Convert to numeric
distance_matrix_gozo = distance_matrix_gozo.apply(pd.to_numeric)


Stop_1138
Stop_1139
Stop_1140
Stop_1141
Stop_1142
Stop_1143
Stop_1144
Stop_1145
Stop_1146
Stop_1147
Stop_1148
Stop_1149
Stop_1150
Stop_1151
Stop_1152
Stop_1153
Stop_1154
Stop_1155
Stop_1156
Stop_1157
Stop_1158
Stop_1159
Stop_1160
Stop_1161
Stop_1162
Stop_1163
Stop_1164
Stop_1165
Stop_1166
Stop_1167
Stop_1168
Stop_1169
Stop_1170
Stop_1171
Stop_1172
Stop_1173
Stop_1174
Stop_1175
Stop_1176
Stop_1177
Stop_1178
Stop_1179
Stop_1180
Stop_1181
Stop_1182
Stop_1183
Stop_1184
Stop_1185
Stop_1186
Stop_1187
Stop_1188
Stop_1189
Stop_1190
Stop_1191
Stop_1192
Stop_1193
Stop_1194
Stop_1195
Stop_1196
Stop_1197
Stop_1198
Stop_1199
Stop_1200
Stop_1201
Stop_1202
Stop_1203
Stop_1204
Stop_1205
Stop_1206
Stop_1207
Stop_1208
Stop_1209
Stop_1210
Stop_1211
Stop_1212
Stop_1213
Stop_1214
Stop_1215
Stop_1216
Stop_1217
Stop_1218
Stop_1219
Stop_1220
Stop_1221
Stop_1222
Stop_1223
Stop_1224
Stop_1225
Stop_1226
Stop_1227
Stop_1228
Stop_1229
Stop_1230
Stop_1231
Stop_1232
Stop_1233
Stop_1234
Stop_1235
Stop_1236
Stop_1237


In [26]:
import numpy as np
import pandas as pd

beta = -0.5
bus_stops = Morning_Dataset_Sorted_Gozo['Bus_Stop_ID'].unique()  # Get unique bus stops

# Initialize DataFrame with Bus_Stop_IDs as both rows and columns
nij_df = pd.DataFrame(index=bus_stops, columns=bus_stops)

# Convert distance_matrix to NumPy array for speed
distance_matrix_np = distance_matrix_gozo.loc[bus_stops, bus_stops].to_numpy()

# Replace 0 distances with np.inf (to avoid division errors)
distance_matrix_np[distance_matrix_np == 0] = np.inf

# Precompute d_ij^beta
d_matrix_beta = distance_matrix_np ** beta  # Apply power directly

# Extract relevant columns as NumPy arrays for fast access
pop_leaving = Morning_Dataset_Sorted_Gozo.set_index('Bus_Stop_ID')['POP_Leaving_Per_Stop'].to_dict()
pop_coming = Morning_Dataset_Sorted_Gozo.set_index('Bus_Stop_ID')['POP_COMING'].to_dict()

for i_idx, i in enumerate(bus_stops):  # Loop through focus stops
    print(i)
    gi = pop_leaving[i]  # Get g_i

    # Compute denominator in vectorized form
    a2 = np.array([pop_coming[m] for m in bus_stops])
    sum_denominator = np.sum(a2 * d_matrix_beta[i_idx, :])  # Vectorized sum

    # Compute nij values
    a = np.array([pop_coming[j] for j in bus_stops])
    d = d_matrix_beta[i_idx, :]  # Get row i from precomputed d^beta
    nij_df.iloc[i_idx, :] = (gi * a * d) / sum_denominator  # Vectorized division

# Convert to numeric type for calculations
nij_df_gozo = nij_df.apply(pd.to_numeric)

nij_df_Gozo_ROUNDED = nij_df_gozo.round(0).astype(int)

Stop_1138
Stop_1139
Stop_1140
Stop_1141
Stop_1142
Stop_1143
Stop_1144
Stop_1145
Stop_1146
Stop_1147
Stop_1148
Stop_1149
Stop_1150
Stop_1151
Stop_1152
Stop_1153
Stop_1154
Stop_1155
Stop_1156
Stop_1157
Stop_1158
Stop_1159
Stop_1160
Stop_1161
Stop_1162
Stop_1163
Stop_1164
Stop_1165
Stop_1166
Stop_1167
Stop_1168
Stop_1169
Stop_1170
Stop_1171
Stop_1172
Stop_1173
Stop_1174
Stop_1175
Stop_1176
Stop_1177
Stop_1178
Stop_1179
Stop_1180
Stop_1181
Stop_1182
Stop_1183
Stop_1184
Stop_1185
Stop_1186
Stop_1187
Stop_1188
Stop_1189
Stop_1190
Stop_1191
Stop_1192
Stop_1193
Stop_1194
Stop_1195
Stop_1196
Stop_1197
Stop_1198
Stop_1199
Stop_1200
Stop_1201
Stop_1202
Stop_1203
Stop_1204
Stop_1205
Stop_1206
Stop_1207
Stop_1208
Stop_1209
Stop_1210
Stop_1211
Stop_1212
Stop_1213
Stop_1214
Stop_1215
Stop_1216
Stop_1217
Stop_1218
Stop_1219
Stop_1220
Stop_1221
Stop_1222
Stop_1223
Stop_1224
Stop_1225
Stop_1226
Stop_1227
Stop_1228
Stop_1229
Stop_1230
Stop_1231
Stop_1232
Stop_1233
Stop_1234
Stop_1235
Stop_1236
Stop_1237


In [27]:
# Save txt file
File_Name = 'GozoDemand.txt'
nij_df_Gozo_ROUNDED.to_csv(File_Name, sep='\t', index=False, header=False)

### Step 8 - Analysing Route, in Malta and Gozo, according to Route Group

### Step 8.1 - Malta

In [None]:
# Defining a Separate DataFrame for each specifiv Route Set
Monday_Routes_Malta_South_Eastern_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(South_Eastern_Routes)]
Monday_Routes_Malta_Northern_Harbour_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Northern_Harbour_Routes)]
Monday_Routes_Malta_Southern_Harbour_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Southern_Harbour_Routes)]
Monday_Routes_Malta_Western_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Western_Routes)]
Monday_Routes_Malta_Northern_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Northern_Routes)]
Monday_Routes_Malta_Mater_Dei_North_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Mater_Dei_North_Routes)]
Monday_Routes_Malta_Mater_Dei_South_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Mater_Dei_South_Routes)]
Monday_Routes_Malta_Mater_Dei_Airport_Routes = Monday_Routes_Malta[Monday_Routes_Malta['Route Number'].isin(Airport_Routes)]


# Create a List of TUples: (name, dataframe)
Monday_Routes_Malta_List = [
    ('South_Eastern_Routes', Monday_Routes_Malta_South_Eastern_Routes),
    ('Northern_Harbour_Routes', Monday_Routes_Malta_Northern_Harbour_Routes),
    ('Southern_Harbour_Routes', Monday_Routes_Malta_Southern_Harbour_Routes),
    ('Western_Routes', Monday_Routes_Malta_Western_Routes),
    ('Northern_Routes', Monday_Routes_Malta_Northern_Routes),
    ('Mater_Dei_North_Routes', Monday_Routes_Malta_Mater_Dei_North_Routes),
    ('Mater_Dei_South_Routes', Monday_Routes_Malta_Mater_Dei_South_Routes),
    ('Airport_Routes', Monday_Routes_Malta_Mater_Dei_Airport_Routes)
]

In [None]:
# Step 4.1 - Obtain Number of routes covered by each Route_Group

# Create a list of route group names from Monday_Routes_Malta_List
Route_Group = [name for name, _ in Monday_Routes_Malta_List]

# Compute number of unique routes per route group, adding one extra if the group is 'Route41_49'
malta_routes = []
for name, df in Monday_Routes_Malta_List:
    unique_count = len(df[['Route Number', 'Route Direction', 'Time_Count']].drop_duplicates())
    if name == 'Northern_Routes':
        unique_count += 1  # Add an extra route for Route41_49
    malta_routes.append(unique_count)

# Create the summary DataFrame
route_data = pd.DataFrame({
    "Route Group": Route_Group,
    "Number of Routes": malta_routes
})

# Display the DataFrame
print(route_data)


In [None]:
# Step 4.2 - Obtain table indicating number of bus stops traversed by each route,
# also taking into account day of the week (for non-circular stops, we consider only
# the maximum number of stops in any one direction)

# Initialize a list to collect summary data for each route DataFrame
summary_results = []

# Process each tuple (name, DataFrame) in Monday_Routes_Malta_List
for name, df in Monday_Routes_Malta_List:
    # Work on a copy of the DataFrame
    df_copy = df.copy()
    
    # Create a unique route identifier by concatenating relevant columns.
    df_copy['Route_ID'] = (df_copy['Route Number'] + '-' +
                           df_copy['Route Direction'] + '-' +
                           df_copy['Time_Count'].astype(str))
    
    # Group by the unique route identifier and count the number of bus stops per route instance.
    route_stop_counts = df_copy.groupby('Route_ID').size()
    
    # Determine the minimum and maximum number of bus stops seen in any one route instance.
    min_stops = route_stop_counts.min()
    max_stops = route_stop_counts.max()
    
    # If the route group is 'Route41_49', divide the maximum stops value by 2 and convert to int
    if name == "Northern_Routes":
        max_stops = int(max_stops / 2)
    
    # Append the results for this DataFrame to the summary list.
    summary_results.append({
        'DataFrame_Name': name,
        'Min_Bus_Stops': min_stops,
        'Max_Bus_Stops': max_stops
    })

# Convert the summary results to a DataFrame
summary_df = pd.DataFrame(summary_results)

# Display the summary DataFrame
print(summary_df)


In [None]:
Monday_Routes_Malta_Route41_49.to_csv("IAMNOTOKAY.csv")

In [None]:
import os
import pandas as pd

# Ensure save_path ends with a separator
save_path = "C://Users//Owner//ICT5012 - Disseration//Chapter 3 - Data Visualisation//Python Scripts//Terminal_NormalStops_Identification//"

for name, i in Monday_Routes_Malta_List:
    print(name)
    
    # Reset index and work on a copy to avoid SettingWithCopyWarning
    i = i.copy().reset_index(drop=True)
    
    # Create Reset_Condition for identifying changes in route attributes
    Reset_Condition = (
        (i['Route Number'].shift(-1) != i['Route Number']) |
        (i['Route Direction'].shift(-1) != i['Route Direction']) |
        (i['Time_Count'].shift(-1) != i['Time_Count'])
    )
    
    # Initialise all entries in 'Bus_Terminal' as 0
    i.loc[:, 'Bus_Terminal'] = 0
    # If Reset_Condition is True then mark as Bus Terminal
    i.loc[Reset_Condition, 'Bus_Terminal'] = 1
    
    OG_BUS_Terminal_COLUMN = i['Bus_Terminal'].copy()
    # Iterate through the original Bus_Terminal column
    for j in range(len(OG_BUS_Terminal_COLUMN) - 1):
        if OG_BUS_Terminal_COLUMN[j] == 1:
            i.loc[j + 1, 'Bus_Terminal'] = 1
    # Initialise first row as Bus Terminal
    i.loc[0, 'Bus_Terminal'] = 1
    
    # Identify Bus Terminal stops
    Bus_Terminals_DataFrame = i[i['Bus_Terminal'] == 1]
    
    # Collect unique Bus_Stop_IDs for Bus Terminals
    bus_stop_ids = []
    for c in range(len(Bus_Terminals_DataFrame)):
        bus_stop_id = Bus_Terminals_DataFrame.iloc[c]['Bus_Stop_ID']
        if bus_stop_id not in bus_stop_ids:
            bus_stop_ids.append(bus_stop_id)
    
    # DataFrame for Normal Bus Stops (non-terminals)
    Normal_Bus_DataFrame = i[i['Bus_Terminal'] == 0]
    
    # Collect unique Bus_Stop_IDs for Normal stops
    Normal_bus_stop_ids = []
    for c in range(len(Normal_Bus_DataFrame)):
        normal_bus_stop_id = Normal_Bus_DataFrame.iloc[c]['Bus_Stop_ID']
        if normal_bus_stop_id not in Normal_bus_stop_ids:
            Normal_bus_stop_ids.append(normal_bus_stop_id)
    
    # Bus Stops used as both Normal and Terminal stops
    Normal_and_BusTerminals_ids = list(set(bus_stop_ids) & set(Normal_bus_stop_ids))
    
    # Define empty DataFrame to store Bus Stops used as both Normal and Terminal
    Normal_and_BusTerminals_DataFrame_Malta = pd.DataFrame(columns=i.columns)
    compare_bus_stop_ids = []
    for c in range(len(i)):
        bus_stop_id = i.iloc[c]['Bus_Stop_ID']
        if bus_stop_id in Normal_and_BusTerminals_ids and bus_stop_id not in compare_bus_stop_ids:
            compare_bus_stop_ids.append(bus_stop_id)
            Normal_and_BusTerminals_DataFrame_Malta = pd.concat(
                [Normal_and_BusTerminals_DataFrame_Malta, i.iloc[[c]]],
                ignore_index=True
            )
    
    file_name = f"AllNormalBusStopsUsedAsTerminals_{name}.csv"
    Normal_and_BusTerminals_DataFrame_Malta.to_csv(os.path.join(save_path, file_name), index=False)
    
    # Bus Stops used only as Terminals
    BusTerminals_Only_ids = list(set(bus_stop_ids) - set(Normal_bus_stop_ids))
    BusTerminals_Only_DataFrame_Malta = pd.DataFrame(columns=i.columns)
    Only_Terminal_stop_ids = []
    for c in range(len(i)):
        Only_Terminal_stop_id = i.iloc[c]['Bus_Stop_ID']
        if Only_Terminal_stop_id in BusTerminals_Only_ids and Only_Terminal_stop_id not in Only_Terminal_stop_ids:
            Only_Terminal_stop_ids.append(Only_Terminal_stop_id)
            BusTerminals_Only_DataFrame_Malta = pd.concat(
                [BusTerminals_Only_DataFrame_Malta, i.iloc[[c]]],
                ignore_index=True
            )
    
    file_name = f"BusTerminalsOnly_{name}.csv"
    BusTerminals_Only_DataFrame_Malta.to_csv(os.path.join(save_path, file_name), index=False)
    
    # Bus Stops used only as Normal stops
    NormalStops_Only_ids = list(set(Normal_bus_stop_ids) - set(bus_stop_ids))
    NormalStops_Only_DataFrame_Malta = pd.DataFrame(columns=i.columns)
    Only_Normal_stop_ids = []
    for c in range(len(i)):
        Only_Normal_stop_id = i.iloc[c]['Bus_Stop_ID']
        if Only_Normal_stop_id in NormalStops_Only_ids and Only_Normal_stop_id not in Only_Normal_stop_ids:
            Only_Normal_stop_ids.append(Only_Normal_stop_id)
            NormalStops_Only_DataFrame_Malta = pd.concat(
                [NormalStops_Only_DataFrame_Malta, i.iloc[[c]]],
                ignore_index=True
            )
    
    file_name = f"NormalStopsOnly_{name}.csv"
    NormalStops_Only_DataFrame_Malta.to_csv(os.path.join(save_path, file_name), index=False)


### Step 8.2 - Gozo

In [29]:
Monday_Routes_Gozo_List = [
    ('Monday_Gozo_Routes', Monday_Routes_Gozo)] 

In [30]:
# Step 4.1 - Obtain Number of routes covered by each Route_Group

# Create a list of route group names from Monday_Routes_Malta_List
Route_Group = [name for name, _ in Monday_Routes_Gozo_List]

# Compute number of unique routes per route group, adding one extra if the group is 'Route41_49'
gozo_routes = []
for name, df in Monday_Routes_Gozo_List:
    unique_count = len(df[['Route Number', 'Route Direction', 'Time_Count']].drop_duplicates())
    #if name == 'Northern_Routes':
    #    unique_count += 1  # Add an extra route for Route41_49
    gozo_routes.append(unique_count)

# Create the summary DataFrame
route_data = pd.DataFrame({
    "Route Group": Route_Group,
    "Number of Routes": gozo_routes
})

# Display the DataFrame
print(route_data)


          Route Group  Number of Routes
0  Monday_Gozo_Routes                26


In [31]:
# Step 4.2 - Obtain table indicating number of bus stops traversed by each route,
# also taking into account day of the week (for non-circular stops, we consider only
# the maximum number of stops in any one direction)

# Initialize a list to collect summary data for each route DataFrame
summary_results = []

# Process each tuple (name, DataFrame) in Monday_Routes_Malta_List
for name, df in Monday_Routes_Gozo_List:
    # Work on a copy of the DataFrame
    df_copy = df.copy()
    
    # Create a unique route identifier by concatenating relevant columns.
    df_copy['Route_ID'] = (df_copy['Route Number'] + '-' +
                           df_copy['Route Direction'] + '-' +
                           df_copy['Time_Count'].astype(str))
    
    # Group by the unique route identifier and count the number of bus stops per route instance.
    route_stop_counts = df_copy.groupby('Route_ID').size()
    
    # Determine the minimum and maximum number of bus stops seen in any one route instance.
    min_stops = route_stop_counts.min()
    max_stops = route_stop_counts.max()
    
    # If the route group is 'Route41_49', divide the maximum stops value by 2 and convert to int
    #if name == "Northern_Routes":
    #   max_stops = int(max_stops / 2)
    
    # Append the results for this DataFrame to the summary list.
    summary_results.append({
        'DataFrame_Name': name,
        'Min_Bus_Stops': min_stops,
        'Max_Bus_Stops': max_stops
    })

# Convert the summary results to a DataFrame
summary_df = pd.DataFrame(summary_results)

# Display the summary DataFrame
print(summary_df)


       DataFrame_Name  Min_Bus_Stops  Max_Bus_Stops
0  Monday_Gozo_Routes             10             42
