In [1]:
# Importing Required Packages

# Importing 'pandas' to handle datasets
import pandas as pd
# Importing 'numpy' to handle arrays
import numpy as np

# Importing math to be able to utilise mathematical functions (radian is defined by converting 90 degrees)
import math
degree = 90
radian = math.radians(degree)

# Importing 're' package - Python Regular Expressions
import re

# Importing 'xml.etree.cElementTree' to be able to handle xml files
import xml.etree.cElementTree as ET

from datetime import datetime, timedelta
import requests 
import json 
from collections import OrderedDict
import json
import time

### Step 1 - Preparing Data Sets 

Appending the following columns to the 'All_Routes_Complete.csv' file obtained from the Malta public transport (MPT) website

    1. 'Stop Island' - Defines the island (Malta/Gozo) the corresponding Route Number operates in. 
    2. 'Time_Count' - Number of buses operating on a particular Route Number and Route Direction from the start till the end of the bus service.
    3. 'Stops - City Name - Stop Island' - Key column used to compare data in 'All_Routes_Complete.csv' to data in Bus_Stop_Info ('Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx')
    4. 'Longitude_Final' - Longitude value of corresponding Bus Stop entry ('Stops') obtained from Bus_Stop_Info
    5. 'Latitude_Final' - Latitude value of corresponding Bus Stop entry ('Stops') obtained from Bus_Stop_Info
    6. 'Bus_Stop_ID' - Unique identifier for all Bus Stops ('Stops')
    7. 'Next_Bus_Stop_ID' - Since All_Routes ('All_Routes_Complete.csv') was extracted in sequential order from MPT website, the next row in All_Routes corresponds to the upcoming Bus Stop (Given 'Route Number', 'Route Direction' and 'Date' columns remain the same). Hence, 'Next_Bus_Stop_ID' is the unique identifier of the upcoming Bus Stop ('Stops')
    8. 'Bus_Stop_Next_Bus_Stop'- Unique identifier used to define connection between 'Bus_Stop_ID and 'Next_Bus_Stop_ID'

In [2]:
# Step 1.1 - Load Datasets
# Loading 'All_Routes_Complete.csv' (All_Routes) and 'Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx' (Bus_Stop_Info) Datasets
# Recall 'All_Routes_Complete.csv' is the file which was obtained from MPT website consisting of all Bus Schedules in sequential order
# Recall 'Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx' consits of the Longitude and Latitude data of all Bus Stops defined in 'All_Routes_Complete.csv' 

# From Home
All_Routes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Scraping Route Names from MPT Website//Results//All_Routes_Complete.csv", low_memory = False)
Bus_Stop_Info = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Obtaining Longitude and Latitude for all Bus Stops//Results (Checks Done + Manual Adjustment)//Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx")

# From Work 
#All_Routes = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//All_Routes_Complete.csv", low_memory = False)
#Bus_Stop_Info = pd.read_excel("C://Users//attardan.CBM//Data Visualisation//Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx")

In [3]:
# Step 1.2 - Create 'Stop Island' column
# To differentiate between stops in Malta and Gozo, stops with correspoding 'Route Number' belonging to the 'Gozitan_Route_Number'
# list will be labelled as 'GOZO STOP' whilst all other stops will be labelled as 'MALTA STOP' using a column entitled 'Stop Island' 

Gozitan_Route_Number = ['301', '302', '303',
                        '305', '306', '307',
                        '308', '309', '310',
                        '311', '312', '313',
                        '322', '323', '330',
                        'N301']

All_Routes['Stop Island'] = np.where(All_Routes['Route Number'].isin(Gozitan_Route_Number), 'GOZO STOP', 'MALTA STOP')

In [4]:
# Step 1.3 - Create 'Time_Count' column
# The 'Time_Count' column will be added to count the number of buses operating throughout one day for a particular route.
# 'Time_Count' will be used as another method to identify between different routes having the same 'Route Number', 'Route Direction' and 'Date' column

# Select all columns in 'All_Routes' that start with 'Stop Time'
Stop_Time_Columns = [col for col in All_Routes.columns if col.startswith('Stop Time')]
# Row-wise count all'Stop Time' columns which are filled in and populate the 'Time_Count' column with sum 
All_Routes['Time_Count'] = All_Routes[Stop_Time_Columns].notna().sum(axis=1)


In [5]:
# Step 1.4 - Merge 'Longitude_Final' and 'Latitude_Final' columns from 'Bus_Stop_Info' dataframe to 'All_Routes' dataframe

# Create column entitled 'Stops - City Name - Stop Island' (Concatenation of 'Stops', 'City Name' and 'Stop Island' columns) 
# in both 'All_Routes' and 'Bus_Stop_Info'
All_Routes['Stops - City Name - Stop Island'] = All_Routes['Stops'] + ' - ' + All_Routes['City Name'] + ' - ' + All_Routes['Stop Island']
Bus_Stop_Info['Stops - City Name - Stop Island'] = Bus_Stop_Info['Stops'] + ' - ' + Bus_Stop_Info['City Name'] + ' - ' + Bus_Stop_Info['Stop_Island']
# Merge 'Bus_Stop_Info' Dataframe to 'All_Routes' such that 'All_Routes' dataframe will have 'Longitude_Final' and 'Latitude_Final' values for all corresponding entries
All_Routes = pd.merge(All_Routes, Bus_Stop_Info[['Stops - City Name - Stop Island', 'Longitude_Final', 'Latitude_Final']], on = 'Stops - City Name - Stop Island', how = 'left', sort = False)

In [6]:
# Step 1.5 - Create a unique identifier for all Bus Stops ('Stops') entitled 'Bus_Stop_ID'

# Obtain Dataframe entitled 'Distinct_Coordinates' consisting only of entries with distinct 'Latitude_Final' 
# and 'Longitude_Final' pairs
Distinct_Coordinates = All_Routes.drop_duplicates(subset = ['Latitude_Final','Longitude_Final'])
# In 'Distinct_Coordinates' add column 'Stop ID' with entries with format Stop_X where X is a value from 0 up to length of 
# 'Distinct_Coordinates' dataframe
Distinct_Coordinates['Stop ID'] = [f'Stop_{i}' for i in range(len(Distinct_Coordinates))]

# Create Dictionary entitled 'Bus_Stop_ID' consisting of corresponding 'Latitude_Final', 'Longitude_Final' and 'Stop ID' values 
Bus_Stop_ID = dict(
    zip(
        zip(Distinct_Coordinates['Latitude_Final'], Distinct_Coordinates['Longitude_Final']),
        Distinct_Coordinates['Stop ID']
    )
)

# Using 'Bus_Stop_ID' dictionary label Bus Stops ('Stops') with their corresponding unique identifier.
# Column is labelled as 'Bus_Stop_ID'
# 4 - Using 'Bus_Stop_ID' dict to label All_Routes
All_Routes['Bus_Stop_ID'] = All_Routes.apply(
    lambda row: Bus_Stop_ID.get((row['Latitude_Final'], row['Longitude_Final']), None), axis=1
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Distinct_Coordinates['Stop ID'] = [f'Stop_{i}' for i in range(len(Distinct_Coordinates))]


In [7]:
# Step 1.6 - Create Column entitled 'Next_Bus_Stop_ID' consisting of the unique identifier of the upcoming stop in the route

# Since All_Routes ('All_Routes_Complete.csv') was extracted in sequential order from MPT website, the next row in All_Routes
# corresponds to the upcoming Bus Stop (Given 'Route Number', 'Route Direction' and 'Date' columns remain the same).
# Hence, 'Reset_Conditions' is defined such that if any of 'Route Number', 'Route Direction' or 'Date' are different in
# in the upcoming stop then upcoming stop than it is not considered to be a continuation of the current route.
Reset_Conditions = (
    All_Routes['Route Number'].shift(-1) != All_Routes['Route Number']) | \
    (All_Routes['Route Direction'].shift(-1) != All_Routes['Route Direction']) | \
    (All_Routes['Date'].shift(-1) != All_Routes['Date'])

# In 'All_Routes' create column 'Next_Bus_Stop_ID' consisting of the upcoming 'Bus_Stop_ID'
All_Routes['Next_Bus_Stop_ID'] = All_Routes['Bus_Stop_ID'].shift(-1)
# If conditions defined in 'Reset_Conditions' are met, then 'Next_Bus_Stop_ID' should be blank
All_Routes.loc[Reset_Conditions, 'Next_Bus_Stop_ID'] = None


In [8]:
# Step 1.7 - Create a Unique identifier used to define connection between 'Bus_Stop_ID and 'Next_Bus_Stop_ID. 
# This is done by concatinating the 'Bus_Stop_ID' and 'Next_Bus_Stop_ID' columns

All_Routes['Bus_Stop_Next_Bus_Stop'] = All_Routes['Bus_Stop_ID'] + '_to_' + All_Routes['Next_Bus_Stop_ID']

In [9]:
# Step 2 - Adjusting All_Routes

# Step 2.1 - To simply our work we will not be considering the following routes:
# Night Routes - Not interested in specific Routes designed to work beyond the scheduled service
# Direct Routes - Not interested in routes which make use of specially designed shorter paths

#Defining list of Night Routes and Tallinja Direct Routes (Obtained from: https://www.publictransport.com.mt/en/timetables)
Night_Direct_Routes = ['N11', 'N13', 'N212', 'N62', 'N82',
                       'N91', 'N48', 'N301', 'TD2', 'TD10',
                       'TD13']

All_Routes = All_Routes[~All_Routes['Route Number'].isin(Night_Direct_Routes)]

In [10]:
# Step 2.2 - Adjust Date names to ensure we are able to split dates accordingly

# Obtain the Date Names utilised in 'All_Routes'
# This is done since in certain 'Date' entries a hypen is utilised (Ex. 'Monday - Friday' significes 'Monday, Tuesday, Wednesday, Thursday, Friday'
# All day names need to be represented in 'Date' field such that 'All_Routes' can be split into specific dates.
Unique_Dates = All_Routes['Date'].unique()
print(Unique_Dates)

['Monday' 'Tuesday, Wednesday, Thursday, Friday' 'Saturday' 'Sunday'
 'Monday, Tuesday, Wednesday, Thursday, Friday' 'Monday - Friday'
 'Saturday, Sunday' 'Monday - Saturday'
 'Wednesday, Thursday, Friday, Monday, Tuesday'
 'Wednesday, Thursday, Friday, Monday, Tuesday, Sunday'
 'Wednesday, Thursday, Friday, Tuesday' 'Wednesday, Thursday, Tuesday'
 'Friday, Monday']


In [11]:
#Step 2.2.1 - Changes to be made
# 1 - Change 'Monday - Friday' to 'Monday, Tuesday, Wednesday, Thursday, Friday'
# 2 - Change 'Monday - Sunday' to 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday'
# 3 - Change 'Monday - Saturday' to 'Monday, Tuesday, Wednesday, Thursday, Firday, Saturday'
All_Routes_Copy = All_Routes.copy()
All_Routes_Copy['Date'] = All_Routes_Copy['Date'].replace({'Monday - Friday': 'Monday, Tuesday, Wednesday, Thursday, Friday',
                                                           'Monday - Sunday': 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday',
                                                           'Monday - Saturday': 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday'})


In [12]:
# Step 2.2.2 - Check changes have been carried out accordingly
Unique_Dates_FollowingUpdate = All_Routes['Date'].unique()
print(Unique_Dates_FollowingUpdate)

['Monday' 'Tuesday, Wednesday, Thursday, Friday' 'Saturday' 'Sunday'
 'Monday, Tuesday, Wednesday, Thursday, Friday' 'Monday - Friday'
 'Saturday, Sunday' 'Monday - Saturday'
 'Wednesday, Thursday, Friday, Monday, Tuesday'
 'Wednesday, Thursday, Friday, Monday, Tuesday, Sunday'
 'Wednesday, Thursday, Friday, Tuesday' 'Wednesday, Thursday, Tuesday'
 'Friday, Monday']


In [13]:
All_Routes_Copy

Unnamed: 0,Route Number,Route Direction,Stops,City Name,Date,Stop Time 1,Stop Time 2,Stop Time 3,Stop Time 4,Stop Time 5,...,Stop Time 77,Stop Time 78,Stop Island,Time_Count,Stops - City Name - Stop Island,Longitude_Final,Latitude_Final,Bus_Stop_ID,Next_Bus_Stop_ID,Bus_Stop_Next_Bus_Stop
0,X1,Ajruport - Cirkewwa,Airport 1,Hal Luqa,Monday,05:08,05:38,05:53,06:38,07:23,...,,,MALTA STOP,26,Airport 1 - Hal Luqa - MALTA STOP,14.495967,35.849412,Stop_0,Stop_1,Stop_0_to_Stop_1
1,X1,Ajruport - Cirkewwa,Avjazzjoni,Hal Luqa,Monday,05:09,05:39,05:54,06:40,07:25,...,,,MALTA STOP,26,Avjazzjoni - Hal Luqa - MALTA STOP,14.492289,35.854831,Stop_1,Stop_2,Stop_1_to_Stop_2
2,X1,Ajruport - Cirkewwa,Mitjar,Hal Luqa,Monday,05:10,05:40,05:55,06:41,07:26,...,,,MALTA STOP,26,Mitjar - Hal Luqa - MALTA STOP,14.486949,35.855699,Stop_2,Stop_3,Stop_2_to_Stop_3
3,X1,Ajruport - Cirkewwa,Ingieret,San Vincenz,Monday,05:14,05:45,06:00,06:47,07:32,...,,,MALTA STOP,26,Ingieret - San Vincenz - MALTA STOP,14.482673,35.870979,Stop_3,Stop_4,Stop_3_to_Stop_4
4,X1,Ajruport - Cirkewwa,Marsa Park & Ride 1,Il-Marsa,Monday,05:18,05:48,06:03,06:51,07:36,...,,,MALTA STOP,26,Marsa Park & Ride 1 - Il-Marsa - MALTA STOP,14.484522,35.878715,Stop_4,Stop_5,Stop_4_to_Stop_5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26528,330,Xlendi - Victoria,Ghajn,"Il-Fontana, Ghawdex",Saturday,09:45,11:45,13:45,15:45,17:45,...,,,GOZO STOP,5,"Ghajn - Il-Fontana, Ghawdex - GOZO STOP",14.234934,36.037018,Stop_1250,Stop_1251,Stop_1250_to_Stop_1251
26529,330,Xlendi - Victoria,Parrokkjali,"Il-Fontana, Ghawdex",Saturday,09:46,11:46,13:46,15:46,17:46,...,,,GOZO STOP,5,"Parrokkjali - Il-Fontana, Ghawdex - GOZO STOP",14.236993,36.039296,Stop_1251,Stop_1252,Stop_1251_to_Stop_1252
26530,330,Xlendi - Victoria,Andar,"Il-Fontana, Ghawdex",Saturday,09:47,11:47,13:47,15:47,17:47,...,,,GOZO STOP,5,"Andar - Il-Fontana, Ghawdex - GOZO STOP",14.237107,36.040683,Stop_1252,Stop_1253,Stop_1252_to_Stop_1253
26531,330,Xlendi - Victoria,Vincenzo,"Il-Fontana, Ghawdex",Saturday,09:47,11:47,13:47,15:47,17:47,...,,,GOZO STOP,5,"Vincenzo - Il-Fontana, Ghawdex - GOZO STOP",14.239928,36.040968,Stop_1253,Stop_1161,Stop_1253_to_Stop_1161


In [14]:
# Step 4.1 - A DataFrame entitled 'Distinct_Edges' is created which considers entries in the 'All_Routes' DataFrame with unique entries in 
# 'Bus_Stop_Next_Bus_Stop' column
Distinct_Edges = All_Routes_Copy.drop_duplicates(subset = ['Bus_Stop_Next_Bus_Stop'])
# Index is reset due to removal of entries in 'All_Routes'
Distinct_Edges = Distinct_Edges.reset_index(drop=True)
# Creating Copy of 'Distinct_Edges' DataFrame entitled 'Distinct_Edges_2' (s.t. any changes made in 'Distinct_Edges_2' does not impact the original
# DataFrame
Distinct_Edges_2 = Distinct_Edges.copy()

In [15]:
Distinct_Edges_2 = pd.merge(Distinct_Edges_2, Distinct_Edges_2[['Bus_Stop_ID', 'Longitude_Final', 'Latitude_Final']].drop_duplicates(subset=['Bus_Stop_ID']), left_on = 'Next_Bus_Stop_ID', right_on = 'Bus_Stop_ID', how = 'left', sort = False)

In [16]:
Distinct_Edges_2 = Distinct_Edges_2.rename(columns={'Bus_Stop_ID_x': 'Bus_Stop_ID','Longitude_Final_x': 'Longitude_Final', 'Latitude_Final_x': 'Latitude_Final',
                                                   'Longitude_Final_y': 'Longitude_Next', 'Latitude_Final_y': 'Latitude_Next'})
Distinct_Edges_2 = Distinct_Edges_2.drop(columns = 'Bus_Stop_ID_y')

In [17]:
# Step 4.3 - Free Version of TomTom API only Allows for 2500 Non-Tile Requests per-day. The entire datasets would require 2543 requests to complete.
# As a result, the 'Distinc_Edges_2' Dataset will be split into two using the 'Stop Island' column. Following DataFrames are obtained:
# 1 - Distinct_Edges_MALTA - Includes all entries in 'Distinct_Edges_2' where 'Stop Island' column has entries 'MALTA STOP'
# 2 - Distinct_Edges_GOZO - Includes all entries in 'Distinct_Edges_2' where 'Stop Island' column has entries 'GOZO STOP'
Distinct_Edges_MALTA = Distinct_Edges_2.loc[Distinct_Edges_2["Stop Island"] == "MALTA STOP"]
Distinct_Edges_GOZO = Distinct_Edges_2.loc[Distinct_Edges_2["Stop Island"] == "GOZO STOP"]
# Since file is obtained sequentially (Gozo routes are listed last in the MPT Website (https://www.publictransport.com.mt/en/timetables)
# Then index is reset for 'Distinct_Edges_Gozo' to ensure for loops utilised will work correctly
Distinct_Edges_GOZO = Distinct_Edges_GOZO.reset_index(drop=True)

In [18]:
########################################################################################################

In [20]:
# Step 4.4 - Utilise TomTom to obtain the following edge treversal information:
# 1. Length_In_Metres - The route or leg length in meters.
# 2. Travel_Time_Value - The estimated travel time in seconds. 
# Note that even when traffic=false, travelTimeInSeconds still includes the delay due to traffic.
# 3. Travel_Time_No_Traffic_Value - The estimated travel time in seconds calculated as if there are no delays on the route due to traffic conditions (e.g., congestion).
# 4. Historic_Traffic_Travel_Time - The estimated travel time in seconds calculated using time-dependent historic traffic data.
# 5. Live_Traffic_Travel_Time - The estimated travel time in seconds calculated using real-time speed data.
# 6. Traffic_Delay - Traffic delay is the difference between the travel time calculated using all available traffic information and 
# travel time calculated without the influence of current and historic traffic data.
# 7. Departure_Time - Time Bus leaves origin 
# 8. Arrival_Time - Time Bus arrives at origin

# TomTom API Key 
TOMTOM_API_Key = '4x14GdbcGGsXeen6yUhicscKFbz28iMj'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(Distinct_Edges_MALTA)):
    print(f"Processing request {i+1} of {len(Distinct_Edges_MALTA)}")
    url = "https://api.tomtom.com/routing/1/calculateRoute/{0}%2C{1}%3A{2}%2C{3}/json?maxAlternatives=1&computeTravelTimeFor=all&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={4}".format(Distinct_Edges_MALTA['Latitude_Final'][i], Distinct_Edges_MALTA['Longitude_Final'][i], Distinct_Edges_MALTA['Latitude_Next'][i], Distinct_Edges_MALTA['Longitude_Next'][i], TOMTOM_API_Key)
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)
        
    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")
        
    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(1)


Processing request 1 of 2118
Processing request 2 of 2118
Processing request 3 of 2118
Processing request 4 of 2118
Processing request 5 of 2118
Processing request 6 of 2118
Processing request 7 of 2118
Processing request 8 of 2118
Processing request 9 of 2118
Processing request 10 of 2118
Processing request 11 of 2118
Processing request 12 of 2118
Processing request 13 of 2118
Processing request 14 of 2118
Processing request 15 of 2118
Processing request 16 of 2118
Processing request 17 of 2118
Processing request 18 of 2118
Processing request 19 of 2118
Processing request 20 of 2118
Processing request 21 of 2118
Processing request 22 of 2118
Processing request 23 of 2118
Processing request 24 of 2118
Error: HTTP 400 - {"formatVersion":"0.0.12","detailedError":{"message":"Invalid request: malformed coordinate at index [1]: [nan,nan]","code":"BAD_INPUT"}}
Processing request 25 of 2118
Processing request 26 of 2118
Processing request 27 of 2118
Processing request 28 of 2118
Processing re

Processing request 259 of 2118
Processing request 260 of 2118
Processing request 261 of 2118
Processing request 262 of 2118
Processing request 263 of 2118
Processing request 264 of 2118
Processing request 265 of 2118
Processing request 266 of 2118
Processing request 267 of 2118
Processing request 268 of 2118
Processing request 269 of 2118
Processing request 270 of 2118
Processing request 271 of 2118
Processing request 272 of 2118
Processing request 273 of 2118
Processing request 274 of 2118
Processing request 275 of 2118
Processing request 276 of 2118
Processing request 277 of 2118
Processing request 278 of 2118
Processing request 279 of 2118
Processing request 280 of 2118
Processing request 281 of 2118
Processing request 282 of 2118
Processing request 283 of 2118
Processing request 284 of 2118
Processing request 285 of 2118
Error: HTTP 400 - {"formatVersion":"0.0.12","detailedError":{"message":"Invalid request: malformed coordinate at index [1]: [nan,nan]","code":"BAD_INPUT"}}
Process

Processing request 514 of 2118
Processing request 515 of 2118
Processing request 516 of 2118
Processing request 517 of 2118
Processing request 518 of 2118
Processing request 519 of 2118
Processing request 520 of 2118
Processing request 521 of 2118
Processing request 522 of 2118
Processing request 523 of 2118
Processing request 524 of 2118
Processing request 525 of 2118
Processing request 526 of 2118
Processing request 527 of 2118
Processing request 528 of 2118
Processing request 529 of 2118
Processing request 530 of 2118
Processing request 531 of 2118
Processing request 532 of 2118
Processing request 533 of 2118
Processing request 534 of 2118
Processing request 535 of 2118
Processing request 536 of 2118
Processing request 537 of 2118
Processing request 538 of 2118
Processing request 539 of 2118
Processing request 540 of 2118
Processing request 541 of 2118
Processing request 542 of 2118
Processing request 543 of 2118
Processing request 544 of 2118
Processing request 545 of 2118
Processi

ConnectionError: HTTPSConnectionPool(host='api.tomtom.com', port=443): Max retries exceeded with url: /routing/1/calculateRoute/35.9473111%2C14.3931026%3A35.9462932%2C14.3904814/json?maxAlternatives=1&computeTravelTimeFor=all&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key=uA2d36BEe5Xby9As7hUgrBmGL34u4n0h (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002458B8BDAF0>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

In [None]:
# Step 4.5 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
Distinct_Edges_MALTA_IncTravelTimes = Distinct_Edges_MALTA.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
Distinct_Edges_MALTA_IncTravelTimes.to_csv('C://Users//Owner//ICT5012 - Disseration//Distinct_Edges_MALTA_IncTravelTimes.csv')

In [21]:
# Step 4.6 - Repeat process for 'Distinct_Edges_GOZO'

# TomTom API Key 
TOMTOM_API_Key = 'uA2d36BEe5Xby9As7hUgrBmGL34u4n0h'

#Defining empty lists to be used to append data to
Length_In_Metres = []
Travel_Time = []
Travel_Time_No_Traffic = []
Historic_Traffic_Travel_Time = []
Live_Traffic_Travel_Time = []
Traffic_Delay = []
Departure_Time = []
Arrival_Time = []

# for loop going over all entries in 'Distinct_Edges_MALTA' and passing the following information as a request using TomTom API:
# 1. Latitude_Final - Latitiude of origin
# 2. Longitude_Final - Longitude of origin
# 3. Latitude_Next - Latitude of destination
# 4. Longitude_Next - Longitude of destination
for i in range(len(Distinct_Edges_GOZO)):
    print(f"Processing request {i+1} of {len(Distinct_Edges_GOZO)}")
    url = "https://api.tomtom.com/routing/1/calculateRoute/{0}%2C{1}%3A{2}%2C{3}/json?maxAlternatives=1&computeTravelTimeFor=all&includeTollPaymentTypes=none&routeType=shortest&traffic=true&travelMode=bus&key={4}".format(Distinct_Edges_MALTA['Latitude_Final'][i], Distinct_Edges_MALTA['Longitude_Final'][i], Distinct_Edges_MALTA['Latitude_Next'][i], Distinct_Edges_MALTA['Longitude_Next'][i], TOMTOM_API_Key)
    Response_Website = requests.get(url)

    # If Status_code value is 200 this implies TomTom has completed succesfully.
    # If status_code is not 200, the provided status_code and error message will be printed by the code.
    if Response_Website.status_code == 200:
        Result = Response_Website.json()


        # Extract values from the 'Result' given by TomTom API for the corresponding edge
        Length_In_Metres_Value = Result['routes'][0]['summary']['lengthInMeters']
        Travel_Time_Value = Result['routes'][0]['summary']['travelTimeInSeconds']
        Travel_Time_No_Traffic_Value = Result['routes'][0]['summary']['noTrafficTravelTimeInSeconds']
        Historic_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['historicTrafficTravelTimeInSeconds']
        Live_Traffic_Travel_Time_Value = Result['routes'][0]['summary']['liveTrafficIncidentsTravelTimeInSeconds']
        Traffic_Delay_Value = Result['routes'][0]['summary']['trafficDelayInSeconds']
        Departure_Time_Value = Result['routes'][0]['summary']['departureTime']
        Arrival_Time_Value = Result['routes'][0]['summary']['arrivalTime']

        # Append values to list
        Length_In_Metres.append(Length_In_Metres_Value)
        Travel_Time.append(Travel_Time_Value)
        Travel_Time_No_Traffic.append(Travel_Time_No_Traffic_Value)
        Historic_Traffic_Travel_Time.append(Historic_Traffic_Travel_Time_Value)
        Live_Traffic_Travel_Time.append(Live_Traffic_Travel_Time_Value)
        Traffic_Delay.append(Traffic_Delay_Value)
        Departure_Time.append(Departure_Time_Value)
        Arrival_Time.append(Arrival_Time_Value)
        
    else:
        print(f"Error: HTTP {Response_Website.status_code} - {Response_Website.text}")
        
    # Introduce a delay of 1 second between each request to avoid too many requests per second
    time.sleep(1)


Processing request 1 of 390
Processing request 2 of 390
Processing request 3 of 390
Processing request 4 of 390
Processing request 5 of 390
Processing request 6 of 390
Processing request 7 of 390
Processing request 8 of 390
Processing request 9 of 390
Processing request 10 of 390
Processing request 11 of 390
Processing request 12 of 390
Processing request 13 of 390
Processing request 14 of 390
Processing request 15 of 390
Processing request 16 of 390
Processing request 17 of 390
Processing request 18 of 390
Processing request 19 of 390
Processing request 20 of 390
Processing request 21 of 390
Processing request 22 of 390
Processing request 23 of 390
Processing request 24 of 390
Error: HTTP 400 - {"formatVersion":"0.0.12","detailedError":{"message":"Invalid request: malformed coordinate at index [1]: [nan,nan]","code":"BAD_INPUT"}}
Processing request 25 of 390
Processing request 26 of 390
Processing request 27 of 390
Processing request 28 of 390
Processing request 29 of 390
Processing r

Processing request 268 of 390
Processing request 269 of 390
Processing request 270 of 390
Processing request 271 of 390
Processing request 272 of 390
Processing request 273 of 390
Processing request 274 of 390
Processing request 275 of 390
Processing request 276 of 390
Processing request 277 of 390
Processing request 278 of 390
Processing request 279 of 390
Processing request 280 of 390
Processing request 281 of 390
Processing request 282 of 390
Processing request 283 of 390
Processing request 284 of 390
Processing request 285 of 390
Error: HTTP 400 - {"formatVersion":"0.0.12","detailedError":{"message":"Invalid request: malformed coordinate at index [1]: [nan,nan]","code":"BAD_INPUT"}}
Processing request 286 of 390
Processing request 287 of 390
Processing request 288 of 390
Processing request 289 of 390
Processing request 290 of 390
Processing request 291 of 390
Processing request 292 of 390
Processing request 293 of 390
Processing request 294 of 390
Processing request 295 of 390
Proc

In [30]:
Distinct_Edges_GOZO.to_csv('Donkey.csy')

In [22]:
# Step 4.7 - Assign edge traversal information obtained via TomTom API to 'Distinct_Edges_MALTA' as follows:
# 'Travel_Time' Column - Travel_Time
# 'Travel_Time_No_Traffic' Column - 'Travel_Time_No_Traffic' list
# 'Historic_Traffic_Travel_Time' Column - 'Historic_Traffic_Travel_Time' list
# 'Live_Traffic_Travel_Time' Column - 'Live_Traffic_Travel_Time' list
# 'Traffic_Delay' Column - 'Traffic_Delay' list
# 'Length_In_Metres' Column - 'Length_In_Metres' list
# 'Departure_Time' Column - 'Departure_Time' list
# 'Arrival_Time' Column - 'Arrival_Time' list

#NB: Result DataFrame is renamed to 'Distinct_Edges_MALTA_IncTravelTimes'
Distinct_Edges_GOZO_IncTravelTimes = Distinct_Edges_GOZO.assign(Travel_Time = Travel_Time, Travel_Time_No_Traffic = Travel_Time_No_Traffic,
                                                          Historic_Traffic_Travel_Time = Historic_Traffic_Travel_Time, Live_Traffic_Travel_Time = Live_Traffic_Travel_Time,
                                                          Traffic_Delay = Traffic_Delay, Length_In_Metres=Length_In_Metres, Departure_Time = Departure_Time, Arrival_Time = Arrival_Time)

# 'Distinct_Edges_MALTA_IncTravelTimes' is saved such that it can be loaded in future instances
#Distinct_Edges_GOZO_IncTravelTimes.to_csv('C://Users//Owner//ICT5012 - Disseration//Creating SUMO Simulation & Obtaining Travel Times//Results//Distinct_Edges_GOZO_IncTravelTimes.csv')

ValueError: Length of values (387) does not match length of index (390)

In [None]:
########################################################################################

In [None]:
All_Routes
min_value = All_Routes['Stop Time 1'].min()
min_value

In [None]:
# Step 3 - We will proceed by splitting the 'All_Routes_Copy' into the following fourteen separate dataframes:
 # 1 - Monday_Routes_Malta
 # 2 - Tuesday_Routes_Malta
 # 3 - Wednesday_Routes_Malta
 # 4 - Thursday_Routes_Malta
 # 5 - Friday_Routes_Malta
 # 6 - Saturday_Routes_Malta
 # 7 - Sunday_Routes_Malta
 # 8 - Monday_Routes_Gozo
 # 9 - Tuesday_Routes_Gozo
 # 10 - Wednesday_Routes_Gozo
 # 11 - Thursday_Routes_Gozo
 # 12 - Friday_Routes_Gozo
 # 13 - Saturday_Routes_Gozo
 # 14 - Sunday_Routes_Gozo

# List of days in which Public Transportation System Functions
List_Dates = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# 'Rows_Date_Dict' - Dictionary to store only instances of the same date 
Rows_Date_Dict = {}
# 'Malta_Dict' - Dictionary to store only instances of the same date operating in Malta
Malta_Dict = {}
# 'Gozo_Dict' - Dictionary to store only instances of the same date operating in Gozo
Gozo_Dict = {}

# for loop going over List_Dates defined above
for c in List_Dates:
    # filter rows according to date 'c' currently being considered
    Rows_Date_Dict[c] = All_Routes_Copy[All_Routes_Copy['Date'].astype(str).str.contains(c, na=False)]

    # split entries present in 'Rows_Date_Dict[c]' accordng to 'Stop Island' value
    Malta_Dict[c] = Rows_Date_Dict[c][Rows_Date_Dict[c]['Stop Island'] == 'MALTA STOP'].reset_index(drop=True)
    Gozo_Dict[c] = Rows_Date_Dict[c][Rows_Date_Dict[c]['Stop Island'] == 'GOZO STOP'].reset_index(drop=True)

# Naming Dataframes 
Monday_Routes_Malta = Malta_Dict['Monday']
Tuesday_Routes_Malta = Malta_Dict['Tuesday']
Wednesday_Routes_Malta = Malta_Dict['Wednesday']
Thursday_Routes_Malta = Malta_Dict['Thursday']
Friday_Routes_Malta = Malta_Dict['Friday']
Saturday_Routes_Malta = Malta_Dict['Saturday']
Sunday_Routes_Malta = Malta_Dict['Sunday']
Monday_Routes_Gozo = Gozo_Dict['Monday']
Tuesday_Routes_Gozo = Gozo_Dict['Tuesday']
Wednesday_Routes_Gozo = Gozo_Dict['Wednesday']
Thursday_Routes_Gozo = Gozo_Dict['Thursday']
Friday_Routes_Gozo = Gozo_Dict['Friday']
Saturday_Routes_Gozo = Gozo_Dict['Saturday']
Sunday_Routes_Gozo = Gozo_Dict['Sunday']


In [None]:
# Step 3.1 - Obtain Number of Routes Covered each day

Malta_Route_DataFrames_List = [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta]
Gozo_Route_DataFrames_List = [Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

# Used to select Date from 'List_Dates'
j = 0

for i in Malta_Route_DataFrames_List:
    Unique_Route_Length = len(i['Route Number'].unique())
    print(f'Number of Routes for Malta in {List_Dates[j]} is {Unique_Route_Length}')
    j += 1

c = 0 

for i in Gozo_Route_DataFrames_List:
    Unique_Route_Length = len(i['Route Number'].unique())
    print(f'Number of Routes for Gozo in {List_Dates[c]} is {Unique_Route_Length}')
    c += 1

In [None]:
# Step 3.2 - Obtain table indicating number of bus stops traversed by each route 
# To simplify problem, for any non-circular stops we will consider only the maximum number of stops in any one direction


# Initialize an empty dictionary to store result of number of stops of each route per day
Malta_Stops_Per_Route_PerDay = {}
Gozo_Stops_Per_Route_PerDay = {}

j = 0

for i in Malta_Route_DataFrames_List:
    i_copy = i.copy()
    i_copy['Concatenated_Columns'] = (i_copy['Route Number'] + '-' + i_copy['Route Direction'])
    # Count the number of rows in each group
    group_sizes = i_copy.groupby('Concatenated_Columns').size()
    # Map group sizes back to the original DataFrame
    i_copy['Group'] = i_copy['Concatenated_Columns'].map(group_sizes)
    # Drop temporary columns if not needed
    i_copy.drop(columns=['Concatenated_Columns'], inplace=True)
    # Count the size of each group
    Partition_Count = i_copy.groupby(['Route Number', 'Group']).size()
    # Find the maximum size for each Route Number
    Malta_Stops_Per_Route = Partition_Count.groupby('Route Number').max()
    Malta_Stops_Per_Route_PerDay[List_Dates[j]] = Malta_Stops_Per_Route
    j += 1

k = 0

for i in Gozo_Route_DataFrames_List:
    i_copy = i.copy()
    i_copy['Concatenated_Columns'] = (i_copy['Route Number'] + '-' + i_copy['Route Direction'])
    # Count the number of rows in each group
    group_sizes = i_copy.groupby('Concatenated_Columns').size()
    # Map group sizes back to the original DataFrame
    i_copy['Group'] = i_copy['Concatenated_Columns'].map(group_sizes)
    # Drop temporary columns if not needed
    i_copy.drop(columns=['Concatenated_Columns'], inplace=True)
    # Count the size of each group
    Partition_Count = i_copy.groupby(['Route Number', 'Group']).size()
    # Find the maximum size for each Route Number
    Gozo_Stops_Per_Route = Partition_Count.groupby('Route Number').max()
    Gozo_Stops_Per_Route_PerDay[List_Dates[k]] = Gozo_Stops_Per_Route
    k += 1


# Naming Dataframes 
Monday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Monday']
Tuesday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Tuesday']
Wednesday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Wednesday']
Thursday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Thursday']
Friday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Friday']
Saturday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Saturday']
Sunday_NoStopsPerRoute_Malta = Malta_Stops_Per_Route_PerDay['Sunday']
Monday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Monday']
Tuesday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Tuesday']
Wednesday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Wednesday']
Thursday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Thursday']
Friday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Friday']
Saturday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Saturday']
Sunday_NoStopsPerRoute_Gozo = Gozo_Stops_Per_Route_PerDay['Sunday']

In [None]:
# Combine Data in One DataFrame
# Column - Day of the week
# Rows - Route Number
# Entries in DataFrame show the number of Bus Stops traverrsed by each Route

# Combine Data in one DataFrame entitled 'Malta_RoutePresencePerDay__and_BusStopsVisited'
Malta_RoutePresencePerDay__and_BusStopsVisited = pd.concat([Monday_NoStopsPerRoute_Malta, Tuesday_NoStopsPerRoute_Malta, Wednesday_NoStopsPerRoute_Malta,
                                                           Thursday_NoStopsPerRoute_Malta, Friday_NoStopsPerRoute_Malta, Saturday_NoStopsPerRoute_Malta,
                                                           Sunday_NoStopsPerRoute_Malta], axis=1)
# Replace 'N/A' with NaN 
Malta_RoutePresencePerDay__and_BusStopsVisited.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Malta_RoutePresencePerDay__and_BusStopsVisited = Malta_RoutePresencePerDay__and_BusStopsVisited.astype('Int64')
# Rename Columns
Malta_RoutePresencePerDay__and_BusStopsVisited.columns = List_Dates

In [None]:
# Combine Data in One DataFrame
# Column - Day of the week
# Rows - Route Number
# Entries in DataFrame show the number of Bus Stops traverrsed by each Route

# Combine Data in one DataFrame entitled 'Malta_RoutePresencePerDay__and_BusStopsVisited'
Gozo_RoutePresencePerDay__and_BusStopsVisited = pd.concat([Monday_NoStopsPerRoute_Gozo, Tuesday_NoStopsPerRoute_Gozo, Wednesday_NoStopsPerRoute_Gozo,
                                                           Thursday_NoStopsPerRoute_Gozo, Friday_NoStopsPerRoute_Gozo, Saturday_NoStopsPerRoute_Gozo,
                                                           Sunday_NoStopsPerRoute_Gozo], axis=1)
# Replace 'N/A' with NaN 
Gozo_RoutePresencePerDay__and_BusStopsVisited.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Gozo_RoutePresencePerDay__and_BusStopsVisited = Gozo_RoutePresencePerDay__and_BusStopsVisited.astype('Int64')
# Rename Columns
Gozo_RoutePresencePerDay__and_BusStopsVisited.columns = List_Dates

In [None]:
# Step 3.3 - Obtain table indicating number of times route runs throughout a day 
# (It is expected that circular routes will run more frequently compared to routes going in one direction_
# To simplify problem, for any non-circular stops we will consider only the maximum frequency between directions
# Frequency is given per working day


# Initialize an empty dictionary to store result of number of stops of each route per day
Malta_Frequency_Of_Route_PerDay = {}
Gozo_Frequency_Of_Route_PerDay = {}

j = 0

for i in Malta_Route_DataFrames_List:
    i_copy = i.copy()
    Grouped = i_copy.groupby('Route Number')
    # Find the maximum size for each Route Number
    Malta_Frequency_Of_Route = Grouped['Time_Count'].max()
    Malta_Frequency_Of_Route_PerDay[List_Dates[j]] = Malta_Frequency_Of_Route
    j += 1

k = 0

for i in Gozo_Route_DataFrames_List:
    i_copy = i.copy()
    Grouped = i_copy.groupby('Route Number')
    # Find the maximum size for each Route Number
    Gozo_Frequency_Of_Route = Grouped['Time_Count'].max()
    Gozo_Frequency_Of_Route_PerDay[List_Dates[k]] = Gozo_Frequency_Of_Route
    k += 1


# Naming Dataframes 
Monday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Monday']
Tuesday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Tuesday']
Wednesday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Wednesday']
Thursday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Thursday']
Friday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Friday']
Saturday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Saturday']
Sunday_FrequencyofRoute_Malta = Malta_Frequency_Of_Route_PerDay['Sunday']
Monday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Monday']
Tuesday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Tuesday']
Wednesday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Wednesday']
Thursday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Thursday']
Friday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Friday']
Saturday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Saturday']
Sunday_FrequencyofRoute_Gozo = Gozo_Frequency_Of_Route_PerDay['Sunday']

In [None]:
# Combine Data in One DataFrame
# Column - Day of the week
# Rows - Route Number
# Entries in DataFrame show the Frequency per day of each Route

# Combine Data in one DataFrame entitled 'Malta_RoutePresencePerDay__and_BusStopsVisited'
Malta_FrequencyPerDay = pd.concat([Monday_FrequencyofRoute_Malta, Tuesday_FrequencyofRoute_Malta, Wednesday_FrequencyofRoute_Malta,
                                                           Thursday_FrequencyofRoute_Malta, Friday_FrequencyofRoute_Malta, Saturday_FrequencyofRoute_Malta,
                                                           Sunday_FrequencyofRoute_Malta], axis=1)
# Replace 'N/A' with NaN 
Malta_FrequencyPerDay.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Malta_FrequencyPerDay = Malta_FrequencyPerDay.astype('Int64')
# Rename Columns
Malta_FrequencyPerDay.columns = List_Dates

In [None]:
# Combine Data in One DataFrame
# Column - Day of the week
# Rows - Route Number
# Entries in DataFrame show the Frequency per day of each Route

# Combine Data in one DataFrame entitled 'Malta_RoutePresencePerDay__and_BusStopsVisited'
Gozo_FrequencyPerDay = pd.concat([Monday_FrequencyofRoute_Gozo, Tuesday_FrequencyofRoute_Gozo, Wednesday_FrequencyofRoute_Gozo,
                                                           Thursday_FrequencyofRoute_Gozo, Friday_FrequencyofRoute_Gozo, Saturday_FrequencyofRoute_Gozo,
                                                           Sunday_FrequencyofRoute_Gozo], axis=1)
# Replace 'N/A' with NaN 
Gozo_FrequencyPerDay.replace('N/A', pd.NA, inplace=True)
# Convert to Integers
Gozo_FrequencyPerDay = Gozo_FrequencyPerDay.astype('Int64')
# Rename Columns
Gozo_FrequencyPerDay.columns = List_Dates

In [None]:
# Download DataFrames
Malta_RoutePresencePerDay__and_BusStopsVisited.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Malta_RoutePresencePerDay__and_BusStopsVisited.csv')
Gozo_RoutePresencePerDay__and_BusStopsVisited.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Gozo_RoutePresencePerDay__and_BusStopsVisited.csv')
Malta_FrequencyPerDay.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Malta_FrequencyPerDay.csv')
Gozo_FrequencyPerDay.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Gozo_FrequencyPerDay.csv')

In [None]:
Malta_RoutePresencePerDay__and_BusStopsVisited

In [None]:
All_Routes_Copy.to_csv('C://Users//attardan.CBM//Data Visualisation//All_Routes_Copies.csv')

In [None]:
# Step 4 - Obtain List of Bus Terminals (Malta & Gozo)

Malta_Route_DataFrames_List = [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta]
Gozo_Route_DataFrames_List = [Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

Day_Names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Save Path Home
#save_path = "C://Users//Owner//ICT5012 - Disseration//Untitled Folder//"
# Save Path Work
save_path = "C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//"


for i, day in zip(Malta_Route_DataFrames_List, Day_Names):
    
    print(day)
    
    Reset_Condition_SepDay = (
        i['Route Number'].shift(-1) != i['Route Number']) | \
        (i['Route Direction'].shift(-1) != i['Route Direction'])

    # Initialise all entries in 'Bus_Terminal' as 0
    i['Bus_Terminal'] = 0
    # If Reset_Conditions defined above fails, then Bus Stop considered in 'Bus_Stop_ID' is a Bus Terminal
    i.loc[Reset_Condition_SepDay, 'Bus_Terminal'] = 1
    OG_BUS_Terminal_COLUMN = i['Bus_Terminal'].copy()
    # Iterate through the original column
    for j in range(len(OG_BUS_Terminal_COLUMN) - 1):
        if OG_BUS_Terminal_COLUMN[j] == 1:
            i.loc[j + 1, 'Bus_Terminal'] = 1
    # Initialising First Row as Bus Terminal
    i.loc[0, 'Bus_Terminal'] = 1
    
    Bus_Terminals_DataFrame = i[i['Bus_Terminal'] == 1]

    # Contain List of traversed Bus_Stop_ID ('bus_stop_ids' - All Bus Stops identified as terminal stops)
    bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Bus_Terminals_DataFrame)):
        bus_stop_id = Bus_Terminals_DataFrame.iloc[c]['Bus_Stop_ID']

        if bus_stop_id not in bus_stop_ids:
            bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs
            
    # Obtain only entries which correspond to a Bus Terminals. 
    # Duplicates will be present since the same Bus Terminals may be utilised for multiple 'Route Numebr' and 'Route Direction'
    Normal_Bus_DataFrame = i[i['Bus_Terminal'] == 0]
    
    # Contain List of traversed Bus_Stop_ID ('Normal_bus_stop_ids' - All Bus Stops identified as normal stops)
    Normal_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Normal_Bus_DataFrame)):
        Normal_bus_stop_id = Normal_Bus_DataFrame.iloc[c]['Bus_Stop_ID']

        if Normal_bus_stop_id not in Normal_bus_stop_ids:
            Normal_bus_stop_ids.append(Normal_bus_stop_id)  # Add to list of seen IDs

    
    # Bus Stops used in terminals/terminals used as Bus Stops
    # Find items in common between the two lists produced above 'bus_stop_ids' and 'Normal_bus_stop_ids'
    #Normal_and_BusTerminals_ids = [item for item in Normal_bus_stop_ids if item in bus_stop_ids]
    Normal_and_BusTerminals_ids = list(set(bus_stop_ids) & set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    Normal_and_BusTerminals_DataFrame_Malta = pd.DataFrame(columns=i.columns)
    
    # Store Bus Stops to avoid duplicates
    compare_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        bus_stop_id = i.iloc[c]['Bus_Stop_ID']

        if bus_stop_id in Normal_and_BusTerminals_ids and bus_stop_id not in compare_bus_stop_ids:
            compare_bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs

            Normal_and_BusTerminals_DataFrame_Malta = pd.concat([Normal_and_BusTerminals_DataFrame_Malta, i.iloc[[c]]],
                                                                ignore_index=True)
 
    file_name = f"AllNormalBusStopsUsedAsTerminalsMalta_{day}.csv" 
    Normal_and_BusTerminals_DataFrame_Malta.to_csv(save_path + file_name, index=False)
    
    # Obtaining Stops which are only used as Terminals
    BusTerminals_Only_ids = list(set(bus_stop_ids) - set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    BusTerminals_Only_DataFrame_Malta = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Terminal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Terminal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Terminal_stop_id in BusTerminals_Only_ids and Only_Terminal_stop_id not in Only_Terminal_stop_ids:
            Only_Terminal_stop_ids.append(Only_Terminal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            BusTerminals_Only_DataFrame_Malta = pd.concat([BusTerminals_Only_DataFrame_Malta, i.iloc[[c]]],
                                                          ignore_index=True)

    file_name = f"BusTerminalsOnlyMalta_{day}.csv" 
    BusTerminals_Only_DataFrame_Malta.to_csv(save_path + file_name, index=False)

    # Obtaining Stops which are only used as Normal Stops
    NormalStops_Only_ids = list(set(Normal_bus_stop_ids) - set(bus_stop_ids))
    
    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    NormalStops_Only_DataFrame_Malta = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Normal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Normal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Normal_stop_id in NormalStops_Only_ids and Only_Normal_stop_id not in Only_Normal_stop_ids:
            Only_Normal_stop_ids.append(Only_Normal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            NormalStops_Only_DataFrame_Malta = pd.concat([NormalStops_Only_DataFrame_Malta, i.iloc[[c]]],
                                                         ignore_index=True)
      
    file_name = f"NormalStopsOnlyMalta_{day}.csv" 
    NormalStops_Only_DataFrame_Malta.to_csv(save_path + file_name, index=False)

In [None]:
# Step 4 - Obtain List of Bus Terminals (Malta & Gozo)

Malta_Route_DataFrames_List = [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta]
Gozo_Route_DataFrames_List = [Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

Day_Names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Save Path Home
#save_path = "C://Users//Owner//ICT5012 - Disseration//Untitled Folder//"
# Save Path Work
save_path = "C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//"


for i, day in zip(Gozo_Route_DataFrames_List, Day_Names):
    
    print(day)
    
    Reset_Condition_SepDay = (
        i['Route Number'].shift(-1) != i['Route Number']) | \
        (i['Route Direction'].shift(-1) != i['Route Direction'])

    # Initialise all entries in 'Bus_Terminal' as 0
    i['Bus_Terminal'] = 0
    # If Reset_Conditions defined above fails, then Bus Stop considered in 'Bus_Stop_ID' is a Bus Terminal
    i.loc[Reset_Condition_SepDay, 'Bus_Terminal'] = 1
    OG_BUS_Terminal_COLUMN = i['Bus_Terminal'].copy()
    # Iterate through the original column
    for j in range(len(OG_BUS_Terminal_COLUMN) - 1):
        if OG_BUS_Terminal_COLUMN[j] == 1:
            i.loc[j + 1, 'Bus_Terminal'] = 1
    # Initialising First Row as Bus Terminal
    i.loc[0, 'Bus_Terminal'] = 1
    
    Bus_Terminals_DataFrame = i[i['Bus_Terminal'] == 1]

    # Contain List of traversed Bus_Stop_ID ('bus_stop_ids' - All Bus Stops identified as terminal stops)
    bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Bus_Terminals_DataFrame)):
        bus_stop_id = Bus_Terminals_DataFrame.iloc[c]['Bus_Stop_ID']

        if bus_stop_id not in bus_stop_ids:
            bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs
            
    # Obtain only entries which correspond to a Bus Terminals. 
    # Duplicates will be present since the same Bus Terminals may be utilised for multiple 'Route Numebr' and 'Route Direction'
    Normal_Bus_DataFrame = i[i['Bus_Terminal'] == 0]
    
    # Contain List of traversed Bus_Stop_ID ('Normal_bus_stop_ids' - All Bus Stops identified as normal stops)
    Normal_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(Normal_Bus_DataFrame)):
        Normal_bus_stop_id = Normal_Bus_DataFrame.iloc[c]['Bus_Stop_ID']

        if Normal_bus_stop_id not in Normal_bus_stop_ids:
            Normal_bus_stop_ids.append(Normal_bus_stop_id)  # Add to list of seen IDs

    
    # Bus Stops used in terminals/terminals used as Bus Stops
    # Find items in common between the two lists produced above 'bus_stop_ids' and 'Normal_bus_stop_ids'
    #Normal_and_BusTerminals_ids = [item for item in Normal_bus_stop_ids if item in bus_stop_ids]
    Normal_and_BusTerminals_ids = list(set(bus_stop_ids) & set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    Normal_and_BusTerminals_DataFrame_Gozo = pd.DataFrame(columns=i.columns)
    
    # Store Bus Stops to avoid duplicates
    compare_bus_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        bus_stop_id = i.iloc[c]['Bus_Stop_ID']

        if bus_stop_id in Normal_and_BusTerminals_ids and bus_stop_id not in compare_bus_stop_ids:
            compare_bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs

            Normal_and_BusTerminals_DataFrame_Gozo = pd.concat([Normal_and_BusTerminals_DataFrame_Gozo, i.iloc[[c]]],
                                                                ignore_index=True)
 
    file_name = f"AllNormalBusStopsUsedAsTerminalsGozo_{day}.csv" 
    Normal_and_BusTerminals_DataFrame_Gozo.to_csv(save_path + file_name, index=False)
    
    # Obtaining Stops which are only used as Terminals
    BusTerminals_Only_ids = list(set(bus_stop_ids) - set(Normal_bus_stop_ids))

    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    BusTerminals_Only_DataFrame_Gozo = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Terminal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Terminal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Terminal_stop_id in BusTerminals_Only_ids and Only_Terminal_stop_id not in Only_Terminal_stop_ids:
            Only_Terminal_stop_ids.append(Only_Terminal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            BusTerminals_Only_DataFrame_Gozo = pd.concat([BusTerminals_Only_DataFrame_Gozo, i.iloc[[c]]],
                                                          ignore_index=True)

    file_name = f"BusTerminalsOnlyGozo_{day}.csv" 
    BusTerminals_Only_DataFrame_Gozo.to_csv(save_path + file_name, index=False)

    # Obtaining Stops which are only used as Normal Stops
    NormalStops_Only_ids = list(set(Normal_bus_stop_ids) - set(bus_stop_ids))
    
    # Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
    NormalStops_Only_DataFrame_Gozo = pd.DataFrame(columns=i.columns)

    # Store Bus Stops to avoid duplicates
    Only_Normal_stop_ids = []

    # Consider only Distinct Bus Terminals
    for c in range(len(i)):
        Only_Normal_stop_id = i.iloc[c]['Bus_Stop_ID']

        if Only_Normal_stop_id in NormalStops_Only_ids and Only_Normal_stop_id not in Only_Normal_stop_ids:
            Only_Normal_stop_ids.append(Only_Normal_stop_id)  # Add to list of seen IDs

            # Check the island and append the row
            NormalStops_Only_DataFrame_Gozo = pd.concat([NormalStops_Only_DataFrame_Gozo, i.iloc[[c]]],
                                                         ignore_index=True)
      
    file_name = f"NormalStopsOnlyGozo_{day}.csv" 
    NormalStops_Only_DataFrame_Gozo.to_csv(save_path + file_name, index=False)

In [None]:
# Loading all datasets created (for Malta)
Malta_NormalStops_Monday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Monday.csv")
Malta_NormalStops_Tuesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Tuesday.csv")
Malta_NormalStops_Wednesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Wednesday.csv")
Malta_NormalStops_Thursday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Thursday.csv")
Malta_NormalStops_Friday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Friday.csv")
Malta_NormalStops_Saturday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Saturday.csv")
Malta_NormalStops_Sunday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyMalta_Sunday.csv")

Malta_TerminalStops_Monday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Monday.csv")
Malta_TerminalStops_Tuesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Tuesday.csv")
Malta_TerminalStops_Wednesday =pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Wednesday.csv")
Malta_TerminalStops_Thursday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Thursday.csv")
Malta_TerminalStops_Friday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Friday.csv")
Malta_TerminalStops_Saturday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Saturday.csv")
Malta_TerminalStops_Sunday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyMalta_Sunday.csv")

Malta_NormalandTerminalStops_Monday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Monday.csv")
Malta_NormalandTerminalStops_Tuesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Tuesday.csv")
Malta_NormalandTerminalStops_Wednesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Wednesday.csv")
Malta_NormalandTerminalStops_Thursday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Thursday.csv")
Malta_NormalandTerminalStops_Friday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Friday.csv")
Malta_NormalandTerminalStops_Saturday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Saturday.csv")
Malta_NormalandTerminalStops_Sunday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsMalta_Sunday.csv")

In [None]:
# Loading all datasets created (for Gozo)
Gozo_NormalStops_Monday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Monday.csv")
Gozo_NormalStops_Tuesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Tuesday.csv")
Gozo_NormalStops_Wednesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Wednesday.csv")
Gozo_NormalStops_Thursday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Thursday.csv")
Gozo_NormalStops_Friday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Friday.csv")
Gozo_NormalStops_Saturday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Saturday.csv")
Gozo_NormalStops_Sunday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//NormalStopsOnlyGozo_Sunday.csv")

Gozo_TerminalStops_Monday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Monday.csv")
Gozo_TerminalStops_Tuesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Tuesday.csv")
Gozo_TerminalStops_Wednesday =pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Wednesday.csv")
Gozo_TerminalStops_Thursday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Thursday.csv")
Gozo_TerminalStops_Friday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Friday.csv")
Gozo_TerminalStops_Saturday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Saturday.csv")
Gozo_TerminalStops_Sunday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//BusTerminalsOnlyGozo_Sunday.csv")

Gozo_NormalandTerminalStops_Monday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Monday.csv")
Gozo_NormalandTerminalStops_Tuesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Tuesday.csv")
Gozo_NormalandTerminalStops_Wednesday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Wednesday.csv")
Gozo_NormalandTerminalStops_Thursday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Thursday.csv")
Gozo_NormalandTerminalStops_Friday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Friday.csv")
Gozo_NormalandTerminalStops_Saturday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Saturday.csv")
Gozo_NormalandTerminalStops_Sunday = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Outputs of Terminals_NormalStops//AllNormalBusStopsUsedAsTerminalsGozo_Sunday.csv")

In [None]:
# Creating Pandas Dataframes to show Data in table form

List_Original_DataFrames =  [Monday_Routes_Malta, Tuesday_Routes_Malta, Wednesday_Routes_Malta, Thursday_Routes_Malta, Friday_Routes_Malta, Saturday_Routes_Malta, Sunday_Routes_Malta,
                            Monday_Routes_Gozo, Tuesday_Routes_Gozo, Wednesday_Routes_Gozo, Thursday_Routes_Gozo, Friday_Routes_Gozo, Saturday_Routes_Gozo, Sunday_Routes_Gozo]

List_NormalStopsOnly_DataFrames = [Malta_NormalStops_Monday, Malta_NormalStops_Tuesday, Malta_NormalStops_Wednesday, Malta_NormalStops_Thursday, Malta_NormalStops_Friday, Malta_NormalStops_Saturday,
                                   Malta_NormalStops_Sunday, Gozo_NormalStops_Monday, Gozo_NormalStops_Tuesday, Gozo_NormalStops_Wednesday, Gozo_NormalStops_Thursday, Gozo_NormalStops_Friday, Gozo_NormalStops_Saturday,
                                   Gozo_NormalStops_Sunday]

List_TerminalStopsOnly_DataFrames = [Malta_TerminalStops_Monday, Malta_TerminalStops_Tuesday, Malta_TerminalStops_Wednesday, Malta_TerminalStops_Thursday,
                                    Malta_TerminalStops_Friday, Malta_TerminalStops_Saturday, Malta_TerminalStops_Sunday, Gozo_TerminalStops_Monday, Gozo_TerminalStops_Tuesday, Gozo_TerminalStops_Wednesday, Gozo_TerminalStops_Thursday,
                                    Gozo_TerminalStops_Friday, Gozo_TerminalStops_Saturday, Gozo_TerminalStops_Sunday]

List_NormalandTerminalStops_DataFrames = [Malta_NormalandTerminalStops_Monday, Malta_NormalandTerminalStops_Tuesday, Malta_NormalandTerminalStops_Wednesday, Malta_NormalandTerminalStops_Thursday,
                                          Malta_NormalandTerminalStops_Friday, Malta_NormalandTerminalStops_Saturday, Malta_NormalandTerminalStops_Sunday, Gozo_NormalandTerminalStops_Monday, Gozo_NormalandTerminalStops_Tuesday, Gozo_NormalandTerminalStops_Wednesday, Gozo_NormalandTerminalStops_Thursday,
                                          Gozo_NormalandTerminalStops_Friday, Gozo_NormalandTerminalStops_Saturday, Gozo_NormalandTerminalStops_Sunday]

Column_List = ['Malta_Monday', 'Malta_Tuesday', 'Malta_Wednesday', 'Malta_Thursday', 'Malta_Friday',
               'Malta_Saturday', 'Malta_Sunday', 'Gozo_Monday', 'Gozo_Tuesday', 'Gozo_Wednesday', 'Gozo_Thursday', 'Gozo_Friday',
               'Gozo_Saturday', 'Gozo_Sunday']

In [None]:

Total_Nodes = []
for c in List_Original_DataFrames:
    Total_Nodes.append(c['Bus_Stop_ID'].nunique())
    
Total_NormalStopsOnly = []
for c in List_NormalStopsOnly_DataFrames:
    Total_NormalStopsOnly.append(c['Bus_Stop_ID'].nunique())
    
Total_TerminalStopsOnly = []
for c in List_TerminalStopsOnly_DataFrames:
    Total_TerminalStopsOnly.append(c['Bus_Stop_ID'].nunique()) 
    
Total_NormalandTerminalStops = []
for c in List_NormalandTerminalStops_DataFrames:
    Total_NormalandTerminalStops.append(c['Bus_Stop_ID'].nunique()) 
    

In [None]:
NodeInfo_SplitByDay = pd.DataFrame([Total_Nodes, Total_NormalStopsOnly, Total_TerminalStopsOnly, Total_NormalandTerminalStops],
                                   index=['Total Number of unique Nodes', 'Total Number of Unique Normal Stops', 'Total Number of Unique Bus Terminals',
                                          'Total Number of Unique Stops used as both Regular Stops and Terminals'], columns= Column_List)

In [None]:
# NodeInfo_SplitByDay 

In [None]:
NodeInfo_SplitByDay

In [None]:
Monday_Routes_Malta.to_csv("C://Users//attardan.CBM//Data Visualisation//Monday_Routes_Malta.csv")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Step 5 - Average Time Taken to traverse each 'edge' ('link')

#At Home
# Load data from 'Distinct_Edges_MALTA_IncTravelTimes' (renamed to 'Malta_DistinctEdges_IncTravelTimes') and 
# 'Distinct_Edges_GOZO_IncTravelTimes' (renamed to 'Gozo_DistinctEdges_IncTravelTimes')
#Malta_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Creating SUMO Simulation & Obtaining Travel Times//Results//Distinct_Edges_MALTA_IncTravelTimes.csv")
#Gozo_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Creating SUMO Simulation & Obtaining Travel Times\Results//Distinct_Edges_GOZO_IncTravelTimes.csv")


# At Work
# Load data from 'Distinct_Edges_MALTA_IncTravelTimes' (renamed to 'Malta_DistinctEdges_IncTravelTimes') and 
# 'Distinct_Edges_GOZO_IncTravelTimes' (renamed to 'Gozo_DistinctEdges_IncTravelTimes')
Malta_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes.csv")
Gozo_DistinctEdges_IncTravelTimes = pd.read_csv("C://Users//attardan.CBM//Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes.csv")



In [None]:
# Data Cleaning (Take shortest path if available
# Ex. Stop 3 to Stop 819 is 56 seconds long
# on the otherhand Stop 819 to Stop 3 is 1019 seconds long (The former is correct)

# Standardize route names to be order-independent
Malta_DistinctEdges_IncTravelTimes['Standardized_Route'] = Malta_DistinctEdges_IncTravelTimes['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Malta_DistinctEdges_IncTravelTimes_Min = Malta_DistinctEdges_IncTravelTimes.loc[Malta_DistinctEdges_IncTravelTimes.groupby('Standardized_Route')['Travel_Time'].idxmin()]
# Drop the helper column if not needed
Malta_DistinctEdges_IncTravelTimes_Min = Malta_DistinctEdges_IncTravelTimes_Min.drop(columns=['Standardized_Route'])


# Standardize route names to be order-independent
Gozo_DistinctEdges_IncTravelTimes['Standardized_Route'] = Gozo_DistinctEdges_IncTravelTimes['Bus_Stop_Next_Bus_Stop'].apply(lambda x: '_to_'.join(sorted(x.split('_to_'))))
# Keep the row with the minimum travel time for each standardized route
Gozo_DistinctEdges_IncTravelTimes_Min = Gozo_DistinctEdges_IncTravelTimes.loc[Gozo_DistinctEdges_IncTravelTimes.groupby('Standardized_Route')['Travel_Time'].idxmin()]
# Drop the helper column if not needed
Gozo_DistinctEdges_IncTravelTimes_Min = Gozo_DistinctEdges_IncTravelTimes_Min.drop(columns=['Standardized_Route'])



In [None]:
Malta_DistinctEdges_IncTravelTimes_Min.to_csv("C://Users//attardan.CBM//Data Visualisation//Distinct_Edges_MALTA_IncTravelTimes_WrongPathFix.csv")
Gozo_DistinctEdges_IncTravelTimes_Min.to_csv("C://Users//attardan.CBM//Data Visualisation//Distinct_Edges_GOZO_IncTravelTimes_WrongPathFix.csv")

In [None]:
# Step 5.1 - Obtaining Information related to Malta

# Step 5.1.1 - Find edge with maximum travel time 
Malta_DistinctEdges_IncTravelTimes_Min[Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'] == Malta_DistinctEdges_IncTravelTimes['Travel_Time'].max()]
# Step 5.2 - Find average travelling time throughout all edges
Average_TravelTime = Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].mean()
# Step 5.3 - Find median travelling time throughout all edges
Median_TravelTime = Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].median()
# Step 5.4 - Obtain box plot of Travel Time

# Example data (replace this with your actual DataFrame column)
Travel_TimeData = pd.to_numeric(Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'], errors='coerce').dropna()


# Obtaining further statistics
minimum = np.min(Travel_TimeData)
Q1 = np.percentile(Travel_TimeData, 25)
median = np.median(Travel_TimeData)
Q3 = np.percentile(Travel_TimeData, 75)
maximum = np.max(Travel_TimeData)
# Compute interquartile range (IQR)
IQR = Q3 - Q1
# Compute whiskers (last values before outliers)
lower_whisker = np.min(Travel_TimeData[Travel_TimeData >= (Q1 - 1.5 * IQR)].astype(float))
upper_whisker = np.max(Travel_TimeData[Travel_TimeData <= (Q3 + 1.5 * IQR)].astype(float))

# Defining figure size
Figure = plt.figure(figsize =(10, 7))
plt.boxplot(Travel_TimeData, vert = True, patch_artist = True)

# Add title, y-axis and x-axis
plt.xlabel("Edge Travel Time Date", fontsize = 12)
plt.ylabel("Travel Time (seconds)", fontsize = 12)
plt.title("Box Plot of Travel Time in Malta", fontsize = 14)


plt.annotate(f"Min/Lower Whisker: {minimum:.2f}", 
             xy=(1, minimum), 
             xytext=(1.1, minimum + 2),
             fontsize=8)

plt.annotate(f"Q1: {Q1:.2f}", 
             xy=(1, Q1), 
             xytext=(1.1, Q1 + 2),
             fontsize=8)

plt.annotate(f"Median: {median:.2f}", 
             xy=(1, median), 
             xytext=(1.1, median + 2),
             fontsize=8, fontweight='bold')

plt.annotate(f"Q3: {Q3:.2f}", 
             xy=(1, Q3), 
             xytext=(1.1, Q3 + 2),
             fontsize=8)

plt.annotate(f"Max: {maximum:.2f}", 
             xy=(1, maximum), 
             xytext=(1.1, maximum + 2),
             fontsize=8)

plt.annotate(f"Upper Whisker: {upper_whisker:.2f}", 
             xy=(1, upper_whisker), 
             xytext=(1.1, upper_whisker + 2), 
             fontsize=8)

# Show the plot
plt.show()




In [None]:
# Step 5.1 - Obtaining Information related to Malta

# Step 5.1.1 - Find edge with maximum travel time 
Malta_DistinctEdges_IncTravelTimes_Min[Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'] == Malta_DistinctEdges_IncTravelTimes['Travel_Time'].max()]
# Step 5.2 - Find average travelling time throughout all edges
Average_TravelTime = Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].mean()
# Step 5.3 - Find median travelling time throughout all edges
Median_TravelTime = Malta_DistinctEdges_IncTravelTimes_Min['Travel_Time'].median()
# Step 5.4 - Obtain box plot of Travel Time

# Example data (replace this with your actual DataFrame column)
Travel_TimeData = pd.to_numeric(Gozo_DistinctEdges_IncTravelTimes_Min['Travel_Time'], errors='coerce').dropna()


# Obtaining further statistics
minimum = np.min(Travel_TimeData)
Q1 = np.percentile(Travel_TimeData, 25)
median = np.median(Travel_TimeData)
Q3 = np.percentile(Travel_TimeData, 75)
maximum = np.max(Travel_TimeData)
# Compute interquartile range (IQR)
IQR = Q3 - Q1
# Compute whiskers (last values before outliers)
lower_whisker = np.min(Travel_TimeData[Travel_TimeData >= (Q1 - 1.5 * IQR)].astype(float))
upper_whisker = np.max(Travel_TimeData[Travel_TimeData <= (Q3 + 1.5 * IQR)].astype(float))

# Defining figure size
Figure = plt.figure(figsize =(10, 7))
plt.boxplot(Travel_TimeData, vert = True, patch_artist = True)

# Add title, y-axis and x-axis
plt.xlabel("Edge Travel Time Date", fontsize = 12)
plt.ylabel("Travel Time (seconds)", fontsize = 12)
plt.title("Box Plot of Travel Time in Gozo", fontsize = 14)

#Annotate Values to Box Plot
plt.annotate(f"Min/Lower Whisker: {minimum:.2f}", 
             xy=(1, minimum), 
             xytext=(1.1, minimum + 2),
             fontsize=8)

plt.annotate(f"Q1: {Q1:.2f}", 
             xy=(1, Q1), 
             xytext=(1.1, Q1 + 2),
             fontsize=8)

plt.annotate(f"Median: {median:.2f}", 
             xy=(1, median), 
             xytext=(1.1, median + 2),
             fontsize=8, fontweight='bold')

plt.annotate(f"Q3: {Q3:.2f}", 
             xy=(1, Q3), 
             xytext=(1.1, Q3 + 2),
             fontsize=8)

plt.annotate(f"Max: {maximum:.2f}", 
             xy=(1, maximum), 
             xytext=(1.1, maximum + 2),
             fontsize=8)

plt.annotate(f"Upper Whisker: {upper_whisker:.2f}", 
             xy=(1, upper_whisker), 
             xytext=(1.1, upper_whisker + 2), 
             fontsize=8)



plt.show()



In [None]:
# Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
Bus_Stop_Info_Malta = pd.DataFrame(columns=Bus_Stop_Info.columns)
Bus_Stop_Info_Gozo = pd.DataFrame(columns=Bus_Stop_Info.columns)


# Store Bus Stops to avoid duplicates
Only_Normal_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(Bus_Stop_Info)):
    if Bus_Stop_Info.iloc[i]['Stop_Island'] == 'MALTA STOP':
        Bus_Stop_Info_Malta = pd.concat([Bus_Stop_Info_Malta, Bus_Stop_Info.iloc[[i]]],
                                                     ignore_index=True)
    else:
        Bus_Stop_Info_Gozo = pd.concat([Bus_Stop_Info_Gozo, Bus_Stop_Info.iloc[[i]]],
                                                    ignore_index=True)        

In [None]:
# Saving DataFrames
Bus_Stop_Info.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Stop_Info.csv')
Bus_Stop_Info_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Stop_Info_Malta.csv')
Bus_Stop_Info_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Stop_Info_Gozo.csv')
