In [None]:
# Importing Required Packages

# Importing 'pandas' to handle datasets
import pandas as pd
# Importing 'numpy' to handle arrays
import numpy as np

# Importing math to be able to utilise mathematical functions (radian is defined by converting 90 degrees)
import math
degree = 90
radian = math.radians(degree)

# Importing 're' package - Python Regular Expressions
import re

# Importing 'xml.etree.cElementTree' to be able to handle xml files
import xml.etree.cElementTree as ET

from datetime import datetime, timedelta
import requests 
import json 
from collections import OrderedDict
import json
import time

### Step 1 - Preparing Data Sets 

Appending the following columns to the 'All_Routes_Complete.csv' file obtained from the Malta public transport (MPT) website

    1. 'Stop Island' - Defines the island (Malta/Gozo) the corresponding Route Number operates in. 
    2. 'Time_Count' - Number of buses operating on a particular Route Number and Route Direction from the start till the end of the bus service.
    3. 'Stops - City Name - Stop Island' - Key column used to compare data in 'All_Routes_Complete.csv' to data in Bus_Stop_Info ('Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx')
    4. 'Longitude_Final' - Longitude value of corresponding Bus Stop entry ('Stops') obtained from Bus_Stop_Info
    5. 'Latitude_Final' - Latitude value of corresponding Bus Stop entry ('Stops') obtained from Bus_Stop_Info
    6. 'Bus_Stop_ID' - Unique identifier for all Bus Stops ('Stops')
    7. 'Next_Bus_Stop_ID' - Since All_Routes ('All_Routes_Complete.csv') was extracted in sequential order from MPT website, the next row in All_Routes corresponds to the upcoming Bus Stop (Given 'Route Number', 'Route Direction' and 'Date' columns remain the same). Hence, 'Next_Bus_Stop_ID' is the unique identifier of the upcoming Bus Stop ('Stops')
    8. 'Bus_Stop_Next_Bus_Stop'- Unique identifier used to define connection between 'Bus_Stop_ID and 'Next_Bus_Stop_ID'

In [None]:
# Step 1.1 - Load Datasets
# Loading 'All_Routes_Complete.csv' (All_Routes) and 'Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx' (Bus_Stop_Info) Datasets
# Recall 'All_Routes_Complete.csv' is the file which was obtained from MPT website consisting of all Bus Schedules in sequential order
# Recall 'Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx' consits of the Longitude and Latitude data of all Bus Stops defined in 'All_Routes_Complete.csv' 
All_Routes = pd.read_csv("C://Users//Owner//ICT5012 - Disseration//Scraping Route Names from MPT Website//Results//All_Routes_Complete.csv", low_memory = False)
Bus_Stop_Info = pd.read_excel("C://Users//Owner//ICT5012 - Disseration//Obtaining Longitude and Latitude for all Bus Stops//Results (Checks Done + Manual Adjustment)//Bus_Stop_Lon_Lat_Data_Final_Manual_Update.xlsx")

In [None]:
# Step 1.2 - Create 'Stop Island' column
# To differentiate between stops in Malta and Gozo, stops with correspoding 'Route Number' belonging to the 'Gozitan_Route_Number'
# list will be labelled as 'GOZO STOP' whilst all other stops will be labelled as 'MALTA STOP' using a column entitled 'Stop Island' 

Gozitan_Route_Number = ['301', '302', '303',
                        '305', '306', '307',
                        '308', '309', '310',
                        '311', '312', '313',
                        '322', '323', '330',
                        'N301']

All_Routes['Stop Island'] = np.where(All_Routes['Route Number'].isin(Gozitan_Route_Number), 'GOZO STOP', 'MALTA STOP')

In [None]:
# Step 1.3 - Create 'Time_Count' column
# The 'Time_Count' column will be added to count the number of buses operating throughout one day for a particular route.
# 'Time_Count' will be used as another method to identify between different routes having the same 'Route Number', 'Route Direction' and 'Date' column

# Select all columns in 'All_Routes' that start with 'Stop Time'
Stop_Time_Columns = [col for col in All_Routes.columns if col.startswith('Stop Time')]
# Row-wise count all'Stop Time' columns which are filled in and populate the 'Time_Count' column with sum 
All_Routes['Time_Count'] = All_Routes[Stop_Time_Columns].notna().sum(axis=1)


In [None]:
# Step 1.4 - Merge 'Longitude_Final' and 'Latitude_Final' columns from 'Bus_Stop_Info' dataframe to 'All_Routes' dataframe

# Create column entitled 'Stops - City Name - Stop Island' (Concatenation of 'Stops', 'City Name' and 'Stop Island' columns) 
# in both 'All_Routes' and 'Bus_Stop_Info'
All_Routes['Stops - City Name - Stop Island'] = All_Routes['Stops'] + ' - ' + All_Routes['City Name'] + ' - ' + All_Routes['Stop Island']
Bus_Stop_Info['Stops - City Name - Stop Island'] = Bus_Stop_Info['Stops'] + ' - ' + Bus_Stop_Info['City Name'] + ' - ' + Bus_Stop_Info['Stop_Island']
# Merge 'Bus_Stop_Info' Dataframe to 'All_Routes' such that 'All_Routes' dataframe will have 'Longitude_Final' and 'Latitude_Final' values for all corresponding entries
All_Routes = pd.merge(All_Routes, Bus_Stop_Info[['Stops - City Name - Stop Island', 'Longitude_Final', 'Latitude_Final']], on = 'Stops - City Name - Stop Island', how = 'left', sort = False)

In [None]:
# Step 1.5 - Create a unique identifier for all Bus Stops ('Stops') entitled 'Bus_Stop_ID'

# Obtain Dataframe entitled 'Distinct_Coordinates' consisting only of entries with distinct 'Latitude_Final' 
# and 'Longitude_Final' pairs
Distinct_Coordinates = All_Routes.drop_duplicates(subset = ['Latitude_Final','Longitude_Final'])
# In 'Distinct_Coordinates' add column 'Stop ID' with entries with format Stop_X where X is a value from 0 up to length of 
# 'Distinct_Coordinates' dataframe
Distinct_Coordinates['Stop ID'] = [f'Stop_{i}' for i in range(len(Distinct_Coordinates))]

# Create Dictionary entitled 'Bus_Stop_ID' consisting of corresponding 'Latitude_Final', 'Longitude_Final' and 'Stop ID' values 
Bus_Stop_ID = dict(
    zip(
        zip(Distinct_Coordinates['Latitude_Final'], Distinct_Coordinates['Longitude_Final']),
        Distinct_Coordinates['Stop ID']
    )
)

# Using 'Bus_Stop_ID' dictionary label Bus Stops ('Stops') with their corresponding unique identifier.
# Column is labelled as 'Bus_Stop_ID'
# 4 - Using 'Bus_Stop_ID' dict to label All_Routes
All_Routes['Bus_Stop_ID'] = All_Routes.apply(
    lambda row: Bus_Stop_ID.get((row['Latitude_Final'], row['Longitude_Final']), None), axis=1
)


In [None]:
# Step 1.6 - Create Column entitled 'Next_Bus_Stop_ID' consisting of the unique identifier of the upcoming stop in the route

# Since All_Routes ('All_Routes_Complete.csv') was extracted in sequential order from MPT website, the next row in All_Routes
# corresponds to the upcoming Bus Stop (Given 'Route Number', 'Route Direction' and 'Date' columns remain the same).
# Hence, 'Reset_Conditions' is defined such that if any of 'Route Number', 'Route Direction' or 'Date' are different in
# in the upcoming stop then upcoming stop than it is not considered to be a continuation of the current route.
Reset_Conditions = (
    All_Routes['Route Number'].shift(-1) != All_Routes['Route Number']) | \
    (All_Routes['Route Direction'].shift(-1) != All_Routes['Route Direction']) | \
    (All_Routes['Date'].shift(-1) != All_Routes['Date'])

# In 'All_Routes' create column 'Next_Bus_Stop_ID' consisting of the upcoming 'Bus_Stop_ID'
All_Routes['Next_Bus_Stop_ID'] = All_Routes['Bus_Stop_ID'].shift(-1)
# If conditions defined in 'Reset_Conditions' are met, then 'Next_Bus_Stop_ID' should be blank
All_Routes.loc[Reset_Conditions, 'Next_Bus_Stop_ID'] = None


In [None]:
# Step 1.7 - Create a Unique identifier used to define connection between 'Bus_Stop_ID and 'Next_Bus_Stop_ID. 
# This is done by concatinating the 'Bus_Stop_ID' and 'Next_Bus_Stop_ID' columns

All_Routes['Bus_Stop_Next_Bus_Stop'] = All_Routes['Bus_Stop_ID'] + '_to_' + All_Routes['Next_Bus_Stop_ID']

In [9]:
# Step 2.1 - Obtain List of Bus Terminals

# Initialise all entries in 'Bus_Terminal' as 0
All_Routes['Bus_Terminal'] = 0
# If Reset_Conditions defined above fails, then Bus Stop considered in 'Bus_Stop_ID' is a Bus Terminal
All_Routes.loc[Reset_Conditions, 'Bus_Terminal'] = 1
OG_BUS_Terminal_COLUMN = All_Routes['Bus_Terminal'].copy()
# Iterate through the original column
for i in range(len(OG_BUS_Terminal_COLUMN) - 1):
    if OG_BUS_Terminal_COLUMN[i] == 1:
        All_Routes.loc[i + 1, 'Bus_Terminal'] = 1
# Initialising First Row as Bus Terminal
All_Routes.loc[0, 'Bus_Terminal'] = 1

In [10]:
# Obtain only entries which correspond to a Bus Terminals. 
# Duplicates will be present since the same Bus Terminals may be utilised for multiple 'Route Numebr' and 'Route Direction'
Bus_Terminals_DataFrame = All_Routes[All_Routes['Bus_Terminal'] == 1]

In [11]:
# Define empty DataFrame to Store all Bus Terminals in Malta and Gozo
Bus_Terminals_DataFrame_Malta = pd.DataFrame(columns=Bus_Terminals_DataFrame.columns)
Bus_Terminals_DataFrame_Gozo = pd.DataFrame(columns=Bus_Terminals_DataFrame.columns)

# Contain List of traversed Bus_Stop_ID
bus_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(Bus_Terminals_DataFrame)):
    bus_stop_id = Bus_Terminals_DataFrame.iloc[i]['Bus_Stop_ID']
    
    if bus_stop_id not in bus_stop_ids:
        bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs
        
        # Check the island and append the row
        if Bus_Terminals_DataFrame.iloc[i]['Stop Island'] == 'MALTA STOP':
            Bus_Terminals_DataFrame_Malta = pd.concat(
                [Bus_Terminals_DataFrame_Malta, Bus_Terminals_DataFrame.iloc[[i]]],
                ignore_index=True
            )
        else:
            Bus_Terminals_DataFrame_Gozo = pd.concat(
                [Bus_Terminals_DataFrame_Gozo, Bus_Terminals_DataFrame.iloc[[i]]],
                ignore_index=True
            )        

In [13]:
# Save Dataframes
Bus_Terminals_DataFrame_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Terminals_DataFrame_Malta.csv')
Bus_Terminals_DataFrame_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Terminals_DataFrame_Gozo.csv')

In [14]:
# Obtain only entries which correspond to a Bus Terminals. 
# Duplicates will be present since the same Bus Terminals may be utilised for multiple 'Route Numebr' and 'Route Direction'
Normal_Bus_DataFrame = All_Routes[All_Routes['Bus_Terminal'] == 0]

In [15]:
# Define empty DataFrame to Store all Bus Terminals in Malta and Gozo
NormalStops_DataFrame_Malta = pd.DataFrame(columns=Normal_Bus_DataFrame.columns)
NormalStops_DataFrame_Gozo = pd.DataFrame(columns=Normal_Bus_DataFrame.columns)

# Contain List of traversed Bus_Stop_ID
Normal_bus_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(Normal_Bus_DataFrame)):
    Normal_bus_stop_id = Normal_Bus_DataFrame.iloc[i]['Bus_Stop_ID']
    
    if Normal_bus_stop_id not in Normal_bus_stop_ids:
        Normal_bus_stop_ids.append(Normal_bus_stop_id)  # Add to list of seen IDs
        
        # Check the island and append the row
        if Normal_Bus_DataFrame.iloc[i]['Stop Island'] == 'MALTA STOP':
            NormalStops_DataFrame_Malta = pd.concat(
                [NormalStops_DataFrame_Malta, Normal_Bus_DataFrame.iloc[[i]]],
                ignore_index=True
            )
        else:
            NormalStops_DataFrame_Gozo = pd.concat(
                [NormalStops_DataFrame_Gozo, Normal_Bus_DataFrame.iloc[[i]]],
                ignore_index=True
            )        

In [16]:
# Save Dataframes
NormalStops_DataFrame_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//NormalStops_DataFrame_Malta.csv')
NormalStops_DataFrame_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//NormalStops_DataFrame_Gozo.csv')


In [17]:
# Bus Stops used in terminals/terminals used as Bus Stops
# Find items in common between the two lists produced above 'bus_stop_ids' and 'Normal_bus_stop_ids'
Normal_and_BusTerminals_ids = [item for item in bus_stop_ids if item in Normal_bus_stop_ids]

In [18]:
# Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
Normal_and_BusTerminals_DataFrame_Malta = pd.DataFrame(columns=All_Routes.columns)
Normal_and_BusTerminals_DataFrame_Gozo = pd.DataFrame(columns=All_Routes.columns)


# Store Bus Stops to avoid duplicates
compare_bus_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(All_Routes)):
    bus_stop_id = All_Routes.iloc[i]['Bus_Stop_ID']
    
    if bus_stop_id in Normal_and_BusTerminals_ids and bus_stop_id not in compare_bus_stop_ids:
        compare_bus_stop_ids.append(bus_stop_id)  # Add to list of seen IDs
        
        # Check the island and append the row
        if All_Routes.iloc[i]['Stop Island'] == 'MALTA STOP':
            Normal_and_BusTerminals_DataFrame_Malta = pd.concat(
                [Normal_and_BusTerminals_DataFrame_Malta, All_Routes.iloc[[i]]],
                ignore_index=True
            )
        else:
            Normal_and_BusTerminals_DataFrame_Gozo = pd.concat(
                [Normal_and_BusTerminals_DataFrame_Gozo, All_Routes.iloc[[i]]],
                ignore_index=True
            )        

In [19]:
# Saving DataFrames
Normal_and_BusTerminals_DataFrame_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Normal_and_BusTerminals_DataFrame_Malta.csv')
Normal_and_BusTerminals_DataFrame_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Normal_and_BusTerminals_DataFrame_Gozo.csv')

In [20]:
# Obtaining Stops which are only used as Terminals
BusTerminals_Only_ids = [item for item in bus_stop_ids if item not in Normal_bus_stop_ids]

In [21]:
# Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
BusTerminals_Only_DataFrame_Malta = pd.DataFrame(columns=Bus_Terminals_DataFrame.columns)
BusTerminals_Only_DataFrame_Gozo = pd.DataFrame(columns=Bus_Terminals_DataFrame.columns)


# Store Bus Stops to avoid duplicates
Only_Terminal_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(Bus_Terminals_DataFrame)):
    Only_Terminal_stop_id = Bus_Terminals_DataFrame.iloc[i]['Bus_Stop_ID']
    
    if Only_Terminal_stop_id in BusTerminals_Only_ids and Only_Terminal_stop_id not in Only_Terminal_stop_ids:
        Only_Terminal_stop_ids.append(Only_Terminal_stop_id)  # Add to list of seen IDs
        
        # Check the island and append the row
        if Bus_Terminals_DataFrame.iloc[i]['Stop Island'] == 'MALTA STOP':
            BusTerminals_Only_DataFrame_Malta = pd.concat(
                [BusTerminals_Only_DataFrame_Malta, Bus_Terminals_DataFrame.iloc[[i]]],
                ignore_index=True
            )
        else:
            BusTerminals_Only_DataFrame_Gozo = pd.concat(
                [BusTerminals_Only_DataFrame_Gozo, Bus_Terminals_DataFrame.iloc[[i]]],
                ignore_index=True
            )        

In [23]:
# Saving DataFrames
BusTerminals_Only_DataFrame_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//BusTerminals_Only_DataFrame_Malta.csv')
BusTerminals_Only_DataFrame_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//BusTerminals_Only_DataFrame_Gozo.csv')


In [28]:
# Obtaining Stops which are only used as regular Bus Stops
len(bus_stop_ids)

146

In [33]:
# Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
NormalStops_Only_DataFrame_Malta = pd.DataFrame(columns=All_Routes.columns)
NormalStops_Only_DataFrame_Gozo = pd.DataFrame(columns=All_Routes.columns)


# Store Bus Stops to avoid duplicates
Only_Normal_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(All_Routes)):
    Only_Normal_stop_id = All_Routes.iloc[i]['Bus_Stop_ID']
    
    if Only_Normal_stop_id not in bus_stop_ids and Only_Normal_stop_id not in Only_Normal_stop_ids:
        Only_Normal_stop_ids.append(Only_Normal_stop_id)  # Add to list of seen IDs
        
        # Check the island and append the row
        if All_Routes.iloc[i]['Stop Island'] == 'MALTA STOP':
            NormalStops_Only_DataFrame_Malta = pd.concat(
                [NormalStops_Only_DataFrame_Malta, All_Routes.iloc[[i]]],
                ignore_index=True
            )
        else:
            NormalStops_Only_DataFrame_Gozo = pd.concat(
                [NormalStops_Only_DataFrame_Gozo, All_Routes.iloc[[i]]],
                ignore_index=True
            )        

In [50]:
# Saving DataFrames
NormalStops_Only_DataFrame_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//NormalStops_Only_DataFrame_Malta.csv')
NormalStops_Only_DataFrame_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//NormalStops_Only_DataFrame_Gozo.csv')


In [49]:
NormalStops_Only_DataFrame_Gozo

Unnamed: 0,Route Number,Route Direction,Stops,City Name,Date,Stop Time 1,Stop Time 2,Stop Time 3,Stop Time 4,Stop Time 5,...,Stop Time 78,Stop Island,Time_Count,Stops - City Name - Stop Island,Longitude_Final,Latitude_Final,Bus_Stop_ID,Next_Bus_Stop_ID,Bus_Stop_Next_Bus_Stop,Bus_Terminal
0,301,Victoria - Vapur,Mizzi,"Ir-Rabat, Ghawdex","Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:05,05:30,06:00,06:15,07:00,...,,GOZO STOP,40,"Mizzi - Ir-Rabat, Ghawdex - GOZO STOP",14.243909,36.043859,Stop_1138,Stop_1139,Stop_1138_to_Stop_1139,0
1,301,Victoria - Vapur,Fortunato,"Ir-Rabat, Ghawdex","Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:06,05:31,06:01,06:16,07:01,...,,GOZO STOP,40,"Fortunato - Ir-Rabat, Ghawdex - GOZO STOP",14.245147,36.043736,Stop_1139,Stop_1140,Stop_1139_to_Stop_1140,0
2,301,Victoria - Vapur,Viani,"Ir-Rabat, Ghawdex","Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:06,05:31,06:01,06:16,07:01,...,,GOZO STOP,40,"Viani - Ir-Rabat, Ghawdex - GOZO STOP",14.248008,36.043239,Stop_1140,Stop_1141,Stop_1140_to_Stop_1141,0
3,301,Victoria - Vapur,Biccerija,"Ir-Rabat, Ghawdex","Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:08,05:33,06:03,06:18,07:03,...,,GOZO STOP,40,"Biccerija - Ir-Rabat, Ghawdex - GOZO STOP",14.253107,36.039305,Stop_1141,Stop_1142,Stop_1141_to_Stop_1142,0
4,301,Victoria - Vapur,Xewkija,Ix-Xewkija,"Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:09,05:34,06:04,06:19,07:04,...,,GOZO STOP,40,Xewkija - Ix-Xewkija - GOZO STOP,14.255956,36.037844,Stop_1142,Stop_1143,Stop_1142_to_Stop_1143,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,323,Vapur - Victoria,Benghazi,Ghajnsielem,"Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:47,06:47,07:48,08:49,09:49,...,,GOZO STOP,18,Benghazi - Ghajnsielem - GOZO STOP,14.288683,36.027223,Stop_1379,Stop_1380,Stop_1379_to_Stop_1380,0
227,323,Vapur - Victoria,Bahhar,Ghajnsielem,"Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:48,06:48,07:49,08:50,09:50,...,,GOZO STOP,18,Bahhar - Ghajnsielem - GOZO STOP,14.285944,36.027974,Stop_1380,Stop_1381,Stop_1380_to_Stop_1381,0
228,323,Vapur - Victoria,Cief,Ghajnsielem,"Wednesday, Thursday, Friday, Monday, Tuesday, ...",05:48,06:48,07:50,08:51,09:51,...,,GOZO STOP,18,Cief - Ghajnsielem - GOZO STOP,14.282760,36.028067,Stop_1381,Stop_1148,Stop_1381_to_Stop_1148,0
229,330,Victoria - Xlendi,Giljan,"Il-Fontana, Ghawdex","Wednesday, Thursday, Friday, Monday, Tuesday, ...",09:07,11:07,13:07,15:07,17:07,...,,GOZO STOP,5,"Giljan - Il-Fontana, Ghawdex - GOZO STOP",14.238102,36.039257,Stop_1382,Stop_1383,Stop_1382_to_Stop_1383,0


In [36]:
NormalStops_Only_DataFrame_Malta

Unnamed: 0,Route Number,Route Direction,Stops,City Name,Date,Stop Time 1,Stop Time 2,Stop Time 3,Stop Time 4,Stop Time 5,...,Stop Time 78,Stop Island,Time_Count,Stops - City Name - Stop Island,Longitude_Final,Latitude_Final,Bus_Stop_ID,Next_Bus_Stop_ID,Bus_Stop_Next_Bus_Stop,Bus_Terminal
0,X1,Ajruport - Cirkewwa,Avjazzjoni,Hal Luqa,Monday,05:09,05:39,05:54,06:40,07:25,...,,MALTA STOP,26,Avjazzjoni - Hal Luqa - MALTA STOP,14.492289,35.854831,Stop_1,Stop_2,Stop_1_to_Stop_2,0
1,X1,Ajruport - Cirkewwa,Mitjar,Hal Luqa,Monday,05:10,05:40,05:55,06:41,07:26,...,,MALTA STOP,26,Mitjar - Hal Luqa - MALTA STOP,14.486949,35.855699,Stop_2,Stop_3,Stop_2_to_Stop_3,0
2,X1,Ajruport - Cirkewwa,Ingieret,San Vincenz,Monday,05:14,05:45,06:00,06:47,07:32,...,,MALTA STOP,26,Ingieret - San Vincenz - MALTA STOP,14.482673,35.870979,Stop_3,Stop_4,Stop_3_to_Stop_4,0
3,X1,Ajruport - Cirkewwa,Marsa Park & Ride 1,Il-Marsa,Monday,05:18,05:48,06:03,06:51,07:36,...,,MALTA STOP,26,Marsa Park & Ride 1 - Il-Marsa - MALTA STOP,14.484522,35.878715,Stop_4,Stop_5,Stop_4_to_Stop_5,0
4,X1,Ajruport - Cirkewwa,Universita 1,L-Imsida,Monday,05:26,05:57,06:12,07:03,07:48,...,,MALTA STOP,26,Universita 1 - L-Imsida - MALTA STOP,14.479873,35.901429,Stop_5,Stop_6,Stop_5_to_Stop_6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1002,250,Ghadira - Valletta,Tunezja,Il-Mosta,Monday - Friday,06:42,07:42,08:42,09:36,10:36,...,,MALTA STOP,16,Tunezja - Il-Mosta - MALTA STOP,14.437851,35.906799,Stop_1132,Stop_543,Stop_1132_to_Stop_543,0
1003,260,Valletta - Gharghur,Fabriani,In-Naxxar,Monday - Friday,07:14,08:14,09:07,10:07,11:03,...,,MALTA STOP,14,Fabriani - In-Naxxar - MALTA STOP,14.445870,35.918128,Stop_1133,Stop_1134,Stop_1133_to_Stop_1134,0
1004,260,Valletta - Gharghur,Konverzjoni,In-Naxxar,Monday - Friday,07:15,08:15,09:09,10:09,11:04,...,,MALTA STOP,14,Konverzjoni - In-Naxxar - MALTA STOP,14.442180,35.920268,Stop_1134,Stop_990,Stop_1134_to_Stop_990,0
1005,260,Gharghur - Valletta,Konverzjioni,In-Naxxar,Monday - Friday,07:37,08:37,09:29,10:29,11:24,...,,MALTA STOP,14,Konverzjioni - In-Naxxar - MALTA STOP,14.442042,35.920630,Stop_1135,Stop_1133,Stop_1135_to_Stop_1133,0


In [37]:
Bus_Stop_Info

Unnamed: 0,Stops,City Name,Stop_Island,Stops_STANDARD,City Name_STANDARD,City_and_Bus_Stop_OSM,Longitude_OSM,Latitude_OSM,City_Name_OSM,Stop_Name_OSM,City_Name_OSM_STANDARD,Stop_Name_OSM_STANDARD,Stop_GMAPS,City_GMAPS,Longitude_GMAPS,Latitude_GMAPS,Longitude_Final,Latitude_Final,Stops - City Name - Stop Island
0,Airport 1,Hal Luqa,MALTA STOP,airport 1,hal luqa,Ħal Luqa - Airport 1,14.495967,35.849412,Ħal Luqa,Airport 1,hal luqa,airport 1,,,,,14.495967,35.849412,Airport 1 - Hal Luqa - MALTA STOP
1,Avjazzjoni,Hal Luqa,MALTA STOP,avjazzjoni,hal luqa,Ħal Luqa - Avjazzjoni,14.492289,35.854831,Ħal Luqa,Avjazzjoni,hal luqa,avjazzjoni,,,,,14.492289,35.854831,Avjazzjoni - Hal Luqa - MALTA STOP
2,Mitjar,Hal Luqa,MALTA STOP,mitjar,hal luqa,Ħal Luqa - Mitjar,14.486949,35.855699,Ħal Luqa,Mitjar,hal luqa,mitjar,,,,,14.486949,35.855699,Mitjar - Hal Luqa - MALTA STOP
3,Universita 1,L-Imsida,MALTA STOP,universita 1,l-imsida,L-Imsida - Università 1,14.479873,35.901429,L-Imsida,Università 1,l-imsida,universita 1,,,,,14.479873,35.901429,Universita 1 - L-Imsida - MALTA STOP
4,Mater Dei 2,Mater Dei,MALTA STOP,mater dei 2,mater dei,Mater Dei - Mater Dei 2,14.476578,35.900845,Mater Dei,Mater Dei 2,mater dei,mater dei 2,,,,,14.476578,35.900845,Mater Dei 2 - Mater Dei - MALTA STOP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1405,Ciangura Lawrenz,San Lawrenz,GOZO STOP,ciangura lawrenz,san lawrenz,,,,,,,,Ciangura Lawrenz,San Lawrenz,14.203347,36.055285,14.203347,36.055285,Ciangura Lawrenz - San Lawrenz - GOZO STOP
1406,Hozna,Ghajn Hosna,GOZO STOP,hozna,ghajn hosna,,,,,,,,Hozna,Xagħra,14.277536,36.050472,14.277536,36.050472,Hozna - Ghajn Hosna - GOZO STOP
1407,D Amato,Ix-Xewkija,GOZO STOP,d amato,ix-xewkija,,,,,,,,D Amato,Xewkija,14.263417,36.032292,14.263417,36.032292,D Amato - Ix-Xewkija - GOZO STOP
1408,Bahhar,Ghajnsielem,GOZO STOP,bahhar,ghajnsielem,,,,,,,,Bahhar,Ghajnsielem,14.285944,36.027974,14.285944,36.027974,Bahhar - Ghajnsielem - GOZO STOP


In [47]:
# Define empty DataFrame to Store all Bus Stops used as Normal Bus Stops and Terminals in Malta and Gozo
Bus_Stop_Info_Malta = pd.DataFrame(columns=Bus_Stop_Info.columns)
Bus_Stop_Info_Gozo = pd.DataFrame(columns=Bus_Stop_Info.columns)


# Store Bus Stops to avoid duplicates
Only_Normal_stop_ids = []

# Consider only Distinct Bus Terminals
for i in range(len(Bus_Stop_Info)):
    if Bus_Stop_Info.iloc[i]['Stop_Island'] == 'MALTA STOP':
        Bus_Stop_Info_Malta = pd.concat([Bus_Stop_Info_Malta, Bus_Stop_Info.iloc[[i]]],
                                                     ignore_index=True)
    else:
        Bus_Stop_Info_Gozo = pd.concat([Bus_Stop_Info_Gozo, Bus_Stop_Info.iloc[[i]]],
                                                    ignore_index=True)        

In [48]:
# Saving DataFrames
Bus_Stop_Info.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Stop_Info.csv')
Bus_Stop_Info_Malta.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Stop_Info_Malta.csv')
Bus_Stop_Info_Gozo.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Bus_Stop_Info_Gozo.csv')


In [53]:
All_Routes.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//All_Routes.csv')

In [59]:
# Obtain Number of Routes
Unique_Dates = All_Routes['Date'].unique()

#Changes to be made
# 1 - Change 'Monday - Friday' to 'Monday, Tuesday, Wednesday, Thursday, Friday'
# 2 - Change 'Monday - Sunday' to 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday'
# 3 - Change 'Monday - Saturday' to 'Monday, Tuesday, Wednesday, Thursday, Firday, Saturday'
All_Routes_Copy = All_Routes.copy()
All_Routes_Copy['Date'] = All_Routes_Copy['Date'].replace({'Monday - Friday': 'Monday, Tuesday, Wednesday, Thursday, Friday',
                                                           'Monday - Sunday': 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday',
                                                           'Monday - Saturday': 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday'})


['Monday' 'Tuesday, Wednesday, Thursday, Friday' 'Saturday' 'Sunday'
 'Monday, Tuesday, Wednesday, Thursday, Friday' 'Monday - Friday'
 'Saturday, Sunday' 'Monday - Sunday'
 'Monday, Tuesday, Wednesday, Thursday' 'Friday, Saturday, Sunday'
 'Monday, Tuesday, Wednesday, Thursday, Sunday' 'Friday, Saturday'
 'Monday - Saturday' 'Wednesday, Thursday, Friday, Monday, Tuesday'
 'Wednesday, Thursday, Friday, Monday, Tuesday, Sunday'
 'Wednesday, Thursday, Friday, Tuesday' 'Wednesday, Thursday, Tuesday'
 'Friday, Monday']


In [73]:
# List of days
List_Dates = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Initialize dictionaries to store the DataFrames
Rows_Date_Dict = {}
Malta_Dict = {}
Gozo_Dict = {}

for c in List_Dates:
    # Filter rows where 'Date' contains the day
    Rows_Date_Dict[c] = All_Routes_Copy[All_Routes_Copy['Date'].astype(str).str.contains(c, na=False)]

    # Split into Malta and Gozo based on 'Stop Island'
    Malta_Dict[c] = Rows_Date_Dict[c][Rows_Date_Dict[c]['Stop Island'] == 'MALTA STOP'].reset_index(drop=True)
    Gozo_Dict[c] = Rows_Date_Dict[c][Rows_Date_Dict[c]['Stop Island'] == 'GOZO STOP'].reset_index(drop=True)


In [76]:
Malta_Dict['Monday'].to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//All_Routes_Malta_Monday.csv')

In [93]:
Monday_Routes = Malta_Dict['Monday']
Tuesday_Routes = Malta_Dict['Tuesday']
Wednesday_Routes = Malta_Dict['Wednesday']
Thursday_Routes = Malta_Dict['Thursday']
Friday_Routes = Malta_Dict['Friday']
Saturday_Routes = Malta_Dict['Saturday']
Sunday_Routes = Malta_Dict['Sunday']

In [94]:
Unique_Routes_Monday_Malta = Monday_Routes['Route Number'].unique()
Unique_Routes_Tuesday_Malta = Tuesday_Routes['Route Number'].unique()
Unique_Routes_Wednesday_Malta = Wednesday_Routes['Route Number'].unique()
Unique_Routes_Thursday_Malta = Thursday_Routes['Route Number'].unique()
Unique_Routes_Friday_Malta = Friday_Routes['Route Number'].unique()
Unique_Routes_Saturday_Malta = Saturday_Routes['Route Number'].unique()
Unique_Routes_Sunday_Malta = Sunday_Routes['Route Number'].unique()

In [95]:
print(len(Unique_Routes_Monday_Malta))
print(len(Unique_Routes_Tuesday_Malta))
print(len(Unique_Routes_Wednesday_Malta))
print(len(Unique_Routes_Thursday_Malta))
print(len(Unique_Routes_Friday_Malta))
print(len(Unique_Routes_Saturday_Malta))
print(len(Unique_Routes_Sunday_Malta))

104
104
104
104
104
99
100


In [97]:
#Defininglist of Night Routes and Tallinja Direct Routes (Obtained from: https://www.publictransport.com.mt/en/timetables)
Night_Direct_Routes = ['N11', 'N13', 'N212', 'N62', 'N82',
                       'N91', 'N48', 'N301', 'TD2', 'TD10',
                       'TD13']

In [98]:
# Removing entries from if 'Route Number' value is in  'Night_Direct_Routes' list
Monday_Routes_Truncated = Monday_Routes[~Monday_Routes['Route Number'].isin(Night_Direct_Routes)]
Tuesday_Routes_Truncated = Tuesday_Routes[~Tuesday_Routes['Route Number'].isin(Night_Direct_Routes)]
Wednesday_Routes_Truncated = Wednesday_Routes[~Wednesday_Routes['Route Number'].isin(Night_Direct_Routes)]
Thursday_Routes_Truncated = Thursday_Routes[~Thursday_Routes['Route Number'].isin(Night_Direct_Routes)]
Friday_Routes_Truncated = Friday_Routes[~Friday_Routes['Route Number'].isin(Night_Direct_Routes)]
Saturday_Routes_Truncated = Saturday_Routes[~Saturday_Routes['Route Number'].isin(Night_Direct_Routes)]
Sunday_Routes_Truncated = Sunday_Routes[~Sunday_Routes['Route Number'].isin(Night_Direct_Routes)]


In [99]:
Unique_Routes_Monday_Malta_Truncated = Monday_Routes_Truncated['Route Number'].unique()
Unique_Routes_Tuesday_Malta_Truncated = Tuesday_Routes_Truncated['Route Number'].unique()
Unique_Routes_Wednesday_Malta_Truncated = Wednesday_Routes_Truncated['Route Number'].unique()
Unique_Routes_Thursday_Malta_Truncated = Thursday_Routes_Truncated['Route Number'].unique()
Unique_Routes_Friday_Malta_Truncated = Friday_Routes_Truncated['Route Number'].unique()
Unique_Routes_Saturday_Malta_Truncated = Saturday_Routes_Truncated['Route Number'].unique()
Unique_Routes_Sunday_Malta_Truncated = Sunday_Routes_Truncated['Route Number'].unique()

In [100]:
print(len(Unique_Routes_Monday_Malta_Truncated))
print(len(Unique_Routes_Tuesday_Malta_Truncated))
print(len(Unique_Routes_Wednesday_Malta_Truncated))
print(len(Unique_Routes_Thursday_Malta_Truncated))
print(len(Unique_Routes_Friday_Malta_Truncated))
print(len(Unique_Routes_Saturday_Malta_Truncated))
print(len(Unique_Routes_Sunday_Malta_Truncated))

98
98
98
98
98
93
93


In [101]:
Unique_Routes_Monday_Malta

array(['X1', 'X1A', '119', 'X2', 'X3', 'X4', 'N11', 'N13', 'N212', 'N91',
       '1', '2', '3', '4', '13', '13A', '14', '15', '16', '21', '22',
       '24', '25', '31', '32', '35', '41', '42', '42_2', '43', '44', '45',
       '46', '47', '48', '50', '51', '52', '53', '54', '56', '58', '58A',
       '61', '62', '63', '64', '71', '72', '73', '74', '80', '82', '83',
       '84', '85', '88', '90', '91', '92', '93', '94', '101', '103',
       '106', '109', '109A', '110', '117', '120', '121', '122', '124',
       '130', '133', '135', '181', '182', '186', '201', '202', '203',
       '204', '206', '209', '210', '212', '221', '213', '218', '222',
       '223', '225', '226', '233', '238', '250', '260', '280', 'TD2',
       'TD13', 'X300', '150', '300'], dtype=object)

In [105]:
# Create a dictionary mapping days to their respective routes
routes_dict = {
    'Monday': Unique_Routes_Monday_Malta.tolist(),
    'Tuesday': Unique_Routes_Tuesday_Malta.tolist(),
    'Wednesday': Unique_Routes_Wednesday_Malta.tolist(),
    'Thursday': Unique_Routes_Thursday_Malta.tolist(),
    'Friday': Unique_Routes_Friday_Malta.tolist(),
    'Saturday': Unique_Routes_Saturday_Malta.tolist(),
    'Sunday': Unique_Routes_Sunday_Malta.tolist()
}

# Find all unique route numbers
All_Unique_Routes = sorted(set(route for routes in routes_dict.values() for route in routes))

# Initialize an empty DataFrame with routes as rows and days as columns
Route_Table = pd.DataFrame(index=All_Unique_Routes, columns=routes_dict.keys())

# Fill the DataFrame with "✓" for presence
for day, routes in routes_dict.items():
    Route_Table[day] = np.where(Route_Table.index.isin(routes), '✓', '')

# Print the result
print(Route_Table)


     Monday Tuesday Wednesday Thursday Friday Saturday Sunday
1         ✓       ✓         ✓        ✓      ✓        ✓      ✓
101       ✓       ✓         ✓        ✓      ✓        ✓      ✓
103       ✓       ✓         ✓        ✓      ✓        ✓      ✓
106       ✓       ✓         ✓        ✓      ✓        ✓      ✓
109       ✓       ✓         ✓        ✓      ✓        ✓      ✓
...     ...     ...       ...      ...    ...      ...    ...
X1A       ✓       ✓         ✓        ✓      ✓                
X2        ✓       ✓         ✓        ✓      ✓        ✓      ✓
X3        ✓       ✓         ✓        ✓      ✓        ✓      ✓
X300      ✓       ✓         ✓        ✓      ✓                
X4        ✓       ✓         ✓        ✓      ✓        ✓      ✓

[105 rows x 7 columns]


In [107]:
Route_Table.to_csv('C://Users//Owner//ICT5012 - Disseration//Untitled Folder//Route_Presence_Per_Day.csv')

In [108]:
# Create a dictionary mapping days to their respective routes
routes_dict_truncated = {
    'Monday': Unique_Routes_Monday_Malta_Truncated.tolist(),
    'Tuesday': Unique_Routes_Tuesday_Malta_Truncated.tolist(),
    'Wednesday': Unique_Routes_Wednesday_Malta_Truncated.tolist(),
    'Thursday': Unique_Routes_Thursday_Malta_Truncated.tolist(),
    'Friday': Unique_Routes_Friday_Malta_Truncated.tolist(),
    'Saturday': Unique_Routes_Saturday_Malta_Truncated.tolist(),
    'Sunday': Unique_Routes_Sunday_Malta_Truncated.tolist()
}

# Find all unique route numbers
All_Unique_Routes_Truncated = sorted(set(route for routes in routes_dict_truncated.values() for route in routes))

# Initialize an empty DataFrame with routes as rows and days as columns
Route_Table_Truncated = pd.DataFrame(index=All_Unique_Routes_Truncated, columns=routes_dict_truncated.keys())

# Fill the DataFrame with "✓" for presence
for day, routes in routes_dict_truncated.items():
    Route_Table_Truncated[day] = np.where(Route_Table_Truncated.index.isin(routes), '✓', '')

# Print the result
print(Route_Table_Truncated)


     Monday Tuesday Wednesday Thursday Friday Saturday Sunday
1         ✓       ✓         ✓        ✓      ✓        ✓      ✓
101       ✓       ✓         ✓        ✓      ✓        ✓      ✓
103       ✓       ✓         ✓        ✓      ✓        ✓      ✓
106       ✓       ✓         ✓        ✓      ✓        ✓      ✓
109       ✓       ✓         ✓        ✓      ✓        ✓      ✓
...     ...     ...       ...      ...    ...      ...    ...
X1A       ✓       ✓         ✓        ✓      ✓                
X2        ✓       ✓         ✓        ✓      ✓        ✓      ✓
X3        ✓       ✓         ✓        ✓      ✓        ✓      ✓
X300      ✓       ✓         ✓        ✓      ✓                
X4        ✓       ✓         ✓        ✓      ✓        ✓      ✓

[98 rows x 7 columns]


In [111]:
Monday_Routes_Truncated

Unnamed: 0,Route Number,Route Direction,Stops,City Name,Date,Stop Time 1,Stop Time 2,Stop Time 3,Stop Time 4,Stop Time 5,...,Stop Time 78,Stop Island,Time_Count,Stops - City Name - Stop Island,Longitude_Final,Latitude_Final,Bus_Stop_ID,Next_Bus_Stop_ID,Bus_Stop_Next_Bus_Stop,Bus_Terminal
0,X1,Ajruport - Cirkewwa,Airport 1,Hal Luqa,Monday,05:08,05:38,05:53,06:38,07:23,...,,MALTA STOP,26,Airport 1 - Hal Luqa - MALTA STOP,14.495967,35.849412,Stop_0,Stop_1,Stop_0_to_Stop_1,1
1,X1,Ajruport - Cirkewwa,Avjazzjoni,Hal Luqa,Monday,05:09,05:39,05:54,06:40,07:25,...,,MALTA STOP,26,Avjazzjoni - Hal Luqa - MALTA STOP,14.492289,35.854831,Stop_1,Stop_2,Stop_1_to_Stop_2,0
2,X1,Ajruport - Cirkewwa,Mitjar,Hal Luqa,Monday,05:10,05:40,05:55,06:41,07:26,...,,MALTA STOP,26,Mitjar - Hal Luqa - MALTA STOP,14.486949,35.855699,Stop_2,Stop_3,Stop_2_to_Stop_3,0
3,X1,Ajruport - Cirkewwa,Ingieret,San Vincenz,Monday,05:14,05:45,06:00,06:47,07:32,...,,MALTA STOP,26,Ingieret - San Vincenz - MALTA STOP,14.482673,35.870979,Stop_3,Stop_4,Stop_3_to_Stop_4,0
4,X1,Ajruport - Cirkewwa,Marsa Park & Ride 1,Il-Marsa,Monday,05:18,05:48,06:03,06:51,07:36,...,,MALTA STOP,26,Marsa Park & Ride 1 - Il-Marsa - MALTA STOP,14.484522,35.878715,Stop_4,Stop_5,Stop_4_to_Stop_5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8949,300,Mater Dei - Lascaris,Kullegg,L-Imsida,"Monday, Tuesday, Wednesday, Thursday, Friday",06:22,08:22,09:22,10:22,11:22,...,,MALTA STOP,13,Kullegg - L-Imsida - MALTA STOP,14.490996,35.894680,Stop_228,Stop_227,Stop_228_to_Stop_227,0
8950,300,Mater Dei - Lascaris,Marina,L-Imsida,"Monday, Tuesday, Wednesday, Thursday, Friday",06:23,08:23,09:23,10:23,11:23,...,,MALTA STOP,13,Marina - L-Imsida - MALTA STOP,14.494457,35.895950,Stop_227,Stop_226,Stop_227_to_Stop_226,0
8951,300,Mater Dei - Lascaris,Pieta,Tal-Pietà,"Monday, Tuesday, Wednesday, Thursday, Friday",06:25,08:25,09:25,10:25,11:25,...,,MALTA STOP,13,Pieta - Tal-Pietà - MALTA STOP,14.497433,35.892146,Stop_226,Stop_237,Stop_226_to_Stop_237,0
8952,300,Mater Dei - Lascaris,Bombi 1,Il-Floriana,"Monday, Tuesday, Wednesday, Thursday, Friday",06:27,08:27,09:27,10:27,11:27,...,,MALTA STOP,13,Bombi 1 - Il-Floriana - MALTA STOP,14.501387,35.890324,Stop_237,Stop_1082,Stop_237_to_Stop_1082,0


In [112]:
Reset_Conditions_Malta_Monday = (
    Monday_Routes_Truncated['Route Number'].shift(-1) != Monday_Routes_Truncated['Route Number']) | \
    (Monday_Routes_Truncated['Route Direction'].shift(-1) != Monday_Routes_Truncated['Route Direction']) | \
    (Monday_Routes_Truncated['Date'].shift(-1) != Monday_Routes_Truncated['Date'])

In [160]:
Monday_Routes_Truncated['Concatenated_Columns'] = (
    Monday_Routes_Truncated['Route Number'] + '-' +
    Monday_Routes_Truncated['Route Direction'] + '-' +
    Monday_Routes_Truncated['Date']
)


# Step 1: Define groups based on the reset conditions
Monday_Routes_Truncated['Group_ID'] = (
    Monday_Routes_Truncated['Concatenated_Columns'].where(~Reset_Conditions_Malta_Monday)
    .ffill()
    .bfill()
)

# Step 2: Count the number of rows in each group
group_sizes = Monday_Routes_Truncated.groupby('Group_ID').size()

# Step 3: Map group sizes back to the original DataFrame
Monday_Routes_Truncated['Group'] = Monday_Routes_Truncated['Group_ID'].map(group_sizes)

# Step 5: Drop temporary columns if not needed
Monday_Routes_Truncated.drop(columns=['Concatenated_Columns', 'Group_ID'], inplace=True)

# Count the size of each group
partition_counts = Monday_Routes_Truncated.groupby(['Route Number', 'Group']).size()

# Find the maximum size for each Route Number
max_partition_per_route = partition_counts.groupby('Route Number').max()

# Reset the index for better readability
#result = max_partition_per_route.reset_index(name='Max Size')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Monday_Routes_Truncated['Concatenated_Columns'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Monday_Routes_Truncated['Group_ID'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Monday_Routes_Truncated['Group'] = Monday_Routes_Truncated['Group_ID'].map(group_sizes)
A value is trying to be s

In [161]:
max_partition_per_route

Route Number
1       27
101     69
103     33
106     58
109     46
        ..
X1A     17
X2      66
X3      70
X300    14
X4      34
Length: 98, dtype: int64

In [163]:
Reset_Conditions_Malta_Tuesday = (
    Tuesday_Routes_Truncated['Route Number'].shift(-1) != Tuesday_Routes_Truncated['Route Number']) | \
    (Tuesday_Routes_Truncated['Route Direction'].shift(-1) != Tuesday_Routes_Truncated['Route Direction']) | \
    (Tuesday_Routes_Truncated['Date'].shift(-1) != Tuesday_Routes_Truncated['Date'])

In [164]:
Tuesday_Routes_Truncated['Concatenated_Columns'] = (
    Tuesday_Routes_Truncated['Route Number'] + '-' +
    Tuesday_Routes_Truncated['Route Direction'] + '-' +
    Tuesday_Routes_Truncated['Date']
)


# Step 1: Define groups based on the reset conditions
Tuesday_Routes_Truncated['Group_ID'] = (
    Tuesday_Routes_Truncated['Concatenated_Columns'].where(~Reset_Conditions_Malta_Tuesday)
    .ffill()
    .bfill()
)

# Step 2: Count the number of rows in each group
group_sizes = Tuesday_Routes_Truncated.groupby('Group_ID').size()

# Step 3: Map group sizes back to the original DataFrame
Tuesday_Routes_Truncated['Group'] = Tuesday_Routes_Truncated['Group_ID'].map(group_sizes)

# Step 5: Drop temporary columns if not needed
Tuesday_Routes_Truncated.drop(columns=['Concatenated_Columns', 'Group_ID'], inplace=True)

# Count the size of each group
partition_counts = Tuesday_Routes_Truncated.groupby(['Route Number', 'Group']).size()

# Find the maximum size for each Route Number
max_partition_per_route_Tuesday = partition_counts.groupby('Route Number').max()

# Reset the index for better readability
#result = max_partition_per_route.reset_index(name='Max Size')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Tuesday_Routes_Truncated['Concatenated_Columns'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Tuesday_Routes_Truncated['Group_ID'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Tuesday_Routes_Truncated['Group'] = Tuesday_Routes_Truncated['Group_ID'].map(group_sizes)
A value is trying to 

In [165]:
max_partition_per_route_Tuesday

Route Number
1       27
101     69
103     33
106     58
109     46
        ..
X1A     17
X2      66
X3      70
X300    14
X4      34
Length: 98, dtype: int64