In [1]:
import re
from collections import defaultdict
from datetime import date, datetime

import numpy as np
import pandas as pd

In [2]:
# Import dataset
raw_data = pd.read_csv(
	'./scats_data.csv',
	dtype={
		'SCATS Number': int,
		'Location': str,
		'NB_LATITUDE': float,
		'NB_LONGITUDE': float
	}
)

# Rename select columns
raw_data.rename(columns={
	'SCATS Number': 'SCATS',
	'NB_LATITUDE': 'Latitude',
	'NB_LONGITUDE': 'Longitude',
}, inplace=True)

# SCATS is the intersection ID
# Location is [owner road] [direction from intersection] [other road in intersection]

raw_data.drop_duplicates(inplace=True)

# Fix Auburn N/Burwood intersection missing position
# https://www.openstreetmap.org/way/1092802786#map=19/-37.823687/145.045020
# south: -37.82542, 145.04346
# east: -37.82529, 145.04387
# west: -37.82518, 145.04301
# north: -37.82505, 145.04346 (estimated by Claude)
def fix_burwood_auburn_latitude(_latitude: float):
	# Do it this funky way to avoid floating point nonsense
	if _latitude == 0:
		return -37.82505
	else:
		return _latitude

def fix_burwood_auburn_longitude(_longitude: float):
	if _longitude == 0:
		return 145.04346
	else:
		return _longitude

raw_data['Latitude'] = raw_data['Latitude'].apply(fix_burwood_auburn_latitude)
raw_data['Longitude'] = raw_data['Longitude'].apply(fix_burwood_auburn_longitude)
raw_data

Unnamed: 0,SCATS,Location,Latitude,Longitude,Date,0:00,0:15,0:30,0:45,1:00,...,21:30,21:45,22:00,22:15,22:30,22:45,23:00,23:15,23:30,23:45
0,970,WARRIGAL_RD N of HIGH STREET_RD,-37.86703,145.09159,1/10/2006,86,83,52,58,59,...,114,97,97,66,81,50,59,47,29,34
1,970,WARRIGAL_RD N of HIGH STREET_RD,-37.86703,145.09159,2/10/2006,32,28,17,11,7,...,111,102,107,114,80,60,62,48,44,26
2,970,WARRIGAL_RD N of HIGH STREET_RD,-37.86703,145.09159,3/10/2006,26,32,21,14,10,...,130,132,114,86,93,90,73,57,29,40
3,970,WARRIGAL_RD N of HIGH STREET_RD,-37.86703,145.09159,4/10/2006,32,22,28,13,16,...,115,113,132,101,113,90,78,66,52,44
4,970,WARRIGAL_RD N of HIGH STREET_RD,-37.86703,145.09159,5/10/2006,40,39,21,11,16,...,171,120,116,113,99,91,61,55,49,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4187,4821,VICTORIA_ST W OF BURNLEY_ST,-37.81296,145.00830,27/10/2006,51,56,43,29,35,...,122,121,127,103,122,124,117,99,108,88
4188,4821,VICTORIA_ST W OF BURNLEY_ST,-37.81296,145.00830,28/10/2006,91,87,89,72,64,...,93,93,93,105,105,112,82,97,106,107
4189,4821,VICTORIA_ST W OF BURNLEY_ST,-37.81296,145.00830,29/10/2006,100,81,89,73,100,...,87,118,83,76,66,64,77,60,49,45
4190,4821,VICTORIA_ST W OF BURNLEY_ST,-37.81296,145.00830,30/10/2006,40,29,36,20,24,...,90,88,89,80,74,48,67,62,50,62


In [3]:
# Import site reference
raw_reference = pd.read_csv(
	'./scats_reference.csv',
	names=['SCATS', 'Intersection', 'Site_Type'],
	header=0,
	dtype={
		'SCATS': np.int32,
		'Intersection': str,
		'Site_Type': str
	}
)

raw_reference.drop_duplicates(inplace=True)
# Remove any site that isn't an intersection (rest are unused)
raw_reference = raw_reference[raw_reference.Site_Type == 'INT']
raw_reference.drop(columns={'Site_Type'}, inplace=True)
raw_reference

Unnamed: 0,SCATS,Intersection
0,964,ABBOTTS/CLELANDS DEVELOPMENTS
1,968,ABBOTTS/GAINE/MONASH
2,972,ABBOTTS/NATIONAL
3,983,ABBOTTS/REMINGTON
4,1053,ABBOTTSFORD/HAINES
...,...,...
4506,6075,WYNDHAM ST/VAUGHAN
4507,4107,YAN YEAN/IRONBARK
4509,5048,YARRA/EASTERN BEACH
4510,5081,YARRA/LT MALOP


In [4]:
# Perform an inner merge to keep only SCATS sites present in both tables
merged_df = pd.merge(raw_reference, raw_data, on='SCATS', how='inner')
merged_df

Unnamed: 0,SCATS,Intersection,Location,Latitude,Longitude,Date,0:00,0:15,0:30,0:45,...,21:30,21:45,22:00,22:15,22:30,22:45,23:00,23:15,23:30,23:45
0,4057,BALWYN/BELMORE,BALWYN_RD N OF BELMORE_RD,-37.80431,145.08197,1/10/2006,25,35,19,20,...,33,40,32,27,24,19,21,16,12,12
1,4057,BALWYN/BELMORE,BALWYN_RD N OF BELMORE_RD,-37.80431,145.08197,2/10/2006,5,5,3,3,...,44,47,37,25,22,18,11,13,13,11
2,4057,BALWYN/BELMORE,BALWYN_RD N OF BELMORE_RD,-37.80431,145.08197,3/10/2006,8,8,3,8,...,46,48,34,39,29,33,27,17,9,11
3,4057,BALWYN/BELMORE,BALWYN_RD N OF BELMORE_RD,-37.80431,145.08197,4/10/2006,6,5,4,5,...,47,54,40,26,31,37,21,18,24,7
4,4057,BALWYN/BELMORE,BALWYN_RD N OF BELMORE_RD,-37.80431,145.08197,5/10/2006,8,11,8,3,...,41,56,46,39,27,39,19,19,21,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4187,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD W OF BALWYN_RD,-37.81429,145.07951,27/10/2006,13,19,7,15,...,57,70,64,73,47,67,50,56,51,47
4188,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD W OF BALWYN_RD,-37.81429,145.07951,28/10/2006,48,24,27,22,...,58,56,54,51,51,51,93,89,56,46
4189,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD W OF BALWYN_RD,-37.81429,145.07951,29/10/2006,38,37,39,37,...,43,58,41,52,37,33,43,33,30,25
4190,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD W OF BALWYN_RD,-37.81429,145.07951,30/10/2006,15,17,10,6,...,65,54,49,47,46,31,45,39,23,25


In [15]:
# Extract location information
extracted = merged_df.copy()

def process_location(_locations: pd.Series):
	streets: list[str] = []
	directions: list[str] = []

	for _, item in _locations.items():
		parts: list[str] = re.split(' of ', item, flags=re.IGNORECASE)
		first_part = parts[0]

		# Get all words in the first part
		words = first_part.split()

		# Last word is the direction, everything before is the street
		direction = words[-1]
		street = ' '.join(words[:-1])

		streets.append(street)
		directions.append(direction)

	return streets, directions

streets, directions = process_location(extracted['Location'])
extracted.insert(3, 'Street', pd.Series(streets))
extracted.insert(4, 'Direction', pd.Series(directions))

def process_date(_dates: pd.Series):
	#dates: list[str] = []
	#years: list[int] = []
	#months:list[int] = []
	#days: list[int] = []
	#day_indexes: list[int] = []
	days_of_week = []

	for _, item in _dates.items():
		# Import as a datetime object
		date_obj = datetime.strptime(item, '%d/%m/%Y')
		#dates.append(date_obj.strftime("%Y-%m-%d"))
		#years.append(date_obj.year)
		#months.append(date_obj.month)
		#days.append(date_obj.day)
		#day_indexes.append((date_obj.date() - date(2000, 1, 1)).days)
		days_since_first_mon = (date_obj.date() - date(2000, 1, 3)).days % 7
		match days_since_first_mon:
			case 0:
				day_of_week = 'Monday'
			case 1:
				day_of_week = 'Tuesday'
			case 2:
				day_of_week = 'Wednesday'
			case 3:
				day_of_week = 'Thursday'
			case 4:
				day_of_week = 'Friday'
			case 5:
				day_of_week = 'Saturday'
			case 6:
				day_of_week = 'Sunday'
			case _:
				day_of_week = None
		days_of_week.append(days_since_first_mon)

	#return dates, years, months, days, day_indexes, days_of_week
	return days_of_week

# The only one of value might be day of week, but as an int
#dates, years, months, days, date_indexes, days_of_week = process_date(extracted['Date'])
days_of_week = process_date(extracted['Date'])
#extracted['Date'] = dates
extracted.insert(8,'Day_of_week', days_of_week)
#extracted.insert(8, 'DayIndex', date_indexes)
#extracted.insert(8,'Day', days)
#extracted.insert(8,'Month', months)
#extracted.insert(8,'Year', years)

# Remove the location and date columns since they're no longer needed
extracted.drop(columns=['Location', 'Date'], inplace=True)
extracted

Unnamed: 0,SCATS,Intersection,Street,Direction,Latitude,Longitude,Day_of_week,0:00,0:15,0:30,...,21:30,21:45,22:00,22:15,22:30,22:45,23:00,23:15,23:30,23:45
0,4057,BALWYN/BELMORE,BALWYN_RD,N,-37.80431,145.08197,6,25,35,19,...,33,40,32,27,24,19,21,16,12,12
1,4057,BALWYN/BELMORE,BALWYN_RD,N,-37.80431,145.08197,0,5,5,3,...,44,47,37,25,22,18,11,13,13,11
2,4057,BALWYN/BELMORE,BALWYN_RD,N,-37.80431,145.08197,1,8,8,3,...,46,48,34,39,29,33,27,17,9,11
3,4057,BALWYN/BELMORE,BALWYN_RD,N,-37.80431,145.08197,2,6,5,4,...,47,54,40,26,31,37,21,18,24,7
4,4057,BALWYN/BELMORE,BALWYN_RD,N,-37.80431,145.08197,3,8,11,8,...,41,56,46,39,27,39,19,19,21,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4187,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD,W,-37.81429,145.07951,4,13,19,7,...,57,70,64,73,47,67,50,56,51,47
4188,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD,W,-37.81429,145.07951,5,48,24,27,...,58,56,54,51,51,51,93,89,56,46
4189,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD,W,-37.81429,145.07951,6,38,37,39,...,43,58,41,52,37,33,43,33,30,25
4190,4063,WHITEHORSE/BALWYN,WHITEHORSE_RD,W,-37.81429,145.07951,0,15,17,10,...,65,54,49,47,46,31,45,39,23,25


In [17]:
def reconfigure(_df: pd.DataFrame):
	# Create a list to store IDs
	ids = []

	# Process each location group
	for _, scats in _df.groupby('SCATS', sort=False):
		for _, directions in scats.groupby('Direction', sort=False):
			for _, days in directions.groupby('Day_of_week', sort=False):
				# For each row in this location group, assign sequential IDs
				for i in range(len(days)):
					ids.append(i)

	# Add the IDs as a new column for use in the MultiIndex
	_df_with_ids = _df.copy()
	_df_with_ids['ID'] = ids

	# Change to be unique for each:
	# SCAT, Direction, Dayofweek_as_int, ID

	# Create the MultiIndex
	index = pd.MultiIndex.from_arrays(
		[_df_with_ids['SCATS'], _df_with_ids['Direction'], _df_with_ids['Day_of_week'], _df_with_ids['ID']],
		names=['SCATS', 'Direction', 'Days_of_week' 'ID']
	)

	# Drop the columns that are now in the index
	stripped = _df_with_ids.drop(columns=['SCATS', 'Direction', 'Days_of_week' 'ID'])
	stripped.set_index(index, inplace=True)

	return stripped

reconfigured = reconfigure(extracted)
reconfigured

ValueError: Length of names must match number of levels in MultiIndex.

In [7]:
# Save dataframe to csv
reconfigured.to_csv('./processed.csv')

In [8]:
def create_graph(_df: pd.DataFrame):
	'''Take the information from a dataframe and create a graph from it.'''

	locations: dict[int, tuple[float, float]] = {}
	street_to_nodes: dict[str, list[int]] = {}

	for _, row in _df.iterrows():
		scats_num: int = row['SCATS']
		latitude: float = row['Latitude']
		longitude: float = row['Longitude']
		loc_desc: str = row['Intersection']

		# Locations is easy to set up
		locations[scats_num] = (latitude, longitude)

		# Split the location description by '/' to get individual streets
		# Clean and process each street name
		streets = [street.strip() for street in loc_desc.split('/')]

		# Associate each street with the SCATS number
		for street in streets:
			if street:
				if street not in street_to_nodes:
					street_to_nodes[street] = []
				street_to_nodes[street].append(scats_num)

	edge_dict = defaultdict(lambda: defaultdict(int))  # Nested defaultdict for {node: {connected_node: cost}}

	# Connect a node to all other nodes with the same street
	for _, nodes in street_to_nodes.items():
		for node in nodes:
			# Add all other nodes from this street as edges with default cost 1
			for connected_node in nodes:
				if connected_node != node:
					edge_dict[node][connected_node] = 1

	# Convert to a regular dictionary
	edges: dict[int, dict[int, int]] = {node: dict(connected_nodes) for node, connected_nodes in edge_dict.items()}

	return locations, edges

locations, edges = create_graph(extracted)
print(locations)
print(edges)

{4057: (-37.80487, 145.08092), 3001: (-37.81457, 145.02161), 3002: (-37.8151, 145.02608), 4035: (-37.8188, 145.05739), 4032: (-37.80225, 145.06081), 4040: (-37.83254, 145.05507), 3120: (-37.82284, 145.05684), 4034: (-37.81184, 145.05901), 4030: (-37.79471, 145.0612), 4043: (-37.84714, 145.05205), 2000: (-37.85187, 145.09407), 4266: (-37.82518, 145.04301), 4262: (-37.82155, 145.01503), 4264: (-37.82405, 145.03349), 4263: (-37.82295, 145.02402), 3812: (-37.83725, 145.06055), 3127: (-37.82527, 145.07757), 3122: (-37.82374, 145.06417), 3126: (-37.82772, 145.0983), 2820: (-37.7948, 145.03015), 4324: (-37.809176, 145.036452), 3180: (-37.79618, 145.08328), 4051: (-37.79406, 145.06885), 2827: (-37.78127, 145.07688), 2825: (-37.78661, 145.06202), 4270: (-37.83015, 145.03208), 4335: (-37.80619, 145.03532), 3662: (-37.80892, 145.02704), 4321: (-37.80063, 145.04864), 2200: (-37.8164799, 145.0977388), 2846: (-37.86088, 145.05744), 4272: (-37.8318, 145.04614), 3804: (-37.83362, 145.06202), 4812: (-3

In [9]:
# Print basic graph info
print(f'Number of nodes (intersections): {len(locations)}')
print(f'Number of edges (street connections): {len(edges)}')

# List all nodes and their attributes
for node, (latitude, longitude) in locations.items():
	print(f'{node:4}: ({latitude:.6f}, {longitude:.6f})')

# List all edges and their cost
for node, others in edges.items():
	for other, cost in others.items():
		print(f'{node:4} -- {other:4}: Cost = {cost}')

Number of nodes (intersections): 40
Number of edges (street connections): 35
4057: (-37.804870, 145.080920)
3001: (-37.814570, 145.021610)
3002: (-37.815100, 145.026080)
4035: (-37.818800, 145.057390)
4032: (-37.802250, 145.060810)
4040: (-37.832540, 145.055070)
3120: (-37.822840, 145.056840)
4034: (-37.811840, 145.059010)
4030: (-37.794710, 145.061200)
4043: (-37.847140, 145.052050)
2000: (-37.851870, 145.094070)
4266: (-37.825180, 145.043010)
4262: (-37.821550, 145.015030)
4264: (-37.824050, 145.033490)
4263: (-37.822950, 145.024020)
3812: (-37.837250, 145.060550)
3127: (-37.825270, 145.077570)
3122: (-37.823740, 145.064170)
3126: (-37.827720, 145.098300)
2820: (-37.794800, 145.030150)
4324: (-37.809176, 145.036452)
3180: (-37.796180, 145.083280)
4051: (-37.794060, 145.068850)
2827: (-37.781270, 145.076880)
2825: (-37.786610, 145.062020)
4270: (-37.830150, 145.032080)
4335: (-37.806190, 145.035320)
3662: (-37.808920, 145.027040)
4321: (-37.800630, 145.048640)
2200: (-37.816480, 145.0

In [10]:
import search

method = search.select_method('DFS')

if method is None:
	print("Incorrect method type, valid methods:\nDFS, BFS, GBFS, AS, CUS1, CUS2, IDS, BS")
	quit()

graph = search.Graph(edges)
graph.locations = locations

origin = 4030
goals = [4051]

problem = search.GraphProblem(origin, goals, graph)

result, count = method(problem, True)

print('method=AS')
# \n
# Ouput goal node
print('goal=', goals, sep='', end=' | ')

# Output number (length of path)
print('number of nodes=', count, sep='')
# \n
if (result is not None):
	# Output path: list of nodes
	print('path=', result.solution(), sep='')
else:
	print('No path found!')

<Node 4030> [<Node 3001>, <Node 4335>, <Node 3662>, <Node 4321>, <Node 2846>, <Node 4035>, <Node 4032>, <Node 4040>, <Node 3120>, <Node 4034>, <Node 4043>, <Node 3180>, <Node 4051>]
<Node 4051> [<Node 4030>, <Node 3180>, <Node 2827>]
method=AS
goal=[4051] | number of nodes=1
path=[4051]
