In [1]:
import pandas as pd
import numpy as np
import networkx as nx

In [2]:
raw_reference = pd.read_csv(
	'./scats_reference.csv',
	dtype={
		'Site_Number': np.int32,
		'Location_Descriptors': str,
		'Site_Type': str
	}
)
raw_reference = raw_reference[raw_reference.Site_Type == 'INT']
raw_reference = raw_reference.rename(columns={'Site_Number': 'SCATS_Number'})
raw_reference = raw_reference.drop_duplicates()
raw_reference

Unnamed: 0,SCATS_Number,Location_Description,Site_Type
0,964,ABBOTTS/CLELANDS DEVELOPMENTS,INT
1,968,ABBOTTS/GAINE/MONASH,INT
2,972,ABBOTTS/NATIONAL,INT
3,983,ABBOTTS/REMINGTON,INT
4,1053,ABBOTTSFORD/HAINES,INT
...,...,...,...
4506,6075,WYNDHAM ST/VAUGHAN,INT
4507,4107,YAN YEAN/IRONBARK,INT
4509,5048,YARRA/EASTERN BEACH,INT
4510,5081,YARRA/LT MALOP,INT


In [3]:
raw_data = pd.read_csv(
	'./scats_data.csv',
	dtype={
		'SCATS_number': np.int32,
		'Location': str,
		'NB_LATITUDE': np.float32,
		'NB_LONGITUDE': np.float32
	}
)
raw_data = raw_data.drop_duplicates()
raw_data

Unnamed: 0,SCATS_Number,Location,NB_LATITUDE,NB_LONGITUDE
0,970,WARRIGAL_RD N of HIGH STREET_RD,-37.867031,145.091583
31,970,HIGH STREET_RD E of WARRIGAL_RD,-37.867352,145.091949
62,970,WARRIGAL_RD S of HIGH STREET_RD,-37.867599,145.091461
93,970,HIGH STREET_RD W of WARRIGAL_RD,-37.867229,145.091034
123,2000,WARRIGAL_RD N of TOORAK_RD,-37.851685,145.094345
...,...,...,...,...
4042,4812,SWAN_ST SW of MADDEN_GV,-37.829029,145.014557
4068,4821,WALMER_ST N OF VICTORIA_ST,-37.812851,145.008484
4099,4821,VICTORIA_ST E OF BURNLEY_ST,-37.812931,145.008652
4130,4821,BURNLEY_ST S OF VICTORIA_ST,-37.813122,145.008438


In [4]:
ref_df = raw_reference.drop(columns={'Site_Type'})
data_df = raw_data.drop(columns={'Location'})
# Perform an inner merge to keep only SCATS numbers present in both tables
merged_df = pd.merge(ref_df, data_df, on='SCATS_Number', how='inner')
merged_df.to_csv('./merged_data.csv')
merged_df

Unnamed: 0,SCATS_Number,Location_Description,NB_LATITUDE,NB_LONGITUDE
0,4057,BALWYN/BELMORE,-37.804310,145.081970
1,4057,BALWYN/BELMORE,-37.805080,145.082458
2,4057,BALWYN/BELMORE,-37.805641,145.081711
3,4057,BALWYN/BELMORE,-37.804871,145.080917
4,3001,BARKERS/CHURCH/HIGH,-37.814411,145.022430
...,...,...,...,...
135,3682,WARRIGAL/RIVERSDALE,-37.837410,145.096252
136,4063,WHITEHORSE/BALWYN,-37.814041,145.080093
137,4063,WHITEHORSE/BALWYN,-37.814400,145.080444
138,4063,WHITEHORSE/BALWYN,-37.814758,145.079956


In [5]:
def create_network_graph(_df: pd.DataFrame):

	# Initialize an undirected graph
	nodegraph = nx.Graph()

	# Add nodes (SCATS sites) with location description, latitude, and longitude
	for _, row in _df.iterrows():
		scats_num = row['SCATS_Number']
		location = row['Location_Description']
		latitude = row['NB_LATITUDE']
		longitude = row['NB_LONGITUDE']
		nodegraph.add_node(scats_num, location=location, latitude=latitude, longitude=longitude)

	# Parse streets and create edges between intersections sharing a street
	street_to_nodes = {}

	for _, row in _df.iterrows():
		scats_num = row['SCATS_Number']
		# Split the location description by '/' to get individual streets
		streets = row['Location_Description'].split('/')
		# Clean and process each street name
		streets = [street.strip() for street in streets]

		# Associate each street with the SCATS number
		for street in streets:
			if street:
				if street not in street_to_nodes:
					street_to_nodes[street] = []
				street_to_nodes[street].append(scats_num)

	# Create edges between SCATS sites that share a street
	for street, nodes in street_to_nodes.items():
		# If multiple intersections share a street, connect them
		for i in range(len(nodes)):
			for j in range(i + 1, len(nodes)):
				nodegraph.add_edge(nodes[i], nodes[j], street=street)

	# There is a non-zero chance that this ignores distance

	return nodegraph

networkgraph = create_network_graph(merged_df)

# Print basic graph info
print(f'Number of nodes (intersections): {networkgraph.number_of_nodes()}')
print(f'Number of edges (street connections): {networkgraph.number_of_edges()}')

# List all nodes and their attributes
for node, attrs in networkgraph.nodes(data=True):
	print(f'Node {node}: Location = {attrs['location']}, Lat = {attrs['latitude']}, Lon = {attrs['longitude']}')

# List all edges and their street
for u, v, attrs in networkgraph.edges(data=True):
	print(f'Edge {u} -- {v}: Street = {attrs['street']}')

# Save the graph to a file
#nx.write_graphml(G, 'scats_graph.graphml')

Number of nodes (intersections): 40
Number of edges (street connections): 133
Node 4057: Location = BALWYN/BELMORE, Lat = -37.80487060546875, Lon = 145.08091735839844
Node 3001: Location = BARKERS/CHURCH/HIGH, Lat = -37.814571380615234, Lon = 145.0216064453125
Node 3002: Location = BARKERS/DENMARK/POWER, Lat = -37.815101623535156, Lon = 145.0260772705078
Node 4035: Location = BURKE/BARKERS/MONT ALBERT, Lat = -37.81880187988281, Lon = 145.05738830566406
Node 4032: Location = BURKE/BELMORE/HARP, Lat = -37.802249908447266, Lon = 145.06080627441406
Node 4040: Location = BURKE/CAMBERWELL/RIVERSDALE, Lat = -37.83253860473633, Lon = 145.05506896972656
Node 3120: Location = BURKE/CANTERBURY, Lat = -37.82284164428711, Lon = 145.0568389892578
Node 4034: Location = BURKE/COTHAM/WHITEHORSE, Lat = -37.81184005737305, Lon = 145.0590057373047
Node 4030: Location = BURKE/DONCASTER/KILBY/HIGH, Lat = -37.794708251953125, Lon = 145.0612030029297
Node 4043: Location = BURKE/TOORAK, Lat = -37.8471412658691

In [6]:
import search

method = search.select_method('AS')
