tfpd_neighbors aims to find the closest neighbors of each SCAT site, which is what later makes routing possible. Firstly, all of the neighbors of a SCAT are found using the roads they share. From there, the closest neighbor in each direction is calculated using the longitude and latitude of each SCAT.

In [1]:
import os
import pandas as pd
import re
from geopy.distance import geodesic
import math


Find SCAT neighbors based on the shared roads

In [3]:
# Path of the neighbors CSV: written when it does not exist yet, then re-read
# and overwritten once the closest neighbor per direction has been computed.
scats_neighbors_file = 'data/scats_neighbors.csv'
# Path of the raw SCATS CSV that is read to build the neighbors file.
scats_data_file = 'scats-10-2006.csv'

#Get road names
def extract_road_names(location):
    """Return the distinct road names mentioned in a SCATS location string.

    Standalone compass letters (N, E, S, W) are removed, then the text is
    split on the connecting words "of" / "near" in any letter case. Names
    are returned in first-seen order so repeated runs give the same result.

    Args:
        location: Location description, e.g. "WARRIGAL_RD N of HIGH STREET_RD".

    Returns:
        List of unique, whitespace-stripped road names. Empty when the input
        is blank or is not a string (for example a missing value).
    """
    if not isinstance(location, str):
        return []

    # Drop the direction letter that sits between the two road names.
    without_compass = re.sub(r'\b[NESW]\b', '', location)

    # Non-capturing group: the connector words are consumed by the split
    # instead of being returned as pieces of their own.
    pieces = re.split(r'\s(?:of|near)\s', without_compass, flags=re.IGNORECASE)

    # A dict keeps insertion order, giving de-duplication without the
    # run-to-run ordering changes of a set of strings.
    unique_roads = {}
    for piece in pieces:
        name = piece.strip()
        if name and name.lower() not in ('of', 'near'):
            unique_roads[name] = None
    return list(unique_roads)

#Create file if it doesnt exist
if not os.path.exists(scats_neighbors_file):
    print(f"{scats_neighbors_file} not found. Creating the file now.")

    data = pd.read_csv(scats_data_file)

    # Road names for every raw row, parsed from its 'Location' text.
    data['Roads'] = data['Location'].apply(extract_road_names)

    # Only these columns are needed to build the neighbors file.
    data_selected = data[['SCATS Number', 'Roads', 'NB_LATITUDE', 'NB_LONGITUDE']]

    # One row per SCAT: union of its road names and mean latitude/longitude.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # 'Road(s)' column is written in the same order on every run.
    grouped = data_selected.groupby('SCATS Number').agg({
        'Roads': lambda x: list(dict.fromkeys(road for sublist in x for road in sublist)),
        'NB_LATITUDE': 'mean',
        'NB_LONGITUDE': 'mean',
    }).reset_index()

    def find_neighbors(scats_num, road_names):
        """Return a ', '-joined string of the other SCATS numbers sharing a road.

        Args:
            scats_num: SCATS number of the site being processed (excluded
                from the result).
            road_names: Road names belonging to that site.

        Returns:
            The unique SCATS numbers of rows in ``data`` whose 'Roads' list
            contains at least one of ``road_names``, joined with ', '.
        """
        wanted = set(road_names)
        shares_road = data['Roads'].apply(lambda roads: not wanted.isdisjoint(roads))
        potential_neighbors = data[(data['SCATS Number'] != scats_num) & shares_road]
        return ', '.join(map(str, potential_neighbors['SCATS Number'].unique()))

    grouped['Neighbors'] = grouped.apply(lambda row: find_neighbors(row['SCATS Number'], row['Roads']), axis=1)
    grouped['Road(s)'] = grouped['Roads'].apply(', '.join)

    grouped = grouped.rename(columns={
        'SCATS Number': 'SCAT number',
        'NB_LATITUDE': 'NB_Latitude',
        'NB_LONGITUDE': 'NB_Longitude',
    })

    # The list column was only needed to compute the two text columns above.
    grouped = grouped.drop(columns=['Roads'])

    # to_csv does not create missing folders, so make sure the target
    # directory exists before writing.
    output_dir = os.path.dirname(scats_neighbors_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    grouped.to_csv(scats_neighbors_file, index=False)
    print(f"{scats_neighbors_file} has been created successfully.")
else:
    print(f"{scats_neighbors_file} already exists.")


data/scats_neighbors.csv already exists.


Find the closest neighbor in each direction using the shared roads and the longitude and latitude

In [4]:
#Distance (geodesic, in meters) and compass bearing between two points
def calculate_distance_and_bearing(lat1, lon1, lat2, lon2):
    """Return the distance and bearing from point 1 to point 2.

    Args:
        lat1, lon1: Latitude and longitude of the start point, in degrees.
        lat2, lon2: Latitude and longitude of the end point, in degrees.

    Returns:
        Tuple ``(distance, bearing)``: the geodesic distance in meters and
        the bearing in degrees, normalized to the range 0-360.
    """
    meters = geodesic((lat1, lon1), (lat2, lon2)).meters

    # The bearing formula works on radians.
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    delta_lon = math.radians(lon2 - lon1)

    east_part = math.sin(delta_lon) * math.cos(phi2)
    north_part = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(delta_lon)
    raw_bearing = math.degrees(math.atan2(east_part, north_part))

    # atan2 yields -180..180; shift into 0-360 degrees.
    return meters, (raw_bearing + 360) % 360

#Calc direction using bearing. Only NSEW
def get_direction(bearing):
    """Map a compass bearing in degrees to 'North', 'East', 'South' or 'West'.

    Each direction owns a 90-degree sector whose lower bound is inclusive:
    45 is East, 135 is South, 225 is West and 315 is North. Bearings outside
    0-360 are wrapped first, so -90 is West and 450 is East.

    Args:
        bearing: Bearing in degrees.

    Returns:
        One of 'North', 'East', 'South', 'West'.
    """
    # Wrap so that negative or >= 360 bearings land in the right sector
    # instead of falling through to 'North'.
    bearing = bearing % 360

    if 45 <= bearing < 135:
        return 'East'
    elif 135 <= bearing < 225:
        return 'South'
    elif 225 <= bearing < 315:
        return 'West'
    else:
        return 'North'

#Pick the nearest road-sharing SCAT for each of the four directions
def find_closest_neighbors(scats_num, lat, lon, neighbors):
    """Return the closest neighbor to the North, East, South and West.

    Candidates are looked up in the module-level ``data`` DataFrame by its
    'SCAT number' column, and their position is read from 'NB_Latitude' /
    'NB_Longitude'.

    Args:
        scats_num: SCAT number of the site itself (not used in the body).
        lat: Latitude of the site.
        lon: Longitude of the site.
        neighbors: Comma-separated SCAT numbers that share a road with the
            site. Any non-string value is treated as "no neighbors".

    Returns:
        Dict keyed by 'North', 'East', 'South', 'West' holding the SCAT
        number of the nearest candidate in that direction, or None when no
        candidate lies in that direction.
    """
    #Only SCATs already listed as neighbors (shared road) are candidates
    if isinstance(neighbors, str):
        candidate_ids = [int(token.strip()) for token in neighbors.split(',')]
    else:
        candidate_ids = []
    candidates = data[data['SCAT number'].isin(candidate_ids)]

    directions = ('North', 'East', 'South', 'West')
    closest_neighbors = dict.fromkeys(directions)
    shortest = dict.fromkeys(directions, float('inf'))

    for _, candidate in candidates.iterrows():
        distance, bearing = calculate_distance_and_bearing(
            lat, lon, candidate['NB_Latitude'], candidate['NB_Longitude']
        )
        direction = get_direction(bearing)

        #Strict comparison: on a tie the earlier candidate is kept
        if distance < shortest[direction]:
            shortest[direction] = distance
            closest_neighbors[direction] = candidate['SCAT number']

    return closest_neighbors

#Load the existing scats_neighbors.csv file to process nearest neighbors
if not os.path.exists(scats_neighbors_file):
    print(f"{scats_neighbors_file} does not exist. Please create the file in the previous section.")
else:
    # find_closest_neighbors reads this module-level DataFrame, so it must
    # be bound to the name `data` before the loop runs.
    data = pd.read_csv(scats_neighbors_file)

    if 'Neighbors' not in data.columns:
        # The 'Neighbors' column is dropped before the file is saved below,
        # so a file that already went through this step no longer has it.
        # Report that instead of failing with a KeyError on a rerun.
        print(f"{scats_neighbors_file} has no 'Neighbors' column (it may already have been processed). "
              "Delete the file and rerun the previous section to regenerate it.")
    else:
        # Process each SCAT to find its closest neighbor in each direction,
        # using the precomputed road-sharing neighbors.
        for idx, row in data.iterrows():
            closest_neighbors = find_closest_neighbors(
                row['SCAT number'],
                row['NB_Latitude'],
                row['NB_Longitude'],
                row['Neighbors'],
            )

            # Save closest neighbor per direction in '<Direction> Neighbor'.
            for direction in ('North', 'East', 'South', 'West'):
                data.at[idx, f'{direction} Neighbor'] = closest_neighbors[direction]

        data.drop(columns=['Neighbors'], inplace=True)

        # Save the updated DataFrame back to the same file
        data.to_csv(scats_neighbors_file, index=False)
        print(f"Updated neighbors saved to {scats_neighbors_file} with closest neighbors added.")


Updated neighbors saved to data/scats_neighbors.csv with closest neighbors added.
