**Betweenness centrality**

In [7]:
# graph of Vancouver

import pandas as pd
import networkx as nx

# Read the two input tables used to build the street graph
segments_df = pd.read_csv("streetsegments.csv")  # junction IDs for each segment (connectivity)
weights_df = pd.read_csv("segments.csv")  # 'id' and 'length_metres' for each segment (edge weights)

# Empty undirected graph; the edges are added by the loop further down
G = nx.Graph()

# Function to add an edge for a segment that has exactly two usable junction IDs
def add_edge_based_on_junctions(graph, row, length):
    """Add one street segment to ``graph`` as an edge between its two junctions.

    The four junction columns are scanned in a fixed order and every value that
    is neither 0 nor missing (NaN/None) is collected. An edge is added only when
    exactly two junction IDs were collected; any other count leaves the graph
    untouched.

    Parameters
    ----------
    graph : object exposing ``add_edge(u, v, **attrs)`` (e.g. ``nx.Graph``)
    row : mapping indexed by column name (a DataFrame row or a plain dict)
    length : mapping with ``get``, from segment ID to length in metres

    Returns
    -------
    None. The edge is stored with ``id`` set to the row's ``StreetID`` and
    ``weight`` set to the looked-up length (0 when the segment is not in
    ``length``).
    """
    junction_columns = (
        'pseudoJunctionID1',
        'pseudoJunctionID2',
        'adjustJunctionID1',
        'adjustJunctionID2',
    )

    junctions = []
    for column in junction_columns:
        junction_id = row[column]
        # NaN != 0 evaluates to True, so a blank cell would otherwise be treated
        # as a real junction (creating a NaN node or spoiling the count of two).
        if junction_id != 0 and not pd.isna(junction_id):
            junctions.append(junction_id)

    if len(junctions) != 2:
        return

    segment_id = row['StreetID']
    # NOTE(review): segments without a known length get weight 0, which makes
    # them free to traverse in weighted shortest paths - confirm this is intended.
    length_metres = length.get(segment_id, 0)
    graph.add_edge(junctions[0], junctions[1], id=segment_id, weight=length_metres)

# Segment ID -> length in metres, built once so the edge loop only does dict lookups
length_dict = {
    segment_id: metres
    for segment_id, metres in zip(weights_df['id'], weights_df['length_metres'])
}

# Offer every street segment to the helper, which decides whether it becomes an edge
for _, segment_row in segments_df.iterrows():
    add_edge_based_on_junctions(G, segment_row, length_dict)

# Normalized betweenness centrality, using the 'weight' edge attribute as path length
print('calculating betweenness_centrality')
betweenness_centrality = nx.betweenness_centrality(G, normalized=True, weight='weight')



calculating betweenness_centrality


In [8]:
# WRITING BETWEENNESS CENTRALITY TO NEW SHEET
# One record per graph node: the node's ID and its centrality score
data = [
    {'id': node_id, 'betweenness_centrality': score}
    for node_id, score in betweenness_centrality.items()
]
df = pd.DataFrame(data)

# Save the table as an Excel workbook (row index omitted)
excel_file = 'betweenness_centrality_results.xlsx'
df.to_excel(excel_file, index=False)

print(f"Data successfully written to {excel_file}")

# Sanity check on the size of the graph the scores were computed from
print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())

Data successfully written to betweenness_centrality_results.xlsx
Number of nodes: 6179
Number of edges: 10503


In [11]:
# MERGING BETWEENNESS CENTRALITY WITH MAIN SHEET
import pandas as pd

# Load betweenness centrality data.
# The export cell above writes 'betweenness_centrality_results.xlsx'; read that same
# file so this cell works after a fresh Restart & Run All instead of depending on a
# separately created 'betweenness_centrality.xlsx'.
betweenness_df = pd.read_excel('betweenness_centrality_results.xlsx')

# Load the target dataset where we want to add the betweenness centrality column
final_dataset_reach = pd.read_excel('final_dataset_reach.xlsx')

# Merge on the shared 'id' column; how='left' keeps every row of final_dataset_reach,
# leaving the centrality empty (NaN) for ids that are not in betweenness_df
merged_dataset = pd.merge(final_dataset_reach, betweenness_df, on='id', how='left')

# Write the updated dataset to Excel with the added column
merged_dataset.to_excel('final_dataset_with_centrality.xlsx', index=False)

print("Updated dataset with betweenness centrality saved successfully.")


Updated dataset with betweenness centrality saved successfully.


**Graffiti counts**

In [20]:
# ADDING GRAFFITI COUNTS TO MAIN SHEET
# will be used to calculate reach

import pandas as pd

# Inputs: the graffiti records and the dataset produced by the centrality merge
graffiti_df = pd.read_csv('graffiti.csv')
target_df = pd.read_excel('final_dataset_with_centrality.xlsx')

# Sum 'count' per junction_id and name the summed column 'graffiti_count'
graffiti_aggregated = (
    graffiti_df.groupby('junction_id')['count']
    .sum()
    .reset_index()
    .rename(columns={'count': 'graffiti_count'})
)

# Left-join the totals onto the target (target 'id' matches graffiti 'junction_id'),
# then discard the duplicate key column brought in by the join
merged_df = target_df.merge(
    graffiti_aggregated, how='left', left_on='id', right_on='junction_id'
).drop(columns=['junction_id'])

# Rows of the target with no graffiti records get a count of 0 instead of NaN
merged_df['graffiti_count'] = merged_df['graffiti_count'].fillna(0)

# Save the result to a new Excel workbook
merged_df.to_excel('final_dataset_graffiti_count.xlsx', index=False)

**Homeless Shelter Counts**

In [2]:
# HOMELESS SHELTER COUNTS

import pandas as pd
from geopy.distance import geodesic

# Read the shelter locations and the junction table from their workbooks
shelters_df = pd.read_excel('homeless-shelter-locations.xlsx')
junctions_df = pd.read_excel('junctions.xlsx')

# 'geo_point_2d' is a comma-separated string: the first part is stored as latitude,
# the second as longitude, both converted to float
shelter_coords = shelters_df['geo_point_2d'].str.split(',', expand=True).astype(float)
shelters_df[['latitude', 'longitude']] = shelter_coords

# Linear scan over all junctions for the one closest to a shelter
def find_nearest_junction(shelter_lat, shelter_lon, junctions_df):
    """Return the ``id`` of the junction with the smallest geodesic distance to the shelter.

    Every row of ``junctions_df`` (columns 'latitude', 'longitude', 'id') is
    compared in order; when two junctions are equally close the earlier row wins.
    Returns ``None`` if ``junctions_df`` has no rows.
    """
    shelter_point = (shelter_lat, shelter_lon)
    closest_id = None
    closest_metres = float('inf')

    for _, junction_row in junctions_df.iterrows():
        junction_point = (junction_row['latitude'], junction_row['longitude'])
        metres = geodesic(shelter_point, junction_point).meters

        # Strict comparison keeps the first junction found at the minimum distance
        if metres < closest_metres:
            closest_metres, closest_id = metres, junction_row['id']

    return closest_id

# Assign every shelter to its nearest junction
shelters_df['nearest_junction_id'] = shelters_df.apply(
    lambda row: find_nearest_junction(row['latitude'], row['longitude'], junctions_df), axis=1)

# Count the number of shelters assigned to each junction
junction_shelter_count = shelters_df['nearest_junction_id'].value_counts().reset_index()
junction_shelter_count.columns = ['id', 'homeless_shelter_count']

# Load the final dataset
final_dataset = pd.read_excel('final_dataset_graffiti_count.xlsx')

# Merge the homeless shelter counts with the final_dataset.
# Only the new count column is zero-filled: a blanket fillna(0) on the merged frame
# would also overwrite genuinely missing values in every other column.
final_dataset_with_counts = final_dataset.merge(junction_shelter_count, on='id', how='left')
final_dataset_with_counts['homeless_shelter_count'] = (
    final_dataset_with_counts['homeless_shelter_count'].fillna(0).astype(int)
)

# Save the updated dataset to a new file
final_dataset_with_counts.to_excel('final_dataset_shelter_counts.xlsx', index=False)

print("The updated dataset with homeless shelter counts has been saved as 'final_dataset_shelter_counts.xlsx'.")


The updated dataset with homeless shelter counts has been saved as 'final_dataset_shelter_counts.xlsx'.


**TRAFFIC SIGNAL COUNTS**

In [3]:
# TRAFFIC SIGNAL COUNTS

import pandas as pd
from geopy.distance import geodesic

# Read the traffic signal locations and the junction table from their workbooks
signals_df = pd.read_excel('traffic-signals.xlsx')
junctions_df = pd.read_excel('junctions.xlsx')

# 'geo_point_2d' is a comma-separated string: the first part is stored as latitude,
# the second as longitude, both converted to float
signal_coords = signals_df['geo_point_2d'].str.split(',', expand=True).astype(float)
signals_df[['latitude', 'longitude']] = signal_coords

# Linear scan over all junctions for the one closest to a traffic signal
def find_nearest_junction(signal_lat, signal_lon, junctions_df):
    """Return the ``id`` of the junction with the smallest geodesic distance to the signal.

    Every row of ``junctions_df`` (columns 'latitude', 'longitude', 'id') is
    compared in order; when two junctions are equally close the earlier row wins.
    Returns ``None`` if ``junctions_df`` has no rows.
    """
    signal_point = (signal_lat, signal_lon)
    closest_id = None
    closest_metres = float('inf')

    for _, junction_row in junctions_df.iterrows():
        junction_point = (junction_row['latitude'], junction_row['longitude'])
        metres = geodesic(signal_point, junction_point).meters

        # Strict comparison keeps the first junction found at the minimum distance
        if metres < closest_metres:
            closest_metres, closest_id = metres, junction_row['id']

    return closest_id

# Assign every traffic signal to its nearest junction
signals_df['nearest_junction_id'] = signals_df.apply(
    lambda row: find_nearest_junction(row['latitude'], row['longitude'], junctions_df), axis=1)

# Count the number of signals assigned to each junction
junction_signal_count = signals_df['nearest_junction_id'].value_counts().reset_index()
junction_signal_count.columns = ['id', 'traffic_signal_count']

# Load the final dataset where you want to merge the traffic signal counts
final_dataset = pd.read_excel('final_dataset_shelter_counts.xlsx')

# Merge the traffic signal counts with the final_dataset.
# Only the new count column is zero-filled: a blanket fillna(0) on the merged frame
# would also overwrite genuinely missing values in every other column.
final_dataset_with_counts = final_dataset.merge(junction_signal_count, on='id', how='left')
final_dataset_with_counts['traffic_signal_count'] = (
    final_dataset_with_counts['traffic_signal_count'].fillna(0).astype(int)
)

# Save the updated dataset to a new file
final_dataset_with_counts.to_excel('final_dataset_traffic_signal_counts.xlsx', index=False)

print("The updated dataset with traffic signal counts has been saved as 'final_dataset_traffic_signal_counts.xlsx'.")



The updated dataset with traffic signal counts has been saved as 'final_dataset_traffic_signal_counts.xlsx'.


**STREET LIGHT COUNTS**

In [5]:
# STREET LIGHT COUNTS

import pandas as pd
from geopy.distance import geodesic

# Read the street lighting pole locations and the junction table from their workbooks
lights_df = pd.read_excel('street-lighting-poles.xlsx')
junctions_df = pd.read_excel('junctions.xlsx')

# 'geo_point_2d' is a comma-separated string: the first part is stored as latitude,
# the second as longitude, both converted to float
light_coords = lights_df['geo_point_2d'].str.split(',', expand=True).astype(float)
lights_df[['latitude', 'longitude']] = light_coords

# Linear scan over all junctions for the one closest to a street lighting pole
def find_nearest_junction(light_lat, light_lon, junctions_df):
    """Return the ``id`` of the junction with the smallest geodesic distance to the pole.

    Every row of ``junctions_df`` (columns 'latitude', 'longitude', 'id') is
    compared in order; when two junctions are equally close the earlier row wins.
    Returns ``None`` if ``junctions_df`` has no rows.
    """
    light_point = (light_lat, light_lon)
    closest_id = None
    closest_metres = float('inf')

    for _, junction_row in junctions_df.iterrows():
        junction_point = (junction_row['latitude'], junction_row['longitude'])
        metres = geodesic(light_point, junction_point).meters

        # Strict comparison keeps the first junction found at the minimum distance
        if metres < closest_metres:
            closest_metres, closest_id = metres, junction_row['id']

    return closest_id

# Assign every street lighting pole to its nearest junction
lights_df['nearest_junction_id'] = lights_df.apply(
    lambda row: find_nearest_junction(row['latitude'], row['longitude'], junctions_df), axis=1)

# Count the number of lighting poles assigned to each junction
junction_lights_count = lights_df['nearest_junction_id'].value_counts().reset_index()
junction_lights_count.columns = ['id', 'street_lighting_poles_count']

# Load the dataset produced by the traffic signal step; the pole counts are added to it
final_dataset = pd.read_excel('final_dataset_traffic_signal_counts.xlsx')

# Merge the street lighting pole counts with the final_dataset.
# Only the new count column is zero-filled: a blanket fillna(0) on the merged frame
# would also overwrite genuinely missing values in every other column.
final_dataset_with_counts = final_dataset.merge(junction_lights_count, on='id', how='left')
final_dataset_with_counts['street_lighting_poles_count'] = (
    final_dataset_with_counts['street_lighting_poles_count'].fillna(0).astype(int)
)

# Save the updated dataset to a new file
final_dataset_with_counts.to_excel('final_dataset_lights_counts.xlsx', index=False)

# The message previously said "traffic signal counts", copied from the cell above
print("The updated dataset with street lighting pole counts has been saved as 'final_dataset_lights_counts.xlsx'.")

The updated dataset with traffic signal counts has been saved as 'final_dataset_lights_counts.xlsx'.
