In [None]:
import osmnx as ox
from osmnx import utils_graph
import networkx as nx

import geopandas as gpd
import pandas as pd
import numpy as np
from statistics import mean

import matplotlib.pyplot as plt
import plotly.express as px
import shapely
import folium
import plotly.graph_objects as go # or plotly.express as px

import os

import sklearn
import pyproj
from scipy.sparse import csr_matrix
from shapely.ops import linemerge

import warnings

from distance_matrix_functions_cmm import *


In [None]:
# Load the walking and driving distance matrices so they can be compared.
# The first CSV column is read in as 'Unnamed: 0'; it becomes the index and its
# name is cleared.
walk_csv = (
    pd.read_csv(r'data/distance_matrices/distmatrix_walk_contracosta.csv')
    .set_index('Unnamed: 0')
    .rename_axis(None)
)
drive_csv = (
    pd.read_csv(r'data/distance_matrices/distmatrix_contracosta.csv')
    .set_index('Unnamed: 0')
    .rename_axis(None)
)

# Load the county's driving and walking graphs; they are used further down to
# investigate a point with a long driving distance but a short walking distance.
county = 'contracosta'
drive = ox.load_graphml(
    os.path.join(os.getcwd(), 'data/graphs/graph_' + county + '.graphml')
)
walk = ox.load_graphml(
    os.path.join(os.getcwd(), 'data', 'graphs', 'graph_walk_' + county + '.graphml')
)


In [None]:
# Project both graphs with ox.project_graph; the node-lookup helper defined below
# must be given a projected graph.
drive_proj, walk_proj = (ox.project_graph(graph) for graph in (drive, walk))

In [None]:
def get_coords_and_nearest_node2(in_pt, in_colname, in_pt_gdf, in_graph):
    """Return the node of ``in_graph`` nearest to the point identified by ``in_pt``.

    Rows of ``in_pt_gdf`` whose ``in_colname`` value equals ``in_pt`` are selected
    and their ``LAT`` / ``LON`` values are averaged (a small handful of sites have
    two sets of coordinates). The averaged point is declared as "nad83" and
    reprojected to ``in_graph.graph['crs']`` before the lookup, so a projected
    graph must be passed.
    """
    matches = in_pt_gdf.loc[in_pt_gdf[in_colname] == in_pt]

    lat_avg = mean(matches.LAT)
    lon_avg = mean(matches.LON)

    # Build a one-point GeoDataFrame and move it into the graph's CRS.
    point = gpd.points_from_xy([lon_avg], [lat_avg])
    point_in_graph_crs = gpd.GeoDataFrame(geometry=point, crs="nad83").to_crs(
        in_graph.graph['crs']
    )

    # Only one coordinate pair was passed in, so the first result is the answer.
    nearest = ox.nearest_nodes(
        in_graph,
        X=point_in_graph_crs['geometry'].x,
        Y=point_in_graph_crs['geometry'].y,
    )
    return nearest[0]


In [None]:
# Find long-driving / short-walking points.
def find_max_pos(data, n):
    """Return the column and row labels of the n-th largest value in ``data``.

    Args:
        data: DataFrame of values (e.g. a distance matrix). Missing entries are
            ignored.
        n: 1-based rank; ``n=1`` selects the maximum.

    Returns:
        ``[column_label, row_label]`` of the n-th largest non-missing entry.

    Raises:
        ValueError: if ``n`` is smaller than 1 or larger than the number of
            non-missing entries (previously this silently returned a wrong entry
            or failed with an unrelated error).
    """
    # dropna() is explicit because only the legacy stack() implementation drops
    # missing values on its own.
    ranked = data.stack().dropna().sort_values(ascending=False)
    if not 1 <= n <= len(ranked):
        raise ValueError(
            f"n must be between 1 and {len(ranked)} (number of non-missing entries), got {n}"
        )
    # The stacked index holds (row label, column label) pairs.
    row_label, col_label = ranked.index[n - 1]
    return [col_label, row_label]

def locate(data, id, col):
    """Return ``[LAT, LON]`` for the first row of ``data`` whose ``col`` equals ``id``.

    ``data`` is the resilience hub (site) table or the block group table; both
    carry ``LAT`` and ``LON`` columns. An IndexError is raised when no row matches.
    """
    matches = data[data[col] == id]
    lat = matches['LAT'].iat[0]
    lon = matches['LON'].iat[0]
    return [lat, lon]

def create_shortest_path(orig_node, dest_node, graph):
    """Return the merged geometry of the shortest route between two graph nodes.

    Args:
        orig_node: id of the origin node in ``graph``.
        dest_node: id of the destination node in ``graph``.
        graph: multigraph whose edges carry a ``length`` attribute and whose nodes
            carry ``x`` / ``y`` coordinates.

    Returns:
        The result of ``shapely.ops.linemerge`` over the geometries of the route's
        edges.

    Raises:
        Whatever ``nx.shortest_path`` raises when the nodes are unknown or not
        connected.
    """
    # Local import keeps this block self-contained; shapely is already a
    # dependency of this notebook.
    from shapely.geometry import LineString

    # weight='length' makes this the shortest route by distance. Without a weight
    # networkx minimises the number of edges instead.
    route = nx.shortest_path(graph, orig_node, dest_node, weight='length')

    segments = []
    for u, v in zip(route[:-1], route[1:]):
        # Between two nodes there may be parallel edges; use the shortest one,
        # consistent with the weighted route search above.
        edge = min(graph.get_edge_data(u, v).values(), key=lambda attrs: attrs['length'])
        geometry = edge.get('geometry')
        if geometry is None:
            # Edges without a stored geometry are drawn as a straight segment
            # between their end nodes instead of raising KeyError.
            geometry = LineString([
                (graph.nodes[u]['x'], graph.nodes[u]['y']),
                (graph.nodes[v]['x'], graph.nodes[v]['y']),
            ])
        segments.append(geometry)
    return linemerge(segments)

def _route_between(bg, site, bg_data, site_data, graph):
    """Return the shortest-path geometry on ``graph`` from block group ``bg`` to ``site``."""
    orig_node = get_coords_and_nearest_node2(bg, 'GISJOIN', bg_data, graph)
    dest_node = get_coords_and_nearest_node2(site, 'id_site', site_data, graph)
    return create_shortest_path(orig_node, dest_node, graph)


def _route_to_latlon(path, graph_crs, mode):
    """Turn a route in ``graph_crs`` into a LAT/LON DataFrame labelled with ``mode``."""
    xs, ys = path.coords.xy
    # Project the route vertices from the graph CRS back to geographic coordinates.
    pts = gpd.GeoDataFrame(
        geometry=gpd.points_from_xy(list(xs), list(ys)), crs=graph_crs
    ).to_crs('nad83')
    # After reprojection y is the latitude and x the longitude.
    return pd.DataFrame({'LAT': pts['geometry'].y, 'LON': pts['geometry'].x, 'type': mode})


def plot_routes(site, bg, site_data, bg_data):
    """Plot the driving and walking routes between a block group and a site.

    Args:
        site: site id, matched against the ``id_site`` column of ``site_data``.
        bg: block group code, matched against the ``GISJOIN`` column of ``bg_data``.
        site_data: table of sites with ``id_site``, ``LAT`` and ``LON`` columns.
        bg_data: table of block groups with ``GISJOIN``, ``LAT`` and ``LON`` columns.

    Uses the notebook-level projected graphs ``drive_proj`` and ``walk_proj``.
    Shows an interactive map; returns nothing.
    """
    bg_pt = locate(bg_data, bg, 'GISJOIN')
    site_pt = locate(site_data, site, 'id_site')

    # One frame per travel mode; nodes are snapped separately on each graph.
    frames = [
        _route_to_latlon(
            _route_between(bg, site, bg_data, site_data, graph),
            graph.graph['crs'],
            mode,
        )
        for graph, mode in ((drive_proj, "drive"), (walk_proj, "walk"))
    ]
    routes = pd.concat(frames)

    fig = px.line_mapbox(routes, lat="LAT", lon="LON", color="type",
                         mapbox_style="open-street-map", zoom=10)
    fig.add_scattermapbox(lat=[bg_pt[0]], lon=[bg_pt[1]], name="Block Group")
    fig.add_scattermapbox(lat=[site_pt[0]], lon=[site_pt[1]], name="Site")
    fig.show()


In [None]:
# USING FUNCTIONS
ca_albers_nad83 = 'NAD_1983_California_Teale_Albers_FtUS'
nad83 = 'EPSG:4269'  # NAD83 geographic CRS (was mistyped as EPSG:4629, a different CRS)
wgs84 = 'EPSG:4326'

# Building candidate sites GeoDataFrame: drop sites of category 'X' and keep only
# the columns used in this notebook.
sites_path = os.path.join(os.getcwd(), 'data', 'candidate_site_campuses_2021-11-17', 'candidate_sites_campuses.csv')
sites_df_raw = pd.read_csv(sites_path)
sites_df_raw = sites_df_raw.loc[sites_df_raw['cat_site'] != 'X', ['id_site', 'cat_site', 'SQFT_ROOF', 'LON', 'LAT']]
sites_geom = gpd.points_from_xy(sites_df_raw.LON, sites_df_raw.LAT, crs = nad83)
sites_gdf = gpd.GeoDataFrame(sites_df_raw, geometry = sites_geom, crs = nad83)

# Building block group GeoDataFrame. The point coordinates come from the
# shapefile's INTPTLON / INTPTLAT attribute columns, not from the polygon geometry.
bgs_path = os.path.join(os.getcwd(), 'data', 'bg_ca_19', 'shp', 'blockgroup_CA_19.shp')
bgs_gdf = gpd.read_file(bgs_path)
bgs_gdf = bgs_gdf.to_crs(sites_gdf.crs)
bgs_lons = [float(intpt) for intpt in bgs_gdf['INTPTLON']]
bgs_lats = [float(intpt) for intpt in bgs_gdf['INTPTLAT']]
bgs = pd.DataFrame(bgs_gdf[['GISJOIN', 'COUNTYFP']])
bgs['LON'] = bgs_lons
bgs['LAT'] = bgs_lats
bgs_pt_geom = gpd.points_from_xy(x = bgs.LON,y = bgs.LAT, crs = nad83)
bgs_pt_gdf = gpd.GeoDataFrame(bgs, geometry = bgs_pt_geom, crs = nad83)

In [None]:
# The five largest entries of the driving distance matrix, in descending order.
# Each pt_k is [column label, row label], i.e. [site code, block group code].
pt_1, pt_2, pt_3, pt_4, pt_5 = (
    find_max_pos(drive_csv, rank) for rank in range(1, 6)
)


In [None]:
# Plot the pair with the second-longest driving distance; pt_2 unpacks to
# (site code, block group code), the first two arguments of plot_routes.
plot_routes(*pt_2, sites_gdf, bgs_pt_gdf)


In [None]:
# Plot all routes to RYSE Center
RYSE_SITE_ID = "294779081"


def _paths_to_site(bg_ids, bg_data, dest_node, graph, mode):
    """Collect block-group locations and routes to ``dest_node`` on ``graph``.

    Returns ``(bg_points, paths)``: ``bg_points`` holds the ``[LAT, LON]`` of each
    block group in ``bg_ids``; ``paths`` holds one DataFrame per block group with
    the route vertices and a ``type`` column set to ``mode``.
    """
    bg_points = []
    paths = []
    for bg_id in bg_ids:
        bg_points.append(locate(bg_data, bg_id, 'GISJOIN'))
        orig_node = get_coords_and_nearest_node2(bg_id, 'GISJOIN', bg_data, graph)
        route = create_shortest_path(orig_node, dest_node, graph)
        xs, ys = route.coords.xy
        # Project the route vertices from the graph CRS back to geographic coordinates.
        pts = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(list(xs), list(ys)), crs=graph.graph['crs']
        ).to_crs('nad83')
        # NOTE: the column names are deliberately kept as the later map cells
        # expect them: 'LAT' holds x (longitude) and 'LON' holds y (latitude).
        paths.append(pd.DataFrame({
            'LAT': pts['geometry'].x,
            'LON': pts['geometry'].y,
            'type': mode,
        }))
    return bg_points, paths


# Use the notebook-level site / block-group tables (site_data and bg_data only
# exist as parameters inside plot_routes).
site_pt = locate(sites_gdf, RYSE_SITE_ID, 'id_site')

# The destination must be snapped separately on each graph: a node id found on
# the driving graph is not a valid lookup on the walking graph.
dest_node = get_coords_and_nearest_node2(RYSE_SITE_ID, 'id_site', sites_gdf, drive_proj)
dest_node_walk = get_coords_and_nearest_node2(RYSE_SITE_ID, 'id_site', sites_gdf, walk_proj)

# find block groups with driving distances to RYSE center
drive_distances = drive_csv[RYSE_SITE_ID].dropna().index
bg_pt_drive, df_drive = _paths_to_site(drive_distances, bgs_pt_gdf, dest_node, drive_proj, "drive")

# find block groups with walking distances to RYSE center
walk_distances = walk_csv[RYSE_SITE_ID].dropna().index
# bg_pt keeps its previous end-of-cell meaning: the walking block-group locations.
bg_pt, df_walk = _paths_to_site(walk_distances, bgs_pt_gdf, dest_node_walk, walk_proj, "walk")


In [None]:
# create latlon_drive dataset to map
# Every path is followed by one all-NaN row so the map draws each route as a
# separate line. The frames are concatenated once, without modifying the
# DataFrames stored in df_drive, so re-running this cell gives the same result.
_drive_gap = pd.DataFrame({'LAT': [np.nan], 'LON': [np.nan], 'type': ["drive"]})
latlon_drive = pd.concat(
    [piece for path_df in df_drive for piece in (path_df, _drive_gap)],
    ignore_index=True,
)


In [None]:
# create latlon_walk dataset to map
# Every path is followed by one all-NaN row so the map draws each route as a
# separate line. The frames are concatenated once, without modifying the
# DataFrames stored in df_walk, so re-running this cell gives the same result.
_walk_gap = pd.DataFrame({'LAT': [np.nan], 'LON': [np.nan], 'type': ["walk"]})
latlon_walk = pd.concat(
    [piece for path_df in df_walk for piece in (path_df, _walk_gap)],
    ignore_index=True,
)


In [None]:
# Map of the driving routes to RYSE Center.
# latlon_drive stores x (longitude) under 'LAT' and y (latitude) under 'LON',
# which is why the lat / lon arguments below are crossed.
fig = px.line_mapbox(
    latlon_drive,
    lat="LON",
    lon="LAT",
    color="type",
    mapbox_style="open-street-map",
    zoom=10,
)
# site_pt is [LAT, LON] as returned by locate().
fig.add_scattermapbox(lat=[site_pt[0]], lon=[site_pt[1]], name="RYSE Center")
fig.show()

In [None]:
# Map of the walking routes to RYSE Center.
# latlon_walk stores x (longitude) under 'LAT' and y (latitude) under 'LON',
# which is why the lat / lon arguments below are crossed.
fig = px.line_mapbox(
    latlon_walk,
    lat="LON",
    lon="LAT",
    color="type",
    mapbox_style="open-street-map",
    zoom=10,
)
# site_pt is [LAT, LON] as returned by locate().
fig.add_scattermapbox(lat=[site_pt[0]], lon=[site_pt[1]], name="RYSE Center")
fig.show()
