In [1]:
# This script will create a map of survey results, with buffers around respondents' addresses according to the max.
# distance they said they'd be comfortable walking to a resilience hub

import osmnx as ox
from osmnx import utils_graph
import networkx as nx

import geopandas as gpd
import pandas as pd
import numpy as np
from statistics import mean

import matplotlib.pyplot as plt
import plotly.express as px
import shapely
import folium
import plotly.graph_objects as go # or plotly.express as px

import os

import sklearn
import pyproj
from scipy.sparse import csr_matrix
from shapely.ops import linemerge
import scipy
import haversine as hs
import warnings

In [281]:
# Load survey data
survey = pd.read_csv(r'data/survey_cleaned.csv')
survey = pd.concat([survey, survey['geometry'].str.split(', ', expand=True).rename(columns={0:'Latitude', 1:'Longitude'})], axis = 1)


In [282]:
nad83 = 'EPSG:4269'
gdf = gpd.GeoDataFrame(survey, geometry=gpd.points_from_xy(survey.Longitude, survey.Latitude), crs = nad83)


In [283]:
survey.Latitude = pd.to_numeric(survey.Latitude)
survey.Longitude = pd.to_numeric(survey.Longitude)

In [284]:
# doesn't actually create the mile radii we're looking for, I don't know if it's worth it to figure out how to do it
fig = px.scatter_mapbox(survey, lat="Latitude", lon="Longitude", 
                     mapbox_style="open-street-map", zoom=10, color_discrete_sequence = ["blue"], size="Distance")

fig.show()

In [88]:
fig = px.density_mapbox(survey, lat="Latitude", lon="Longitude", radius=30,
                     mapbox_style="open-street-map", zoom=10)

fig.show()

In [285]:
# get block group shapefile data for block groups that intersect with our Wilmington buffer
nad83 = 'EPSG:4269'

bgs_path = os.path.join(os.getcwd(), 'data', 'bg_ca_19', 'shp', 'blockgroup_CA_19.shp')
bgs_gdf = gpd.read_file(bgs_path)
bgs_gdf = bgs_gdf.to_crs(nad83)
bgs_lons = [float(intpt) for intpt in bgs_gdf['INTPTLON']]
bgs_lats = [float(intpt) for intpt in bgs_gdf['INTPTLAT']]
bgs = pd.DataFrame(bgs_gdf[['GISJOIN', 'COUNTYFP']])
bgs['LON'] = bgs_lons
bgs['LAT'] = bgs_lats
bgs_pt_geom = gpd.points_from_xy(x = bgs.LON,y = bgs.LAT, crs = nad83)
bgs_pt_gdf = gpd.GeoDataFrame(bgs, geometry = bgs_pt_geom, crs = nad83)

# Building Wilmington shapefile
wilmington_path = os.path.join(os.getcwd(), 'data', 'LA_Times_Neighborhood_Boundaries', 'LA_Times_Neighborhood_Boundaries.shp')
wilmington_gdf = gpd.read_file(wilmington_path)
wilmington_gdf['geometry'] = wilmington_gdf['geometry'].unary_union
del wilmington_gdf['OBJECTID']
area_graph_buffer = 0.1
wilmington_shp = wilmington_gdf.unary_union
wilmington_buffered = wilmington_shp.buffer(area_graph_buffer)

bgs_area_gdf = bgs_pt_gdf.loc[bgs_pt_gdf.within(wilmington_shp)]

In [286]:
# intersect the survey data with block groups
survey_in_bgs = gpd.tools.sjoin(gdf, bgs_gdf, predicate="within", how='inner')


In [287]:
# map block groups and mean survey response distance for block groups we 
#survey_in_bgs.groupby(['GEOID']).size()

grouped = survey_in_bgs.groupby(['GISJOIN'])
counts = grouped.size().to_frame(name='counts')
counts = counts.join(grouped.agg({'Distance': 'mean'}))    
counts = counts.reset_index()

In [288]:
counts_gdf = pd.merge(counts, bgs_gdf)

In [289]:
counts_gdf = gpd.GeoDataFrame(counts_gdf, geometry = counts_gdf.geometry, crs = nad83)
fig = px.choropleth_mapbox(counts_gdf, geojson = counts_gdf.geometry, color = "Distance",
                           locations = counts_gdf.index, mapbox_style="open-street-map", 
                           zoom=1)
fig.show()

In [290]:
counts_gdf = gpd.GeoDataFrame(counts_gdf, geometry = counts_gdf.geometry, crs = nad83)
fig = px.choropleth_mapbox(counts_gdf, geojson = counts_gdf.geometry, color = "counts",
                           locations = counts_gdf.index, mapbox_style="open-street-map", 
                           zoom=1)
fig.show()

In [291]:
# "Spatial interpolation": Get distances between block group centroids and all survey responses
# then calculate distance-weighted average of the block groups
matrix = np.zeros([len(bgs_area_gdf), len(gdf)])

for i in np.arange(len(bgs_area_gdf)):
    for j in np.arange(len(gdf)):
        matrix[i,j] = hs.haversine((bgs_area_gdf.iloc[i]['LAT'], bgs_area_gdf.iloc[i]['LON']),(gdf.iloc[j]['Latitude'], gdf.iloc[j]['Longitude']))


In [292]:
# each row of matrix is a block group, each column is that block group's distance to a different survey response
avg = np.zeros([len(bgs_area_gdf)])

for i in np.arange(len(bgs_area_gdf)): # go through all block groups in matrix, taking weighted average of them
    avg[i] = np.average(survey['Distance'], weights = 1/matrix[i])
    
# add weighted estimated distances to the dataframe
bgs_area_gdf['avg_dist'] = list(avg)
merged_avg = pd.merge(bgs_area_gdf[['avg_dist', 'GISJOIN']], bgs_gdf)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [295]:
# map weighted average distances
merged_avg_gdf = gpd.GeoDataFrame(merged_avg, geometry = merged_avg.geometry, crs = nad83)
fig = px.choropleth_mapbox(merged_avg_gdf, geojson = merged_avg_gdf.geometry, color = "avg_dist",
                           locations = merged_avg_gdf.index, mapbox_style="open-street-map", 
                           zoom=1)
fig.show()