In [1]:
import osmnx as ox
from osmnx import utils_graph
import networkx as nx

import geopandas as gpd
import pandas as pd
import numpy as np
from statistics import mean

import matplotlib.pyplot as plt
import shapely
import folium

import os

import warnings

warnings.filterwarnings('ignore')

from distance_matrix_calculation import *

## Build Distance Matrix

### Read in requisite data and define CRS

In [2]:
# Coordinate reference system identifiers used when building geometries.
# NOTE(review): ca_albers_nad83 is not referenced in the visible cells and is
# an ESRI-style name, not an EPSG code -- confirm it resolves before using it.
ca_albers_nad83 = 'NAD_1983_California_Teale_Albers_FtUS'
# NAD83 geographic coordinates are EPSG:4269. The earlier value 'EPSG:4629'
# had two digits transposed and identifies an unrelated CRS (Tahaa 54).
nad83 = 'EPSG:4269'
wgs84 = 'EPSG:4326'

In [3]:
# Candidate-site CSV, resolved relative to the current working directory.
sites_path = os.path.join(
    os.getcwd(),
    'data',
    'candidate_site_campuses_2021-11-17',
    'candidate_sites_campuses.csv',
)

# Read every site, then drop rows whose category is 'X' and keep only the
# identifier, category, roof area and coordinate columns.
sites_df_raw = pd.read_csv(sites_path)
sites_df_raw = sites_df_raw.loc[
    sites_df_raw['cat_site'].ne('X'),
    ['id_site', 'cat_site', 'SQFT_ROOF', 'LON', 'LAT'],
]

# Build point geometries from the LON/LAT columns; the coordinates are
# labelled with the `nad83` CRS, not reprojected.
sites_geom = gpd.points_from_xy(sites_df_raw['LON'], sites_df_raw['LAT'], crs=nad83)
sites_gdf = gpd.GeoDataFrame(sites_df_raw, geometry=sites_geom, crs=nad83)

In [4]:
# Block-group shapefile, resolved relative to the current working directory.
bgs_path = os.path.join(os.getcwd(), 'data', 'bg_ca_19', 'shp', 'blockgroup_CA_19.shp')

# Read the block groups and reproject them to the CRS of the sites layer.
bgs_gdf = gpd.read_file(bgs_path).to_crs(sites_gdf.crs)

# INTPTLON / INTPTLAT are parsed to floats one value at a time
# (presumably the Census internal-point coordinates -- confirm against the data dictionary).
bgs_lons = list(map(float, bgs_gdf['INTPTLON']))
bgs_lats = list(map(float, bgs_gdf['INTPTLAT']))

# Plain attribute table: identifier, county FIPS and the parsed coordinates.
bgs = pd.DataFrame(bgs_gdf[['GISJOIN', 'COUNTYFP']]).assign(LON=bgs_lons, LAT=bgs_lats)

# One point per block group, built from the LON/LAT columns and labelled with `nad83`.
bgs_pt_geom = gpd.points_from_xy(x=bgs['LON'], y=bgs['LAT'], crs=nad83)
bgs_pt_gdf = gpd.GeoDataFrame(bgs, geometry=bgs_pt_geom, crs=nad83)

### Iterate through each county and build matrix

In [5]:
# California county names, in alphabetical order.
ca_county_names = [
    'Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa',
    'Contra Costa', 'Del Norte', 'El Dorado', 'Fresno', 'Glenn', 'Humboldt',
    'Imperial', 'Inyo', 'Kern', 'Kings', 'Lake', 'Lassen', 'Los Angeles',
    'Madera', 'Marin', 'Mariposa', 'Mendocino', 'Merced', 'Modoc', 'Mono',
    'Monterey', 'Napa', 'Nevada', 'Orange', 'Placer', 'Plumas', 'Riverside',
    'Sacramento', 'San Benito', 'San Bernardino', 'San Diego',
    'San Francisco', 'San Joaquin', 'San Luis Obispo', 'San Mateo',
    'Santa Barbara', 'Santa Clara', 'Santa Cruz', 'Shasta', 'Sierra',
    'Siskiyou', 'Solano', 'Sonoma', 'Stanislaus', 'Sutter', 'Tehama',
    'Trinity', 'Tulare', 'Tuolumne', 'Ventura', 'Yolo', 'Yuba',
]

# County codes are the odd numbers 1, 3, ..., 115, zero-padded to three digits.
ca_county_fips = [str(code).zfill(3) for code in range(1, 116, 2)]

# Pair each name with the code at the same position.
ca_counties = dict(zip(ca_county_names, ca_county_fips))

In [22]:
# Restrict the run to Santa Cruz County. This reassigns `ca_counties`,
# replacing any mapping previously bound to that name, so code that iterates
# over `ca_counties` afterwards only sees this single entry.
ca_counties = {
    'Santa Cruz': '087',
}

In [32]:
# Divisor used to convert the summed edge 'length' weights to miles
# (assumes the graph's 'length' attribute is in metres -- TODO confirm).
METERS_PER_MILE = 1609.344

# For every county: build the drive graph, clip sites and block groups to the
# county, compute the network distance from each block group to each nearby
# site, and write the resulting matrix to a per-county CSV.
for county, county_fips in ca_counties.items():

    # Get graph for county
    county_graph, county_nodes_gdf, county_edges_gdf = get_county_drive_graph(county, nad83)

    county_bbox = make_county_bbox(county_nodes_gdf)

    sites_county_gdf = clip_sites_to_county(county_bbox, sites_gdf)
    bgs_county_gdf = clip_bgs_to_county(county_bbox, bgs_pt_gdf, county_fips)

    bgs_county_buffer_gdf = buffer_bgs(bgs_county_gdf, nad83)

    # Initialize blank matrix: one row per block group (GISJOIN), one column
    # per site (id_site), every cell NaN until a distance is computed.
    # `.tolist()` keeps the index/column axes unnamed so the CSV header is
    # the same as before. `np.nan` is used because the `np.NaN` alias was
    # removed in NumPy 2.0.
    dist_to_site_df = pd.DataFrame(
        np.nan,
        index=bgs_county_gdf['GISJOIN'].tolist(),
        columns=sites_county_gdf['id_site'].tolist(),
    )

    # Build matrix
    bgs_bbox = bgs_county_gdf.loc[:, 'GISJOIN']

    # Nearest graph node per site, looked up once per county. The lookup's
    # arguments do not depend on the block group, so the result is reused
    # (assumes get_coords_and_nearest_node is deterministic -- confirm).
    site_nodes = {}

    for bg in bgs_bbox:

        node_origin = get_coords_and_nearest_node(bg, 'GISJOIN', bgs_county_gdf, county_graph)

        sites_nearby = get_nearby_sites(bg, bgs_county_buffer_gdf, sites_county_gdf)

        for site in sites_nearby:

            if site not in site_nodes:
                site_nodes[site] = get_coords_and_nearest_node(site, 'id_site', sites_county_gdf, county_graph)
            node_target = site_nodes[site]

            try:
                travel_dist_m = nx.shortest_path_length(county_graph, node_origin, node_target, weight = 'length')
            except (nx.NetworkXNoPath, nx.NodeNotFound):
                # No route between the two nodes (or a node missing from the
                # graph): leave the cell empty. Other exceptions now
                # propagate instead of being silently swallowed.
                dist_to_site_df.loc[bg, site] = np.nan
            else:
                dist_to_site_df.loc[bg, site] = round(travel_dist_m / METERS_PER_MILE, 2)

    # Write output to csv, creating the output directory if it is missing.
    output_county = county.lower().replace(' ', '')
    output_dir = os.path.join(os.getcwd(), 'data', 'distance_matrices')
    os.makedirs(output_dir, exist_ok=True)
    output_file_name = os.path.join(output_dir, 'distmatrix_' + output_county + '.csv')
    dist_to_site_df.to_csv(output_file_name)