# Calculating Harris County and Charleston County Distances

### Import libraries and packages

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import haversine as hs



### Reading in Harris County and Charleston County InfoUSA Data

In [2]:
# Load the InfoUSA parquet extract; the Harris and Charleston subsets are
# filtered out of this frame in the cells that follow.
df = pd.read_parquet('/hpc/group/codeplus22-vis/infousa_copy/zip_00_99_final.parquet')
df

Unnamed: 0,zip,county,state,child_num,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857
0,18833,113,PA,0,0,K,41.546738,-76.540436,-8.520442e+06,5.093323e+06
1,18833,15,PA,0,0,H,41.590800,-76.424200,-8.507503e+06,5.099879e+06
2,18833,15,PA,1,1,C,41.600392,-76.441724,-8.509454e+06,5.101307e+06
3,18833,15,PA,0,0,L,41.592483,-76.437832,-8.509021e+06,5.100129e+06
4,18833,15,PA,1,1,H,41.566196,-76.347977,-8.499018e+06,5.096218e+06
...,...,...,...,...,...,...,...,...,...,...
190987608,92003,73,CA,0,0,C,33.285885,-117.240445,-1.305115e+07,3.933312e+06
190987609,92003,73,CA,0,0,E,33.284700,-117.210800,-1.304785e+07,3.933154e+06
190987610,92003,73,CA,0,0,G,33.282869,-117.183963,-1.304486e+07,3.932911e+06
190987611,92003,73,CA,0,0,H,33.278284,-117.181181,-1.304455e+07,3.932300e+06


In [3]:
# Harris County subset: rows whose state is 'TX' and whose county code is 201.
df_harris = df.loc[(df['state'] == 'TX') & (df['county'] == 201)]
df_harris

Unnamed: 0,zip,county,state,child_num,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857
135007287,77244,201,TX,1,1,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007288,77244,201,TX,0,0,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007289,77244,201,TX,0,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007290,77244,201,TX,0,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007291,77244,201,TX,2,1,F,29.738000,-95.606000,-1.064281e+07,3.469916e+06
...,...,...,...,...,...,...,...,...,...,...
190664559,77041,201,TX,0,0,E,29.853170,-95.565193,-1.063827e+07,3.484690e+06
190664560,77041,201,TX,0,0,C,29.836561,-95.533901,-1.063479e+07,3.482559e+06
190664561,77041,201,TX,0,0,H,29.865044,-95.599070,-1.064204e+07,3.486214e+06
190664562,77041,201,TX,0,0,B,29.835408,-95.555649,-1.063721e+07,3.482411e+06


In [4]:
# The filter columns and child_num are not used after this point, so remove them.
df_harris = df_harris.drop(columns=['zip', 'county', 'state', 'child_num'])
df_harris

Unnamed: 0,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857
135007287,1,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007288,0,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007289,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007290,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06
135007291,1,F,29.738000,-95.606000,-1.064281e+07,3.469916e+06
...,...,...,...,...,...,...
190664559,0,E,29.853170,-95.565193,-1.063827e+07,3.484690e+06
190664560,0,C,29.836561,-95.533901,-1.063479e+07,3.482559e+06
190664561,0,H,29.865044,-95.599070,-1.064204e+07,3.486214e+06
190664562,0,B,29.835408,-95.555649,-1.063721e+07,3.482411e+06


In [5]:
# Charleston County subset: rows whose state is 'SC' and whose county code is 19.
df_charleston = df.loc[(df['state'] == 'SC') & (df['county'] == 19)]
df_charleston

Unnamed: 0,zip,county,state,child_num,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857
43688371,29402,19,SC,0,0,C,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688372,29402,19,SC,0,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688373,29402,19,SC,0,0,L,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688374,29402,19,SC,1,1,D,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688375,29402,19,SC,0,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06
...,...,...,...,...,...,...,...,...,...,...
101215466,29417,19,SC,0,0,A,32.7870,-79.9917,-8.904635e+06,3.867066e+06
101215467,29417,19,SC,0,0,F,32.7870,-79.9917,-8.904635e+06,3.867066e+06
101215468,29417,19,SC,0,0,I,32.7870,-79.9917,-8.904635e+06,3.867066e+06
101215469,29417,19,SC,0,0,B,32.7870,-79.9917,-8.904635e+06,3.867066e+06


In [6]:
# The filter columns and child_num are not used after this point, so remove them.
df_charleston = df_charleston.drop(columns=['zip', 'county', 'state', 'child_num'])
df_charleston

Unnamed: 0,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857
43688371,0,C,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688372,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688373,0,L,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688374,1,D,32.7765,-79.9308,-8.897856e+06,3.865676e+06
43688375,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06
...,...,...,...,...,...,...
101215466,0,A,32.7870,-79.9917,-8.904635e+06,3.867066e+06
101215467,0,F,32.7870,-79.9917,-8.904635e+06,3.867066e+06
101215468,0,I,32.7870,-79.9917,-8.904635e+06,3.867066e+06
101215469,0,B,32.7870,-79.9917,-8.904635e+06,3.867066e+06


### Reading in AST data

In [13]:
# Read the AST (tank) shapefile with geopandas and preview it.
df_tanks = gpd.read_file('/hpc/group/codeplus22-vis/infousa_copy/ast_master.shp')
df_tanks

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,36059,"POLYGON ((-73.74547 40.62575, -73.74500 40.625..."
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,36059,"POLYGON ((-73.74465 40.62485, -73.74419 40.624..."
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,36059,"POLYGON ((-73.74633 40.62615, -73.74618 40.626..."
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,36059,"POLYGON ((-73.74639 40.62593, -73.74601 40.625..."
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,36059,"POLYGON ((-73.74595 40.62590, -73.74567 40.625..."
...,...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,08031,"POLYGON ((-104.92075 39.77746, -104.92069 39.7..."
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,08031,"POLYGON ((-104.92066 39.77732, -104.92060 39.7..."
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,08031,"POLYGON ((-104.92064 39.77772, -104.92058 39.7..."
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,08031,"POLYGON ((-104.92065 39.77665, -104.92059 39.7..."


In [14]:
# State and county labels of the tanks are not needed for the distance search.
df_tanks = df_tanks.drop(columns=['state', 'county'])
df_tanks

Unnamed: 0,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,"POLYGON ((-73.74547 40.62575, -73.74500 40.625..."
1,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,"POLYGON ((-73.74465 40.62485, -73.74419 40.624..."
2,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,"POLYGON ((-73.74633 40.62615, -73.74618 40.626..."
3,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,"POLYGON ((-73.74639 40.62593, -73.74601 40.625..."
4,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,"POLYGON ((-73.74595 40.62590, -73.74567 40.625..."
...,...,...,...,...,...,...,...
98164,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,"POLYGON ((-104.92075 39.77746, -104.92069 39.7..."
98165,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,"POLYGON ((-104.92066 39.77732, -104.92060 39.7..."
98166,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,"POLYGON ((-104.92064 39.77772, -104.92058 39.7..."
98167,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,"POLYGON ((-104.92065 39.77665, -104.92059 39.7..."


### Finding tanks closest to households

#### Harris County:

In [15]:
# Build one point per Harris household from its lon/lat columns and keep only
# that geometry column for the nearest-tank search.
# NOTE(review): df_harris is displayed with a geometry column later in the
# notebook, so this constructor call appears to add it to df_harris as a side
# effect; the merge cell relies on that, so keep passing df_harris itself here.
gdf_harris = gpd.GeoDataFrame(
    df_harris,
    geometry=gpd.points_from_xy(x=df_harris['lon_h_4326'], y=df_harris['lat_h_4326']),
)[['geometry']]
gdf_harris

Unnamed: 0,geometry
135007287,POINT (-95.60600 29.73800)
135007288,POINT (-95.60600 29.73800)
135007289,POINT (-95.60600 29.73800)
135007290,POINT (-95.60600 29.73800)
135007291,POINT (-95.60600 29.73800)
...,...
190664559,POINT (-95.56519 29.85317)
190664560,POINT (-95.53390 29.83656)
190664561,POINT (-95.59907 29.86504)
190664562,POINT (-95.55565 29.83541)


In [16]:
# Replace the polygon outline of every tank with a single point built from the
# tank's lon/lat columns, so tanks can be searched as points.
gdf_tanks = gpd.GeoDataFrame(
    df_tanks,
    geometry=gpd.points_from_xy(x=df_tanks['lon_t_4326'], y=df_tanks['lat_t_4326']),
)
gdf_tanks

Unnamed: 0,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,POINT (-73.74523 40.62557)
1,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,POINT (-73.74442 40.62476)
2,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,POINT (-73.74626 40.62609)
3,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,POINT (-73.74620 40.62579)
4,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,POINT (-73.74581 40.62578)
...,...,...,...,...,...,...,...
98164,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,POINT (-104.92072 39.77743)
98165,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,POINT (-104.92063 39.77730)
98166,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,POINT (-104.92061 39.77770)
98167,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,POINT (-104.92062 39.77663)


Finding closest tanks:

In [17]:
from sklearn.neighbors import BallTree
import numpy as np

def get_nearest(src_points, candidates, k_neighbors=1, metric='haversine'):
    """Find the nearest candidate point for every source point.

    Parameters
    ----------
    src_points : array-like of shape (n_src, 2)
        Query points. With the default ``metric='haversine'`` every row must be
        ``(latitude, longitude)`` in radians.
    candidates : array-like of shape (n_candidates, 2)
        Points to search, laid out exactly like ``src_points``.
    k_neighbors : int, default 1
        Number of neighbours requested from the tree; only the closest one is
        returned.
    metric : str, default 'haversine'
        Distance metric handed to ``BallTree``.

    Returns
    -------
    tuple of (ndarray, ndarray)
        ``closest``: for each source point, the row position in ``candidates``
        of its nearest neighbour. ``closest_dist``: the matching distance; for
        the haversine metric this is a great-circle angle in radians (multiply
        by the Earth radius to obtain a length).
    """
    # Create the tree from the candidate points
    tree = BallTree(candidates, leaf_size=15, metric=metric)

    # query() returns (n_src, k) arrays ordered nearest-first
    distances, indices = tree.query(src_points, k=k_neighbors)

    # Column 0 holds the closest neighbour (column 1 the second closest, etc.)
    return (indices[:, 0], distances[:, 0])


def _latlon_radians(points):
    """Return an (n, 2) array of (latitude, longitude) in radians for a GeoSeries of points."""
    return np.deg2rad(np.column_stack((points.y.to_numpy(), points.x.to_numpy())))


def nearest_neighbor(left_gdf, right_gdf):
    """
    For each point in left_gdf, find the closest point in right_gdf and return those rows.

    The search uses great-circle (haversine) distance. Comparing raw lon/lat
    angles with a Euclidean metric would treat a degree of longitude as being as
    long as a degree of latitude, which only holds at the equator, and can
    therefore select a point that is not the closest one on the ground.

    NOTICE: Assumes that the input Points are lon/lat in degrees (WGS84),
    i.e. geom.x is the longitude and geom.y is the latitude.

    Returns the rows of right_gdf (all columns) that are nearest to each row of
    left_gdf, in the order of left_gdf and with a fresh 0..n-1 index.
    """

    # Ensure that index in right gdf is formed of sequential numbers, so the
    # positions returned by the tree can be used as labels below
    right = right_gdf.copy().reset_index(drop=True)

    # Coordinates as (lat, lon) RADIANS, the layout the haversine metric requires
    left_radians = _latlon_radians(left_gdf.geometry)
    right_radians = _latlon_radians(right.geometry)

    # closest ==> index in right that corresponds to the closest point
    # (the angular distance is returned as well but is not needed here)
    closest, _ = get_nearest(src_points=left_radians, candidates=right_radians)

    # Rows from the right GeoDataFrame that are closest to the points in left_gdf
    closest_points = right.loc[closest]

    # Ensure that the row order/index lines up with left_gdf positionally
    closest_points = closest_points.reset_index(drop=True)

    return closest_points

In [18]:
%%time
# One tank row per Harris household, returned in household order.
closest_tanks_harris = nearest_neighbor(left_gdf=gdf_harris, right_gdf=gdf_tanks)
closest_tanks_harris

CPU times: user 2min 30s, sys: 733 ms, total: 2min 30s
Wall time: 2min 31s


Unnamed: 0,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
1,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
2,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
3,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
4,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
...,...,...,...,...,...,...,...
2335203,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)
2335204,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)
2335205,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)
2335206,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)


Re-merging with household coordinates to find distance:

In [19]:
# Copy the 0..n-1 row labels into an 'index' column (the labels themselves stay 0..n-1).
closest_tanks_harris = closest_tanks_harris.reset_index(drop=False)
closest_tanks_harris

Unnamed: 0,index,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,0,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
1,1,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
2,2,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
3,3,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
4,4,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,POINT (-95.41283 29.65269)
...,...,...,...,...,...,...,...,...
2335203,2335203,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)
2335204,2335204,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)
2335205,2335205,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)
2335206,2335206,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,POINT (-95.43760 29.86794)


In [20]:
# Move the original row labels into an 'index' column so the households get a
# fresh 0..n-1 index that matches the nearest-tank frame row for row.
df_harris = df_harris.reset_index(drop=False)
df_harris

Unnamed: 0,index,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857,geometry
0,135007287,1,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06,POINT (-95.60600 29.73800)
1,135007288,0,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06,POINT (-95.60600 29.73800)
2,135007289,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06,POINT (-95.60600 29.73800)
3,135007290,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06,POINT (-95.60600 29.73800)
4,135007291,1,F,29.738000,-95.606000,-1.064281e+07,3.469916e+06,POINT (-95.60600 29.73800)
...,...,...,...,...,...,...,...,...
2335203,190664559,0,E,29.853170,-95.565193,-1.063827e+07,3.484690e+06,POINT (-95.56519 29.85317)
2335204,190664560,0,C,29.836561,-95.533901,-1.063479e+07,3.482559e+06,POINT (-95.53390 29.83656)
2335205,190664561,0,H,29.865044,-95.599070,-1.064204e+07,3.486214e+06,POINT (-95.59907 29.86504)
2335206,190664562,0,B,29.835408,-95.555649,-1.063721e+07,3.482411e+06,POINT (-95.55565 29.83541)


In [21]:
# Both frames carry a 0..n-1 index in the same household order, so joining on
# the index pairs every household with the tank that was found for it.
df_harris_dist = df_harris.merge(closest_tanks_harris, left_index=True, right_index=True)

# Remove the join helpers. Whether the household frame carries its own geometry
# column depends on whether the GeoDataFrame constructor modified df_harris in
# place, so the geometry column may arrive suffixed (geometry_x / geometry_y)
# or as a single 'geometry'; errors='ignore' accepts either layout.
df_harris_dist = df_harris_dist.drop(
    columns=['index_x', 'index_y', 'geometry_x', 'geometry_y', 'geometry'],
    errors='ignore',
)
df_harris_dist

Unnamed: 0,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857
0,1,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06
1,0,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06
2,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06
3,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06
4,1,F,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06
...,...,...,...,...,...,...,...,...,...,...,...,...
2335203,0,E,29.853170,-95.565193,-1.063827e+07,3.484690e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06
2335204,0,C,29.836561,-95.533901,-1.063479e+07,3.482559e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06
2335205,0,H,29.865044,-95.599070,-1.064204e+07,3.486214e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06
2335206,0,B,29.835408,-95.555649,-1.063721e+07,3.482411e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06


Computing distances:

In [22]:
%%time

def distancer(row):
    """Return the haversine distance, in metres, between a row's household and tank coordinates."""
    household = (row['lat_h_4326'], row['lon_h_4326'])
    tank = (row['lat_t_4326'], row['lon_t_4326'])
    # hs.haversine is called with its default unit (kilometres); scale to metres
    kilometres = hs.haversine(household, tank)
    return (kilometres * 1000)

# Evaluate the distance once per household row
df_harris_dist['distance_m'] = df_harris_dist.apply(distancer, axis='columns')
df_harris_dist

CPU times: user 33.9 s, sys: 596 ms, total: 34.5 s
Wall time: 34.6 s


Unnamed: 0,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,distance_m
0,1,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,20931.486080
1,0,H,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,20931.486080
2,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,20931.486080
3,0,M,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,20931.486080
4,1,F,29.738000,-95.606000,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,29.652685,-95.412834,-1.062131e+07,3.458983e+06,20931.486080
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2335203,0,E,29.853170,-95.565193,-1.063827e+07,3.484690e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,12413.140255
2335204,0,C,29.836561,-95.533901,-1.063479e+07,3.482559e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,9920.965430
2335205,0,H,29.865044,-95.599070,-1.064204e+07,3.486214e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,15573.298866
2335206,0,B,29.835408,-95.555649,-1.063721e+07,3.482411e+06,closed_roof_tank,57.6,29.867938,-95.437601,-1.062407e+07,3.486586e+06,11945.509912


Dropping the household latitude and longitude columns in EPSG:4326, which are not used in the GPU visualizations that this data is prepared for. The coordinates of the nearest tank are dropped as well, because this dataset is only used for plotting households.

In [23]:
# Keep only the household 3857 coordinates; all 4326 columns and the tank coordinates go.
df_harris_dist = df_harris_dist.drop(
    columns=['lat_h_4326', 'lon_h_4326', 'lat_t_4326', 'lon_t_4326', 'lat_t_3857', 'lon_t_3857']
)

In [24]:
# Metres to miles (1 mile = 1609.344 m)
df_harris_dist['distance_mi'] = df_harris_dist['distance_m'].div(1609.344)
df_harris_dist

Unnamed: 0,has_child,age_code,lat_h_3857,lon_h_3857,tank_type,diameter,distance_m,distance_mi
0,1,H,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222
1,0,H,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222
2,0,M,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222
3,0,M,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222
4,1,F,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222
...,...,...,...,...,...,...,...,...
2335203,0,E,-1.063827e+07,3.484690e+06,closed_roof_tank,57.6,12413.140255,7.713168
2335204,0,C,-1.063479e+07,3.482559e+06,closed_roof_tank,57.6,9920.965430,6.164602
2335205,0,H,-1.064204e+07,3.486214e+06,closed_roof_tank,57.6,15573.298866,9.676799
2335206,0,B,-1.063721e+07,3.482411e+06,closed_roof_tank,57.6,11945.509912,7.422596


Categorizing households by distance measures

In [25]:
# Distance bands in miles: 1 = up to 0.5, 2 = (0.5, 1], 3 = (1, 5], 4 = more than 5
conditions_harris = [
    df_harris_dist['distance_mi'].le(0.5),
    df_harris_dist['distance_mi'].gt(0.5) & df_harris_dist['distance_mi'].le(1),
    df_harris_dist['distance_mi'].gt(1) & df_harris_dist['distance_mi'].le(5),
    df_harris_dist['distance_mi'].gt(5),
]

values_harris = [1, 2, 3, 4]

# A row that matches none of the bands (e.g. a missing distance) receives 0,
# which is np.select's default
df_harris_dist['distance_category'] = np.select(conditions_harris, values_harris, default=0)
df_harris_dist

Unnamed: 0,has_child,age_code,lat_h_3857,lon_h_3857,tank_type,diameter,distance_m,distance_mi,distance_category
0,1,H,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
1,0,H,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
2,0,M,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
3,0,M,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
4,1,F,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
...,...,...,...,...,...,...,...,...,...
2335203,0,E,-1.063827e+07,3.484690e+06,closed_roof_tank,57.6,12413.140255,7.713168,4
2335204,0,C,-1.063479e+07,3.482559e+06,closed_roof_tank,57.6,9920.965430,6.164602,4
2335205,0,H,-1.064204e+07,3.486214e+06,closed_roof_tank,57.6,15573.298866,9.676799,4
2335206,0,B,-1.063721e+07,3.482411e+06,closed_roof_tank,57.6,11945.509912,7.422596,4


Exporting to parquet file

In [26]:
# Write the Harris household/distance table to parquet.
df_harris_dist.to_parquet('/hpc/group/codeplus22-vis/infousa_copy/distances_harris_final.parquet')

In [27]:
# Read the exported file back to confirm the round trip. A dedicated name is
# used so that `df` keeps referring to the raw InfoUSA table loaded in In [2];
# rebinding `df` here would make the county-filter cells fail if re-run.
df_harris_check = pd.read_parquet('/hpc/group/codeplus22-vis/infousa_copy/distances_harris_final.parquet')
df_harris_check

Unnamed: 0,has_child,age_code,lat_h_3857,lon_h_3857,tank_type,diameter,distance_m,distance_mi,distance_category
0,1,H,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
1,0,H,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
2,0,M,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
3,0,M,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
4,1,F,-1.064281e+07,3.469916e+06,closed_roof_tank,10.2,20931.486080,13.006222,4
...,...,...,...,...,...,...,...,...,...
2335203,0,E,-1.063827e+07,3.484690e+06,closed_roof_tank,57.6,12413.140255,7.713168,4
2335204,0,C,-1.063479e+07,3.482559e+06,closed_roof_tank,57.6,9920.965430,6.164602,4
2335205,0,H,-1.064204e+07,3.486214e+06,closed_roof_tank,57.6,15573.298866,9.676799,4
2335206,0,B,-1.063721e+07,3.482411e+06,closed_roof_tank,57.6,11945.509912,7.422596,4


#### Charleston County

In [28]:
# Build one point per Charleston household from its lon/lat columns and keep
# only that geometry column for the nearest-tank search.
# NOTE(review): df_charleston is displayed with a geometry column later in the
# notebook, so this constructor call appears to add it to df_charleston as a
# side effect; the merge cell relies on that, so keep passing df_charleston here.
gdf_charleston = gpd.GeoDataFrame(
    df_charleston,
    geometry=gpd.points_from_xy(x=df_charleston['lon_h_4326'], y=df_charleston['lat_h_4326']),
)[['geometry']]
gdf_charleston

Unnamed: 0,geometry
43688371,POINT (-79.93080 32.77650)
43688372,POINT (-79.93080 32.77650)
43688373,POINT (-79.93080 32.77650)
43688374,POINT (-79.93080 32.77650)
43688375,POINT (-79.93080 32.77650)
...,...
101215466,POINT (-79.99170 32.78700)
101215467,POINT (-79.99170 32.78700)
101215468,POINT (-79.99170 32.78700)
101215469,POINT (-79.99170 32.78700)


Finding closest tanks:

In [29]:
%%time
# One tank row per Charleston household, returned in household order.
closest_tanks_charleston = nearest_neighbor(left_gdf=gdf_charleston, right_gdf=gdf_tanks)
closest_tanks_charleston

CPU times: user 21.2 s, sys: 90.5 ms, total: 21.3 s
Wall time: 21.4 s


Unnamed: 0,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
1,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
2,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
3,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
4,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
...,...,...,...,...,...,...,...
245095,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)
245096,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)
245097,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)
245098,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)


Re-merging with household coordinates to find distance:

In [30]:
# Copy the 0..n-1 row labels into an 'index' column (the labels themselves stay 0..n-1).
closest_tanks_charleston = closest_tanks_charleston.reset_index(drop=False)
closest_tanks_charleston

Unnamed: 0,index,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,0,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
1,1,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
2,2,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
3,3,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
4,4,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,POINT (-79.95022 32.76141)
...,...,...,...,...,...,...,...,...
245095,245095,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)
245096,245096,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)
245097,245097,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)
245098,245098,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,POINT (-79.96275 32.82609)


In [31]:
# Move the original row labels into an 'index' column so the households get a
# fresh 0..n-1 index that matches the nearest-tank frame row for row.
df_charleston = df_charleston.reset_index(drop=False)
df_charleston

Unnamed: 0,index,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857,geometry
0,43688371,0,C,32.7765,-79.9308,-8.897856e+06,3.865676e+06,POINT (-79.93080 32.77650)
1,43688372,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06,POINT (-79.93080 32.77650)
2,43688373,0,L,32.7765,-79.9308,-8.897856e+06,3.865676e+06,POINT (-79.93080 32.77650)
3,43688374,1,D,32.7765,-79.9308,-8.897856e+06,3.865676e+06,POINT (-79.93080 32.77650)
4,43688375,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06,POINT (-79.93080 32.77650)
...,...,...,...,...,...,...,...,...
245095,101215466,0,A,32.7870,-79.9917,-8.904635e+06,3.867066e+06,POINT (-79.99170 32.78700)
245096,101215467,0,F,32.7870,-79.9917,-8.904635e+06,3.867066e+06,POINT (-79.99170 32.78700)
245097,101215468,0,I,32.7870,-79.9917,-8.904635e+06,3.867066e+06,POINT (-79.99170 32.78700)
245098,101215469,0,B,32.7870,-79.9917,-8.904635e+06,3.867066e+06,POINT (-79.99170 32.78700)


In [32]:
# Both frames carry a 0..n-1 index in the same household order, so joining on
# the index pairs every household with the tank that was found for it.
df_charleston_dist = df_charleston.merge(closest_tanks_charleston, left_index=True, right_index=True)

# Remove the join helpers. Whether the household frame carries its own geometry
# column depends on whether the GeoDataFrame constructor modified df_charleston
# in place, so the geometry column may arrive suffixed (geometry_x / geometry_y)
# or as a single 'geometry'; errors='ignore' accepts either layout.
df_charleston_dist = df_charleston_dist.drop(
    columns=['index_x', 'index_y', 'geometry_x', 'geometry_y', 'geometry'],
    errors='ignore',
)
df_charleston_dist

Unnamed: 0,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857
0,0,C,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06
1,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06
2,0,L,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06
3,1,D,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06
4,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06
...,...,...,...,...,...,...,...,...,...,...,...,...
245095,0,A,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06
245096,0,F,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06
245097,0,I,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06
245098,0,B,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06


Computing distances:

In [33]:
# Reuse the row-wise distancer helper defined in the Harris County section
df_charleston_dist['distance_m'] = df_charleston_dist.apply(distancer, axis='columns')
df_charleston_dist

Unnamed: 0,has_child,age_code,lat_h_4326,lon_h_4326,lat_h_3857,lon_h_3857,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,distance_m
0,0,C,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,2472.401421
1,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,2472.401421
2,0,L,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,2472.401421
3,1,D,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,2472.401421
4,0,M,32.7765,-79.9308,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,32.761407,-79.950218,-8.900018e+06,3.863677e+06,2472.401421
...,...,...,...,...,...,...,...,...,...,...,...,...,...
245095,0,A,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,5119.645368
245096,0,F,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,5119.645368
245097,0,I,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,5119.645368
245098,0,B,32.7870,-79.9917,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,32.826088,-79.962751,-8.901413e+06,3.872243e+06,5119.645368


Dropping the household latitude and longitude columns in EPSG:4326, which are not used in the GPU visualizations that this data is prepared for. The coordinates of the nearest tank are dropped as well, because this dataset is only used for plotting households.

In [34]:
# Keep only the household 3857 coordinates; all 4326 columns and the tank coordinates go.
df_charleston_dist = df_charleston_dist.drop(
    columns=['lat_h_4326', 'lon_h_4326', 'lat_t_4326', 'lon_t_4326', 'lat_t_3857', 'lon_t_3857']
)

In [35]:
# Metres to miles (1 mile = 1609.344 m)
df_charleston_dist['distance_mi'] = df_charleston_dist['distance_m'].div(1609.344)
df_charleston_dist

Unnamed: 0,has_child,age_code,lat_h_3857,lon_h_3857,tank_type,diameter,distance_m,distance_mi
0,0,C,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279
1,0,M,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279
2,0,L,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279
3,1,D,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279
4,0,M,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279
...,...,...,...,...,...,...,...,...
245095,0,A,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200
245096,0,F,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200
245097,0,I,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200
245098,0,B,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200


Categorizing households by distance measures

In [36]:
# Distance bands in miles: 1 = up to 0.5, 2 = (0.5, 1], 3 = (1, 5], 4 = more than 5
conditions_charleston = [
    df_charleston_dist['distance_mi'].le(0.5),
    df_charleston_dist['distance_mi'].gt(0.5) & df_charleston_dist['distance_mi'].le(1),
    df_charleston_dist['distance_mi'].gt(1) & df_charleston_dist['distance_mi'].le(5),
    df_charleston_dist['distance_mi'].gt(5),
]

values_charleston = [1, 2, 3, 4]

# A row that matches none of the bands (e.g. a missing distance) receives 0,
# which is np.select's default
df_charleston_dist['distance_category'] = np.select(conditions_charleston, values_charleston, default=0)
df_charleston_dist

Unnamed: 0,has_child,age_code,lat_h_3857,lon_h_3857,tank_type,diameter,distance_m,distance_mi,distance_category
0,0,C,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
1,0,M,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
2,0,L,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
3,1,D,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
4,0,M,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
...,...,...,...,...,...,...,...,...,...
245095,0,A,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
245096,0,F,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
245097,0,I,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
245098,0,B,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3


Exporting to parquet file

In [37]:
# Write the Charleston household/distance table to parquet.
df_charleston_dist.to_parquet('/hpc/group/codeplus22-vis/infousa_copy/distances_charleston_final.parquet')

In [38]:
# Read the exported file back to confirm the round trip.
# NOTE(review): this rebinds `df`, the name In [2] used for the raw InfoUSA
# table, so the county-filter cells cannot be re-run afterwards without
# reloading that table first.
df = pd.read_parquet('/hpc/group/codeplus22-vis/infousa_copy/distances_charleston_final.parquet')
df

Unnamed: 0,has_child,age_code,lat_h_3857,lon_h_3857,tank_type,diameter,distance_m,distance_mi,distance_category
0,0,C,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
1,0,M,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
2,0,L,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
3,1,D,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
4,0,M,-8.897856e+06,3.865676e+06,external_floating_roof_tank,10.8,2472.401421,1.536279,3
...,...,...,...,...,...,...,...,...,...
245095,0,A,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
245096,0,F,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
245097,0,I,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
245098,0,B,-8.904635e+06,3.867066e+06,closed_roof_tank,8.4,5119.645368,3.181200,3
