# Plotting Harris County and Charleston County Distances

### Importing libraries and packages

In [2]:
import geopandas as gpd

import cuxfilter
from cuxfilter.layouts import double_feature_quad_base, double_feature
import cudf
import numpy as np

import holoviews as hv
import pandas as pd

from pyproj import Proj, Transformer



### Reading in Harris County and Charleston County data

In [3]:
df_harris = pd.read_parquet('/hpc/group/codeplus22-vis/infousa_copy/distances_harris.parquet')
df_harris = df_harris[df_harris['DIST'] != 4]
df_harris['AGE'] = np.where(((df_harris['head_hh_age_code'] == 'J') | (df_harris['head_hh_age_code'] == 'K') | 
                            (df_harris['head_hh_age_code'] == 'L') | (df_harris['head_hh_age_code'] == 'M')), 1, 2)
df_harris.rename(columns = {'children_ind': 'CHILD'}, inplace = True)
df_harris

Unnamed: 0,CHILD,head_hh_age_code,GE_LONGITUDE_2010,GE_LATITUDE_2010,tank_lat,tank_lon,distance_m,distance_km,DIST,AGE
19703,1,G,-95.361500,29.758100,29.760494,-95.340638,2031.333422,2.031333,3,2
19704,0,H,-95.361500,29.758100,29.760494,-95.340638,2031.333422,2.031333,3,2
19705,0,I,-95.361500,29.758100,29.760494,-95.340638,2031.333422,2.031333,3,2
19706,0,J,-95.361500,29.758100,29.760494,-95.340638,2031.333422,2.031333,3,1
19707,0,F,-95.361500,29.758100,29.760494,-95.340638,2031.333422,2.031333,3,2
...,...,...,...,...,...,...,...,...,...,...
2316143,0,A,-95.365114,29.760214,29.760494,-95.340638,2362.856160,2.362856,3,2
2316145,0,C,-95.378942,29.739481,29.760494,-95.340638,4374.185085,4.374185,3,2
2316147,0,B,-95.352527,29.760223,29.760494,-95.340638,1148.030231,1.148030,2,2
2316148,0,I,-95.367720,29.751347,29.760494,-95.340638,2805.207195,2.805207,3,2


In [4]:
df_charleston = pd.read_parquet('/hpc/group/codeplus22-vis/infousa_copy/distances_charleston.parquet')
df_charleston = df_charleston[df_charleston['DIST'] != 4]
df_charleston
df_charleston['AGE'] = np.where(((df_charleston['head_hh_age_code'] == 'J') | (df_charleston['head_hh_age_code'] == 'K') | 
                            (df_charleston['head_hh_age_code'] == 'L') | (df_charleston['head_hh_age_code'] == 'M')), 1, 2)
df_charleston.rename(columns = {'children_ind': 'CHILD'}, inplace = True)
df_charleston

Unnamed: 0,CHILD,head_hh_age_code,GE_LONGITUDE_2010,GE_LATITUDE_2010,tank_lat,tank_lon,distance_m,distance_km,DIST,AGE
0,0,C,-79.9308,32.7765,32.761407,-79.950218,2472.401421,2.472401,3,2
1,0,M,-79.9308,32.7765,32.761407,-79.950218,2472.401421,2.472401,3,1
2,0,L,-79.9308,32.7765,32.761407,-79.950218,2472.401421,2.472401,3,1
3,1,D,-79.9308,32.7765,32.761407,-79.950218,2472.401421,2.472401,3,2
4,0,M,-79.9308,32.7765,32.761407,-79.950218,2472.401421,2.472401,3,1
...,...,...,...,...,...,...,...,...,...,...
244715,0,M,-80.0250,32.9119,32.913922,-79.984952,3745.184836,3.745185,3,1
244716,0,E,-80.0250,32.9119,32.913922,-79.984952,3745.184836,3.745185,3,2
244717,0,I,-80.0250,32.9119,32.913922,-79.984952,3745.184836,3.745185,3,2
244718,0,B,-80.0250,32.9119,32.913922,-79.984952,3745.184836,3.745185,3,2


### Converting latitudes and longitudes to the correct coordinate system

In [5]:
# Apply transformation
transform_4326_to_3857 = Transformer.from_crs('epsg:4326', 'epsg:3857')
df_harris['LATITUDE_TX'], df_harris['LONGITUDE_TX'] = transform_4326_to_3857.transform(
                                                df_harris['GE_LATITUDE_2010'], df_harris['GE_LONGITUDE_2010']
                                            )

df_harris = df_harris.drop(['GE_LATITUDE_2010', 'GE_LONGITUDE_2010', 'tank_lat', 'tank_lon'], axis=1)
df_harris

Unnamed: 0,CHILD,head_hh_age_code,distance_m,distance_km,DIST,AGE,LATITUDE_TX,LONGITUDE_TX
19703,1,G,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06
19704,0,H,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06
19705,0,I,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06
19706,0,J,2031.333422,2.031333,3,1,-1.061559e+07,3.472494e+06
19707,0,F,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06
...,...,...,...,...,...,...,...,...
2316143,0,A,2362.856160,2.362856,3,2,-1.061600e+07,3.472765e+06
2316145,0,C,4374.185085,4.374185,3,2,-1.061754e+07,3.470106e+06
2316147,0,B,1148.030231,1.148030,2,2,-1.061459e+07,3.472766e+06
2316148,0,I,2805.207195,2.805207,3,2,-1.061629e+07,3.471628e+06


In [6]:
# Apply transformation
transform_4326_to_3857 = Transformer.from_crs('epsg:4326', 'epsg:3857')
df_charleston['LATITUDE_SC'], df_charleston['LONGITUDE_SC'] = transform_4326_to_3857.transform(
                                                df_charleston['GE_LATITUDE_2010'], df_charleston['GE_LONGITUDE_2010']
                                            )

df_charleston = df_charleston.drop(['GE_LATITUDE_2010', 'GE_LONGITUDE_2010', 'tank_lat', 'tank_lon'], axis=1)
df_charleston

Unnamed: 0,CHILD,head_hh_age_code,distance_m,distance_km,DIST,AGE,LATITUDE_SC,LONGITUDE_SC
0,0,C,2472.401421,2.472401,3,2,-8.897856e+06,3.865676e+06
1,0,M,2472.401421,2.472401,3,1,-8.897856e+06,3.865676e+06
2,0,L,2472.401421,2.472401,3,1,-8.897856e+06,3.865676e+06
3,1,D,2472.401421,2.472401,3,2,-8.897856e+06,3.865676e+06
4,0,M,2472.401421,2.472401,3,1,-8.897856e+06,3.865676e+06
...,...,...,...,...,...,...,...,...
244715,0,M,3745.184836,3.745185,3,1,-8.908342e+06,3.883616e+06
244716,0,E,3745.184836,3.745185,3,2,-8.908342e+06,3.883616e+06
244717,0,I,3745.184836,3.745185,3,2,-8.908342e+06,3.883616e+06
244718,0,B,3745.184836,3.745185,3,2,-8.908342e+06,3.883616e+06


### Reading in AST data

In [7]:
df_tanks = gpd.read_file('/hpc/group/codeplus22-vis/ast_dataset/tile_level_annotations.shp')
df_tanks.head(n=3)

Unnamed: 0,tile_name,minx_polyg,miny_polyg,maxx_polyg,maxy_polyg,nw_corner_,nw_corne_1,se_corner_,se_corne_1,object_cla,diameter (,merged_bbo,bbox_withi,Category1,Category2,Category3,Category4,Category5,state,geometry
0,m_4007327_nw_18_060_20190809,974,314,1041,380,40.625753,-73.745466,40.625392,-73.744997,closed_roof_tank,39.6,1,0,0.0,0.0,0.0,0.0,0.0,New York,"POLYGON ((-73.74547 40.62575, -73.74500 40.625..."
1,m_4007327_nw_18_060_20190809,1091,479,1157,512,40.624853,-73.744652,40.624669,-73.744188,closed_roof_tank,19.8,0,0,0.0,0.0,0.0,0.0,0.0,New York,"POLYGON ((-73.74465 40.62485, -73.74419 40.624..."
2,m_4007327_nw_18_060_20190809,851,243,872,265,40.626147,-73.746331,40.626026,-73.746184,closed_roof_tank,12.6,0,0,0.0,0.0,0.0,0.0,0.0,New York,"POLYGON ((-73.74633 40.62615, -73.74618 40.626..."


In [8]:
df_tanks['tank_lat'] = (df_tanks['nw_corner_'] + df_tanks['se_corner_'])/2
df_tanks['tank_lon'] = (df_tanks['nw_corne_1'] + df_tanks['se_corne_1'])/2
df_tanks = df_tanks[['object_cla', 'tank_lat', 'tank_lon']]
df_tanks

Unnamed: 0,object_cla,tank_lat,tank_lon
0,closed_roof_tank,40.625572,-73.745231
1,closed_roof_tank,40.624761,-73.744420
2,closed_roof_tank,40.626086,-73.746257
3,closed_roof_tank,40.625786,-73.746203
4,closed_roof_tank,40.625781,-73.745813
...,...,...,...
98164,narrow_closed_roof_tank,39.777431,-104.920718
98165,narrow_closed_roof_tank,39.777301,-104.920631
98166,narrow_closed_roof_tank,39.777701,-104.920609
98167,narrow_closed_roof_tank,39.776628,-104.920617


### Finding tanks in Harris County; transforming coords to correct coordinate system

In [9]:
df_tanks_harris = df_tanks[(df_tanks['tank_lat'] >= 29.530) & (df_tanks['tank_lat'] <= 30.120)]
df_tanks_harris = df_tanks_harris[(df_tanks_harris['tank_lon'] <= -94.960) & (df_tanks_harris['tank_lon'] >= -95.820)]

df_tanks_harris = df_tanks_harris[['object_cla', 'tank_lat', 'tank_lon']]
df_tanks_harris

Unnamed: 0,object_cla,tank_lat,tank_lon
787,closed_roof_tank,29.977380,-95.389967
788,closed_roof_tank,29.977288,-95.389356
789,closed_roof_tank,29.947372,-95.417173
790,closed_roof_tank,29.945862,-95.436315
791,closed_roof_tank,30.001199,-95.420964
...,...,...,...
89848,narrow_closed_roof_tank,29.886461,-95.410524
89849,narrow_closed_roof_tank,29.886245,-95.410528
89850,narrow_closed_roof_tank,29.886175,-95.410505
89851,closed_roof_tank,29.886404,-95.374128


In [9]:
transform_4326_to_3857 = Transformer.from_crs('epsg:4326', 'epsg:3857')
df_tanks_harris['LATITUDE_TX'], df_tanks_harris['LONGITUDE_TX'] = transform_4326_to_3857.transform(
                                                df_tanks_harris['tank_lat'], df_tanks_harris['tank_lon']
                                            )

df_tanks_harris = df_tanks_harris.drop(['tank_lat', 'tank_lon'], axis=1)
df_tanks_harris

Unnamed: 0,object_cla,LATITUDE_TX,LONGITUDE_TX
787,closed_roof_tank,-1.061876e+07,3.500643e+06
788,closed_roof_tank,-1.061869e+07,3.500631e+06
789,closed_roof_tank,-1.062179e+07,3.496787e+06
790,closed_roof_tank,-1.062392e+07,3.496593e+06
791,closed_roof_tank,-1.062221e+07,3.503704e+06
...,...,...,...
89848,narrow_closed_roof_tank,-1.062105e+07,3.488964e+06
89849,narrow_closed_roof_tank,-1.062105e+07,3.488936e+06
89850,narrow_closed_roof_tank,-1.062105e+07,3.488927e+06
89851,closed_roof_tank,-1.061700e+07,3.488957e+06


### Finding tanks in Charleston County; transforming coords to correct coordinate system

In [10]:
df_tanks_charleston = df_tanks[(df_tanks['tank_lat'] >= 32.560) & (df_tanks['tank_lat'] <= 33.080)]
df_tanks_charleston = df_tanks_charleston[(df_tanks_charleston['tank_lon'] <= -79.460) & (df_tanks_charleston['tank_lon'] >= -80.370)]

df_tanks_charleston = df_tanks_charleston[['object_cla', 'tank_lat', 'tank_lon']]
df_tanks_charleston

Unnamed: 0,object_cla,tank_lat,tank_lon
26475,sedimentation_tank,32.830784,-79.945434
26476,sedimentation_tank,32.830662,-79.946022
26477,closed_roof_tank,32.825047,-79.944816
26478,closed_roof_tank,32.825263,-79.943791
26479,closed_roof_tank,32.826536,-79.941263
...,...,...,...
96278,spherical_tank,32.827541,-79.939549
96279,spherical_tank,32.827387,-79.940151
96280,closed_roof_tank,32.825002,-79.939474
96281,closed_roof_tank,32.824928,-79.939648


In [11]:
transform_4326_to_3857 = Transformer.from_crs('epsg:4326', 'epsg:3857')
df_tanks_charleston['LATITUDE_SC'], df_tanks_charleston['LONGITUDE_SC'] = transform_4326_to_3857.transform(
                                                df_tanks_charleston['tank_lat'], df_tanks_charleston['tank_lon']
                                            )

df_tanks_charleston = df_tanks_charleston.drop(['tank_lat', 'tank_lon'], axis=1)
df_tanks_charleston

Unnamed: 0,object_cla,LATITUDE_SC,LONGITUDE_SC
26475,sedimentation_tank,-8.899485e+06,3.872865e+06
26476,sedimentation_tank,-8.899550e+06,3.872849e+06
26477,closed_roof_tank,-8.899416e+06,3.872105e+06
26478,closed_roof_tank,-8.899302e+06,3.872134e+06
26479,closed_roof_tank,-8.899021e+06,3.872302e+06
...,...,...,...
96278,spherical_tank,-8.898830e+06,3.872435e+06
96279,spherical_tank,-8.898897e+06,3.872415e+06
96280,closed_roof_tank,-8.898822e+06,3.872099e+06
96281,closed_roof_tank,-8.898841e+06,3.872089e+06


### Combining both tank dataframes

In [12]:
df_tanks_harris_charleston = df_tanks_harris.append(df_tanks_charleston, ignore_index = True)
df_tanks_harris_charleston['distance_km'] = 6
df_tanks_harris_charleston['DIST'] = 0
df_tanks_harris_charleston['AGE'] = 0
df_tanks_harris_charleston['CHILD'] = 3.0
df_tanks_harris_charleston

  df_tanks_harris_charleston = df_tanks_harris.append(df_tanks_charleston, ignore_index = True)


Unnamed: 0,object_cla,LATITUDE_TX,LONGITUDE_TX,LATITUDE_SC,LONGITUDE_SC,distance_km,DIST,AGE,CHILD
0,closed_roof_tank,-1.061876e+07,3.500643e+06,,,6,0,0,3.0
1,closed_roof_tank,-1.061869e+07,3.500631e+06,,,6,0,0,3.0
2,closed_roof_tank,-1.062179e+07,3.496787e+06,,,6,0,0,3.0
3,closed_roof_tank,-1.062392e+07,3.496593e+06,,,6,0,0,3.0
4,closed_roof_tank,-1.062221e+07,3.503704e+06,,,6,0,0,3.0
...,...,...,...,...,...,...,...,...,...
1490,spherical_tank,,,-8.898830e+06,3.872435e+06,6,0,0,3.0
1491,spherical_tank,,,-8.898897e+06,3.872415e+06,6,0,0,3.0
1492,closed_roof_tank,,,-8.898822e+06,3.872099e+06,6,0,0,3.0
1493,closed_roof_tank,,,-8.898841e+06,3.872089e+06,6,0,0,3.0


### Combining tank, InfoUSA, and distance dataframes

In [13]:
df = df_harris.append(df_charleston, ignore_index = True)
df = df.append(df_tanks_harris_charleston, ignore_index = True)
df

  df = df_harris.append(df_charleston, ignore_index = True)
  df = df.append(df_tanks_harris_charleston, ignore_index = True)


Unnamed: 0,CHILD,head_hh_age_code,distance_m,distance_km,DIST,AGE,LATITUDE_TX,LONGITUDE_TX,LATITUDE_SC,LONGITUDE_SC,object_cla
0,1.0,G,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06,,,
1,0.0,H,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06,,,
2,0.0,I,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06,,,
3,0.0,J,2031.333422,2.031333,3,1,-1.061559e+07,3.472494e+06,,,
4,0.0,F,2031.333422,2.031333,3,2,-1.061559e+07,3.472494e+06,,,
...,...,...,...,...,...,...,...,...,...,...,...
850691,3.0,,,6.000000,0,0,,,-8.898830e+06,3.872435e+06,spherical_tank
850692,3.0,,,6.000000,0,0,,,-8.898897e+06,3.872415e+06,spherical_tank
850693,3.0,,,6.000000,0,0,,,-8.898822e+06,3.872099e+06,closed_roof_tank
850694,3.0,,,6.000000,0,0,,,-8.898841e+06,3.872089e+06,closed_roof_tank


### Converting pandas dataframe to a cu dataframe, then a cuxfilter one

In [14]:
cdf = cudf.DataFrame.from_pandas(df)

In [15]:
cux_df = cuxfilter.DataFrame.from_dataframe(cdf)

### Defining charts

In [16]:
label_map_dist = {0: 'Tank', 1: '1 football field away', 
             2: '1 mile away', 3: '5km away'}

In [17]:
label_map_age = {0: 'Tank', 1: 'Elderly', 
             2: 'Not Elderly'}

In [18]:
label_map_children = {0: 'No Children', 1: 'Children', 
             2: 'Tank'}

In [19]:
colors = ['#05c1ff', '#ff0000', '#ff00a4', '#a11aeb']

In [20]:
harris = cuxfilter.charts.scatter(x='LATITUDE_TX', y='LONGITUDE_TX', pixel_shade_type='linear', color_palette = colors, aggregate_fn = 'max', aggregate_col = 'DIST', tile_provider="CartoDark", title = 'Households in Harris County in Close Proximity to Tanks',
                                   x_range=(-13825798.514061378,-7542228.134036879), y_range=(2819963.842141629,6272600.009501693), legend = True)

charleston = cuxfilter.charts.scatter(x='LATITUDE_SC', y='LONGITUDE_SC', pixel_shade_type='linear', color_palette = colors, aggregate_fn = 'max', aggregate_col = 'DIST', tile_provider="CartoDark", title = 'Households in Charleston County in Close Proximity to Tanks',
                                   x_range=(-13825798.514061378,-7542228.134036879), y_range=(2819963.842141629,6272600.009501693), legend = True)

dist = cuxfilter.charts.multi_select('DIST', label_map=label_map_dist)

age = cuxfilter.charts.multi_select('AGE', label_map=label_map_age)

children = cuxfilter.charts.multi_select('CHILD', label_map=label_map_children)

dist_slider = cuxfilter.charts.range_slider('distance_km')

charts_list = [harris, charleston, dist, age, children, dist_slider]

In [21]:
d = cux_df.dashboard(charts_list, sidebar = [dist, age, children, dist_slider], layout_array=[
    [1, 1, 2, 2],
    [1, 1, 2, 2]
])

### Displaying interactive dashboard

In [24]:
d.show()
d.app(sidebar_width=200) # run the dashboard within the notebook cell

Dashboard running at port 52709
