# Natural Hazard Risk Processing File

Processing NRI risk data with the household and tank risk data. 

### Importing libraries

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np



### Importing data

This is a preprocessed file with tank and household distances, transformed latitude/longitude coordinates, and the risk data for 6 natural hazards (earthquakes, strong winds, hurricanes, tornadoes, coastal floods, and riverine floods).

In [3]:
# Load the preprocessed household/tank table that already carries the
# per-hazard risk columns, then display it.
df = pd.read_parquet(
    "/hpc/group/codeplus22-vis/celine_data/dist_all_hh_with_children_fixed.parquet"
)
df

Unnamed: 0,child_num,age_code,lat_3857,lon_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_mi,distance_category,is_elderly
0,1.0,C,-8.509454e+06,5.101307e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.459368,4.0,2
1,1.0,H,-8.499018e+06,5.096218e+06,4.881886,15.876431,4.895073,24.892845,-1.0,30.218719,13.460825,28.501947,4.0,2
2,2.0,E,-8.496356e+06,5.099448e+06,4.881886,15.876431,4.895073,24.892845,-1.0,30.218719,13.460825,28.592896,4.0,2
3,1.0,G,-8.509963e+06,5.103102e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.876175,4.0,2
4,1.0,G,-8.508370e+06,5.099066e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.117674,4.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53165525,0.0,Z,-1.167972e+07,4.833652e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0.0,0
53165526,0.0,Z,-1.167971e+07,4.833633e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0.0,0
53165527,0.0,Z,-1.167971e+07,4.833691e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0.0,0
53165528,0.0,Z,-1.167971e+07,4.833535e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0.0,0


In [None]:
# Persist the loaded frame under the household/tank risk file name.
df.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/hh_tank_risk.parquet"
)


#### Here we are just re-reading the file in to test if it exported as a parquet correctly

In [None]:
# Read the exported file back in and display it as a round-trip check.
df = pd.read_parquet(
    "/hpc/group/codeplus22-vis/celine_data/hh_tank_risk.parquet"
)
df

## Processing dataframe for individual natural hazards

Since we want to plot each of the risk dashboards separately, we are now breaking this dataframe down into one dataframe for each type of natural hazard.

### Processing the dataframe for earthquake risk

We are taking the overall dataframe we made above and dropping all of the other risks except for earthquake before exporting it to a parquet file to be used in visualizations.

In [14]:
# Build the earthquake-only frame: keep just the columns used for this hazard,
# in output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the other hazards is unnecessary; the non-inplace rename
# returns a new frame instead of mutating an intermediate one.
df_erqk = df[['is_elderly', 'distance_mi', 'erqk_risks', 'lat_3857', 'lon_3857']].rename(
    columns={'erqk_risks': 'earthquake_risk'}
)

df_erqk

Unnamed: 0,is_elderly,distance_mi,earthquake_risk,lat_3857,lon_3857
0,2,33.459368,2.050670,-8.509454e+06,5.101307e+06
1,2,28.501947,4.881886,-8.499018e+06,5.096218e+06
2,2,28.592896,4.881886,-8.496356e+06,5.099448e+06
3,2,33.876175,2.050670,-8.509963e+06,5.103102e+06
4,2,33.117674,2.050670,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,7.743007,-1.167972e+07,4.833652e+06
53165526,0,215.000000,7.743007,-1.167971e+07,4.833633e+06
53165527,0,215.000000,7.743007,-1.167971e+07,4.833691e+06
53165528,0,215.000000,7.743007,-1.167971e+07,4.833535e+06


In [15]:
# Export the earthquake-only frame for use in visualizations.
df_erqk.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/earthquake_risk.parquet"
)

### Processing dataframe for strong wind risk

We are doing the same thing as for the earthquake file, this time dropping all of the other risks except for strong wind risk before exporting it to a parquet file to be used in visualizations.

In [16]:
# Build the strong-wind-only frame: keep just the columns used for this hazard,
# in output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the other hazards is unnecessary; the non-inplace rename
# returns a new frame instead of mutating an intermediate one.
df_swnd = df[['is_elderly', 'distance_mi', 'swnd_risks', 'lat_3857', 'lon_3857']].rename(
    columns={'swnd_risks': 'strong_wind_risk'}
)

df_swnd

Unnamed: 0,is_elderly,distance_mi,strong_wind_risk,lat_3857,lon_3857
0,2,33.459368,15.375901,-8.509454e+06,5.101307e+06
1,2,28.501947,15.876431,-8.499018e+06,5.096218e+06
2,2,28.592896,15.876431,-8.496356e+06,5.099448e+06
3,2,33.876175,15.375901,-8.509963e+06,5.103102e+06
4,2,33.117674,15.375901,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,12.625942,-1.167972e+07,4.833652e+06
53165526,0,215.000000,12.625942,-1.167971e+07,4.833633e+06
53165527,0,215.000000,12.625942,-1.167971e+07,4.833691e+06
53165528,0,215.000000,12.625942,-1.167971e+07,4.833535e+06


In [17]:
# Export the strong-wind-only frame for use in visualizations.
df_swnd.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/strong_wind_risk.parquet"
)

### Processing dataframe for hurricane risk

Once again, we will be doing the same steps as the ones above, except we are keeping the column for hurricane risk. Every time we make a new dataframe, we will export it as a parquet file.

In [4]:
# Build the hurricane-only frame: keep just the columns used for this hazard,
# in output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the other hazards is unnecessary; the non-inplace rename
# returns a new frame instead of mutating an intermediate one.
df_hrcn = df[['is_elderly', 'distance_mi', 'hrcn_risks', 'lat_3857', 'lon_3857']].rename(
    columns={'hrcn_risks': 'hurricane_risk'}
)

df_hrcn

Unnamed: 0,is_elderly,distance_mi,hurricane_risk,lat_3857,lon_3857
0,2,33.459368,5.380037,-8.509454e+06,5.101307e+06
1,2,28.501947,4.895073,-8.499018e+06,5.096218e+06
2,2,28.592896,4.895073,-8.496356e+06,5.099448e+06
3,2,33.876175,5.380037,-8.509963e+06,5.103102e+06
4,2,33.117674,5.380037,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,-1.000000,-1.167972e+07,4.833652e+06
53165526,0,215.000000,-1.000000,-1.167971e+07,4.833633e+06
53165527,0,215.000000,-1.000000,-1.167971e+07,4.833691e+06
53165528,0,215.000000,-1.000000,-1.167971e+07,4.833535e+06


In [7]:
# Export the hurricane-only frame for use in visualizations.
df_hrcn.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/hurricane_risk.parquet"
)

### Processing dataframe for tornado risk

In [5]:
# Build the tornado-only frame: keep just the columns used for this hazard,
# in output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the other hazards is unnecessary; the non-inplace rename
# returns a new frame instead of mutating an intermediate one.
df_trnd = df[['is_elderly', 'distance_mi', 'trnd_risks', 'lat_3857', 'lon_3857']].rename(
    columns={'trnd_risks': 'tornado_risk'}
)

df_trnd

Unnamed: 0,is_elderly,distance_mi,tornado_risk,lat_3857,lon_3857
0,2,33.459368,14.512438,-8.509454e+06,5.101307e+06
1,2,28.501947,24.892845,-8.499018e+06,5.096218e+06
2,2,28.592896,24.892845,-8.496356e+06,5.099448e+06
3,2,33.876175,14.512438,-8.509963e+06,5.103102e+06
4,2,33.117674,14.512438,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,45.758161,-1.167972e+07,4.833652e+06
53165526,0,215.000000,45.758161,-1.167971e+07,4.833633e+06
53165527,0,215.000000,45.758161,-1.167971e+07,4.833691e+06
53165528,0,215.000000,45.758161,-1.167971e+07,4.833535e+06


In [6]:
# Export the tornado-only frame for use in visualizations.
df_trnd.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/tornado_risk.parquet"
)

### Processing dataframe for coastal flood risk

In [10]:
# Build the coastal-flood-only frame: keep just the columns used for this
# hazard, in output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the other hazards is unnecessary; the non-inplace rename
# returns a new frame instead of mutating an intermediate one.
df_cfld = df[['is_elderly', 'distance_mi', 'cfld_risks', 'lat_3857', 'lon_3857']].rename(
    columns={'cfld_risks': 'coastal_flood_risk'}
)

df_cfld

Unnamed: 0,is_elderly,distance_mi,coastal_flood_risk,lat_3857,lon_3857
0,2,33.459368,-1.0,-8.509454e+06,5.101307e+06
1,2,28.501947,-1.0,-8.499018e+06,5.096218e+06
2,2,28.592896,-1.0,-8.496356e+06,5.099448e+06
3,2,33.876175,-1.0,-8.509963e+06,5.103102e+06
4,2,33.117674,-1.0,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,-1.0,-1.167972e+07,4.833652e+06
53165526,0,215.000000,-1.0,-1.167971e+07,4.833633e+06
53165527,0,215.000000,-1.0,-1.167971e+07,4.833691e+06
53165528,0,215.000000,-1.0,-1.167971e+07,4.833535e+06


In [13]:
# Export the coastal-flood-only frame for use in visualizations.
# NOTE(review): the file is named "coast_flood_risk" while the column is
# "coastal_flood_risk" -- confirm what downstream readers expect before renaming.
df_cfld.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/coast_flood_risk.parquet"
)

### Processing dataframe for riverine flood risk

In [12]:
# Build the riverine-flood-only frame: keep just the columns used for this
# hazard, in output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the other hazards is unnecessary; the non-inplace rename
# returns a new frame instead of mutating an intermediate one.
df_rfld = df[['is_elderly', 'distance_mi', 'rfld_risks', 'lat_3857', 'lon_3857']].rename(
    columns={'rfld_risks': 'riverine_flood_risk'}
)

df_rfld

Unnamed: 0,is_elderly,distance_mi,riverine_flood_risk,lat_3857,lon_3857
0,2,33.459368,17.062917,-8.509454e+06,5.101307e+06
1,2,28.501947,30.218719,-8.499018e+06,5.096218e+06
2,2,28.592896,30.218719,-8.496356e+06,5.099448e+06
3,2,33.876175,17.062917,-8.509963e+06,5.103102e+06
4,2,33.117674,17.062917,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,6.179840,-1.167972e+07,4.833652e+06
53165526,0,215.000000,6.179840,-1.167971e+07,4.833633e+06
53165527,0,215.000000,6.179840,-1.167971e+07,4.833691e+06
53165528,0,215.000000,6.179840,-1.167971e+07,4.833535e+06


In [14]:
# Export the riverine-flood-only frame for use in visualizations.
df_rfld.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/riverine_flood_risk.parquet"
)

### Processing dataframe for average risk

In [5]:
# Build the average-risk frame: keep just the columns used for this view, in
# output order, and give the risk column a readable name.
# Selecting the columns directly already discards every other column, so a
# separate drop() of the per-hazard columns is unnecessary; the non-inplace
# rename returns a new frame instead of mutating an intermediate one.
df_avg = df[['is_elderly', 'distance_mi', 'avg_risk', 'lat_3857', 'lon_3857']].rename(
    columns={'avg_risk': 'average_risk'}
)

df_avg

Unnamed: 0,is_elderly,distance_mi,average_risk,lat_3857,lon_3857
0,2,33.459368,9.063660,-8.509454e+06,5.101307e+06
1,2,28.501947,13.460825,-8.499018e+06,5.096218e+06
2,2,28.592896,13.460825,-8.496356e+06,5.099448e+06
3,2,33.876175,9.063660,-8.509963e+06,5.103102e+06
4,2,33.117674,9.063660,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,12.051158,-1.167972e+07,4.833652e+06
53165526,0,215.000000,12.051158,-1.167971e+07,4.833633e+06
53165527,0,215.000000,12.051158,-1.167971e+07,4.833691e+06
53165528,0,215.000000,12.051158,-1.167971e+07,4.833535e+06


In [7]:
# Export the average-risk frame for use in visualizations.
df_avg.to_parquet(
    "/hpc/group/codeplus22-vis/celine_data/average_risk.parquet"
)