# Natural Hazard Risk Processing File

Processing NRI risk data with the household and tank risk data. 

### Importing libraries

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np



### Importing household data

This is a preprocessed file with tank and household distances, transformed latitude/longitude coordinates, and the risk data for 6 natural hazards (earthquakes, strong winds, hurricanes, tornadoes, coastal floods, and riverine floods).

In [2]:
# Load the preprocessed household table (distances, coordinates and per-hazard risks).
household_parquet = '/hpc/group/codeplus22-vis/infousa_copy/distances_all_hh_with_children_final.parquet'
df = pd.read_parquet(household_parquet)
df

Unnamed: 0,zip,county,state,child_num,age_code,lat_h_3857,lon_h_3857,lat_h_4326,lon_h_4326,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_m,distance_mi,distance_category
0,18833,15,PA,1,C,-8.509454e+06,5.101307e+06,41.600392,-76.441724,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,53847.632898,33.459368,4
1,18833,15,PA,1,H,-8.499018e+06,5.096218e+06,41.566196,-76.347977,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,45869.438119,28.501947,4
2,18833,15,PA,0,E,-8.496356e+06,5.099448e+06,41.587904,-76.324061,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,46015.805516,28.592896,4
3,18833,15,PA,1,G,-8.509963e+06,5.103102e+06,41.612450,-76.446301,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,54518.419780,33.876175,4
4,18833,15,PA,1,G,-8.508370e+06,5.099066e+06,41.585339,-76.431989,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,53297.730315,33.117674,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53067356,92003,73,CA,0,H,-1.304989e+07,3.930304e+06,33.263291,-117.229201,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,53258.019576,33.092999,4
53067357,92003,73,CA,0,F,-1.304547e+07,3.934604e+06,33.295585,-117.189475,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,56199.475559,34.920735,4
53067358,92003,73,CA,2,L,-1.304803e+07,3.934243e+06,33.292877,-117.212471,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,56209.490501,34.926958,4
53067359,92003,73,CA,1,D,-1.304785e+07,3.933154e+06,33.284700,-117.210800,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,55287.090852,34.353806,4


In [3]:
# Remove the household columns that are not carried into the merged output.
unused_household_columns = [
    'zip', 'county', 'state', 'child_num',
    'lat_h_4326', 'lon_h_4326', 'distance_m',
]
df = df.drop(columns=unused_household_columns)
df

Unnamed: 0,age_code,lat_h_3857,lon_h_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_mi,distance_category
0,C,-8.509454e+06,5.101307e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.459368,4
1,H,-8.499018e+06,5.096218e+06,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,28.501947,4
2,E,-8.496356e+06,5.099448e+06,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,28.592896,4
3,G,-8.509963e+06,5.103102e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.876175,4
4,G,-8.508370e+06,5.099066e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.117674,4
...,...,...,...,...,...,...,...,...,...,...,...,...
53067356,H,-1.304989e+07,3.930304e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,33.092999,4
53067357,F,-1.304547e+07,3.934604e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.920735,4
53067358,L,-1.304803e+07,3.934243e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.926958,4
53067359,D,-1.304785e+07,3.933154e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.353806,4


### Defining ```is_elderly```

This code categorizes whether or not a household is elderly. Here, we define elderly as 65 years old and up; thus, we look for rows where ```age_code``` is ```J```, ```K```, ```L```, or ```M```. If the age code is one of these letters, the ```is_elderly``` column is assigned a 1. Otherwise, the household does not have elderly (the coded condition is false), and the ```is_elderly``` column is assigned a 2.

In [4]:
# Age codes J, K, L and M mark elderly households: flag those rows 1 and all other rows 2.
elderly_age_codes = ['J', 'K', 'L', 'M']
df['is_elderly'] = np.where(df['age_code'].isin(elderly_age_codes), 1, 2)
df

Unnamed: 0,age_code,lat_h_3857,lon_h_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_mi,distance_category,is_elderly
0,C,-8.509454e+06,5.101307e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.459368,4,2
1,H,-8.499018e+06,5.096218e+06,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,28.501947,4,2
2,E,-8.496356e+06,5.099448e+06,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,28.592896,4,2
3,G,-8.509963e+06,5.103102e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.876175,4,2
4,G,-8.508370e+06,5.099066e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.117674,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53067356,H,-1.304989e+07,3.930304e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,33.092999,4,2
53067357,F,-1.304547e+07,3.934604e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.920735,4,2
53067358,L,-1.304803e+07,3.934243e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.926958,4,1
53067359,D,-1.304785e+07,3.933154e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.353806,4,2


Renaming the household latitude and longitude columns so that they match the names of the tank latitude and longitude columns; this is necessary for the merging process. 

In [5]:
# Give the household coordinate columns the shared names used for merging with the tanks.
df = df.rename(columns={'lat_h_3857': 'lat_3857', 'lon_h_3857': 'lon_3857'})
df

Unnamed: 0,age_code,lat_3857,lon_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_mi,distance_category,is_elderly
0,C,-8.509454e+06,5.101307e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.459368,4,2
1,H,-8.499018e+06,5.096218e+06,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,28.501947,4,2
2,E,-8.496356e+06,5.099448e+06,4.881886,15.876431,4.895073,24.892845,-1.000000,30.218719,13.460825,28.592896,4,2
3,G,-8.509963e+06,5.103102e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.876175,4,2
4,G,-8.508370e+06,5.099066e+06,2.050670,15.375901,5.380037,14.512438,-1.000000,17.062917,9.063660,33.117674,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53067356,H,-1.304989e+07,3.930304e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,33.092999,4,2
53067357,F,-1.304547e+07,3.934604e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.920735,4,2
53067358,L,-1.304803e+07,3.934243e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.926958,4,1
53067359,D,-1.304785e+07,3.933154e+06,34.617855,11.334705,1.771182,19.203448,2.036342,18.929178,14.648785,34.353806,4,2


### Importing risk of tanks

This dataframe contains the coordinates of the tanks and also each of the 6 risks associated with each tank. Below, we will also be dropping unused columns.

In [3]:
# Read the tank shapefile and discard the attribute columns that are not used afterwards.
tank_shapefile = '/hpc/group/codeplus22-vis/infousa_copy/tanks_risk_score_final.shp'
unused_tank_columns = ['state', 'tank_type', 'diameter', 'county', 'on_floodpl']
df_tanks = gpd.read_file(tank_shapefile).drop(columns=unused_tank_columns)
df_tanks.head()

NameError: name 'gpd' is not defined

In [7]:
# Align the tank coordinate column names with the household table, then add the two
# household-only columns with 0 as the value for every tank row.
df_tanks = (
    df_tanks
    .rename(columns={'lat_t_3857': 'lat_3857', 'lon_t_3857': 'lon_3857'})
    .assign(distance_category=0, is_elderly=0)
)
df_tanks.head()

Unnamed: 0,lat_t_4326,lon_t_4326,lat_3857,lon_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,adj_risk,geometry,distance_category,is_elderly
0,40.625572,-73.745231,-8209282.0,4957270.0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74547 40.62575, -73.74500 40.625...",0,0
1,40.624761,-73.74442,-8209191.0,4957151.0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74465 40.62485, -73.74419 40.624...",0,0
2,40.626086,-73.746257,-8209396.0,4957345.0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74633 40.62615, -73.74618 40.626...",0,0
3,40.625786,-73.746203,-8209390.0,4957301.0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74639 40.62593, -73.74601 40.625...",0,0
4,40.625781,-73.745813,-8209346.0,4957300.0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74595 40.62590, -73.74567 40.625...",0,0


### Filling in the distance column for the tanks dataframe

The distance column in the final merged dataframe represents the distance between a household and a tank. The tanks themselves, however, have no associated distance: when we use the range slider for distance, only the households in the selected distance range should change. Therefore, we want to put a number in the tanks' distance column that will not interfere with the household distances. In this case, we find the max household distance and fill the tank distance column with a number slightly higher than that.

In [8]:
# Largest value in the household distance column; the tank placeholder distance must exceed it.
max_household_distance_mi = df['distance_mi'].max()
print(max_household_distance_mi)

213.4276172929592


In [9]:
# Placeholder distance for every tank row, chosen just above the largest
# household distance (about 213.43 miles).
tank_placeholder_distance_mi = 215
df_tanks['distance_mi'] = tank_placeholder_distance_mi
df_tanks

Unnamed: 0,lat_t_4326,lon_t_4326,lat_3857,lon_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,adj_risk,geometry,distance_category,is_elderly,distance_mi
0,40.625572,-73.745231,-8.209282e+06,4.957270e+06,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74547 40.62575, -73.74500 40.625...",0,0,215
1,40.624761,-73.744420,-8.209191e+06,4.957151e+06,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74465 40.62485, -73.74419 40.624...",0,0,215
2,40.626086,-73.746257,-8.209396e+06,4.957345e+06,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74633 40.62615, -73.74618 40.626...",0,0,215
3,40.625786,-73.746203,-8.209390e+06,4.957301e+06,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74639 40.62593, -73.74601 40.625...",0,0,215
4,40.625781,-73.745813,-8.209346e+06,4.957300e+06,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825,"POLYGON ((-73.74595 40.62590, -73.74567 40.625...",0,0,215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98164,39.777431,-104.920718,-1.167972e+07,4.833652e+06,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158,"POLYGON ((-104.92075 39.77746, -104.92069 39.7...",0,0,215
98165,39.777301,-104.920631,-1.167971e+07,4.833633e+06,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158,"POLYGON ((-104.92066 39.77732, -104.92060 39.7...",0,0,215
98166,39.777701,-104.920609,-1.167971e+07,4.833691e+06,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158,"POLYGON ((-104.92064 39.77772, -104.92058 39.7...",0,0,215
98167,39.776628,-104.920617,-1.167971e+07,4.833535e+06,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158,"POLYGON ((-104.92065 39.77665, -104.92059 39.7...",0,0,215


### Merging tanks and households 

Here we are appending the household data and tanks data together. Once this data is merged, we are using the ```.drop()``` function to drop some of the columns. In this function, we specify the parameter ```axis``` equal to 1 because the columns we are dropping are located on axis = 1. Once the merged file is produced, we will export this as a parquet file.

In [None]:
# Stack the household rows and the tank rows into a single table with a fresh index.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` is the supported equivalent and produces the same result.
df_merged = pd.concat([df, df_tanks], ignore_index=True)
# Remove the tank-only columns, which the household rows do not have.
df_merged = df_merged.drop(['lat_t_4326', 'lon_t_4326', 'adj_risk', 'geometry'], axis = 1)
df_merged

  df_merged = df.append(df_tanks, ignore_index=True)


Unnamed: 0,age_code,lat_3857,lon_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_mi,distance_category,is_elderly
0,C,-8.509454e+06,5.101307e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.459368,4,2
1,H,-8.499018e+06,5.096218e+06,4.881886,15.876431,4.895073,24.892845,-1.0,30.218719,13.460825,28.501947,4,2
2,E,-8.496356e+06,5.099448e+06,4.881886,15.876431,4.895073,24.892845,-1.0,30.218719,13.460825,28.592896,4,2
3,G,-8.509963e+06,5.103102e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.876175,4,2
4,G,-8.508370e+06,5.099066e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.117674,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53165525,,-1.167972e+07,4.833652e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0
53165526,,-1.167971e+07,4.833633e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0
53165527,,-1.167971e+07,4.833691e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0
53165528,,-1.167971e+07,4.833535e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0


In [None]:
# Persist the combined household + tank table.
merged_parquet = '/hpc/group/codeplus22-vis/infousa_copy/hh_tank_risk.parquet'
df_merged.to_parquet(merged_parquet)


#### Here we are just re-reading the file in to test if it exported as a parquet correctly

In [12]:
# Read the exported file back in; from this cell on, `df` refers to the merged
# household + tank table rather than the household-only table.
merged_parquet = '/hpc/group/codeplus22-vis/infousa_copy/hh_tank_risk.parquet'
df = pd.read_parquet(merged_parquet)
df

Unnamed: 0,age_code,lat_3857,lon_3857,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,distance_mi,distance_category,is_elderly
0,C,-8.509454e+06,5.101307e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.459368,4,2
1,H,-8.499018e+06,5.096218e+06,4.881886,15.876431,4.895073,24.892845,-1.0,30.218719,13.460825,28.501947,4,2
2,E,-8.496356e+06,5.099448e+06,4.881886,15.876431,4.895073,24.892845,-1.0,30.218719,13.460825,28.592896,4,2
3,G,-8.509963e+06,5.103102e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.876175,4,2
4,G,-8.508370e+06,5.099066e+06,2.050670,15.375901,5.380037,14.512438,-1.0,17.062917,9.063660,33.117674,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53165525,,-1.167972e+07,4.833652e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0
53165526,,-1.167971e+07,4.833633e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0
53165527,,-1.167971e+07,4.833691e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0
53165528,,-1.167971e+07,4.833535e+06,7.743007,12.625942,-1.000000,45.758161,-1.0,6.179840,12.051158,215.000000,0,0


## Processing dataframe for individual natural hazards

Since we want to plot all of the risk dashboards separately, we are now breaking this dataframe down into separate dataframes, one for each type of natural hazard.

### Processing the dataframe for earthquake risk

We are taking the overall dataframe we made above and dropping all of the other risks except for earthquake before exporting it to a parquet file to be used in visualizations.

In [13]:
# Keep only the five columns exported for the earthquake visualization.
# Selecting them already discards every other risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_erqk = (
    df[['is_elderly', 'distance_mi', 'erqk_risks', 'lat_3857', 'lon_3857']]
    .rename(columns={'erqk_risks': 'earthquake_risk'})
)

df_erqk

Unnamed: 0,is_elderly,distance_mi,earthquake_risk,lat_3857,lon_3857
0,2,33.459368,2.050670,-8.509454e+06,5.101307e+06
1,2,28.501947,4.881886,-8.499018e+06,5.096218e+06
2,2,28.592896,4.881886,-8.496356e+06,5.099448e+06
3,2,33.876175,2.050670,-8.509963e+06,5.103102e+06
4,2,33.117674,2.050670,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,7.743007,-1.167972e+07,4.833652e+06
53165526,0,215.000000,7.743007,-1.167971e+07,4.833633e+06
53165527,0,215.000000,7.743007,-1.167971e+07,4.833691e+06
53165528,0,215.000000,7.743007,-1.167971e+07,4.833535e+06


In [5]:
# Write the earthquake table to parquet for use in the visualizations.
earthquake_parquet = '/hpc/group/codeplus22-vis/infousa_copy/earthquake_risk_final.parquet'
df_erqk.to_parquet(earthquake_parquet)

### Processing dataframe for strong wind risk

We are doing the same thing as for the earthquake file, this time dropping all of the other risks except for strong wind risk before exporting it to a parquet file to be used in visualizations.

In [16]:
# Keep only the five columns exported for the strong-wind visualization.
# Selecting them already discards every other risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_swnd = (
    df[['is_elderly', 'distance_mi', 'swnd_risks', 'lat_3857', 'lon_3857']]
    .rename(columns={'swnd_risks': 'strong_wind_risk'})
)

df_swnd

Unnamed: 0,is_elderly,distance_mi,strong_wind_risk,lat_3857,lon_3857
0,2,33.459368,15.375901,-8.509454e+06,5.101307e+06
1,2,28.501947,15.876431,-8.499018e+06,5.096218e+06
2,2,28.592896,15.876431,-8.496356e+06,5.099448e+06
3,2,33.876175,15.375901,-8.509963e+06,5.103102e+06
4,2,33.117674,15.375901,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,12.625942,-1.167972e+07,4.833652e+06
53165526,0,215.000000,12.625942,-1.167971e+07,4.833633e+06
53165527,0,215.000000,12.625942,-1.167971e+07,4.833691e+06
53165528,0,215.000000,12.625942,-1.167971e+07,4.833535e+06


In [17]:
# Write the strong-wind table to parquet for use in the visualizations.
strong_wind_parquet = '/hpc/group/codeplus22-vis/infousa_copy/strong_wind_risk_final.parquet'
df_swnd.to_parquet(strong_wind_parquet)

In [None]:
# Displays the current `df`.
# NOTE(review): this cell has no saved output and looks like a leftover inspection cell — consider removing it.
df

### Processing dataframe for hurricane risk

Once again, we will be doing the same steps as the ones above, except we are keeping the column for hurricane risk. Every time we make a new dataframe, we will export it as a parquet file.

In [4]:
# Keep only the five columns exported for the hurricane visualization.
# Selecting them already discards every other risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_hrcn = (
    df[['is_elderly', 'distance_mi', 'hrcn_risks', 'lat_3857', 'lon_3857']]
    .rename(columns={'hrcn_risks': 'hurricane_risk'})
)

df_hrcn

Unnamed: 0,is_elderly,distance_mi,hurricane_risk,lat_3857,lon_3857
0,2,33.459368,5.380037,-8.509454e+06,5.101307e+06
1,2,28.501947,4.895073,-8.499018e+06,5.096218e+06
2,2,28.592896,4.895073,-8.496356e+06,5.099448e+06
3,2,33.876175,5.380037,-8.509963e+06,5.103102e+06
4,2,33.117674,5.380037,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,-1.000000,-1.167972e+07,4.833652e+06
53165526,0,215.000000,-1.000000,-1.167971e+07,4.833633e+06
53165527,0,215.000000,-1.000000,-1.167971e+07,4.833691e+06
53165528,0,215.000000,-1.000000,-1.167971e+07,4.833535e+06


In [7]:
# Write the hurricane table to parquet for use in the visualizations.
hurricane_parquet = '/hpc/group/codeplus22-vis/infousa_copy/hurricane_risks_final.parquet'
df_hrcn.to_parquet(hurricane_parquet)

### Processing dataframe for tornado risk

In [5]:
# Keep only the five columns exported for the tornado visualization.
# Selecting them already discards every other risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_trnd = (
    df[['is_elderly', 'distance_mi', 'trnd_risks', 'lat_3857', 'lon_3857']]
    .rename(columns={'trnd_risks': 'tornado_risk'})
)

df_trnd

Unnamed: 0,is_elderly,distance_mi,tornado_risk,lat_3857,lon_3857
0,2,33.459368,14.512438,-8.509454e+06,5.101307e+06
1,2,28.501947,24.892845,-8.499018e+06,5.096218e+06
2,2,28.592896,24.892845,-8.496356e+06,5.099448e+06
3,2,33.876175,14.512438,-8.509963e+06,5.103102e+06
4,2,33.117674,14.512438,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,45.758161,-1.167972e+07,4.833652e+06
53165526,0,215.000000,45.758161,-1.167971e+07,4.833633e+06
53165527,0,215.000000,45.758161,-1.167971e+07,4.833691e+06
53165528,0,215.000000,45.758161,-1.167971e+07,4.833535e+06


In [6]:
# Write the tornado table to parquet for use in the visualizations.
tornado_parquet = '/hpc/group/codeplus22-vis/infousa_copy/tornado_risks_final.parquet'
df_trnd.to_parquet(tornado_parquet)

### Processing dataframe for coastal flood risk

In [10]:
# Keep only the five columns exported for the coastal-flood visualization.
# Selecting them already discards every other risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_cfld = (
    df[['is_elderly', 'distance_mi', 'cfld_risks', 'lat_3857', 'lon_3857']]
    .rename(columns={'cfld_risks': 'coastal_flood_risk'})
)

df_cfld

Unnamed: 0,is_elderly,distance_mi,coastal_flood_risk,lat_3857,lon_3857
0,2,33.459368,-1.0,-8.509454e+06,5.101307e+06
1,2,28.501947,-1.0,-8.499018e+06,5.096218e+06
2,2,28.592896,-1.0,-8.496356e+06,5.099448e+06
3,2,33.876175,-1.0,-8.509963e+06,5.103102e+06
4,2,33.117674,-1.0,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,-1.0,-1.167972e+07,4.833652e+06
53165526,0,215.000000,-1.0,-1.167971e+07,4.833633e+06
53165527,0,215.000000,-1.0,-1.167971e+07,4.833691e+06
53165528,0,215.000000,-1.0,-1.167971e+07,4.833535e+06


In [13]:
# Write the coastal-flood table to parquet for use in the visualizations.
coastal_flood_parquet = '/hpc/group/codeplus22-vis/infousa_copy/coast_flood_risks_final.parquet'
df_cfld.to_parquet(coastal_flood_parquet)

### Processing dataframe for riverine flood risk

In [12]:
# Keep only the five columns exported for the riverine-flood visualization.
# Selecting them already discards every other risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_rfld = (
    df[['is_elderly', 'distance_mi', 'rfld_risks', 'lat_3857', 'lon_3857']]
    .rename(columns={'rfld_risks': 'riverine_flood_risk'})
)

df_rfld

Unnamed: 0,is_elderly,distance_mi,riverine_flood_risk,lat_3857,lon_3857
0,2,33.459368,17.062917,-8.509454e+06,5.101307e+06
1,2,28.501947,30.218719,-8.499018e+06,5.096218e+06
2,2,28.592896,30.218719,-8.496356e+06,5.099448e+06
3,2,33.876175,17.062917,-8.509963e+06,5.103102e+06
4,2,33.117674,17.062917,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,6.179840,-1.167972e+07,4.833652e+06
53165526,0,215.000000,6.179840,-1.167971e+07,4.833633e+06
53165527,0,215.000000,6.179840,-1.167971e+07,4.833691e+06
53165528,0,215.000000,6.179840,-1.167971e+07,4.833535e+06


In [14]:
# Write the riverine-flood table to parquet for use in the visualizations.
riverine_flood_parquet = '/hpc/group/codeplus22-vis/infousa_copy/riverine_flood_risks_final.parquet'
df_rfld.to_parquet(riverine_flood_parquet)

### Processing dataframe for average risk

In [5]:
# Keep only the five columns exported for the average-risk visualization.
# Selecting them already discards every individual risk column, so a separate `drop`
# (a full extra copy of the frame) is unnecessary; chaining `rename` avoids
# mutating a freshly sliced frame in place.
df_avg = (
    df[['is_elderly', 'distance_mi', 'avg_risk', 'lat_3857', 'lon_3857']]
    .rename(columns={'avg_risk': 'average_risk'})
)

df_avg

Unnamed: 0,is_elderly,distance_mi,average_risk,lat_3857,lon_3857
0,2,33.459368,9.063660,-8.509454e+06,5.101307e+06
1,2,28.501947,13.460825,-8.499018e+06,5.096218e+06
2,2,28.592896,13.460825,-8.496356e+06,5.099448e+06
3,2,33.876175,9.063660,-8.509963e+06,5.103102e+06
4,2,33.117674,9.063660,-8.508370e+06,5.099066e+06
...,...,...,...,...,...
53165525,0,215.000000,12.051158,-1.167972e+07,4.833652e+06
53165526,0,215.000000,12.051158,-1.167971e+07,4.833633e+06
53165527,0,215.000000,12.051158,-1.167971e+07,4.833691e+06
53165528,0,215.000000,12.051158,-1.167971e+07,4.833535e+06


In [7]:
# Write the average-risk table to parquet for use in the visualizations.
average_risk_parquet = '/hpc/group/codeplus22-vis/infousa_copy/average_risk_final.parquet'
df_avg.to_parquet(average_risk_parquet)