# Data Source 5 - Nation Risk Index (NRI)

## Raw data source:
https://hazards.fema.gov/nri/data-resources

- Data Download: County Level > All Counties - County-level detail (Table)

# Version 1 - 2023 (Crosswalk) + NRI

# Version 2 - 2013 (Crosswalk) + NRI

In [1]:
crosswalk_version = 2023

In [2]:
import sys
sys.path.append('../../scripts')  
import merging_utils
import yaml
import pandas as pd
import os

with open("../../config/preprocessing.yaml", "r") as f:
    preprocessing_config = yaml.safe_load(f)

prefix = preprocessing_config['hazard'][crosswalk_version]['prefix']
fips_key_col = preprocessing_config['hazard'][crosswalk_version]['fips_key_col']
MSA_groupby_col = preprocessing_config['hazard'][crosswalk_version]['MSA_groupby_col']


file_path1 = '../../data/raw/NRI_Table_Counties/NRI_Table_Counties.csv'
file_path2 = preprocessing_config['hazard'][crosswalk_version]['file_path']

df1 = pd.read_csv(file_path1, dtype={'STCOFIPS' : str, MSA_groupby_col: str})
df2 = pd.read_csv(file_path2, dtype={fips_key_col: str})


## column 1
population_loss_equivalent = preprocessing_config['hazard'][crosswalk_version]['population_loss_equivalent']
population_loss_equivalent_cols = [prefix + '_' + col for col in df1.columns if population_loss_equivalent in col]

## column 2
building_loss_equivalent = preprocessing_config['hazard'][crosswalk_version]['building_loss_equivalent']
building_loss_equivalent_cols = [prefix + '_' + col for col in df1.columns if building_loss_equivalent in col]

## column 3
population_exposure = preprocessing_config['hazard'][crosswalk_version]['population_exposure']
population_exposure_cols = [prefix + '_' + col for col in df1.columns if population_exposure in col]

## column 4
building_exposure = preprocessing_config['hazard'][crosswalk_version]['building_exposure']
building_exposure_cols = [prefix + '_' + col for col in df1.columns if building_exposure in col]

## column 5
total_value_cols = preprocessing_config['hazard'][crosswalk_version]['total_value_cols']
total_value_cols = [prefix + '_' + col for col in total_value_cols]

## Can add columns more
additional_col = [prefix + '_' + 'AREA']

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [3]:
df1.head()

Unnamed: 0,OID_,NRI_ID,STATE,STATEABBRV,STATEFIPS,COUNTY,COUNTYTYPE,COUNTYFIPS,STCOFIPS,POPULATION,...,WNTW_EALS,WNTW_EALR,WNTW_ALRB,WNTW_ALRP,WNTW_ALRA,WNTW_ALR_NPCTL,WNTW_RISKV,WNTW_RISKS,WNTW_RISKR,NRI_VER
0,1,C01001,Alabama,AL,1,Autauga,County,1,1001,58764,...,15.784587,Very Low,2.687716e-07,7.410082e-09,8.725777e-06,10.461158,8494.906508,12.217626,Very Low,March 2023
1,2,C01003,Alabama,AL,1,Baldwin,County,3,1003,231365,...,56.205509,Relatively Moderate,1.268231e-09,2.28712e-08,1.54836e-07,13.339523,65619.701638,52.083996,Relatively Low,March 2023
2,3,C01005,Alabama,AL,1,Barbour,County,5,1005,25160,...,18.632002,Relatively Low,5.78805e-07,2.347236e-08,7.606598e-07,16.125039,15501.730335,19.535476,Very Low,March 2023
3,4,C01007,Alabama,AL,1,Bibb,County,7,1007,22239,...,13.308573,Very Low,9.014679e-07,1.2703e-08,1.202015e-05,16.991643,7496.18694,11.104041,Very Low,March 2023
4,5,C01009,Alabama,AL,1,Blount,County,9,1009,58992,...,23.64593,Relatively Low,5.268425e-07,1.482016e-08,2.002965e-07,12.039616,17175.160729,21.44448,Very Low,March 2023


In [4]:
df2.head()

Unnamed: 0,Crosswalk2023_CBSA Code,Crosswalk2023_Metropolitan Division Code,Crosswalk2023_CSA Code,Crosswalk2023_CBSA Title,Crosswalk2023_Metropolitan/Micropolitan Statistical Area,Crosswalk2023_Metropolitan Division Title,Crosswalk2023_CSA Title,Crosswalk2023_County/County Equivalent,Crosswalk2023_State Name,Crosswalk2023_FIPS State Code,Crosswalk2023_FIPS County Code,Crosswalk2023_Central/Outlying County,Crosswalk2023_FIPS_Key
0,10100,,,"Aberdeen, SD",Micropolitan Statistical Area,,,Brown County,South Dakota,46,13,Central,46013
1,10100,,,"Aberdeen, SD",Micropolitan Statistical Area,,,Edmunds County,South Dakota,46,45,Outlying,46045
2,10140,,,"Aberdeen, WA",Micropolitan Statistical Area,,,Grays Harbor County,Washington,53,27,Central,53027
3,10180,,101.0,"Abilene, TX",Metropolitan Statistical Area,,"Abilene-Sweetwater, TX",Callahan County,Texas,48,59,Outlying,48059
4,10180,,101.0,"Abilene, TX",Metropolitan Statistical Area,,"Abilene-Sweetwater, TX",Jones County,Texas,48,253,Outlying,48253


# Describe

In [5]:
df1.shape

(3231, 465)

In [6]:
# Extract columns that contain 'EALPE' in their names
ealpe_cols = [col for col in df1.columns if 'EALPE' in col]

# Calculate missing value count and percentage for those columns
ealpe_null_df = pd.DataFrame({
    'Missing Count': df1[ealpe_cols].isnull().sum(),
    'Missing %': df1[ealpe_cols].isnull().mean() * 100
}).sort_values('Missing %', ascending=False)

# View the result
ealpe_null_df


Unnamed: 0,Missing Count,Missing %
VLCN_EALPE,3125,96.719282
TSUN_EALPE,3103,96.038378
AVLN_EALPE,3023,93.562365
CFLD_EALPE,2646,81.89415
HRCN_EALPE,918,28.412256
ISTM_EALPE,229,7.087589
LTNG_EALPE,123,3.806871
WFIR_EALPE,88,2.723615
LNDS_EALPE,40,1.238007
SWND_EALPE,7,0.216651


In [7]:
# Extract columns that contain 'EALB' in their names
ealb_cols = [col for col in df1.columns if 'EALB' in col]

# Calculate missing value count and percentage for those columns
ealb_null_df = pd.DataFrame({
    'Missing Count': df1[ealb_cols].isnull().sum(),
    'Missing %': df1[ealb_cols].isnull().mean() * 100
}).sort_values('Missing %', ascending=False)

# View the result
ealb_null_df


Unnamed: 0,Missing Count,Missing %
VLCN_EALB,3125,96.719282
TSUN_EALB,3103,96.038378
AVLN_EALB,3023,93.562365
CFLD_EALB,2646,81.89415
HRCN_EALB,918,28.412256
ISTM_EALB,229,7.087589
LTNG_EALB,123,3.806871
WFIR_EALB,88,2.723615
LNDS_EALB,40,1.238007
SWND_EALB,7,0.216651


## Adding prefix

In [8]:
print('Before adding prefixes: ' , df1.columns)
df1 = merging_utils.add_prefix_all(df1, prefix=prefix)
print()
print('After adding prefixes: ' , df1.columns)

Before adding prefixes:  Index(['OID_', 'NRI_ID', 'STATE', 'STATEABBRV', 'STATEFIPS', 'COUNTY',
       'COUNTYTYPE', 'COUNTYFIPS', 'STCOFIPS', 'POPULATION',
       ...
       'WNTW_EALS', 'WNTW_EALR', 'WNTW_ALRB', 'WNTW_ALRP', 'WNTW_ALRA',
       'WNTW_ALR_NPCTL', 'WNTW_RISKV', 'WNTW_RISKS', 'WNTW_RISKR', 'NRI_VER'],
      dtype='object', length=465)

After adding prefixes:  Index(['NRI_OID_', 'NRI_NRI_ID', 'NRI_STATE', 'NRI_STATEABBRV',
       'NRI_STATEFIPS', 'NRI_COUNTY', 'NRI_COUNTYTYPE', 'NRI_COUNTYFIPS',
       'NRI_STCOFIPS', 'NRI_POPULATION',
       ...
       'NRI_WNTW_EALS', 'NRI_WNTW_EALR', 'NRI_WNTW_ALRB', 'NRI_WNTW_ALRP',
       'NRI_WNTW_ALRA', 'NRI_WNTW_ALR_NPCTL', 'NRI_WNTW_RISKV',
       'NRI_WNTW_RISKS', 'NRI_WNTW_RISKR', 'NRI_NRI_VER'],
      dtype='object', length=465)


## Merge with join identifiers

In [9]:
merged_df = pd.merge(left=df1, right=df2, left_on=prefix+'_'+'STCOFIPS', right_on=fips_key_col, how = 'outer')
merged_df[MSA_groupby_col] = merged_df[MSA_groupby_col].astype(str)

In [10]:
# county level !
merged_df

Unnamed: 0,NRI_OID_,NRI_NRI_ID,NRI_STATE,NRI_STATEABBRV,NRI_STATEFIPS,NRI_COUNTY,NRI_COUNTYTYPE,NRI_COUNTYFIPS,NRI_STCOFIPS,NRI_POPULATION,...,Crosswalk2023_CBSA Title,Crosswalk2023_Metropolitan/Micropolitan Statistical Area,Crosswalk2023_Metropolitan Division Title,Crosswalk2023_CSA Title,Crosswalk2023_County/County Equivalent,Crosswalk2023_State Name,Crosswalk2023_FIPS State Code,Crosswalk2023_FIPS County Code,Crosswalk2023_Central/Outlying County,Crosswalk2023_FIPS_Key
0,1.0,C01001,Alabama,AL,1.0,Autauga,County,1.0,01001,58764.0,...,"Montgomery, AL",Metropolitan Statistical Area,,"Montgomery-Selma, AL",Autauga County,Alabama,1.0,1.0,Central,01001
1,2.0,C01003,Alabama,AL,1.0,Baldwin,County,3.0,01003,231365.0,...,"Daphne-Fairhope-Foley, AL",Metropolitan Statistical Area,,"Mobile-Daphne-Fairhope, AL",Baldwin County,Alabama,1.0,3.0,Central,01003
2,3.0,C01005,Alabama,AL,1.0,Barbour,County,5.0,01005,25160.0,...,"Eufaula, AL-GA",Micropolitan Statistical Area,,,Barbour County,Alabama,1.0,5.0,Central,01005
3,4.0,C01007,Alabama,AL,1.0,Bibb,County,7.0,01007,22239.0,...,"Birmingham, AL",Metropolitan Statistical Area,,"Birmingham-Cullman-Talladega, AL",Bibb County,Alabama,1.0,7.0,Outlying,01007
4,5.0,C01009,Alabama,AL,1.0,Blount,County,9.0,01009,58992.0,...,"Birmingham, AL",Metropolitan Statistical Area,,"Birmingham-Cullman-Talladega, AL",Blount County,Alabama,1.0,9.0,Outlying,01009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3235,3227.0,C72151,Puerto Rico,PR,72.0,Yabucoa,Municipio,151.0,72151,30397.0,...,"San Juan-Bayamón-Caguas, PR",Metropolitan Statistical Area,,"San Juan-Bayamón, PR",Yabucoa Municipio,Puerto Rico,72.0,151.0,Central,72151
3236,3228.0,C72153,Puerto Rico,PR,72.0,Yauco,Municipio,153.0,72153,34151.0,...,"Ponce, PR",Metropolitan Statistical Area,,"Ponce-Coamo, PR",Yauco Municipio,Puerto Rico,72.0,153.0,Outlying,72153
3237,3229.0,C78010,Virgin Islands,VI,78.0,St. Croix,Island,10.0,78010,40913.0,...,,,,,,,,,,
3238,3230.0,C78020,Virgin Islands,VI,78.0,St. John,Island,20.0,78020,3882.0,...,,,,,,,,,,


## Define columns to include

In [11]:
all_relevant_cols = population_loss_equivalent_cols + building_loss_equivalent_cols + population_exposure_cols + building_exposure_cols + total_value_cols + additional_col

## Groupby MSA Level

In [12]:
merged_df_by_msa = merged_df.groupby(MSA_groupby_col, as_index=False)[all_relevant_cols].sum()

In [13]:
merged_df_by_msa.to_csv(f'../../data/interim/data5_hazard_{crosswalk_version}.csv', index=False)

In [14]:
# including Metro & Micro -> drop Micro at the Merge_1 notebook.
merged_df_by_msa

Unnamed: 0,Crosswalk2023_CBSA Code,NRI_AVLN_EALPE,NRI_CFLD_EALPE,NRI_CWAV_EALPE,NRI_ERQK_EALPE,NRI_HAIL_EALPE,NRI_HWAV_EALPE,NRI_HRCN_EALPE,NRI_ISTM_EALPE,NRI_LNDS_EALPE,...,NRI_RFLD_EXPB,NRI_SWND_EXPB,NRI_TRND_EXPB,NRI_TSUN_EXPB,NRI_VLCN_EXPB,NRI_WFIR_EXPB,NRI_WNTW_EXPB,NRI_BUILDVALUE,NRI_POPULATION,NRI_AREA
0,10100.0,0.000000e+00,0.000000e+00,2.164489e+06,6.357991e+03,1.876114e+05,4.347053e+05,0.000000e+00,2.225289e+05,1.065074e+03,...,6.505215e+08,1.397063e+10,1.397063e+10,0.000000e+00,0.000000e+00,1.514336e+09,1.397063e+10,1.397063e+10,42217.0,2.911111e+03
1,10140.0,0.000000e+00,2.331413e+07,0.000000e+00,1.280793e+07,3.978817e+02,2.055710e+03,0.000000e+00,1.739401e+03,1.225777e+05,...,3.281721e+09,1.566485e+10,1.566485e+10,8.037370e+09,0.000000e+00,2.640196e+09,1.566485e+10,1.566485e+10,75462.0,2.246051e+03
2,10180.0,0.000000e+00,0.000000e+00,4.854410e+05,1.990667e+04,4.830011e+05,1.375110e+06,2.751964e+03,1.185214e+05,4.047426e+04,...,4.136462e+09,3.170204e+10,3.170204e+10,0.000000e+00,0.000000e+00,4.755646e+09,3.170204e+10,3.170204e+10,176438.0,2.785546e+03
3,10220.0,0.000000e+00,0.000000e+00,3.584352e+05,6.623343e+04,5.127067e+04,5.587561e+05,2.668543e+02,7.210175e+04,1.740000e+04,...,5.588141e+07,6.279388e+09,6.279388e+09,0.000000e+00,0.000000e+00,6.728361e+08,6.279385e+09,6.279388e+09,37975.0,7.325881e+02
4,10300.0,0.000000e+00,0.000000e+00,7.921856e+04,3.808776e+04,8.064581e+03,2.315633e+05,4.115931e+02,1.086294e+03,1.740000e+04,...,2.221159e+08,1.965697e+10,1.965697e+10,0.000000e+00,0.000000e+00,6.519191e+09,1.965697e+10,1.965697e+10,99376.0,7.690893e+02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,49700.0,6.153254e+04,0.000000e+00,0.000000e+00,7.949410e+06,1.370243e+04,1.572410e+06,0.000000e+00,0.000000e+00,2.723655e+04,...,2.582857e+09,2.990595e+10,2.990595e+10,0.000000e+00,0.000000e+00,1.431904e+09,2.990595e+10,2.990595e+10,181133.0,1.265066e+03
932,49740.0,0.000000e+00,0.000000e+00,0.000000e+00,1.044114e+07,2.332881e+02,6.061415e+05,3.714090e+01,0.000000e+00,1.740000e+04,...,7.016421e+08,2.953707e+10,2.953707e+10,0.000000e+00,0.000000e+00,2.444249e+09,0.000000e+00,2.953707e+10,203299.0,5.574671e+03
933,49780.0,0.000000e+00,0.000000e+00,5.617986e+03,3.746210e+04,3.634842e+03,9.876197e+04,1.074454e+03,1.810320e+04,1.740000e+04,...,3.292056e+08,1.810675e+10,1.810675e+10,0.000000e+00,0.000000e+00,8.577985e+09,1.810675e+10,1.810675e+10,86374.0,6.793760e+02
934,49820.0,0.000000e+00,0.000000e+00,1.471546e+05,1.443930e+02,4.820357e+03,2.268236e+04,7.301043e+03,2.739103e+02,9.655049e+03,...,2.289017e+08,2.510090e+09,2.510090e+09,0.000000e+00,0.000000e+00,2.958618e+08,2.510090e+09,2.510090e+09,13865.0,1.068696e+03
