In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from pyincore import IncoreClient, Dataset, FragilityService, MappingSet, DataService
from pyincore.analyses.buildingdamage import BuildingDamage
from pyincore.analyses.cumulativebuildingdamage import CumulativeBuildingDamage
from pyincore.analyses.populationdislocation import PopulationDislocation, PopulationDislocationUtil

In [2]:
client = IncoreClient()

Connection successful to IN-CORE services. pyIncore version detected: 0.9.0


In [43]:
#update incore to 0.9.0
#use shape file for building inv
#Have a new environment to have pyincore 0.9.0
#update value loss to -> 60354810e379f22e16560dbd

# Input file mapping

In [3]:
# MMSA Housing Unit Inventory, IN-CORE_2av4_HUInventory_2020-01-21_HUinventory.csv
housing_unit_inv = "5e2b27de5edc600008ac0ae6"

# MMSA Address Point Inventory, IN-CORE_2dv3_AddressPointInventory_2020-01-22_strctid.csv
address_point_inv = "5e388d9a56c00f0008eb0e6f"

# MMSA Building Inventory, all_bldgs_ver5_Project.csv
building_inv = "5e388979c5ff060008a48eda"   #should be shape file

## Run Housing Unit Allocation 
https://github.com/IN-CORE/incore-docs/blob/master/notebooks/housingunitallocation.ipynb

Rosenheim, Nathanael, Roberto Guidotti, Paolo Gardoni & Walter Gillis Peacock. (2019). Integration of detailed household and housing unit characteristic data with critical infrastructure for post-hazard resilience modeling. Sustainable and Resilient Infrastructure. doi.org/10.1080/23789689.2019.1681821

In [31]:
from pyincore import IncoreClient
from pyincore.analyses.housingunitallocation import HousingUnitAllocation

# Create housing allocation 
hua = HousingUnitAllocation(client)

# Load input dataset
hua.load_remote_input_dataset("housing_unit_inventory", housing_unit_inv)
hua.load_remote_input_dataset("address_point_inventory", address_point_inv)
hua.load_remote_input_dataset("building_inventory", building_inv)

# Specify the result name
result_name = "IN-CORE_1bv6_housingunitallocation.csv"

seed = 1238
iterations = 1

# Set analysis parameters
hua.set_parameter("result_name", result_name)
hua.set_parameter("seed", seed)
hua.set_parameter("iterations", iterations)

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...


True

In [32]:
# Run Housing unit allocation analysis
hua.run_analysis()

True

In [33]:
# Retrieve result dataset
result = hua.get_output_dataset("result")

# Convert dataset to Pandas DataFrame
hua_df = result.get_dataframe_from_csv(low_memory=False)

# Display top 5 rows of output data
hua_df.head()

Unnamed: 0,parid_card,bldg_id,struct_typ,str_prob,year_built,no_stories,a_stories,b_stories,bsmt_type,sq_foot,...,numprec,gqpop,ownershp,race,hispan,hispan_flag,vacancy,gqtype,randomhu,aphumerge
0,039028 00015_1,039028 00015_1_1,W1,0.97366,1946.0,1.0,1.0,0.0,CRAWL=0-24%,1008.0,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.090391,both
1,039028 00015_1,039028 00015_1_1,W1,0.97366,1946.0,1.0,1.0,0.0,CRAWL=0-24%,1008.0,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.391309,both
2,039028 00014_1,039028 00014_1_1,URM,0.83374,1909.0,1.0,1.0,0.0,NONE,2520.0,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.414422,both
3,039028 00014_1,039028 00014_1_1,URM,0.83374,1909.0,1.0,1.0,0.0,NONE,2520.0,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.927559,both
4,039029 00002_1,039029 00002_1_1,W1,0.97366,1916.0,1.0,1.0,0.0,CRAWL=0-24%,616.0,...,1.0,0.0,1.0,2.0,0.0,2.0,0.0,0.0,0.284164,both


In [7]:
hua_df.columns

Index(['parid_card', 'bldg_id', 'struct_typ', 'str_prob', 'year_built',
       'no_stories', 'a_stories', 'b_stories', 'bsmt_type', 'sq_foot',
       'gsq_foot', 'occ_detail', 'major_occ', 'broad_occ', 'appr_bldg',
       'repl_cst', 'str_cst', 'nstra_cst', 'nstrd_cst', 'dgn_lvl', 'cont_val',
       'efacility', 'dwell_unit', 'str_typ2', 'occ_typ2', 'tract_id',
       'IMPUTED', 'strctid', 'addrptid', 'blockid', 'blockidstr', 'PLCNAME10',
       'PLCGEOID10', 'COUNTYFP10', 'geometry', 'huestimate', 'residential',
       'bldgobs', 'flag_ap', 'parid', 'guid', 'occ_type', 'randomap',
       'randommergeorder', 'huid', 'FIPScounty', 'bgid', 'serialno',
       'livetype', 'numprec', 'gqpop', 'ownershp', 'race', 'hispan',
       'hispan_flag', 'vacancy', 'gqtype', 'randomhu', 'aphumerge'],
      dtype='object')

In [8]:
hua_df.to_csv("housing_unit_allocation_shelby.csv")

In [9]:
hua_df.dgn_lvl.unique()

array(['Low - Code', 'Pre - Code', nan, 'Moderate - Code', 'High - Code'], dtype=object)

In [10]:
len(hua_df)

557423

In [11]:
len(hua_df.guid.unique())

306004

In [12]:
len(hua_df.blockid.unique())

12699

In [13]:
len(hua_df.addrptid.unique())

557423

In [14]:
len(hua_df.parid.unique())

291553

## Explore results from Housing Unit Allocation

Keep observations that are matched to a building.

In [15]:
hua_df = hua_df.loc[hua_df['aphumerge'] == 'both']

In [21]:
hua_df['Race Ethnicity'] = "0 Vacant HU No Race Ethnicity Data"
hua_df['Race Ethnicity'].notes = "Identify Race and Ethnicity Housing Unit Characteristics."

hua_df.loc[(hua_df['race'] == 1) & (hua_df['hispan'] == 0),'Race Ethnicity'] = "1 White alone, Not Hispanic"
hua_df.loc[(hua_df['race'] == 2) & (hua_df['hispan'] == 0),'Race Ethnicity'] = "2 Black alone, Not Hispanic"
hua_df.loc[(hua_df['race'].isin([3,4,5,6,7])) & (hua_df['hispan'] == 0),'Race Ethnicity'] = "3 Other Race, Not Hispanic"
hua_df.loc[(hua_df['hispan'] == 1),'Race Ethnicity'] = "4 Any Race, Hispanic"
hua_df.loc[(hua_df['gqtype'] >= 1),'Race Ethnicity'] = "5 Group Quarters no Race Ethnicity Data"

# Check new variable
table_title = "Confirm housing unit characteristic by Race and Ethnicity."
ethnicity_tab = pd.crosstab(hua_df['Race Ethnicity'], hua_df['race'], 
            margins=True, margins_name="Total").style.set_caption(table_title)
ethnicity_tab

race,1.0,2.0,3.0,4.0,5.0,6.0,7.0,Total
Race Ethnicity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"1 White alone, Not Hispanic",154427,0,0,0,0,0,0,154427
"2 Black alone, Not Hispanic",0,173463,0,0,0,0,0,173463
"3 Other Race, Not Hispanic",0,0,723,6874,100,659,3074,11430
"4 Any Race, Hispanic",4469,90,110,24,38,6563,357,11651
Total,158896,173553,833,6898,138,7222,3431,350971


pandas.io.formats.style.Styler

In [17]:
# Check new variable
table_title = "Confirm housing unit characteristic by Race and Ethnicity."
pd.crosstab(hua_df['Race Ethnicity'], hua_df['hispan'], 
            margins=True, margins_name="Total").style.set_caption(table_title)

hispan,0.0,1.0,Total
Race Ethnicity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"1 White alone, Not Hispanic",154427,0,154427
"2 Black alone, Not Hispanic",173463,0,173463
"3 Other Race, Not Hispanic",11430,0,11430
"4 Any Race, Hispanic",0,11651,11651
Total,339320,11651,350971


In [18]:
table_title = "Table 1. Housing Unit Characteristics by Race and Ethnicity"
table1 = pd.pivot_table(hua_df, values='numprec', index=['Race Ethnicity'],
                              margins = True, margins_name = 'Total',
                              aggfunc=[len, np.sum], 
                              fill_value=0).reset_index().rename(
                                                            columns={'len': 'Housing Unit',
                                                                     'sum' : 'Population',
                                                                     'numprec': 'Count'})

varformat = {('Housing Unit','Count'): "{:,}", ('Population','Count'): "{:,}"}

In [19]:
table1.style.set_caption(table_title).format(varformat).set_table_styles([
    dict(selector='th', props=[('text-align', 'center')]),])

Unnamed: 0_level_0,Race Ethnicity,Housing Unit,Population
Unnamed: 0_level_1,Unnamed: 1_level_1,Count,Count
0,0 Vacant HU No Race Ethnicity Data,47303,0
1,"1 White alone, Not Hispanic",154427,357602
2,"2 Black alone, Not Hispanic",173463,467354
3,"3 Other Race, Not Hispanic",11430,32478
4,"4 Any Race, Hispanic",11651,44598
5,5 Group Quarters no Race Ethnicity Data,191,18329
6,Total,398465,920361


## Validate the Housing Unit Allocation has worked
Notice that the population count totals for the community should match (pretty closely) data collected for the 2010 Decennial Census.
This can be confirmed by going to data.census.gov

https://data.census.gov/cedsci/table?q=DECENNIALPL2010.P1&g=1600000US4165950&tid=DECENNIALSF12010.P1

Differences in the housing unit allocation and the Census count may be due to differences between political boundaries and the building inventory. See Rosenheim et al 2019 for more details.

The housing unit allocation results will become the input for the dislocation model.

### Damage Analysis for Earthquake
##### Notebook created by Dylan R. Sanderson (OSU - sanderdy@oregonstate.edu) and Gowtham Naraharisetty (NCSA) 
###### https://incore.ncsa.illinois.edu/doc/incore/notebooks/Seaside_testbed/Seaside_testbed.html

In [3]:
hazard_type = "earthquake"
hazard_id = "5b902cb273c3371e1236b36b"

bldg_dataset_id = "5a284f0bc7d30d13bc081a46"        # defining building dataset (GIS point layer)
mapping_id = "5b47b350337d4a3629076f2c"             # specifiying mapping id from fragilites to building types
#mapping_id = "5e99d145f2935b00011900a4"

# Create building damage
bldg_dmg = BuildingDamage(client)

# Load input dataset
bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id)

# Load fragility mapping
fragility_service = FragilityService(client)
mapping_set = MappingSet(fragility_service.get_mapping(mapping_id))
bldg_dmg.set_input_dataset("dfr3_mapping_set", mapping_set)

# Specify the result name
result_name = "shelby_bldg_dmg_result"

# Set analysis parameters
bldg_dmg.set_parameter("result_name", result_name)
bldg_dmg.set_parameter("hazard_type", hazard_type)
bldg_dmg.set_parameter("hazard_id", hazard_id)
bldg_dmg.set_parameter("num_cpu", 4)

#Run building damage analysis
bldg_dmg.run_analysis()

# Retrieve result dataset
result = bldg_dmg.get_output_dataset("ds_result")

# Convert dataset to Pandas DataFrame
df = result.get_dataframe_from_csv()

# Display top 5 rows of output data
df.head()

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...


Unnamed: 0,guid,LS_0,LS_1,LS_2,DS_0,DS_1,DS_2,DS_3
0,64124791-1502-48ea-81b6-1992855f45d5,0.765693,0.196848,0.010265,0.234307,0.568845,0.186583,0.010265
1,d04da316-7cba-4964-8104-f0edfde18239,0.55314,0.104687,0.004094,0.44686,0.448453,0.100592,0.004094
2,c24d708d-a21b-416f-8772-965548407231,0.55873,0.107277,0.004269,0.44127,0.451453,0.103009,0.004269
3,6ff63801-3bf4-4bf3-b6e5-ff9d5fe6f0d0,0.509678,0.086185,0.002948,0.490322,0.423493,0.083237,0.002948
4,ef25f515-4109-408f-a3d4-3b79da49edd0,0.509779,0.086225,0.00295,0.490221,0.423554,0.083275,0.00295


### Calculating Population Dislocation for Different Retrofit Levels for given Event

##### Credits: https://github.com/IN-CORE/incore-docs/blob/master/notebooks/populationdislocation.ipynb

In [4]:
pop_dis = PopulationDislocation(client)

In [5]:
#Housing unit allocation, performed at start of notebook
housing_unit_alloc = Dataset.from_file("housing_unit_allocation_shelby.csv",
                                       data_type = "incore:housingUnitAllocation")

#  "IN-CORE_1ev1_Clean_BlockGroupSVI_2020-01-22.csv"
bg_data = "5e2b272cb2adbb0008d3766e"

# Value loss parameters, "IN-CORE_value_loss_bai09.csv"
value_loss = "60354810e379f22e16560dbd"

# Building damage performed in this notebook
building_dmg = Dataset.from_file("shelby_bldg_dmg_result.csv",
                                 data_type="ergo:buildingDamageVer5")

In [6]:
# Create population dislocatin 
pop_dis = PopulationDislocation(client)

# Load input dataset
pop_dis.load_remote_input_dataset("block_group_data", bg_data)
pop_dis.load_remote_input_dataset("value_poss_param", value_loss)
pop_dis.set_input_dataset("housing_unit_allocation", housing_unit_alloc)
pop_dis.set_input_dataset("building_dmg", building_dmg)


# Specify the result name
result_name = "IN-CORE_1bv6_population_dislocation"

seed = 1111

# Set analysis parameters
pop_dis.set_parameter("result_name", result_name)
pop_dis.set_parameter("seed", seed)

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...


True

In [7]:
# running population dislocation
pop_dis.run_analysis()

# reading 
df = pd.read_csv('{}.csv' .format(result_name))
df.dislocated = df.dislocated.astype("int32")
df.head()

Unnamed: 0.1,guid,DS_0,DS_1,DS_2,DS_3,Unnamed: 0,parid_card,bldg_id,struct_typ,str_prob,...,bgyear,pblackbg,phispbg,d_sf,rploss_0,rploss_1,rploss_2,rploss_3,prdis,dislocated
0,e45edbfc-70c2-4145-b3a4-4ccd71396cd9,0.428104,0.458312,0.108876,0.004708,0,039028 00015_1,039028 00015_1_1,W1,0.97366,...,2010.0,24.401294,3.106796,1,0.002914,0.100425,0.524702,0.889881,0.248587,1
1,e45edbfc-70c2-4145-b3a4-4ccd71396cd9,0.428104,0.458312,0.108876,0.004708,1,039028 00015_1,039028 00015_1_1,W1,0.97366,...,2010.0,24.401294,3.106796,1,0.00693,0.132025,0.677548,0.951019,0.26634,0
2,a276aa74-23e7-412b-8c64-e23a73802117,0.205468,0.569534,0.211755,0.013243,2,039028 00014_1,039028 00014_1_1,URM,0.83374,...,2010.0,24.401294,3.106796,1,0.005776,0.1,0.654935,0.941974,0.303232,0
3,a276aa74-23e7-412b-8c64-e23a73802117,0.205468,0.569534,0.211755,0.013243,3,039028 00014_1,039028 00014_1_1,URM,0.83374,...,2010.0,24.401294,3.106796,1,0.002404,0.105851,0.436479,0.854591,0.275371,0
4,6e91c1c9-56a9-48ff-8ae0-ec119f939f78,0.426533,0.459111,0.109593,0.004763,4,039029 00002_1,039029 00002_1_1,W1,0.97366,...,2010.0,24.401294,3.106796,0,0.003029,0.133077,0.438777,0.855511,0.351639,1


In [8]:
df

Unnamed: 0.1,guid,DS_0,DS_1,DS_2,DS_3,Unnamed: 0,parid_card,bldg_id,struct_typ,str_prob,...,bgyear,pblackbg,phispbg,d_sf,rploss_0,rploss_1,rploss_2,rploss_3,prdis,dislocated
0,e45edbfc-70c2-4145-b3a4-4ccd71396cd9,0.428104,0.458312,0.108876,0.004708,0,039028 00015_1,039028 00015_1_1,W1,0.97366,...,2010.0,24.401294,3.106796,1,0.002914,0.100425,0.524702,0.889881,0.248587,1
1,e45edbfc-70c2-4145-b3a4-4ccd71396cd9,0.428104,0.458312,0.108876,0.004708,1,039028 00015_1,039028 00015_1_1,W1,0.97366,...,2010.0,24.401294,3.106796,1,0.006930,0.132025,0.677548,0.951019,0.266340,0
2,a276aa74-23e7-412b-8c64-e23a73802117,0.205468,0.569534,0.211755,0.013243,2,039028 00014_1,039028 00014_1_1,URM,0.83374,...,2010.0,24.401294,3.106796,1,0.005776,0.100000,0.654935,0.941974,0.303232,0
3,a276aa74-23e7-412b-8c64-e23a73802117,0.205468,0.569534,0.211755,0.013243,3,039028 00014_1,039028 00014_1_1,URM,0.83374,...,2010.0,24.401294,3.106796,1,0.002404,0.105851,0.436479,0.854591,0.275371,0
4,6e91c1c9-56a9-48ff-8ae0-ec119f939f78,0.426533,0.459111,0.109593,0.004763,4,039029 00002_1,039029 00002_1_1,W1,0.97366,...,2010.0,24.401294,3.106796,0,0.003029,0.133077,0.438777,0.855511,0.351639,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557418,ba1eb38e-8eb9-41b8-b7cb-a025d0eec190,0.168107,0.597094,0.215148,0.019651,557418,A0142 00541_1,A0142 00541_1_1,W1,0.97366,...,,,,0,0.010752,0.131787,0.738395,0.975358,,0
557419,464e50c8-c8d4-4ac1-8365-dabc7bf050d8,0.255839,0.512279,0.214945,0.016938,557419,D0106 00333_1,D0106 00333_1_1,W1,0.97366,...,,,,0,0.004972,0.103895,0.484434,0.873774,,0
557420,ddc053e6-11a6-4ef0-b380-19e97c0cfb53,0.256064,0.512267,0.214761,0.016908,557420,D0106 00132_1,D0106 00132_1_1,W1,0.97366,...,,,,0,0.004028,0.240296,0.542934,0.897173,,0
557421,7af9de8d-3b7f-4ce8-99b8-a4b02840c169,0.258675,0.512116,0.212638,0.016572,557421,D0106 A00029_1,D0106 A00029_1_1,W1,0.97366,...,,,,0,0.003145,0.106044,0.484690,0.873876,,0


In [10]:
df.guid.value_counts()

8ec675d6-c284-4891-968d-5fbd60f7f283    514
36a05fa8-6c19-4708-be1a-b1b6c51fcc1c    469
7b1a00bf-daf7-46e6-8340-35315af1bee1    430
36c2dfdb-ec9a-474d-afb1-4fb522665ee9    430
9c21b4f8-3be8-4fc4-b8e8-6a76148116d1    406
                                       ... 
a66fce2d-5d2a-47f0-aba0-678e147a24e7      1
84aef606-f9f1-4350-93d7-a185e63f0e15      1
12072fae-b030-4da5-9180-d0a6a3b1a1e4      1
8f76c105-eb0d-4491-9cfa-25878380aaba      1
c03e0e30-7ca0-430b-9175-316bca851c83      1
Name: guid, Length: 306003, dtype: int64

In [11]:
hua = pd.read_csv("housing_unit_allocation_shelby.csv")

In [12]:
hua

Unnamed: 0.1,Unnamed: 0,parid_card,bldg_id,struct_typ,str_prob,year_built,no_stories,a_stories,b_stories,bsmt_type,...,numprec,gqpop,ownershp,race,hispan,hispan_flag,vacancy,gqtype,randomhu,aphumerge
0,0,039028 00015_1,039028 00015_1_1,W1,0.97366,1946.0,1.0,1.0,0.0,CRAWL=0-24%,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.090391,both
1,1,039028 00015_1,039028 00015_1_1,W1,0.97366,1946.0,1.0,1.0,0.0,CRAWL=0-24%,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.391309,both
2,2,039028 00014_1,039028 00014_1_1,URM,0.83374,1909.0,1.0,1.0,0.0,NONE,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.414422,both
3,3,039028 00014_1,039028 00014_1_1,URM,0.83374,1909.0,1.0,1.0,0.0,NONE,...,1.0,0.0,2.0,2.0,0.0,2.0,0.0,0.0,0.927559,both
4,4,039029 00002_1,039029 00002_1_1,W1,0.97366,1916.0,1.0,1.0,0.0,CRAWL=0-24%,...,1.0,0.0,1.0,2.0,0.0,2.0,0.0,0.0,0.284164,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557418,557418,A0142 00541_1,A0142 00541_1_1,W1,0.97366,1995.0,2.0,2.0,0.0,CRAWL=0-24%,...,,,,,,,,,,left_only
557419,557419,D0106 00333_1,D0106 00333_1_1,W1,0.97366,1928.0,1.0,1.0,0.0,CRAWL=0-24%,...,,,,,,,,,,left_only
557420,557420,D0106 00132_1,D0106 00132_1_1,W1,0.97366,1930.0,1.0,1.0,0.0,CRAWL=0-24%,...,,,,,,,,,,left_only
557421,557421,D0106 A00029_1,D0106 A00029_1_1,W1,0.97366,1984.0,1.0,1.0,0.0,CRAWL=0-24%,...,,,,,,,,,,left_only


In [13]:
hua.guid.value_counts()

8ec675d6-c284-4891-968d-5fbd60f7f283    514
36a05fa8-6c19-4708-be1a-b1b6c51fcc1c    469
7b1a00bf-daf7-46e6-8340-35315af1bee1    430
36c2dfdb-ec9a-474d-afb1-4fb522665ee9    430
9c21b4f8-3be8-4fc4-b8e8-6a76148116d1    406
                                       ... 
a66fce2d-5d2a-47f0-aba0-678e147a24e7      1
84aef606-f9f1-4350-93d7-a185e63f0e15      1
12072fae-b030-4da5-9180-d0a6a3b1a1e4      1
8f76c105-eb0d-4491-9cfa-25878380aaba      1
c03e0e30-7ca0-430b-9175-316bca851c83      1
Name: guid, Length: 306003, dtype: int64