### SEASIDE OPTMIZATION DATA GENERATION from IN-CORE


###### Date: 11/16/2020




### Importing the Libraries

In [1]:
import pandas as pd
import numpy as np
import sys # For displaying package versions
import os # For managing directories and file paths if drive is mounted


from pyincore import IncoreClient, Dataset, FragilityService, MappingSet, DataService
from pyincore.analyses.housingunitallocation import HousingUnitAllocation
from pyincore.analyses.buildingdamage import BuildingDamage
from pyincore.analyses.cumulativebuildingdamage import CumulativeBuildingDamage
from pyincore.analyses.populationdislocation import PopulationDislocation, PopulationDislocationUtil


In [2]:
client = IncoreClient()

Connection successful to IN-CORE services. pyIncore version detected: 0.9.0


In [3]:
# Check package versions - good practice for replication
print("Python Version ",sys.version)
print("pandas version: ", pd.__version__)
print("numpy version: ", np.__version__)

Python Version  3.7.8 | packaged by conda-forge | (default, Jul 31 2020, 01:53:57) [MSC v.1916 64 bit (AMD64)]
pandas version:  1.2.3
numpy version:  1.20.1


In [4]:
# Check working directory - good practice for relative path access
os.getcwd()

'C:\\Users\\TarunAdluri\\Desktop\\Thesis\\SeasideData\\INCOREPRESENTATION'

## Initial Interdependent Community Description - Seaside, OR

Explore building inventory and social systems. Specifically look at how the building inventory connects with the housing unit inventory using the housing unit allocation.
The housing unit allocation method will provide detail demographic characteristics for the community allocated to each structure.

In [5]:
# Seaside, OR Housing unit inventory, IN-CORE_1bv6_SetupSeaside_FourInventories_2019-08-02_HUinventory.csv
housing_unit_inv = "5d543087b9219c0689b98234"

# Seaside, OR Address point inventory, IN-CORE_1bv6_SetupSeaside_FourInventories_2019-08-02_addresspointinventory.csv
address_point_inv = "5d542fefb9219c0689b981fb"

# Seaside, OR Building inventory, IN-CORE_1bv6_SetupSeaside_FourInventories_2019-08-02_buildinginventory.csv
building_inv = "5d5433edb9219c0689b98344"

## Run Housing Unit Allocation 
https://github.com/IN-CORE/incore-docs/blob/master/notebooks/housingunitallocation.ipynb

Rosenheim, Nathanael, Roberto Guidotti, Paolo Gardoni & Walter Gillis Peacock. (2019). Integration of detailed household and housing unit characteristic data with critical infrastructure for post-hazard resilience modeling. Sustainable and Resilient Infrastructure. doi.org/10.1080/23789689.2019.1681821

In [6]:
# Create housing allocation 
hua = HousingUnitAllocation(client)

# Load input dataset
hua.load_remote_input_dataset("housing_unit_inventory", housing_unit_inv)
hua.load_remote_input_dataset("address_point_inventory", address_point_inv)
hua.load_remote_input_dataset("building_inventory", building_inv)

# Specify the result name
result_name = "IN-CORE_1bv6_housingunitallocation"

seed = 1238
iterations = 1

# Set analysis parameters
hua.set_parameter("result_name", result_name)
hua.set_parameter("seed", seed)
hua.set_parameter("iterations", iterations)

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...


True

In [7]:
# Run Housing unit allocation analysis
hua.run_analysis()

True

In [8]:
# Retrieve result dataset
result = hua.get_output_dataset("result")

# Convert dataset to Pandas DataFrame
hua_df = result.get_dataframe_from_csv(low_memory=False)

# Display top 5 rows of output data
hua_df.head()

Unnamed: 0,strctid,parcelid,landuse,guid,d_sf,insignific,moderate,heavy,complete,addrptid,...,livetype,numprec,ownershp,race,hispan,vacancy,gqtype,bgid,randomhu,aphumerge
0,41007021038001001S,21038.0,499.0,e38d8575-7880-4a8c-b6d7-225ab1cf9264,1.0,0.001145,0.056595,0.403152,0.539108,41007021038001001S001001A,...,H,2.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.005397,both
1,41007020864002002S,20864.0,192.0,af5771b4-4f42-4166-b772-78a3706fa8ac,1.0,0.036792,0.476158,0.41739,0.06966,41007020864002002S001001A,...,H,1.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.007199,both
2,41007020864002001S,20864.0,192.0,37532fb5-5107-478b-ab2c-158eb001c68b,1.0,0.172897,0.499344,0.294506,0.033253,41007020864002001S001001A,...,H,4.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.023555,both
3,41007020956001001S,20956.0,131.0,66d39314-1c68-4634-a82c-8fcb37f529ff,1.0,0.171095,0.498589,0.296536,0.03378,41007020956001001S001001A,...,H,3.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.032571,both
4,41007021145001001S,21145.0,131.0,e519ec32-c5eb-422a-be3d-7ff4bb33f1e2,1.0,0.035605,0.471382,0.421331,0.071683,41007021145001001S001001A,...,H,2.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.247678,both


## Found Issue - to be fixed
The building inventory for Seaside has damage state information - a new version of the file needs to be made without the damage data.

### Seaside, OR Building inventory, IN-CORE_1bv6_SetupSeaside_FourInventories_2019-08-02_buildinginventory.csv
building_inv = "5d5433edb9219c0689b98344"

In [9]:
hua_df = hua_df.drop(columns= ['insignific','moderate','heavy','complete'])
hua_df.head()

Unnamed: 0,strctid,parcelid,landuse,guid,d_sf,addrptid,residential,huestimate,x,y,...,livetype,numprec,ownershp,race,hispan,vacancy,gqtype,bgid,randomhu,aphumerge
0,41007021038001001S,21038.0,499.0,e38d8575-7880-4a8c-b6d7-225ab1cf9264,1.0,41007021038001001S001001A,1.0,1.0,-123.896065,46.012722,...,H,2.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.005397,both
1,41007020864002002S,20864.0,192.0,af5771b4-4f42-4166-b772-78a3706fa8ac,1.0,41007020864002002S001001A,1.0,1.0,-123.903427,46.017498,...,H,1.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.007199,both
2,41007020864002001S,20864.0,192.0,37532fb5-5107-478b-ab2c-158eb001c68b,1.0,41007020864002001S001001A,1.0,1.0,-123.903427,46.017498,...,H,4.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.023555,both
3,41007020956001001S,20956.0,131.0,66d39314-1c68-4634-a82c-8fcb37f529ff,1.0,41007020956001001S001001A,1.0,1.0,-123.900246,46.013523,...,H,3.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.032571,both
4,41007021145001001S,21145.0,131.0,e519ec32-c5eb-422a-be3d-7ff4bb33f1e2,1.0,41007021145001001S001001A,1.0,1.0,-123.898918,46.012257,...,H,2.0,1.0,1.0,0.0,0.0,0.0,410079500000.0,0.247678,both


## Explore results from Housing Unit Allocation

Keep observations that are matched to a building.

In [10]:
hua_df = hua_df.loc[hua_df['aphumerge'] == 'both']

In [11]:
hua_df['Race Ethnicity'] = "0 Vacant HU No Race Ethnicity Data"
hua_df['Race Ethnicity'].notes = "Identify Race and Ethnicity Housing Unit Characteristics."

hua_df.loc[(hua_df['race'] == 1) & (hua_df['hispan'] == 0),'Race Ethnicity'] = "1 White alone, Not Hispanic"
hua_df.loc[(hua_df['race'] == 2) & (hua_df['hispan'] == 0),'Race Ethnicity'] = "2 Black alone, Not Hispanic"
hua_df.loc[(hua_df['race'].isin([3,4,5,6,7])) & (hua_df['hispan'] == 0),'Race Ethnicity'] = "3 Other Race, Not Hispanic"
hua_df.loc[(hua_df['hispan'] == 1),'Race Ethnicity'] = "4 Any Race, Hispanic"
hua_df.loc[(hua_df['gqtype'] >= 1),'Race Ethnicity'] = "5 Group Quarters no Race Ethnicity Data"

# Check new variable
table_title = "Confirm housing unit characteristic by Race and Ethnicity."
pd.crosstab(hua_df['Race Ethnicity'], hua_df['race'], 
            margins=True, margins_name="Total").style.set_caption(table_title)

race,1.0,2.0,3.0,4.0,5.0,6.0,7.0,Total
Race Ethnicity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"1 White alone, Not Hispanic",2715,0,0,0,0,0,0,2715
"2 Black alone, Not Hispanic",0,14,0,0,0,0,0,14
"3 Other Race, Not Hispanic",0,0,26,35,5,7,60,133
"4 Any Race, Hispanic",93,2,1,0,0,84,11,191
Total,2808,16,27,35,5,91,71,3053


In [12]:
# Check new variable
table_title = "Confirm housing unit characteristic by Race and Ethnicity."
pd.crosstab(hua_df['Race Ethnicity'], hua_df['hispan'], 
            margins=True, margins_name="Total").style.set_caption(table_title)

hispan,0.0,1.0,Total
Race Ethnicity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"1 White alone, Not Hispanic",2715,0,2715
"2 Black alone, Not Hispanic",14,0,14
"3 Other Race, Not Hispanic",133,0,133
"4 Any Race, Hispanic",0,191,191
Total,2862,191,3053


In [13]:
table_title = "Table 1. Housing Unit Characteristics by Race and Ethnicity"
table1 = pd.pivot_table(hua_df, values='numprec', index=['Race Ethnicity'],
                              margins = True, margins_name = 'Total',
                              aggfunc=[len, np.sum], 
                              fill_value=0).reset_index().rename(
                                                            columns={'len': 'Housing Unit',
                                                                     'sum' : 'Population',
                                                                     'numprec': 'Count'})

varformat = {('Housing Unit','Count'): "{:,}", ('Population','Count'): "{:,}"}

In [14]:
table1.style.set_caption(table_title).format(varformat).set_table_styles([
    dict(selector='th', props=[('text-align', 'center')]),])

Unnamed: 0_level_0,Race Ethnicity,Housing Unit,Population
Unnamed: 0_level_1,Unnamed: 1_level_1,Count,Count
0,0 Vacant HU No Race Ethnicity Data,1683,0
1,"1 White alone, Not Hispanic",2715,5531
2,"2 Black alone, Not Hispanic",14,24
3,"3 Other Race, Not Hispanic",133,327
4,"4 Any Race, Hispanic",191,711
5,5 Group Quarters no Race Ethnicity Data,5,47
6,Total,4741,6640


## Validate the Housing Unit Allocation has worked
Notice that the population count totals for the community should match (pretty closely) data collected for the 2010 Decennial Census.
This can be confirmed by going to data.census.gov

https://data.census.gov/cedsci/table?q=DECENNIALPL2010.P1&g=1600000US4165950&tid=DECENNIALSF12010.P1

Differences in the housing unit allocation and the Census count may be due to differences between political boundaries and the building inventory. See Rosenheim et al 2019 for more details.

The housing unit allocation results will become the input for the dislocation model.

In [15]:
# Save cleaned HUA file as CSV
hua_df.to_csv('IN-CORE_1cv1_housingunitallocation_1238.csv')

### Damage Analysis for Earthquake
##### Notebook created by Dylan R. Sanderson (OSU - sanderdy@oregonstate.edu) and Gowtham Naraharisetty (NCSA) 
###### https://incore.ncsa.illinois.edu/doc/incore/notebooks/Seaside_testbed/Seaside_testbed.html

In [16]:
hazard_type = "earthquake"
rt = [100, 500]
rt_hazard_dict = {100: "5dfa4058b9219c934b64d495",
                  500: "5dfa4300b9219c934b64d4d0"}
bldg_eq_dmg_result_list = []  # place holder to saving earthquake building damage result iteration
bldg_dataset_id = "5df40388b9219c06cf8b0c80"        # defining building dataset (GIS point layer)
mapping_id = "5d2789dbb9219c3c553c7977"             # specifiying mapping id from fragilites to building types
fragility_service = FragilityService(client)        # loading fragility mapping
mapping_set = MappingSet(fragility_service.get_mapping(mapping_id))

for rt_val in rt:                                       # loop through recurrence interval
    bldg_dmg = BuildingDamage(client)                   # initializing pyincore
    bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) # loading in the above
    mapping_set = MappingSet(fragility_service.get_mapping(mapping_id))
    bldg_dmg.set_input_dataset("dfr3_mapping_set", mapping_set)

    result_name = 'buildings_eq_' + str(rt_val) + 'yr_dmg_result' # defining output name

    bldg_dmg.set_parameter("hazard_type", hazard_type)  # defining hazard type (e.g. earthquake vs. tsunami)
    hazard_id = rt_hazard_dict[rt_val]                  # specifying hazard id for specific recurrence interval
    bldg_dmg.set_parameter("hazard_id", hazard_id)      # loading above into pyincore
    bldg_dmg.set_parameter("num_cpu", 4)                # number of CPUs to use for parallel processing
    bldg_dmg.set_parameter("result_name", result_name)  # specifying output name in pyincore

    bldg_dmg.run_analysis()                             # running the analysis with the above parameters
    bldg_eq_dmg_result_list.append(bldg_dmg.get_output_dataset('result'))

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...


### Calculating Population Dislocation for Different Retrofit Levels for given Event

##### Credits: https://github.com/IN-CORE/incore-docs/blob/master/notebooks/populationdislocation.ipynb

In [17]:
# Create population dislocatin 
pop_dis = PopulationDislocation(client)

In [18]:
# Seaside, OR Housing unit allocation, performed at start of notebook
housing_unit_alloc = Dataset.from_file('IN-CORE_1cv1_housingunitallocation_1238.csv','incore:housingUnitAllocation')

# Seaside, OR "IN-CORE_1bv6_SetupSeaside_FourInventories_2019-08-02_bgdata.csv"
bg_data = "5d542bd8b9219c0689b90408"

# Value loss parameters, "IN-CORE_value_loss_bai09.csv"
value_loss = "5dfd1069fc33d500081555d8"

# Load input dataset
pop_dis.set_input_dataset("housing_unit_allocation", housing_unit_alloc)
pop_dis.load_remote_input_dataset("block_group_data", bg_data)
pop_dis.load_remote_input_dataset("value_poss_param", value_loss)

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...


In [19]:
# Create a dictionary to save multiple dislocation results
popdisresults = {}

for rt_val in rt:                                       # loop through recurrence interval  
    building_dmg_name = 'buildings_eq_' + str(rt_val) + 'yr_dmg_result.csv' # defining output name

    building_dmg = Dataset.from_file(building_dmg_name, data_type="ergo:buildingDamageVer4")
    pop_dis.set_input_dataset("building_dmg", building_dmg)

    # Specify the result name
    result_name = 'housingunit_eq_' + str(rt_val) + 'yr_popdis_result'

    seed = 1111

    # Set analysis parameters
    pop_dis.set_parameter("result_name", result_name)
    pop_dis.set_parameter("seed", seed)

    # Run Population dislocation analysis
    pop_dis.run_analysis()
    
    # Retrieve result dataset
    result = pop_dis.get_output_dataset("result")

    # Convert dataset to Pandas DataFrame
    df = result.get_dataframe_from_csv(low_memory=False)
    
    df.groupby('dislocated').aggregate({'numprec':np.sum})

    # Save results in dictionary
    popdisresults[rt_val] = df

In [20]:
# Look at results for 500 year return period
popdisresults[500].head()

Unnamed: 0.1,guid,insignific,moderate,heavy,complete,demandtype,demandunits,Unnamed: 0,strctid,parcelid,...,aphumerge,Race Ethnicity,pblackbg,phispbg,rploss_ins,rploss_med,rploss_hwy,rploss_cmp,prdis,dislocated
0,e38d8575-7880-4a8c-b6d7-225ab1cf9264,0.272003,0.378359,0.253359,0.096279,0.4 sd,in,0,41007021038001001S,21038.0,...,both,"1 White alone, Not Hispanic",0.0,23.54015,0.002914,0.100425,0.524702,0.889881,0.478782,True
1,af5771b4-4f42-4166-b772-78a3706fa8ac,0.454954,0.42152,0.091477,0.032049,0.35 sd,in,1,41007020864002002S,20864.0,...,both,"1 White alone, Not Hispanic",0.0,23.54015,0.00693,0.132025,0.677548,0.951019,0.416452,False
2,37532fb5-5107-478b-ab2c-158eb001c68b,0.380108,0.394729,0.171031,0.054131,0.35 sd,in,2,41007020864002001S,20864.0,...,both,"1 White alone, Not Hispanic",0.0,23.54015,0.005776,0.1,0.654935,0.941974,0.446992,True
3,66d39314-1c68-4634-a82c-8fcb37f529ff,0.46045,0.383305,0.130358,0.025887,0.35 sd,in,3,41007020956001001S,20956.0,...,both,"1 White alone, Not Hispanic",0.0,23.54015,0.002404,0.105851,0.436479,0.854591,0.400459,True
4,e519ec32-c5eb-422a-be3d-7ff4bb33f1e2,0.454954,0.42152,0.091477,0.032049,0.35 sd,in,4,41007021145001001S,21145.0,...,both,"1 White alone, Not Hispanic",0.0,23.54015,0.003029,0.133077,0.438777,0.855511,0.402589,True


In [21]:
popdisresults[500].columns

Index(['guid', 'insignific', 'moderate', 'heavy', 'complete', 'demandtype',
       'demandunits', 'Unnamed: 0', 'strctid', 'parcelid', 'landuse', 'd_sf',
       'addrptid', 'residential', 'huestimate', 'x', 'y', 'blockid',
       'randomap', 'randommergeorder', 'blockidstr', 'huid', 'livetype',
       'numprec', 'ownershp', 'race', 'hispan', 'vacancy', 'gqtype', 'bgid',
       'randomhu', 'aphumerge', 'Race Ethnicity', 'pblackbg', 'phispbg',
       'rploss_ins', 'rploss_med', 'rploss_hwy', 'rploss_cmp', 'prdis',
       'dislocated'],
      dtype='object')

In [22]:
# Create a dictionary to save multiple dislocation results tables
popdisresultstables = {}

for rt_val in rt:                                       # loop through recurrence interval  
    print(rt_val)
        
    df = popdisresults[rt_val]
    popdisresultstables[rt_val] = pd.pivot_table(df, values='numprec', index=['Race Ethnicity'],
                                  margins = True, margins_name = 'Total',
                                  columns=['dislocated'], aggfunc=[np.sum], 
                                  fill_value=0).reset_index().rename(
                                                                columns={'sum': 'Total Population',
                                                                         False: 'Does not dislocate',
                                                                         True: 'Dislocates'})
    numerator = popdisresultstables[rt_val][('Total Population','Dislocates')]
    denomenator = popdisresultstables[rt_val][('Total Population','Total')]
    popdisresultstables[rt_val][('Total Population','Percent Dislocation')] = numerator/denomenator * 100

    varformat = {('Total Population','Does not dislocate'): "{:,.0f}", 
                 ('Total Population','Dislocates'): "{:,.0f}",
                 ('Total Population','Total'): "{:,.0f}",
                 ('Total Population','Percent Dislocation'): "{:.2f}"}

100
500


In [23]:
rt_val = 100
table_title = "Table 1. Population Dislocation for "+ str(rt_val) +" year Earthquake"
table_title = table_title + "by Race and Hispanic, Seaside OR 2010"
popdisresultstables[rt_val].style.set_caption(table_title).format(varformat).set_table_styles([
    dict(selector='th', props=[('text-align', 'center')]),])

Unnamed: 0_level_0,Race Ethnicity,Total Population,Total Population,Total Population,Total Population
dislocated,Unnamed: 1_level_1,Does not dislocate,Dislocates,Total,Percent Dislocation
0,0 Vacant HU No Race Ethnicity Data,0,0,0,
1,"1 White alone, Not Hispanic",3633,1898,5531,34.32
2,"2 Black alone, Not Hispanic",9,15,24,62.5
3,"3 Other Race, Not Hispanic",217,110,327,33.64
4,"4 Any Race, Hispanic",527,184,711,25.88
5,5 Group Quarters no Race Ethnicity Data,43,4,47,8.51
6,Total,4429,2211,6640,33.3


In [24]:
rt_val = 500
table_title = "Table 2. Population Dislocation for "+ str(rt_val) +" year Earthquake"
table_title = table_title + "by Race and Hispanic, Seaside OR 2010"
popdisresultstables[rt_val].style.set_caption(table_title).format(varformat).set_table_styles([
    dict(selector='th', props=[('text-align', 'center')]),])

Unnamed: 0_level_0,Race Ethnicity,Total Population,Total Population,Total Population,Total Population
dislocated,Unnamed: 1_level_1,Does not dislocate,Dislocates,Total,Percent Dislocation
0,0 Vacant HU No Race Ethnicity Data,0,0,0,
1,"1 White alone, Not Hispanic",3026,2505,5531,45.29
2,"2 Black alone, Not Hispanic",8,16,24,66.67
3,"3 Other Race, Not Hispanic",176,151,327,46.18
4,"4 Any Race, Hispanic",454,257,711,36.15
5,5 Group Quarters no Race Ethnicity Data,18,29,47,61.7
6,Total,3682,2958,6640,44.55
