In [1]:
import pandas as pd
import geopandas as gps
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import h5py
import numpy as np
# Silence every warning for the rest of the notebook session.
warnings.filterwarnings("ignore")

# Root folder of the PSRC inputs. The working directory is switched to it, so
# the relative paths used when reading and writing files resolve against it.
# Set the PSRC_DATA_PATH environment variable to run the notebook on another
# machine; when it is unset the original Google Drive location is used.
data_path = os.environ.get(
    'PSRC_DATA_PATH',
    '/Users/xiaodanxu/Library/CloudStorage/GoogleDrive-arielinseu@gmail.com/My Drive/BEAM-CORE/Lab call/PSRC',
)
os.chdir(data_path)

In [3]:
# Read the PSRC parcel geography table from the processed SoundCast folder
parcel_geography_file = 'SoundCast_processed/parcel_2018_geography.csv'
parcel_geography = pd.read_csv(parcel_geography_file)

# Report the number of rows and the available columns, then preview the table
print(parcel_geography.shape[0])
print(parcel_geography.columns)
parcel_geography.head(5)

1302434
Index(['field1', 'ParcelID', 'rg_proposed', 'CityName', 'Census2010Block',
       'Census2010BlockGroup', 'Census2010Tract', 'minority_geog',
       'poverty_geog', 'GrowthCenterName', 'FAZID', 'taz_p', 'parcel_id_x',
       'District', 'district_name', 'CountyName', 'TAZ', 'LowIncome',
       'PeopleofColor', 'BaseYear', 'GEOID10', 'place_name', 'parcel_id_y',
       'disability_geog_vs_50_percent', 'disability_geog_vs_reg_total',
       'elderly_geog_vs_50_percent', 'elderly_geog_vs_reg_total',
       'english_geog_vs_50_percent', 'english_geog_vs_reg_total',
       'poverty_geog_vs_50_percent', 'poverty_geog_vs_reg_total',
       'racial_geog_vs_50_percent', 'racial_geog_vs_reg_total',
       'youth_geog_vs_50_percent', 'youth_geog_vs_reg_total'],
      dtype='object')


Unnamed: 0,field1,ParcelID,rg_proposed,CityName,Census2010Block,Census2010BlockGroup,Census2010Tract,minority_geog,poverty_geog,GrowthCenterName,...,elderly_geog_vs_50_percent,elderly_geog_vs_reg_total,english_geog_vs_50_percent,english_geog_vs_reg_total,poverty_geog_vs_50_percent,poverty_geog_vs_reg_total,racial_geog_vs_50_percent,racial_geog_vs_reg_total,youth_geog_vs_50_percent,youth_geog_vs_reg_total
0,0,1,Core,Kent,530330300000000.0,530330300000.0,53033030000.0,2,1,Kent MIC,...,0,1,0,1,0,1,1,1,0,0
1,1,2,Core,Kent,530330300000000.0,530330300000.0,53033030000.0,2,1,Kent MIC,...,0,1,0,1,0,1,1,1,0,0
2,2,3,Core,Kent,530330300000000.0,530330300000.0,53033030000.0,2,1,Kent MIC,...,0,1,0,1,0,1,1,1,0,0
3,3,4,Core,Kent,530330300000000.0,530330300000.0,53033030000.0,2,1,Kent MIC,...,0,1,0,1,0,1,1,1,0,0
4,4,5,Core,Kent,530330300000000.0,530330300000.0,53033030000.0,2,1,Kent MIC,...,0,1,0,1,0,1,1,1,0,0


In [4]:
# Columns retained from the full parcel geography table
parcel_geography_keep_cols = [
    'ParcelID', 'CityName', 'Census2010Block', 'Census2010BlockGroup',
    'Census2010Tract', 'FAZID', 'taz_p', 'District', 'district_name',
    'CountyName', 'TAZ', 'BaseYear', 'GEOID10', 'place_name',
]

# Narrow the table to those columns (all rows kept) and preview it
parcel_geography_short = parcel_geography.loc[:, parcel_geography_keep_cols]
parcel_geography_short.head(5)

Unnamed: 0,ParcelID,CityName,Census2010Block,Census2010BlockGroup,Census2010Tract,FAZID,taz_p,District,district_name,CountyName,TAZ,BaseYear,GEOID10,place_name
0,1,Kent,530330300000000.0,530330300000.0,53033030000.0,3600,1019,7.0,Renton-FedWay-Kent,King,1019,2018,530330300000000.0,Kent
1,2,Kent,530330300000000.0,530330300000.0,53033030000.0,3600,1018,7.0,Renton-FedWay-Kent,King,1018,2018,530330300000000.0,Kent
2,3,Kent,530330300000000.0,530330300000.0,53033030000.0,3600,1018,7.0,Renton-FedWay-Kent,King,1018,2018,530330300000000.0,Kent
3,4,Kent,530330300000000.0,530330300000.0,53033030000.0,3600,1018,7.0,Renton-FedWay-Kent,King,1018,2018,530330300000000.0,Kent
4,5,Kent,530330300000000.0,530330300000.0,53033030000.0,3600,1018,7.0,Renton-FedWay-Kent,King,1018,2018,530330300000000.0,Kent


In [23]:
# Open the PSRC synthetic population HDF5 file read-only.
# The handle stays open: the household and person collection cells read from it.
psrc_file_path = 'v3.0.0_2018_2050/landuse/2018/v3.0_RTP/hh_and_persons.h5'
file = h5py.File(psrc_file_path, mode='r')
print("Keys: %s" % file.keys())

# Group names for the household and person tables inside the file
household_key, person_key = 'Household', 'Person'

# Show the variable names stored under each group
for group_name in (household_key, person_key):
    print(np.array(file[group_name]))


Keys: <KeysViewHDF5 ['Household', 'Person']>
['hhexpfac' 'hhincome' 'hhno' 'hhparcel' 'hhsize' 'hhtaz' 'hownrent'
 'hrestype']
['hhno' 'pagey' 'pdairy' 'pgend' 'pno' 'ppaidprk' 'pptyp' 'prace'
 'psexpfac' 'pspcl' 'pstaz' 'pstyp' 'ptpass' 'puwarrp' 'puwdepp' 'puwmode'
 'pwpcl' 'pwtaz' 'pwtyp']


In [24]:
# collect PSRC households
# Names of the variables stored under the household group of the HDF5 file.
list_of_hh_var = np.array(file[household_key])
print(list_of_hh_var)

# Build one single-column frame per variable and join them side by side in a
# single concat call. Growing the frame with pd.concat inside the loop
# re-copied every previously collected column on each iteration.
psrc_households = pd.concat(
    [pd.DataFrame(file[household_key][var]) for var in list_of_hh_var],
    axis=1,
)
print(len(psrc_households))

# Each per-variable frame carries a default column label, so name the columns
# after the variables, in the same order they were read.
psrc_households.columns = list_of_hh_var
psrc_households.head(5)

['hhexpfac' 'hhincome' 'hhno' 'hhparcel' 'hhsize' 'hhtaz' 'hownrent'
 'hrestype']
1605263


Unnamed: 0,hhexpfac,hhincome,hhno,hhparcel,hhsize,hhtaz,hownrent,hrestype
0,1,83800,1,649927,1,3594,1,1
1,1,83800,2,700235,1,3591,1,1
2,1,54650,3,649754,1,3592,1,1
3,1,54650,4,649765,1,3592,1,1
4,1,54650,5,649842,1,3594,1,1


In [25]:
# collect PSRC persons
# Names of the variables stored under the person group of the HDF5 file.
list_of_per_var = np.array(file[person_key])
print(list_of_per_var)

# Build one single-column frame per variable and join them side by side in a
# single concat call. Growing the frame with pd.concat inside the loop
# re-copied every previously collected column on each iteration.
psrc_persons = pd.concat(
    [pd.DataFrame(file[person_key][var]) for var in list_of_per_var],
    axis=1,
)
print(len(psrc_persons))

# Each per-variable frame carries a default column label, so name the columns
# after the variables, in the same order they were read.
psrc_persons.columns = list_of_per_var
psrc_persons.head(5)

['hhno' 'pagey' 'pdairy' 'pgend' 'pno' 'ppaidprk' 'pptyp' 'prace'
 'psexpfac' 'pspcl' 'pstaz' 'pstyp' 'ptpass' 'puwarrp' 'puwdepp' 'puwmode'
 'pwpcl' 'pwtaz' 'pwtyp']
4053154


Unnamed: 0,hhno,pagey,pdairy,pgend,pno,ppaidprk,pptyp,prace,psexpfac,pspcl,pstaz,pstyp,ptpass,puwarrp,puwdepp,puwmode,pwpcl,pwtaz,pwtyp
0,1,69,-1,2,1,-1,1,1,1,-1,-1,0,-1,-1,-1,-1,-1,-1,1
1,2,69,-1,2,1,-1,1,1,1,-1,-1,0,-1,-1,-1,-1,-1,-1,1
2,3,61,-1,2,1,-1,1,1,1,-1,-1,0,-1,-1,-1,-1,-1,-1,1
3,4,61,-1,2,1,-1,1,1,1,-1,-1,0,-1,-1,-1,-1,-1,-1,1
4,5,61,-1,2,1,-1,1,1,1,-1,-1,0,-1,-1,-1,-1,-1,-1,1


In [26]:
# Write the collected PSRC household and person tables to CSV (row index
# omitted); the analysis year is embedded in each file name.
analysis_year = 2018
year_suffix = str(analysis_year) + '.csv'

for table, file_stem in (
    (psrc_households, 'SoundCast_processed/households_PSRC_'),
    (psrc_persons, 'SoundCast_processed/persons_PSRC_'),
):
    table.to_csv(file_stem + year_suffix, index=False)

In [16]:
# Load the UrbanSim synthetic population (households and persons for 2018)
urbansim_file_path = 'UrbanSim/model_data_2018.h5'

# Opened read-only; the store is not closed in this cell.
# store.keys() lists the tables available in the file.
store = pd.HDFStore(urbansim_file_path, mode="r")
urbansim_household, urbansim_persons = (
    store.get(table_key) for table_key in ('/2018/households', '/2018/persons')
)
urbansim_persons.head(5)

Unnamed: 0_level_0,race_id,race,p_hispanic,earning,work_at_home,age,sex,household_id,hours,hispanic.1,member_id,student,person_age,hispanic,relate,worker,person_sex,edu
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,1,white,no,28500.0,0,33,2,485,40.0,0,1,0,20 to 35,0,0,1,female,18.0
1,1,white,no,28000.0,0,23,2,102,30.0,0,1,1,20 to 35,0,0,1,female,19.0
2,1,white,no,28000.0,0,23,2,103,30.0,0,1,1,20 to 35,0,0,1,female,19.0
3,1,white,no,28000.0,0,23,2,483,30.0,0,1,1,20 to 35,0,0,1,female,19.0
4,1,white,no,2800.0,0,29,2,382,3.0,0,2,1,20 to 35,0,1,0,female,21.0


In [23]:
# Preview the block_id column of the UrbanSim household table
print(urbansim_household['block_id'].head(5))

# Print the record count of each UrbanSim table under its label
for label, table in (
    ('total households in the region:', urbansim_household),
    ('total persons in the region:', urbansim_persons),
):
    print(label)
    print(len(table))

household_id
0    530330052001000
1    530330052001000
2    530330052001000
3    530330052001000
4    530330052001004
Name: block_id, dtype: object
total households in the region:
1631494
total persons in the region:
3978288


In [2]:
# Inspect the BEAM person and household tables
beam_persons_file = 'BEAM/persons.csv'
beam_hh_file = 'BEAM/households.csv'

# Persons: load and report the number of records
beam_persons = pd.read_csv(beam_persons_file)
print(beam_persons.shape[0])

# Households: load, report the number of records, and preview
beam_hh = pd.read_csv(beam_hh_file)
print(beam_hh.shape[0])
beam_hh.head(5)

3547302
1454695


Unnamed: 0,household_id,serialno,cars,income,race_of_head,age_of_head,workers,hispanic_status_of_head,tenure,recent_mover,...,tenure_mover,hh_seniors,block_id,lcm_county_id,persons,gt55,gt2,hh_type,TAZ,HHT
0,0,2011001205616,0.0,8100.0,9,64,0.0,0,2,0,...,rent not recent,no,530330052001000,53033,1,1,0,6,591,1
1,1,2011001205616,0.0,8100.0,9,64,0.0,0,2,0,...,rent not recent,no,530330052001000,53033,1,1,0,6,591,1
2,2,2011000244283,0.0,8400.0,1,61,0.0,0,2,0,...,rent not recent,no,530330052001000,53033,1,1,0,6,591,1
3,3,2011000244283,0.0,8400.0,1,61,0.0,0,2,0,...,rent not recent,no,530330052001000,53033,1,1,0,6,591,1
4,4,2009001123248,0.0,15300.0,1,47,1.0,0,2,0,...,rent not recent,no,530330052001004,53033,1,0,0,5,591,1


In [11]:
# PSRC parcel land-use table, stored as a space-delimited text file
psrc_land_use_path = 'v3.0.0_2018_2050/landuse/2018/v3.0_RTP/parcels_urbansim.txt'

# CRS identifier kept alongside the parcels; it is not used in this cell.
# NOTE(review): presumably the CRS of xcoord_p / ycoord_p — confirm.
psrc_crs = 'EPSG:2285'

psrc_parcels = pd.read_csv(psrc_land_use_path, delimiter=' ')
psrc_parcels.head(5)

Unnamed: 0,aparks,empedu_p,empfoo_p,empgov_p,empind_p,empmed_p,empofc_p,empoth_p,empret_p,emprsc_p,...,ppricdyp,pprichrp,sfunits,sqft_p,stugrd_p,stuhgh_p,stuuni_p,taz_p,xcoord_p,ycoord_p
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,60440,0,0,0,1019,1292251.0,162729.949912
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,5446,0,0,0,1018,1291828.0,164043.167618
2,0,0,0,0,0,0,0,0,0,0,...,0,0,1,3438,0,0,0,1018,1291590.0,164050.082322
3,0,0,0,0,5,2,17,9,6,0,...,0,0,0,5112,0,0,0,1018,1291535.0,164051.591361
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,6068,0,0,0,1018,1291475.0,164043.810513
