In [1]:
import pandas as pd
import numpy as np
import os
import sys
import numbers
def apply_filter_query(df, filters=None):
	"""
	Narrow a table to the rows satisfying one or more query
	expressions, using ``DataFrame.query``.

	Parameters
	----------
	df : pandas.DataFrame
	filters : list of str or str, optional
		A single expression string is handed to ``DataFrame.query``
		unchanged. A list of expressions is combined with ' and '
		first. When omitted (or empty) no filtering takes place.

	Returns
	-------
	filtered_df : pandas.DataFrame
		The rows that satisfy the query, or `df` itself when there
		is nothing to filter on.

	"""
	# Guard clause: no filters means the caller gets the input back as-is.
	if not filters:
		return df

	expression = filters if isinstance(filters, str) else ' and '.join(filters)
	return df.query(expression)
def _filterize(name, value):
	"""
	Turn a `name` and `value` into a string expression compatible
	the ``DataFrame.query`` method.

	Parameters
	----------
	name : str
		Should be the name of a column in the table to which the
		filter will be applied.

		A suffix of '_max' will result in a "less than" filter,
		a suffix of '_min' will result in a "greater than or equal to" filter,
		and no recognized suffix will result in an "equal to" filter.
	value : any
		Value side of filter for comparison to column values.

	Returns
	-------
	filter_exp : str

	"""
	if name.endswith('_min'):
		name = name[:-4]
		comp = '>='
	elif name.endswith('_max'):
		name = name[:-4]
		comp = '<'
	else:
		comp = '=='

	result = '{} {} {!r}'.format(name, comp, value)
	return result

def filter_table(table, filter_series, ignore=None):
	"""
	Filter a table based on a set of restrictions given in
	Series of column name / filter parameter pairs. The column
	names can have suffixes `_min` and `_max` to indicate
	"greater than or equal to" and "less than" constraints.

	Parameters
	----------
	table : pandas.DataFrame
		Table to filter.
	filter_series : pandas.Series
		Series of column name / value pairs of filter constraints.
		Columns that end with '_max' will be used to create
		"less than" filters, columns that end with '_min' will be
		used to create "greater than or equal to" filters.
		A column with no suffix will be used to make an 'equal to' filter.
		Numeric values that are NaN are skipped.
	ignore : sequence of str or str, optional
		Column names that should not be used for filtering. A single
		string is treated as one column name.

	Returns
	-------
	filtered : pandas.DataFrame

	"""
	ignore = ignore if ignore else set()
	if isinstance(ignore, str):
		# `name in "some_string"` is a substring test, so a bare string would
		# also ignore any column whose name is a fragment of it. Wrap it so
		# the membership test below is an exact-name match.
		ignore = {ignore}

	# Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
	filters = [
		_filterize(name, val)
		for name, val in filter_series.items()
		if name not in ignore
		and not (isinstance(val, numbers.Number) and np.isnan(val))
	]

	return apply_filter_query(table, filters)


In [2]:
# Folder and identifier of the simulation run to inspect; the next cell joins
# them into the path of the run's HDF5 output file.
run_folder = "/home/da/semcog_urbansim/runs"
run_num = "run295"

In [3]:
# Open the selected run's HDF5 store in read-only mode.
hdf = pd.HDFStore(os.path.join(run_folder, '{}.h5'.format(run_num)), mode='r')


In [1]:
# List the keys stored in the run's HDF5 file.
hdf.keys()

NameError: name 'hdf' is not defined

### Household controls

In [39]:
# Household control totals from the store's /base group, plus the simulated
# 2050 households and persons tables.
region_ct = hdf['/base/annual_household_control_totals']
region_hh = hdf["/2050/households"]
region_p = hdf["/2050/persons"]
# Read from a CSV outside the run store, indexed by large_area_id.
# NOTE(review): region_target is not referenced again in the visible cells - confirm it is still needed.
region_target = pd.read_csv('/home/da/share/urbansim/RDF2050/model_inputs/base_tables/remi_pop_total.csv', index_col='large_area_id')

In [6]:
# Select the `*_max` columns that contain at least one -1 value.
max_cols = region_ct.columns[
	region_ct.columns.str.endswith("_max") & (region_ct == -1).any(axis=0)
]
# In those columns replace -1 with +inf, then add 1 to every value in them.
# NOTE(review): presumably -1 encodes "no upper bound" and the +1 turns an
# inclusive maximum into the exclusive bound that the `<` filter built by
# _filterize expects - confirm.
# NOTE(review): `*_max` columns without any -1 are not shifted, and rerunning
# this cell adds 1 again because region_ct is modified in place.
region_ct[max_cols] = region_ct[max_cols].replace(-1, np.inf)
region_ct[max_cols] += 1

In [7]:
# Cast the household and person table indexes to int (the household ids
# display as floats such as 1.0 when the table is read straight from the store).
region_hh.index = region_hh.index.astype(int)
region_p.index = region_p.index.astype(int)

In [8]:
# Control-total rows for the final simulated year.
region_ct_2050 = region_ct.loc[2050]
# For every control row, count the simulated households that satisfy its
# constraints. `ignore` is passed as a list so filter_table's `name in ignore`
# check is an exact column-name match rather than a substring test against a
# bare string.
region_simulated_hh_2050 = [
	filter_table(region_hh, region_ct_2050.iloc[i], ['total_number_of_households']).shape[0]
	for i in range(region_ct_2050.shape[0])
]

In [9]:
# Store the per-row simulated counts as a new column on the 2050 rows of
# region_ct; the Series reuses region_ct_2050's index so it lines up with them.
region_ct.loc[2050, 'region_simulated_hh_2050'] = pd.Series(region_simulated_hh_2050, index=region_ct_2050.index)

In [10]:
# Absolute gap and ratio between the simulated household count and the
# control total, for each 2050 control row.
region_ct.loc[2050, 'abs_diff'] = (region_ct.loc[2050, 'region_simulated_hh_2050']  - region_ct.loc[2050, 'total_number_of_households'] ).abs()
region_ct.loc[2050, 'ratio'] = region_ct.loc[2050, 'region_simulated_hh_2050']  / region_ct.loc[2050, 'total_number_of_households'] 

In [12]:
# Show the 20 control rows of 2050 with the largest absolute gap.
region_ct.loc[2050].sort_values(by='abs_diff', ascending=False).head(20)

Unnamed: 0_level_0,age_of_head_min,children_max,income_max,race_id,age_of_head_max,cars_min,persons_max,cars_max,children_min,income_min,workers_max,persons_min,total_number_of_households,workers_min,large_area_id,region_simulated_hh_2050,abs_diff,ratio
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2050,35,1.0,31704,2,65.0,0,2.0,1.0,0,0,1.0,1,12220,0,5,11559.0,661.0,0.945908
2050,35,1.0,31704,2,65.0,1,2.0,2.0,0,0,1.0,1,7196,0,5,6811.0,385.0,0.946498
2050,35,1.0,64016,2,65.0,1,2.0,2.0,0,31705,2.0,1,5457,1,5,5804.0,347.0,1.063588
2050,35,1.0,31704,2,65.0,1,2.0,2.0,0,0,2.0,1,4536,1,5,4825.0,289.0,1.063713
2050,65,1.0,113190,4,inf,2,4.0,3.0,0,64017,3.0,3,1650,2,5,1421.0,229.0,0.861212
2050,65,1.0,113190,3,inf,3,5.0,inf,0,64017,2.0,4,2767,1,161,2557.0,210.0,0.924106
2050,65,1.0,1534131,1,inf,2,3.0,3.0,0,113191,2.0,2,4790,1,125,4995.0,205.0,1.042797
2050,35,1.0,31704,2,65.0,0,3.0,1.0,0,0,1.0,2,3273,0,5,3072.0,201.0,0.938588
2050,65,1.0,113190,1,inf,2,3.0,3.0,0,64017,1.0,2,7312,0,125,7139.0,173.0,0.97634
2050,65,1.0,64016,1,inf,2,3.0,3.0,0,31705,1.0,2,7906,0,125,7743.0,163.0,0.979383


In [13]:
# Overall misfit: summed absolute household gap as a share of the summed
# 2050 control totals.
region_ct.loc[2050, 'abs_diff'].sum() / region_ct.loc[2050, 'total_number_of_households'].sum()

0.017398918772793995

### MCD totals

In [24]:
# Per-MCD target totals, one column per year, indexed by the `mcd` column.
mcd_total = pd.read_csv(
	'/home/da/share/urbansim/RDF2050/model_inputs/base_tables/mcd_totals_2020_2050_nov15.csv',
	index_col='mcd',
)
# MCD -> large area lookup, with the id column renamed to `mcd`.
semmcds = (
	hdf['/base/semmcds']
	.reset_index()[['semmcd_id', 'large_area_id']]
	.rename(columns={'semmcd_id': 'mcd'})
)


In [8]:
# 2050 parcels and buildings; each building gets its parcel's semmcd.
p50 = hdf["/2050/parcels"]
b50 = hdf["/2050/buildings"].join(p50.semmcd, on='parcel_id')
# Attach the building's semmcd to every household. A semmcd column left by a
# previous run of this cell is dropped first; otherwise the join raises
# "columns overlap but no suffix specified" when the cell is executed again.
region_hh = region_hh.drop(columns='semmcd', errors='ignore').join(b50.semmcd, on='building_id')

In [9]:
# Number of households whose building_id appears in the 2050 buildings index.
region_hh.building_id.isin(b50.index).sum()

2056153

In [11]:
# Number of households that do not have a building_id (building_id == -1).
region_hh[region_hh.building_id == -1].shape[0]

0

In [12]:
# Same count expressed as a fraction of all households.
region_hh[region_hh.building_id == -1].shape[0]/ region_hh.shape[0]

0.0

In [13]:
# Household count per semmcd; households with a missing semmcd are counted under -1.
simulated_mcd_hh = region_hh.semmcd.fillna(-1).value_counts()

In [27]:
# Display the households table, now carrying the joined semmcd column.
region_hh

Unnamed: 0_level_0,cars,children,race_id,income,ten,persons,workers,valp,age_of_head,building_id,large_area_id,blkgrp,household_type,semmcd
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,2,0,1,147811,1,2,2,250000,50,4015129,161,261614200001,14,4015
3,1,0,1,74656,1,1,0,200000,61,4024154,161,261614200001,8,4015
8,4,0,1,64331,1,2,1,325000,37,9084828,161,261614200001,8,4005
9,2,0,1,103250,1,2,2,159000,57,4022744,161,261614200001,8,4015
10,2,4,4,128248,1,6,2,230000,37,4019564,161,261614200001,17,4015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3869994,1,1,4,46729,2,4,1,225000,81,2480148,125,261251681002,10,2072
3869995,1,1,4,46729,2,4,1,225000,81,2934148,125,261251681002,10,2010
3869996,4,0,4,31240,1,5,2,600000,75,2136405,125,261251561001,4,2072
3869997,4,0,4,31240,1,5,2,600000,75,2040941,125,261251561001,4,2075


In [21]:
# Display the simulated household counts per MCD.
simulated_mcd_hh

5       257650
4005     55031
3130     54344
3115     53847
3030     47712
         ...  
2172        59
3045        35
2252         8
2073         8
6142         1
Name: semmcd, Length: 237, dtype: int64

In [23]:
# Simulated minus target households per MCD for 2050 (index-aligned subtraction).
diff = simulated_mcd_hh - mcd_total['2050']
# Reorder the differences by absolute size, largest first.
mcd_total_vs_simulated = (diff)[diff.abs().sort_values(ascending=False).index]
# Cast the MCD ids in the index to int.
mcd_total_vs_simulated.index = mcd_total_vs_simulated.index.astype(int)

In [24]:
# Show the 50 largest gaps; missing differences are filled with 0 before the int cast.
mcd_total_vs_simulated.fillna(0).astype(int).head(50)

2172   -11205
2252   -10648
2145     4673
4005     4624
3110     3597
3115    -3294
2190     3164
2290     3130
6135    -2910
2073    -2817
2030    -2689
4110    -2616
4080    -2519
2115     2488
1025    -2302
2175     2275
2215     2227
2160     2199
3060     2192
2235     1778
2260     1744
1020    -1651
3130    -1637
4135    -1562
2240    -1561
2060     1559
6040     1536
1110     1522
3030     1517
2075    -1495
2170     1464
1210     1426
2150    -1412
3120    -1399
3135    -1336
1105    -1320
3025    -1297
7065     1250
4100     1240
6142    -1227
2285     1188
2015    -1179
7010     1166
2180    -1155
2095     1102
2220    -1086
7035    -1059
2005     1048
2205      916
7045      901
dtype: int64

In [26]:
# Simulated household count for MCD 4005.
simulated_mcd_hh.loc[4005]

55031

In [140]:
# Look up the base semmcds record for MCD 2252.
hdf["/base/semmcds"].loc[2252]

city_name        Southfield Twp
county_id                   125
large_area_id               125
Name: 2252, dtype: object

In [None]:
# Attach each building's semmcd via its parcel. Guarded because an earlier cell
# already performs this join: repeating it once the column exists raises
# "columns overlap but no suffix specified".
if 'semmcd' not in b50.columns:
	b50 = b50.join(p50.semmcd, on='parcel_id')

Unnamed: 0_level_0,parcel_id,sp_filter,improvement_value,residential_units,non_residential_sqft,sqft_per_unit,stories,building_type_id,market_value,year_built,land_area,sqft_price_res,sqft_price_nonres,event_id,mcd_model_quota,semmcd
building_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1000016,1360532,0,259397.618497,1.0,0.000000,1902.0,1.00,81,408325.317919,1985,1902,1980.717757,0.0,0,0.0,1135
1000035,1360480,0,309378.424954,1.0,0.000000,2724.0,2.00,81,450462.631927,1984,1362,1660.675548,0.0,0,0.0,1135
1000096,1360565,0,357748.589895,1.0,0.000000,3047.0,2.00,81,503442.774278,1985,1524,1600.271016,0.0,0,0.0,1135
1000122,1196635,0,353370.309037,1.0,0.000000,2345.0,2.00,81,462366.828645,1973,1172,524.166244,0.0,0,0.0,1130
1000140,1360430,0,311671.231693,1.0,0.000000,2498.0,1.75,81,437260.955582,1985,1427,542.346511,0.0,0,0.0,1135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9044692,7037325,0,0.000000,1.0,0.000000,1123.0,1.00,81,0.000000,2050,0,1122.382758,0.0,0,0.0,7035
9044693,7036019,0,0.000000,4.0,0.000000,1930.4,1.00,81,0.000000,2050,0,848.514944,0.0,0,0.0,7035
9044694,7036594,0,0.000000,2.0,448.008990,2000.0,1.00,82,0.000000,2050,0,107.888180,0.0,0,0.0,7035
9044695,4018464,0,0.000000,0.0,587.633602,0.0,1.00,81,0.000000,2050,0,2048.745682,0.0,0,0.0,4005


In [147]:
# Shape of the subset of 2050 buildings located in MCD 2252.
b50[b50.semmcd == 2252].shape

(9, 16)

In [150]:
# Repeat the MCD 2252 building count for another simulation year.
year = 2020
p_temp = hdf['/{}/parcels'.format(year)]
b_temp = hdf['/{}/buildings'.format(year)].join(p_temp['semmcd'], on='parcel_id')
b_temp['semmcd'].eq(2252).sum()

9

In [152]:
# Target totals for MCD 2252 across all year columns.
mcd_total.loc[2252]

2020    10458.0
2021       17.0
2022    10390.0
2023    10406.0
2024    10442.0
2025    10462.0
2026    10492.0
2027    10509.0
2028    10530.0
2029    10538.0
2030    10579.0
2031    10600.0
2032    10634.0
2033    10675.0
2034    10687.0
2035    10712.0
2036    10724.0
2037    10728.0
2038    10728.0
2039    10722.0
2040    10718.0
2041    10710.0
2042    10707.0
2043    10700.0
2044    10690.0
2045    10682.0
2046    10674.0
2047    10670.0
2048    10668.0
2049    10663.0
2050    10656.0
Name: 2252, dtype: float64

In [7]:
# Display the 2050 households table as stored in the run file.
hdf['/2050/households']

Unnamed: 0_level_0,cars,children,race_id,income,ten,persons,workers,valp,age_of_head,building_id,large_area_id,blkgrp,household_type
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1.0,2,0,1,147811,1,2,2,250000,50,4015129,161,261614200001,14
3.0,1,0,1,74656,1,1,0,200000,61,4024154,161,261614200001,8
8.0,4,0,1,64331,1,2,1,325000,37,9084828,161,261614200001,8
9.0,2,0,1,103250,1,2,2,159000,57,4022744,161,261614200001,8
10.0,2,4,4,128248,1,6,2,230000,37,4019564,161,261614200001,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3869994.0,1,1,4,46729,2,4,1,225000,81,2480148,125,261251681002,10
3869995.0,1,1,4,46729,2,4,1,225000,81,2934148,125,261251681002,10
3869996.0,4,0,4,31240,1,5,2,600000,75,2136405,125,261251561001,4
3869997.0,4,0,4,31240,1,5,2,600000,75,2040941,125,261251561001,4


In [14]:
# Simulated household counts per MCD, one column per year from 2020 to 2050,
# aligned to the MCD ids of the target table.
hh_by_mcd_year = pd.DataFrame(index=mcd_total.index)
for year in range(2020, 2051):
	p = hdf["/{}/parcels".format(year)]
	# Buildings inherit semmcd from their parcel, households from their building.
	b = hdf["/{}/buildings".format(year)].join(p['semmcd'], on='parcel_id')
	hh = hdf["/{}/households".format(year)].join(b['semmcd'], on='building_id')
	# Households with a missing semmcd are counted under -1.
	hh_vcount = hh['semmcd'].fillna(-1).astype(int).value_counts()
	hh_by_mcd_year.loc[:, str(year)] = hh_vcount

In [23]:
# Display the per-MCD, per-year simulated household counts.
hh_by_mcd_year

Unnamed: 0_level_0,2020,2021,2022,2023,2024,2025,2026,2027,2028,2029,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
mcd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,266342.0,264208.0,261665.0,262450.0,263720.0,263925.0,264164.0,264020.0,263793.0,263477.0,...,258446.0,258159.0,257894.0,257619.0,257625.0,257689.0,257722.0,257751.0,257621.0,257650.0
1005,11638.0,11321.0,11223.0,11156.0,11126.0,11127.0,11112.0,11102.0,11110.0,11129.0,...,11097.0,11085.0,11086.0,11055.0,11037.0,11012.0,10978.0,10980.0,10960.0,10935.0
1010,1852.0,1844.0,1845.0,1837.0,1828.0,1820.0,1824.0,1815.0,1804.0,1800.0,...,1778.0,1784.0,1786.0,1782.0,1785.0,1775.0,1777.0,1781.0,1774.0,1783.0
1015,12835.0,12984.0,13016.0,13031.0,13075.0,13105.0,13113.0,13132.0,13123.0,13116.0,...,13186.0,13164.0,13165.0,13136.0,13136.0,13131.0,13127.0,13124.0,13123.0,13125.0
1020,36154.0,36934.0,37055.0,37171.0,37354.0,37549.0,37754.0,37870.0,38015.0,38131.0,...,38637.0,38620.0,38559.0,38564.0,38493.0,38477.0,38471.0,38450.0,38437.0,38424.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7080,5278.0,5269.0,5304.0,5407.0,5451.0,5474.0,5499.0,5528.0,5552.0,5604.0,...,6542.0,6564.0,6574.0,6595.0,6612.0,6650.0,6654.0,6653.0,6648.0,6632.0
7085,889.0,867.0,881.0,881.0,892.0,896.0,894.0,898.0,898.0,904.0,...,966.0,966.0,971.0,968.0,973.0,965.0,959.0,953.0,947.0,945.0
7090,2226.0,2316.0,2351.0,2391.0,2415.0,2440.0,2448.0,2443.0,2459.0,2483.0,...,2626.0,2626.0,2629.0,2631.0,2633.0,2646.0,2645.0,2645.0,2641.0,2637.0
7095,4252.0,4303.0,4326.0,4361.0,4366.0,4395.0,4418.0,4436.0,4464.0,4501.0,...,4691.0,4689.0,4688.0,4687.0,4687.0,4687.0,4685.0,4685.0,4678.0,4672.0


In [26]:
# Add each MCD's large_area_id as column 'la', aligned on the MCD id.
hh_by_mcd_year['la'] = semmcds.set_index('mcd')['large_area_id']

In [32]:
# Write the per-MCD, per-year counts to a CSV file in the project's data/cache folder.
hh_by_mcd_year.to_csv('~/semcog_urbansim/data/cache/hh_by_mcd_year.csv')

In [33]:
# Side-by-side comparison of the target totals and the simulated counts for MCD 5.
pd.DataFrame({"mcd_total":mcd_total.loc[5],"simulated":hh_by_mcd_year.loc[5]})

Unnamed: 0,mcd_total,simulated
2020,266342.0,266342.0
2021,264223.0,264208.0
2022,261701.0,261665.0
2023,262511.0,262450.0
2024,263796.0,263720.0
2025,264013.0,263925.0
2026,264258.0,264164.0
2027,264123.0,264020.0
2028,263899.0,263793.0
2029,263583.0,263477.0


In [38]:
# 2020 jobs whose building_id does not appear in the 2020 buildings index.
j = hdf["/2020/jobs"]
b = hdf["/2020/buildings"]
j[~j.building_id.isin(b.index)]

Unnamed: 0,home_based_status,semcog_id,sqft,sector_id,building_id,large_area_id
1812963,0,247163,1000,7,-1,3
188995,0,813978,700,12,-1,3
192376,0,814024,1000,12,-1,3
208504,0,814109,700,12,-1,3
221324,0,814010,700,12,-1,3
...,...,...,...,...,...,...
2144495,0,579518,400,15,-1,161
2144812,0,579518,400,15,-1,161
2147177,0,579518,400,15,-1,161
910938,0,128181,250,18,-1,161


In [45]:
# Reload the 2020 jobs, households and buildings tables.
j = hdf["/2020/jobs"]
hh = hdf["/2020/households"]
b = hdf["/2020/buildings"]
# Exploratory expressions, left commented out:
# hh.building_id.isin(b.index)
# j[~j.building_id.isin(b.index)]

In [46]:
# Number of households in `hh` whose building_id is not in the index of `b`.
(~hh.building_id.isin(b.index)).sum()


0

In [50]:
# Shape (rows, columns) of the base households table.
hdf["/base/households"].shape

(1936872, 13)

##### end 

In [51]:
# Open the forecast input HDF5 file in read-only mode.
inhdf = pd.HDFStore('~/share/urbansim/RDF2050/model_inputs/base_hdf/forecast_data_input_112222.h5', 'r')

In [1]:
# Largest building_id referenced by the input households table.
inhdf['households'].building_id.max()

NameError: name 'inhdf' is not defined

In [89]:
# Count the base jobs that match each 2020 employment control row.
j = hdf['base/jobs']
ec = hdf['base/annual_employment_control_totals']
# Keep the 2020 controls and move the remaining index levels into columns so
# they take part in the filter.
ec = ec.loc[2020].reset_index()
# `ignore` is passed as a list so filter_table's `name in ignore` check is an
# exact column-name match rather than a substring test against a bare string.
region_simulated_j = [
	filter_table(j, ec.iloc[i], ['total_number_of_jobs']).shape[0]
	for i in range(ec.shape[0])
]
ec.loc[:, 'region_simulated_j'] = region_simulated_j

In [93]:
# First 20 control rows where the counted jobs exceed the control total.
ec[ec.region_simulated_j>ec.total_number_of_jobs].head(20)

Unnamed: 0,sector_id,home_based_status,large_area_id,total_number_of_jobs,region_simulated_j
1,1,1,3,460,468
3,2,1,3,11959,12048
5,3,1,3,2029,2059
7,4,1,3,3043,3135
9,5,1,3,6802,7053
10,6,0,3,44410,55688
15,8,1,3,1075,1119
16,9,0,3,28695,36305
19,10,1,3,9693,10048
20,11,0,3,11232,15942


In [83]:
# Reload the employment controls and keep the 2020 rows, with the remaining
# index levels moved into columns.
# NOTE: this rebinds `ec`, so a region_simulated_j column added to the previous
# `ec` frame is not present on this one.
ec = hdf['base/annual_employment_control_totals']
# (j[j.building_id==-1].index.isin(hdf['base/jobs'].index)).sum()
ec = ec.loc[2020].reset_index()
# filter_table(j, ec.iloc[22], 'total_number_of_jobs')
# Inspect the control row at position 22.
ec.iloc[22]

sector_id                  12
home_based_status           0
large_area_id               3
total_number_of_jobs    23471
Name: 22, dtype: int64

In [79]:
# Display the list of job counts, one entry per control row.
region_simulated_j

[1227,
 460,
 13806,
 11959,
 58960,
 2029,
 17748,
 3043,
 55785,
 6802,
 44410,
 19369,
 390,
 13,
 3330,
 1075,
 28695,
 20271,
 39814,
 9693,
 11232,
 5180,
 23471,
 15578,
 23644,
 659,
 36274,
 8456,
 21617,
 82,
 39404,
 2217,
 29472,
 7812,
 10210,
 209,
 254,
 156,
 4061,
 2589,
 20174,
 957,
 6463,
 1076,
 10352,
 2827,
 9796,
 2860,
 3245,
 14,
 3768,
 1281,
 19925,
 5484,
 17597,
 5525,
 9843,
 130,
 9414,
 9304,
 28869,
 682,
 27348,
 10054,
 31935,
 182,
 23699,
 1995,
 16795,
 5083,
 29706,
 256,
 156,
 989,
 2784,
 3660,
 9161,
 669,
 2351,
 696,
 9367,
 1181,
 1363,
 1130,
 47,
 5,
 628,
 314,
 5363,
 4655,
 5427,
 2210,
 176,
 388,
 1676,
 1736,
 4309,
 131,
 7092,
 1068,
 927,
 1,
 7657,
 488,
 3967,
 1540,
 3028,
 72,
 403,
 961,
 13469,
 12464,
 60693,
 2130,
 9831,
 1467,
 40716,
 5782,
 10628,
 10973,
 335,
 34,
 2987,
 879,
 18697,
 14058,
 30076,
 7226,
 3020,
 2962,
 12810,
 11385,
 20232,
 354,
 31296,
 4875,
 10424,
 45,
 27670,
 2064,
 18690,
 5976,
 20170,

In [82]:
# Inspect the control row at position 22.
ec.iloc[22]

sector_id                  12
home_based_status           0
large_area_id               3
total_number_of_jobs    23471
region_simulated_j      23471
Name: 22, dtype: int64

In [84]:
# Rows of `j` for sector 12, home_based_status 0, large area 3, excluding building_id -1.
j[(j.home_based_status==0)&(j.sector_id==12)&(j.large_area_id==3)&(j.building_id!=-1)]

Unnamed: 0,home_based_status,semcog_id,sqft,sector_id,building_id,large_area_id
47536,0,814085,900,12,1904133,3
47787,0,814085,900,12,1904131,3
47788,0,814334,900,12,1904131,3
47967,0,48431,900,12,1414803,3
48166,0,814085,900,12,1414803,3
...,...,...,...,...,...,...
2730070,0,273581,700,12,1347982,3
2730071,0,814098,400,12,1918309,3
2730072,0,289933,250,12,1170444,3
2730073,0,282292,250,12,1724093,3


In [20]:
# Shape of the subset of `j` whose building_id is not -1.
j[j.building_id!=-1].shape

(2695168, 6)

In [33]:
# Control rows for year 2020, sector 12, home_based_status 0. The table is read
# from the store rather than from `ec`, because earlier cells rebind `ec` to a
# reset_index() copy that no longer carries the (year, sector_id,
# home_based_status) index this lookup needs.
hdf['base/annual_employment_control_totals'].loc[2020, 12, 0]

  ec.loc[2020,12,0]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,large_area_id,total_number_of_jobs
year,sector_id,home_based_status,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,12,0,3,23471
2020,12,0,5,9414
2020,12,0,93,1676
2020,12,0,99,12810
2020,12,0,115,974
2020,12,0,125,49658
2020,12,0,147,1344
2020,12,0,161,6455


In [35]:
# Base jobs for large area 3, sector 12, home_based_status 0.
bj = hdf['base/jobs']
bj[(bj.large_area_id==3)&(bj.sector_id==12)&(bj.home_based_status==0)]

Unnamed: 0_level_0,home_based_status,semcog_id,sqft,sector_id,building_id,large_area_id
job_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
47536,0,814085,900,12,1904133,3
47787,0,814085,900,12,1904131,3
47788,0,814334,900,12,1904131,3
47967,0,48431,900,12,1414803,3
48166,0,814085,900,12,1414803,3
...,...,...,...,...,...,...
2730070,0,273581,700,12,1347982,3
2730071,0,814098,400,12,1918309,3
2730072,0,289933,250,12,1170444,3
2730073,0,282292,250,12,1487318,3
