In [33]:
import pandas as pd
import geopandas as gpd

# Local account name; it is interpolated into the `/Users/{user}/Box/...` paths
# read further down, so change it to match the machine running the notebook.
user = 'aolsen'

# Comparison of MAZ data and TAZ (TM1) data when summed to county and TAZ levels

## Load and process spatial reference data

In [34]:
# Read the MAZ shapefile and reproject it to EPSG:26910 in a single chain
maz_shape = gpd.read_file(
    f'/Users/{user}/Box/Modeling and Surveys/Urban Modeling/Spatial/Zones/v2.2/TM2_maz_taz_v2.2/mazs_TM2_v2_2.shp'
).to_crs('EPSG:26910')

In [35]:
# Get one point per MAZ to use in the point-in-polygon join below. Note that
# representative_point() is not the true centroid: per the geopandas docs it
# returns a point guaranteed to lie within each geometry.
maz_shape['geom_pt'] = maz_shape.representative_point()

In [36]:
# Fetch the 1454-zone TAZ layer as GeoJSON, then reproject it to EPSG:26910
tazs = gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/transportation_analysis_zones_1454/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson')
tazs = tazs.to_crs('EPSG:26910')
tazs.head(2)

Unnamed: 0,objectid,taz1454,district,county,gacres,Shape__Area,Shape__Length,geometry
0,1,566,11,Santa Clara,1708.683056,0.000444,0.09755,"POLYGON ((600337.262 4127333.574, 600264.066 4..."
1,2,565,11,Santa Clara,486.200669,0.000126,0.054171,"POLYGON ((601170.353 4126571.857, 601276.370 4..."


In [37]:
# Point-in-polygon join: tag each MAZ point with the TAZ1454 polygon it falls in.
# sjoin's default join type is inner, so MAZ points outside every TAZ are dropped.
maz_x_taz_pt = gpd.sjoin(
    left_df=maz_shape.set_geometry('geom_pt'),
    right_df=tazs[['taz1454', 'county', 'geometry']],
)

In [38]:
# TAZ ids in 1..1454 that no MAZ point was joined to
missing_taz_in_correspondence = set(range(1, 1455)).difference(
    maz_x_taz_pt.taz1454)

## Load plan data

### MAZ data

In [39]:
# Load the 2050 MAZ-level land use table and prepare it in one chain, so no
# column is assigned onto a filtered slice (the pattern that triggers
# SettingWithCopyWarning):
#   1. keep only rows that have a COUNTY value,
#   2. cast COUNTY to int (possible once the missing values are gone),
#   3. add the TAZ1454 id of each MAZ from the spatial join; MAZs without a
#      match get NaN.
pba_mazdata_2050 = (
    pd.read_csv(
        f'/Users/{user}/Box/Modeling and Surveys/Development/Travel Model Two Development/Model Inputs/Land Use/FBP2050_by_TM2_maz_20220211.csv')
    .loc[lambda df: df['COUNTY'].notna()]
    .assign(
        COUNTY=lambda df: df['COUNTY'].astype(int),
        taz1454=lambda df: df['MAZ'].map(
            maz_x_taz_pt.set_index('maz')['taz1454']),
    )
)

In [40]:
# Load the 2015 MAZ-level land use table and prepare it in one chain, so no
# column is assigned onto a filtered slice (the pattern that triggers
# SettingWithCopyWarning):
#   1. keep only rows that have a COUNTY value,
#   2. cast COUNTY to int (possible once the missing values are gone),
#   3. add the TAZ1454 id of each MAZ from the spatial join; MAZs without a
#      match get NaN.
pba_mazdata_2015 = (
    pd.read_csv(
        f'/Users/{user}/Box/Modeling and Surveys/Development/Travel Model Two Development/Model Inputs/Land Use/FBP2015_by_TM2_maz_20220211.csv')
    .loc[lambda df: df['COUNTY'].notna()]
    .assign(
        COUNTY=lambda df: df['COUNTY'].astype(int),
        taz1454=lambda df: df['MAZ'].map(
            maz_x_taz_pt.set_index('maz')['taz1454']),
    )
)

### TAZ-1454 data

In [41]:
TAZ_PATH = f'/Users/{user}/Box/Modeling and Surveys/Share Data/plan-bay-area-2050/tazdata/PBA50_FinalBlueprintLandUse_TAZdata.xlsx'

# One frame per year worksheet, read in the order 2050, 2035, 2015
pba_tazdata_2050, pba_tazdata_2035, pba_tazdata_2015 = (
    pd.read_excel(TAZ_PATH, sheet_name)
    for sheet_name in ('2050', '2035', '2015')
)

In [45]:
# Mirror ZONE into a `taz1454` column on each TAZ-level frame so they carry the
# same key name as the MAZ frames.
# Original note, kept as written: "This will lose San Quentin on the right hand
# side - 1439 won't be matched this way".
pba_tazdata_2050['taz1454'] = pba_tazdata_2050['ZONE']
pba_tazdata_2035['taz1454'] = pba_tazdata_2035['ZONE']
pba_tazdata_2015['taz1454'] = pba_tazdata_2015['ZONE']

### Comparison

In [46]:
def comparison(df1, df2, sumlev=None):
    """Return the difference of grouped sums, ``df2 - df1``, in stacked form.

    Both frames are grouped by ``sumlev`` and their numeric columns summed.
    The column names of the ``df1`` sums are upper-cased before subtracting,
    so lower-case columns in ``df1`` line up with upper-case ones in ``df2``.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Frame whose sums are subtracted; must contain the ``sumlev`` columns.
    df2 : pandas.DataFrame
        Frame whose sums are subtracted from; must contain the ``sumlev``
        columns.
    sumlev : str or list of str, optional
        Column(s) to group by. Defaults to ``['COUNTY']``.

    Returns
    -------
    pandas.Series
        Differences indexed by (group key(s), column name). Entries that are
        NaN (a column or group present in only one frame) are dropped.

    Notes
    -----
    Every numeric column that is not a grouping key is summed, including
    identifier-like ones (e.g. ``COUNTY`` when grouping by ``taz1454``); the
    difference reported for such a column is not a meaningful quantity.
    """
    # Avoid a mutable default argument, and accept a bare column name too.
    if sumlev is None:
        sumlev = ['COUNTY']
    elif isinstance(sumlev, str):
        sumlev = [sumlev]
    else:
        sumlev = list(sumlev)

    # numeric_only=True keeps text columns out of the sums; without it newer
    # pandas concatenates strings and the subtraction below fails.
    df1_sums = (df1.groupby(sumlev)
                .sum(numeric_only=True)
                .rename(columns=lambda x: x.upper()))
    df2_sums = df2.groupby(sumlev).sum(numeric_only=True)

    # Explicit dropna() so unmatched columns/groups are removed whether or not
    # the installed pandas' stack() drops NaN on its own.
    return (df2_sums - df1_sums).stack().dropna()

#### County level

In [47]:
# County-level differences (TAZ data minus MAZ data), one column per variable
pba_2050_diff_cnty = comparison(
    df1=pba_mazdata_2050, df2=pba_tazdata_2050, sumlev=['COUNTY'])
pba_2050_diff_cnty.unstack(level=1)

  return (df2.groupby(sumlev).sum()-df1.groupby(sumlev).sum().rename(columns=lambda x: x.upper())).stack()


Unnamed: 0_level_0,AGREMPN,FPSEMPN,HEREMPN,MWTEMPN,OTHEMPN,RETEMPN,TOTEMP,TOTHH
COUNTY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0.0,-3.0,-2.0,-3.0,-3.0,0.0,-11.0,-24.0
2,1.0,-4.0,-6.0,2.0,4.0,0.0,-3.0,25.0
3,-1.0,-115.0,-179.0,-5.0,-999.0,-1.0,-1300.0,-4.0
4,0.0,-1.0,-4.0,0.0,-3.0,-8.0,-16.0,-49.0
5,0.0,1.0,4.0,0.0,3.0,8.0,16.0,52.0
6,0.0,1.0,5.0,0.0,1.0,22.0,29.0,58.0
7,0.0,-1.0,-5.0,0.0,-1.0,-22.0,-29.0,-58.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
9,0.0,122.0,187.0,6.0,998.0,1.0,1314.0,-2.0


#### TAZ level

In [None]:
# TAZ-level differences (TAZ data minus MAZ data), keyed by taz1454
pba_2050_diff_taz = comparison(
    df1=pba_mazdata_2050, df2=pba_tazdata_2050, sumlev=['taz1454'])

In [49]:
# Non-zero TOTHH differences by TAZ, sorted from most negative to most positive
(
    pba_2050_diff_taz.loc[pba_2050_diff_taz != 0]
    .unstack(level=1)['TOTHH']
    .dropna()
    .sort_values()
)

taz1454
404.0   -1865.0
945.0   -1824.0
355.0   -1755.0
863.0   -1697.0
110.0   -1289.0
          ...  
266.0     739.0
109.0    1289.0
353.0    1755.0
856.0    1762.0
403.0    1867.0
Name: TOTHH, Length: 717, dtype: float64

In [54]:
# Non-zero TOTEMP differences by TAZ, sorted from most negative to most positive
(
    pba_2050_diff_taz.loc[pba_2050_diff_taz != 0]
    .unstack(level=1)['TOTEMP']
    .dropna()
    .sort_values()
)

taz1454
988.0    -3494.0
945.0    -2934.0
400.0    -1704.0
971.0    -1699.0
355.0    -1647.0
           ...  
1168.0    1155.0
377.0     1704.0
965.0     2739.0
970.0     3069.0
249.0     5587.0
Name: TOTEMP, Length: 389, dtype: float64

In [57]:
# Alameda Naval Base - household difference
# (values are TAZ data minus MAZ data; the COUNTY entry is a difference of
# summed county codes, not a land-use quantity, and should be ignored)
pba_2050_diff_taz.loc[964]

AGREMPN      0.0
COUNTY     -32.0
FPSEMPN      2.0
HEREMPN      3.0
MWTEMPN      0.0
OTHEMPN     11.0
RETEMPN      0.0
TOTEMP      16.0
TOTHH      279.0
dtype: float64

In [59]:
# Burlingame - totemp difference
# (values are TAZ data minus MAZ data; the COUNTY entry is a difference of
# summed county codes, not a land-use quantity, and should be ignored)
pba_2050_diff_taz.loc[249]

AGREMPN      20.0
COUNTY     -114.0
FPSEMPN    2525.0
HEREMPN    1922.0
MWTEMPN     772.0
OTHEMPN     365.0
RETEMPN     -17.0
TOTEMP     5587.0
TOTHH       712.0
dtype: float64

In [68]:
# Burlingame detail: tothh / totemp of the MAZ rows mapped to TAZ 249
pba_mazdata_2050.query('taz1454==249').loc[:, ['MAZ', 'taz1454', 'tothh', 'totemp']]

Unnamed: 0,MAZ,taz1454,tothh,totemp
4427,110562,249.0,96.0,0.0
4428,110564,249.0,46.0,0.0
4429,110567,249.0,43.0,0.0
4430,110568,249.0,68.0,0.0
4431,110569,249.0,33.0,0.0
4432,110570,249.0,37.0,0.0
4619,110930,249.0,41.0,0.0
4620,110931,249.0,61.0,0.0
4621,110932,249.0,79.0,0.0
4623,110934,249.0,50.0,0.0


In [63]:
# The TAZ-level record for zone 249, to set against the MAZ rows for that TAZ
pba_tazdata_2050.query('ZONE==249').loc[:, ['ZONE', 'TOTHH', 'TOTEMP']]

Unnamed: 0,ZONE,TOTHH,TOTEMP
248,249,5799,10842


In [69]:
# Show TAZ 249's geometry with GeoDataFrame.explore() to inspect the zone visually
tazs.query('taz1454==249').explore()