In [59]:
# This notebook splits the maz and taz shapefiles up by county
import os
import pandas as pd
import geopandas as gpd

In [60]:
# Locations the notebook reads from and writes to.

# Folder that holds the maz and taz shapefiles.
_input_path_parts = (
    '\\..\\..\\..\\..\\Data',
    'GIS layers',
    'TM2_maz_taz_v2.2',
)
input_dir = os.path.join(*_input_path_parts)

# Folder that receives the per-county outputs.
_output_path_parts = (
    '\\..\\..\\..\\..\\Development',
    'Travel Model Two',
    'Supply',
    'Network_QA_2022',
    'Maps_to_publish',
    'mazstazs_by_county',
)
output_dir = os.path.join(*_output_path_parts)

In [61]:
# List the contents of the input folder so the available layers can be checked.
os.listdir(input_dir)

['.backups',
 '.Rproj.user',
 'blocks_mazs_tazs.csv',
 'blocks_mazs_tazs.dbf',
 'blocks_mazs_tazs_updated.csv',
 'blocks_mazs_tazs_v2.1.1.csv',
 'blocks_mazs_tazs_v2.1.1.dbf',
 'blocks_mazs_tazs_v2.1.2.csv',
 'blocks_mazs_tazs_v2.1.2.dbf',
 'blocks_mazs_tazs_v2.1.3.csv',
 'blocks_mazs_tazs_v2.1.3.dbf',
 'blocks_mazs_tazs_v2.1.4.csv',
 'blocks_mazs_tazs_v2.1.4.dbf',
 'blocks_mazs_tazs_v2.1.5.csv',
 'blocks_mazs_tazs_v2.1.5.dbf',
 'block_noland.csv',
 'csv_to_dbf.R',
 'Index',
 'mazs_TM2_v2_2.cpg',
 'mazs_TM2_v2_2.dbf',
 'mazs_TM2_v2_2.json',
 'mazs_TM2_v2_2.prj',
 'mazs_TM2_v2_2.sbn',
 'mazs_TM2_v2_2.sbx',
 'mazs_TM2_v2_2.shp',
 'mazs_TM2_v2_2.shp.LZORN-Z840.17288.22144.sr.lock',
 'mazs_TM2_v2_2.shp.LZORN-Z840.22108.22144.sr.lock',
 'mazs_TM2_v2_2.shp.LZORN-Z840.22136.22144.sr.lock',
 'mazs_TM2_v2_2.shp.LZORN-Z840.6312.22144.sr.lock',
 'mazs_TM2_v2_2.shp.xml',
 'mazs_TM2_v2_2.shx',
 'mazs_TM2_v2_2_intersect_puma2000.cpg',
 'mazs_TM2_v2_2_intersect_puma2000.dbf',
 'mazs_TM2_v2_2_intersec

In [62]:
# Load the MAZ shapefile from the input folder into a GeoDataFrame.
maz_shp_path = os.path.join(input_dir, 'mazs_TM2_v2_2.shp')
maz_gdf = gpd.read_file(maz_shp_path)

In [63]:
# Summarize the MAZ layer: row count, column names, non-null counts and dtypes.
maz_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 39726 entries, 0 to 39725
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   maz         39726 non-null  int64   
 1   ALAND10     39726 non-null  float64 
 2   AWATER10    39726 non-null  float64 
 3   blockcount  39726 non-null  int64   
 4   taz         39726 non-null  int64   
 5   partcount   39726 non-null  int64   
 6   PERIM_GEO   39726 non-null  float64 
 7   psq_overa   39726 non-null  float64 
 8   acres       39726 non-null  float64 
 9   geometry    39726 non-null  geometry
dtypes: float64(5), geometry(1), int64(4)
memory usage: 3.0 MB


In [64]:
# Load the CSV from the input folder that carries the county information.
taz_county_csv_path = os.path.join(input_dir, 'tazs_TM2_v2_2_superd_county.csv')
TazToCounty_df = pd.read_csv(taz_county_csv_path)

In [65]:
# Summarize the TAZ-to-county table: row count, column names, non-null counts
# and dtypes.
TazToCounty_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4735 entries, 0 to 4734
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   TAZ          4735 non-null   int64 
 1   SUPERD       4735 non-null   int64 
 2   COUNTY       4735 non-null   int64 
 3   COUNTY_NAME  4735 non-null   object
dtypes: int64(3), object(1)
memory usage: 148.1+ KB


In [66]:
# Attach the county table to the MAZ layer by matching the layer's `taz` column
# to the table's `TAZ` column. The outer join combined with indicator=True adds
# a `_merge` column that records which side(s) each row came from.
maz_TazToCounty_gdf = pd.merge(
    left=maz_gdf,
    right=TazToCounty_df,
    left_on=['taz'],
    right_on=['TAZ'],
    how='outer',
    indicator=True,
)

In [67]:
# Tally the merge indicator. 'both' means the row matched on taz/TAZ;
# 'left_only' rows exist only in the MAZ layer and 'right_only' rows exist only
# in the TAZ-to-county table.
maz_TazToCounty_gdf['_merge'].value_counts()

both          39726
left_only         0
right_only        0
Name: _merge, dtype: int64

In [68]:
# Before exporting by county, remove the merge indicator column.
# The column is named through `columns=` rather than the positional axis form
# `drop('_merge', 1)`: pandas deprecated the positional axis argument in 1.3
# and rejects it with a TypeError from 2.0 onward.
maz_TazToCounty_gdf = maz_TazToCounty_gdf.drop(columns='_merge')

  maz_TazToCounty_gdf = maz_TazToCounty_gdf.drop('_merge', 1)


In [69]:
# Before exporting by county, count the MAZ rows that fall under each county
# name.
maz_TazToCounty_gdf['COUNTY_NAME'].value_counts()

Alameda          8626
Santa Clara      8510
Contra Costa     5912
San Mateo        4454
San Francisco    4153
Sonoma           2887
Solano           2810
Marin            1418
Napa              956
Name: COUNTY_NAME, dtype: int64

In [70]:
# Export the MAZ layer to one shapefile per county.
counties = ["Alameda", "Contra Costa", "Marin",  "Napa",  "San Francisco",  "San Mateo",  "Santa Clara",  "Solano",  "Sonoma"]

# Create the output folder when it is missing; otherwise the writes below fail
# on a machine where the folder has not been set up by hand.
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): ESRI Shapefile field names are limited to 10 characters, so
# COUNTY_NAME is presumably truncated on write - confirm in the output files.
for county_name in counties:
    # Keep only the rows that belong to the current county.
    maz_countylayer_gdf = maz_TazToCounty_gdf[maz_TazToCounty_gdf['COUNTY_NAME'] == county_name]
    # File name pattern: mazs_<county name>.shp
    output_file_name = "mazs_" + county_name + ".shp"
    maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))

  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  maz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))


In [72]:
# Load the TAZ shapefile from the input folder into a GeoDataFrame.
taz_shp_path = os.path.join(input_dir, 'tazs_TM2_v2_2.shp')
taz_gdf = gpd.read_file(taz_shp_path)

In [73]:
# Summarize the TAZ layer: row count, column names, non-null counts and dtypes.
taz_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 4735 entries, 0 to 4734
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   taz         4735 non-null   int64   
 1   ALAND10     4735 non-null   float64 
 2   AWATER10    4735 non-null   float64 
 3   blockcount  4735 non-null   int64   
 4   mazcount    4735 non-null   int64   
 5   partcount   4735 non-null   int64   
 6   PERIM_GEO   4735 non-null   float64 
 7   psq_overa   4735 non-null   float64 
 8   acres       4735 non-null   float64 
 9   geometry    4735 non-null   geometry
dtypes: float64(5), geometry(1), int64(4)
memory usage: 370.0 KB


In [74]:
# Attach the county table to the TAZ layer by matching the layer's `taz` column
# to the table's `TAZ` column. The outer join combined with indicator=True adds
# a `_merge` column that records which side(s) each row came from.
taz_TazToCounty_gdf = pd.merge(
    left=taz_gdf,
    right=TazToCounty_df,
    left_on=['taz'],
    right_on=['TAZ'],
    how='outer',
    indicator=True,
)

In [75]:
# Tally the merge indicator. 'both' means the row matched on taz/TAZ;
# 'left_only' rows exist only in the TAZ layer and 'right_only' rows exist only
# in the TAZ-to-county table.
taz_TazToCounty_gdf['_merge'].value_counts()

both          4735
left_only        0
right_only       0
Name: _merge, dtype: int64

In [76]:
# Before exporting by county, remove the merge indicator column.
# The column is named through `columns=` rather than the positional axis form
# `drop('_merge', 1)`: pandas deprecated the positional axis argument in 1.3
# and rejects it with a TypeError from 2.0 onward.
taz_TazToCounty_gdf = taz_TazToCounty_gdf.drop(columns='_merge')

  taz_TazToCounty_gdf = taz_TazToCounty_gdf.drop('_merge', 1)


In [77]:
# Before exporting by county, count the TAZ rows that fall under each county
# name.
taz_TazToCounty_gdf['COUNTY_NAME'].value_counts()

Alameda          1100
Santa Clara      1021
San Francisco     636
Contra Costa      630
San Mateo         421
Sonoma            357
Solano            269
Marin             201
Napa              100
Name: COUNTY_NAME, dtype: int64

In [78]:
# Export the TAZ layer to one shapefile per county.
counties = ["Alameda", "Contra Costa", "Marin",  "Napa",  "San Francisco",  "San Mateo",  "Santa Clara",  "Solano",  "Sonoma"]

# Create the output folder when it is missing; otherwise the writes below fail
# on a machine where the folder has not been set up by hand.
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): ESRI Shapefile field names are limited to 10 characters, so
# COUNTY_NAME is presumably truncated on write - confirm in the output files.
for county_name in counties:
    # Keep only the rows that belong to the current county.
    taz_countylayer_gdf = taz_TazToCounty_gdf[taz_TazToCounty_gdf['COUNTY_NAME'] == county_name]
    # File name pattern: tazs_<county name>.shp
    output_file_name = "tazs_" + county_name + ".shp"
    taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))

  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
  taz_countylayer_gdf.to_file(os.path.join(output_dir, output_file_name))
