In [80]:
import sys
import getpass
import re
from functools import partial
from tqdm import tqdm

user = getpass.getuser()
sys.dont_write_bytecode = True

sys.path.insert(0, '/Users/{}/Box/DataViz Projects/Utility Code'.format(user))

from utils_io import *  # initial imports
reload_module('utils_io')  # reload after update to utils_io.py
from utils_io import *  # reload reflects updates

#geo imports
import geopandas as gp
import folium
from geopandas.tools import geocode
from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
from geopy import Point

In [2]:
google_api_key = G_CREDS['dataviz_team']

# Housing and Community Development Annual Progress Report Data Processing <a name="top"></a>

## Background

### Annual Progress Reports

**[HCD Regional Housing Needs Allocation and Housing Element Website](https://www.hcd.ca.gov/community-development/housing-element/index.shtml)**

Each jurisdiction (city council or board of supervisors) must prepare an annual progress report on the jurisdiction’s status and progress in implementing its housing element. (Government Code Section 65400.)

Each jurisdiction’s Annual Progress Report (APR) must be submitted to HCD and the Governor’s Office of Planning and Research (OPR) by April 1 of each year (covering the previous calendar year).

New APR form and instructions - for calendar year (CY) 2018 and 2019

AB 879 and SB 35 of the 2017 Housing Package, as well as AB 1486 (2019), added new data requirements for the Housing Element Annual Progress Reports (APRs). These changes are reflected in the new APR form and instructions, which are posted below.

- [APR form for CY 2018 and 2019](https://www.hcd.ca.gov/community-development/housing-element/docs/Housing-Element-Annual-Progress-Report-2019.xlsm) (XLS)
- [APR instructions for CY 2018 and 2019](https://www.hcd.ca.gov/community-development/housing-element/docs/Housing-Element-Annual-Progress-Report-Instructions-2019.pdf) (PDF)

**Annual Progress Report Tables**

1. Table A - Housing Development Applications 
2. Table A2 - New Construction, Entitled, Permits, and Completed Units
3. Table B - Regional Housing Needs Allocation Progress – Permitted Units Issued By Affordability
4. Table C - Sites Identified or Rezoned to Accommodate Shortfall Housing Need
5. Table D - Program Implementation Status pursuant to Government Code section 65583 
6. Table E - Commercial Development Bonus Approved pursuant to Government Code

## Approach

## Process

## Data Sources

## Table of Contents

1. [Pre-Processing Steps](#preprocessing)
2. [Format Addresses](#format_address)
3. [Geocode Addresses](#geocode)


## Create Draft Housing APR Dataset

In [4]:
#Get col list from current Housing APR dataset
apr_current_id = 'cypr-67hy'
get_socrata_col_list(socrata_data_id=apr_current_id)

[{'fieldName': 'mtc_id', 'name': 'MTC_ID', 'dataTypeName': 'text'},
 {'fieldName': 'mtc_type', 'name': 'MTC_TYPE', 'dataTypeName': 'text'},
 {'fieldName': 'mtc_year', 'name': 'MTC_YEAR', 'dataTypeName': 'number'},
 {'fieldName': 'mtc_vlow_income_dr',
  'name': 'MTC_VLOW_INCOME_DR',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_vlow_income_ndr',
  'name': 'MTC_VLOW_INCOME_NDR',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_vlow_tot',
  'name': 'MTC_VLOW_TOT',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_low_income_dr',
  'name': 'MTC_LOW_INCOME_DR',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_low_income_ndr',
  'name': 'MTC_LOW_INCOME_NDR',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_low_tot', 'name': 'MTC_LOW_TOT', 'dataTypeName': 'number'},
 {'fieldName': 'mtc_mod_income_dr',
  'name': 'MTC_MOD_INCOME_DR',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_mod_income_ndr',
  'name': 'MTC_MOD_INCOME_NDR',
  'dataTypeName': 'number'},
 {'fieldName': 'mtc_mod_tot', 'nam

In [5]:
apr_fields = [{'fieldName': 'mtc_id', 'name': 'MTC_ID', 'dataTypeName': 'text'},
             {'fieldName': 'mtc_type', 'name': 'MTC_TYPE', 'dataTypeName': 'text'},
             {'fieldName': 'mtc_year', 'name': 'MTC_YEAR', 'dataTypeName': 'number'},
             {'fieldName': 'mtc_vlow_income_dr',
              'name': 'MTC_VLOW_INCOME_DR',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_vlow_income_ndr',
              'name': 'MTC_VLOW_INCOME_NDR',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_vlow_tot',
              'name': 'MTC_VLOW_TOT',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_low_income_dr',
              'name': 'MTC_LOW_INCOME_DR',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_low_income_ndr',
              'name': 'MTC_LOW_INCOME_NDR',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_low_tot', 'name': 'MTC_LOW_TOT', 'dataTypeName': 'number'},
             {'fieldName': 'mtc_mod_income_dr',
              'name': 'MTC_MOD_INCOME_DR',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_mod_income_ndr',
              'name': 'MTC_MOD_INCOME_NDR',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_mod_tot', 'name': 'MTC_MOD_TOT', 'dataTypeName': 'number'},
             {'fieldName': 'mtc_above_mod_income',
              'name': 'MTC_ABOVE_MOD_INCOME',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_total_units',
              'name': 'MTC_TOTAL_UNITS',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_issue_dt', 'name': 'MTC_ISSUE_DT', 'dataTypeName': 'text'},
             {'fieldName': 'mtc_no_of_units',
              'name': 'MTC_NO_OF_UNITS',
              'dataTypeName': 'number'},
             {'fieldName': 'mtc_address_full',
             'name': 'MTC_ADDRESS_FULL',
             'dataTypeName':'text'},
             {'fieldName': 'mtc_geocode_address',
              'name': 'MTC_GEOCODE_ADDRESS',
              'dataTypeName': 'text'},
             {'fieldName': 'mtc_geocode_type',
              'name': 'MTC_GEOCODE_TYPE',
              'dataTypeName': 'text'},
             {'fieldName': 'mtc_mapped', 'name': 'MTC_MAPPED', 'dataTypeName': 'checkbox'},
             {'fieldName': 'mtc_lat', 'name': 'MTC_LAT', 'dataTypeName': 'number'},
             {'fieldName': 'mtc_long', 'name': 'MTC_LONG', 'dataTypeName': 'number'},
             {'fieldName': 'mtc_wkt', 'name': 'MTC_WKT', 'dataTypeName': 'point'},
             {'fieldName': 'mtc_pda', 'name': 'MTC_PDA', 'dataTypeName': 'checkbox'},
             {'fieldName': 'mtc_tpa', 'name': 'MTC_TPA', 'dataTypeName': 'checkbox'},
             {'fieldName': 'mtc_housing_element_site',
              'name': 'MTC_HOUSING_ELEMENT_SITE',
              'dataTypeName': 'checkbox'},
             {'fieldName': 'mtc_notes', 'name': 'MTC_NOTES', 'dataTypeName': 'text'},
             {'fieldName': 'jurs_name', 'name': 'JURS_NAME', 'dataTypeName': 'text'},
             {'fieldName': 'cnty_name', 'name': 'CNTY_NAME', 'dataTypeName': 'text'},
             {'fieldName': 'prior_apn', 'name': 'PRIOR_APN', 'dataTypeName': 'text'},
             {'fieldName': 'apn', 'name': 'APN', 'dataTypeName': 'text'},
             {'fieldName': 'street_address',
              'name': 'STREET_ADDRESS',
              'dataTypeName': 'text'},
             {'fieldName': 'project_name', 'name': 'PROJECT_NAME', 'dataTypeName': 'text'},
             {'fieldName': 'jurs_tracking_id',
              'name': 'JURS_TRACKING_ID',
              'dataTypeName': 'text'},
             {'fieldName': 'unit_cat_uid', 'name': 'UNIT_CAT_UID', 'dataTypeName': 'text'},
             {'fieldName': 'tenure_uid', 'name': 'TENURE_UID', 'dataTypeName': 'text'},
             {'fieldName': 'vlow_income_dr_con_ent_permits',
              'name': 'VLOW_INCOME_DR_CON_ENT_PERMITS',
              'dataTypeName': 'number'},
             {'fieldName': 'vlow_income_ndr_con_ent_perm',
              'name': 'VLOW_INCOME_NDR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'low_income_dr_con_ent_permits',
              'name': 'LOW_INCOME_DR_CON_ENT_PERMITS',
              'dataTypeName': 'number'},
             {'fieldName': 'low_income_ndr_con_ent_permits',
              'name': 'LOW_INCOME_NDR_CON_ENT_PERMITS',
              'dataTypeName': 'number'},
             {'fieldName': 'mod_income_dr_con_ent_permits',
              'name': 'MOD_INCOME_DR_CON_ENT_PERMITS',
              'dataTypeName': 'number'},
             {'fieldName': 'mod_income_ndr_con_ent_permits',
              'name': 'MOD_INCOME_NDR_CON_ENT_PERMITS',
              'dataTypeName': 'number'},
             {'fieldName': 'above_mod_income_con_ent',
              'name': 'ABOVE_MOD_INCOME_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'ent_approve_dt_con_ent_permits',
              'name': 'ENT_APPROVE_DT_CON_ENT_PERMITS',
              'dataTypeName': 'text'},
             {'fieldName': 'no_of_units_issued_ent',
              'name': 'NO_OF_UNITS_ISSUED_ENT',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_vlow_income_dr_con_ent',
              'name': 'BP_VLOW_INCOME_DR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_vlow_income_ndr_con_perm',
              'name': 'BP_VLOW_INCOME_NDR_CON_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_low_income_dr_con_ent',
              'name': 'BP_LOW_INCOME_DR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_low_income_ndr_con_ent',
              'name': 'BP_LOW_INCOME_NDR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_mod_income_dr_con_ent',
              'name': 'BP_MOD_INCOME_DR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_mod_income_ndr_con_ent',
              'name': 'BP_MOD_INCOME_NDR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_above_mod_income_con_ent',
              'name': 'BP_ABOVE_MOD_INCOME_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'bp_issue_dt_con_ent_perm',
              'name': 'BP_ISSUE_DT_CON_ENT_PERM',
              'dataTypeName': 'text'},
             {'fieldName': 'no_of_units_issued_bld_permits',
              'name': 'NO_OF_UNITS_ISSUED_BLD_PERMITS',
              'dataTypeName': 'number'},
             {'fieldName': 'co_vlow_income_dr_con_ent',
              'name': 'CO_VLOW_INCOME_DR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_vlow_income_ndr_con_ent',
              'name': 'CO_VLOW_INCOME_NDR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_low_income_dr_con_ent',
              'name': 'CO_LOW_INCOME_DR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_low_income_ndr_con_ent',
              'name': 'CO_LOW_INCOME_NDR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_mod_income_dr_con_ent',
              'name': 'CO_MOD_INCOME_DR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_mod_income_ndr_con_ent',
              'name': 'CO_MOD_INCOME_NDR_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_above_mod_income_con_ent',
              'name': 'CO_ABOVE_MOD_INCOME_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'co_issue_dt_con_ent_perm',
              'name': 'CO_ISSUE_DT_CON_ENT_PERM',
              'dataTypeName': 'text'},
             {'fieldName': 'no_of_u_iss_certi_readiness',
              'name': 'NO_OF_U_ISS_CERTI_READINESS',
              'dataTypeName': 'number'},
             {'fieldName': 'extr_low_income_units_con',
              'name': 'EXTR_LOW_INCOME_UNITS_CON_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'approve_sb35_con_ent_perm',
              'name': 'APPROVE_SB35_CON_ENT_PERM',
              'dataTypeName': 'checkbox'},
             {'fieldName': 'infill_units_con_ent_perm',
              'name': 'INFILL_UNITS_CON_ENT_PERM',
              'dataTypeName': 'checkbox'},
             {'fieldName': 'fin_assist_detail',
              'name': 'FIN_ASSIST_DETAIL',
              'dataTypeName': 'text'},
             {'fieldName': 'dr_type_con_ent_perm',
              'name': 'DR_TYPE_CON_ENT_PERM',
              'dataTypeName': 'text'},
             {'fieldName': 'no_fa_dr_con_ent_perm',
              'name': 'NO_FA_DR_CON_ENT_PERM',
              'dataTypeName': 'text'},
             {'fieldName': 'term_aff_dr_con_ent_perm',
              'name': 'TERM_AFF_DR_CON_ENT_PERM',
              'dataTypeName': 'text'},
             {'fieldName': 'dem_des_units_con_ent_perm',
              'name': 'DEM_DES_UNITS_CON_ENT_PERM',
              'dataTypeName': 'number'},
             {'fieldName': 'dem_or_des_units_con_ent',
              'name': 'DEM_OR_DES_UNITS_CON_ENT_PERM',
              'dataTypeName': 'text'},
             {'fieldName': 'dem_des_units_own_rent_con',
              'name': 'DEM_DES_UNITS_OWN_RENT_CON_P',
              'dataTypeName': 'text'},
             {'fieldName': 'notes_con_ent_perm',
              'name': 'NOTES_CON_ENT_PERM',
              'dataTypeName': 'text'}]

In [9]:
# create_socrata_dataset(dataset_name='Housing APR Data 2018-2019',
#                        dataset_desc='2018 and 2019 Annual Progress Report from California Department of Housing and Community Development',
#                        tags=['hcd','apr','housing','basis'],
#                        category=None,
#                        columns=apr_fields,
#                        row_identifier='mtc_id')

created dataset on Socrata with id: azj6-3imm


{'id': 'azj6-3imm',
 'name': 'Housing APR Data 2018-2019',
 'assetType': 'dataset',
 'averageRating': 0,
 'createdAt': 1604610919,
 'description': '2018 and 2019 Annual Progress Report from California Department of Housing and Community Development',
 'displayType': 'table',
 'downloadCount': 0,
 'hideFromCatalog': False,
 'hideFromDataJson': False,
 'newBackend': True,
 'numberOfComments': 0,
 'oid': 35692949,
 'provenance': 'official',
 'publicationAppendEnabled': False,
 'publicationGroup': 17752917,
 'publicationStage': 'unpublished',
 'rowIdentifierColumnId': 507136173,
 'rowsUpdatedAt': 1604610919,
 'tableId': 17752917,
 'totalTimesRated': 0,
 'viewCount': 0,
 'viewLastModified': 1604610923,
 'viewType': 'tabular',
 'columns': [{'id': 507136173,
   'name': 'MTC_ID',
   'dataTypeName': 'text',
   'fieldName': 'mtc_id',
   'position': 1,
   'renderTypeName': 'text',
   'tableColumnId': 121834394,
   'format': {}},
  {'id': 507136174,
   'name': 'MTC_TYPE',
   'dataTypeName': 'text'

In [3]:
apr_new_id = 'azj6-3imm'

## Pre-Processing Steps <a name="preprocessing"></a>
**[Skip to next step](#format_address)**

In [4]:
#set working directory
work_dir = os.path.join('/Users',
                       user,
                       'Box',
                       'DataViz Projects',
                       'Data Services',
                       'Housing',
                       '2020')

In [5]:
#list files in working directory
os.listdir(os.path.join(work_dir,
                       '2019 Original APRs'))

['Extracts',
 'ORIGINAL_Table_A2_Report_with_Data_Dictionary.xlsx',
 'ORIGINAL_Table_A_2018_2019.xlsx',
 'ORIGINAL_Table_D_Report.xlsx',
 'Table_A2_Report_Address_Lat_Long.xlsx',
 'Table_A2_Report_Lat_Long_Google.csv']

In [13]:
#read in Table A2 (Annual Progress Report Table 2018-2019)
apr_df = pd.read_excel(os.path.join(work_dir,
                                  '2019 Original APRs',
                                 'ORIGINAL_Table_A2_Report_with_Data_Dictionary.xlsx'),
                      sheet_name='Table A2 Report')

In [14]:
apr_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 181271 entries, 0 to 181270
Data columns (total 48 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   JURS_NAME                    181269 non-null  object 
 1   CNTY_NAME                    181270 non-null  object 
 2   YEAR                         181270 non-null  float64
 3   PRIOR_APN                    19327 non-null   object 
 4   APN                          179827 non-null  object 
 5   STREET_ADDRESS               179115 non-null  object 
 6   PROJECT_NAME                 79978 non-null   object 
 7   JURS_TRACKING_ID             147489 non-null  object 
 8   UNIT_CAT_DESC                181268 non-null  object 
 9   TENURE                       181268 non-null  object 
 10  VLOW_INCOME_DR               181268 non-null  float64
 11  VLOW_INCOME_NDR              181268 non-null  float64
 12  LOW_INCOME_DR                181268 non-null  float64
 13 

In [15]:
apr_df.head(5)

Unnamed: 0,JURS_NAME,CNTY_NAME,YEAR,PRIOR_APN,APN,STREET_ADDRESS,PROJECT_NAME,JURS_TRACKING_ID,UNIT_CAT_DESC,TENURE,...,APPROVE_SB35,INFILL_UNITS,FIN_ASSIST_NAME,DR_TYPE,NO_FA_DR,TERM_AFF_DR,DEM_DES_UNITS,DEM_OR_DES_UNITS,DEM_DES_UNITS_OWN_RENT,NOTES
0,ADELANTO,SAN BERNARDINO,2019.0,,TBD,"10514 PAMPAS CT, Lot: 93",DR HORTON,,Single-Family Detached Unit,Owner,...,N,,,,,,,,,
1,ADELANTO,SAN BERNARDINO,2019.0,,TBD,"10515 CHEVRON CT, Lot: 129",DR HORTON,,Single-Family Detached Unit,Owner,...,N,,,,,,,,,
2,ADELANTO,SAN BERNARDINO,2019.0,,TBD,"10524 CHEVRON CT, Lot: 113",DR HORTON,,Single-Family Detached Unit,Owner,...,N,,,,,,,,,
3,ADELANTO,SAN BERNARDINO,2019.0,,TBD,"10536 CHEVRON CT, Lot: 114",DR HORTON,,Single-Family Detached Unit,Owner,...,N,,,,,,,,,
4,ADELANTO,SAN BERNARDINO,2019.0,,TBD,"10537 PAMPAS CT, Lot: 109",D R HORTON LOS ANGELES HOLDING COMPANY INC,,Single-Family Detached Unit,Owner,...,N,,,,,,,,,


In [16]:
apr_df['CNTY_NAME'].unique()

array(['SAN BERNARDINO', 'LOS ANGELES', 'ALAMEDA', 'ORANGE', 'ALPINE',
       'MODOC', 'AMADOR', 'NAPA', 'SHASTA', 'CALAVERAS', 'CONTRA COSTA',
       'HUMBOLDT', 'SAN LUIS OBISPO', 'KERN', 'SAN MATEO', 'PLACER',
       'KINGS', 'RIVERSIDE', 'MARIN', 'SOLANO', 'BUTTE', 'INYO',
       'IMPERIAL', 'SANTA BARBARA', 'VENTURA', 'SANTA CLARA',
       'SANTA CRUZ', 'SAN DIEGO', 'STANISLAUS', 'MADERA', 'SACRAMENTO',
       'LAKE', 'SONOMA', 'FRESNO', 'COLUSA', 'TEHAMA', 'DEL NORTE',
       'YOLO', 'TULARE', 'MERCED', 'SISKIYOU', 'EL DORADO', 'SAN JOAQUIN',
       'MENDOCINO', 'GLENN', 'NEVADA', 'MONTEREY', 'SAN BENITO', 'SUTTER',
       'MONO', 'MARIPOSA', 'YUBA', 'PLUMAS', 'SAN FRANCISCO', nan,
       'TUOLUMNE', 'LASSEN', 'TRINITY', 'SIERRA', 0], dtype=object)

In [17]:
#filter to include only Bay Area Counties
bay_area = ['ALAMEDA',
            'CONTRA COSTA',
            'MARIN',
            'NAPA',
            'SAN FRANCISCO',
            'SAN MATEO',
            'SANTA CLARA',
            'SOLANO',
            'SONOMA']

In [18]:
apr_bay_area = apr_df[apr_df['CNTY_NAME'].isin(bay_area)].copy()

In [19]:
apr_bay_area['CNTY_NAME'].unique()

array(['ALAMEDA', 'NAPA', 'CONTRA COSTA', 'SAN MATEO', 'MARIN', 'SOLANO',
       'SANTA CLARA', 'SONOMA', 'SAN FRANCISCO'], dtype=object)

**[Back to Top](#top)**

**[Back to Section Top](#preprocessing)**

## Format Addresses <a name='format_address'></a>

**[Skip to next step](#geocode)**

In [20]:
#Check a sample of records to review street address patterns
apr_bay_area[['JURS_NAME','STREET_ADDRESS']].sample(n=20)

Unnamed: 0,JURS_NAME,STREET_ADDRESS
171585,SANTA CLARA COUNTY,"San Jose, CA 95127-2927"
22043,GILROY,1611 CASTLEWOOD PL
82399,SAN JOSE,1359 YOSEMITE AV
16450,FAIRFIELD,4417 CANCUN DR
140280,MENLO PARK,797 Live Oak Ave
175708,SUNNYVALE,556 SAN MARTIN TR 2
174414,SONOMA COUNTY,"9121 RED HILL CT, Cotati CA 94931"
107675,DALY CITY,34 Baldwin Ave
4105,BRENTWOOD,841 WALSH ST
107926,DANVILLE,42 William Ct.


In [21]:
#Set street address to uppercase
apr_bay_area['STREET_ADDRESS'] = apr_bay_area['STREET_ADDRESS'].str.upper()

In [22]:
#Regex pattern for matching state, and zip of different formats
regex_pattern = r'( CA \d{5}-\d{4}|CA, \d{5}-\d{4}| CA \d{5}| CA, \d{5}|, CA \d{5}|, CA, \d{5}-\d{4}|, CA, \d{5}|CA$|, CA$|\d{5}$|\d{5}-\d{4}$)'

In [23]:
#Check street address for state and zipcode
apr_bay_area_state_zip = apr_bay_area[apr_bay_area['STREET_ADDRESS'].str.contains(regex_pattern,na=False)]

  return func(self, *args, **kwargs)


In [24]:
apr_bay_area_state_zip.sample(n=20)

Unnamed: 0,JURS_NAME,CNTY_NAME,YEAR,PRIOR_APN,APN,STREET_ADDRESS,PROJECT_NAME,JURS_TRACKING_ID,UNIT_CAT_DESC,TENURE,...,APPROVE_SB35,INFILL_UNITS,FIN_ASSIST_NAME,DR_TYPE,NO_FA_DR,TERM_AFF_DR,DEM_DES_UNITS,DEM_OR_DES_UNITS,DEM_DES_UNITS_OWN_RENT,NOTES
89222,SONOMA COUNTY,SONOMA,2019.0,,029-160-023,"4385 PARKER HILL RD, SANTA ROSA CA 95404",,BLD19-1577,Single-Family Detached Unit,Owner,...,N,,,,,,1.0,Destroyed,O,2017 COMPLEX FIRE - SFD 1877 SQFT: GARAGE 1058...
144613,OAKLAND,ALAMEDA,2018.0,,014 123100900,"5245 SHAFTER AVE, OAKLAND, CA 94618",,DRX180514,Accessory Dwelling Unit,Renter,...,N,Y,,,,,,,,
145087,OAKLAND,ALAMEDA,2018.0,,044 496000200,"1245 97TH AVE, OAKLAND, CA 94603",,RB1802843,Accessory Dwelling Unit,Renter,...,N,Y,,,,,,,,To convert illegal conversion into two dwellin...
53495,OAKLAND,ALAMEDA,2019.0,,029A130701100,"4069 LINCOLN AVE, OAKLAND, CA 94602",,RBC1903690,Accessory Dwelling Unit,Renter,...,N,Y,,,,,,,,"To convert lower level of existing SFD into 1,..."
53434,OAKLAND,ALAMEDA,2019.0,,026 081400900,"2765 GARDEN ST, OAKLAND, CA 94601",,RBC1900254,Accessory Dwelling Unit,Renter,...,N,Y,,,,,,,,775 basement conversion in SFD into secondary ...
10644,CONTRA COSTA COUNTY,CONTRA COSTA,2019.0,,31240015,36 SEAWARD BETHEL ISLAND CA 94511,,BIR19-000197,Single-Family Detached Unit,Owner,...,N,,,,,,,,,
89434,SONOMA COUNTY,SONOMA,2019.0,,053-051-041,"2000 TRINITY RD, GLEN ELLEN CA 95442",,BLD18-6555,Single-Family Detached Unit,Owner,...,N,,,,,,1.0,Destroyed,O,"2017 COMPLEX FIRE CABIN 881 SQFT, 354 DECKS EV..."
106882,CONTRA COSTA COUNTY,CONTRA COSTA,2018.0,,206800056,423 VENDEEN DANVILLE CA 94526,,BIR18-008833,Single-Family Detached Unit,Owner,...,N,,,,,,,,,
62670,RIO VISTA,SOLANO,2019.0,176-455-050,176-455-050,"1966 FREEDOM WAY, RIO VISTA, CA 94571",Encore Liberty,PB 19-6354,Single-Family Detached Unit,Owner,...,N,N,,,,,,,,
53520,OAKLAND,ALAMEDA,2019.0,,030 195600700,"4334 PAMPAS AVE, OAKLAND, CA 94619",,RB1705616,Single-Family Detached Unit,Owner,...,N,Y,,,,,,,,"Construct new two-story 2,512 SF SFD to includ..."


In [25]:
#Test regex pattern to replace street address with string not containing state and zip on subset
apr_bay_area_state_zip['STREET_ADDRESS'].str.replace(regex_pattern,'',regex=True)

10322          1540 FRED JACKSON RICHMOND
10323              1751 ORCHARD BRENTWOOD
10324          2681 WALNUT BLVD BRENTWOOD
10325                1311 PAYNE BRENTWOOD
10326               7760 BYRON HWY BYRON 
                       ...               
174721            182 SOUNDING, SEA RANCH
174722     183 BROAD REACH, THE SEA RANCH
174723     285 BROAD REACH, THE SEA RANCH
174724    42317 FORECASTLE, THE SEA RANCH
174725    40420 LEEWARD RD, THE SEA RANCH
Name: STREET_ADDRESS, Length: 3949, dtype: object

In [26]:
#Remove state and zipcode from address
apr_bay_area['STREET_ADDRESS_FMT'] = apr_bay_area['STREET_ADDRESS'].str.replace(regex_pattern,'',regex=True)

In [27]:
#Check street address for state and zipcode
apr_bay_area[['STREET_ADDRESS','STREET_ADDRESS_FMT']][(apr_bay_area['STREET_ADDRESS']
                                                       .str.contains(regex_pattern,na=False))]

  return func(self, *args, **kwargs)


Unnamed: 0,STREET_ADDRESS,STREET_ADDRESS_FMT
10322,1540 FRED JACKSON RICHMOND CA 94801-1535,1540 FRED JACKSON RICHMOND
10323,1751 ORCHARD BRENTWOOD CA 94513,1751 ORCHARD BRENTWOOD
10324,2681 WALNUT BLVD BRENTWOOD CA 94513-4402,2681 WALNUT BLVD BRENTWOOD
10325,1311 PAYNE BRENTWOOD CA 94513-4553,1311 PAYNE BRENTWOOD
10326,7760 BYRON HWY BYRON CA,7760 BYRON HWY BYRON
...,...,...
174721,"182 SOUNDING, SEA RANCH CA 95497","182 SOUNDING, SEA RANCH"
174722,"183 BROAD REACH, THE SEA RANCH CA 95497","183 BROAD REACH, THE SEA RANCH"
174723,"285 BROAD REACH, THE SEA RANCH CA 95497","285 BROAD REACH, THE SEA RANCH"
174724,"42317 FORECASTLE, THE SEA RANCH CA 95497","42317 FORECASTLE, THE SEA RANCH"


In [28]:
#Create subset df of street addresses containing zip codes
zip_regex = r'(\d{5}$|\d{5}-\d{4}$)'
apr_bay_area_zip = apr_bay_area[['STREET_ADDRESS']][(apr_bay_area['STREET_ADDRESS']
                                  .str.contains(zip_regex,na=False))]

  return func(self, *args, **kwargs)


In [29]:
#Check zipcodes 
apr_bay_area_zip.sample(n=10)

Unnamed: 0,STREET_ADDRESS
53120,"880 31ST ST, OAKLAND, CA 94608"
144782,"515 HADDON RD, OAKLAND, CA 94606"
89467,"100 SYLVIA DR, GLEN ELLEN CA 95442"
89984,"23580 FORT ROSS RD, CAZADERO CA 95421"
144841,"2119 34TH AVE, OAKLAND, CA 94601"
169773,"12 TURTLE BAY PL, UNIT 2ND, SAN MATEO, CA 9440..."
10647,60 SEAWARD BETHEL ISLAND CA 94511
171613,"SAN MARTIN, CA 95046-0000"
62580,"2258 STARS DRIVE, RIO VISTA, CA 94571"
174560,"5970 VINE HILL SCHOOL RD, SEBASTOPOL CA 95472"


In [None]:
#Test extract with regex on subset of records
apr_bay_area_zip['STREET_ADDRESS'].str.extract(zip_regex)

In [30]:
#Set new zip column value using zip regex 
apr_bay_area['MTC_ZIP'] = apr_bay_area['STREET_ADDRESS'].str.extract(zip_regex)

In [32]:
#Review records that match regex 
apr_bay_area[['STREET_ADDRESS','MTC_ZIP']][(apr_bay_area['STREET_ADDRESS']
                                  .str.contains(zip_regex,na=False))]

  return func(self, *args, **kwargs)


Unnamed: 0,STREET_ADDRESS,MTC_ZIP
10322,1540 FRED JACKSON RICHMOND CA 94801-1535,94801-1535
10323,1751 ORCHARD BRENTWOOD CA 94513,94513
10324,2681 WALNUT BLVD BRENTWOOD CA 94513-4402,94513-4402
10325,1311 PAYNE BRENTWOOD CA 94513-4553,94513-4553
10327,25987 MARSH CREEK RD BYRON CA 94513-4315,94513-4315
...,...,...
174721,"182 SOUNDING, SEA RANCH CA 95497",95497
174722,"183 BROAD REACH, THE SEA RANCH CA 95497",95497
174723,"285 BROAD REACH, THE SEA RANCH CA 95497",95497
174724,"42317 FORECASTLE, THE SEA RANCH CA 95497",95497


**[Back to Top](#top)**

**[Back to Section Top](#format_address)**

## Geocode Addresses <a name='geocode'></a>

**[Skip to next step](#income_categories)**

In [33]:
#Create concatenated address field 
apr_bay_area['MTC_ADDRESS_FULL'] =  (apr_bay_area['STREET_ADDRESS_FMT'] + 
                                         ', ' + 
                                         apr_bay_area['JURS_NAME'] +
                                        ', ' +
                                        'CA ' +
                                        apr_bay_area['MTC_ZIP'].fillna('') +
                                        ', USA')

In [35]:
#Return a sample of addresses to review format
apr_bay_area[['STREET_ADDRESS','JURS_NAME','STREET_ADDRESS_FMT','MTC_ZIP','MTC_ADDRESS_FULL']].sample(n=20)

Unnamed: 0,STREET_ADDRESS,JURS_NAME,STREET_ADDRESS_FMT,MTC_ZIP,MTC_ADDRESS_FULL
175470,846 COTATI TR 9,SUNNYVALE,846 COTATI TR 9,,"846 COTATI TR 9, SUNNYVALE, CA , USA"
79363,562 FILBERT ST,SAN FRANCISCO,562 FILBERT ST,,"562 FILBERT ST, SAN FRANCISCO, CA , USA"
82315,985 MORSE ST,SAN JOSE,985 MORSE ST,,"985 MORSE ST, SAN JOSE, CA , USA"
99138,5517 PINNACLE VIEW WAY,ANTIOCH,5517 PINNACLE VIEW WAY,,"5517 PINNACLE VIEW WAY, ANTIOCH, CA , USA"
46763,125 EDGEWOOD AVENUE,MARIN COUNTY,125 EDGEWOOD AVENUE,,"125 EDGEWOOD AVENUE, MARIN COUNTY, CA , USA"
22070,7040 KIRIGIN WY,GILROY,7040 KIRIGIN WY,,"7040 KIRIGIN WY, GILROY, CA , USA"
173584,13151 MCDOVE ST,SARATOGA,13151 MCDOVE ST,,"13151 MCDOVE ST, SARATOGA, CA , USA"
3907,2291 RESERVE DR,BRENTWOOD,2291 RESERVE DR,,"2291 RESERVE DR, BRENTWOOD, CA , USA"
89127,234 NEWCOMB STREET,SONOMA,234 NEWCOMB STREET,,"234 NEWCOMB STREET, SONOMA, CA , USA"
95171,443 POTTERS LANE,VACAVILLE,443 POTTERS LANE,,"443 POTTERS LANE, VACAVILLE, CA , USA"


In [41]:
#Create a geo locator
g = GoogleV3(api_key=google_api_key)

In [85]:
#Use a rate limiter
geocode = RateLimiter(g.geocode, min_delay_seconds=.25, error_wait_seconds=1)

In [87]:
#Create a location column with geolocator 
tqdm.pandas()
bound_box = [Point(38.864245, -123.632497), Point(36.893329, -121.208156)]
apr_bay_area['location'] = apr_bay_area['MTC_ADDRESS_FULL'].progress_apply(partial(geocode,
                                                                          components={"country": "USA"},
                                                                          bounds=bound_box),
                                                                          timeout=1000)

100%|██████████| 30864/30864 [6:34:10<00:00,  1.30it/s]  


In [90]:
apr_bay_area[apr_bay_area['location'].isnull()]

Unnamed: 0,JURS_NAME,CNTY_NAME,YEAR,PRIOR_APN,APN,STREET_ADDRESS,PROJECT_NAME,JURS_TRACKING_ID,UNIT_CAT_DESC,TENURE,...,NO_FA_DR,TERM_AFF_DR,DEM_DES_UNITS,DEM_OR_DES_UNITS,DEM_DES_UNITS_OWN_RENT,NOTES,STREET_ADDRESS_FMT,MTC_ZIP,MTC_ADDRESS_FULL,location
612,ANTIOCH,CONTRA COSTA,2019.0,,52242033,2786 LILY COURT,DENOVA HOMES,B1907-0275,Single-Family Detached Unit,Owner,...,,,,,,,2786 LILY COURT,,"2786 LILY COURT, ANTIOCH, CA , USA",
3291,BELMONT,SAN MATEO,2019.0,43072450,43072450,0 RALSTON RANCH ROAD,,PA2018-0107,Single-Family Detached Unit,Owner,...,,,,,,,0 RALSTON RANCH ROAD,,"0 RALSTON RANCH ROAD, BELMONT, CA , USA",
3294,BELMONT,SAN MATEO,2019.0,43222370,43222370,0 LONGFELLOW DRIVE,,PA2018-0004,Single-Family Detached Unit,Owner,...,,,,,,,0 LONGFELLOW DRIVE,,"0 LONGFELLOW DRIVE, BELMONT, CA , USA",
3304,BELMONT,SAN MATEO,2019.0,44260160,44260160,0 RALSTON AVE,,PA2018-0043,Single-Family Detached Unit,Owner,...,,,,,,,0 RALSTON AVE,,"0 RALSTON AVE, BELMONT, CA , USA",
3310,BELMONT,SAN MATEO,2019.0,45201240,45201240,0 TALBRYN DRIVE,,PA2018-0075,Single-Family Detached Unit,Owner,...,"2nd Dwelling Unit Study, 2014",,,,,,0 TALBRYN DRIVE,,"0 TALBRYN DRIVE, BELMONT, CA , USA",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176006,SUNNYVALE,SANTA CLARA,2018.0,,20934023,345 W MC KINLEY AV 526,Town Center,,5 or More Units Per Structure,Renter,...,,,,,,,345 W MC KINLEY AV 526,,"345 W MC KINLEY AV 526, SUNNYVALE, CA , USA",
176008,SUNNYVALE,SANTA CLARA,2018.0,,20934023,345 W MC KINLEY AV 528,Town Center,,5 or More Units Per Structure,Renter,...,,,,,,,345 W MC KINLEY AV 528,,"345 W MC KINLEY AV 528, SUNNYVALE, CA , USA",
180491,WINDSOR,SONOMA,2018.0,,066-060-004; 066-060-007; 066-060-021,711 AND 9500 OAK PARK STREET,Windsor Veterans Village,17-18-UP-DRC,5 or More Units Per Structure,Renter,...,,55,,,,"Award of $500,000 of in lieu housing funds.",711 AND 9500 OAK PARK STREET,,"711 AND 9500 OAK PARK STREET, WINDSOR, CA , USA",
180502,WINDSOR,SONOMA,2018.0,,163-320-022,1449 ACORN WAY,Portello,B17-0268,Single-Family Detached Unit,Owner,...,,,,,,,1449 ACORN WAY,,"1449 ACORN WAY, WINDSOR, CA , USA",


In [91]:
apr_bay_area['location'].iloc[0]

Location(3040 Thompson Ave, Alameda, CA 94501, USA, (37.7614592, -122.2302331, 0.0))

In [92]:
apr_bay_area['location'].iloc[0].raw

{'address_components': [{'long_name': '3040',
   'short_name': '3040',
   'types': ['street_number']},
  {'long_name': 'Thompson Avenue',
   'short_name': 'Thompson Ave',
   'types': ['route']},
  {'long_name': 'Alameda',
   'short_name': 'Alameda',
   'types': ['locality', 'political']},
  {'long_name': 'Alameda County',
   'short_name': 'Alameda County',
   'types': ['administrative_area_level_2', 'political']},
  {'long_name': 'California',
   'short_name': 'CA',
   'types': ['administrative_area_level_1', 'political']},
  {'long_name': 'United States',
   'short_name': 'US',
   'types': ['country', 'political']},
  {'long_name': '94501', 'short_name': '94501', 'types': ['postal_code']},
  {'long_name': '1754',
   'short_name': '1754',
   'types': ['postal_code_suffix']}],
 'formatted_address': '3040 Thompson Ave, Alameda, CA 94501, USA',
 'geometry': {'location': {'lat': 37.7614592, 'lng': -122.2302331},
  'location_type': 'RANGE_INTERPOLATED',
  'viewport': {'northeast': {'lat': 3

In [123]:
apr_bay_area['location'].iloc[0].raw['formatted_address']

'3040 Thompson Ave, Alameda, CA 94501, USA'

In [95]:
#Set geocode location type
apr_bay_area['MTC_GEOCODE_TYPE'] = apr_bay_area['location'].map(lambda loc: loc.raw['geometry']['location_type'] 
                                                                if loc else None)

In [117]:
#Set lattitude
apr_bay_area['MTC_LAT'] = (apr_bay_area['location']
                                        .map(lambda loc: loc.raw['geometry']['location']['lat']
                                             if loc else None))

In [119]:
#Set longitude 
apr_bay_area['MTC_LONG'] = (apr_bay_area['location']
                                        .map(lambda loc: loc.raw['geometry']['location']['lng']
                                             if loc else None))

In [124]:
#Set geocode address 
apr_bay_area['MTC_GEOCODE_ADDRESS'] = (apr_bay_area['location']
                                      .map(lambda loc: loc.raw['formatted_address']
                                          if loc else None))

In [125]:
os.listdir(os.path.join(work_dir,
                       '2019 Original APRs'))

['Extracts',
 'ORIGINAL_Table_A2_Report_with_Data_Dictionary.xlsx',
 'ORIGINAL_Table_A_2018_2019.xlsx',
 'ORIGINAL_Table_D_Report.xlsx',
 'Table_A2_Report_Address_Lat_Long.xlsx',
 'Table_A2_Report_Lat_Long_Google.csv']

In [126]:
# # Write temporary output to csv
# apr_bay_area.to_csv(os.path.join(work_dir,
#                        '2019 Original APRs',
#                                 'Table_A2_Report_Lat_Long_Google.csv'),
#                    index=False)

In [67]:
#Read temporary output csv back into notebook
apr_bay_area = pd.read_csv(os.path.join(work_dir,
                                        '2019 Original APRs',
                                        'Table_A2_Report_Lat_Long_Google.csv'))

In [68]:
apr_bay_area.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30864 entries, 0 to 30863
Data columns (total 56 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   JURS_NAME                    30864 non-null  object 
 1   CNTY_NAME                    30864 non-null  object 
 2   YEAR                         30864 non-null  int64  
 3   PRIOR_APN                    2983 non-null   object 
 4   APN                          30680 non-null  object 
 5   STREET_ADDRESS               30686 non-null  object 
 6   PROJECT_NAME                 14339 non-null  object 
 7   JURS_TRACKING_ID             23670 non-null  object 
 8   UNIT_CAT_DESC                30864 non-null  object 
 9   TENURE                       30864 non-null  object 
 10  VLOW_INCOME_DR               30864 non-null  int64  
 11  VLOW_INCOME_NDR              30864 non-null  int64  
 12  LOW_INCOME_DR                30864 non-null  int64  
 13  LOW_INCOME_NDR  

In [69]:
#Create geodataframe from lat long coords
apr_bay_area_geo = gp.GeoDataFrame(apr_bay_area,
                                  geometry=gp.points_from_xy(apr_bay_area['MTC_LONG'],apr_bay_area['MTC_LAT']),
                                  crs="EPSG:4326")

In [70]:
apr_bay_area_geo.head(5)

Unnamed: 0,JURS_NAME,CNTY_NAME,YEAR,PRIOR_APN,APN,STREET_ADDRESS,PROJECT_NAME,JURS_TRACKING_ID,UNIT_CAT_DESC,TENURE,...,NOTES,STREET_ADDRESS_FMT,MTC_ZIP,MTC_ADDRESS_FULL,location,MTC_GEOCODE_TYPE,MTC_LAT,MTC_LONG,MTC_GEOCODE_ADDRESS,geometry
0,ALAMEDA,ALAMEDA,2019,,69-106-19,3040 THOMPSON AVE,,CB19-0152,Accessory Dwelling Unit,Renter,...,,3040 THOMPSON AVE,,"3040 THOMPSON AVE, ALAMEDA, CA , USA","3040 Thompson Ave, Alameda, CA 94501, USA",RANGE_INTERPOLATED,37.761459,-122.230233,"3040 Thompson Ave, Alameda, CA 94501, USA",POINT (-122.23023 37.76146)
1,ALAMEDA,ALAMEDA,2019,,69-130-11,2916 MARINA DR,,CB19-0317,Accessory Dwelling Unit,Renter,...,,2916 MARINA DR,,"2916 MARINA DR, ALAMEDA, CA , USA","2916 Marina Dr, Alameda, CA 94501, USA",RANGE_INTERPOLATED,37.766979,-122.228978,"2916 Marina Dr, Alameda, CA 94501, USA",POINT (-122.22898 37.76698)
2,ALAMEDA,ALAMEDA,2019,,69-64-1,1206 COURT ST,,CB19-0426,Accessory Dwelling Unit,Renter,...,,1206 COURT ST,,"1206 COURT ST, ALAMEDA, CA , USA","1206 Court St, Alameda, CA 94501, USA",RANGE_INTERPOLATED,37.756314,-122.236484,"1206 Court St, Alameda, CA 94501, USA",POINT (-122.23648 37.75631)
3,ALAMEDA,ALAMEDA,2019,,69-77-1,1322 FOUNTAIN ST,,CB19-0236,Accessory Dwelling Unit,Renter,...,,1322 FOUNTAIN ST,,"1322 FOUNTAIN ST, ALAMEDA, CA , USA","1322 Fountain St, Alameda, CA 94501, USA",RANGE_INTERPOLATED,37.757186,-122.234114,"1322 Fountain St, Alameda, CA 94501, USA",POINT (-122.23411 37.75719)
4,ALAMEDA,ALAMEDA,2019,,69-99-6,1515 COURT ST,,CB18-1068,Accessory Dwelling Unit,Renter,...,,1515 COURT ST,,"1515 COURT ST, ALAMEDA, CA , USA","1515 Court St, Alameda, CA 94501, USA",RANGE_INTERPOLATED,37.760636,-122.232302,"1515 Court St, Alameda, CA 94501, USA",POINT (-122.23230 37.76064)


In [101]:
sf_permits[sf_permits['MTC_GEOCODE_ADDRESS'].isnull()].size

72

In [102]:
sf_permits = (apr_bay_area_geo[['MTC_GEOCODE_TYPE','MTC_GEOCODE_ADDRESS','geometry']]
              [(apr_bay_area_geo['JURS_NAME'] == 'SAN FRANCISCO') & 
               (apr_bay_area_geo['MTC_GEOCODE_ADDRESS'].notnull())])

In [103]:
sf_permits.head(5)

Unnamed: 0,MTC_GEOCODE_TYPE,MTC_GEOCODE_ADDRESS,geometry
10458,ROOFTOP,"1327 Columbus Ave, San Francisco, CA 94133, USA",POINT (-122.41933 37.80638)
10459,ROOFTOP,"2741 Hyde St #2743, San Francisco, CA 94109, USA",POINT (-122.42075 37.80627)
10460,ROOFTOP,"1255 Columbus Ave, San Francisco, CA 94133, USA",POINT (-122.41817 37.80555)
10461,ROOFTOP,"15 Vandewater St, San Francisco, CA 94133, USA",POINT (-122.41241 37.80525)
10462,ROOFTOP,"28 Houston St, San Francisco, CA 94133, USA",POINT (-122.41610 37.80400)


In [106]:
#Plot points 
map = folium.Map(location=(37.830787, -122.362624),
                zoom_start=11)

permits = folium.features.GeoJson(sf_permits.to_json())

map.add_child(permits)

In [75]:
apr_bay_area_geo['MTC_GEOCODE_TYPE'].unique()

array(['RANGE_INTERPOLATED', 'ROOFTOP', 'GEOMETRIC_CENTER', 'APPROXIMATE',
       nan], dtype=object)

In [76]:
geocode_type_summary = ((apr_bay_area_geo[['MTC_GEOCODE_TYPE']].fillna('NO_GEOCODE_TYPE'))
                        .groupby('MTC_GEOCODE_TYPE')
                        .agg({'MTC_GEOCODE_TYPE':'count'})
                        .rename(columns={'MTC_GEOCODE_TYPE':'GEOCODE_COUNT'})
                        .reset_index())
geocode_type_summary

Unnamed: 0,MTC_GEOCODE_TYPE,GEOCODE_COUNT
0,APPROXIMATE,555
1,GEOMETRIC_CENTER,1463
2,NO_GEOCODE_TYPE,527
3,RANGE_INTERPOLATED,4933
4,ROOFTOP,23386


In [78]:
geocode_type_summary['GEOCODE_PERCENTAGE'] = geocode_type_summary[['GEOCODE_COUNT']].apply(lambda x: (x/x.sum()*100))
geocode_type_summary

Unnamed: 0,MTC_GEOCODE_TYPE,GEOCODE_COUNT,GEOCODE_PERCENTAGE
0,APPROXIMATE,555,1.798212
1,GEOMETRIC_CENTER,1463,4.74015
2,NO_GEOCODE_TYPE,527,1.707491
3,RANGE_INTERPOLATED,4933,15.983022
4,ROOFTOP,23386,75.771125


**[Back to Top](#top)**

**[Back to Section Top](#geocode)**

## Create income category summary columns <a name='income_categories'></a>

**[Skip to next step](#)**

In [111]:
apr_bay_area_geo.columns

Index(['JURS_NAME', 'CNTY_NAME', 'YEAR', 'PRIOR_APN', 'APN', 'STREET_ADDRESS',
       'PROJECT_NAME', 'JURS_TRACKING_ID', 'UNIT_CAT_DESC', 'TENURE',
       'VLOW_INCOME_DR', 'VLOW_INCOME_NDR', 'LOW_INCOME_DR', 'LOW_INCOME_NDR',
       'MOD_INCOME_DR', 'MOD_INCOME_NDR', 'ABOVE_MOD_INCOME', 'ENT_APPROVE_DT',
       'NO_ENTITLEMENTS', 'BP_VLOW_INCOME_DR', 'BP_VLOW_INCOME_NDR',
       'BP_LOW_INCOME_DR', 'BP_LOW_INCOME_NDR', 'BP_MOD_INCOME_DR',
       'BP_MOD_INCOME_NDR', 'BP_ABOVE_MOD_INCOME', 'BP_ISSUE_DT',
       'NO_BILDING_PERMITS', 'CO_VLOW_INCOME_DR', 'CO_VLOW_INCOME_NDR',
       'CO_LOW_INCOME_DR', 'CO_LOW_INCOME_NDR', 'CO_MOD_INCOME_DR',
       'CO_MOD_INCOME_NDR', 'CO_ABOVE_MOD_INCOME', 'CO_ISSUE_DT',
       'NO_OTHER_FORMS_OF_READINESS', 'EXTR_LOW_INCOME_UNITS', 'APPROVE_SB35',
       'INFILL_UNITS', 'FIN_ASSIST_NAME', 'DR_TYPE', 'NO_FA_DR', 'TERM_AFF_DR',
       'DEM_DES_UNITS', 'DEM_OR_DES_UNITS', 'DEM_DES_UNITS_OWN_RENT', 'NOTES',
       'STREET_ADDRESS_FMT', 'MTC_ZIP', 'MTC_

In [112]:
#Create count columns for sum of units by income category
apr_bay_area_geo['MTC_VLOW_TOT'] = apr_bay_area_geo['VLOW_INCOME_DR'] + apr_bay_area_geo['VLOW_INCOME_NDR']
apr_bay_area_geo['MTC_MOD_TOT'] = apr_bay_area_geo['LOW_INCOME_DR'] + apr_bay_area_geo['LOW_INCOME_NDR']
apr_bay_area_geo['MTC_LOW_TOT'] = apr_bay_area_geo['MOD_INCOME_DR'] + apr_bay_area_geo['MOD_INCOME_NDR']

In [None]:
IF CO_VLOW_INCOME_NDR_CON_ENT_PERM > 0, 
THEN CO_VLOW_INCOME_NDR_CON_ENT_PERM 
ELSE IF BP_VLOW_INCOME_NDR_CON_ENT_PERM 
THEN BP_VLOW_INCOME_NDR_CON_ENT_PERM
ELSE IF VLOW_INCOME_NDR_CON_ENT_PERMITS > 0 
THEN VLOW_INCOME_NDR_CON_ENT_PERMITS

In [132]:
#Create function to calculate income category permit count

def sum_permits_by_income(row,co_col,bp_col,entitle_col):
    if (row[co_col] > 0):
        return row[co_col]
    elif (row[bp_col] > 0):
        return row[bp_col]
    elif (row[entitle_col] > 0):
        return row[entitle_col]

In [133]:
apr_bay_area_geo['MTC_VLOW_INCOME_NDR'] = apr_bay_area_geo.apply(lambda row: 
                                                                 sum_permits_by_income(row,
                                                                                    'CO_VLOW_INCOME_NDR',
                                                                                     'BP_VLOW_INCOME_NDR',
                                                                                     'VLOW_INCOME_NDR'
                                                                                    ),
                                                                 axis=1)

In [134]:
apr_bay_area_geo[apr_bay_area_geo['MTC_VLOW_INCOME_NDR'] > 2]

Unnamed: 0,JURS_NAME,CNTY_NAME,YEAR,PRIOR_APN,APN,STREET_ADDRESS,PROJECT_NAME,JURS_TRACKING_ID,UNIT_CAT_DESC,TENURE,...,location,MTC_GEOCODE_TYPE,MTC_LAT,MTC_LONG,MTC_GEOCODE_ADDRESS,geometry,MTC_VLOW_TOT,MTC_MOD_TOT,MTC_LOW_TOT,MTC_VLOW_INCOME_NDR
8883,PALO ALTO,SANTA CLARA,2019,,132-35-045,3705 EL CAMINO REAL,,18PLN-00136,5 or More Units Per Structure,Renter,...,"3705 El Camino Real, Palo Alto, CA 94306, USA",ROOFTOP,37.418378,-122.132401,"3705 El Camino Real, Palo Alto, CA 94306, USA",POINT (-122.13240 37.41838),58,5,0,58.0
12948,SAN MATEO COUNTY,SAN MATEO,2019,Entitled,54113140,"2700 MIDDLEFIELD ROAD, REDWOOD CITY CA 94063",Middlefield Junction,PLN2019-00320,5 or More Units Per Structure,Renter,...,"2700 Middlefield Rd, Redwood City, CA 94063, USA",ROOFTOP,37.475026,-122.212471,"2700 Middlefield Rd, Redwood City, CA 94063, USA",POINT (-122.21247 37.47503),119,59,0,59.0
17316,BERKELEY,ALAMEDA,2018,,056 196001404,2001 FOURTH STREET,,,5 or More Units Per Structure,Renter,...,"2001 Fourth St, Berkeley, CA 94710, USA",ROOFTOP,37.867366,-122.29928,"2001 Fourth St, Berkeley, CA 94710, USA",POINT (-122.29928 37.86737),0,0,0,12.0
27420,SAN MATEO COUNTY,SAN MATEO,2018,,60265050,"105 5TH AVE, REDWOOD CITY, CA",Waverly Place,BLD2015-01761,5 or More Units Per Structure,Renter,...,"105 Fifth Ave, Redwood City, CA 94063, USA",ROOFTOP,37.469259,-122.207616,"105 Fifth Ave, Redwood City, CA 94063, USA",POINT (-122.20762 37.46926),0,0,0,7.0
28611,SANTA ROSA,SONOMA,2018,,173030029,,RUSSELL,AVE,Single-Family Detached Unit,Renter,...,,,,,,POINT (nan nan),14,0,0,14.0


**[Back to Top](#top)**

**[Back to Section Top](#income_categories)**