In [2]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import googlemaps
from datetime import datetime
from dotenv import load_dotenv

## Load Data

In [18]:
afford_housing = pd.read_csv('./data/affordable-housing-addresses.csv')
all_housing = pd.read_csv('./data/property-data.csv')

In [4]:
afford_housing.head()

Unnamed: 0,Project Name,Neighborhood,Zip Code,TtlProjUnits,RentUnits,OwnUnits,TtlMarket,MarketRent,MarketOwn,Total Income-Restricted,Income-Restricted Rental,Income-Restricted Ownership,Tenure,Public/ Private,Includes Senior Units?,Section 8,formatted_address
0,Abbot Street/ Shawmut Ave,Roxbury,2119,16,16,0,0.0,0.0,0.0,16,16,0.0,Rental,Private,,,"100 Shawmut Ave, Boston, MA 02118, United States"
1,Academy Homes I,Roxbury,2119,202,202,0,52.0,52.0,0.0,150,150,0.0,Rental,Private,,,"1592 Columbus Ave, Roxbury, MA 02119, United S..."
2,Academy Homes II,Roxbury,2119,236,236,0,0.0,0.0,0.0,236,236,0.0,Rental,Private,,Y,"2926 Washington St, Roxbury, MA 02119, United ..."
3,Adams Court Phase A,Mattapan,2126,50,50,0,0.0,0.0,0.0,50,50,0.0,Rental,Private,,,"59 Msgr Patrick J Lydon Way, Boston, MA 02124,..."
4,Adams Court Phase B,Mattapan,2126,45,45,0,0.0,0.0,0.0,45,45,0.0,Rental,Private,,,


In [5]:
all_housing.head()

Unnamed: 0,PID,CM_ID,GIS_ID,ST_NUM,ST_NAME,UNIT_NUM,CITY,ZIPCODE,BLDG_SEQ,NUM_BLDGS,...,KITCHEN_STYLE2,KITCHEN_STYLE3,HEAT_TYPE,HEAT_FUEL,AC_TYPE,FIRE_PLACE,ORIENTATION,NUM_PARKING,PROP_VIEW,CORNER_UNIT
0,100001000,,100001000.0,104 A 104,PUTNAM ST,,EAST BOSTON,2128,1.0,1,...,S - Semi-Modern,S - Semi-Modern,W - Ht Water/Steam,,N - None,0,,3,A - Average,
1,100002000,,100002000.0,197,LEXINGTON ST,,EAST BOSTON,2128,1.0,1,...,M - Modern,M - Modern,F - Forced Hot Air,,C - Central AC,0,,0,A - Average,
2,100003000,,100003000.0,199,LEXINGTON ST,,EAST BOSTON,2128,1.0,1,...,S - Semi-Modern,S - Semi-Modern,S - Space Heat,,N - None,0,,0,A - Average,
3,100004000,,100004000.0,201,LEXINGTON ST,,EAST BOSTON,2128,1.0,1,...,S - Semi-Modern,S - Semi-Modern,W - Ht Water/Steam,,N - None,0,,0,A - Average,
4,100005000,,100005000.0,203,LEXINGTON ST,,EAST BOSTON,2128,1.0,1,...,S - Semi-Modern,,W - Ht Water/Steam,,N - None,0,,0,A - Average,


## Cleaning street addresses

In [19]:
# isolating affordable housing street addresses
import re

split_add = afford_housing["formatted_address"].str.split("[,]", expand = True).astype(str).applymap(lambda x: x.strip())

#checks for first row value that starts with a number
def helper(x):
    for i in x:
        if bool(re.search('^[0-9].*',  i)):
            return i
        else:
            return np.nan

afford_housing["street_address"] = split_add.apply(lambda x: helper(x), axis = 1)
afford_housing.drop(["formatted_address"], axis = 1, inplace = True)
afford_housing.head()

Unnamed: 0,Project Name,Neighborhood,Zip Code,TtlProjUnits,RentUnits,OwnUnits,TtlMarket,MarketRent,MarketOwn,Total Income-Restricted,Income-Restricted Rental,Income-Restricted Ownership,Tenure,Public/ Private,Includes Senior Units?,Section 8,street_address
0,Abbot Street/ Shawmut Ave,Roxbury,2119,16,16,0,0.0,0.0,0.0,16,16,0.0,Rental,Private,,,100 Shawmut Ave
1,Academy Homes I,Roxbury,2119,202,202,0,52.0,52.0,0.0,150,150,0.0,Rental,Private,,,1592 Columbus Ave
2,Academy Homes II,Roxbury,2119,236,236,0,0.0,0.0,0.0,236,236,0.0,Rental,Private,,Y,2926 Washington St
3,Adams Court Phase A,Mattapan,2126,50,50,0,0.0,0.0,0.0,50,50,0.0,Rental,Private,,,59 Msgr Patrick J Lydon Way
4,Adams Court Phase B,Mattapan,2126,45,45,0,0.0,0.0,0.0,45,45,0.0,Rental,Private,,,


In [24]:
all_housing["street_address"] = all_housing.apply(lambda x: x["ST_NUM"].title() + ' ' + x["ST_NAME"].title(), axis = 1)
all_housing.drop(["ST_NUM", "ST_NAME"], axis = 1, inplace = True)
all_housing.head()

Unnamed: 0,PID,CM_ID,GIS_ID,UNIT_NUM,CITY,ZIPCODE,BLDG_SEQ,NUM_BLDGS,LUC,LU,...,KITCHEN_STYLE3,HEAT_TYPE,HEAT_FUEL,AC_TYPE,FIRE_PLACE,ORIENTATION,NUM_PARKING,PROP_VIEW,CORNER_UNIT,street_address
0,100001000,,100001000.0,,EAST BOSTON,2128,1.0,1,105,R3,...,S - Semi-Modern,W - Ht Water/Steam,,N - None,0,,3,A - Average,,104 A 104 Putnam St
1,100002000,,100002000.0,,EAST BOSTON,2128,1.0,1,105,R3,...,M - Modern,F - Forced Hot Air,,C - Central AC,0,,0,A - Average,,197 Lexington St
2,100003000,,100003000.0,,EAST BOSTON,2128,1.0,1,105,R3,...,S - Semi-Modern,S - Space Heat,,N - None,0,,0,A - Average,,199 Lexington St
3,100004000,,100004000.0,,EAST BOSTON,2128,1.0,1,105,R3,...,S - Semi-Modern,W - Ht Water/Steam,,N - None,0,,0,A - Average,,201 Lexington St
4,100005000,,100005000.0,,EAST BOSTON,2128,1.0,1,104,R2,...,,W - Ht Water/Steam,,N - None,0,,0,A - Average,,203 Lexington St


## Remove non-rental unites (still needs work)

In [6]:
afford_rentals = afford_housing.loc[afford_housing["Tenure"].str.match(".*Rental.*")]

In [7]:
# keeping columns useful for cleaning up non-rental properties
cols = ["ST_NUM", "ST_NAME", "CITY", "ZIPCODE", "NUM_BLDGS", "LU", "OWN_OCC", "RES_UNITS", "RC_UNITS"]
all_rentals = all_housing[cols]

# remove any properties that don't have residential units
all_rentals = all_rentals.loc[all_rentals["RES_UNITS"] > 0]

# remove any buildings that only have one residential unit that is occupied by the property owner
all_rentals = all_rentals.loc[~((all_rentals["OWN_OCC"] == "Y") & (all_rentals["RES_UNITS"] == 1))]
all_rentals.head()

Unnamed: 0,ST_NUM,ST_NAME,CITY,ZIPCODE,NUM_BLDGS,LU,OWN_OCC,RES_UNITS,RC_UNITS
17,239,LEXINGTON ST,EAST BOSTON,2128,1,CM,N,4,0
22,241,LEXINGTON ST,EAST BOSTON,2128,1,CM,N,3,0
23,241,LEXINGTON ST,EAST BOSTON,2128,1,CD,N,1,0
25,241,LEXINGTON ST,EAST BOSTON,2128,1,CD,N,1,0
30,249,LEXINGTON ST,EAST BOSTON,2128,1,CM,N,3,0


In [13]:
all_housing.loc[all_housing["LU"] == "R3"]["RES_UNITS"].value_counts()

0    13744
3       17
Name: RES_UNITS, dtype: int64