# Finding Approved Building Permits in for Buildings in City Council District 7

In [1]:
from geopy.geocoders import Nominatim
import geopandas as gpd
from shapely.geometry import Point
import matplotlib as plt
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("../data/approved-building-permit.csv", low_memory=False)

In [3]:
df_street_address = pd.read_csv("../data/Boston-SAM.csv", low_memory=False)

In [896]:
df.shape

(666484, 23)

In [897]:
df_columns = df.columns

### Number of rows missing longitude: 14735
But the majority of them have address, which can be filled by comparing with Love Street Address Management dataset to impute the value. Only 14 rows have both longitude and address missing, which will be dropped.

In [898]:
df[df["x_longitude"].isna()][["address", "x_longitude"]].shape

(14735, 2)

In [899]:
df[df["x_longitude"].isna() & df["address"].isna()][["address", "x_longitude"]].shape

(14, 2)

### Number of rows missing latitude: 14735
But the majority of them have address, which can be filled by comparing with Love Street Address Management dataset to impute the value. Only 14 rows have both latitude and address missing, which will be dropped.

In [900]:
df[df["y_latitude"].isna()][["address", "y_latitude"]].shape

(14735, 2)

In [901]:
df[df["y_latitude"].isna() & df["address"].isna()][["address", "y_latitude"]].shape

(14, 2)

### The row that has missing value for longitude also has missing values for latitude
This make it easy to impute by joining the table to Live Street Address Management only once.

In [902]:
df[df["x_longitude"].isna()][["address", "x_longitude"]].index.equals(df[df["y_latitude"].isna()][["address", "y_latitude"]].index)

True

## Impute `x_longitude` and `y_latitude`

In [903]:
df["address_new"] = df["address"].str.lower().str.strip()
df[["address_new", "x_longitude", "y_latitude"]]

Unnamed: 0,address_new,x_longitude,y_latitude
0,181-183 state st,-71.052924,42.359190
1,175 w boundary rd,-71.149611,42.260750
2,15 prospect st,-71.057585,42.375243
3,211 w springfield st,-71.080251,42.340600
4,14 william jackson av,-71.154051,42.344600
...,...,...,...
666479,3 center pz,-71.060818,42.360383
666480,1188 commonwealth av,-71.129145,42.350100
666481,46 burroughs st,-71.117131,42.313220
666482,16 colonial ave,-71.075261,42.290650


In [904]:
df_street_address["FULL_ADDRESS_new"] = df_street_address["FULL_ADDRESS"].str.lower().str.strip()
df_street_address[["FULL_ADDRESS_new"]]

Unnamed: 0,FULL_ADDRESS_new
0,6-10 a st
1,15 a st
2,7 a st
3,10 a st
4,172-174 a st
...,...
400191,56r wichita ter
400192,2 charlesgate w
400193,116 centre st
400194,56b crockett ave


In [905]:
df_unique_street_address = df_street_address.drop_duplicates(subset=["FULL_ADDRESS_new"])
df_unique_street_address.shape

(396507, 33)

In [906]:
df = pd.merge(df, df_unique_street_address, left_on="address_new", right_on="FULL_ADDRESS_new", how="left")
df["x_longitude"] = df["x_longitude"].combine_first(df["POINT_X"])
df["y_latitude"] = df["y_latitude"].combine_first(df["POINT_Y"])

In [907]:
df.shape

(666484, 57)

In [908]:
df.columns

Index(['permitnumber', 'worktype', 'permittypedescr', 'description',
       'comments', 'applicant', 'declared_valuation', 'total_fees',
       'issued_date', 'expiration_date', 'status', 'occupancytype', 'sq_feet',
       'address', 'city', 'state', 'zip', 'property_id', 'parcel_id', 'gpsy',
       'gpsx', 'y_latitude', 'x_longitude', 'address_new', '_id', 'OID_',
       'SAM_ADDRESS_ID', 'BUILDING_ID', 'RELATIONSHIP_TYPE', 'FULL_ADDRESS',
       'STREET_NUMBER', 'IS_RANGE', 'RANGE_FROM', 'RANGE_TO', 'UNIT',
       'FULL_STREET_NAME', 'STREET_ID', 'STREET_PREFIX', 'STREET_BODY',
       'STREET_SUFFIX_ABBR', 'STREET_FULL_SUFFIX', 'STREET_SUFFIX_DIR',
       'STREET_NUMBER_SORT', 'MAILING_NEIGHBORHOOD', 'ZIP_CODE', 'X_COORD',
       'Y_COORD', 'SAM_STREET_ID', 'WARD', 'PRECINCT_WARD', 'PARCEL',
       'created_date', 'last_edited_date', 'shape_wkt', 'POINT_X', 'POINT_Y',
       'FULL_ADDRESS_new'],
      dtype='object')

In [909]:
df = df[list(df_columns) + ["address_new"]]

In [910]:
missing = df[df["x_longitude"].isna() & df["y_latitude"].isna()].copy()

In [911]:
missing.shape

(7207, 24)

In [912]:
missing["address_new"].unique()

array(['1 financial cn', '29l-31 germania st', '1 long island', ...,
       '2r cary pl', '1011 massachusetts av', '492-494 bennington st'],
      dtype=object)

## Use street name to match
After manual inspection, we found that there are mismatch between the spellings of street suffix and street number, we will do our best to clean it up by manually replacing some commonly misused suffixes.

### Remove numbers and dashes

In [913]:
df["address_new"] = df["address_new"].str.replace(r"\b\w*[\d-]\w*\b", "", regex=True).str.strip()

In [914]:
df["address_new"].sort_values()

632996    - beacon st
585109    - beacon st
567453    - beacon st
610311    - beacon st
632393    - beacon st
             ...     
665914            NaN
666038            NaN
666068            NaN
666221            NaN
666222            NaN
Name: address_new, Length: 666484, dtype: object

In [915]:
df_street_address_unique_street_name = df_street_address.copy()
df_street_address_unique_street_name["FULL_STREET_NAME_new"] = df_street_address_unique_street_name["FULL_STREET_NAME"].str.lower()
df_street_address_unique_street_name = df_street_address_unique_street_name.drop_duplicates(subset=["FULL_STREET_NAME_new"]).copy()

In [916]:
df_street_address_unique_street_name["FULL_STREET_NAME_new"].sort_values()

0                  a st
43             abbot st
76        abbotsford st
95              abby rd
104         aberdeen st
              ...      
133956        zamora ct
133959        zamora st
133974       zeigler st
133977        zeller st
134864              NaN
Name: FULL_STREET_NAME_new, Length: 4113, dtype: object

In [917]:
df = pd.merge(df, df_street_address_unique_street_name, left_on="address_new", right_on="FULL_STREET_NAME_new", how="left")

In [918]:
df["x_longitude"] = df["x_longitude"].combine_first(df["POINT_X"])
df["y_latitude"] =df["y_latitude"].combine_first(df["POINT_Y"])

In [919]:
df.shape

(666484, 58)

In [920]:
df = df[list(df_columns) + ["address_new"]]

In [921]:
missing = df[df["x_longitude"].isna() & df["y_latitude"].isna()].copy()

In [922]:
missing.shape

(5873, 24)

In [923]:
missing["address_new"].unique()

array(['financial cn', 'long island', 'first av', 'chestnut hill av',
       'legends wy', 'massachusetts av', 'commonwealth av', 'louders ln',
       'harrison av', 'brookline av', 'warren av', 'huntington av',
       'walnut av', 'faneuil hall market pl', 'vfw pk', 'northern av',
       'rutherford av', 'american legion hw', 'atlantic av',
       'long island richards', 'commercial wharf e', 'blue hill av',
       'e india ro', 'columbus av', 'western av', 'dorchester av',
       'harvard av', 'damon st', 'north harvard', 'long wharf',
       'commercial wharf east', 'forest hills av', 'mt pleasant av',
       'marina park drive', 'rowes wharf', 'brookside av', 'necco ct',
       'charles street south', 'thompson island', 'harvard',
       'commercial wharf', 'longwood av', 'mcclellan hw',
       'william f mcclellan hw', 'hyde park av', 'marbury te',
       'gillette pk', 'odonnell te', 'wilbert ct', 'spectacle island st',
       'neponset av', 'martin luther king jr bl', 'fifth av'

### Fix street suffix

In [924]:
df['address_new'] = df['address_new'].str.replace(r'^\bst\b', 'saint', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bhw\b$', 'hwy', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bte\b$', "ter", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bwa\b$', "way", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^\bmsgr\b', 'monsignor', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bav\b$', 'ave', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^mt\b', 'mount', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bal\b$', 'aly', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bcn\b$', 'ctr', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bwy\b$', 'way', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bwh\b', 'whf', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bwharf\b', 'whf', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bpk\b$', 'park', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bro\b$', 'row', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bbl\b$', 'blvd', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bci\b$', 'cir', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bdrive\b$', 'dr', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bpkwy\b$', 'parkway', case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^commercial whf east$', "commercial whf e", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^commercial whf$', "commercial whf e", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^fish pier$', "boston fish pier", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^a fish pier$', "boston fish pier", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^fish pier rd$', "boston fish pier", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'parkvale', "park vale", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'\bwm\b', "william", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^charlesgate east st$', "charlesgate e st", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^s crescent cc$', "s crescent cirt", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^s crescent cir$', "s crescent cirt", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^charles street south$', "charles st s", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^east india rd$', "east india row", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^saint richards$', "saint richard", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("e seventh", "e seventh st", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("east fourth st", "e fourth st", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("e fifth", "e fifth st", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("n harvard", "n harvard st", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace('east india', "e india", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("west walnut", "w walnut", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("west newton", "w newton", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace("west springfield", "w springfield", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r"[^a-zA-Z0-9 ]", "", regex=True).str.strip()
df_street_address_unique_street_name["FULL_STREET_NAME_new"] = df_street_address_unique_street_name["FULL_STREET_NAME_new"].str.replace(r"[^a-zA-Z0-9 ]", "", regex=True).str.strip()

In [925]:
df = pd.merge(df, df_street_address_unique_street_name, left_on="address_new", right_on="FULL_STREET_NAME_new", how="left")

In [926]:
df["x_longitude"] = df["x_longitude"].combine_first(df["POINT_X"])
df["y_latitude"] =df["y_latitude"].combine_first(df["POINT_Y"])

In [927]:
df.shape

(666484, 58)

In [928]:
df = df[list(df_columns) + ["address_new"]]

In [929]:
missing = df[df["x_longitude"].isna() & df["y_latitude"].isna()].copy()

In [930]:
missing.shape

(1000, 24)

In [931]:
missing["address_new"].unique()

array(['long island', 'faneuil hall market pl', 'vfw park',
       'long island richards', 'damon st', 'north harvard', 'necco ct',
       'thompson island', 'harvard', 'mcclellan hwy', 'wilbert ct',
       'spectacle island st', 'desoto rd', 'long island nichols',
       'thompson pl', 'necco pl', 'truman park', 'longwood', 'washington',
       'west broadway', 'long island morris bldg', 'constitution pl',
       'monsignor p j lydon way', 'charlesgate east', 'vfw parkway st',
       'n harvard st way', 'roseberry rd', 'lagrange rd',
       'monsignor ocallaghan way', 'moon island', 'orton marotta way',
       'broadway st', 'westinghouse pz', 'long island shelter',
       'beechland st', 'david mugar way', 'wood island park',
       'mary moore beatty cir', 'bennington', 'spectacle island',
       'w broadway st', 'linskey barry ct', 'lamartine street ex',
       'neponset valley park', 'charles st south', 'franklin park',
       'deer island', 'templeton st', 'hilltop st', 'ledgehil

### Remove suffix
Compare only street body.

In [932]:
def remove_last_word(text):
    words = text.split()  # Split the string into words
    if len(words) <= 1:
        return text  # Return the original string if it's the only word
    words = words[:-1]  # Remove the last word using slicing
    return ' '.join(words)

In [933]:
df["address_new"] = df["address_new"].astype(str)
df["address_new"] = df["address_new"].str.replace(r"\b\w*[^\w\s]\w*\b|\b\w*\d\w*\b", "", regex=True)
df["address_new"] = df["address_new"].str.strip()
df["address_new"] = df["address_new"].apply(remove_last_word)

In [934]:
df["address_new"].unique()

array(['state', 'w boundary', 'prospect', ..., 'a acorn', 'mindoro',
       'marbury te'], dtype=object)

In [935]:
df_street_address_unique_street_body = df_street_address_unique_street_name.copy()
df_street_address_unique_street_body["STREET_BODY_new"] = df_street_address_unique_street_body["STREET_BODY"].str.replace(r"[^a-zA-Z0-9 ]", "", regex=True).str.strip()
df_street_address_unique_street_body["STREET_BODY_new"] = df_street_address_unique_street_body["STREET_BODY_new"].str.lower().str.strip()
df_street_address_unique_street_body.drop_duplicates(subset=["STREET_BODY_new"], inplace=True)

In [936]:
df = pd.merge(df, df_street_address_unique_street_body, left_on="address_new", right_on="STREET_BODY_new", how="left")

In [937]:
df["x_longitude"] = df["x_longitude"].combine_first(df["POINT_X"])
df["y_latitude"] =df["y_latitude"].combine_first(df["POINT_Y"])

In [938]:
df.shape

(666484, 59)

In [939]:
df = df[list(df_columns) + ["address_new"]]

In [940]:
missing = df[df["x_longitude"].isna() & df["y_latitude"].isna()].copy()

In [941]:
missing.shape

(353, 24)

In [942]:
missing["address_new"].unique()

array(['faneuil hall market', 'mcclellan', 'spectacle island', 'desoto',
       'long island morris', 'monsignor p j lydon', 'vfw parkway',
       'n harvard st', 'roseberry', 'monsignor ocallaghan',
       'orton marotta', 'david mugar', 'wood island', 'mary moore beatty',
       'spectacle', 'w broadway', 'linskey barry', 'lamartine street',
       'charles st', 'hilltop', 'ledgehill', 'pittsburg',
       'saint richards', 'logan airport trmnl', 'charlesgate e',
       'william day', 'stadium', 'embankment', 'daves', 'haul', 'v f w',
       'new market', 'sterling', 'degauthier', 'star king', 'lowney',
       'monsignor a a jacobbe', 'crowley rogers', 'yawkey', 'devine',
       'hefferan', 'river st sturbridge', 'w roxbury', 'brooke marshall',
       'tibbets town', 'love joy', 'tchapitoulas', 'gen william h devine',
       'rev r a burke', 'darius', 'gavin'], dtype=object)

### Correct name misspelling or unconventional abbreviation
Compare to street body

In [943]:
df['address_new'] = df['address_new'].str.replace(r'^linskey barry$', "linskybarry", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^orton marotta$', "ortonmarotta", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^mary moore beatty$', "dr mary moore beatty", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^lamartine street$', "lamartine", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^charles st$', "charles", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^hilltop$', "hill top", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^ledgehill$', "ledge hill", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^saint richards$', "saint richard", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^sterling$', "henry sterling", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^monsignor a a jacobbe$', "monsignor albert a jacobbe", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^crowley rogers$', "crowleyrogers", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^brooke marshall$', "brook marshall", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^tibbets town$', "tibbetts town", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^gen william h devine$', "general william h devine", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^rev r a burke$', "rev richard a burke", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^david mugar$', "David G Mugar", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^monsignor ocallaghan$', "monsignor dennis f ocallaghan", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^roseberry$', "rosebery", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'^monsignor p j lydon$', "monsignor patrick j lydon", case=False, regex=True)
df['address_new'] = df['address_new'].str.replace(r'mcclellan', "william f mcclellan", case=False, regex=True)

In [944]:
df = pd.merge(df, df_street_address_unique_street_body, left_on="address_new", right_on="STREET_BODY_new", how="left")

In [945]:
df["x_longitude"] = df["x_longitude"].combine_first(df["POINT_X"])
df["y_latitude"] =df["y_latitude"].combine_first(df["POINT_Y"])

In [946]:
df.shape

(666484, 59)

In [947]:
df = df[list(df_columns) + ["address_new"]]

In [948]:
missing = df[df["x_longitude"].isna() & df["y_latitude"].isna()].copy()

In [949]:
missing.shape

(252, 24)

In [950]:
missing["address_new"].unique()

array(['faneuil hall market', 'spectacle island', 'desoto',
       'long island morris', 'vfw parkway', 'n harvard st',
       'David G Mugar', 'wood island', 'spectacle', 'w broadway',
       'pittsburg', 'logan airport trmnl', 'charlesgate e', 'william day',
       'stadium', 'embankment', 'daves', 'haul', 'v f w', 'new market',
       'degauthier', 'star king', 'lowney', 'yawkey', 'devine',
       'hefferan', 'river st sturbridge', 'w roxbury', 'love joy',
       'tchapitoulas', 'darius', 'gavin'], dtype=object)

There are 252 addresses without coordinates because the name doesn't appear in Live Address Street Management. We'll check against District 7 ZIP Code to see how many out of 304 may be in District 7.

In [959]:
df[df["x_longitude"].isna() & df["y_latitude"].isna()].shape

(252, 24)

In [951]:
district_shapefile = gpd.read_file("../data/city-council-district")
zip_shapefile = gpd.read_file("../data/zip-codes")

In [952]:
if zip_shapefile.crs != district_shapefile.crs:
    zip_shapefile = zip_shapefile.to_crs(district_shapefile.crs)
district_7 = district_shapefile[district_shapefile["DISTRICT"] == 7]
zip_in_district_7 = gpd.sjoin(zip_shapefile, district_7, how="inner", predicate="intersects")
unique_zip_codes = zip_in_district_7["ZIP5"].unique()

In [953]:
unique_zip_codes

array(['02125', '02118', '02130', '02121', '02119', '02115', '02116',
       '02120', '02215'], dtype=object)

8 addresses may be in District 7 going by the ZIP code. We deem like the number is small enough that we can drop the rows without impacting analysis for District 7.

In [954]:
missing[missing["zip"].isin(unique_zip_codes)]["address_new"]

154111    william day
175190     new market
192520     degauthier
322075         yawkey
322612         yawkey
323387         yawkey
511586         yawkey
666438         yawkey
Name: address_new, dtype: object

In [960]:
df = df.dropna(subset=["x_longitude", "y_latitude"], how="all")

In [961]:
df[df["x_longitude"].isna() & df["y_latitude"].isna()].shape

(0, 24)

In [965]:
df = df[df_columns]

In [966]:
geometry = [Point(xy) for xy in zip(df['x_longitude'], df['y_latitude'])]
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:3857")
gdf = gdf.to_crs("EPSG:3857")
gdf

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,city,state,zip,property_id,parcel_id,gpsy,gpsx,y_latitude,x_longitude,geometry
0,A1000569,INTEXT,Amendment to a Long Form,Interior/Exterior Work,This work is to Amend Permit ALT347244. Elimin...,Patrick Sharkey,"$36,500.00",$390.00,2021-01-28 16:29:26+00,2021-07-28 04:00:00+00,...,Boston,MA,02109,130392.0,3.038070e+08,2.956235e+06,777000.467910,42.359190,-71.052924,POINT (-71.053 42.359)
1,A100071,COB,Amendment to a Long Form,City of Boston,Change connector link layout from attached enc...,Renee Santeusanio,"$40,000.00",$29.00,2011-11-04 15:04:58+00,2012-05-04 04:00:00+00,...,West Roxbury,MA,02132,17268.0,2.012032e+09,2.920239e+06,751016.119617,42.260750,-71.149611,POINT (-71.15 42.261)
2,A1001012,OTHER,Amendment to a Long Form,Other,Amend Alt943748 to erect a roof deck as per pl...,Jusimar Oliveria,"$5,000.00",$70.00,2020-06-01 18:08:47+00,,...,Charlestown,MA,02129,113443.0,2.028370e+08,2.962078e+06,775710.380595,42.375243,-71.057585,POINT (-71.058 42.375)
3,A1001201,INTEXT,Amendment to a Long Form,Interior/Exterior Work,Build steel balcony over garden level with sta...,Andreas Hwang,"$74,295.75",$803.00,2019-11-13 18:38:56+00,2020-05-13 04:00:00+00,...,Roxbury,MA,02118,129994.0,4.025580e+08,2.949423e+06,769648.312667,42.340600,-71.080251,POINT (-71.08 42.341)
4,A100137,EXTREN,Amendment to a Long Form,Renovations - Exterior,Landscaping/stonework - amending permit #2801/...,,"$15,000.00",$182.00,2013-01-03 19:13:09+00,2013-07-03 04:00:00+00,...,Brighton,MA,02135,149852.0,2.204944e+09,2.950791e+06,749690.298777,42.344600,-71.154051,POINT (-71.154 42.345)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666479,U49993435,OSEAT,Use of Premises,Outside Seating,Outdoor patio / amenity space for commercial o...,Christine McMahon,$0.00,$50.00,2019-09-25 15:36:11+00,2020-03-25 04:00:00+00,...,Boston,MA,02114,154961.0,3.027170e+08,2.956658e+06,774864.862273,42.360383,-71.060818,POINT (-71.061 42.36)
666480,U49993492,TEMTRL,Use of Premises,Temporary Trailers,Temporary Construction Trailer at Herb Chamber...,Regina Olivieri,"$1,000.00",$53.00,2019-09-12 17:07:00+00,2020-03-12 04:00:00+00,...,Allston,MA,02134,424193.0,2.101574e+09,2.952823e+06,756414.438216,42.350100,-71.129145,POINT (-71.129 42.35)
666481,U4999352,DRIVE,Use of Premises,Driveway Installation,Owners would like to install a curb cut and a ...,Maria Dubrowski,"$2,700.00",$53.00,2012-05-30 18:28:59+00,2012-11-30 05:00:00+00,...,Jamaica Plain,MA,02130,24047.0,1.901861e+09,2.939398e+06,759722.466377,42.313220,-71.117131,POINT (-71.117 42.313)
666482,U49996367,RESPAR,Use of Premises,Residential Parking,2 car parking,Cesar DaSilva,"$7,000.00",$53.00,2023-04-10 15:10:24+00,2023-10-10 04:00:00+00,...,Dorchester,MA,02124,34863.0,1.701519e+09,2.931227e+06,771088.017082,42.290650,-71.075261,POINT (-71.075 42.291)


In [969]:
district_shapefile = gpd.read_file("../data/city-council-district")

count = 0
is_D7_addresses = []

for row in df.itertuples(index=True, name="Row"):
    address_point = Point(row.x_longitude, row.y_latitude)
    address_gdf = gpd.GeoDataFrame(geometry=[address_point], crs="EPSG:4326")
    address_gdf = address_gdf.to_crs(district_shapefile.crs)
    result = gpd.sjoin(address_gdf, district_shapefile, how="left", predicate="intersects")

    if result['DISTRICT'].values[0] == 7:
        count += 1
        is_D7_addresses.append(True)
    else:
        is_D7_addresses.append(False)

In [970]:
print(count)

53150


In [971]:
df['is_d7'] = is_D7_addresses
df

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,city,state,zip,property_id,parcel_id,gpsy,gpsx,y_latitude,x_longitude,is_d7
0,A1000569,INTEXT,Amendment to a Long Form,Interior/Exterior Work,This work is to Amend Permit ALT347244. Elimin...,Patrick Sharkey,"$36,500.00",$390.00,2021-01-28 16:29:26+00,2021-07-28 04:00:00+00,...,Boston,MA,02109,130392.0,3.038070e+08,2.956235e+06,777000.467910,42.359190,-71.052924,False
1,A100071,COB,Amendment to a Long Form,City of Boston,Change connector link layout from attached enc...,Renee Santeusanio,"$40,000.00",$29.00,2011-11-04 15:04:58+00,2012-05-04 04:00:00+00,...,West Roxbury,MA,02132,17268.0,2.012032e+09,2.920239e+06,751016.119617,42.260750,-71.149611,False
2,A1001012,OTHER,Amendment to a Long Form,Other,Amend Alt943748 to erect a roof deck as per pl...,Jusimar Oliveria,"$5,000.00",$70.00,2020-06-01 18:08:47+00,,...,Charlestown,MA,02129,113443.0,2.028370e+08,2.962078e+06,775710.380595,42.375243,-71.057585,False
3,A1001201,INTEXT,Amendment to a Long Form,Interior/Exterior Work,Build steel balcony over garden level with sta...,Andreas Hwang,"$74,295.75",$803.00,2019-11-13 18:38:56+00,2020-05-13 04:00:00+00,...,Roxbury,MA,02118,129994.0,4.025580e+08,2.949423e+06,769648.312667,42.340600,-71.080251,True
4,A100137,EXTREN,Amendment to a Long Form,Renovations - Exterior,Landscaping/stonework - amending permit #2801/...,,"$15,000.00",$182.00,2013-01-03 19:13:09+00,2013-07-03 04:00:00+00,...,Brighton,MA,02135,149852.0,2.204944e+09,2.950791e+06,749690.298777,42.344600,-71.154051,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666479,U49993435,OSEAT,Use of Premises,Outside Seating,Outdoor patio / amenity space for commercial o...,Christine McMahon,$0.00,$50.00,2019-09-25 15:36:11+00,2020-03-25 04:00:00+00,...,Boston,MA,02114,154961.0,3.027170e+08,2.956658e+06,774864.862273,42.360383,-71.060818,False
666480,U49993492,TEMTRL,Use of Premises,Temporary Trailers,Temporary Construction Trailer at Herb Chamber...,Regina Olivieri,"$1,000.00",$53.00,2019-09-12 17:07:00+00,2020-03-12 04:00:00+00,...,Allston,MA,02134,424193.0,2.101574e+09,2.952823e+06,756414.438216,42.350100,-71.129145,False
666481,U4999352,DRIVE,Use of Premises,Driveway Installation,Owners would like to install a curb cut and a ...,Maria Dubrowski,"$2,700.00",$53.00,2012-05-30 18:28:59+00,2012-11-30 05:00:00+00,...,Jamaica Plain,MA,02130,24047.0,1.901861e+09,2.939398e+06,759722.466377,42.313220,-71.117131,False
666482,U49996367,RESPAR,Use of Premises,Residential Parking,2 car parking,Cesar DaSilva,"$7,000.00",$53.00,2023-04-10 15:10:24+00,2023-10-10 04:00:00+00,...,Dorchester,MA,02124,34863.0,1.701519e+09,2.931227e+06,771088.017082,42.290650,-71.075261,False


In [972]:
df.to_csv("../data/d7-approved_building-permit-new.csv", index=False)