In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import requests
import json

In [3]:
# Path to the permit data CSV, relative to the notebook's working directory.
filepath = "data/abp_data.csv"

# Read the whole CSV into a DataFrame; later cells filter this frame.
data = pd.read_csv(filepath)

In [4]:
# Preview the loaded permit table.
data

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,occupancytype,sq_feet,address,city,state,zip,property_id,parcel_id,lat,long
0,A100071,COB,Amendment to a Long Form,City of Boston,Change connector link layout from attached enc...,Renee Santeusanio,40000.00,429.0,2011-11-04 11:04:58,2012-05-04 00:00:00,...,Comm,170.0,175 W Boundary RD,West Roxbury,MA,02132,17268,2012032000,42.260750,-71.149610
1,A1001012,OTHER,Amendment to a Long Form,Other,Amend Alt943748 to erect a roof deck as per pl...,Jusimar Oliveria,5000.00,70.0,2020-06-01 14:08:47,2020-12-01 00:00:00,...,1-3FAM,0.0,15 Prospect ST,Charlestown,MA,02129,113443,0202837000,42.375243,-71.057585
2,A1001201,INTEXT,Amendment to a Long Form,Interior/Exterior Work,Build steel balcony over garden level with sta...,Andreas Hwang,74295.75,803.0,2019-11-13 13:38:56,2020-05-13 00:00:00,...,Multi,0.0,211 W Springfield ST,Roxbury,MA,02118,129994,0402558000,42.340600,-71.080250
3,A100137,EXTREN,Amendment to a Long Form,Renovations - Exterior,Landscaping/stonework - amending permit #2801/...,,15000.00,206.0,2013-01-03 14:13:09,2013-07-03 00:00:00,...,1-2FAM,0.0,14 William Jackson AVE,Brighton,MA,02135,149852,2204944000,42.344600,-71.154050
4,A1001913,INTREN,Amendment to a Long Form,Renovations - Interior NSC,Amendment to issued permit ALT888985. relocate...,Ping Mandawe,1.00,33.0,2019-10-18 09:21:00,2020-04-18 00:00:00,...,Comm,0.0,130-140 Brighton AVE,Allston,MA,02134,20073,2100846000,42.352760,-71.131850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478014,U49993435,OSEAT,Use of Premises,Outside Seating,Outdoor patio / amenity space for commercial o...,Christine McMahon,0.00,50.0,2019-09-25 11:36:11,2020-03-25 00:00:00,...,Mixed,0.0,3 Center Pz,Boston,MA,02108,154961,0302713000,42.359180,-71.060040
478015,U49993435,OSEAT,Use of Premises,Outside Seating,Outdoor patio / amenity space for commercial o...,Christine McMahon,0.00,50.0,2019-09-25 11:36:11,2020-03-25 00:00:00,...,Mixed,0.0,3 Center Pz,Boston,MA,02108,154961,0302717000,42.359180,-71.060040
478016,U49993492,TEMTRL,Use of Premises,Temporary Trailers,Temporary Construction Trailer at Herb Chamber...,Regina Olivieri,1000.00,53.0,2019-09-12 13:07:00,2020-03-12 00:00:00,...,Comm,0.0,1188 Commonwealth Av,Allston,MA,02134,424193,2101574010,,
478017,U4999352,DRIVE,Use of Premises,Driveway Installation,Owners would like to install a curb cut and a ...,Maria Dubrowski,2700.00,53.0,2012-05-30 14:28:59,2012-11-30 00:00:00,...,1-2FAM,0.0,46 Burroughs ST,Jamaica Plain,MA,02130,24047,1901861000,42.313220,-71.117130


Here we filter out all the permits we don't care about, so that we can rerun our analysis from Deliverable 1.

In [10]:
# A permit is kept when either its permit type or its description is one of the
# values listed below.
permit_types_kept = ['Erect/New Construction', 'Excavation Permit', 'Foundation Permit']
descriptions_kept = ['Excavation Borings Test Pits', 'New construction', 'Erect']
df = data.loc[
    data['permittypedescr'].isin(permit_types_kept)
    | data['description'].isin(descriptions_kept)
]

# Single-column frames holding the names attached to the kept permits.
applicants = df['applicant'].to_frame()
owners = df['owner'].to_frame()
applicants

Unnamed: 0,applicant
100,Amy Sowersby
200,Hugh Meehan
275,Joe Hennessey
276,Joe Hennessey
444,Joshua Swerling
...,...
475958,john mercer
477044,David Amato
477379,Bruce Fulford
477471,Jorge Rodriguez


In [11]:
# Replace missing names with the placeholder "na", then coerce every entry to str.
# fillna returns a new frame instead of modifying its receiver, so the result has to
# be assigned; otherwise missing names fall through to str() and become "nan".
applicants = applicants.fillna("na").astype(str)

owners = owners.fillna("na").astype(str)
applicants

Unnamed: 0,applicant
100,Amy Sowersby
200,Hugh Meehan
275,Joe Hennessey
276,Joe Hennessey
444,Joshua Swerling
...,...
475958,john mercer
477044,David Amato
477379,Bruce Fulford
477471,Jorge Rodriguez


In [12]:
def prep_df_column(col, generate_map=False):
    """
    Normalise a single-column frame of names so that spelling variants compare equal.

    col: A pandas column-frame of strings (only the first column is read)
    generate_map: Whether or not to also generate a map from each edited value back
                  to an original value

    Outputs a list with all the entries set to lowercase and with spaces removed.
    When generate_map is True, outputs a tuple (list, dict) where the dict is keyed by
    edited value; if several originals collapse to the same edited value, the last
    one in the column wins.
    """
    # Pull the column out once; the values are paired by position below, so the
    # frame's index labels (which need not be 0..n-1 after filtering) never matter.
    originals = col.iloc[:, 0].tolist()
    new_col = [name.lower().replace(" ", "") for name in originals]
    if generate_map:
        col_map = dict(zip(new_col, originals))
        return new_col, col_map
    return new_col

In [13]:
# we split the encoders into two so they can keep track of their inverse transforms
# NOTE: `applcants_encoder` is misspelled, but the cell that decodes the applicant
# labels refers to it by this exact name, so the spelling is kept here.
applcants_encoder = LabelEncoder()
owners_encoder = LabelEncoder()

# here we also optionally output the mapping between the preprocessed values and the original, which we can use for display purposes
# however, building the map takes longer than we'd like
# applicants_cleaned, applicants_map = prep_df_column(applicants, generate_map=True)
# owners_cleaned, owners_map = prep_df_column(owners, generate_map=True)

# Normalised (lowercased, space-free) name lists, without the display map.
applicants_cleaned = prep_df_column(applicants, generate_map=False)
owners_cleaned = prep_df_column(owners, generate_map=False)

# Fit each encoder on its own name list and keep the integer labels as a one-column frame.
applicants_labels = pd.DataFrame(applcants_encoder.fit_transform(applicants_cleaned))
owners_labels = pd.DataFrame(owners_encoder.fit_transform(owners_cleaned))

In [14]:
# Count how many permits carry each encoded applicant label.
applicants_labels_vals = applicants_labels.value_counts()
# value_counts() on a one-column frame keys every count by a 1-tuple; unwrap it to
# the bare integer label so the encoder can translate it back to a name.
index = [label for (label,) in applicants_labels_vals.index]
applicants_labels_vals.index = applcants_encoder.inverse_transform(index)
applicants_labels_vals.to_frame()

Unnamed: 0,0
,8408
timothyjohnson,63
antonioferrara,61
antonioruscito,52
arthurchoo,41
...,...
lauderlylima,1
laurenslaven,1
lawrencemahoney,1
lawrenceobrien,1


In [16]:
# Count how many permits carry each encoded owner label.
owners_labels_vals = owners_labels.value_counts()
# value_counts() on a one-column frame keys every count by a 1-tuple; unwrap it to
# the bare integer label so the encoder can translate it back to a name.
owners_index = [label for (label,) in owners_labels_vals.index]
owners_labels_vals.index = owners_encoder.inverse_transform(owners_index)
owners_labels_vals.to_frame()

Unnamed: 0,0
,2921
cityofboston,242
bostonhousingauthority,106
harvardre/allstoninc,69
csndcheritagehomesllc,59
...,...
martinoma,1
martinhermans,1
martinezsilverioe,1
martinezpetronila,1


## Census Tract to Demographic Filtering

In [23]:
# Function to convert the Lat and Long into a census tract.
# Refer to https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html for the FIPS code to census tract conversion
# FCC Census block conversion API link: https://geo.fcc.gov/api/census/#!/block/get_block_find

def Coord_to_census_tract(Lat, Long, timeout=30):
    """
    Look up the census tract that contains a coordinate using the FCC Census Block API.

    Lat: latitude of the point
    Long: longitude of the point
    timeout: seconds to wait for the FCC service before giving up

    Outputs the tract as a string of the form "TTTT.SS", cut from characters 5-10 of
    the block FIPS code returned by the API. Leading zeros are kept (e.g. "0808.01");
    callers that need a number can pass the result to float().

    Raises requests.HTTPError when the service answers with an error status, and
    ValueError when the response carries no block FIPS code for the point.
    """
    # Let requests build and escape the query string instead of concatenating it.
    response = requests.get(
        "https://geo.fcc.gov/api/census/block/find",
        params={"latitude": Lat, "longitude": Long, "showall": "true", "format": "json"},
        timeout=timeout,
    )
    response.raise_for_status()

    block = response.json().get("Block") or {}
    FIPS_code = block.get("FIPS")
    if not FIPS_code:
        raise ValueError(
            "FCC block lookup returned no FIPS code for (" + str(Lat) + ", " + str(Long) + ")"
        )

    # The tract is the 6 characters that follow the 2-digit state and 3-digit county
    # codes. No zero-stripping is done here: removing "0" characters from the string
    # would also remove interior zeros and change the tract number.
    Census_tract = FIPS_code[5:11]
    return Census_tract[:4] + "." + Census_tract[4:]

In [18]:
# Keep only the permits that have both a latitude and a longitude, as a frame
# independent of df.
has_coordinates = df['lat'].notna() & df['long'].notna()
df2 = df.loc[has_coordinates].copy()
df2

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,occupancytype,sq_feet,address,city,state,zip,property_id,parcel_id,lat,long
200,A1048,ERECT,Amendment to a Long Form,Erect,Revise rear apartment layout. Add roof deck to...,Hugh Meehan,0.0,39.0,2010-03-30 11:51:01,2010-09-30 00:00:00,...,Mixed,0.0,1435-1437 Tremont ST,Mission Hill,MA,02120,172949,0902932002,42.331915,-71.096461
444,A121115,ERECT,Amendment to a Long Form,Erect,Slab Foundation and related Site improvements...,Joshua Swerling,0.0,386.0,2012-04-18 09:45:56,2012-10-18 00:00:00,...,Comm,0.0,1833 Centre ST,West Roxbury,MA,02132,27732,2006346010,42.286860,-71.154040
445,A121135,ERECT,Amendment to a Long Form,Erect,Amend plans to the existing permit #ERT70819 ...,dren luci,12000.0,158.0,2012-04-12 14:59:18,2012-10-12 00:00:00,...,1-2FAM,0.0,109 Tudor ST,South Boston,MA,02127,138503,0600695000,42.336341,-71.053275
620,A176767,NEWCON,Amendment to a Long Form,New construction,Submitting MEP Plans and all Engineering Affid...,Anthony Ross,0.0,86.0,2017-03-23 15:59:21,2017-09-23 00:00:00,...,Mixed,18000.0,337-345 Belgrade AVE,Roslindale,MA,02131,13659,2001401000,42.286233,-71.145838
637,A18115,NEWCON,Amendment to a Long Form,New construction,Construct and fit-up of partial 4th floor prac...,Paul Pennie,625000.0,7445.0,2010-09-02 12:07:46,2011-03-02 00:00:00,...,Other,40910.0,31 Hemenway ST,Boston,MA,02115,72664,0401763000,42.345755,-71.089652
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469184,SF93847,NEWCON,Short Form Bldg Permit,New construction,Install 6 ft (W) x12 ft (L) garden Storage She...,Kristopher Furlonge,1000.0,30.0,2011-09-02 09:23:58,2012-03-02 00:00:00,...,VacLd,72.0,108 Perham ST,West Roxbury,MA,02132,109223,2008388000,42.283040,-71.164880
471339,SF95509,ERECT,Short Form Bldg Permit,Erect,"1-24x24x24"" stage with 1 set stairs and railin...",Boston Party Rental Inc,1350.0,40.0,2011-09-14 15:30:35,2012-03-14 00:00:00,...,VacLd,576.0,1 Jewish War Veterans DR,Jamaica Plain,MA,02121,163434,1203486000,42.301029,-71.088551
472003,SF96041,ERECT,Short Form Bldg Permit,Erect,Annual Erection of Harvard Stadium Air Support...,Christopher McCann,42000.0,440.0,2011-11-29 13:32:07,,...,Mixed,192000.0,79 N Harvard ST,Allston,MA,02134,168218,2200533000,42.366423,-71.126254
475958,SF99343,ERECT,Short Form Bldg Permit,Erect,remove rear stairs and replace same as existing,john mercer,5000.0,70.0,2011-10-06 08:40:56,2012-04-06 00:00:00,...,1-3FAM,3000.0,174 W Seventh ST,South Boston,MA,02127,125698,0600920000,42.334770,-71.051280


In [24]:
# One lookup per permit row; the tract string is then converted to a float.
tract_strings = df2.apply(
    lambda permit: Coord_to_census_tract(permit['lat'], permit['long']),
    axis=1,
)
df2['Cencus_Tract'] = tract_strings.astype(float)
df2

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,sq_feet,address,city,state,zip,property_id,parcel_id,lat,long,Cencus_Tract
200,A1048,ERECT,Amendment to a Long Form,Erect,Revise rear apartment layout. Add roof deck to...,Hugh Meehan,0.0,39.0,2010-03-30 11:51:01,2010-09-30 00:00:00,...,0.0,1435-1437 Tremont ST,Mission Hill,MA,02120,172949,0902932002,42.331915,-71.096461,808.01
444,A121115,ERECT,Amendment to a Long Form,Erect,Slab Foundation and related Site improvements...,Joshua Swerling,0.0,386.0,2012-04-18 09:45:56,2012-10-18 00:00:00,...,0.0,1833 Centre ST,West Roxbury,MA,02132,27732,2006346010,42.286860,-71.154040,1302.00
445,A121135,ERECT,Amendment to a Long Form,Erect,Amend plans to the existing permit #ERT70819 ...,dren luci,12000.0,158.0,2012-04-12 14:59:18,2012-10-12 00:00:00,...,0.0,109 Tudor ST,South Boston,MA,02127,138503,0600695000,42.336341,-71.053275,608.00
620,A176767,NEWCON,Amendment to a Long Form,New construction,Submitting MEP Plans and all Engineering Affid...,Anthony Ross,0.0,86.0,2017-03-23 15:59:21,2017-09-23 00:00:00,...,18000.0,337-345 Belgrade AVE,Roslindale,MA,02131,13659,2001401000,42.286233,-71.145838,1106.07
637,A18115,NEWCON,Amendment to a Long Form,New construction,Construct and fit-up of partial 4th floor prac...,Paul Pennie,625000.0,7445.0,2010-09-02 12:07:46,2011-03-02 00:00:00,...,40910.0,31 Hemenway ST,Boston,MA,02115,72664,0401763000,42.345755,-71.089652,104.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469184,SF93847,NEWCON,Short Form Bldg Permit,New construction,Install 6 ft (W) x12 ft (L) garden Storage She...,Kristopher Furlonge,1000.0,30.0,2011-09-02 09:23:58,2012-03-02 00:00:00,...,72.0,108 Perham ST,West Roxbury,MA,02132,109223,2008388000,42.283040,-71.164880,1302.00
471339,SF95509,ERECT,Short Form Bldg Permit,Erect,"1-24x24x24"" stage with 1 set stairs and railin...",Boston Party Rental Inc,1350.0,40.0,2011-09-14 15:30:35,2012-03-14 00:00:00,...,576.0,1 Jewish War Veterans DR,Jamaica Plain,MA,02121,163434,1203486000,42.301029,-71.088551,9803.00
472003,SF96041,ERECT,Short Form Bldg Permit,Erect,Annual Erection of Harvard Stadium Air Support...,Christopher McCann,42000.0,440.0,2011-11-29 13:32:07,,...,192000.0,79 N Harvard ST,Allston,MA,02134,168218,2200533000,42.366423,-71.126254,1.00
475958,SF99343,ERECT,Short Form Bldg Permit,Erect,remove rear stairs and replace same as existing,john mercer,5000.0,70.0,2011-10-06 08:40:56,2012-04-06 00:00:00,...,3000.0,174 W Seventh ST,South Boston,MA,02127,125698,0600920000,42.334770,-71.051280,608.00


In [25]:
# Load the per-census-tract table (population figures and sub-neighborhood names).
sub_neigh = pd.read_csv("data/Sub-Neighborhoods.csv")
sub_neigh

Unnamed: 0,Census Tract#,Zipcode,Neighborhood,US Geonames Sub-neigborhood,Total Population,Black,Black Proportion,White,White Proportion,American Indian & Alaska Native,...,Male,Male Proportion,Female,Female Proportion,Lat,Long,Primary Sub-Neighborhood,Secondary Sub-Neighborhood,Teritriary Sub-Neighborhood(s),T-Stop/Commuter Rail
0,1.00,"02134, 02135",Brighton,,5324,311,5.84%,3315,62.27%,0,...,2578,48.42%,2746,51.58%,42.361484,-71.138588,Lower Allston,Allston,Packard's Corner,Boston Landing
1,2.01,02135,Brighton,,3991,237,5.94%,3164,79.28%,17,...,1800,45.10%,2191,54.90%,42.354066,-71.161168,Oak Square,Brighton,Hunnewell Hill,
2,2.02,02135,Brighton,,4272,263,6.16%,3031,70.95%,0,...,2175,50.91%,2097,49.09%,42.352605,-71.154344,Oak Square,Brighton,Allston,
3,3.01,02135,Brighton,,2946,216,7.33%,2203,74.78%,0,...,1650,56.01%,1296,43.99%,42.354144,-71.168827,Hunnwell Hill,Oak Square,,
4,3.02,02135,Brighton,,3469,359,10.35%,2346,67.63%,25,...,1733,49.96%,1736,50.04%,42.347258,-71.167686,Brighton,St. Elizabeth's,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,9815.02,02151,East Boston,,0,0,0.00%,0,0.00%,0,...,0,0.00%,0,0.00%,42.396367,-71.004379,Beachmont,Orient Heights,,Beachmont
177,9816.00,02128,East Boston,,0,0,0.00%,0,0.00%,0,...,0,0.00%,0,0.00%,42.388621,-70.993442,Orient Heights,Beachmont,,Suffolk Downs
178,9817.00,02116,Beacon Hill,,0,0,0.00%,0,0.00%,0,...,0,0.00%,0,0.00%,42.355087,-71.065743,Downtown Crossing,,,Boylston Street
179,9818.00,"02130, 02215",Jamaica Plain,,26,9,34.62%,17,65.38%,0,...,4,15.38%,22,84.62%,42.323121,-71.116882,High Street Hill,,,Heath Street / Back of the Hill


In [26]:
# Inspect the tract identifier column and its dtype.
sub_neigh['Census Tract#']

0         1.00
1         2.01
2         2.02
3         3.01
4         3.02
        ...   
176    9815.02
177    9816.00
178    9817.00
179    9818.00
180    9901.01
Name: Census Tract#, Length: 181, dtype: float64

In [27]:
# Make sure the tract identifiers are floats, converting the whole column at once.
sub_neigh['Census Tract#'] = sub_neigh['Census Tract#'].astype(float)
sub_neigh

Unnamed: 0,Census Tract#,Zipcode,Neighborhood,US Geonames Sub-neigborhood,Total Population,Black,Black Proportion,White,White Proportion,American Indian & Alaska Native,...,Male,Male Proportion,Female,Female Proportion,Lat,Long,Primary Sub-Neighborhood,Secondary Sub-Neighborhood,Teritriary Sub-Neighborhood(s),T-Stop/Commuter Rail
0,1.00,"02134, 02135",Brighton,,5324,311,5.84%,3315,62.27%,0,...,2578,48.42%,2746,51.58%,42.361484,-71.138588,Lower Allston,Allston,Packard's Corner,Boston Landing
1,2.01,02135,Brighton,,3991,237,5.94%,3164,79.28%,17,...,1800,45.10%,2191,54.90%,42.354066,-71.161168,Oak Square,Brighton,Hunnewell Hill,
2,2.02,02135,Brighton,,4272,263,6.16%,3031,70.95%,0,...,2175,50.91%,2097,49.09%,42.352605,-71.154344,Oak Square,Brighton,Allston,
3,3.01,02135,Brighton,,2946,216,7.33%,2203,74.78%,0,...,1650,56.01%,1296,43.99%,42.354144,-71.168827,Hunnwell Hill,Oak Square,,
4,3.02,02135,Brighton,,3469,359,10.35%,2346,67.63%,25,...,1733,49.96%,1736,50.04%,42.347258,-71.167686,Brighton,St. Elizabeth's,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,9815.02,02151,East Boston,,0,0,0.00%,0,0.00%,0,...,0,0.00%,0,0.00%,42.396367,-71.004379,Beachmont,Orient Heights,,Beachmont
177,9816.00,02128,East Boston,,0,0,0.00%,0,0.00%,0,...,0,0.00%,0,0.00%,42.388621,-70.993442,Orient Heights,Beachmont,,Suffolk Downs
178,9817.00,02116,Beacon Hill,,0,0,0.00%,0,0.00%,0,...,0,0.00%,0,0.00%,42.355087,-71.065743,Downtown Crossing,,,Boylston Street
179,9818.00,"02130, 02215",Jamaica Plain,,26,9,34.62%,17,65.38%,0,...,4,15.38%,22,84.62%,42.323121,-71.116882,High Street Hill,,,Heath Street / Back of the Hill


In [28]:
# Drop the trailing '%' and store the proportion as a float (still in percent).
# Going through str first keeps this cell safe to re-run: a column that is already
# float round-trips unchanged instead of failing when a number is sliced.
sub_neigh['White Proportion'] = (
    sub_neigh['White Proportion'].astype(str).str.rstrip('%').astype(float)
)
sub_neigh

Unnamed: 0,Census Tract#,Zipcode,Neighborhood,US Geonames Sub-neigborhood,Total Population,Black,Black Proportion,White,White Proportion,American Indian & Alaska Native,...,Male,Male Proportion,Female,Female Proportion,Lat,Long,Primary Sub-Neighborhood,Secondary Sub-Neighborhood,Teritriary Sub-Neighborhood(s),T-Stop/Commuter Rail
0,1.00,"02134, 02135",Brighton,,5324,311,5.84%,3315,62.27,0,...,2578,48.42%,2746,51.58%,42.361484,-71.138588,Lower Allston,Allston,Packard's Corner,Boston Landing
1,2.01,02135,Brighton,,3991,237,5.94%,3164,79.28,17,...,1800,45.10%,2191,54.90%,42.354066,-71.161168,Oak Square,Brighton,Hunnewell Hill,
2,2.02,02135,Brighton,,4272,263,6.16%,3031,70.95,0,...,2175,50.91%,2097,49.09%,42.352605,-71.154344,Oak Square,Brighton,Allston,
3,3.01,02135,Brighton,,2946,216,7.33%,2203,74.78,0,...,1650,56.01%,1296,43.99%,42.354144,-71.168827,Hunnwell Hill,Oak Square,,
4,3.02,02135,Brighton,,3469,359,10.35%,2346,67.63,25,...,1733,49.96%,1736,50.04%,42.347258,-71.167686,Brighton,St. Elizabeth's,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,9815.02,02151,East Boston,,0,0,0.00%,0,0.00,0,...,0,0.00%,0,0.00%,42.396367,-71.004379,Beachmont,Orient Heights,,Beachmont
177,9816.00,02128,East Boston,,0,0,0.00%,0,0.00,0,...,0,0.00%,0,0.00%,42.388621,-70.993442,Orient Heights,Beachmont,,Suffolk Downs
178,9817.00,02116,Beacon Hill,,0,0,0.00%,0,0.00,0,...,0,0.00%,0,0.00%,42.355087,-71.065743,Downtown Crossing,,,Boylston Street
179,9818.00,"02130, 02215",Jamaica Plain,,26,9,34.62%,17,65.38,0,...,4,15.38%,22,84.62%,42.323121,-71.116882,High Street Hill,,,Heath Street / Back of the Hill


In [29]:
# Tracts whose white proportion is above 75%.
sub_neigh[sub_neigh['White Proportion'] > 75.0]

Unnamed: 0,Census Tract#,Zipcode,Neighborhood,US Geonames Sub-neigborhood,Total Population,Black,Black Proportion,White,White Proportion,American Indian & Alaska Native,...,Male,Male Proportion,Female,Female Proportion,Lat,Long,Primary Sub-Neighborhood,Secondary Sub-Neighborhood,Teritriary Sub-Neighborhood(s),T-Stop/Commuter Rail
1,2.01,02135,Brighton,,3991,237,5.94%,3164,79.28,17,...,1800,45.10%,2191,54.90%,42.354066,-71.161168,Oak Square,Brighton,Hunnewell Hill,
5,4.01,02135,Brighton,Aberdeen,5691,137,2.41%,4404,77.39,34,...,2669,46.90%,3022,53.10%,42.343849,-71.149293,St. Elizabeth's,Oak Square,Corey Hill,Sutherland Road/Chiswick Road/Chestnut Hill Av...
6,4.02,02135,Brighton,Brighton,3494,150,4.29%,2868,82.08,0,...,1806,51.69%,1688,48.31%,42.344174,-71.158829,St. Elizabeth's,Brighton,Aberdeen/Oak Square,South Street
7,5.02,"02135, 02467",Brighton,,5796,155,2.67%,4917,84.83,12,...,2574,44.41%,3222,55.59%,42.331505,-71.157497,Chestnut Hill Neighborhood Association,Fisher Hill,,Chestnut Hill Ave/ Reservoir/Cleveland Circle
8,5.03,02135,Brighton,,2007,77,3.84%,1633,81.37,109,...,1002,49.93%,1005,50.07%,42.339636,-71.150099,Aberdeen,Salisbury Road-Corey Farm,Corey Hill,Sutherland Road/Chiswick Road/Chestnut Hill Av...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,1302.00,02132,West Roxbury,Highland,5299,255,4.81%,4812,90.81,56,...,2526,47.67%,2773,52.33%,42.287332,-71.157012,Brook Farm,,,Highland
155,1303.00,02132,West Roxbury,West Roxbury,4519,170,3.76%,3948,87.36,0,...,2094,46.34%,2425,53.66%,42.279129,-71.150710,Bellevue Hill,Roxbury,,Woodard Rd @ Wren
156,1304.02,02132,West Roxbury,,4934,204,4.13%,4084,82.77,95,...,2128,43.13%,2806,56.87%,42.272401,-71.169406,Upper Washington/Spring Street,,,Spring Street @ Goud
173,9812.02,"02127, 02210",South Boston Waterfront,Fort Winthrop (historical),215,27,12.56%,179,83.26,0,...,206,95.81%,9,4.19%,42.343916,-71.024362,City Point,Telegraph Hill,D Street/West Broadway,


In [30]:
# Attach the tract-level columns to every permit whose tract number appears in
# sub_neigh (inner join, which is the default for merge).
merged = df2.merge(
    sub_neigh,
    how="inner",
    left_on="Cencus_Tract",
    right_on="Census Tract#",
)
merged

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,Male,Male Proportion,Female,Female Proportion,Lat,Long,Primary Sub-Neighborhood,Secondary Sub-Neighborhood,Teritriary Sub-Neighborhood(s),T-Stop/Commuter Rail
0,A1048,ERECT,Amendment to a Long Form,Erect,Revise rear apartment layout. Add roof deck to...,Hugh Meehan,0.0,39.00,2010-03-30 11:51:01,2010-09-30 00:00:00,...,2274,50.34%,2243,49.66%,42.334606,-71.096210,Frederick Douglas Square Historic District,Lower Roxbury,Fenway-Kenmore,Longwood Medical Area/Ruggles
1,A21302,NEWCON,Amendment to a Long Form,New construction,Add 2 individual roof decks to 2 top floor uni...,Philip Hresko,15000.0,246.00,2010-05-13 13:59:40,2010-11-13 00:00:00,...,2274,50.34%,2243,49.66%,42.334606,-71.096210,Frederick Douglas Square Historic District,Lower Roxbury,Fenway-Kenmore,Longwood Medical Area/Ruggles
2,COO752522,ERECT,Certificate of Occupancy,Erect,(18) Residential units and;retail space on fir...,Ken McLaughlin,0.0,560.00,2018-03-27 00:00:00,,...,2274,50.34%,2243,49.66%,42.334606,-71.096210,Frederick Douglas Square Historic District,Lower Roxbury,Fenway-Kenmore,Longwood Medical Area/Ruggles
3,COO752522,ERECT,Certificate of Occupancy,Erect,(18) Residential units and;retail space on fir...,Ken McLaughlin,0.0,560.00,2018-03-27 00:00:00,,...,2274,50.34%,2243,49.66%,42.334606,-71.096210,Frederick Douglas Square Historic District,Lower Roxbury,Fenway-Kenmore,Longwood Medical Area/Ruggles
4,COO752522,ERECT,Certificate of Occupancy,Erect,(18) Residential units and;retail space on fir...,Ken McLaughlin,0.0,560.00,2018-03-27 00:00:00,,...,2274,50.34%,2243,49.66%,42.334606,-71.096210,Frederick Douglas Square Historic District,Lower Roxbury,Fenway-Kenmore,Longwood Medical Area/Ruggles
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9164,EXCA-800149,Main,Excavation Permit,Maintenance,install new light base; //; Boston Police Deta...,,0.0,110.00,2018-02-08 21:02:48,2018-03-09 00:00:00,...,295,77.63%,85,22.37%,42.300613,-71.094613,Roxbury,Dorchester,Forest Hills Woodbourne,
9165,SF95509,ERECT,Short Form Bldg Permit,Erect,"1-24x24x24"" stage with 1 set stairs and railin...",Boston Party Rental Inc,1350.0,40.00,2011-09-14 15:30:35,2012-03-14 00:00:00,...,295,77.63%,85,22.37%,42.300613,-71.094613,Roxbury,Dorchester,Forest Hills Woodbourne,
9166,EXCA-930860,Main,Excavation Permit,Maintenance,REPLACE EXIST TEL CONDUIT RDWY-SDWK 88x2 // BO...,,0.0,7584.35,2019-04-11 15:45:39,2019-04-16 00:00:00,...,0,0.00%,0,0.00%,42.388621,-70.993442,Orient Heights,Beachmont,,Suffolk Downs
9167,EXCA-930864,Main,Excavation Permit,Maintenance,REPLACE EXIST TEL CONDUIT RDWY-SDWK 95x2 // BO...,,0.0,3075.70,2019-04-11 15:46:06,2019-04-16 00:00:00,...,0,0.00%,0,0.00%,42.388621,-70.993442,Orient Heights,Beachmont,,Suffolk Downs


In [31]:
# Permits located in tracts whose white proportion is above 75%.
merged[merged['White Proportion'] > 75.0]

Unnamed: 0,permitnumber,worktype,permittypedescr,description,comments,applicant,declared_valuation,total_fees,issued_date,expiration_date,...,Male,Male Proportion,Female,Female Proportion,Lat,Long,Primary Sub-Neighborhood,Secondary Sub-Neighborhood,Teritriary Sub-Neighborhood(s),T-Stop/Commuter Rail
38,A121115,ERECT,Amendment to a Long Form,Erect,Slab Foundation and related Site improvements...,Joshua Swerling,0.0,386.00,2012-04-18 09:45:56,2012-10-18 00:00:00,...,2526,47.67%,2773,52.33%,42.287332,-71.157012,Brook Farm,,,Highland
39,COO563429,ERECT,Certificate of Occupancy,Erect,Eight Unit BuildingERT301170 issued: 3/19/15 ...,John Omalley,900000.0,3875.00,2016-02-23 11:11:50,,...,2526,47.67%,2773,52.33%,42.287332,-71.157012,Brook Farm,,,Highland
40,COO706319,ERECT,Certificate of Occupancy,Erect,Single Family Dwelling Ert 411049 issued 5/21/15,Anthony Ross,300000.0,235.00,2017-05-17 14:07:38,,...,2526,47.67%,2773,52.33%,42.287332,-71.157012,Brook Farm,,,Highland
41,COO987899,ERECT,Certificate of Occupancy,Erect,Sixteen (16);Residential Units ;Commercial;Spa...,greg ALLSTON STREET GROUP LLC,3362500.0,5420.00,2019-09-03 10:27:23,,...,2526,47.67%,2773,52.33%,42.287332,-71.157012,Brook Farm,,,Highland
42,E106890,ERECT,Electrical Permit,Erect,electrical work in conjunction with installati...,DENNIS SULLIVAN,1.0,26.00,2011-11-22 08:48:36,2012-05-22 00:00:00,...,2526,47.67%,2773,52.33%,42.287332,-71.157012,Brook Farm,,,Highland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9136,EXCA-942441,Main,Excavation Permit,Maintenance,REPLACE DEFECTIVE CONDUIT RDWY 5x2 / BOSTON FI...,,0.0,309.70,2019-09-20 15:05:45,2019-05-21 00:00:00,...,1095,44.01%,1393,55.99%,42.367537,-71.051959,East Boston,Waterfront,,
9137,EXCA-956817,Main,Excavation Permit,Maintenance,REPAIR EXISTING TELEPHONE CONDUIT IN SDWK 5x5 ...,,0.0,255.00,2019-09-23 08:26:58,2019-06-17 00:00:00,...,1095,44.01%,1393,55.99%,42.367537,-71.051959,East Boston,Waterfront,,
9138,EXCA-968921,Main,Excavation Permit,Maintenance,ABANDON; INACTIVE GAS SERVICE; 5 X 4 RDWY;; GU...,,0.0,3103.78,2019-06-26 10:15:26,2019-07-28 00:00:00,...,1095,44.01%,1393,55.99%,42.367537,-71.051959,East Boston,Waterfront,,
9139,EXCA-971221,Main,Excavation Permit,Maintenance,PERM TRENCH REPAIR;;; SUSI;; 9:30AM-3:30PM; MO...,,0.0,118.00,2019-07-03 15:57:41,2019-08-06 00:00:00,...,1095,44.01%,1393,55.99%,42.367537,-71.051959,East Boston,Waterfront,,


In [32]:
# Fraction of tracts whose white proportion is above 50%.
len(sub_neigh[sub_neigh['White Proportion'] > 50.0]) / len(sub_neigh)

0.6408839779005525

In [33]:
# Fraction of merged permits that fall in tracts whose white proportion is above 50%.
len(merged[merged['White Proportion'] > 50.0]) / len(merged)

0.7069473224997274

In [34]:
# Fraction of tracts whose white proportion is above 75%.
len(sub_neigh[sub_neigh['White Proportion'] > 75.0]) / len(sub_neigh)

0.3425414364640884

In [35]:
# Fraction of merged permits that fall in tracts whose white proportion is above 75%.
len(merged[merged['White Proportion'] > 75.0]) / len(merged)

0.44083324244737704

In [68]:
# Percentage of merged permits per neighborhood, largest share first.
counts = merged.groupby('Neighborhood').size()
percents = counts / len(merged) * 100
percents.sort_values(ascending=False)

Neighborhood
Dorchester                 15.356091
South Boston               10.415531
Roxbury                     8.561457
Downtown                    7.481732
East Boston                 6.936416
Brighton                    5.693096
Jamaica Plain               5.474970
South End                   5.398626
West Roxbury                4.929654
Hyde Park                   4.231650
Roslindale                  3.740866
Charlestown                 3.141019
Back Bay                    3.064674
Fenway                      2.759298
Mattapan                    2.377577
Mission Hill                2.028575
South Boston Waterfront     1.854074
North End                   1.821355
Allston                     1.766823
Beacon Hill                 1.352383
West End                    0.981568
Longwood Medical Area       0.632566
dtype: float64

In [67]:
# Allow up to 100 rows in displayed output so the listing below is not truncated as early.
pd.set_option('display.max_rows', 100)
# Percentage of merged permits per primary sub-neighborhood, largest share first.
counts = merged.groupby('Primary Sub-Neighborhood').size()
percents = counts / len(merged) * 100
percents.sort_values(ascending=False)

Primary Sub-Neighborhood
Back Bay                                      4.820591
Boston                                        4.046243
Telegraph Hill                                3.871742
Downtown Crossing                             3.326426
Franklin Field South                          3.304613
D Street / West Broadway                      3.097393
Nubian Square                                 2.257607
Allston                                       2.094012
Bunker Hill                                   1.984949
Frederick Douglas Square Historic District    1.963137
Hyde Park                                     1.908605
Brook Farm                                    1.843167
South Boston                                  1.777729
Seaport District                              1.690479
Columbus Park/Andrew Square                   1.646854
Upper Washington/Spring Street                1.559603
West End                                      1.515978
Lower Roxbury                           

In [85]:
# Per neighborhood: share of merged permits (in percent) and the mean white
# proportion over those permits.
counts = merged[['Neighborhood', 'White Proportion']]
# The aggregations are passed by name; handing the np.mean callable to agg is
# deprecated in recent pandas, and the string yields the same 'mean' column.
counts = counts.groupby('Neighborhood').agg(['size', 'mean'])
counts['Percents'] = counts[('White Proportion', 'size')] / len(merged.index) * 100
counts['Mean White Proportion'] = counts[('White Proportion', 'mean')]
# Drop the intermediate size/mean columns now that they have been copied out.
counts = counts.drop(columns=['White Proportion'])
counts.sort_values('Percents', ascending=False)

Unnamed: 0_level_0,Percents,Mean White Proportion
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Dorchester,15.356091,34.231591
South Boston,10.415531,90.813581
Roxbury,8.561457,21.117885
Downtown,7.481732,64.754052
East Boston,6.936416,87.095425
Brighton,5.693096,71.774425
Jamaica Plain,5.47497,70.008147
South End,5.398626,64.634525
West Roxbury,4.929654,82.058628
Hyde Park,4.23165,37.317655


In [86]:
# Per primary sub-neighborhood: share of merged permits (in percent) and the mean
# white proportion over those permits.
counts = merged[['Primary Sub-Neighborhood', 'White Proportion']]
# The aggregations are passed by name; handing the np.mean callable to agg is
# deprecated in recent pandas, and the string yields the same 'mean' column.
counts = counts.groupby('Primary Sub-Neighborhood').agg(['size', 'mean'])
counts['Percents'] = counts[('White Proportion', 'size')] / len(merged.index) * 100
counts['Mean White Proportion'] = counts[('White Proportion', 'mean')]
# Drop the intermediate size/mean columns now that they have been copied out.
counts = counts.drop(columns=['White Proportion'])
counts.sort_values('Percents', ascending=False)

Unnamed: 0_level_0,Percents,Mean White Proportion
Primary Sub-Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Back Bay,4.820591,79.78914
Boston,4.046243,84.698302
Telegraph Hill,3.871742,95.792282
Downtown Crossing,3.326426,60.18623
Franklin Field South,3.304613,13.902607
D Street / West Broadway,3.097393,92.436761
Nubian Square,2.257607,12.907053
Allston,2.094012,59.559063
Bunker Hill,1.984949,82.873571
Frederick Douglas Square Historic District,1.963137,51.908389
