### This file gets a list of addresses in Morris county by neighborhood

In [330]:
import pandas as pd
import numpy as np
from shapely.geometry import Point
import geopandas as gpd
from tqdm import tqdm

req_cols = ['FULLADDR','POST_CODE','POST_COMM','COUNTY','PLACE_TYPE','PLACEMENT','LONG_', 'LAT']
data = gpd.read_file('../Data/Addr_NG911.gdb', include_fields=req_cols) # 5m 51s for full df

### 1.) Subset initial address dataset to Morris County only

In [ ]:
df = data.drop("geometry", axis=1).copy()
df["PLACE_TYPE"] = df["PLACE_TYPE"].mask(~df["PLACE_TYPE"].isin(['Residence', 'Unknown', 'Industrial', 'Outdoors']), "Commercial") # Sets all other possible Place Types to commercial as it fits that category
df.dropna(inplace=True)
df['COUNTY'].isnull().value_counts()
df["COUNTY"] = df["COUNTY"].astype(np.int64) # need to convert this column into int64
df = df[df["COUNTY"]==882231] # Morris County
df["ADDRESS"] = df["FULLADDR"] + ", " + df["POST_COMM"] + ", NJ " + df["POST_CODE"]
morris_add_list = df[["POST_COMM","PLACE_TYPE","ADDRESS","LONG_","LAT"]]

### 2.) Get town list from municipalities geofiles and save

In [ ]:
filename = "../Data/morrisMunicipalities/Municipalities.shp"
gdf = gpd.read_file(filename).reset_index()
town_list = []

for index in range(39):
    row = gdf[gdf["index"] == index]
    town = row.Label.values[0]
    town_list.append(town)

towns = pd.DataFrame({'towns': town_list})
towns.to_csv("./csv/morris_towns.csv", index=False)

### 3.) Using a dict to try to replace as many town names as possible before the problem

In [377]:
# History for townships needed to be researched as some neighbourhoods were represented by different names, for instance, 
# long valley is a census-designated place (CDP) and most addresses in Washington Twp are in Long Valley.
 # 'Dover Town' --> Dover
 #'Chatham Township',
 #'Rockaway Borough',
 #'Mendham Borough',
 #'Mount Olive Township', --> budd lake
 #'Morristown Town',
 #'Washington Township', --> long valley
 #'Mount Arlington Borough',
 #'Chester Township',
 #'Montville Township',
 #'Parsippany-Troy Hills Township',
 #'Butler Borough',
 #'Long Hill Township', --> 'Stirling','Millington','Gillette'
 #'Victory Gardens Borough', --> Dover (small section, usually put as dover)
 #'Jefferson Township', --> 'Oak Ridge', 'Lake Hopatcong'
 #'Chester Borough',
 #'Mountain Lakes Borough',
 #'Denville Township',
 #'Mendham Township',
 #'Madison Borough',
 #'Pequannock Township',
 #'Hanover Township', --> 'Cedar Knolls','Whippany'
 #'Lincoln Park Borough',
 #'Kinnelon Borough',
 #'Morris Plains Borough',
 #'Netcong Borough',
 #'Harding Township', --> 'Green Village','New Vernon'
 #'Chatham Borough',
 #'Roxbury Township', --> 'Ledgewood', 'Succasunna', 'Kenvil'
 #'Morris Township',
 #'Wharton Borough',
 #'Boonton Town',
 #'Florham Park Borough',
 #'Randolph Township',
 #'Boonton Township',
 #'Rockaway Township',
 #'Mine Hill Township',
 #'East Hanover Township',
 #'Riverdale'
 
problems = ['Chatham Borough','Chatham Township',
        'Boonton Town', 'Boonton Township',
        'Mendham Borough', 'Mendham Township',
        'Rockaway Borough', 'Rockaway Township',
        'Chester Borough', 'Chester Township',
        'Morristown Town', 'Morris Township',
        'Dover Town', 'Victory Gardens Borough']
non_problems =  list(set(towns) - set(problems))

In [378]:
some_dict = {'Randolph':'Randolph Township','Madison':'Madison Borough','Parsippany':'Parsippany-Troy Hills Township','Kinnelon':'Kinnelon Borough','Morris Plains':'Morris Plains Borough','Butler':'Butler Borough','Denville':'Denville Township','East Hanover':'East Hanover Township','Montville':'Montville Township','Wharton':'Wharton Borough','Lincoln Park':'Lincoln Park Borough','Florham Park':'Florham Park Borough','Mine Hill':'Mine Hill Township','Netcong':'Netcong Borough','Mountain Lakes':'Mountain Lakes Borough','Mount Arlington':'Mount Arlington Borough','Pequannock':'Pequannock Township','Budd Lake':'Mount Olive Township','Long Valley':'Washington Township','Riverdale':'Riverdale Borough','Stirling':'Long Hill Township','Millington':'Long Hill Township','Gillette':'Long Hill Township','Oak Ridge':'Jefferson Township','Lake Hopatcong':'Jefferson Township','Cedar Knolls':'Hanover Township','Whippany':'Hanover Township','Green Village':'Harding Township','New Vernon':'Harding Township','Ledgewood':'Roxbury Township','Succasunna':'Roxbury Township','Kenvil':'Roxbury Township','Chatham':'Chatham','Boonton':'Boonton','Mendham':'Mendham','Rockaway':'Rockaway','Chester':'Chester','Morristown':'Morristown','Dover':'Dover'}

p = ['Chatham','Boonton','Mendham','Rockaway','Chester','Morristown', 'Dover']

In [379]:
morris_add = morris_add_list.copy()
morris_add.replace({'POST_COMM': some_dict}, inplace=True)

NameError: name 'morris_add_list' is not defined

In [335]:
df_good = morris_add[morris_add.POST_COMM.isin(non_problems)].reset_index(drop=True)
df_bad = morris_add[morris_add.POST_COMM.isin(p)].reset_index(drop=True)

### 4.) Now we need to sort by coordinates to identify towns

In [336]:
df_bad

Unnamed: 0,POST_COMM,PLACE_TYPE,ADDRESS,LONG_,LAT
0,Chatham,Commercial,"480 Main Street, Chatham, NJ 07928",-74.390045,40.745433
1,Morristown,Commercial,"Mendham Road, Morristown, NJ 07960",-74.531211,40.788261
2,Morristown,Residence,"12 Wheatsheaf Farm Road, Morristown, NJ 07960",-74.502242,40.802582
3,Morristown,Residence,"2 Wheatsheaf Farm Road, Morristown, NJ 07960",-74.502903,40.802872
4,Boonton,Residence,"140 Chestnut Street, Boonton, NJ 07005",-74.423443,40.908487
...,...,...,...,...,...
55153,Dover,Unknown,"171 West Clinton Street, Dover, NJ 07801",-74.563845,40.891246
55154,Morristown,Residence,"2 Symor Drive, Morristown, NJ 07960",-74.457119,40.775384
55155,Morristown,Residence,"1 Symor Drive, Morristown, NJ 07960",-74.457255,40.775971
55156,Chester,Residence,"24 Mill Ridge Lane, Chester, NJ 07930",-74.706008,40.786139


In [337]:
gdf = gdf.to_crs(4326) # Change coordinate systems
bounds = [gdf.iloc[[x]].unary_union.bounds for x in range(39)]
gdf = gdf[gdf["Label"].isin(problems)].reset_index(drop=True)
gdf

Unnamed: 0,MuniID,GNIS,Label,GISAcres,GISSQMiles,EditDate,geometry
0,1409,885196,Dover Town,1745.225,2.727,2015-03-31,"POLYGON ((-74.57408 40.88851, -74.57383 40.888..."
1,1405,882194,Chatham Township,5981.966,9.35,,"POLYGON ((-74.43827 40.75237, -74.43821 40.752..."
2,1434,885374,Rockaway Borough,1356.812,2.12,2019-07-11,"POLYGON ((-74.50337 40.90782, -74.50337 40.907..."
3,1418,885296,Mendham Borough,3826.459,5.979,,"POLYGON ((-74.57082 40.75492, -74.57091 40.754..."
4,1424,885309,Morristown Town,1923.595,3.006,,"POLYGON ((-74.47880 40.81453, -74.47791 40.813..."
5,1407,882199,Chester Township,18694.754,29.21,2019-07-12,"POLYGON ((-74.64595 40.83721, -74.64594 40.837..."
6,1437,885427,Victory Gardens Borough,92.874,0.145,2016-04-20,"POLYGON ((-74.54970 40.87542, -74.54857 40.876..."
7,1406,885184,Chester Borough,1020.198,1.594,,"POLYGON ((-74.69934 40.77738, -74.69934 40.777..."
8,1419,882200,Mendham Township,11526.822,18.011,,"POLYGON ((-74.54221 40.78945, -74.54223 40.789..."
9,1404,885182,Chatham Borough,1522.1289,2.3783,,"POLYGON ((-74.36515 40.74976, -74.36525 40.749..."


In [338]:
# 12 mins
new_town_list = []
for row in tqdm(range(len(df_bad))):
    row_add = df_bad.iloc[[row]]
    x = row_add.LONG_
    y = row_add.LAT
    p = Point(x, y)
    
    town_var = row_add.POST_COMM.values[0]
    for index in range(len(gdf)):
        gdf_row = gdf.iloc[[index]]
        #print(gdf_row)
        town = gdf_row.Label.values[0] 
        
        aoi_geom = gdf_row.unary_union
        
        if aoi_geom.contains(p):
            town_var = town
            break
    new_town_list.append(town_var)

100%|██████████| 55158/55158 [12:27<00:00, 73.78it/s] 


In [339]:
new_town_list

['Chatham Borough',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Boonton Town',
 'Rockaway Township',
 'Mendham Township',
 'Morristown',
 'Morristown Town',
 'Morris Township',
 'Mendham Borough',
 'Dover Town',
 'Dover Town',
 'Chatham Township',
 'Mendham Township',
 'Mendham Borough',
 'Morris Township',
 'Morristown Town',
 'Boonton Town',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Chester Township',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Morristown Town',
 'Dover Town',
 'Morris Township',
 'Rockaway Township',
 'Morris Township',
 'Boonton Town',
 'Morris Township',
 'Morris Township',
 'Morris Township',
 'Rockaway',
 'Boonton Town',
 'Boonton Town',
 'Morris Township',
 'Mendham',
 'Morris Township',
 'Boonton Town',
 'Morristown',
 'Boonton Town',
 'Rockaway Township',
 'Rockaway Borough',
 'Chatham Borough',
 'Morris Township',
 'Morris Township',
 'Morristo

In [374]:
new_df_bad = df_bad.copy()
new_df_bad.POST_COMM = new_town_list
new_df_bad = new_df_bad[new_df_bad["POST_COMM"].isin(towns)]
combined_df = pd.concat([df_good, new_df_bad], ignore_index=True)
combined_df.POST_COMM.nunique()

39

In [375]:
combined_df

Unnamed: 0,POST_COMM,PLACE_TYPE,ADDRESS,LONG_,LAT
0,Randolph Township,Commercial,"25 School House Road, Randolph, NJ 07869",-74.571228,40.844481
1,Randolph Township,Commercial,"303 Dover-Chester Road, Randolph, NJ 07869",-74.612319,40.834135
2,Madison Borough,Commercial,"700 Shunpike Road, Madison, NJ 07940",-74.434092,40.749933
3,Parsippany-Troy Hills Township,Commercial,"509 South Beverwyck Road, Parsippany, NJ 07054",-74.392478,40.850515
4,Parsippany-Troy Hills Township,Commercial,"1 Lincoln Avenue, Parsippany, NJ 07054",-74.385510,40.875584
...,...,...,...,...,...
171408,Dover Town,Unknown,"171 West Clinton Street, Dover, NJ 07801",-74.563845,40.891246
171409,Morris Township,Residence,"2 Symor Drive, Morristown, NJ 07960",-74.457119,40.775384
171410,Morris Township,Residence,"1 Symor Drive, Morristown, NJ 07960",-74.457255,40.775971
171411,Chester Borough,Residence,"24 Mill Ridge Lane, Chester, NJ 07930",-74.706008,40.786139


In [380]:
combined_df.to_csv('./csv/morris_address.csv', index=False)