<a href="https://colab.research.google.com/github/BrianKEverett/County-Line/blob/main/Dissertation3_Everett.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Loading in programming packages

In [806]:
!pip install ydata-profiling
from ydata_profiling import ProfileReport



In [807]:
import time, os, sys, re
import zipfile, json, datetime, string
import numpy as np
from statistics import *

import matplotlib.pyplot as plt

import pandas as pd
import pandas_datareader as pdr
from pandas_datareader import wb
from pandas.io.formats.style import Styler
import plotly.express as px

import missingno as msno

from google.colab import files

import seaborn as sns

from google.colab import data_table
data_table.enable_dataframe_formatter()
data_table.max_columns = 50

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%matplotlib inline

plt.style.use('classic')

#Loading in datasets

In [808]:
permits=pd.read_csv("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/Permits.csv")
# dataset can be found here: https://njdca.maps.arcgis.com/home/item.html?id=c754e8f800424bcbb6ad4e6e85b9f736 from NJ Dept of Community Affairs Website.
#this dataset was chosen mostly to assist with my dissertation proposal - I am seeking to explore the behaviors of planning boards in New Jersey, and one hypothesis I have is that planning board decisions are influenced by the effects of the County Line Balloting system, which is unique to 19 out of 21 counties in New Jersey. No other state in the US runs primary elections in this way.
#More on the County Line can be found here by Julia Sass Rubin: https://www.njpp.org/wp-content/uploads/2021/01/NJPP-Report-Does-the-County-Line-Matter-Update-wiht-Final-Vote-Counts.pdf

taxes=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/mediantax.csv")
# dataset can be retireved via: https://njdca.maps.arcgis.com/apps/webappviewer/index.html?id=96ec274c50a34890b23263f101e4ad9b from NJ Department of Community Affairs
# Another hypothesis I have is that the public narrative put forth by planning board members when approving controversial permits, "this will increase rateables for the township, lowering your taxes", does not actually come to fruition
#This data set is helpful for exploring that narrative, and seeing if the opposite is occurring, i.e. more development actually yields higher property taxes

#health=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/countyhealth.csv")
#dataset can be found here: https://www.countyhealthrankings.org/explore-health-rankings/rankings-data-documentation from County Health Rankings and Roadmaps, for 2022 to match same year of data for permits dataset
#With County Health Data, we can infer hypotheses about rates of development and the effect on well-being, liveability.

#Problem with health data set == only 22 observations for New Jersey, not a good sample.

municodes=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/Municodes.csv")
#Data file of all NJ municiaplities, and counties, with the corresponding municipalitiy DCA code. This data file will be most helpful for matching and merging.

#crime=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/CamdenCrime.csv") #not a good format for reading data!
#Dataset can be retireved here: https://www.nj.gov/njsp/ucr/uniform-crime-reports.shtml on the NJ Office of the Attorney General's website
#This crime data is important to consider when analyzing planning and zoning. Does any specific type of development correlate with increased crime? Can liveability theory be worked in here for whether or not communities have what they need to prevent crime?

jobs=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/jobsdensity.csv')
#dataset can be built via the table selections on the NJ Community Affairs website - https://njdca.maps.arcgis.com/apps/webappviewer/index.html?id=96ec274c50a34890b23263f101e4ad9b
#Job density is a good variable to consider regarding new large dollar permits. Are some places growing more than others? Can this be attribute to the phenomenon of the County Line?

countysize=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/NJCountySize.csv')
#dataset can be found via Wikipedia via 2020 census data - https://en.wikipedia.org/wiki/List_of_counties_in_New_Jersey

njtowns=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/njtowns2.csv')
#Dataset can be found at: https://en.wikipedia.org/wiki/List_of_municipalities_in_New_Jersey#:~:text=The%20largest%20municipality%20by%20population,most%20populous%20being%20South%20Carolina.

#Initial Cleaning of Datasets

In [809]:
permits = permits.rename(columns={'DCA MUNI CODE': 'DCA'})
permits = permits.rename(columns={'MUNICIPALITY': 'Municipality'})
permits["Municipality"]= permits["Municipality"].str.title()
permits['Municipality'] = permits['Municipality'].str.replace(' Boro', ' Borough')
permits['Municipality'] = permits['Municipality'].str.replace(' Twp', ' Township')
del permits['ID']
del permits['BLOCK NUMBER']
del permits['PAMS PIN']
del permits['USE GROUP']
del permits['YCOORD']
del permits['XCOORD']
del permits['MATCH TYPE']
del permits['LOT NUMBER']
del permits['DATE ISSUED']
del permits['TAX CODE']
permits = permits.rename(columns={'TYPE': 'Permits'})

del municodes['MUNICIPALITY_CODE_DCA']
del municodes['MUNICIPALITY_NAME_DCA']
del municodes['MUNICIPALITY_CODE_GNIS']
del municodes['MUNICIPALITY_NAME_GNIS']
del municodes['MUNICIPALITY_CODE_FIPS']
municodes = municodes.rename(columns={'MUNICIPALITY_NAME_NJ-1040': 'Municipality'})
municodes = municodes.rename(columns={'MUNICIPALITY_CODE_NJ-1040': 'DCA'})
municodes = municodes.rename(columns={'COUNTY_NAME_COMMON': 'County'})
del municodes['MUNICIPALITY_NAME_COMMON']
municodes['County'] = municodes['County'].str.replace(' County', '')
municodes = municodes.set_index('Municipality')

countysize['Largest City Population']=countysize['Largest City Population'].str.replace(',','')

njtowns = njtowns.rename(columns={'Name': 'Municipality'})
del njtowns['Type']
del njtowns['Unnamed: 6']
del njtowns['Unnamed: 7']
del njtowns['Unnamed: 8']
del njtowns['Unnamed: 9']
del njtowns['Unnamed: 10']
njtowns.replace(',','', regex=True, inplace=True)

#jobs = jobs.set_index('Municipality')
del jobs['JobsVintage']
del jobs['Blk_Grp_Name']
del jobs['JobsDensity']

#taxes = taxes.set_index('Municipality')
del taxes['Tract_Name']
del taxes['Data_Vintage']
del taxes[' ']

In [810]:
codespermits = municodes.merge(permits, how='inner', on=['DCA'])
codespermits

Unnamed: 0,County,DCA,Municipality,Use Group Label,Permits,WORK VALUE
0,Atlantic,101,Absecon City,Mercantile,NEW,695000
1,Atlantic,101,Absecon City,Educational,ALT,4000000
2,Atlantic,102,Atlantic City,"Clubs, Dance Halls, Casinos, Restaurants, Tave...",ALT,2100000
3,Atlantic,102,Atlantic City,Residential (1 & 2 Family Homes),ALT,2100000
4,Atlantic,102,Atlantic City,"Churches, Libraries, Arcades, Comm. Halls, Gyms",ALT,337400
...,...,...,...,...,...,...
9778,Warren,2121,Washington Borough,Residential (1 & 2 Family Homes),ALT,1000000
9779,Warren,2122,Washington Township,Residential (1 & 2 Family Homes),ALT,426124
9780,Warren,2122,Washington Township,Educational,ALT,742000
9781,Warren,2122,Washington Township,Business,ALT,276000


#Created new dataset, Permits2, via groupby and agg functions in order to count number of permits per municipality while keeping the County column

In [811]:
permits2 = codespermits.groupby('Municipality').agg({'Permits': 'count', 'County': 'first'})
permits2

Unnamed: 0_level_0,Permits,County
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1
Aberdeen Township,5,Monmouth
Absecon City,2,Atlantic
Alexandria Township,11,Hunterdon
Allamuchy Township,4,Warren
Allendale Borough,10,Bergen
...,...,...
Woodbury Heights Borough,1,Gloucester
Woodcliff Lake Borough,19,Bergen
Woodland Park Borough,8,Passaic
Woolwich Township,25,Gloucester


In [812]:
njtowns.sort_values('Municipality')

Unnamed: 0,Municipality,County,Population 2020,Population 2010,Land Area (mi^2)
142,Aberdeen Township,Monmouth,19329,18157,5.444
259,Absecon City,Atlantic,9137,8411,5.468
384,Alexandria Township,Hunterdon,4809,4938,27.534
367,Allamuchy Township,Warren,5335,4323,19.992
322,Allendale Borough,Bergen,6848,6505,3.097
...,...,...,...,...,...
451,Woodlynne Borough,Camden,2902,2978,0.218
422,Woodstown Borough,Salem,3678,3505,1.575
202,Woolwich Township,Gloucester,12577,10200,21.072
545,Wrightstown Borough,Burlington,720,802,1.850


#Need to ask for help here because I went from 481 entries to 456, but still way, WAY, better than 180...

In [813]:
permits3 = permits2.merge(njtowns, how='inner', on=['Municipality'])
permits3

Unnamed: 0,Municipality,Permits,County_x,County_y,Population 2020,Population 2010,Land Area (mi^2)
0,Aberdeen Township,5,Monmouth,Monmouth,19329,18157,5.444
1,Absecon City,2,Atlantic,Atlantic,9137,8411,5.468
2,Alexandria Township,11,Hunterdon,Hunterdon,4809,4938,27.534
3,Allamuchy Township,4,Warren,Warren,5335,4323,19.992
4,Allendale Borough,10,Bergen,Bergen,6848,6505,3.097
...,...,...,...,...,...,...,...
451,Woodbury Heights Borough,1,Gloucester,Gloucester,3098,3055,1.246
452,Woodcliff Lake Borough,19,Bergen,Bergen,6128,5730,3.376
453,Woodland Park Borough,8,Passaic,Passaic,13484,11819,2.939
454,Woolwich Township,25,Gloucester,Gloucester,12577,10200,21.072


In [814]:
del permits3['County_y']
permits3 = permits3.rename(columns={'County_x': 'County'})

#Below for jobs and taxes datasets, I've aggregated the data so there are not multiple rows for the same municipality. Orginial datasets had multiple rows for 1 towns due to the way those data tracts are created, via zip code or something like that.

#The taxes dataset needs work because the median property tax totals are sometimes different within the same town due to different tracts. An average should be taken.

In [815]:
jobs['id'] = jobs.groupby(['Municipality','County']).ngroup()
agg_functions = {'Jobs': 'sum', 'Municipality': 'first', 'County': 'first'}
jobs2 = jobs.groupby(jobs['id']).aggregate(agg_functions)
jobs2 = jobs2.drop(labels=0, axis=0)
jobs2

Unnamed: 0_level_0,Jobs,Municipality,County
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1394,Aberdeen Township,Monmouth
2,755,Alexandria Township,Hunterdon
3,4243,Allendale Borough,Bergen
4,1072,Andover Township,Sussex
5,4993,Asbury Park City,Monmouth
...,...,...,...
302,48129,Woodbridge Township,Middlesex
303,137,Woodbury City,Gloucester
304,5390,Woodcliff Lake Borough,Bergen
305,3786,Woolwich Township,Gloucester


In [816]:
taxes['id'] = taxes.groupby(['Municipality','County']).ngroup()
agg_functions = {'Median_RE_Taxes': 'sum', 'Municipality': 'first', 'County': 'first'}
taxes2 = taxes.groupby(taxes['id']).aggregate(agg_functions)
taxes2

Unnamed: 0_level_0,Median_RE_Taxes,Municipality,County
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,44735.0,Aberdeen Township,Monmouth
1,9776.0,Alexandria Township/Milford Borough,Hunterdon
2,9084.0,Allamuchy Township,Warren
3,20000.0,Allendale Borough,Bergen
4,9263.0,Allentown Borough,Monmouth
...,...,...,...
377,10000.0,Wood-Ridge Borough/South Hackensack Township,Bergen
378,201714.0,Woodbridge Township,Middlesex
379,10000.0,Woodcliff Lake Borough,Bergen
380,19905.0,Woodland Park Borough,Passaic


In [817]:
jobs2

Unnamed: 0_level_0,Jobs,Municipality,County
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1394,Aberdeen Township,Monmouth
2,755,Alexandria Township,Hunterdon
3,4243,Allendale Borough,Bergen
4,1072,Andover Township,Sussex
5,4993,Asbury Park City,Monmouth
...,...,...,...
302,48129,Woodbridge Township,Middlesex
303,137,Woodbury City,Gloucester
304,5390,Woodcliff Lake Borough,Bergen
305,3786,Woolwich Township,Gloucester


#Merging with jobs dataset, we go from 456 to 281. Probably a problem with different names for Municipalities between each dataset. Ugh.

#But Maybe! I should try an earlier merge above to input municipal codes. Earlier I thought there were only county codes, but there are municipal codes just in PDF form.

In [818]:
permits4 = permits3.merge(jobs2, how='inner', on=['Municipality'])
permits4

Unnamed: 0,Municipality,Permits,County_x,Population 2020,Population 2010,Land Area (mi^2),Jobs,County_y
0,Aberdeen Township,5,Monmouth,19329,18157,5.444,1394,Monmouth
1,Alexandria Township,11,Hunterdon,4809,4938,27.534,755,Hunterdon
2,Allendale Borough,10,Bergen,6848,6505,3.097,4243,Bergen
3,Andover Township,4,Sussex,5996,6319,20.044,1072,Sussex
4,Asbury Park City,21,Monmouth,15188,16008,1.429,4993,Monmouth
...,...,...,...,...,...,...,...,...
281,Wood-Ridge Borough,7,Bergen,10137,7626,1.114,2452,Bergen
282,Woodbridge Township,70,Middlesex,103639,99585,23.258,48129,Middlesex
283,Woodbury City,9,Gloucester,9963,10174,2.020,137,Gloucester
284,Woodcliff Lake Borough,19,Bergen,6128,5730,3.376,5390,Bergen


In [819]:
permits4 = permits4.rename(columns={'County_x': 'County'})
del permits4 ['County_y']
permits4

Unnamed: 0,Municipality,Permits,County,Population 2020,Population 2010,Land Area (mi^2),Jobs
0,Aberdeen Township,5,Monmouth,19329,18157,5.444,1394
1,Alexandria Township,11,Hunterdon,4809,4938,27.534,755
2,Allendale Borough,10,Bergen,6848,6505,3.097,4243
3,Andover Township,4,Sussex,5996,6319,20.044,1072
4,Asbury Park City,21,Monmouth,15188,16008,1.429,4993
...,...,...,...,...,...,...,...
281,Wood-Ridge Borough,7,Bergen,10137,7626,1.114,2452
282,Woodbridge Township,70,Middlesex,103639,99585,23.258,48129
283,Woodbury City,9,Gloucester,9963,10174,2.020,137
284,Woodcliff Lake Borough,19,Bergen,6128,5730,3.376,5390


#Below right here, adding a column to indicate which towns fall under the County Line System based upon county. Seems to have worked fine.

In [820]:
permits4['County Line'] = 'Other'

In [821]:
permits4.loc[permits4['County'] == 'Salem', 'County Line'] = '0'
permits4.loc[permits4['County'] == 'Sussex', 'County Line'] = '0'
permits4.loc[permits4['County'] == 'Monmouth', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Passaic', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Bergen', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Warren', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Morris', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Essex', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Union', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Somerset', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Hunterdon', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Middlesex', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Mercer', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Ocean', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Burlington', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Camden', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Atlantic', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Gloucester', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Cumberland', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Cape May', 'County Line'] = '1'
permits4.loc[permits4['County'] == 'Hudson', 'County Line'] = '1'

permits4

Unnamed: 0,Municipality,Permits,County,Population 2020,Population 2010,Land Area (mi^2),Jobs,County Line
0,Aberdeen Township,5,Monmouth,19329,18157,5.444,1394,1
1,Alexandria Township,11,Hunterdon,4809,4938,27.534,755,1
2,Allendale Borough,10,Bergen,6848,6505,3.097,4243,1
3,Andover Township,4,Sussex,5996,6319,20.044,1072,0
4,Asbury Park City,21,Monmouth,15188,16008,1.429,4993,1
...,...,...,...,...,...,...,...,...
281,Wood-Ridge Borough,7,Bergen,10137,7626,1.114,2452,1
282,Woodbridge Township,70,Middlesex,103639,99585,23.258,48129,1
283,Woodbury City,9,Gloucester,9963,10174,2.020,137,1
284,Woodcliff Lake Borough,19,Bergen,6128,5730,3.376,5390,1


#Tried to mess around with dtypes because the correlation map below is not picking up several columns, and I do not know why.

In [822]:
permits4.astype({'Population 2020': 'int', 'Land Area (mi^2)':'float64', 'Population 2010': 'int', 'County Line': 'int', 'Jobs': 'int', 'Permits': 'int'}).dtypes

Municipality         object
Permits               int64
County               object
Population 2020       int64
Population 2010       int64
Land Area (mi^2)    float64
Jobs                  int64
County Line           int64
dtype: object

In [823]:
permits4

Unnamed: 0,Municipality,Permits,County,Population 2020,Population 2010,Land Area (mi^2),Jobs,County Line
0,Aberdeen Township,5,Monmouth,19329,18157,5.444,1394,1
1,Alexandria Township,11,Hunterdon,4809,4938,27.534,755,1
2,Allendale Borough,10,Bergen,6848,6505,3.097,4243,1
3,Andover Township,4,Sussex,5996,6319,20.044,1072,0
4,Asbury Park City,21,Monmouth,15188,16008,1.429,4993,1
...,...,...,...,...,...,...,...,...
281,Wood-Ridge Borough,7,Bergen,10137,7626,1.114,2452,1
282,Woodbridge Township,70,Middlesex,103639,99585,23.258,48129,1
283,Woodbury City,9,Gloucester,9963,10174,2.020,137,1
284,Woodcliff Lake Borough,19,Bergen,6128,5730,3.376,5390,1


#Correlation Map not showing Population 2020, or County Line

In [824]:
corr = permits4[['Population 2020','Land Area (mi^2)','Jobs','Permits','County Line']].corr()
corr.style.background_gradient(cmap='coolwarm').set_precision(2)

  corr = permits4[['Population 2020','Land Area (mi^2)','Jobs','Permits','County Line']].corr()
  corr.style.background_gradient(cmap='coolwarm').set_precision(2)


Unnamed: 0,Land Area (mi^2),Jobs,Permits
Land Area (mi^2),1.0,0.08,0.24
Jobs,0.08,1.0,0.61
Permits,0.24,0.61,1.0
