<a href="https://colab.research.google.com/github/BrianKEverett/County-Line/blob/main/Dissertation5_Everett.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ydata-profiling
from ydata_profiling import ProfileReport

Collecting ydata-profiling
  Downloading ydata_profiling-4.6.4-py2.py3-none-any.whl (357 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/357.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/357.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m357.8/357.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting visions[type_image_path]==0.7.5 (from ydata-profiling)
  Downloading visions-0.7.5-py3-none-any.whl (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.7/102.7 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Collecting htmlmin==0.1.12 (from ydata-profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0.13,>=0.11.1 (from ydata-profiling)
  Downloading phik-0.12.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (68

In [2]:
import time, os, sys, re
import zipfile, json, datetime, string
import numpy as np
from statistics import *

import matplotlib.pyplot as plt

from scipy.stats import chi2_contingency
import pandas as pd
import pandas_datareader as pdr
from pandas_datareader import wb
from pandas.io.formats.style import Styler
import plotly.express as px

import missingno as msno

from google.colab import files

import seaborn as sns

from google.colab import data_table
data_table.enable_dataframe_formatter()
data_table.max_columns = 50

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%matplotlib inline

plt.style.use('classic')

In [3]:
permits=pd.read_csv("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/Permits.csv")
# dataset can be found here: https://njdca.maps.arcgis.com/home/item.html?id=c754e8f800424bcbb6ad4e6e85b9f736 from NJ Dept of Community Affairs Website.
#this dataset was chosen mostly to assist with my dissertation proposal - I am seeking to explore the behaviors of planning boards in New Jersey, and one hypothesis I have is that planning board decisions are influenced by the effects of the County Line Balloting system, which is unique to 19 out of 21 counties in New Jersey. No other state in the US runs primary elections in this way.
#More on the County Line can be found here by Julia Sass Rubin: https://www.njpp.org/wp-content/uploads/2021/01/NJPP-Report-Does-the-County-Line-Matter-Update-wiht-Final-Vote-Counts.pdf

taxes=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/mediantax.csv")
# dataset can be retireved via: https://njdca.maps.arcgis.com/apps/webappviewer/index.html?id=96ec274c50a34890b23263f101e4ad9b from NJ Department of Community Affairs
# Another hypothesis I have is that the public narrative put forth by planning board members when approving controversial permits, "this will increase rateables for the township, lowering your taxes", does not actually come to fruition
#This data set is helpful for exploring that narrative, and seeing if the opposite is occurring, i.e. more development actually yields higher property taxes

municodes=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/Municodes.csv")
#Data file of all NJ municiaplities, and counties, with the corresponding municipalitiy DCA code. This data file will be most helpful for matching and merging.

#crime=pd.read_csv ("https://raw.githubusercontent.com/BrianKEverett/County-Line/main/CamdenCrime.csv") #not a good format for reading data!
#Dataset can be retireved here: https://www.nj.gov/njsp/ucr/uniform-crime-reports.shtml on the NJ Office of the Attorney General's website
#This crime data is important to consider when analyzing planning and zoning. Does any specific type of development correlate with increased crime? Can liveability theory be worked in here for whether or not communities have what they need to prevent crime?

jobs=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/jobsdensity.csv')
#dataset can be built via the table selections on the NJ Community Affairs website - https://njdca.maps.arcgis.com/apps/webappviewer/index.html?id=96ec274c50a34890b23263f101e4ad9b
#Job density is a good variable to consider regarding new large dollar permits. Are some places growing more than others? Can this be attribute to the phenomenon of the County Line?

countysize=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/NJCountySize.csv')
#dataset can be found via Wikipedia via 2020 census data - https://en.wikipedia.org/wiki/List_of_counties_in_New_Jersey

njtowns=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/njtowns.csv')
#Dataset can be found at: https://en.wikipedia.org/wiki/List_of_municipalities_in_New_Jersey#:~:text=The%20largest%20municipality%20by%20population,most%20populous%20being%20South%20Carolina.

njcodes=pd.read_csv('https://raw.githubusercontent.com/BrianKEverett/County-Line/main/Municipalities_of_New_Jersey_20240131.csv')
#data was retireved via State of New Jersey Open Data Center: https://data.nj.gov/Reference-Data/Municipalities-of-New-Jersey/k9xb-zgh4/data_preview

In [4]:
permits = permits.rename(columns={'DCA MUNI CODE': 'DCA'})
permits = permits.rename(columns={'MUNICIPALITY': 'Municipality'})
permits["Municipality"]= permits["Municipality"].str.title()
permits['Municipality'] = permits['Municipality'].str.replace(' Boro', ' Borough')
permits['Municipality'] = permits['Municipality'].str.replace(' Twp', ' Township')
del permits['ID']
del permits['BLOCK NUMBER']
del permits['PAMS PIN']
del permits['USE GROUP']
del permits['YCOORD']
del permits['XCOORD']
del permits['MATCH TYPE']
del permits['LOT NUMBER']
del permits['DATE ISSUED']
del permits['TAX CODE']
permits = permits.rename(columns={'TYPE': 'Permits'})

del municodes['MUNICIPALITY_CODE_DCA']
del municodes['MUNICIPALITY_NAME_DCA']
del municodes['MUNICIPALITY_CODE_GNIS']
del municodes['MUNICIPALITY_NAME_GNIS']
del municodes['MUNICIPALITY_CODE_FIPS']
municodes = municodes.rename(columns={'MUNICIPALITY_NAME_NJ-1040': 'Municipality'})
municodes = municodes.rename(columns={'MUNICIPALITY_CODE_NJ-1040': 'DCA'})
municodes = municodes.rename(columns={'COUNTY_NAME_COMMON': 'County'})
del municodes['MUNICIPALITY_NAME_COMMON']
municodes['County'] = municodes['County'].str.replace(' County', '')
municodes = municodes.set_index('Municipality')

countysize['Largest City Population']=countysize['Largest City Population'].str.replace(',','')

njtowns = njtowns.rename(columns={'Name': 'Municipality'})
del njtowns['Type']
del njtowns['Unnamed: 7']
del njtowns['Unnamed: 8']
del njtowns['Unnamed: 9']
del njtowns['Unnamed: 10']
njtowns.replace(',','', regex=True, inplace=True)

#jobs = jobs.set_index('Municipality')
del jobs['JobsVintage']
del jobs['Blk_Grp_Name']
del jobs['JobsDensity']

#taxes = taxes.set_index('Municipality')
del taxes['Tract_Name']
del taxes['Data_Vintage']
del taxes[' ']

#njcodes
del njcodes ['MUNICIPALITY_NAME_COMMON']
del njcodes ['MUNICIPALITY_CODE_NJ-1040']
del njcodes ['MUNICIPALITY_NAME_NJ-1040']
del njcodes ['MUNICIPALITY_NAME_GNIS']
njcodes = njcodes.rename(columns={'COUNTY_NAME_COMMON': 'County'})
njcodes = njcodes.rename(columns={'MUNICIPALITY_NAME_DCA': 'Municipality'})
njcodes = njcodes.rename(columns={'MUNICIPALITY_CODE_DCA': 'DCA'})
njcodes = njcodes.rename(columns={'MUNICIPALITY_CODE_GNIS': 'GNIS'})
njcodes = njcodes.rename(columns={'MUNICIPALITY_CODE_FIPS': 'FIPS'})
njcodes['County'] = njcodes['County'].str.replace(' County', '')
njcodes['Municipality'] = njcodes['Municipality'].str.replace('Atlantic City City', 'Atlantic City')

In [5]:
permits1 = njtowns.merge(permits, how='inner', on=['DCA'])
del permits1['Municipality_y']
permits1 = permits1.rename(columns={'Municipality_x': 'Municipality'})
permits1

Unnamed: 0,DCA,Municipality,County,Population 2020,Population 2010,Land Area (mi^2),Use Group Label,Permits,WORK VALUE
0,1301,Aberdeen Township,Monmouth,19329,18157,5.444,Residential (1 & 2 Family Homes),ALT,389100
1,1301,Aberdeen Township,Monmouth,19329,18157,5.444,Residential (1 & 2 Family Homes),ADD,942600
2,1301,Aberdeen Township,Monmouth,19329,18157,5.444,Residential (1 & 2 Family Homes),ADD,853300
3,101,Absecon City,Atlantic,9137,8411,5.468,Mercantile,NEW,695000
4,101,Absecon City,Atlantic,9137,8411,5.468,Educational,ALT,4000000
...,...,...,...,...,...,...,...,...,...
9778,270,Wyckoff Township,Bergen,16585,16635,6.591,Residential (Multifamily),ALT,2506830
9779,270,Wyckoff Township,Bergen,16585,16635,6.591,Institutional or Group Home,ALT,360600
9780,270,Wyckoff Township,Bergen,16585,16635,6.591,Residential (1 & 2 Family Homes),ADD,322718
9781,270,Wyckoff Township,Bergen,16585,16635,6.591,Residential (1 & 2 Family Homes),ADD,267143


In [6]:
permits2 = permits1.groupby('Municipality').agg({'Permits': 'count', 'County': 'first', 'DCA': 'first', 'Population 2020': 'first', 'Land Area (mi^2)': 'first'})
permits2

Unnamed: 0_level_0,Permits,County,DCA,Population 2020,Land Area (mi^2)
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aberdeen Township,3,Monmouth,1301,19329,5.444
Absecon City,2,Atlantic,101,9137,5.468
Alexandria Township,11,Hunterdon,1001,4809,27.534
Allamuchy Township,4,Warren,2101,5335,19.992
Allendale Borough,10,Bergen,201,6848,3.097
...,...,...,...,...,...
Woodbury Heights Borough,1,Gloucester,823,3098,1.246
Woodcliff Lake Borough,19,Bergen,268,6128,3.376
Woodland Park Borough,8,Passaic,1616,13484,2.939
Woolwich Township,25,Gloucester,824,12577,21.072


In [7]:
CountyLine_dictionary ={'Salem' : 0, 'Sussex' : 0, 'Camden' : 1, 'Burlington': 1, 'Gloucester': 1, 'Cape May': 1, 'Cumberland': 1, 'Atlantic': 1, 'Ocean': 1, 'Monmouth': 1, 'Mercer': 1, 'Middlesex': 1, 'Somerset': 1, 'Hunterdon': 1, 'Hudson': 1, 'Union': 1, 'Essex': 1, 'Passaic': 1, 'Morris': 1, 'Bergen': 1, 'Warren': 1 }
permits2['County Line'] = permits2['County'].map(CountyLine_dictionary)
print(permits2)

                          Permits      County   DCA Population 2020  \
Municipality                                                          
Aberdeen Township               3    Monmouth  1301           19329   
Absecon City                    2    Atlantic   101            9137   
Alexandria Township            11   Hunterdon  1001            4809   
Allamuchy Township              4      Warren  2101            5335   
Allendale Borough              10      Bergen   201            6848   
...                           ...         ...   ...             ...   
Woodbury Heights Borough        1  Gloucester   823            3098   
Woodcliff Lake Borough         19      Bergen   268            6128   
Woodland Park Borough           8     Passaic  1616           13484   
Woolwich Township              25  Gloucester   824           12577   
Wyckoff Township               30      Bergen   270           16585   

                          Land Area (mi^2)  County Line  
Municipality      

In [8]:
permits2[["Permits", "Population 2020", "Land Area (mi^2)", "County Line"]] = permits2[["Permits", "Population 2020", "Land Area (mi^2)", "County Line"]].apply(pd.to_numeric)

In [9]:
permits2

Unnamed: 0_level_0,Permits,County,DCA,Population 2020,Land Area (mi^2),County Line
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aberdeen Township,3,Monmouth,1301,19329,5.444,1
Absecon City,2,Atlantic,101,9137,5.468,1
Alexandria Township,11,Hunterdon,1001,4809,27.534,1
Allamuchy Township,4,Warren,2101,5335,19.992,1
Allendale Borough,10,Bergen,201,6848,3.097,1
...,...,...,...,...,...,...
Woodbury Heights Borough,1,Gloucester,823,3098,1.246,1
Woodcliff Lake Borough,19,Bergen,268,6128,3.376,1
Woodland Park Borough,8,Passaic,1616,13484,2.939,1
Woolwich Township,25,Gloucester,824,12577,21.072,1


In [10]:
permits2.sort_values(by='County Line', ascending=True)

Unnamed: 0_level_0,Permits,County,DCA,Population 2020,Land Area (mi^2),County Line
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hopatcong Borough,11,Sussex,1912,14362,10.889,0
Green Township,1,Sussex,1908,3627,16.034,0
Salem City,19,Salem,1713,5296,2.344,0
Pennsville Township,5,Salem,1709,12684,21.266,0
Wantage Township,8,Sussex,1924,10811,66.760,0
...,...,...,...,...,...,...
Haddon Heights Borough,2,Camden,418,7495,1.566,1
Hackettstown Township,4,Warren,2108,10248,3.612,1
Hackensack City,58,Bergen,223,46030,4.191,1
Hardwick Township,2,Warren,2109,1598,37.525,1


In [11]:
corr = permits2[['Permits', 'Population 2020', 'Land Area (mi^2)', 'County Line']].corr()
corr.style.background_gradient(cmap='coolwarm').set_precision(2)

  corr.style.background_gradient(cmap='coolwarm').set_precision(2)


Unnamed: 0,Permits,Population 2020,Land Area (mi^2),County Line
Permits,1.0,0.65,0.07,0.11
Population 2020,0.65,1.0,0.21,0.11
Land Area (mi^2),0.07,0.21,1.0,-0.18
County Line,0.11,0.11,-0.18,1.0


Notes and Takeaway from above correlation chart:
The County Line variable does not seem to be correlated with any variable in any significant manner. Having said that, though, there are only 29 instances out of a universe of 479 shown where there are permits in municipalities not subject to the County Line. *It might be worth it *to try to build the same dataset but using Pandas dummy variable codes rather than creating a column the way I currently have - not sure if it makes a difference or not. https://pandas.pydata.org/docs/reference/api/pandas.get_dummies.html

Despite the above correlation table not showing a strong correlation between permits granted and the County Line, this table does show a very strong correlation between permits granted and Population 2020. This finding can be viewed as significant given AOK, Everett, and Mikhaeil (2024) which describes a quantitative phenomenon whereby happiness levels increase in areas that increase in population. Further analysis should be done to examine change in population as per AOK, et al especially since this dataset contains population data for both 2020 and 2010. More codes will need to be studied.

In [12]:
permits2

Unnamed: 0_level_0,Permits,County,DCA,Population 2020,Land Area (mi^2),County Line
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aberdeen Township,3,Monmouth,1301,19329,5.444,1
Absecon City,2,Atlantic,101,9137,5.468,1
Alexandria Township,11,Hunterdon,1001,4809,27.534,1
Allamuchy Township,4,Warren,2101,5335,19.992,1
Allendale Borough,10,Bergen,201,6848,3.097,1
...,...,...,...,...,...,...
Woodbury Heights Borough,1,Gloucester,823,3098,1.246,1
Woodcliff Lake Borough,19,Bergen,268,6128,3.376,1
Woodland Park Borough,8,Passaic,1616,13484,2.939,1
Woolwich Township,25,Gloucester,824,12577,21.072,1


In [13]:
permits2[['Permits', 'County Line']]

Unnamed: 0_level_0,Permits,County Line
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1
Aberdeen Township,3,1
Absecon City,2,1
Alexandria Township,11,1
Allamuchy Township,4,1
Allendale Borough,10,1
...,...,...
Woodbury Heights Borough,1,1
Woodcliff Lake Borough,19,1
Woodland Park Borough,8,1
Woolwich Township,25,1


In [14]:
pd.crosstab(permits2['Permits'], permits2['County Line'])

County Line,0,1
Permits,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5,37
2,3,43
3,1,24
4,5,24
5,3,19
...,...,...
136,0,1
149,0,1
190,0,1
252,0,1


In [15]:
chi2, p, dof, ex = chi2_contingency(pd.crosstab(permits2['Permits'], permits2['County Line']))
p

0.9999999065674084

#This suggests a very strong correlation between 'permits granted' and the 'County Line'. Does it really??

help source for chi square == https://stackoverflow.com/questions/74107904/check-result-of-chi-square-test-on-pandas-columns-data

chi square refresher == https://www.askpython.com/python/examples/chi-square-test

In [16]:
permits2

Unnamed: 0_level_0,Permits,County,DCA,Population 2020,Land Area (mi^2),County Line
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aberdeen Township,3,Monmouth,1301,19329,5.444,1
Absecon City,2,Atlantic,101,9137,5.468,1
Alexandria Township,11,Hunterdon,1001,4809,27.534,1
Allamuchy Township,4,Warren,2101,5335,19.992,1
Allendale Borough,10,Bergen,201,6848,3.097,1
...,...,...,...,...,...,...
Woodbury Heights Borough,1,Gloucester,823,3098,1.246,1
Woodcliff Lake Borough,19,Bergen,268,6128,3.376,1
Woodland Park Borough,8,Passaic,1616,13484,2.939,1
Woolwich Township,25,Gloucester,824,12577,21.072,1


In [17]:
permits2[['Permits', 'Population 2020']]

Unnamed: 0_level_0,Permits,Population 2020
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1
Aberdeen Township,3,19329
Absecon City,2,9137
Alexandria Township,11,4809
Allamuchy Township,4,5335
Allendale Borough,10,6848
...,...,...
Woodbury Heights Borough,1,3098
Woodcliff Lake Borough,19,6128
Woodland Park Borough,8,13484
Woolwich Township,25,12577


In [18]:
pd.crosstab(permits2['Permits'], permits2['Population 2020'])



Population 2020,9,61,224,305,331,391,407,495,540,640,...,90296,90871,92297,95438,103639,107588,137298,159732,292449,311549
Permits,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
149,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
190,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
252,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [19]:
chi2, p, dof, ex = chi2_contingency(pd.crosstab(permits2['Permits'], permits2['Population 2020']))
p

0.4497455394979125

#This still shows a significant relationship between 'permits granted' and 'Population 2020' but at more than half the strength of the relationship between 'permits granted' and 'County Line'.

#Still, same possible problem exists as mentioned with the correlations table, only 29 cases of no county line in a universe of 479.

But: https://study.com/academy/lesson/p-values-in-statistics-significance-definition-explanation.html#:~:text=P%2DValue%20Explanation,-Since%20the%20P&text=A%20low%20P%2Dvalue%2C%20meaning,occurred%20due%20to%20random%20chance.
Frequently Asked Questions
What is the meaning of p-value?
The p-value is the probability that a value as extreme or more than the test statistic observed in a hypothesis test could occur, assuming that the null hypothesis is true. In other words, the p-value is the probability that the results of an experiment occurred due to random chance.

What does p-value .05 mean?
A p-value of .05 means that there is a 5% probability that a test statistic as extreme as or more extreme than the observed test statistic could occur, assuming that the null hypothesis is true. In other words, there is a 5% chance that the observed results occurred by random chance.

How do you interpret p-values?
In general, a low p-value (close to 0) means that it is unlikely that the results of an experiment occurred by random chance. A high p-value (close to 1) indicates that it is more likely that the results occurred by random chance. In a statistical hypothesis test, a low p-value indicates that the difference between certain characteristics in a population is not statistically significant. A high p-value indicates that the difference is statistically significant.
P-Value Explanation
Since the P-value is a probability, its possible values range from 0 to 1. A low P-value, meaning a P-value between 0 and 0.49, indicates that it is unlikely that the results of an experiment occurred due to random chance. In the case of a hypothesis test, a low P-value indicates that the observed difference between two groups, typically a sample and a population, is statistically significant. A high P-value, between 0.5 and 1.0, means that it is more likely that the results occurred by random chance, or that the difference is not statistically significant in the case of a hypothesis test.

The P-value explanation is commonly misinterpreted as the probability of a certain outcome. In hypothesis testing, it is sometimes misinterpreted as the probability that the alternative hypothesis is accepted or as the probability that the null hypothesis will be rejected. However, it is important to remember that the P-value is the probability that the outcome resulted due to random chance. For example, in a coin-toss experiment, the probability of getting either heads or tails when a fair coin is tossed is 50% every time, since there are only two possible outcomes (heads or tails). However, if a person wanted to know the probability that the coin is, in fact, fair and that the results are truly occurring by random chance, they must calculate the P-value.

How is P-Value Calculated?
In a hypothesis test, the P-value is the area under the distribution curve to the right of the test statistic
 when
 is a positive value and to the left of
 when
 is a negative value. P-value can be therefore be calculated with integrals solved either manually or using computational software.



In [20]:
permits2

Unnamed: 0_level_0,Permits,County,DCA,Population 2020,Land Area (mi^2),County Line
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aberdeen Township,3,Monmouth,1301,19329,5.444,1
Absecon City,2,Atlantic,101,9137,5.468,1
Alexandria Township,11,Hunterdon,1001,4809,27.534,1
Allamuchy Township,4,Warren,2101,5335,19.992,1
Allendale Borough,10,Bergen,201,6848,3.097,1
...,...,...,...,...,...,...
Woodbury Heights Borough,1,Gloucester,823,3098,1.246,1
Woodcliff Lake Borough,19,Bergen,268,6128,3.376,1
Woodland Park Borough,8,Passaic,1616,13484,2.939,1
Woolwich Township,25,Gloucester,824,12577,21.072,1


In [21]:
permits2[['Permits', 'Land Area (mi^2)']]

Unnamed: 0_level_0,Permits,Land Area (mi^2)
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1
Aberdeen Township,3,5.444
Absecon City,2,5.468
Alexandria Township,11,27.534
Allamuchy Township,4,19.992
Allendale Borough,10,3.097
...,...,...
Woodbury Heights Borough,1,1.246
Woodcliff Lake Borough,19,3.376
Woodland Park Borough,8,2.939
Woolwich Township,25,21.072


In [22]:
pd.crosstab(permits2['Permits'], permits2['Land Area (mi^2)'])



Land Area (mi^2),0.087,0.099,0.103,0.193,0.273,0.286,0.295,0.331,0.386,0.395,...,66.760,67.047,67.595,68.393,70.238,75.124,75.932,81.417,88.668,99.174
Permits,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,1,1,0,1,1,0,...,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
149,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
190,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
252,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
chi2, p, dof, ex = chi2_contingency(pd.crosstab(permits2['Permits'], permits2['Land Area (mi^2)']))
p

0.04947187140470426

##Remaining Tasks

Produce descriptive stats based upon original file, with county line and without

Proceed With adding jobs dataset

figure out taxes dataset

see if any SWB data would apply