# Finding the Best Neighborhood Based on Crime Data

In [24]:
import pandas as pd
import math

In [16]:
# Load the incident records; "crime.csv" is resolved relative to the working directory.
crime = pd.read_csv("crime.csv")

In [17]:
# List every column label of the crime table, one per line.
for column_name in crime.columns:
    print(column_name)

PK
CCR
HIERARCHY
INCIDENTTIME
INCIDENTLOCATION
CLEAREDFLAG
INCIDENTNEIGHBORHOOD
INCIDENTZONE
INCIDENTHIERARCHYDESC
OFFENSES
INCIDENTTRACT
COUNCIL_DISTRICT
PUBLIC_WORKS_DIVISION
X
Y


## Finding the neighborhoods with the most and fewest incidents

In [18]:
## Per-neighborhood incident counters: maps a neighborhood name to the number
## of incident rows counted for it. Starts empty; the counting loop fills it in.
numIncidents = {}

In [19]:
# Count the incidents reported for each neighborhood in one pass over the
# INCIDENTNEIGHBORHOOD column.
# The dict is rebuilt from scratch here so that re-running this cell yields the
# same counts instead of adding every row a second time.
numIncidents = {}
for neighborhood in crime["INCIDENTNEIGHBORHOOD"]:
    # A neighborhood is inserted the first time it is seen, so the dict keeps
    # first-appearance order.
    numIncidents[neighborhood] = numIncidents.get(neighborhood, 0) + 1

In [99]:
# Results of the most recent findNeighborhoods() call: the neighborhoods at or
# above the high threshold, the neighborhoods at or below the low threshold,
# and the two thresholds themselves. The initial values mean "nothing selected yet".
mostIncidents, mostThreshold = [], 0
leastIncidents, leastThreshold = [], math.inf

# Select the neighborhoods at both ends of the incident-count range and store
# the selection in the module-level result variables.
def findNeighborhoods(lessThan, greaterThan):
    """Record which neighborhoods fall at or beyond the given thresholds.

    Both comparisons are inclusive: a neighborhood is selected as "most" when
    its count is >= greaterThan and as "least" when its count is <= lessThan.

    Parameters:
        lessThan: upper bound (inclusive) for the low-incident selection.
        greaterThan: lower bound (inclusive) for the high-incident selection.

    Returns nothing; the results are written to the globals mostIncidents,
    mostThreshold, leastIncidents and leastThreshold.
    """
    global mostIncidents, mostThreshold, leastIncidents, leastThreshold

    # Build both selections first so the globals are only replaced once
    # every comparison has succeeded.
    at_or_above = [name for name, count in numIncidents.items() if count >= greaterThan]
    at_or_below = [name for name, count in numIncidents.items() if count <= lessThan]

    mostIncidents, mostThreshold = at_or_above, greaterThan
    leastIncidents, leastThreshold = at_or_below, lessThan

# Print one heading followed by a "   name: count" line per neighborhood.
def _printGroup(heading, names):
    print(heading)
    # Sort a copy so the stored selection is never reordered by sorting.
    for n in sort(list(names)):
        print("   " + str(n) + ": " + str(numIncidents[n]))


# Print the neighborhoods selected by findNeighborhoods() and list their
# specific number of incidents.
def printExtremes():
    """Print both selections made by the last findNeighborhoods() call.

    Each group is listed in the order returned by sort(), one neighborhood per
    line. The thresholds are inclusive (findNeighborhoods() selects with >=
    and <=), so the headings say "at least" / "at most" rather than
    "more than" / "less than".
    """
    _printGroup(
        "These neighborhoods had at least " + str(mostThreshold) + " incidents:\n",
        mostIncidents,
    )

    print("\n")

    _printGroup(
        "These neighborhoods had at most " + str(leastThreshold) + " incidents:\n",
        leastIncidents,
    )
        
def sort(nList):
    """Return the neighborhoods in nList ordered by ascending incident count.

    Counts are looked up in the module-level numIncidents dict; a name that is
    missing from it raises KeyError. The input list is never modified and a
    new list is always returned. Neighborhoods with equal counts keep their
    relative input order, because the built-in sort is stable.
    """
    return sorted(nList, key=lambda name: numIncidents[name])
                
            


In [100]:
# Select the neighborhoods with at most 25 or at least 150 incidents, then list them.
findNeighborhoods(lessThan=25, greaterThan=150)
printExtremes()

These neighborhoods had more than 150 incidents:

   Central Business District: 198


These neighborhoods had less than 25 incidents:

   Ridgemont: 1
   East Carnegie: 1
   Outside County: 2
   Summer Hill: 2
   Chartiers City: 2
   Regent Square: 2
   Glen Hazel: 3
   Mt. Oliver Boro: 3
   Esplen: 4
   Hays: 4
   Mount Oliver: 5
   Arlington Heights: 6
   Spring Garden: 7
   Swisshelm Park: 8
   Fairywood: 8
   Outside City: 8
   Friendship: 8
   Allegheny West: 9
   Oakwood: 9
   Windgap: 9
   Fineview: 10
   Outside State: 10
   West End: 11
   Banksville: 11
   Beltzhoover: 13
   Upper Lawrenceville: 14
   Morningside: 14
   Polish Hill: 14
   Terrace Village: 14
   Spring Hill-City View: 14
   Manchester: 15
   California-Kirkbride: 15
   Duquesne Heights: 15
   South Shore: 16
   Upper Hill: 16
   South Oakland: 16
   Arlington: 17
   Point Breeze North: 17
   Homewood West: 17
   Lower Lawrenceville: 17
   Westwood: 17
   Lincoln Place: 17
   Troy Hill: 17
   Bon Air: 18
   Poi

## Preliminary Conclusion

Considering only the number of crime incidents reported in each neighborhood, East Carnegie and Ridgemont (one incident each) are the 'best' neighborhoods by this metric.

