# Finding the Best Neighborhood Based on Crime Data

In [24]:
import pandas as pd
import math

In [16]:
# Load the crime incident table; the relative path expects crime.csv in the working directory.
crime = pd.read_csv("crime.csv")

In [113]:
# List every column name of the crime table, one per line.
for columnName in crime.columns:
    print(columnName)

PK
CCR
HIERARCHY
INCIDENTTIME
INCIDENTLOCATION
CLEAREDFLAG
INCIDENTNEIGHBORHOOD
INCIDENTZONE
INCIDENTHIERARCHYDESC
OFFENSES
INCIDENTTRACT
COUNCIL_DISTRICT
PUBLIC_WORKS_DIVISION
X
Y


## Finding the Neighborhoods With Low/High Occurrences

In [18]:
# Mapping of neighborhood name -> number of incidents; starts out empty.
numIncidents = dict()

In [19]:
# Count the incidents recorded for each neighborhood.
# The dict is rebuilt from scratch so that re-running this cell cannot double
# the counts, and the column is iterated directly instead of doing a chained
# crime[column][row] lookup for every row.
numIncidents = {}
for neighborhood in crime["INCIDENTNEIGHBORHOOD"]:
    numIncidents[neighborhood] = numIncidents.get(neighborhood, 0) + 1

In [103]:
# Module-level results that findNeighborhoods() overwrites and printExtremes() reads.
# mostIncidents / mostThreshold: neighborhoods with counts >= the high threshold, and that threshold.
# leastIncidents / leastThreshold: neighborhoods with counts <= the low threshold, and that threshold.
mostIncidents, mostThreshold = [], 0
leastIncidents, leastThreshold = [], math.inf

# Refresh the module-level result variables for a pair of inclusive thresholds.
# Per the original note, counts in this data set are known to range from 1 to 198,
# so lessThan and greaterThan are expected to fall in that range.
def findNeighborhoods(lessThan, greaterThan):
    """Select the low- and high-incident neighborhoods and store them globally.

    lessThan:    neighborhoods with a count <= this value go to leastIncidents.
    greaterThan: neighborhoods with a count >= this value go to mostIncidents.

    Both lists are ordered with sort(); the thresholds used are stored in
    leastThreshold and mostThreshold. Nothing is returned.
    """
    global mostIncidents, mostThreshold, leastIncidents, leastThreshold

    atOrAbove = [name for name, count in numIncidents.items() if count >= greaterThan]
    atOrBelow = [name for name, count in numIncidents.items() if count <= lessThan]

    mostIncidents, mostThreshold = sort(atOrAbove), greaterThan
    leastIncidents, leastThreshold = sort(atOrBelow), lessThan

# Print the neighborhoods selected by findNeighborhoods() together with their incident counts.
def printExtremes():
    """Print the high-incident group, then the low-incident group.

    Reads the module-level mostIncidents/mostThreshold and
    leastIncidents/leastThreshold, and looks each name up in numIncidents.
    The headers say "at least" / "at most" because findNeighborhoods() selects
    with inclusive comparisons (>= and <=).
    """
    groups = (
        ("at least", mostThreshold, mostIncidents),
        ("at most", leastThreshold, leastIncidents),
    )
    for position, (wording, threshold, names) in enumerate(groups):
        if position:
            # Blank separator between the two groups.
            print("\n")
        print("These neighborhoods had " + wording + " " + str(threshold) + " incidents in the past 30 days (3/14/2022 - 4/13/2022):\n")
        for n in names:
            print("   " + str(n) + ": " + str(numIncidents[n]))
        
def sort(nList):
    """Return the neighborhood names in nList ordered by ascending incident count.

    nList: list of keys of the module-level numIncidents dict.

    Returns a new list; nList itself is left untouched. Names with equal counts
    keep their relative input order because the built-in sort is stable.
    Raises KeyError if a name is missing from numIncidents.

    The built-in sort replaces a hand-written first-element-pivot quicksort,
    which recursed once per element on already-ordered input and could exceed
    the recursion limit on long lists.
    """
    return sorted(nList, key=lambda name: numIncidents[name])
                
            


In [105]:
# Select neighborhoods with at most 25 or at least 125 incidents, then report both groups.
findNeighborhoods(lessThan=25, greaterThan=125)
printExtremes()

These neighborhoods had more than 125 incidents in the past 30 days (3/14/2022 - 4/13/2022):

   South Side Flats: 127
   Carrick: 129
   Central Business District: 198


These neighborhoods had less than 25 incidents in the past 30 days (3/14/2022 - 4/13/2022):

   Ridgemont: 1
   East Carnegie: 1
   Outside County: 2
   Summer Hill: 2
   Chartiers City: 2
   Regent Square: 2
   Glen Hazel: 3
   Mt. Oliver Boro: 3
   Esplen: 4
   Hays: 4
   Mount Oliver: 5
   Arlington Heights: 6
   Spring Garden: 7
   Swisshelm Park: 8
   Fairywood: 8
   Outside City: 8
   Friendship: 8
   Allegheny West: 9
   Oakwood: 9
   Windgap: 9
   Fineview: 10
   Outside State: 10
   West End: 11
   Banksville: 11
   Beltzhoover: 13
   Upper Lawrenceville: 14
   Morningside: 14
   Polish Hill: 14
   Terrace Village: 14
   Spring Hill-City View: 14
   Manchester: 15
   California-Kirkbride: 15
   Duquesne Heights: 15
   South Shore: 16
   Upper Hill: 16
   South Oakland: 16
   Arlington: 17
   Point Breeze Nort

## Interpreting With Population Data

In [106]:
# Load the population table; the relative path expects population.csv in the working directory.
pp = pd.read_csv("population.csv")

In [115]:
# List every column name of the population table, one per line.
for columnName in pp.columns:
    print(columnName)

Neighborhood
Id
Estimate; Total
Margin of Error; Total


In [116]:
# Lookup of neighborhood name -> value of the "Estimate; Total" column.
populations = {
    pp.at[row, "Neighborhood"]: pp.at[row, "Estimate; Total"]
    for row in pp.index
}

In [129]:
# Spot-check a single entry of the population lookup.
print(populations["Mt. Oliver"])

598.0


In [155]:
# Population divided by incident count for every neighborhood in `populations`.
populationCrimeRatios = {}

# These two neighborhoods are spelled differently in the crime data, so they are
# paired by hand instead of by the name match in the loop below.
# NOTE(review): their populations are hard-coded rather than read from
# `populations`, and 3399.0 does not match the 598.0 that
# populations["Mt. Oliver"] prints earlier in this notebook -- confirm which
# figure is intended.
populationCrimeRatios["Central Northside"] = 2892.0 / numIncidents["Central North Side"]
populationCrimeRatios["Mt. Oliver"] = 3399.0 / numIncidents["Mount Oliver"]

for neighborhood in populations.keys():
    if neighborhood in ("Central Northside", "Mt. Oliver"):
        continue  # already paired by hand above
    # A neighborhood with no row in the crime data is recorded with zero incidents.
    incidentCount = numIncidents.setdefault(neighborhood, 0)
    # Zero incidents gets the placeholder ratio 0 (no division is possible).
    # Assigning it for missing neighborhoods as well makes a re-run of this cell
    # give the same result as the first run, even though numIncidents is extended here.
    if incidentCount != 0:
        populationCrimeRatios[neighborhood] = populations[neighborhood] / incidentCount
    else:
        populationCrimeRatios[neighborhood] = 0
            
def findProportions(greaterThan):
    """Return the neighborhoods whose population/crime ratio is >= greaterThan.

    Names come back in the iteration order of the module-level
    populationCrimeRatios dict.
    """
    return [
        name
        for name, ratio in populationCrimeRatios.items()
        if ratio >= greaterThan
    ]

# Print every neighborhood with a ratio of at least 78, followed by how many there are.
highRatioNeighborhoods = findProportions(78)
for i in highRatioNeighborhoods:
    print(i)
n = len(highRatioNeighborhoods)
print(n)

Central Northside
Mt. Oliver
Arlington
Banksville
Beechview
Beltzhoover
Bloomfield
Bluff
Brighton Heights
Brookline
Carrick
Central Lawrenceville
Central Oakland
Chartiers City
Crafton Heights
Duquesne Heights
East Carnegie
Elliott
Fairywood
Fineview
Friendship
Garfield
Glen Hazel
Greenfield
Hays
Hazelwood
Highland Park
Lincoln Place
Lincoln-Lemington-Belmar
Lower Lawrenceville
Manchester
Marshall-Shadeland
Morningside
Mount Washington
North Oakland
Oakwood
Overbrook
Perry North
Point Breeze
Point Breeze North
Polish Hill
Regent Square
Ridgemont
Shadyside
Sheraden
South Oakland
South Side Slopes
Spring Garden
Spring Hill-City View
Squirrel Hill North
Squirrel Hill South
Stanton Heights
Summer Hill
Swisshelm Park
Terrace Village
Troy Hill
Upper Hill
Upper Lawrenceville
West Oakland
Westwood
Windgap
61


In [149]:
# Show Carrick's population-per-incident ratio.
print(populationCrimeRatios["Carrick"])

78.46511627906976


In [151]:
# Non-null entries per column of the population table.
pp.count()

Neighborhood              91
Id                        91
Estimate; Total           91
Margin of Error; Total    91
dtype: int64

#### 61 of the 91 neighborhoods in the population table (about 67%) have a population-to-crime ratio of at least 78; Carrick's ratio (about 78.5) is at the low end of that group.

Given that Carrick is at a reasonable rank for its crime-population ratio, Carrick is supported as the potential best neighborhood based on this metric as well as the education metric.