In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
coviddata = pd.read_csv("covid_19_cases_by_place.csv", index_col="neighborhood_municipality", parse_dates=True)

coviddata.drop('Undefined', inplace = True)
coviddata.drop(['update_date'], axis=1, inplace=True)

##remove neighborhoods that tested less than 100 people
index = coviddata[coviddata['indv_tested'] < 100 ].index 
coviddata.drop(index, inplace=True)

coviddata['percentage'] = (coviddata['cases']/coviddata['indv_tested'])*100
min = coviddata['percentage'].min()
max = coviddata['percentage'].max()

print('\033[1m' + '-----Lowest Percentage of Positive Cases per Covid Test-----'+ '\033[0m') #prints in bold, pretty cool
print(coviddata.iloc[coviddata['percentage'].argmin()])
print('min is', int(min),'%')

print('\033[1m' + '\n-----Highest Percentage of Positive Cases per Covid Test-----'+ '\033[0m')
print(coviddata.iloc[coviddata['percentage'].argmax()])
print('max is', int(max),'%')

print('\033[1m' + '\n-----Top 15 neighborhoods for minimum covid cases-----'+ '\033[0m')
#sort data by lowest covid percentage
coviddata_sorted = coviddata.sort_values(by='percentage')

#make clean dataframe with rankings
covidNeighborhoods = list(coviddata_sorted.index)
covidRanks = pd.DataFrame(columns=['rank', 'neighborhood'])
covidRanks['neighborhood'] = covidNeighborhoods
covidrankings = list(covidRanks.index)
covidRanks['rank'] = covidrankings
covidRanks.head(15)

[1m-----Lowest Percentage of Positive Cases per Covid Test-----[0m
indv_tested    5548.000000
cases           374.000000
deaths            6.000000
percentage        6.741168
Name: Squirrel Hill North (Pittsburgh), dtype: float64
min is 6 %
[1m
-----Highest Percentage of Positive Cases per Covid Test-----[0m
indv_tested    108.000000
cases           34.000000
deaths           0.000000
percentage      31.481481
Name: West Elizabeth, dtype: float64
max is 31 %
[1m
-----Top 15 neighborhoods for minimum covid cases-----[0m


Unnamed: 0,rank,neighborhood
0,0,Squirrel Hill North (Pittsburgh)
1,1,Edgeworth
2,2,Friendship (Pittsburgh)
3,3,Point Breeze (Pittsburgh)
4,4,Shadyside (Pittsburgh)
5,5,Edgewood
6,6,Squirrel Hill South (Pittsburgh)
7,7,Swisshelm Park (Pittsburgh)
8,8,Regent Square (Pittsburgh)
9,9,North Shore (Pittsburgh)


In [3]:
crimedata = pd.read_csv("non_traffic_citations.csv", parse_dates=True)
crimedata.drop(['PK'], axis=1, inplace=True)
crimedata.drop(['CCR'], axis=1, inplace=True)
crimedata.drop(['GENDER'], axis=1, inplace=True)
crimedata.drop(['RACE'], axis=1, inplace=True)
crimedata.drop(['AGE'], axis=1, inplace=True)
crimedata.drop(['CITEDTIME'], axis=1, inplace=True)
crimedata.drop(['INCIDENTLOCATION'], axis=1, inplace=True)
crimedata.drop(['INCIDENTTRACT'], axis=1, inplace=True)
crimedata.drop(['COUNCIL_DISTRICT'], axis=1, inplace=True)
crimedata.drop(['PUBLIC_WORKS_DIVISION'], axis=1, inplace=True)
crimedata.drop(['X'], axis=1, inplace=True)
crimedata.drop(['Y'], axis=1, inplace=True)
crimedata.drop(['OFFENSES'], axis=1, inplace=True)
crimedata.drop(['ZONE'], axis=1, inplace=True)

#ignore occurences with no listed neighborhood
crimedata = crimedata[crimedata["NEIGHBORHOOD"].str.contains('Unable To Retrieve Address')==False]

# Add column of occurences of each neighborhood
crimedata['counts'] = crimedata['NEIGHBORHOOD'].map(crimedata['NEIGHBORHOOD'].value_counts())

# remove all the duplictes so each neighborhod is listed once
crimedata = crimedata[~(crimedata.duplicated(['NEIGHBORHOOD']))].reset_index(drop=True)

# sort data by number of reported crimes
crimedata_sorted = crimedata.sort_values(by='counts', ignore_index=True,)

#add ranks column
crimeNeighborhood = (crimedata_sorted.NEIGHBORHOOD)
crimeRanks = pd.DataFrame(columns=['rank', 'neighborhood'])
crimeRanks['neighborhood'] = crimeNeighborhood
crimerankings = list(crimeRanks.index)
crimeRanks['rank'] = crimerankings


print('\033[1m' + '-----Top 15 Neighborhoods with Lowest Reported Non-Traffic Crimes' + '\033[0m')
crimeRanks.head(15)



[1m-----Top 15 Neighborhoods with Lowest Reported Non-Traffic Crimes[0m


Unnamed: 0,rank,neighborhood
0,0,Chartiers City
1,1,Mt. Oliver Boro
2,2,Ridgemont
3,3,Outside State
4,4,St. Clair
5,5,Swisshelm Park
6,6,Mt. Oliver Neighborhood
7,7,Oakwood
8,8,New Homestead
9,9,Summer Hill


In [None]:
firedata = pd.read_csv("Fire_Data.csv", parse_dates=True)

#ignore data points with no neighborhood listed
firedata = firedata[firedata["neighborhood"].str.contains('NaN')==False]

# Add column of occurences of each neighborhood
firedata['counts'] = firedata['neighborhood'].map(firedata['neighborhood'].value_counts())

# remove all the duplictes so each neighborhod is listed once
firedata = firedata[~(firedata.duplicated(['neighborhood']))].reset_index(drop=True)

# sort data by number of fires
firedata_sorted = firedata.sort_values(by='counts', ignore_index=True,)
firedata_sorted.head(50)

# make cleaner dataframe with ranks
fireNeighborhoods = (firedata_sorted.neighborhood)
fireRanks = pd.DataFrame(columns=['rank', 'neighborhood'])
fireRanks['neighborhood'] = fireNeighborhoods
firerankings = list(fireRanks.index)
fireRanks['rank'] = firerankings

fireRanks.head()

In [13]:
allneighborhoods = fireNeighborhoods + crimeNeighborhood + covidNeighborhoods
print('total is ',len(allneighborhoods), 'fire has',len(fireNeighborhoods))

ValueError: operands could not be broadcast together with shapes (95,) (208,) 

https://data.wprdc.org/dataset/allegheny-county-covid-19-tests-cases-and-deaths

https://data.wprdc.org/dataset/allegheny-county-crash-data

https://data.wprdc.org/dataset/non-traffic-citations

https://data.wprdc.org/dataset/fire-incidents-in-city-of-pittsburgh

In [None]:
crashdata = pd.read_csv("Crash_Data.csv", parse_dates=True)
muncodes = pd.read_csv("municipalitycodes.csv", parse_dates=True)
crashdata.drop(crashdata.iloc[:, 0:3], inplace = True, axis = 1)
crashdata.drop(crashdata.iloc[:, 1:189], inplace = True, axis = 1)

# Add column of occurences of each neighborhood
crashdata['counts'] = crashdata['MUNICIPALITY'].map(crashdata['MUNICIPALITY'].value_counts())

# remove all the duplictes so each neighborhod is listed once
crashdata = crashdata[~(crashdata.duplicated(['MUNICIPALITY']))].reset_index(drop=True)

# sort data by number of crashes
crashdata_sorted = crashdata.sort_values(by='counts', ignore_index=True,)

for i in range(len(crashdata_sorted.MUNICIPALITY)):
    for j in range(len(muncodes.Code)):
        if crashdata_sorted.MUNICIPALITY[i] == muncodes.Code[j]:
            crashdata_sorted.loc[i, 'MUNICIPALITY'] = muncodes.Municipality[j]

#add ranks column
munlist = (crashdata_sorted.MUNICIPALITY)
crashRanks = pd.DataFrame(columns=['rank', 'neighborhood'])
crashRanks['neighborhood'] = munlist
rank_list3 = list(crashRanks.index)
crashRanks['rank'] = rank_list3

crashRanks.head(40)


