In [2]:
import pandas as pd
import matplotlib.pyplot as plt


In [3]:
%matplotlib inline

Getting the overarching metric: combine per-neighborhood citation, incident, and park counts into a single average rank.

In [4]:
# non-traffic citations
citation_df = pd.read_csv("nontraffic_citations.csv")
citation_counts = citation_df['NEIGHBORHOOD'].value_counts()
citation_counts_df = citation_counts.reset_index()
citation_counts_df.columns = ['Neighborhood', 'Citation_Count']

In [5]:
# police incidents
crime = pd.read_csv("crime_rates.csv")
neighborhood = crime['INCIDENTNEIGHBORHOOD'].value_counts()
crime_counts_df = neighborhood.reset_index()
crime_counts_df.columns = ['Neighborhood','Incident_Count']

In [6]:
# parks
park_df = pd.read_csv("parks.csv")
park_count = park_df['neighborhood'].value_counts()
park_count_df = park_count.reset_index()
park_count_df.columns = ['Neighborhood', 'Park_Count']


In [7]:
# merge data frames into one
merge_dfs = pd.merge(citation_counts_df, crime_counts_df, on='Neighborhood')
metrics_df = pd.merge(merge_dfs,park_count_df, on='Neighborhood')


In [8]:
#normalize data
citations = metrics_df['Citation_Count']
metrics_df['Citation_Count'] = (citations - citations.min()) / (citations.max() - citations.min())

incidents = metrics_df['Incident_Count']
metrics_df['Incident_Count'] = (incidents - incidents.min()) / (incidents.max() - incidents.min())

parks = metrics_df['Park_Count']
metrics_df['Park_Count'] = (parks - parks.min()) / (parks.max() - parks.min())

print(metrics_df)

                 Neighborhood  Citation_Count  Incident_Count  Park_Count
0            South Side Flats        1.000000        0.963581    0.454545
1   Central Business District        0.586126        1.000000    0.818182
2             Central Oakland        0.192828        0.251250    0.272727
3                 North Shore        0.167549        0.174043    0.181818
4                     Carrick        0.128748        0.596337    0.181818
..                        ...             ...             ...         ...
67                    Oakwood        0.000882        0.014210    0.000000
68              Regent Square        0.000882        0.011157    0.000000
69                       Hays        0.000882        0.011263    0.000000
70              New Homestead        0.000588        0.000000    0.000000
71             Swisshelm Park        0.000000        0.004473    0.181818

[72 rows x 4 columns]


In [9]:
# Rank neighborhoods, citations and incidents lower is better, parks higher is better
metrics_df['Citation_Rank'] = metrics_df['Citation_Count'].rank(ascending=True)
metrics_df['Incident_Rank'] = metrics_df['Incident_Count'].rank(ascending=True)
metrics_df['Park_Rank'] = metrics_df['Park_Count'].rank(ascending=False)

print(metrics_df)


                 Neighborhood  Citation_Count  Incident_Count  Park_Count  \
0            South Side Flats        1.000000        0.963581    0.454545   
1   Central Business District        0.586126        1.000000    0.818182   
2             Central Oakland        0.192828        0.251250    0.272727   
3                 North Shore        0.167549        0.174043    0.181818   
4                     Carrick        0.128748        0.596337    0.181818   
..                        ...             ...             ...         ...   
67                    Oakwood        0.000882        0.014210    0.000000   
68              Regent Square        0.000882        0.011157    0.000000   
69                       Hays        0.000882        0.011263    0.000000   
70              New Homestead        0.000588        0.000000    0.000000   
71             Swisshelm Park        0.000000        0.004473    0.181818   

    Citation_Rank  Incident_Rank  Park_Rank  
0            72.0           7

In [10]:
#avg ranks
metrics_df["Avg_rank"] = metrics_df[['Citation_Rank', 'Incident_Rank', 'Park_Rank']].mean(axis=1)
print(metrics_df)

                 Neighborhood  Citation_Count  Incident_Count  Park_Count  \
0            South Side Flats        1.000000        0.963581    0.454545   
1   Central Business District        0.586126        1.000000    0.818182   
2             Central Oakland        0.192828        0.251250    0.272727   
3                 North Shore        0.167549        0.174043    0.181818   
4                     Carrick        0.128748        0.596337    0.181818   
..                        ...             ...             ...         ...   
67                    Oakwood        0.000882        0.014210    0.000000   
68              Regent Square        0.000882        0.011157    0.000000   
69                       Hays        0.000882        0.011263    0.000000   
70              New Homestead        0.000588        0.000000    0.000000   
71             Swisshelm Park        0.000000        0.004473    0.181818   

    Citation_Rank  Incident_Rank  Park_Rank   Avg_rank  
0            72.0 

In [11]:
best_neighborhood = metrics_df.sort_values(by='Avg_rank')
print(best_neighborhood)

           Neighborhood  Citation_Count  Incident_Count  Park_Count  \
71       Swisshelm Park        0.000000        0.004473    0.181818   
66        Spring Garden        0.001176        0.030419    0.090909   
63          Polish Hill        0.003527        0.038103    0.090909   
53  Upper Lawrenceville        0.008230        0.068154    0.181818   
51             Westwood        0.008818        0.066575    0.181818   
..                  ...             ...             ...         ...   
4               Carrick        0.128748        0.596337    0.181818   
12       Homewood North        0.059083        0.322667    0.090909   
22           East Hills        0.042916        0.242829    0.000000   
13        North Oakland        0.056143        0.225514    0.000000   
11       Homewood South        0.064374        0.359718    0.000000   

    Citation_Rank  Incident_Rank  Park_Rank   Avg_rank  
71            1.0            3.0       25.0   9.666667  
66            6.0           10.0 