### **The Code**

#### As per usual...

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline

#### Adding the data sets

In [2]:
# Pittsburgh Police Arrest Data
pgh_arrest_data = pd.read_csv("https://data.wprdc.org/datastore/dump/e03a89dd-134a-4ee8-a2bd-62c40aeebc6f",
                           index_col="ARRESTTIME", 
                           parse_dates=True)

#Non-traffic Citations
pgh_citations = pd.read_csv("https://data.wprdc.org/datastore/dump/6b11e87d-1216-463d-bbd3-37460e539d86",
                                index_col="CITEDTIME",
                                parse_dates=True)

# Pittsburgh American Community Survey 2015 - Miscellaneous Data 
pop_data = pd.read_csv("total-population.csv", index_col="Neighborhood")

# Pittsburgh American Community Survey 2015, School Enrollment 
school_enrollment = pd.read_csv('school-enrollment-by-detailed-level-of-school-for-the-population-3-years-and-over.csv', index_col="Neighborhood")

#### Reformatting the data sets to select only the stuff that we need as well as making them look nicer

In [3]:
# Reformat arrest data
pgh_arrest_data = pgh_arrest_data.rename(columns={'INCIDENTNEIGHBORHOOD':'Neighborhood', 'OFFENSES':'Offenses'})
arrest_data = pgh_arrest_data.loc[:,["Offenses","Neighborhood"]].groupby('Neighborhood').count()

# Reformat citation data
pgh_citations = pgh_citations.rename(columns={'NEIGHBORHOOD':'Neighborhood', 'OFFENSES':'Citations'})
citation_data = pgh_citations.loc[:,["Citations","Neighborhood"]].groupby('Neighborhood').count()

# Reformat school data
school_data = school_enrollment.loc[:,["Estimate; Total:", "Estimate; Not enrolled in school"]]

#### Adding Population & Calculating Rates (Dropping All NAN)

In [5]:
population = pop_data.loc[:,"Estimate; Total"]
citation_data['Estimated Population'] = population
citation_data['Citations per Capita'] = citation_data.loc[:,"Citations"]/citation_data.loc[:,'Estimated Population']
citation_data.dropna()

arrest_data['Estimated Population'] = population
arrest_data['Offenses per Capita'] = arrest_data.loc[:,"Offenses"]/arrest_data.loc[:,'Estimated Population']
arrest_data.dropna()

school_data["Ratio not enrolled"] = school_data.loc[:,"Estimate; Not enrolled in school"]/school_data.loc[:,"Estimate; Total:"]

#### Now lets look at the data

In [6]:
arrest_data.head()

Unnamed: 0_level_0,Offenses,Estimated Population,Offenses per Capita
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Allegheny Center,663,1411.0,0.46988
Allegheny West,76,343.0,0.221574
Allentown,546,2558.0,0.213448
Arlington,169,1852.0,0.091253
Arlington Heights,101,272.0,0.371324


In [7]:
citation_data.head()

Unnamed: 0_level_0,Citations,Estimated Population,Citations per Capita
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Allegheny Center,118,1411.0,0.083629
Allegheny West,12,343.0,0.034985
Allentown,91,2558.0,0.035575
Arlington,37,1852.0,0.019978
Arlington Heights,5,272.0,0.018382


In [8]:
school_data.head()

Unnamed: 0_level_0,Estimate; Total:,Estimate; Not enrolled in school,Ratio not enrolled
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Allegheny Center,1389.0,995.0,0.716343
Allegheny West,343.0,282.0,0.822157
Allentown,2520.0,1555.0,0.617063
Arlington,1823.0,1396.0,0.765771
Arlington Heights,213.0,173.0,0.812207


In [13]:
arrest_data.loc[:,["Offenses per Capita"]].sort_values(by=['Offenses per Capita'], ascending=True).plot(kind='bar', font.siz figsize=(100,50))

SyntaxError: keyword can't be an expression (<ipython-input-13-b37269b4789f>, line 1)