In [71]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [21]:
# Column layout of the cleaned table. The officer's full name (`name`) takes
# the place of the separate first/last name columns from the raw export.
colOrder = [
    'unique_mos_id', 'name', 'command_now', 'shield_no', 'complaint_id',
    'month_received', 'year_received', 'month_closed', 'year_closed',
    'command_at_incident', 'rank_abbrev_incident', 'rank_abbrev_now',
    'rank_now', 'rank_incident', 'mos_ethnicity', 'mos_gender',
    'mos_age_incident', 'complainant_ethnicity', 'complainant_gender',
    'complainant_age_incident', 'fado_type', 'allegation', 'precinct',
    'contact_reason', 'outcome_description', 'board_disposition',
]

# Complainant ethnicity labels whose rows are removed from the table.
race = ['Unknown', 'Refused', 'American Indian']

# Load the allegations export and clean it in a single pass:
#   1. build the full officer name and drop its two source columns,
#   2. put the columns in the layout above,
#   3. title-case the allegation text,
#   4. keep only rows whose complainant ethnicity is not in `race`
#      (rows with a missing ethnicity are kept).
db = (
    pd.read_csv('CCRB-Complaint-Data_202007271729/allegations_202007271729.csv')
    .assign(name=lambda frame: frame['first_name'] + ' ' + frame['last_name'])
    .drop(columns=['first_name', 'last_name'])
    .loc[:, colOrder]
    .assign(allegation=lambda frame: frame['allegation'].str.title())
    .loc[lambda frame: ~frame['complainant_ethnicity'].isin(race)]
)

In [3]:
# Notebook-wide pandas display settings: never truncate columns, and cap the
# number of rows shown for a frame at 10.
display_settings = {
    'display.max_columns': None,
    'display.max_rows': 10,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

In [22]:
# Preview the cleaned allegations table; as the last expression of the cell it
# is rendered as the cell's output.
db

Unnamed: 0,unique_mos_id,name,command_now,shield_no,complaint_id,month_received,year_received,month_closed,year_closed,command_at_incident,rank_abbrev_incident,rank_abbrev_now,rank_now,rank_incident,mos_ethnicity,mos_gender,mos_age_incident,complainant_ethnicity,complainant_gender,complainant_age_incident,fado_type,allegation,precinct,contact_reason,outcome_description,board_disposition
0,10004,Jonathan Ruiz,078 PCT,8409,42835,7,2019,5,2020,078 PCT,POM,POM,Police Officer,Police Officer,Hispanic,M,32,Black,Female,38.0,Abuse of Authority,Failure To Provide Rtka Card,78.0,Report-domestic dispute,No arrest made or summons issued,Substantiated (Command Lvl Instructions)
1,10007,John Sears,078 PCT,5952,24601,11,2011,8,2012,PBBS,POM,POM,Police Officer,Police Officer,White,M,24,Black,Male,26.0,Discourtesy,Action,67.0,Moving violation,Moving violation summons issued,Substantiated (Charges)
2,10007,John Sears,078 PCT,5952,24601,11,2011,8,2012,PBBS,POM,POM,Police Officer,Police Officer,White,M,24,Black,Male,26.0,Offensive Language,Race,67.0,Moving violation,Moving violation summons issued,Substantiated (Charges)
3,10007,John Sears,078 PCT,5952,26146,7,2012,9,2013,PBBS,POM,POM,Police Officer,Police Officer,White,M,25,Black,Male,45.0,Abuse of Authority,Question,67.0,PD suspected C/V of violation/crime - street,No arrest made or summons issued,Substantiated (Charges)
4,10009,Noemi Sierra,078 PCT,24058,40253,8,2018,2,2019,078 PCT,POF,POF,Police Officer,Police Officer,Hispanic,F,39,,,16.0,Force,Physical Force,67.0,Report-dispute,Arrest - other violation/crime,Substantiated (Command Discipline A)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33353,9992,Tomasz Pulawski,078 PCT,2642,35671,8,2016,2,2017,066 PCT,POM,SGT,Sergeant,Police Officer,White,M,36,Asian,Male,21.0,Discourtesy,Word,66.0,Moving violation,Moving violation summons issued,Unsubstantiated
33354,9992,Tomasz Pulawski,078 PCT,2642,35671,8,2016,2,2017,066 PCT,POM,SGT,Sergeant,Police Officer,White,M,36,Asian,Male,21.0,Abuse of Authority,Interference With Recording,66.0,Moving violation,Moving violation summons issued,Unsubstantiated
33355,9992,Tomasz Pulawski,078 PCT,2642,35671,8,2016,2,2017,066 PCT,POM,SGT,Sergeant,Police Officer,White,M,36,Asian,Male,21.0,Abuse of Authority,Search (Of Person),66.0,Moving violation,Moving violation summons issued,Substantiated (Formalized Training)
33356,9992,Tomasz Pulawski,078 PCT,2642,35671,8,2016,2,2017,066 PCT,POM,SGT,Sergeant,Police Officer,White,M,36,Asian,Male,21.0,Abuse of Authority,Vehicle Search,66.0,Moving violation,Moving violation summons issued,Substantiated (Formalized Training)


In [36]:
# Number of allegation rows per complainant ethnicity.
# size() counts the rows of each group directly. Counting the non-null values
# of `name` instead would silently skip any row whose officer name is missing
# (the name is NaN whenever either name part is absent in the raw data).
ethnicityCount = (
    db.groupby('complainant_ethnicity')
      .size()
      .reset_index(name='count')
)
ethnicityCount

Unnamed: 0,complainant_ethnicity,count
0,Asian,532
1,Black,17114
2,Hispanic,6424
3,Other Race,677
4,White,2783


In [148]:
# Display order of the complainant ethnicities. One list feeds both the
# category ordering (legend/colour assignment) and the x axis so the two can
# never disagree; the labels must match the data exactly ("Other Race", not
# "Other").
ethnicity_order = ['Black', 'Hispanic', 'White', 'Asian', 'Other Race']

# The two largest groups get saturated colours; the remaining groups are greys.
color_discrete_map = {
    "Black": "#165C96", "Hispanic": "#CC5A00", "White": "#cccccc", "Asian": "#aaaaaa", "Other Race": "#888888"
}

# One bar per complainant ethnicity, with bar height equal to its row count.
fig = px.bar(
    ethnicityCount,
    x='complainant_ethnicity',
    y='count',
    color='complainant_ethnicity',
    color_discrete_map=color_discrete_map,
    category_orders={"complainant_ethnicity": ethnicity_order},
    title='Black and Hispanic Complainants Are The Most Vocal In New York',
    width=700,
    height=450,
)
fig.update_xaxes(categoryorder='array', categoryarray=ethnicity_order)
fig.update_layout(title_font=dict(size=18))

# Save a static copy next to the notebook, then render the figure inline.
fig.write_image('for.png')
fig.show()

In [115]:
# Precinct numbers 40 through 52, as floats to match the float-valued
# `precinct` column.
precinct = np.arange(40.0, 53.0)

# Allegation rows per complainant ethnicity within those precincts.
# size() counts rows directly, so rows with a missing officer name are not
# skipped. The result column keeps the label `name` because later cells read
# bronx['name'].
bronx = (
    db[db['precinct'].isin(precinct)]
    .groupby('complainant_ethnicity')
    .size()
    .reset_index(name='name')
)

# Convert the counts into each group's percentage share, to one decimal place.
bronx['name'] = (bronx['name'] / bronx['name'].sum() * 100).round(1)

# Order the rows by ethnicity label rather than by hard-coded row positions:
# a group that is absent from the data then simply does not appear, instead of
# producing an all-NaN row and shifting the others. Original index labels are
# kept; any label not listed here sorts to the end.
display_order = ['Black', 'Hispanic', 'White', 'Asian', 'Other Race']
rank = {label: position for position, label in enumerate(display_order)}
row_order = bronx['complainant_ethnicity'].map(rank).sort_values().index
bronx = bronx.loc[row_order]
bronx

Unnamed: 0,complainant_ethnicity,name
1,Black,55.4
2,Hispanic,37.5
4,White,4.2
0,Asian,0.8
3,Other Race,2.0


In [117]:
# Reference population shares (percent) per group, entered by hand.
# NOTE(review): the source and year of these figures are not recorded in this
# notebook - confirm and cite them.
# The frame is built in a single constructor call so that `Percent` is a
# proper float column; growing an empty frame row by row with .loc can leave
# it as object dtype.
bronxCensus = pd.DataFrame({
    'Ethnicity': ['Asian', 'Black', 'Hispanic', 'Other Race', 'White'],
    'Percent': [4.6, 28.5, 54.8, 1.3, 8.9],
})

# Reorder rows to Black, Hispanic, White, Asian, Other Race (the same order
# used for the complaint percentages), keeping the original row labels.
bronxCensus = bronxCensus.reindex([1, 2, 4, 0, 3])
bronxCensus

Unnamed: 0,Ethnicity,Percent
1,Black,28.5
2,Hispanic,54.8
4,White,8.9
0,Asian,4.6
3,Other Race,1.3


In [151]:
# Constant added to every bar height before plotting on the logarithmic
# y axis (presumably so that a 0% value would still be drawable; the "0" tick
# sits at this offset).
offset = 0.1


def _percent_bar(labels, percents, name, color):
    """Build one grouped-bar trace for a series of percentages.

    Parameters:
        labels: category labels for the x axis.
        percents: percentage values, one per label.
        name: legend entry for the trace.
        color: bar colour.

    Returns:
        A ``go.Bar`` whose heights are ``percents + offset`` while the text
        drawn inside each bar shows the unshifted percentage to one decimal.
    """
    return go.Bar(
        x=labels,
        y=percents + offset,
        name=name,
        marker_color=color,
        text=[f"{v:.1f}%" for v in percents],
        textposition='inside',
        textfont=dict(color="#d0d0d0", size=12),
    )


fig = go.Figure()

# Complaint bars
fig.add_trace(_percent_bar(bronx['complainant_ethnicity'], bronx['name'], 'Complainants', 'steelblue'))

# Census bars
fig.add_trace(_percent_bar(bronxCensus['Ethnicity'], bronxCensus['Percent'], 'Census', 'orange'))

# Tick marks for the log axis. Every position is shifted by the same offset as
# the bars, so each label lines up with the percentage it names (previously
# only the "0" tick was shifted and the others sat 0.1 too low).
tick_percents = [0, 1, 5, 10, 20, 50]
tickvals = [percent + offset for percent in tick_percents]
ticktext = [str(percent) for percent in tick_percents]

fig.update_layout(
    title="New York Police Department Complaints Roughly Mirror City Demographics",
    xaxis=dict(
        title="Race",
        showgrid=False,
        zeroline=False,
        showline=False
    ),
    yaxis=dict(
        type='log',  # logarithmic scale compresses the differences between tall bars
        tickvals=tickvals,
        ticktext=ticktext,
        showgrid=False,
        zeroline=False,
        title=dict(text='Percentage (in Bronx)', font=dict(color='grey'))
    ),
    plot_bgcolor='lightgray',
    paper_bgcolor='white',
    barmode='group',  # complaint and census bars side by side per category
    width=700,
    height=450
)

# Save a static copy next to the notebook, then render the figure inline.
fig.write_image('against.png')
fig.show()