In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

The US racial distribution according to the U.S. Census Bureau is:

1. White: 60.1% (Non-Hispanic)

2. Hispanic: 18.5%

3. Black: 12.2%

4. Asian: 5.6%

5. Multiple Races: 2.8%

6. American Indian/Alaska Native: 0.7%

7. Native Hawaiian/Other Pacific Islander: 0.2%

Currently, the white population makes up the vast majority of the United States’ population, accounting for some 250.52 million people in 2019.

The Black or African American resident population totaled 44.1 million people in the same year.

The Hispanic population accounted for 60.57 million inhabitants of the US in 2019.

In 2019, there were around 19.5 million people of Asian origin living in the United States.



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly import tools
from plotly.subplots import make_subplots

In [None]:
# Read the shootings CSV from the Kaggle input directory and preview the first rows.
shootings_csv = '/kaggle/input/us-police-shootings/shootings.csv'
df = pd.read_csv(shootings_csv)
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Print each column's dtype and non-null count.
df.info()

In [None]:
# Summary statistics (pandas' default `describe` output) for the frame.
df.describe()

In [None]:
# Number of missing values in each column.
df.isna().sum()

Create a new column, "year", to analyse how US police shootings have increased or decreased from year to year.

In [None]:
# Parse `date` into datetimes once, then expose the calendar year as its own column.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
df['year'] = parsed_dates.dt.year

# Univariate Analysis

In [None]:
# Number of distinct values in the `armed` column.
df['armed'].nunique()

In [None]:
# Row count for each `arms_category` value, most frequent first.
df['arms_category'].value_counts()

For the analysis I am not using the `armed` column; I only use `arms_category`.

In [None]:
# Bar chart of how many records fall into each arms category.
armed = df['arms_category'].value_counts()
fig = px.bar(
    x=armed.index,
    y=armed,
    title='Arms Category Used By Person Police Shoot',
)
fig.update_layout(height=600, width=900)
fig.show()

Guns made up the majority of the weapons carried by the individuals police shot.

In [None]:
# Age histogram with one bin per year of age.
# The bin window is derived from the data instead of being hard-coded (it was
# 15-74), so records at either end of the age range are not left out.
age_min = int(df['age'].min())
age_max = int(df['age'].max()) + 1  # +1 so the largest age still falls inside a bin

fig = go.Figure()

fig.add_trace(go.Histogram(x=df['age'],
                           xbins=dict(start=age_min, end=age_max, size=1),
                           opacity=1))

fig.update_layout(title_text='Age Distribution',
                  xaxis_title='Age',
                  yaxis_title='Count',
                  bargap=0.05,
                  xaxis={'showgrid': False},
                  yaxis={'showgrid': False},
                  template='seaborn',
                  height=600,
                  width=1000)
fig.show()

In [None]:
# 2x2 grid of pie charts for the columns this notebook treats as binary.
colors = ['lightblue', 'navy']

# (column, subplot title) pairs in row-major grid order.
binary_panels = [
    ('gender', 'Gender Distribution'),
    ('manner_of_death', 'Manner Of Death Distribution'),
    ('signs_of_mental_illness', 'Mental Illness Distribution'),
    ('body_camera', 'Body Camera Distribution'),
]

fig = make_subplots(
    rows=2, cols=2,
    specs=[[{'type': 'pie'} for _ in range(2)] for _ in range(2)],
    subplot_titles=[panel_title for _, panel_title in binary_panels],
)

for position, (column, _) in enumerate(binary_panels):
    # Count once and reuse for both the slice values and their labels.
    counts = df[column].value_counts()
    pie = go.Pie(values=counts, labels=counts.index,
                 textinfo='percent', insidetextorientation='radial', legendgroup='show')
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(pie, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(title='US Police Shooting Binary Variable Analysis', height=900)
fig.update_traces(pull=[0.2, 0], hoverinfo="label+percent", textfont_size=15,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))

fig.show()

***95%** of people shot by US police are male.*

***95%** are shot and only **5%** people are shot and tasered in US police shooting.*

*Only **22%** of the people shot by police showed signs of mental illness.*

*In **88%** of cases the police officers did not have a body camera at the time of the shooting.*

In [None]:
# Count the distinct city names that appear in the data, then report the total.
n_cities = df['city'].nunique()
print('Total number of US cities where Police Shooting were recorded are: ', n_cities)

In [None]:
# Bar chart of the ten cities with the most records, one colour per bar.
city_palette = ['pink', 'plum', 'mediumorchid', 'darkviolet', 'mediumpurple',
                'purple', 'indigo', 'thistle', 'mediumvioletred', 'blueviolet']
city = df['city'].value_counts().head(10)
fig = px.bar(
    x=city.index,
    y=city,
    title='Top 10 US Police Shooting City Count',
    height=600,
    width=500,
)
fig.update_traces(marker_color=city_palette)
fig.show()

 *Of all cities, **Los Angeles** recorded the most police shooting cases between 2015 and 2020.*

In [None]:
# Count the distinct state codes that appear in the data, then report the total.
n_states = df['state'].nunique()
print('Total number of states where US police shooting were recorded are: ', n_states)

In [None]:
# Record count per state, drawn on an explicitly created wide figure/axes pair.
state_fig, state_ax = plt.subplots(figsize=(25, 8))
sns.countplot(x=df['state'], ax=state_ax)
state_ax.set_title('US State Shooting Count', fontsize=15)

***California** state has the most police shooting cases recorded*.

***Texas** is second in police shooting count.*

# Bivariate Analysis

In [None]:
# Box plot of age for each race, drawn on an explicitly created figure/axes pair.
box_fig, box_ax = plt.subplots(figsize=(8, 5))
sns.boxplot(x=df['race'], y=df['age'], ax=box_ax)
box_ax.set_title('Race VS Age Distribution', fontsize=15)

The majority of individuals shot by police were between 30 and 40 years old.

In [None]:

# Five bar charts (race, threat level, flee status, arms category, year) on a
# 3x2 grid; each bar is annotated with its share of all records in percent.

# (column, subplot title) pairs in row-major grid order; the sixth cell stays empty.
bar_panels = [
    ('race', 'Race Distribution'),
    ('threat_level', 'Threat Level Count In US Shooting'),
    ('flee', 'Flee In US Shooting'),
    ('arms_category', 'Arms Category Used In US Shooting'),
    ('year', 'Which Year Most Shooting occured'),
]

fig = make_subplots(rows=3, cols=2, specs=[[{"type": "bar"}, {"type": "bar"}],
                                           [{"type": "bar"}, {"type": "bar"}],
                                           [{"type": "bar"}, None]],
                    subplot_titles=[panel_title for _, panel_title in bar_panels])

for position, (column, _) in enumerate(bar_panels):
    # Count once and reuse for the x labels, bar heights and percentage text.
    counts = df[column].value_counts()
    percent_of_rows = counts / len(df) * 100
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(go.Bar(x=counts.index, y=counts, text=percent_of_rows),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_layout(title='US Police Shooting Univariate Analysis', autosize=False,
                  height=1600, width=1000)
fig.update_traces(marker_color=['papayawhip', 'peachpuff', 'peru', 'chocolate', 'pink', 'plum', 'powderblue', 'purple'],
                  textposition='outside', texttemplate='%{text:.4s}')

fig.show()

In [None]:
# One sub-frame per value of the `race` column; the per-race charts below read
# these by name.
race_values = df['race']
white = df.loc[race_values.eq('White')]
black = df.loc[race_values.eq('Black')]
asian = df.loc[race_values.eq('Asian')]
native = df.loc[race_values.eq('Native')]
other = df.loc[race_values.eq('Other')]
hispanic = df.loc[race_values.eq('Hispanic')]

In [None]:
# Gender counts for each race on a 2x3 grid; each bar is annotated with its
# share (in percent) of that race's records.

# (title prefix, sub-frame) pairs in row-major grid order.
race_panels = [
    ('White', white), ('Black', black), ('Asian', asian),
    ('Native', native), ('Hispanic', hispanic), ('Other', other),
]

fig = make_subplots(
    rows=2, cols=3,
    specs=[[{"type": "bar"} for _ in range(3)] for _ in range(2)],
    subplot_titles=[f'{label} Race Gender Distribution' for label, _ in race_panels],
)

for position, (_, subset) in enumerate(race_panels):
    # Count once and reuse for the x labels, bar heights and percentage text.
    counts = subset['gender'].value_counts()
    percent_of_race = counts / len(subset) * 100
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 3)
    fig.add_trace(go.Bar(x=counts.index, y=counts, text=percent_of_race),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=1000, width=1100, title='Race VS Gender Distribution')
fig.update_traces(marker_color=['pink', 'plum', 'powderblue', 'purple'],
                  textposition='outside', texttemplate='%{text:.4s}')
# Show explicitly, like the other plotly cells, instead of relying on the
# implicit display of the cell's last expression.
fig.show()

In [None]:
# Manner-of-death counts for each race on a 2x3 grid; each bar is annotated
# with its share (in percent) of that race's records.

# (title prefix, sub-frame) pairs in row-major grid order.
race_panels = [
    ('White', white), ('Black', black), ('Asian', asian),
    ('Native', native), ('Hispanic', hispanic), ('Other', other),
]

fig = make_subplots(
    rows=2, cols=3,
    specs=[[{"type": "bar"} for _ in range(3)] for _ in range(2)],
    subplot_titles=[f'{label} Race VS Manner Of Death' for label, _ in race_panels],
)

for position, (_, subset) in enumerate(race_panels):
    # Count once and reuse for the x labels, bar heights and percentage text.
    counts = subset['manner_of_death'].value_counts()
    percent_of_race = counts / len(subset) * 100
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 3)
    fig.add_trace(go.Bar(x=counts.index, y=counts, text=percent_of_race),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=1000, width=1100, title='Race VS Manner Of Death')
fig.update_traces(marker_color=['powderblue', 'purple'],
                  textposition='outside', texttemplate='%{text:.4s}')
# Show explicitly, like the other plotly cells, instead of relying on the
# implicit display of the cell's last expression.
fig.show()

Majority of shot and tasered individuals belong to Asian and Other races.

Natives have the lowest percentage of those shot and tasered.

In [None]:
# Donut charts of the `flee` value distribution for each race on a 3x2 grid.
line_colors = ['darkturquoise', 'aqua', 'aquamarine', 'aliceblue']

# (title prefix, sub-frame) pairs in row-major grid order.
race_panels = [
    ('White', white), ('Black', black),
    ('Asian', asian), ('Hispanic', hispanic),
    ('Native', native), ('Other', other),
]

fig = make_subplots(
    rows=3, cols=2,
    specs=[[{'type': 'domain'} for _ in range(2)] for _ in range(3)],
    subplot_titles=[f'{label} Race VS Flee' for label, _ in race_panels],
)

for position, (_, subset) in enumerate(race_panels):
    # Count once and reuse for both the slice values and their labels.
    counts = subset['flee'].value_counts()
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(go.Pie(values=counts, labels=counts.index),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_traces(pull=[0.0, 0.0, 0.2, 0.0], hole=.4, hoverinfo="label+percent",
                  marker=dict(colors=line_colors, line=dict(color='black', width=2)))
# `title_font_size` is the supported spelling; the legacy `titlefont` attribute is deprecated.
fig.update_layout(height=1300, title_text='Bivariate Analysis Race VS Flee', title_font_size=20)
fig.show()


Data from all races shows that the majority of people were not fleeing at the time of the shooting.

In [None]:
# Donut charts of the `signs_of_mental_illness` distribution for each race on a 3x2 grid.
color = ['gold', 'darkorange']

# (title prefix, sub-frame) pairs in row-major grid order.
race_panels = [
    ('White', white), ('Black', black),
    ('Asian', asian), ('Hispanic', hispanic),
    ('Native', native), ('Other', other),
]

fig = make_subplots(
    rows=3, cols=2,
    specs=[[{'type': 'domain'} for _ in range(2)] for _ in range(3)],
    subplot_titles=[f'{label} Race VS Mental Illness' for label, _ in race_panels],
)

for position, (_, subset) in enumerate(race_panels):
    # Count once and reuse for both the slice values and their labels.
    counts = subset['signs_of_mental_illness'].value_counts()
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(go.Pie(values=counts, labels=counts.index),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_traces(hole=.4, hoverinfo="label+percent", marker_colors=color,
                  marker_line=dict(color='black', width=2))
# `title_font_size` is the supported spelling; the legacy `titlefont` attribute is deprecated.
fig.update_layout(height=1100, title_text='Bivariate Analysis Race VS Mental Illness', title_font_size=20)
fig.show()


The prevalence of Mental illness is highest among White and Asians.

The lowest percentage of Mental illness is seen in Blacks.

In [None]:
# Donut charts of the `threat_level` distribution for each race on a 3x2 grid.
night_colors = ['rgb(56, 75, 126)', 'rgb(18, 36, 37)', 'rgb(34, 53, 101)',
                'rgb(33, 75, 99)']

# (title prefix, sub-frame) pairs in row-major grid order.
race_panels = [
    ('White', white), ('Black', black),
    ('Asian', asian), ('Hispanic', hispanic),
    ('Native', native), ('Other', other),
]

fig = make_subplots(
    rows=3, cols=2,
    specs=[[{'type': 'domain'} for _ in range(2)] for _ in range(3)],
    subplot_titles=[f'{label} Race VS Threat' for label, _ in race_panels],
)

for position, (_, subset) in enumerate(race_panels):
    # Count once and reuse for both the slice values and their labels.
    counts = subset['threat_level'].value_counts()
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(go.Pie(values=counts, labels=counts.index),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_traces(hole=.4, hoverinfo="label+percent", marker_colors=night_colors,
                  pull=[0.0, 0.0, 0.2, 0.0], marker_line=dict(color='white', width=2))
# `title_font_size` is the supported spelling; the legacy `titlefont` attribute is deprecated.
fig.update_layout(height=1300, title_text='Bivariate Analysis Race VS Threat', title_font_size=20)
fig.show()


According to the data we have, the majority of individuals attacked the police before they were shot.

In [None]:
# Donut charts of the `arms_category` distribution for each race on a 3x2 grid.
clr = ['mediumturquoise', 'lightgreen', 'seagreen', "rgb(114, 78, 145)", 'palegreen', 'olive']

# (title prefix, sub-frame) pairs in row-major grid order.
race_panels = [
    ('White', white), ('Black', black),
    ('Asian', asian), ('Hispanic', hispanic),
    ('Native', native), ('Other', other),
]

# Every subplot title now carries "VS"; previously only the first one did.
fig = make_subplots(
    rows=3, cols=2,
    specs=[[{'type': 'domain'} for _ in range(2)] for _ in range(3)],
    subplot_titles=[f'{label} Race VS Arms Category' for label, _ in race_panels],
)

for position, (_, subset) in enumerate(race_panels):
    # Count once and reuse for both the slice values and their labels.
    counts = subset['arms_category'].value_counts()
    # add_trace(row=, col=) is the supported replacement for the deprecated append_trace.
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(go.Pie(values=counts, labels=counts.index),
                  row=grid_row + 1, col=grid_col + 1)

fig.update_traces(hole=.3, hoverinfo="label+percent", marker_colors=clr,
                  marker_line=dict(color='darkgreen', width=2))
# `title_font_size` is the supported spelling; the legacy `titlefont` attribute is deprecated.
fig.update_layout(height=1600, title_text='Bivariate Analysis Race VS Arms Category', title_font_size=20)
fig.show()


Most of the shot individuals possessed guns and sharp objects.

In [None]:
# Record count per state, with bars split by race.
# Pass the frame plus column names (rather than bare Series) so plotly express
# labels the axis and legend from the columns, and give the figure a title.
fig = px.histogram(df, x='state', color='race',
                   title='Police Shootings By State And Race')
fig.show()

Most of the cases against White, Black and Hispanic race were recorded in California state.

In [None]:
# Number of non-null `name` entries for every (year, race) pair, as a flat frame.
# NOTE: despite its name, `ax` is a DataFrame (not a Matplotlib Axes); the
# line-chart cell that follows reads it under this name.
names_per_group = df.groupby(['year', 'race'])['name'].count()
ax = names_per_group.reset_index(name='count')
ax.style.background_gradient(cmap="CMRmap_r")

In [None]:
# One line per race showing the per-year count held in the `ax` frame.
# Pass the frame plus column names (rather than bare Series) so plotly express
# labels the axes and legend from the columns, and give the figure a title.
fig = px.line(ax, x='year', y='count', color='race',
              title='Police Shootings Per Year By Race')
fig.show()

It's hard to comment on a rise or decline in police shootings, as we only have data for the first half of 2020.