# Introduction
Visualization of statistics that support the claims of the Black Lives Matter movement, using data from 2015 and 2016.

Data source: https://www.theguardian.com/us-news/ng-interactive/2015/jun/01/about-the-counted

Idea from BuzzFeed article: https://www.buzzfeednews.com/article/peteraldhous/race-and-police-shootings

### Imports
Libraries and data

In [80]:
import pandas as pd

from bokeh.io import output_notebook, show, export_png
from bokeh.plotting import figure, output_file
from bokeh.models import HoverTool, ColumnDataSource,NumeralTickFormatter
from bokeh.palettes import Spectral4, PuBu4
from bokeh.transform import dodge
from bokeh.layouts import gridplot

In [2]:
# Only the two columns used by the analysis are read from each yearly CSV.
selectcolumns = ['raceethnicity', 'armed']
df1 = pd.read_csv('the-counted-2015.csv', usecols=selectcolumns)
df1.head()

Unnamed: 0,raceethnicity,armed
0,Black,No
1,White,Firearm
2,White,No
3,Hispanic/Latino,No
4,Asian/Pacific Islander,Firearm


In [3]:
# 2016 export, restricted to the same column selection as the 2015 frame.
df2 = pd.read_csv('the-counted-2016.csv', usecols=selectcolumns)
df2.head()

Unnamed: 0,raceethnicity,armed
0,Black,Firearm
1,White,Firearm
2,White,Knife
3,White,Knife
4,White,Firearm


In [4]:
# Stack the 2015 and 2016 frames of "The Counted" into one frame, then
# display its (rows, columns) shape.
df = pd.concat([df1, df2])
df.shape

(2239, 2)

Source for the percentage of the US population by ethnicity in 2015: https://www.statista.com/statistics/270272/percentage-of-us-population-by-ethnicities/

Source for population total: https://en.wikipedia.org/wiki/Demography_of_the_United_States#Vital_statistics_from_1935

In [5]:
# Share of the population (in %) for each group used in the analysis.
# "Others" is the sum of four smaller shares.
ethndic = {
    "White": 61.72,
    "Latino": 17.66,
    "Black": 12.38,
    "Others": (5.28+2.05+0.73+0.17),
}
print(ethndic)

# Mean of the two yearly population totals (2015 and 2016).
population = (321442000 + 323100000) / 2

# Estimated head count per group: the group's share applied to the mean
# population, rounded to a whole number of people.
ethnestim = {
    group: round(population * share / 100)
    for group, share in ethndic.items()
}
print(ethnestim)

{'White': 61.72, 'Latino': 17.66, 'Black': 12.38, 'Others': 8.23}
{'White': 198905661, 'Latino': 56913059, 'Black': 39897150, 'Others': 26522903}


# Analysis

In [6]:
# Per-group summary of the raw labels (count, unique, top, freq of `armed`).
df.groupby('raceethnicity').describe()

Unnamed: 0_level_0,armed,armed,armed,armed
Unnamed: 0_level_1,count,unique,top,freq
raceethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Arab-American,7,4,No,2
Asian/Pacific Islander,45,7,Knife,15
Black,573,8,Firearm,283
Hispanic/Latino,378,8,Firearm,161
Native American,37,6,Firearm,17
Other,1,1,Firearm,1
Unknown,40,6,Firearm,18
White,1158,8,Firearm,564


Check if there are any missing values:

In [7]:
# Number of missing entries in each column.
df.isna().sum(axis=0)

raceethnicity    0
armed            0
dtype: int64

In [8]:
# Arab-American and Unknown rows are discarded: no population share is
# available for them, so they cannot be compared with the population data.
# .copy() makes the filtered result an independent frame, so the column
# assignment below never writes into a slice of another frame.
excluded = ['Arab-American', 'Unknown']
df = df[~df['raceethnicity'].isin(excluded)].copy()

# Align the labels with the keys of the population-share dictionary:
#  - Asian/Pacific Islander, Native American and Other all fall under
#    "Others" in the population percentages found online;
#  - Hispanic/Latino is renamed for consistency with the population data.
# The mapping is applied to `raceethnicity` only: a frame-wide replace would
# also rewrite any matching label (e.g. 'Other') in the `armed` column.
df['raceethnicity'] = df['raceethnicity'].replace({
    'Asian/Pacific Islander': 'Others',
    'Native American': 'Others',
    'Other': 'Others',
    'Hispanic/Latino': 'Latino',
})

In [9]:
# Same per-group summary as before, now on the cleaned labels.
df.groupby('raceethnicity').describe()

Unnamed: 0_level_0,armed,armed,armed,armed
Unnamed: 0_level_1,count,unique,top,freq
raceethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Black,573,8,Firearm,283
Latino,378,8,Firearm,161
Others,83,7,Firearm,32
White,1158,8,Firearm,564


In [10]:
def givepercent(dtf, ethnicity):
    """Return the percentage of rows of ``dtf`` labelled ``ethnicity``.

    Rows are matched on the ``raceethnicity`` column; the share is taken
    over all rows of ``dtf`` and rounded to two decimal places.
    """
    matching_rows = (dtf['raceethnicity'] == ethnicity).sum()
    total_rows = dtf.shape[0]
    return round(matching_rows / total_rows * 100, 2)

In [11]:
# Number of people killed per group, keyed in the same order as the
# population-share dictionary.
killed = {
    group: (df.raceethnicity == group).sum()
    for group in ("White", "Latino", "Black", "Others")
}
print(killed)

# The same breakdown expressed as a percentage of all rows in df.
killedperc = {
    group: givepercent(df, group)
    for group in ("White", "Latino", "Black", "Others")
}
print(killedperc)

{'White': 1158, 'Latino': 378, 'Black': 573, 'Others': 83}
{'White': 52.83, 'Latino': 17.24, 'Black': 26.14, 'Others': 3.79}


In [12]:
# Summary of the group labels for each value of `armed`.
df.groupby('armed').describe()

Unnamed: 0_level_0,raceethnicity,raceethnicity,raceethnicity,raceethnicity
Unnamed: 0_level_1,count,unique,top,freq
armed,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Disputed,16,3,Black,12
Firearm,1040,4,White,564
Knife,299,4,White,156
No,400,4,White,201
Non-lethal firearm,92,4,White,57
Others,143,4,White,75
Unknown,123,4,White,65
Vehicle,79,4,White,37


The analysis is limited to the value *No*, but it could also consider *Disputed* and *Non-lethal firearm*, which account for another 108 data points.

In [13]:
# Keep only the rows whose `armed` value is exactly 'No', then summarise
# them per group.
dfunarmed = df[df['armed'] == 'No']
dfunarmed.groupby('raceethnicity').describe()

Unnamed: 0_level_0,armed,armed,armed,armed
Unnamed: 0_level_1,count,unique,top,freq
raceethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Black,121,1,No,121
Latino,67,1,No,67
Others,11,1,No,11
White,201,1,No,201


In [14]:
# Number of unarmed people killed per group.
unarmed = {
    group: (dfunarmed.raceethnicity == group).sum()
    for group in ("White", "Latino", "Black", "Others")
}
print(unarmed)

# The same breakdown as a percentage of all rows in dfunarmed.
unarmedperc = {
    group: givepercent(dfunarmed, group)
    for group in ("White", "Latino", "Black", "Others")
}
print(unarmedperc)

{'White': 201, 'Latino': 67, 'Black': 121, 'Others': 11}
{'White': 50.25, 'Latino': 16.75, 'Black': 30.25, 'Others': 2.75}


In [24]:
def percent1ethn(portion, population, decimals):
    """Return ``portion`` as a percentage of ``population``.

    The result is rounded to ``decimals`` decimal places.
    """
    share_percent = portion / population * 100
    return round(share_percent, decimals)

In [25]:
# Killed people as a percentage of the estimated population of their own
# group (six decimals, because the values are very small).
killed1ethn = {
    group: percent1ethn(killed[group], ethnestim[group], 6)
    for group in ("White", "Latino", "Black", "Others")
}
print(killed1ethn)

# Unarmed people as a percentage of all people killed in the same group.
unarmedoverkilled = {
    group: percent1ethn(unarmed[group], killed[group], 2)
    for group in ("White", "Latino", "Black", "Others")
}
print(unarmedoverkilled)

{'White': 0.000582, 'Latino': 0.000664, 'Black': 0.001436, 'Others': 0.000313}
{'White': 17.36, 'Latino': 17.72, 'Black': 21.12, 'Others': 13.25}


In [114]:
# The group order is taken from the population-share dictionary. Every other
# series is looked up by group name, so the columns stay aligned regardless
# of the insertion order of the individual dictionaries.
ethnicities = list(ethndic.keys())
populethn = [ethndic[group] for group in ethnicities]

# Percentage lists get their own names: `killed` and `unarmed` keep holding
# the per-group counts, so the cells that index them by group name can be
# re-run after this one.
killed_pct = [killedperc[group] for group in ethnicities]
unarmed_pct = [unarmedperc[group] for group in ethnicities]

# Column names are the ones the plotting code and hover tooltips refer to.
data1 = {'ethnicities' : ethnicities,
        'populethn'   : populethn,
        'killed'   : killed_pct,
        'unarmed'   : unarmed_pct}

source = ColumnDataSource(data=data1)

# Results

In [115]:
# Settings reused by the figures in this notebook.
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
palette = Spectral4
titlefontsize = '16pt'

cplot = figure(
    title="The Counted (data from 2015 and 2016)",
    tools=TOOLS,
    x_range=ethnicities,
    y_range=(0, 75),
)

# One bar series per measure, drawn in this order:
# (source column, dodge offset, palette index, legend text).
bar_series = (
    ('populethn', 0.25, 0, 'Ethnicity % over population'),
    ('killed', -0.25, 2, "Killed % over total killed"),
    ('unarmed', 0.0, 1, "Unarmed % over total unarmed"),
)
for column, offset, color_index, label in bar_series:
    cplot.vbar(
        x=dodge('ethnicities', offset, range=cplot.x_range),
        top=column,
        source=source,
        width=0.4,
        line_width=0,
        line_color=None,
        legend=label,
        color=str(palette[color_index]),
        name=column,
    )

# The hover tool is restricted to the renderer named "unarmed"; its tooltip
# lists all three measures. Fields beginning with @ display values from the
# ColumnDataSource.
cplot_hover = HoverTool(
    names=["unarmed"],
    tooltips=[
        ('Population', '@populethn{(00.00)}%'),
        ('Killed', '@killed{(00.00)}%'),
        ('Unarmed', '@unarmed{(00.00)}%'),
    ],
    mode='vline',
)
cplot.add_tools(cplot_hover)

# Grid and tick clean-up.
cplot.xgrid.grid_line_color = None
cplot.xaxis.minor_tick_line_color = None

# Legend.
cplot.legend.location = "top_right"
cplot.legend.label_text_font_size = '16pt'

# Axis labelling and font sizes.
cplot.xaxis.axis_label = "Ethnicity"
cplot.xaxis.axis_label_text_font_size = '18pt'
cplot.xaxis.major_label_text_font_size = '16pt'
cplot.yaxis.major_label_text_font_size = '16pt'
cplot.title.text_font_size = titlefontsize

In [116]:
# Data for the second chart: killed people as a percentage of the estimated
# population of their own group, plus one PuBu4 colour per bar.
perckillethn = list(killed1ethn.values())
data2 = dict(ethnicities=ethnicities, perckillethn=perckillethn)
source = ColumnDataSource(data=dict(data2, color=PuBu4))

In [117]:
# Second chart: killed people as a percentage of the population of the same
# group. The y range leaves 20% headroom above the tallest bar.
plot2 = figure(title="Killed % over population with same ethnicity",
               tools=TOOLS, x_range=ethnicities, y_range=(0, max(perckillethn)*1.2))

# The dodge transform is bound to this figure's own x_range, so the glyph
# does not depend on the range object of another figure. No legend is
# requested because the chart has a single series.
plot2.vbar(x=dodge('ethnicities', 0.0, range=plot2.x_range), top='perckillethn', source=source,
           width=0.4, line_width=0, line_color=None,
           color='color', name="perckillethn")

# Fields beginning with @ display values from the ColumnDataSource.
plot2.add_tools(HoverTool(names=["perckillethn"],
    tooltips=[
    ( 'Killed', '@perckillethn{(0.00000)}%' )],
    mode='vline'))

plot2.xgrid.grid_line_color = None

plot2.xaxis.axis_label = "Ethnicity"
plot2.xaxis.axis_label_text_font_size='18pt'

plot2.xaxis.minor_tick_line_color = None
plot2.title.text_font_size=titlefontsize
plot2.xaxis.major_label_text_font_size='16pt'
plot2.yaxis.major_label_text_font_size='16pt'
# Four decimals on the y axis: the plotted percentages are far below 1.
plot2.yaxis[0].formatter = NumeralTickFormatter(format="0.0000")

In [118]:
# Data for the third chart: unarmed people as a percentage of all people
# killed in the same group, plus one PuBu4 colour per bar.
percunarmethn = list(unarmedoverkilled.values())
data3 = dict(ethnicities=ethnicities, percunarmethn=percunarmethn)
source = ColumnDataSource(data=dict(data3, color=PuBu4))

In [119]:
# Third chart: unarmed people as a percentage of all people killed in the
# same group. The y range leaves 20% headroom above the tallest bar.
plot3 = figure(title="Unarmed % over killed with same ethnicity",
               tools=TOOLS, x_range=ethnicities, y_range=(0, max(percunarmethn)*1.2))

# The dodge transform is bound to this figure's own x_range, so the glyph
# does not depend on the range object of another figure. No legend is
# requested because the chart has a single series.
plot3.vbar(x=dodge('ethnicities', 0.0, range=plot3.x_range), top='percunarmethn', source=source,
           width=0.4, line_width=0, line_color=None,
           color='color', name="percunarmethn")

# Fields beginning with @ display values from the ColumnDataSource.
plot3.add_tools(HoverTool(names=["percunarmethn"],
    tooltips=[
    ( 'Unarmed', '@percunarmethn{(00.00)}%' )],
    mode='vline'))

plot3.xgrid.grid_line_color = None

plot3.xaxis.axis_label = "Ethnicity"
plot3.xaxis.axis_label_text_font_size='18pt'

plot3.xaxis.minor_tick_line_color = None
plot3.title.text_font_size=titlefontsize
plot3.xaxis.major_label_text_font_size='16pt'
plot3.yaxis.major_label_text_font_size='16pt'

In [120]:
# Register an HTML file as an output target for the charts.
output_file("thecounted.html", title="The Counted Visualization")

# Save a static PNG of the first chart (cplot) only.
# NOTE(review): export_png presumably needs extra tooling (a browser driver) in the environment — confirm.
export_png(cplot, filename="bokeh_thecounted.png")

# Enable inline notebook output as well, then place the three charts in a
# single grid with three columns and display it.
output_notebook()
gplot=gridplot([cplot, plot2, plot3], sizing_mode='stretch_both', ncols=3)#, plot_width=800, plot_height=600)
show(gplot)  # open a browser

Hover over the bars to read the percentage values.

# Conclusions
The plot shows that, if the people shot by police were proportional to the population distribution, the orange and green bars would be almost the same height as the blue ones. Although this holds for the Latino group, it does not for the Black group: despite making up 12.38% of the population, it is the second most represented group among those killed (26.14%) and among those killed while unarmed (30.25%).