# Introduction
Visualization of statistics that support the claims of the Black Lives Matter movement, using data on people killed by police in the US in 2015 and 2016.

Data source: https://www.theguardian.com/us-news/ng-interactive/2015/jun/01/about-the-counted

Idea from BuzzFeed article: https://www.buzzfeednews.com/article/peteraldhous/race-and-police-shootings

### Imports
Libraries and data

In [1]:
import pandas as pd

#from sklearn.metrics import classification_report

from bokeh.io import output_notebook, show, export_png
from bokeh.plotting import figure, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Spectral4
from bokeh.transform import dodge

In [2]:
# Only two columns are needed from the CSV exports: the person's
# race/ethnicity and the `armed` category.
selectcolumns = ['raceethnicity', 'armed']

# 2015 records.
df1 = pd.read_csv('the-counted-2015.csv', usecols=selectcolumns)
df1.head()

Unnamed: 0,raceethnicity,armed
0,Black,No
1,White,Firearm
2,White,No
3,Hispanic/Latino,No
4,Asian/Pacific Islander,Firearm


In [3]:
# 2016 records, restricted to the same columns as the 2015 frame.
df2 = pd.read_csv('the-counted-2016.csv', usecols=selectcolumns)
df2.head()

Unnamed: 0,raceethnicity,armed
0,Black,Firearm
1,White,Firearm
2,White,Knife
3,White,Knife
4,White,Firearm


In [4]:
# Stack both years into one frame. ignore_index=True renumbers the rows
# 0..n-1; without it each year keeps its own index and row labels repeat,
# which makes any later label-based access ambiguous.
df = pd.concat([df1, df2], ignore_index=True)
df.shape

(2239, 2)

Source for the share of the US population by ethnicity in 2015: https://www.statista.com/statistics/270272/percentage-of-us-population-by-ethnicities/

In [5]:
# Share of the US population (in percent) per group used in this notebook.
# The insertion order of this dict is also the category order of the chart.
# NOTE(review): "Others" sums four smaller groups from the cited source;
# which group each addend stands for is not recorded here - confirm.
ethndic = dict(
    White=61.72,
    Latino=17.66,
    Black=12.38,
    Others=5.28 + 2.05 + 0.73 + 0.17,
)
print(type(ethndic), ethndic, sep='\n')

<class 'dict'>
{'White': 61.72, 'Latino': 17.66, 'Black': 12.38, 'Others': 8.23}


# Analysis

In [6]:
# Per-group summary of the raw labels (count, unique, top, freq of `armed`).
df.groupby('raceethnicity').describe()

Unnamed: 0_level_0,armed,armed,armed,armed
Unnamed: 0_level_1,count,unique,top,freq
raceethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Arab-American,7,4,Firearm,2
Asian/Pacific Islander,45,7,Knife,15
Black,573,8,Firearm,283
Hispanic/Latino,378,8,Firearm,161
Native American,37,6,Firearm,17
Other,1,1,Firearm,1
Unknown,40,6,Firearm,18
White,1158,8,Firearm,564


Check if there are any missing values:

In [7]:
# Number of missing entries per column (isnull is the pandas alias of isna).
df.isnull().sum()

raceethnicity    0
armed            0
dtype: int64

In [8]:
# Drop the groups for which `ethndic` holds no population share.
df = df[~df['raceethnicity'].isin(['Arab-American', 'Unknown'])]

# Fold the remaining labels into the four groups used by `ethndic`.
# The result is assigned back instead of using inplace=True: mutating a frame
# that was just produced by boolean filtering is what pandas' chained-assignment
# warning is about, and a single mapping replaces the two separate passes.
# NOTE(review): replace() is applied to every column, so a matching label in
# `armed` (e.g. 'Other') is presumably rewritten as well - confirm that is
# intended, otherwise restrict the replacement to `raceethnicity`.
df = df.replace({
    'Asian/Pacific Islander': 'Others',
    'Native American': 'Others',
    'Other': 'Others',
    'Hispanic/Latino': 'Latino',
})

In [9]:
# Same per-group summary, now over the consolidated labels.
df.groupby('raceethnicity').describe()

Unnamed: 0_level_0,armed,armed,armed,armed
Unnamed: 0_level_1,count,unique,top,freq
raceethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Black,573,8,Firearm,283
Latino,378,8,Firearm,161
Others,83,7,Firearm,32
White,1158,8,Firearm,564


In [10]:
def givepercent(dtf, ethnicity):
    """Return the share of rows in ``dtf`` that belong to ``ethnicity``.

    Parameters
    ----------
    dtf : pandas.DataFrame
        Frame with a ``raceethnicity`` column.
    ethnicity : str
        Label to look for in ``raceethnicity``.

    Returns
    -------
    float
        Percentage (0-100) rounded to two decimals. An empty frame yields
        0.0 rather than the NaN that dividing zero by zero would produce.
    """
    total = dtf.shape[0]
    if total == 0:
        return 0.0
    matches = (dtf.raceethnicity == ethnicity).sum()
    # float() turns the NumPy scalar into a plain Python float, so printed
    # dictionaries show e.g. 52.83 on every NumPy version.
    return float(round(matches / total * 100, 2))

In [11]:
# Absolute number of people killed per group, keyed in chart order.
killed = {
    group: (df.raceethnicity == group).sum()
    for group in ('White', 'Latino', 'Black', 'Others')
}
print(killed)

# The same breakdown expressed as a percentage of all people killed.
killedperc = {group: givepercent(df, group) for group in killed}
print(killedperc)

{'White': 1158, 'Latino': 378, 'Black': 573, 'Others': 83}
{'White': 52.83, 'Latino': 17.24, 'Black': 26.14, 'Others': 3.79}


In [12]:
# Summary per `armed` category (count, unique, top, freq of `raceethnicity`).
df.groupby('armed').describe()

Unnamed: 0_level_0,raceethnicity,raceethnicity,raceethnicity,raceethnicity
Unnamed: 0_level_1,count,unique,top,freq
armed,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Disputed,16,3,Black,12
Firearm,1040,4,White,564
Knife,299,4,White,156
No,400,4,White,201
Non-lethal firearm,92,4,White,57
Others,143,4,White,75
Unknown,123,4,White,65
Vehicle,79,4,White,37


In [13]:
# Keep only the rows whose `armed` value is 'No'.
dfunarmed = df.loc[df['armed'] == 'No']
dfunarmed.groupby('raceethnicity').describe()

Unnamed: 0_level_0,armed,armed,armed,armed
Unnamed: 0_level_1,count,unique,top,freq
raceethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Black,121,1,No,121
Latino,67,1,No,67
Others,11,1,No,11
White,201,1,No,201


In [14]:
# Absolute number of unarmed people killed per group, keyed in chart order.
unarmed = {
    group: (dfunarmed.raceethnicity == group).sum()
    for group in ('White', 'Latino', 'Black', 'Others')
}
print(unarmed)

# The same breakdown expressed as a percentage of all unarmed people killed.
unarmedperc = {group: givepercent(dfunarmed, group) for group in unarmed}
print(unarmedperc)

{'White': 201, 'Latino': 67, 'Black': 121, 'Others': 11}
{'White': 50.25, 'Latino': 16.75, 'Black': 30.25, 'Others': 2.75}


In [15]:
# Category order for the chart comes from the population dictionary.
ethnicities = list(ethndic)

# Look every series up by group name instead of relying on the three
# dictionaries sharing the same insertion order. A missing group now raises
# KeyError rather than silently attaching a value to the wrong bar.
# Note: `killed` and `unarmed` are rebound here from the count dictionaries
# of the earlier cells to lists of percentages.
populethn = [ethndic[group] for group in ethnicities]
killed = [killedperc[group] for group in ethnicities]
unarmed = [unarmedperc[group] for group in ethnicities]

data = {'ethnicities' : ethnicities,
        'populethn'   : populethn,
        'killed'   : killed,
        'unarmed'   : unarmed}

# Single data source shared by all three bar series and the hover tooltips.
source = ColumnDataSource(data=data)

# Results

In [18]:
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
palette = Spectral4

cplot = figure(title="The Counted Visualization (data from 2015 and 2016)", tools=TOOLS,
               x_range=ethnicities, y_range=(0, 80), sizing_mode='scale_both')

# One entry per bar series: (source column, dodge offset, legend text, colour).
# The order is the drawing order, so later series are painted on top of
# earlier ones. Bars are 0.4 wide but only 0.25 apart, so neighbours overlap.
bar_specs = [
    ('populethn',  0.25, 'Ethnicity % over population',  palette[0]),
    ('killed',    -0.25, 'Killed % over total killed',   palette[2]),
    ('unarmed',    0.0,  'Unarmed % over total unarmed', palette[1]),
]

bars = {}
for column, offset, label, colour in bar_specs:
    # legend_label replaces the `legend=` glyph keyword, which has been
    # deprecated since Bokeh 1.4.
    bars[column] = cplot.vbar(x=dodge('ethnicities', offset, range=cplot.x_range),
                              top=column, source=source,
                              width=0.4, line_width=0, line_color=None,
                              legend_label=label, color=colour, name=column)

# Attach the hover tool to the middle ("unarmed") bars only, so one tooltip
# per group is shown. The renderer is passed directly instead of being
# selected through the deprecated `names=` filter.
cplot.add_tools(HoverTool(renderers=[bars['unarmed']],
    tooltips=[
    ( 'Population', '@populethn{(00.00)}%' ),
    ( 'Killed', '@killed{(00.00)}%' ),
    ( 'Unarmed', '@unarmed{(00.00)}%' )], # Fields beginning with @ display values from ColumnDataSource.
    mode='vline'))

cplot.xgrid.grid_line_color = None

cplot.legend.location = "top_right"
cplot.xaxis.axis_label = "Ethnicity"
cplot.xaxis.axis_label_text_font_size='18pt'

cplot.xaxis.minor_tick_line_color = None
cplot.title.text_font_size='20pt'
cplot.legend.label_text_font_size='16pt'
cplot.xaxis.major_label_text_font_size='16pt'
cplot.yaxis.major_label_text_font_size='16pt'

In [23]:
# Write the chart to a standalone HTML page.
output_file(filename="thecounted.html", title="The Counted Visualization")

# Opens the generated page in a browser.
show(cplot)
# Optional static export, left disabled:
#export_png(cplot, filename="bokeh_thecounted.png")

Hover over the bars to read the exact percentage values.

# Conclusions
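The plot shows that if the people killed by police were distributed in proportion to the population, the orange (killed) and green (killed while unarmed) bars would be almost the same height as the blue (population share) ones. This holds for the Latino group (17.66% of the population, 17.24% of those killed, 16.75% of those killed while unarmed), but not for the Black group: with 12.38% of the population it accounts for 26.14% of those killed and 30.25% of those killed while unarmed, making it the second most represented group in both categories.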