In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv('fatal-police-shootings-data.csv')
df.index = pd.to_datetime(df.date,format='%Y-%m-%d')
df.sample(5)

Unnamed: 0_level_0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-04-18,2511,Casey Desper,2017-04-18,shot,,32.0,M,W,Brandywine,WV,False,attack,Car,False
2019-01-28,4422,Gregory Griffin,2019-01-28,shot,unarmed,46.0,M,B,Newark,NJ,False,other,Car,False
2016-08-16,1790,Larry Eugene Kurtley,2016-08-16,shot,toy weapon,53.0,M,W,Apache Junction,AZ,False,other,Not fleeing,False
2019-10-30,5147,Michael Pinkerman,2019-10-30,shot,gun,,M,W,Ona,WV,False,attack,Not fleeing,False
2016-05-04,1509,Richard Ferretti,2016-05-04,shot,,52.0,M,W,Philadelphia,PA,False,other,Not fleeing,False


In [3]:
black_mask = df.race == 'B'
white_mask = df.race == 'W'
hisp_mask = df.race == 'H'
utah_mask = df.state == 'UT'
nv_mask = df.state == 'NV'
provo_mask = df.city == 'Provo'
unarmed_mask = df.armed == 'unarmed'
undet_mask = df.armed == 'undetermined'
toy_mask = df.armed == 'toy weapon' 
nan_mask = df.armed.isnull()
gun_mask = df.armed == 'gun'

In [4]:
def get_arm_disparity(race, mask, prop, armed=True):
    
    if armed:
        arm_mask = gun_mask
    else:
        arm_mask = unarmed_mask
        
    deaths = []
    race_deaths = []
    deaths.append(df[arm_mask].id.count())
    race_deaths.append(df[mask & arm_mask].id.count())
    
    if armed:
        print(f'Data for armed {race} suspects:')
    else:
        print(f'Data for unarmed {race} suspects:')
    
    print('Total Deaths: ', np.sum(deaths))
    print(f'{race} Deaths: ', np.sum(race_deaths))
    what_it_is = np.sum(race_deaths) / np.sum(deaths)
    what_it_should_be = prop
    disparity = (what_it_is - what_it_should_be) / what_it_should_be
    print('Disparity: ', disparity, '\n')

In [5]:
get_arm_disparity('WHITE', white_mask, .72, armed=True)
get_arm_disparity('WHITE', white_mask, .72, armed=False)
get_arm_disparity('BLACK', black_mask, .14, armed=True)
get_arm_disparity('BLACK', black_mask, .14, armed=False)
get_arm_disparity('HISPANIC', hisp_mask, .183, armed=True)
get_arm_disparity('HISPANIC', hisp_mask, .183, armed=False)

Data for armed WHITE suspects:
Total Deaths:  3169
WHITE Deaths:  1486
Disparity:  -0.3487255005083973 

Data for unarmed WHITE suspects:
Total Deaths:  356
WHITE Deaths:  146
Disparity:  -0.4303995006242197 

Data for armed BLACK suspects:
Total Deaths:  3169
BLACK Deaths:  772
Disparity:  0.7400712257133839 

Data for unarmed BLACK suspects:
Total Deaths:  356
BLACK Deaths:  125
Disparity:  1.5080256821829854 

Data for armed HISPANIC suspects:
Total Deaths:  3169
HISPANIC Deaths:  460
Disparity:  -0.20679671751789452 

Data for unarmed HISPANIC suspects:
Total Deaths:  356
HISPANIC Deaths:  63
Disparity:  -0.032971081230429224 



In [6]:
def get_disparity(race, mask, prop):
       
    deaths = []
    race_deaths = []
    deaths.append(df.id.count())
    race_deaths.append(df[mask].id.count())
 
    print(f'Data for {race} suspects:')
    
    print('Total Deaths: ', np.sum(deaths))
    print(f'{race} Deaths: ', np.sum(race_deaths))
    what_it_is = np.sum(race_deaths) / np.sum(deaths)
    what_it_should_be = prop
    disparity = (what_it_is - what_it_should_be) / what_it_should_be
    print('Disparity: ', disparity, '\n')

In [7]:
get_disparity('WHITE', white_mask, .72)
get_disparity('BLACK', black_mask, .14)
get_disparity('HISPANIC', hisp_mask, .183)

Data for WHITE suspects:
Total Deaths:  5587
WHITE Deaths:  2528
Disparity:  -0.37155698745102717 

Data for BLACK suspects:
Total Deaths:  5587
BLACK Deaths:  1318
Disparity:  0.685034135365261 

Data for HISPANIC suspects:
Total Deaths:  5587
HISPANIC Deaths:  920
Disparity:  -0.10017497684417667 



In [8]:
def get_state_disparity(race, state, race_mask, state_mask, prop):
  
    deaths = []
    race_deaths = []
    deaths.append(df[(state_mask)].id.count())
    race_deaths.append(df[(race_mask & state_mask)].id.count())
    
    print(f'Data for {race} suspects in {state}:')
    
    print('Total Deaths: ', np.sum(deaths))
    print(f'{race} Deaths: ', np.sum(race_deaths))
    what_it_is = np.sum(race_deaths) / np.sum(deaths)
    what_it_should_be = prop
    disparity = (what_it_is - what_it_should_be) / what_it_should_be
    print('Disparity: ', disparity, '\n')

In [9]:
get_state_disparity('WHITE', 'UTAH', white_mask, utah_mask, .907)
get_state_disparity('BLACK', 'UTAH', black_mask, utah_mask, .014)
get_state_disparity('HISPANIC', 'UTAH', hisp_mask, utah_mask, .142)

Data for WHITE suspects in UTAH:
Total Deaths:  66
WHITE Deaths:  39
Disparity:  -0.3485015535732184 

Data for BLACK suspects in UTAH:
Total Deaths:  66
BLACK Deaths:  7
Disparity:  6.575757575757576 

Data for HISPANIC suspects in UTAH:
Total Deaths:  66
HISPANIC Deaths:  12
Disparity:  0.2804097311139566 



In [10]:
get_state_disparity('WHITE', 'PROVO', white_mask, provo_mask, .907)
get_state_disparity('BLACK', 'PROVO', black_mask, provo_mask, .014)
get_state_disparity('HISPANIC', 'PROVO', hisp_mask, provo_mask, .142)

Data for WHITE suspects in PROVO:
Total Deaths:  1
WHITE Deaths:  1
Disparity:  0.10253583241455344 

Data for BLACK suspects in PROVO:
Total Deaths:  1
BLACK Deaths:  0
Disparity:  -1.0 

Data for HISPANIC suspects in PROVO:
Total Deaths:  1
HISPANIC Deaths:  0
Disparity:  -1.0 



In [11]:
get_state_disparity('WHITE', 'Nevada', white_mask, nv_mask, .739)
get_state_disparity('BLACK', 'Nevada', black_mask, nv_mask, .103)
get_state_disparity('HISPANIC', 'Nevada', hisp_mask, nv_mask, .292)

Data for WHITE suspects in Nevada:
Total Deaths:  102
WHITE Deaths:  43
Disparity:  -0.42954177611504685 

Data for BLACK suspects in Nevada:
Total Deaths:  102
BLACK Deaths:  15
Disparity:  0.4277555682467163 

Data for HISPANIC suspects in Nevada:
Total Deaths:  102
HISPANIC Deaths:  28
Disparity:  -0.05989793177544977 



In [12]:
def get_state_diff_yr(state, state_mask, year, pop, prob_white, prob_black, prob_hisp):

    black_mask = df.race == 'B'
    white_mask = df.race == 'W'
    hisp_mask = df.race == 'H'

    deaths = df[(state_mask)][year].id.count()
    black_deaths = df[(black_mask & state_mask)][year].id.count()
    white_deaths = df[(white_mask & state_mask)][year].id.count()
    hisp_deaths  = df[( hisp_mask & state_mask)][year].id.count()

    prob_black_giv_death = black_deaths / deaths
    prob_white_giv_death = white_deaths / deaths
    prob_hisp_giv_death  =  hisp_deaths / deaths

    prob_death = deaths / pop

    prob_death_giv_black = prob_black_giv_death * prob_death / prob_black
    prob_death_giv_white = prob_white_giv_death * prob_death / prob_white
    prob_death_giv_hisp  = prob_hisp_giv_death  * prob_death /  prob_hisp


    print(f'Death Disparity Among Races in {state} in {year}\n')    
    print('Total Deaths: ', deaths)
    print('White Deaths: ', white_deaths)
    print('Black Deaths: ', black_deaths)
    print('Hispanic Deaths: ', hisp_deaths)
    print('')
    print('Probability if being killed by police if black: ', prob_death_giv_black)
    print('Probability if being killed by police if white: ', prob_death_giv_white)
    print('Probability if being killed by police if hisp:  ',  prob_death_giv_hisp)

    print(f'Black people are {np.round(prob_death_giv_black / prob_death_giv_white, 3)} times more likely to be killed by police than white people')

In [13]:
get_state_diff_yr('Nevada', nv_mask, '2019', 3.08e6, .739, .103, .292)

Death Disparity Among Races in Nevada in 2019

Total Deaths:  11
White Deaths:  3
Black Deaths:  2
Hispanic Deaths:  4

Probability if being killed by police if black:  6.304375236414073e-06
Probability if being killed by police if white:  1.3180324411718187e-06
Probability if being killed by police if hisp:   4.4476071873332155e-06
Black people are 4.783 times more likely to be killed by police than white people


In [16]:
get_state_diff_yr('Utah', utah_mask, '2019', 3.206e6, .906, .015, .144)

Death Disparity Among Races in Utah in 2019

Total Deaths:  12
White Deaths:  6
Black Deaths:  2
Hispanic Deaths:  3

Probability if being killed by police if black:  4.158868787689748e-05
Probability if being killed by police if white:  2.065663305143915e-06
Probability if being killed by police if hisp:   6.498232480765232e-06
Black people are 20.133 times more likely to be killed by police than white people


In [28]:
def get_diff_yr(year, pop, prob_white, prob_black, prob_hisp):

    black_mask = df.race == 'B'
    white_mask = df.race == 'W'
    hisp_mask = df.race == 'H'

    deaths = df[year].id.count()
    black_deaths = df[black_mask][year].id.count()
    white_deaths = df[white_mask][year].id.count()
    hisp_deaths  = df[ hisp_mask][year].id.count()

    prob_black_giv_death = black_deaths / deaths
    prob_white_giv_death = white_deaths / deaths
    prob_hisp_giv_death  =  hisp_deaths / deaths

    prob_death = deaths / pop

    prob_death_giv_black = prob_black_giv_death * prob_death / prob_black
    prob_death_giv_white = prob_white_giv_death * prob_death / prob_white
    prob_death_giv_hisp  = prob_hisp_giv_death  * prob_death /  prob_hisp


    print(f'Death Disparity Among Races in {year}\n')    
    print('Total Deaths: ', deaths)
    print('White Deaths: ', white_deaths)
    print('Black Deaths: ', black_deaths)
    print('Hispanic Deaths: ', hisp_deaths)
    print('')
    print('Probability if being killed by police if black: ', prob_death_giv_black)
    print('Probability if being killed by police if white: ', prob_death_giv_white)
    print('Probability if being killed by police if hisp:  ',  prob_death_giv_hisp)

    print('')

    print(f'Hispanic people are {np.round(prob_death_giv_hisp / prob_death_giv_white, 3)} times more likely to be killed by police than white people')

    print(f'Black people are {np.round(prob_death_giv_black / prob_death_giv_white, 3)} times more likely to be killed by police than white people')

In [29]:
get_diff_yr('2020', 328.2e6, .72, .14, .183)

Death Disparity Among Races in 2020

Total Deaths:  656
White Deaths:  240
Black Deaths:  123
Hispanic Deaths:  80

Probability if being killed by police if black:  2.676939148602768e-06
Probability if being killed by police if white:  1.0156408693885842e-06
Probability if being killed by police if hisp:   1.3319880254276513e-06
Hispanic people are 1.311 times more likely to be killed by police than white people
Black people are 2.636 times more likely to be killed by police than white people


In [30]:
get_diff_yr('2019', 328.2e6, .72, .14, .183)

Death Disparity Among Races in 2019

Total Deaths:  999
White Deaths:  403
Black Deaths:  250
Hispanic Deaths:  163

Probability if being killed by police if black:  5.440933228867415e-06
Probability if being killed by police if white:  1.7054302931816645e-06
Probability if being killed by police if hisp:   2.71392560180884e-06
Hispanic people are 1.591 times more likely to be killed by police than white people
Black people are 3.19 times more likely to be killed by police than white people


In [31]:
get_diff_yr('2018', 328.2e6, .72, .14, .183)

Death Disparity Among Races in 2018

Total Deaths:  990
White Deaths:  456
Black Deaths:  229
Hispanic Deaths:  165

Probability if being killed by police if black:  4.983894837642551e-06
Probability if being killed by police if white:  1.9297176518383103e-06
Probability if being killed by police if hisp:   2.7472253024445307e-06
Hispanic people are 1.424 times more likely to be killed by police than white people
Black people are 2.583 times more likely to be killed by police than white people


In [15]:
def get_state_diff(state, state_mask, pop, prob_white, prob_black, prob_hisp):

    years = ['2017','2018','2019']
    black_mask = df.race == 'B'
    white_mask = df.race == 'W'
    hisp_mask = df.race == 'H'
    deaths = np.zeros(3)
    black_deaths = np.zeros(3)
    white_deaths = np.zeros(3)
    hisp_deaths = np.zeros(3)
    
    for i,year in enumerate(years):
        
        deaths[i] = df[(state_mask)][year].id.count()
        black_deaths[i] = df[(black_mask & state_mask)][year].id.count()
        white_deaths[i] = df[(white_mask & state_mask)][year].id.count()
        hisp_deaths[i]  = df[( hisp_mask & state_mask)][year].id.count()
    
    prob_black_giv_death = black_deaths / deaths
    prob_white_giv_death = white_deaths / deaths
    prob_hisp_giv_death  =  hisp_deaths / deaths

    prob_death = deaths / pop

    prob_death_giv_black = prob_black_giv_death * prob_death / prob_black
    prob_death_giv_white = prob_white_giv_death * prob_death / prob_white
    prob_death_giv_hisp  = prob_hisp_giv_death  * prob_death /  prob_hisp


    print(f'Death Disparity Among Races in {state}\n')    
    print('Mean Yearly Deaths: ', deaths.mean())
    print('Mean Yearly White Deaths: ', white_deaths.mean())
    print('Mean Yearly Black Deaths: ', black_deaths.mean())
    print('Mean Yearly Hispanic Deaths: ', hisp_deaths.mean())
    print('')
    print('Mean yearly probability if being killed by police if black: ', prob_death_giv_black.mean())
    print('Mean yearly probability if being killed by police if white: ', prob_death_giv_white.mean())
    print('Mean yearly probability if being killed by police if hisp:  ',  prob_death_giv_hisp.mean())

    print(f'Black people are on average {np.round(prob_death_giv_black / prob_death_giv_white, 3).mean()} times more likely to be killed by police than white people')

In [16]:
get_state_diff('Utah', utah_mask, 3.206e6, .906, .015, .144)

Death Disparity Among Races in Utah

Mean Yearly Deaths:  12.666666666666666
Mean Yearly White Deaths:  6.0
Mean Yearly Black Deaths:  2.0
Mean Yearly Hispanic Deaths:  3.6666666666666665

Mean yearly probability if being killed by police if black:  4.158868787689748e-05
Mean yearly probability if being killed by police if white:  2.0656633051439145e-06
Mean yearly probability if being killed by police if hisp:   7.942284143157504e-06
Black people are on average 22.817666666666668 times more likely to be killed by police than white people


In [17]:
get_state_diff('Nevada', nv_mask, 3.08e6, .739, .103, .292)

Death Disparity Among Races in Nevada

Mean Yearly Deaths:  16.333333333333332
Mean Yearly White Deaths:  5.666666666666667
Mean Yearly Black Deaths:  2.3333333333333335
Mean Yearly Hispanic Deaths:  5.333333333333333

Mean yearly probability if being killed by police if black:  7.355104442483084e-06
Mean yearly probability if being killed by police if white:  2.4896168333245464e-06
Mean yearly probability if being killed by police if hisp:   5.930142916444287e-06
Black people are on average 3.302666666666667 times more likely to be killed by police than white people


In [18]:
df['2015']

Unnamed: 0_level_0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-01-02,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
2015-01-02,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2015-01-03,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
2015-01-04,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
2015-01-04,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-12-29,1133,Siolosega Velega-Nuufolau,2015-12-29,shot,knife,50.0,F,O,Santa Nella,CA,True,other,Not fleeing,False
2015-12-29,1134,Tien Hua,2015-12-29,shot,unknown weapon,,M,A,Rosemead,CA,False,undetermined,Not fleeing,False
2015-12-30,1135,Fred Perez,2015-12-30,shot,knife,55.0,M,H,Fresno,CA,False,attack,Not fleeing,True
2015-12-30,1136,John Randell Veach,2015-12-30,shot,undetermined,39.0,M,,Rawlins,WY,False,undetermined,,False
