In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the shootings table and index its rows by the parsed `date` column.
# The `date` column itself is kept (as the original strings) alongside the
# new DatetimeIndex, which is also named `date`.
df = (
    pd.read_csv('fatal-police-shootings-data.csv')
    .pipe(lambda raw: raw.set_index(pd.to_datetime(raw['date'], format='%Y-%m-%d')))
)
# Random preview of five rows; unseeded, so the rows shown change on every run.
df.sample(5)

Unnamed: 0_level_0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2018-07-11,3852,TK TK,2018-07-11,shot,knife,,F,,Willis,TX,False,other,Not fleeing,False
2019-06-02,4748,Miles Hall,2019-06-02,shot,crowbar,23.0,M,B,Walnut Creek,CA,True,attack,Not fleeing,True
2015-07-18,652,Kevin Thomas Snyder,2015-07-18,shot,gun,46.0,M,W,Phoenix,AZ,False,attack,Not fleeing,False
2018-12-10,4289,George Penev,2018-12-10,shot,knife,23.0,M,W,Fredonia,NY,True,attack,Not fleeing,False
2019-10-14,5099,Christopher Whitfield,2019-10-14,shot,unarmed,31.0,M,B,Ethel,LA,True,undetermined,Foot,False


In [3]:
# Boolean row filters over `df`, reused by the disparity helpers below.
# Each one is True where the column equals the given code as stored in the CSV.

# Filters on the `race` column.
black_mask = df['race'].eq('B')
white_mask = df['race'].eq('W')
hisp_mask = df['race'].eq('H')

# Filters on location columns.
utah_mask = df['state'].eq('UT')
provo_mask = df['city'].eq('Provo')

# Filters on the `armed` column; `nan_mask` marks rows where it is missing.
unarmed_mask = df['armed'].eq('unarmed')
undet_mask = df['armed'].eq('undetermined')
toy_mask = df['armed'].eq('toy weapon')
nan_mask = df['armed'].isna()
gun_mask = df['armed'].eq('gun')

In [4]:
def get_arm_disparity(race, mask, prop, armed=True):
    """Print one group's disparity among gun-armed or unarmed deaths.

    The rows of the module-level ``df`` are first restricted to
    ``gun_mask`` (when ``armed`` is truthy) or ``unarmed_mask`` (otherwise).
    The group's share of those rows is then compared with ``prop``:
    ``disparity = (share - prop) / prop``.

    Parameters
    ----------
    race : label interpolated into the printed lines.
    mask : boolean row filter over ``df`` selecting the group.
    prop : baseline share the observed share is compared against.
    armed : selects ``gun_mask`` when truthy, ``unarmed_mask`` otherwise.

    Returns
    -------
    None. The results are printed.
    """
    # Note: "armed" means specifically `armed == 'gun'`; other weapons are
    # in neither subset.
    if armed:
        subset_mask = gun_mask
        heading = f'Data for armed {race} suspects:'
    else:
        subset_mask = unarmed_mask
        heading = f'Data for unarmed {race} suspects:'

    # Count non-null ids in the subset, and in the group's part of it.
    subset_total = df.loc[subset_mask, 'id'].count()
    group_total = df.loc[mask & subset_mask, 'id'].count()

    print(heading)
    print('Total Deaths: ', subset_total)
    print(f'{race} Deaths: ', group_total)

    observed_share = group_total / subset_total
    disparity = (observed_share - prop) / prop
    print('Disparity: ', disparity, '\n')

In [5]:
# `prop` is the baseline share each group's observed share is compared against.
# NOTE(review): presumably national population shares -- confirm the source.
get_arm_disparity(race='WHITE', mask=white_mask, prop=0.72, armed=True)
get_arm_disparity(race='WHITE', mask=white_mask, prop=0.72, armed=False)
get_arm_disparity(race='BLACK', mask=black_mask, prop=0.14, armed=True)
get_arm_disparity(race='BLACK', mask=black_mask, prop=0.14, armed=False)
get_arm_disparity(race='HISPANIC', mask=hisp_mask, prop=0.183, armed=True)
get_arm_disparity(race='HISPANIC', mask=hisp_mask, prop=0.183, armed=False)

Data for armed WHITE suspects:
Total Deaths:  3067
WHITE Deaths:  1453
Disparity:  -0.3420099264572691 

Data for unarmed WHITE suspects:
Total Deaths:  352
WHITE Deaths:  145
Disparity:  -0.42787247474747475 

Data for armed BLACK suspects:
Total Deaths:  3067
BLACK Deaths:  762
Disparity:  0.7746518235595509 

Data for unarmed BLACK suspects:
Total Deaths:  352
BLACK Deaths:  123
Disparity:  1.495941558441558 

Data for armed HISPANIC suspects:
Total Deaths:  3067
HISPANIC Deaths:  448
Disparity:  -0.20179738125399763 

Data for unarmed HISPANIC suspects:
Total Deaths:  352
HISPANIC Deaths:  63
Disparity:  -0.021982116244411265 



In [6]:
def get_disparity(race, mask, prop):
    """Print one group's disparity among all deaths in ``df``.

    The group's share of all rows (counted by non-null ``id``) is compared
    with ``prop``: ``disparity = (share - prop) / prop``.

    Parameters
    ----------
    race : label interpolated into the printed lines.
    mask : boolean row filter over ``df`` selecting the group.
    prop : baseline share the observed share is compared against.

    Returns
    -------
    None. The results are printed.
    """
    overall_total = df['id'].count()
    group_total = df.loc[mask, 'id'].count()

    print(f'Data for {race} suspects:')
    print('Total Deaths: ', overall_total)
    print(f'{race} Deaths: ', group_total)

    observed_share = group_total / overall_total
    disparity = (observed_share - prop) / prop
    print('Disparity: ', disparity, '\n')

In [7]:
# `prop` is the baseline share each group's observed share is compared against.
# NOTE(review): presumably national population shares -- confirm the source.
get_disparity(race='WHITE', mask=white_mask, prop=0.72)
get_disparity(race='BLACK', mask=black_mask, prop=0.14)
get_disparity(race='HISPANIC', mask=hisp_mask, prop=0.183)

Data for WHITE suspects:
Total Deaths:  5424
WHITE Deaths:  2478
Disparity:  -0.3654744346116027 

Data for BLACK suspects:
Total Deaths:  5424
BLACK Deaths:  1298
Disparity:  0.7093341761483353 

Data for HISPANIC suspects:
Total Deaths:  5424
HISPANIC Deaths:  904
Disparity:  -0.08925318761384338 



In [8]:
def get_state_disparity(race, state, race_mask, state_mask, prop):
    """Print one group's disparity among the deaths selected by ``state_mask``.

    The group's share of the rows selected by ``state_mask`` (counted by
    non-null ``id``) is compared with ``prop``:
    ``disparity = (share - prop) / prop``.

    Parameters
    ----------
    race : label interpolated into the printed lines.
    state : place label interpolated into the heading; not used for filtering.
    race_mask : boolean row filter over ``df`` selecting the group.
    state_mask : boolean row filter over ``df`` selecting the place.
    prop : baseline share the observed share is compared against.

    Returns
    -------
    None. The results are printed.
    """
    place_total = df.loc[state_mask, 'id'].count()
    group_total = df.loc[race_mask & state_mask, 'id'].count()

    print(f'Data for {race} suspects in {state}:')
    print('Total Deaths: ', place_total)
    print(f'{race} Deaths: ', group_total)

    observed_share = group_total / place_total
    disparity = (observed_share - prop) / prop
    print('Disparity: ', disparity, '\n')

In [9]:
# `prop` is the baseline share each group's observed share is compared against.
# NOTE(review): presumably Utah population shares -- confirm the source.
get_state_disparity(race='WHITE', state='UTAH', race_mask=white_mask, state_mask=utah_mask, prop=0.907)
get_state_disparity(race='BLACK', state='UTAH', race_mask=black_mask, state_mask=utah_mask, prop=0.014)
get_state_disparity(race='HISPANIC', state='UTAH', race_mask=hisp_mask, state_mask=utah_mask, prop=0.142)

Data for WHITE suspects in UTAH:
Total Deaths:  60
WHITE Deaths:  36
Disparity:  -0.33847850055126794 

Data for BLACK suspects in UTAH:
Total Deaths:  60
BLACK Deaths:  7
Disparity:  7.333333333333333 

Data for HISPANIC suspects in UTAH:
Total Deaths:  60
HISPANIC Deaths:  12
Disparity:  0.4084507042253523 



In [10]:
# The city mask is passed through the `state_mask` parameter; the helper only
# uses it as a row filter, so any boolean mask over `df` works.
# NOTE(review): the `prop` baselines are the same values used for the Utah
# calls -- confirm whether Provo-specific shares were intended. The captured
# output shows a single death for this filter, so these figures carry little
# information.
get_state_disparity(race='WHITE', state='PROVO', race_mask=white_mask, state_mask=provo_mask, prop=0.907)
get_state_disparity(race='BLACK', state='PROVO', race_mask=black_mask, state_mask=provo_mask, prop=0.014)
get_state_disparity(race='HISPANIC', state='PROVO', race_mask=hisp_mask, state_mask=provo_mask, prop=0.142)

Data for WHITE suspects in PROVO:
Total Deaths:  1
WHITE Deaths:  1
Disparity:  0.10253583241455344 

Data for BLACK suspects in PROVO:
Total Deaths:  1
BLACK Deaths:  0
Disparity:  -1.0 

Data for HISPANIC suspects in PROVO:
Total Deaths:  1
HISPANIC Deaths:  0
Disparity:  -1.0 



In [None]:
def get_state_diff_yr(state, state_mask, year, pop, prob_white, prob_black, prob_hisp):
    """Print white, Black and Hispanic death disparities for one place and year.

    Rows of the module-level ``df`` are restricted to ``state_mask`` and to
    the given calendar year of the DatetimeIndex. For each race, its share of
    those deaths is compared with the matching ``prob_*`` baseline using
    ``disparity = (share - baseline) / baseline``.

    Parameters
    ----------
    state : label interpolated into the printed heading; not used for filtering.
    state_mask : boolean row filter over ``df`` selecting the place.
    year : calendar year to select, as an int or a string that ``int()``
        accepts (e.g. ``2019`` or ``'2019'``).
    pop : population used as the denominator of the printed per-capita figure.
    prob_white, prob_black, prob_hisp : baseline share for each race.

    Returns
    -------
    None. The results are printed.
    """
    # Build one positional year filter from the index rather than using
    # `df[year][mask]`. String row-selection through `df[...]` is not
    # supported by newer pandas, and applying a full-length mask to a year
    # subset forces a reindex on a date index that is not guaranteed unique.
    in_scope = state_mask & (df.index.year == int(year))

    deaths = df.loc[in_scope, 'id'].count()

    print(f'Death Disparity Among Races in {state}')
    print('Total Deaths: ', deaths)

    if deaths == 0:
        # Every share below would be 0/0; report and stop instead.
        print('No deaths recorded for this selection; disparity is undefined.', '\n')
        return

    # Overall probability of death for the selection, given `pop`.
    print('Deaths per capita: ', deaths / pop, '\n')

    baselines = (
        ('WHITE', 'W', prob_white),
        ('BLACK', 'B', prob_black),
        ('HISPANIC', 'H', prob_hisp),
    )
    for race, code, prop in baselines:
        race_deaths = df.loc[in_scope & (df.race == code), 'id'].count()
        what_it_is = race_deaths / deaths
        disparity = (what_it_is - prop) / prop
        print(f'{race} Deaths: ', race_deaths)
        print(f'{race} Disparity: ', disparity, '\n')