In [1]:
import pandas as pd

In [5]:
# Load the raw CSV file; its first column is used as the row index.
path_to_data = 'interactive_data.csv'
df = pd.read_csv(filepath_or_buffer=path_to_data, index_col=0)

In [6]:
# Quick look at the raw frame: display its first five rows.
df.head(n=5)

Unnamed: 0,Intent,Gender,Age,Race,Deaths,Population,Rate
1,None selected,None selected,None selected,None selected,33599,316299978,10.6
2,None selected,None selected,None selected,White,22079,197369634,11.2
3,None selected,None selected,None selected,Black,7765,38896382,20.0
4,None selected,None selected,None selected,Hispanic,3007,54049078,5.6
5,None selected,None selected,None selected,Asian/Pacific Islander,442,16315561,2.7


In [9]:
# Preprocessing: keep only the rows in which no column holds the
# 'None selected' placeholder, then preview the result.
df_filtered = df[(df != 'None selected').all(axis=1)]
df_filtered.head()

Unnamed: 0,Intent,Gender,Age,Race,Deaths,Population,Rate
152,Suicide,Female,Under 15,White,19,15355910,0.1
153,Suicide,Female,Under 15,Black,1,4095428,0.0
154,Suicide,Female,Under 15,Hispanic,4,7330024,0.1
155,Suicide,Female,Under 15,Asian/Pacific Islander,1,1393440,0.1
156,Suicide,Female,Under 15,Other,0,1661877,0.0


In [22]:
# Compute statistics
#
# Every figure printed below is a ratio of two sums of the 'Deaths' column,
# each sum taken over the rows of df_filtered that match one or more
# column == value conditions. The selection logic lives in a single helper
# instead of being repeated for every statistic.


def _sum_deaths(frame, **criteria):
    """Total the 'Deaths' column over the rows of `frame` matching all criteria.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with a 'Deaths' column plus every column named in `criteria`.
    **criteria
        column=value pairs; a row is kept only if it equals `value` in every
        named column. With no criteria, all rows are summed.

    Returns
    -------
    The sum of 'Deaths' over the selected rows.
    """
    mask = pd.Series(True, index=frame.index)
    for column, wanted in criteria.items():
        mask = mask & (frame[column] == wanted)
    # Single .loc lookup (rows by mask, one column) instead of chained indexing.
    return frame.loc[mask, 'Deaths'].sum()


all_death = _sum_deaths(df_filtered)

# Suicides: share of all deaths
suicides_nb = _sum_deaths(df_filtered, Intent='Suicide')
suicide_rate = suicides_nb/all_death
print(f'Nearly {suicide_rate*100} % of gun deaths are suicides.')

# Suicide of male: share of suicides
male_victim_nb = _sum_deaths(df_filtered, Intent='Suicide', Gender='Male')
male_victime_rate = male_victim_nb/suicides_nb
print(f'More than {male_victime_rate * 100} % of suicide victims are male.')

# Homicides: share of all deaths
homicides_nb = _sum_deaths(df_filtered, Intent='Homicide')
homicides_rate = homicides_nb/all_death
print(f'Around {homicides_rate*100} % of all gun deaths are homicides.')

# Homicide male 15-34 black: share of male homicide victims aged 15 - 34
black_young_male_homicide_nb = _sum_deaths(
    df_filtered, Intent='Homicide', Gender='Male', Age='15 - 34', Race='Black')

young_male_homicide_nb = _sum_deaths(
    df_filtered, Intent='Homicide', Gender='Male', Age='15 - 34')

rate = black_young_male_homicide_nb/young_male_homicide_nb
print(f'Around  {rate*100} % of homicide victims who are males in the age-group of 15--34 are black.')

# Women homicides: share of homicides
women_homicide_nb = _sum_deaths(df_filtered, Intent='Homicide', Gender='Female')
women_homicide_rate = women_homicide_nb/homicides_nb

# NOTE(review): 'constitue' is a typo for 'constitute'; the message is kept
# as-is so that it still matches the output recorded for this cell.
print(f'Women constitue only {women_homicide_rate*100} % of the total homicide victims.')

Nearly 62.68194671826165 % of gun deaths are suicides.
More than 86.24275809668535 % of suicide victims are male.
Around 34.906980205387704 % of all gun deaths are homicides.
Around  66.12482748044778 % of homicide victims who are males in the age-group of 15--34 are black.
Women constitue only 15.289502856655583 % of the total homicide victims.
