In [1]:
# read data
import csv
f = open('guns.csv')
data = list(csv.reader(f))
headers = data[0][1:]
data = data[1:]
print(headers)
print(data[0])

['year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4']


In [2]:
# how many gun deaths happened in each year
years = [y[1] for y in data]
year_counts = dict()
for y in years:
    if y in year_counts:
        year_counts[y] +=1
    else:
        year_counts[y] = 1
print(year_counts)

import datetime
dates = [datetime.datetime(year = int(r[1]),month = int(r[2]), day =1) \
         for r in data]

{'2013': 33636, '2014': 33599, '2012': 33563}


In [3]:
# how many gun deaths happened in each month
months = [row[2] for row in data]
month_count = dict()
for m in months:
    if m in month_count:
        month_count[m] +=1
    else:
        month_count[m] = 1
print(month_count)

{'10': 8406, '12': 8413, '07': 8989, '03': 8289, '02': 7093, '09': 8508, '01': 8273, '08': 8783, '06': 8677, '04': 8455, '05': 8669, '11': 8243}


In [4]:
#  how gun deaths in the US vary by gender and race
sex_counts = dict()
race_counts = dict()
for row in data:
    if row[5] in sex_counts:
        sex_counts[row[5]]+=1
    else:
        sex_counts[row[5]] =1
    if row[7] in race_counts:
        race_counts[row[7]] +=1
    else:
        race_counts[row[7]] =1
print('sex_counts',sex_counts)
print('race_counts',race_counts)

sex_counts {'M': 86349, 'F': 14449}
race_counts {'White': 66237, 'Hispanic': 9022, 'Asian/Pacific Islander': 1326, 'Native American/Native Alaskan': 917, 'Black': 23296}


Interpretation: The number of male gun cases is roughly 6 times that of the female. The white ranks first in gun death and was followed by Black. However, without looking at the demographic data (such as proportion of each), I cannot conclude which gender or race demonstrated higher tendency of gun shot.

In [8]:
# use census data to continue analysis
f = open('census.csv')
c_data = list(csv.reader(f))
c_header = c_data[0]
c_data = c_data[1]
# get rate of gun death per 100000 people for each race
death_per_100000_white = race_counts['White']/int(c_data[10]) * 100000
death_per_100000_black = race_counts['Black']/int(c_data[12]) * 100000
print('death_per_100000_white',death_per_100000_white)
print('death_per_100000_black',death_per_100000_black)

death_per_100000_white 33.56849303419181
death_per_100000_black 57.8773477735196


Further interpretation: 
on average 33.6 out of 100,000 white people were killed by gun between 2012 and 2014. The number for black was 57.9
After considering the proportion, we can conclude that black people has higher probability involved in a gun death.

In [11]:
# calculating death ratio in each category
import re
def get_ratio(oringin_name,r_name):
    population = 0
    death = 0
    for i, item in enumerate(c_header):
        if re.search(r_name,item):
            population += int(c_data[i])
    for k,v in race_counts.items():
        if re.search(oringin_name,k):
            death += v
    return death/population * 100000

mapping = {'Asian/Pacific Islander':'Race Alone - Asian|Race Alone - Native Hawaiian and Other Pacific Islander',\
           'Black':'Race Alone - Black or African American',\
           'Hispanic':'Race Alone - Hispanic',\
           'Native American/Native Alaskan':'Race Alone - American Indian and Alaska Native',\
           'White':'Race Alone - White'}
race_per_hundredk = dict()
for k,v in mapping.items():
    race_per_hundredk[k] = get_ratio(k,v)
print(race_per_hundredk)

{'White': 33.56849303419181, 'Hispanic': 20.220491210910907, 'Asian/Pacific Islander': 8.374309664161762, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196}


In [12]:
# find out only homeside death record categorized by race
intents = [row[3] for row in data]
homicide_race_counts = dict()
for i, item in enumerate(intents):
    if re.search('[Hh]omicide',item):
        if data[i][7] in homicide_race_counts:
            homicide_race_counts[data[i][7]] += 1
        else:
            homicide_race_counts[data[i][7]] = 1
print(homicide_race_counts)

{'White': 9147, 'Hispanic': 5634, 'Asian/Pacific Islander': 559, 'Native American/Native Alaskan': 326, 'Black': 19510}


In [13]:
# calculate the ratio for each race
homicide_hundredk = dict()
for k,v in mapping.items():
    homicide_hundredk[k] = get_ratio(k,v)
print(homicide_hundredk)

{'White': 33.56849303419181, 'Hispanic': 20.220491210910907, 'Asian/Pacific Islander': 8.374309664161762, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196}


Interpretation: The result shows that Black is most likely involved in homicide.