# Exploring Gun Deaths in the US

In this project we will work analyzing data on gun deaths in the US.

First we will load the csv file guns-data from FiveThiryEight and display the first 5 rows in order to give a quick look into the data.

In [5]:
import csv

f = open("guns.csv",'r')

data = list(csv.reader(f))

print(data[:5])


[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


Now we will remove the header row and display the first 5 values to ensure that we have done it correctly.

In [6]:
data = data[1:]
print(data[:5])

[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


Now we will count the number of deaths per year and store the data in a ditcinary

In [7]:
year_counts = {}
for row in data:
    if row[1] in year_counts:
        year_counts[row[1]] += 1
    else:
        year_counts[row[1]] = 1
print(year_counts)

{'2013': 33636, '2014': 33599, '2012': 33563}


Now we are going to analyze the dates more accurately, in order to do that we will create a datetime object for each row in the dataset and count the deaths per each day

In [8]:
import datetime

dates = [
    datetime.datetime(year=int(row[1]) ,month=int(row[2]), day=1) 
    for row in data
]

print(dates[:5])

date_counts = {}

for date in dates:
    if date in date_counts:
        date_counts[date] += 1
    else:
        date_counts[date] = 1

print(date_counts)

[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]
{datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2014, 7, 1, 0, 0): 2884, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2014, 1, 1, 0, 0): 2651, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2014, 8, 1, 0, 0): 2970, datetime.datetime(2013, 8, 1, 0, 0): 2859, datetime.datetime(2012, 4, 1, 0, 0): 2795, datetime.datetime(2013, 9, 1, 0, 0): 2742, datetime.datetime(2014, 11, 1, 0, 0): 2756, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2013, 10, 1, 0, 0): 2808, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2014, 10, 1, 0, 0): 2865, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2014, 3, 1, 0, 0

Now we are going to count the deaths per race and sex

In [9]:
sex_counts = {}

for row in data:
    if row[5] in sex_counts:
        sex_counts[row[5]] += 1
    else:
        sex_counts[row[5]] = 1

print(sex_counts)

race_counts = {}

for row in data:
    if row[7] in race_counts:
        race_counts[row[7]] += 1
    else:
        race_counts[row[7]] = 1

print(race_counts)



{'F': 14449, 'M': 86349}
{'White': 66237, 'Asian/Pacific Islander': 1326, 'Black': 23296, 'Native American/Native Alaskan': 917, 'Hispanic': 9022}


In [10]:
import operator
sorted_x = sorted(date_counts.items(), key=operator.itemgetter(1))
print(sorted_x)

[(datetime.datetime(2012, 2, 1, 0, 0), 2357), (datetime.datetime(2014, 2, 1, 0, 0), 2361), (datetime.datetime(2013, 2, 1, 0, 0), 2375), (datetime.datetime(2014, 1, 1, 0, 0), 2651), (datetime.datetime(2014, 3, 1, 0, 0), 2684), (datetime.datetime(2012, 11, 1, 0, 0), 2729), (datetime.datetime(2012, 10, 1, 0, 0), 2733), (datetime.datetime(2013, 9, 1, 0, 0), 2742), (datetime.datetime(2012, 3, 1, 0, 0), 2743), (datetime.datetime(2014, 11, 1, 0, 0), 2756), (datetime.datetime(2013, 11, 1, 0, 0), 2758), (datetime.datetime(2012, 1, 1, 0, 0), 2758), (datetime.datetime(2013, 12, 1, 0, 0), 2765), (datetime.datetime(2012, 12, 1, 0, 0), 2791), (datetime.datetime(2012, 4, 1, 0, 0), 2795), (datetime.datetime(2013, 4, 1, 0, 0), 2798), (datetime.datetime(2013, 5, 1, 0, 0), 2806), (datetime.datetime(2013, 10, 1, 0, 0), 2808), (datetime.datetime(2012, 6, 1, 0, 0), 2826), (datetime.datetime(2012, 9, 1, 0, 0), 2852), (datetime.datetime(2014, 12, 1, 0, 0), 2857), (datetime.datetime(2013, 8, 1, 0, 0), 2859), (

- We've seen that the deaths per firegun had been increasing year after year
- Seeing the year distribution the months withs more deaths are the winter ones with february leading the team, and the ones with less deaths are the summer ones
- 86% of the victims are male members the other 14% womans


Now we are going to contrast the data of deaths per gun against the census one

In [11]:
f2 = open("census.csv",'r')
census = list(csv.reader(f2))
print(census[:5])

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [27]:
census_dir = {}

for i in range(10,17):
    census_dir[census[0][i]] = census[1][i]

mapping = {
    "Asian/Pacific Islander":int(census_dir['Race Alone - Asian']+census_dir['Race Alone - Native Hawaiian and Other Pacific Islander'])
    ,"Black":int(census_dir['Race Alone - Black or African American'])
    ,"Native American/Native Alaskan":int(census_dir['Race Alone - American Indian and Alaska Native'])
    ,"Hispanic":int(census_dir['Race Alone - Hispanic'])
    ,"White":int(census_dir['Race Alone - White'])
}

print(mapping)

race_per_hundredk = {}

for key,val in race_counts.items():
    race_per_hundredk[key] = val / mapping[key] * 100000
    
print(race_per_hundredk)

{'Hispanic': 44618105, 'Asian/Pacific Islander': 15159516674625, 'Black': 40250635, 'Native American/Native Alaskan': 3739506, 'White': 197318956}
{'White': 33.56849303419181, 'Asian/Pacific Islander': 8.746980714890115e-06, 'Black': 57.8773477735196, 'Native American/Native Alaskan': 24.521955573811088, 'Hispanic': 20.220491210910907}


In [42]:
intents = [x[3] for x in data]
races = [x[7] for x in data]

homicide_race_counts = {}

for i,val in enumerate(races):
    if intents[i] == 'Homicide':
        if val in homicide_race_counts:
            homicide_race_counts[val] += 1
        else:
            homicide_race_counts[val] = 1

print(homicide_race_counts)

{'White': 9147, 'Asian/Pacific Islander': 559, 'Black': 19510, 'Native American/Native Alaskan': 326, 'Hispanic': 5634}
