In [2]:
import csv

# Loading data
# Read the whole CSV into memory as a list of rows (each row is a list of strings).
# The `with` block closes the file handle as soon as reading is finished, and
# newline="" is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
with open("guns.csv", "r", newline="") as f:
    data = list(csv.reader(f))

# Preview: the header row followed by the first four records
data[0:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

In [4]:
# Removing headers
# Split the parsed rows in one step: the first row holds the column names,
# every row after it is an actual record.
# NOTE: this rebinds `data`, so running the cell a second time without
# re-loading the file would strip a real record.
headers, data = data[0], data[1:]

print(headers)
data[0:5]

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

In [5]:
# Computing total deaths per year

# Column 1 of every record is the year (still a string at this point)
years = [record[1] for record in data]

# year -> number of records carrying that year
year_counts = {}

for year in years:
    # .get() supplies 0 the first time a year is encountered
    year_counts[year] = year_counts.get(year, 0) + 1

year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [6]:
# Converting dates to datetime objects
# The data has no day column, so the 1st of the month is used as a placeholder
import datetime as dt

dates = [
    dt.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]

# Counting deaths per unique month
# Because the day is always 1, each distinct datetime stands for one
# (year, month) pair.
date_counts = {}

for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [7]:
# Exploring sex and race relation to deaths

def _count_values(values):
    """Return a dict mapping each distinct item of `values` to its number of occurrences.

    Keys are inserted in the order in which each item is first encountered.
    """
    counts = {}
    for value in values:
        counts[value] = counts.get(value, 0) + 1
    return counts


# Column 5 holds the sex code of each record
sex = [row[5] for row in data]
# Sanity check while exploring: set(sex) lists the distinct codes
sex_counts = _count_values(sex)

# Column 7 holds the race label of each record
race = [row[7] for row in data]
# Sanity check while exploring: set(race) lists the distinct labels
race_counts = _count_values(race)

print(sex_counts)
race_counts

{'M': 86349, 'F': 14449}


{'Asian/Pacific Islander': 1326,
 'White': 66237,
 'Native American/Native Alaskan': 917,
 'Black': 23296,
 'Hispanic': 9022}

**Preliminary results**
- The number of deaths has not changed significantly between 2012-2014.
- The most deadly months appear to be the summer months.
- Victims are disproportionally male.
- The majority of victims are white.

The ratio of Black deaths to white deaths is roughly 1/3. It would be interesting to investigate whether this ratio, and the corresponding ratios for the other races, reflect each group's share of the general population.

In [8]:
# Importing census data
# Parse the census CSV into a list of rows (lists of strings). The `with`
# block guarantees the file handle is closed after reading; newline="" is the
# csv module's recommended setting for files handed to csv.reader.
with open("census.csv", "r", newline="") as f:
    census = list(csv.reader(f))

print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [9]:
# Computing gun deaths per 100000 members of a race
# census[0] is the header row; census[1] is the row of population figures.
census_row = census[1]

# Population behind each race label used in the gun-death records. The
# trailing comments name the census column found at each position.
mapping = {
    # "Race Alone - Asian" + "Race Alone - Native Hawaiian and Other Pacific Islander"
    "Asian/Pacific Islander": int(census_row[14]) + int(census_row[15]),
    "Black": int(census_row[12]),  # "Race Alone - Black or African American"
    "Native American/Native Alaskan": int(census_row[13]),  # "Race Alone - American Indian and Alaska Native"
    "Hispanic": int(census_row[11]),  # "Race Alone - Hispanic"
    "White": int(census_row[10]),  # "Race Alone - White"
}

# NOTE: the death counts cover every year present in `data`, while the
# population is a single census snapshot, so these figures are cumulative
# rather than annual rates.
race_per_hundredk = {
    race_name: deaths / mapping[race_name] * 100000
    for race_name, deaths in race_counts.items()
}

race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'White': 33.56849303419181,
 'Native American/Native Alaskan': 24.521955573811088,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907}

In [10]:
# Exploring gun deaths due to homicide per 100000 members
# Two parallel columns taken from the same records: intent (column 3) and race (column 7)
intents = [record[3] for record in data]
races = [record[7] for record in data]

# race -> number of records whose intent is "Homicide"
homicide_race_counts = {}

for intent, race_name in zip(intents, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[race_name] = homicide_race_counts.get(race_name, 0) + 1

# Scale each tally by the matching population in `mapping`
homicide_race_per_hundredk = {
    race_name: homicides / mapping[race_name] * 100000
    for race_name, homicides in homicide_race_counts.items()
}

homicide_race_per_hundredk

{'White': 4.6356417981453335,
 'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Native American/Native Alaskan': 8.717729026240365,
 'Hispanic': 12.627161104219914}

**Findings**
- A very high proportion of deaths of black people is due to homicide.
- On the contrary, a relatively small proportion of white deaths is due to homicide.

Next, we look for a correlation between months and homicide rates.

In [22]:
# Exploring relation between month and homicide rate
# Month number (1-12) of every record, parallel to `intents`
months = [int(record[2]) for record in data]

# month number -> number of homicide records in that month (all years pooled)
homicide_month_counts = {}

for intent, month in zip(intents, months):
    if intent == "Homicide":
        homicide_month_counts[month] = homicide_month_counts.get(month, 0) + 1

# Month number -> English month name
months_dictionary = dict(enumerate(
    [
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    ],
    start=1,
))

# Same tallies, re-keyed by month name for readability
new_homicide_month_counts = {
    months_dictionary[month]: total
    for month, total in homicide_month_counts.items()
}

new_homicide_month_counts

{'March': 2780,
 'April': 2845,
 'June': 3130,
 'July': 3269,
 'August': 3125,
 'September': 2966,
 'October': 2968,
 'December': 3191,
 'January': 2829,
 'February': 2178,
 'May': 2976,
 'November': 2919}

_Summer_ is the deadliest season. However, some _winter_ months (notably December) are also deadly, perhaps because of violence around the New Year celebrations.

Next, we explore what proportion of male and female deaths, respectively, is due to homicide.

In [28]:
# Homicide deaths per sex, and their share of all gun deaths for that sex
homicide_sex_counts = {}

# `intents` and `sex` are parallel lists built from the same records
for intent, sex_code in zip(intents, sex):
    if intent != "Homicide":
        continue
    homicide_sex_counts[sex_code] = homicide_sex_counts.get(sex_code, 0) + 1

print("Total number of gun deaths due to homicide per sex:", homicide_sex_counts)
print("Total number of gun deaths per sex:", sex_counts)

# Fraction of each sex's gun deaths that were homicides (male first, then female)
male_share = homicide_sex_counts["M"] / sex_counts["M"]
female_share = homicide_sex_counts["F"] / sex_counts["F"]
print("Proportion of homicides per sex:", male_share, female_share)

Total number of gun deaths due to homicide per sex: {'M': 29803, 'F': 5373}
Total number of gun deaths per sex: {'M': 86349, 'F': 14449}
Proportion of homicides per sex: 0.34514586156180155 0.3718596442660392
