# Gun Deaths in the USA (2012-2014)

## Importing and opening dataset

In [172]:
import csv
# Read the whole CSV into memory as a list of rows (each row is a list of strings).
# The context manager closes the file handle as soon as reading is finished;
# newline="" is what the csv module asks for so quoted line breaks parse correctly.
with open("full_data.csv", "r", newline="") as f:
    data_reader = csv.reader(f)
    data = list(data_reader)

## Using list slicing to remove headers from a List of Lists

In [173]:
# Row 0 is kept as `headers`; every row after it stays in `data`.
# NOTE: re-running this cell without re-running the load cell above drops
# one more row from `data` each time, because `data` is overwritten in place.
headers, data = data[0], data[1:]

## Counts of Gun Deaths per year

In [174]:
# Column 1 of every row (the values tallied per year below).
years = [row[1] for row in data]

In [175]:
# Tally how many rows carry each year value; keys appear in first-seen order.
years_counts = {}

for year in years:
    years_counts[year] = years_counts.get(year, 0) + 1

In [176]:
# Show the per-year tallies.
years_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

## Counts of Gun Deaths per year and month using datetime module

In [177]:
import datetime

In [178]:
# One datetime per row built from columns 1 (year) and 2 (month). The day is
# fixed to 1, so all rows from the same month share the same datetime value.
dates = [
    datetime.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]

In [179]:
# Preview the first five month-level dates.
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [180]:
# Tally rows per (year, month) datetime; keys appear in first-seen order.
dates_counts = {}

for month_start in dates:
    dates_counts.setdefault(month_start, 0)
    dates_counts[month_start] += 1

In [181]:
# Show the per-month tallies.
dates_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Counts of Gun Deaths by gender

In [182]:
# Column 5 of every row, coerced to str (the values tallied by gender below).
sex = [str(row[5]) for row in data]

In [183]:
# Tally rows per sex value; keys appear in first-seen order.
sex_counts = {}

for victim_sex in sex:
    if victim_sex not in sex_counts:
        sex_counts[victim_sex] = 0
    sex_counts[victim_sex] += 1

In [184]:
# Show the per-sex tallies.
sex_counts

{'M': 86349, 'F': 14449}

## Counts of Gun Deaths by race

In [185]:
# Column 7 of every row (the values tallied by race below).
race = [row[7] for row in data]

In [187]:
# Tally rows per race value; keys appear in first-seen order.
race_counts = {}

for victim_race in race:
    race_counts[victim_race] = race_counts.get(victim_race, 0) + 1

## Counts of Gun Deaths by gender+race

In [188]:
# Build one "<sex> & <race>" label per row (columns 5 and 7), skipping rows
# whose sex column is anything other than "M" or "F".
gender_sex_counts_list = [
    row[5] + " & " + row[7]
    for row in data
    if row[5] in ("M", "F")
]


In [189]:
# Tally rows per "<sex> & <race>" label; keys appear in first-seen order.
gender_sex_counts = {}

for label in gender_sex_counts_list:
    try:
        gender_sex_counts[label] += 1
    except KeyError:
        gender_sex_counts[label] = 1

In [190]:
# Show the sex-and-race tallies.
gender_sex_counts

{'M & Asian/Pacific Islander': 1082,
 'F & White': 10687,
 'M & White': 55550,
 'M & Native American/Native Alaskan': 790,
 'M & Black': 20978,
 'F & Black': 2318,
 'F & Native American/Native Alaskan': 127,
 'F & Hispanic': 1073,
 'M & Hispanic': 7949,
 'F & Asian/Pacific Islander': 244}

In [191]:
# Print the four summary tallies, one dict per line, in the order:
# year, race, sex, sex-and-race.
for summary in (years_counts, race_counts, sex_counts, gender_sex_counts):
    print(summary)

{'2012': 33563, '2013': 33636, '2014': 33599}
{'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}
{'M': 86349, 'F': 14449}
{'M & Asian/Pacific Islander': 1082, 'F & White': 10687, 'M & White': 55550, 'M & Native American/Native Alaskan': 790, 'M & Black': 20978, 'F & Black': 2318, 'F & Native American/Native Alaskan': 127, 'F & Hispanic': 1073, 'M & Hispanic': 7949, 'F & Asian/Pacific Islander': 244}


# Findings so far

Up until this point data has been cleaned, headers removed for accurate data analysis, and some variables have been created for initial overview of gun deaths in the USA between 2012-2014.

Four new variables were created: years_counts (which counts the number of incidents per unique year); race_counts (accounting for the number of incidents per race); sex_counts (the total number of incidents according to gender); and gender_sex_counts (which analyses the intersection of the values of gender and race, per unique key).

Years Counts:
    - There is a stable number of incidents from the beginning of 2012 until 2014;
    - 2013 was the year with the highest number of gun deaths in the USA, with 33636 incidents;

Race Counts:
    - White (65.7%), Black (23.1%) and Hispanic (9.0%) victims make up the three largest shares of gun deaths by race;

Gender Counts:
    - Males are the group that was most affected by gun deaths in the US during this period, with 85.7% of the total.

Race+Gender Counts:
    - From the data, the highest number of gun deaths belong to the group Male + White (55,1% of the total of deaths), followed by Male + Black (20,8%), and Female + White (10,6%).

We now want to compare this data with the total population by race and make the values more tangible by expressing them as rates per 100k inhabitants.

For that we took a census dataset and mapped it according to our population keys: Asian/Pacific Islander, Native American/Native Alaskan, Black, Hispanic, White.

In [192]:
# Load the census table into a list of rows. The header row is kept on purpose
# so the column labels are visible when the table is printed.
# The context manager closes the file handle as soon as reading is finished.
with open("census.csv", "r", newline="") as g:
    data_census = csv.reader(g)
    census = list(data_census)

In [193]:
# Dump the raw census rows (header row plus data) to inspect the column layout.
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


## Creating a map of total population values to match our population keys

In [194]:
# Population totals keyed by the race labels used in the deaths data, copied
# from the census row printed above. "Asian/Pacific Islander" adds the census
# "Asian" and "Native Hawaiian and Other Pacific Islander" columns together.
mapping = dict([
    ("Asian/Pacific Islander", 15159516 + 674625),
    ("Native American/Native Alaskan", 3739506),
    ("Black", 40250635),
    ("Hispanic", 44618105),
    ("White", 197318956),
])

In [195]:
# Gun deaths per 100,000 inhabitants for each race: count / population * 100000.
race_per_hundredk = {
    group: (race_counts[group] / mapping[group]) * 100000
    for group in race_counts
}

In [196]:
# Show gun deaths per 100k inhabitants by race.
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'White': 33.56849303419181,
 'Native American/Native Alaskan': 24.521955573811088,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907}

## Homicide Rates by Race

In [197]:
# Column 3 of every row (compared against "Homicide" in the cells below).
intents = [row[3] for row in data]

In [198]:
# Count homicide rows per race. Only races with at least one homicide get a key.
#
# The loop variable is deliberately NOT named `race`: reusing that name would
# overwrite the `race` list with its last element once the loop ends, so
# re-running this cell (or any later cell that reads `race`) would silently
# iterate over the characters of a single string.
homicide_race_counts = {}

for i, victim_race in enumerate(race):
    if intents[i] == "Homicide":
        homicide_race_counts[victim_race] = homicide_race_counts.get(victim_race, 0) + 1

In [199]:
# Show homicide tallies by race.
homicide_race_counts

{'White': 9147,
 'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Native American/Native Alaskan': 326,
 'Hispanic': 5634}

In [200]:
# Homicides per 100,000 inhabitants for each race: count / population * 100000.
race_perhundredk_homicide = {}

for group, homicides in homicide_race_counts.items():
    race_perhundredk_homicide[group] = (homicides / mapping[group]) * 100000

In [201]:
# Show homicides per 100k inhabitants by race.
race_perhundredk_homicide

{'White': 4.6356417981453335,
 'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Native American/Native Alaskan': 8.717729026240365,
 'Hispanic': 12.627161104219914}

# Findings (up to this point)
In previous findings it was found that the White and Males groups were the ones most affected by gun deaths in the USA.

Once we factor in the total population of each group, the picture changes drastically:
    - Black and White are, in general, the races (per 100k inhabitants) that are most affected by gun deaths;
    - Looking into deaths by homicide (per 100k inhabitants), Black and Hispanic are the most affected populations, while Asian/Pacific Islander and White are the least affected;
    
In the following sections we will further investigate the rates of **homicide by gender**, **seasonality of deaths by guns**, **location** and **education of victim** factors, and others.

# Bonus Analyses

## Homicide by Gender

In [202]:
# Count homicide rows per sex value.
homicide_sex_counts = {}

for idx, victim_sex in enumerate(sex):
    # Every sex value seen gets a key, even if it never has a homicide.
    homicide_sex_counts.setdefault(victim_sex, 0)
    if intents[idx] == "Homicide":
        homicide_sex_counts[victim_sex] += 1

In [203]:
# Show homicide tallies by sex.
homicide_sex_counts

{'M': 29803, 'F': 5373}

In [204]:
# Homicides by sex, scaled to "per 100,000 inhabitants".
# NOTE(review): the denominator is the sum of the five race totals in `mapping`
# and is shared by both sexes, so each figure is homicides per 100k of the
# whole population, not per 100k males or per 100k females.
total_population = sum(mapping.values())
sex_perhundredk_homicide_counts = {
    gender: (homicide_sex_counts[gender] / total_population) * 100000
    for gender in homicide_sex_counts
}

In [205]:
# Show homicides per 100k inhabitants by sex.
sex_perhundredk_homicide_counts

{'M': 9.87634787932396, 'F': 1.7805461582930453}

From 2012 to 2014, Males suffered 5.55 times as many homicides as Females in the USA (both rates are expressed per 100k of the total population).

## Location analysis

In [206]:
# Column 9 of every row, then a tally of rows per location value.
location = [row[9] for row in data]
location_counts = {}

for place in location:
    if place not in location_counts:
        location_counts[place] = 0
    location_counts[place] += 1

In [207]:
# Count homicide rows per location. Every location seen gets a key, so
# locations without any homicide show up with a count of 0.
location_homicide_counts = {}

for idx, place in enumerate(location):
    is_homicide = intents[idx] == "Homicide"
    location_homicide_counts[place] = location_homicide_counts.get(place, 0) + int(is_homicide)

In [208]:
import operator
# Rank locations by homicide count, highest first.
sorted_location_homicide_counts = sorted(
    location_homicide_counts.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

sorted_location_homicide_counts

[('Home', 13613),
 ('Street', 8867),
 ('Other specified', 5429),
 ('Other unspecified', 3604),
 ('Trade/service area', 1663),
 ('NA', 1383),
 ('School/instiution', 361),
 ('Industrial/construction', 85),
 ('Farm', 76),
 ('Residential institution', 73),
 ('Sports', 22)]

While checking the Homicide values in different settings, we found the different locations where homicides took place. After that, these values were ordered and it was found that most of the homicides happened at Home (13613) and on the Street (8867).

## Police involvement in the shooting

In [209]:
# Column 4 of every row, then a tally of rows per police-flag value.
police = [row[4] for row in data]

police_counts = {}

for flag in police:
    try:
        police_counts[flag] += 1
    except KeyError:
        police_counts[flag] = 1


In [210]:
# Count homicide rows per police-flag value. Every flag seen gets a key.
police_homicide_counts = {}

for idx in range(len(police)):
    flag = police[idx]
    if flag not in police_homicide_counts:
        police_homicide_counts[flag] = 0
    if intents[idx] != "Homicide":
        continue
    police_homicide_counts[flag] += 1

In [211]:
# Report the overall and homicide-only tallies, plus a legend for the flag values.
print(f"Total police interventions: {police_counts}")
print(f"Total police interventions for homicides: {police_homicide_counts}")
print("0 represents no involvement of a police officer; 1 does.")

Total police interventions: {'0': 99396, '1': 1402}
Total police interventions for homicides: {'0': 33774, '1': 1402}
0 represents no involvement of a police officer; 1 does.


Shockingly, every time a police officer was involved in a shooting, the cause of death was Homicide. It is unclear whether the death was caused by a perpetrator or by a police officer.

## Education of Homicide victims

In [212]:
# Column 10 of every row (the values tallied by education level below).
education = [row[10] for row in data]

In [213]:
# Tally rows per education level; keys appear in first-seen order.
# The first sighting of a level counts as 1. Starting it at 0 would leave
# every level's total one short.
education_counts = {}

for level in education:
    education_counts[level] = education_counts.get(level, 0) + 1

In [214]:
# Count homicide rows per education level. Every level seen gets a key.
education_homicide_counts = {}

for idx, level in enumerate(education):
    so_far = education_homicide_counts.get(level, 0)
    education_homicide_counts[level] = (so_far + 1) if intents[idx] == "Homicide" else so_far

In [215]:
# Rank education levels by homicide count, highest first.
sorted_education_homicide_counts = list(education_homicide_counts.items())
sorted_education_homicide_counts.sort(key=operator.itemgetter(1), reverse=True)

sorted_education_homicide_counts

[('HS/GED', 15649),
 ('Less than HS', 11839),
 ('Some college', 5640),
 ('BA+', 1559),
 ('NA', 489)]

## Seasonality of Homicides

In [216]:
def sorter(self):
    """Return the (key, value) pairs of a mapping ordered by value, largest first.

    ``self`` is any object with an ``.items()`` method (plain dicts in this
    notebook). Pairs with equal values keep their original relative order.
    """
    pairs = list(self.items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs

In [217]:
# Months ranked by total gun deaths (all intents), highest first.
sorted_dates = sorter(dates_counts)

In [218]:
# Count homicide rows per (year, month) datetime. Every month seen gets a key.
homicides_dates = {}

for idx, month_start in enumerate(dates):
    homicides_dates.setdefault(month_start, 0)
    if intents[idx] == "Homicide":
        homicides_dates[month_start] += 1

# Rank the months by homicide count and show the ten highest.
sorted_homicides_dates = sorter(homicides_dates)
sorted_homicides_dates[:10]

[(datetime.datetime(2012, 7, 1, 0, 0), 1160),
 (datetime.datetime(2013, 7, 1, 0, 0), 1137),
 (datetime.datetime(2012, 8, 1, 0, 0), 1090),
 (datetime.datetime(2012, 12, 1, 0, 0), 1083),
 (datetime.datetime(2014, 12, 1, 0, 0), 1080),
 (datetime.datetime(2012, 9, 1, 0, 0), 1070),
 (datetime.datetime(2013, 6, 1, 0, 0), 1066),
 (datetime.datetime(2012, 6, 1, 0, 0), 1044),
 (datetime.datetime(2014, 8, 1, 0, 0), 1035),
 (datetime.datetime(2013, 12, 1, 0, 0), 1028)]

Seasonality for homicides was also studied and it seems Summer months (July, August) and December are when most homicides happen. July was the month with most fatalities for two straight years in 2012 and 2013.

# Findings on Homicides
The group most vulnerable to gun homicide in the US is Males whose education level is HS/GED (high school or GED) or less, and usually there isn't a police officer intervening (this would need further investigation to understand what kind of role they play). Also, it was discovered that the victims are usually killed at home and that homicides peaked during July, August, and December.