# Analyzing gun deaths in the US from 2012 to 2014
# Correlating them with census data as of April 1, 2010 to get rates per 100,000 people

#### import guns.csv

In [None]:
import csv

# Read every row of guns.csv (header row included) into a list of string lists.
# The `with` block closes the file handle as soon as reading is done, and
# newline='' is what the csv module asks for so that quoted fields containing
# embedded newlines are parsed correctly.
with open('guns.csv', 'r', newline='') as guns_file:
    data = list(csv.reader(guns_file))
# Preview the header row plus the first four records.
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

#### Move the header row into `headers` and strip it from `data`

In [2]:
# The first row of the CSV holds the column names: keep it in `headers`
# and leave only the actual records in `data`.
headers, data = data[0], data[1:]

### Get number of deaths per year

In [3]:
# Tally deaths per year; column 1 of every record is the year (as a string).
year_counts = {}
for record in data:
    year = record[1]
    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

### Get number of deaths per month

In [4]:
from datetime import datetime as dt

# Count deaths per calendar month. Records only carry a year (column 1) and a
# month (column 2), so each one is pinned to the first day of its month and
# that datetime is used as the dictionary key.
date_counts = {}
for record in data:
    month_start = dt(int(record[1]), int(record[2]), 1)
    date_counts.setdefault(month_start, 0)
    date_counts[month_start] += 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

### Get number of deaths by sex and race

In [5]:
# Tally deaths by sex (column 5) and by race (column 7) in a single pass.
sex_counts = {}
race_counts = {}
for record in data:
    sex = record[5]
    sex_counts[sex] = 1 + sex_counts.get(sex, 0)
    race = record[7]
    race_counts[race] = 1 + race_counts.get(race, 0)

sex_counts

{'F': 14449, 'M': 86349}

In [6]:
# Show the per-race death tallies built in the previous cell.
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

Number of deaths doesn't differ a lot between 2012 and 2014

Number of deaths is consistent across all months

Number of deaths in males is almost 6 times that of females

Number of deaths in whites is almost triple that of the next highest (Black)

In [7]:
# Read census.csv into a list of rows (each row is a list of strings).
# The `with` block closes the file handle once the rows are loaded, and
# newline='' is the csv module's recommended way to open files for csv.reader.
with open('census.csv', newline='') as census_file:
    census = list(csv.reader(census_file))
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

## Import Census data as of April 1, 2010

In [8]:
# Start with an empty lookup and give the two census rows short names:
# r1 is the header row, r2 is the data row that follows it.
census_dict = dict()
r1 = census[0]
r2 = census[1]

In [9]:
# Pair each header name with the value at the same position in the data row.
# NOTE: the header row contains 'Id' several times, so later 'Id' columns
# overwrite earlier ones and only the last one survives in the dict.
census_dict.update((column, r2[position]) for position, column in enumerate(r1))
census_dict

{'Geography': 'United States',
 'Hispanic Origin': 'Total',
 'Id': '0100000US',
 'Id2': '',
 'Race Alone - American Indian and Alaska Native': '3739506',
 'Race Alone - Asian': '15159516',
 'Race Alone - Black or African American': '40250635',
 'Race Alone - Hispanic': '44618105',
 'Race Alone - Native Hawaiian and Other Pacific Islander': '674625',
 'Race Alone - White': '197318956',
 'Sex': 'Both Sexes',
 'Total': '308745538',
 'Two or More Races': '6984195',
 'Year': 'April 1, 2010 Census'}

### Map races from guns.csv file to census file

In [10]:
# Translate each race label used in guns.csv into the census column that
# holds the matching population count.
# NOTE(review): 'Asian/Pacific Islander' is mapped to the Asian column only;
# the census row also has 'Race Alone - Native Hawaiian and Other Pacific
# Islander'. Confirm whether that population should be added to the
# denominator for this group.
race_dict = {}
race_dict['Asian/Pacific Islander'] = 'Race Alone - Asian'
race_dict['Black'] = 'Race Alone - Black or African American'
race_dict['Hispanic'] = 'Race Alone - Hispanic'
race_dict['Native American/Native Alaskan'] = 'Race Alone - American Indian and Alaska Native'
race_dict['White'] = 'Race Alone - White'

In [11]:
# Show the guns.csv race label -> census column mapping defined above.
race_dict

{'Asian/Pacific Islander': 'Race Alone - Asian',
 'Black': 'Race Alone - Black or African American',
 'Hispanic': 'Race Alone - Hispanic',
 'Native American/Native Alaskan': 'Race Alone - American Indian and Alaska Native',
 'White': 'Race Alone - White'}

In [12]:
# Look up the census population for every race label used in guns.csv.
# The census values are strings, so convert them to int for arithmetic.
race_totals = {}
for race, census_column in race_dict.items():
    race_totals[race] = int(census_dict[census_column])

In [13]:
# Show the census population per race label.
race_totals

{'Asian/Pacific Islander': 15159516,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

## Gun deaths per race per 100,000 people

In [27]:
# Express each race's death count as a rate per 100,000 people of that race.
# NOTE: race_counts covers every year in the data set, so this is a cumulative
# rate over the whole period rather than an annual one.
race_deaths_per_hundredk = {}
for race in race_counts:
    race_deaths_per_hundredk[race] = race_counts[race] / race_totals[race] * 100000

In [28]:
# Show gun deaths per 100,000 people for each race.
race_deaths_per_hundredk

{'Asian/Pacific Islander': 8.74698110414607,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

### Homicide Deaths per race

In [21]:
# Tally homicide deaths per race: only records whose intent (column 3) is
# 'Homicide' are counted, keyed by race (column 7).
homicide_race_counts = {}
for record in data:
    if record[3] != 'Homicide':
        continue
    race = record[7]
    homicide_race_counts[race] = 1 + homicide_race_counts.get(race, 0)
homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

##### Work area to get distinct intents, races, etc.

In [20]:
# Collect every distinct value that appears in the intent column (column 3).
distinct_intents = set()
distinct_intents.update(record[3] for record in data)
distinct_intents

{'Accidental', 'Homicide', 'NA', 'Suicide', 'Undetermined'}

In [22]:
# Same result as the loop above, written as a set comprehension over column 3.
intents = {record[3] for record in data}

In [23]:
# Show the distinct intent values.
intents

{'Accidental', 'Homicide', 'NA', 'Suicide', 'Undetermined'}

In [24]:
# Distinct race labels present in the data (column 7).
races = {record[7] for record in data}

In [25]:
# Show the distinct race labels.
races

{'Asian/Pacific Islander',
 'Black',
 'Hispanic',
 'Native American/Native Alaskan',
 'White'}

In [30]:
# Homicide deaths per race, expressed per 100,000 people of that race.
# A race that appears in the data but has no homicide record is missing from
# homicide_race_counts; .get(r, 0) reports it with a rate of 0 instead of
# raising KeyError.
homicide_deaths_race_hundredk = {}
for r in races:
    homicide_deaths_race_hundredk[r] = homicide_race_counts.get(r, 0)/race_totals[r] * 100000

In [31]:
# Show homicide deaths per 100,000 people for each race.
homicide_deaths_race_hundredk

{'Asian/Pacific Islander': 3.687452818414519,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}