In [1]:
import csv
import datetime

In [2]:
# Read every row of guns.csv into memory. The context manager closes the file
# handle as soon as the rows are read, and newline='' lets the csv module do
# its own newline handling (needed for quoted fields with embedded newlines).
with open('guns.csv', newline='') as guns_file:
    data = list(csv.reader(guns_file))

In [3]:
# Peel the first row off as the column headers and keep the rest as the
# records. NOTE: this rebinds `data`, so re-running this cell without
# re-running the load cell drops one more row each time.
headers, data = data[0], data[1:]

In [4]:
def count_incidents(incident_index, row_map=None, rows=None):
    """Count how many rows produce each distinct key.

    Args:
        incident_index: Forwarded as the second argument to ``row_map``. With
            the default ``row_map`` it is the column index to group by.
        row_map: Callable ``(row, incident_index) -> hashable key``. When
            omitted, the key is simply ``row[incident_index]``.
        rows: Iterable of rows to count. When omitted, the module-level
            ``data`` list is used, so existing one- and two-argument calls
            behave as before.

    Returns:
        dict mapping each distinct key to the number of rows that yielded it.
    """
    if row_map is None:
        row_map = lambda row, i: row[i]
    if rows is None:
        rows = data
    incident_dict = {}
    # Single pass: row_map runs once per row, and keys are created on first
    # sight instead of being pre-seeded by a separate scan over the data.
    for row in rows:
        incident = row_map(row, incident_index)
        incident_dict[incident] = incident_dict.get(incident, 0) + 1
    return incident_dict
        
def generate_data_to_counts_dict(row_index, row_map, rows=None):
    """Build a zero-initialised counter dict with one key per distinct value.

    Args:
        row_index: Forwarded as the second argument to ``row_map``.
        row_map: Callable ``(row, row_index) -> hashable key``.
        rows: Iterable of rows to scan. When omitted, the module-level
            ``data`` list is used, so existing two-argument calls behave as
            before.

    Returns:
        dict mapping every distinct key produced by ``row_map`` to ``0``.
    """
    if rows is None:
        rows = data
    # dict.fromkeys collapses duplicate keys itself, so no intermediate
    # list or set is needed.
    return dict.fromkeys((row_map(row, row_index) for row in rows), 0)

def generate_entry_datetime(row):
    """Return a datetime for the first day of the row's year and month.

    ``row[1]`` is parsed as the year and ``row[2]`` as the month. The day is
    always set to 1, so rows from the same month map to the same value.
    """
    year = int(row[1])
    month = int(row[2])
    return datetime.datetime(year, month, 1)

In [5]:
# Count incidents per distinct value of column 1, which holds the year as a
# string (e.g. '2012').
year_counts = count_incidents(1)
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [6]:
# Count incidents per calendar month. The key comes from the whole row (a
# first-of-month datetime), so no column index is needed and the mapper
# ignores its second argument.
date_counts = count_incidents(
    None,
    lambda row, _unused_index: generate_entry_datetime(row),
)
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [7]:
# Count incidents per distinct value of column 5 (the 'F' / 'M' field).
sex_counts = count_incidents(5)
sex_counts

{'F': 14449, 'M': 86349}

In [8]:
# Count incidents per distinct value of column 7 (the race category).
race_counts = count_incidents(7)
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

Incident counts are distributed fairly evenly across the months shown, so the patterns most worth pursuing are probably in race and gender.

In [9]:
# Read census.csv and close the file handle once the rows are in memory.
# newline='' lets the csv module do its own newline handling.
with open('census.csv', newline='') as census_file:
    census_rows = list(csv.reader(census_file))

# Row 0 is the header and row 1 is the data row that the cells below index
# into. The raw rows get their own name so `census` is only ever bound to
# that single data row.
census_header = census_rows[0]
census = census_rows[1]

In [10]:
# Population for each race label used in the gun data, built from the census
# row. Each label lists the census column(s) whose integer values are summed;
# 'Asian/Pacific Islander' combines two columns.
mapping = {
    race: sum(int(census[column]) for column in columns)
    for race, columns in (
        ('Asian/Pacific Islander', (14, 15)),
        ('Black', (12,)),
        ('Hispanic', (11,)),
        ('Native American/Native Alaskan', (13,)),
        ('White', (10,)),
    )
}
mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [11]:
# Incidents per 100,000 people of each race: the incident count divided by
# that race's population from `mapping`, scaled by 100,000.
race_per_hundredk = {
    race: (count / mapping[race] * 100000)
    for race, count in race_counts.items()
}
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

In [12]:
# Tally incidents by race (column 7), keeping only rows whose column 3 is
# exactly 'Homicide'.
homicide_race_counts = {}
for row in data:
    if row[3] != 'Homicide':
        continue
    # A race seen for the first time starts from 0.
    homicide_race_counts[row[7]] = homicide_race_counts.get(row[7], 0) + 1
homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [13]:
# Homicides per 100,000 people of each race: the homicide count divided by
# that race's population from `mapping`, scaled by 100,000.
homicide_race_per_hundredk = {
    race: (count / mapping[race] * 100000)
    for race, count in homicide_race_counts.items()
}
homicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}