In [1]:
import csv
import datetime

In [8]:
from collections import Counter

In [41]:
class Dataset:
    from collections import Counter
    
    def __init__(self,data,header=False):
        """Dataset initializer. If header row is present,
        set `header=True` otherwise set it to `False`"""
        self.data = data
        if header:
            self.header = self.data[0]
            self.data = self.data[1:]
            
    def __str__(self):
        strformat = "["
        for row in self.data[:10]:
            strformat += str(row)
            strformat +="\n"
        return strformat + "]"
            
    def column(self,column=None):
        """Extracts the given column from dataset as a list."""
        if column in self.header:
            idx = self.header.index(column)
            return [row[idx] for row in self.data]
        elif isinstance(column,int) and 0<=column<len(self.header):
            return [row[column] for row in self.data]
        else:
            return None
        
    def add_column(self,column=None, name=None):
        if column is not None and len(column)==len(self.data):
            for i,row in enumerate(self.data):
                row.append(column[i])
            if name is not None:
                self.header.append(name)
        return True
    
    def column_uniques(self,column):
        """return set of uniq values in given column."""
        values = self.column(column)
        return set(values)
    
    def value_counts(self,column):
        """returns dict object counting how many times each
        value occurs in the given column of dataset."""
        values = self.column(column)
        return dict(Counter(values).items())

In [51]:
f = open("guns.csv","r")
csvreader = csv.reader(f)
data = list(csvreader)
dataset = Dataset(data=data, header=True)

In [52]:
dataset.header

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [53]:
years = dataset.column("year")
year_counts = dataset.value_counts("year")
print(year_counts)

{'2013': 33636, '2012': 33563, '2014': 33599}


In [54]:
years = list(map(int,years))

In [55]:
months = list(map(int,dataset.column("month")))

In [56]:
from datetime import datetime

In [57]:
dates = [datetime(year=y,month=m, day=1) for y,m in zip(years,months)]
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [58]:
dataset.add_column(column=dates, name='date')

True

In [60]:
dataset.header

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education',
 'date']

In [69]:
date_counts = dataset.value_counts("date")

In [70]:
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [71]:
sex_counts = dataset.value_counts("sex")
race_counts = dataset.value_counts("race")
sex_counts

{'F': 14449, 'M': 86349}

In [72]:
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In [74]:
f_census = open("census.csv","r")
census_csvreader = csv.reader(f_census)
census_data = list(census_csvreader)
census_dataset = Dataset(data=census_data,header=True)
print(census_dataset)

[['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']
]


In [77]:
census_dataset.header

['Id',
 'Year',
 'Id',
 'Sex',
 'Id',
 'Hispanic Origin',
 'Id',
 'Id2',
 'Geography',
 'Total',
 'Race Alone - White',
 'Race Alone - Hispanic',
 'Race Alone - Black or African American',
 'Race Alone - American Indian and Alaska Native',
 'Race Alone - Asian',
 'Race Alone - Native Hawaiian and Other Pacific Islander',
 'Two or More Races']

In [88]:
# Map population count to the associated race.
race_population = {
    "Asian/Pacific Islander":int(census_dataset.column("Race Alone - Asian")[0]) + int(census_dataset.column("Race Alone - Native Hawaiian and Other Pacific Islander")[0]),
    "White": int(census_dataset.column("Race Alone - Asian")[0]),
    "Hispanic": int(census_dataset.column("Race Alone - Hispanic")[0]),
    "Native American/Native Alaskan": int(census_dataset.column("Race Alone - American Indian and Alaska Native")[0]),
    "Black": int(census_dataset.column("Race Alone - Black or African American")[0])
}
# Count how many deaths for each race per 100000 citizens.
race_per_hundredk = {key:race_counts[key]/race_population[key] * 10000 for key in race_counts.keys()}
race_per_hundredk

{'Asian/Pacific Islander': 0.8374309664161763,
 'Black': 5.78773477735196,
 'Hispanic': 2.022049121091091,
 'Native American/Native Alaskan': 2.452195557381109,
 'White': 43.693347465710644}

In [95]:
intents = dataset.column("intent")
races = dataset.column("race")

In [101]:
homicide_race_counts = {}
for i,intnt in enumerate(intents):
    if intnt=="Homicide":
        homicide_race_counts[races[i]] = homicide_race_counts.get(races[i],0) + 1

        # Count how many Homicide deaths for each race per 100000 citizens.
homicide_race_counts = {key:homicide_race_counts[key]/race_population[key] * 100000 for key in homicide_race_counts.keys()}
homicide_race_counts

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 60.33833797860037}

**Insights:**
1. White people are most - ~44 per 100000 - died because of all kinds of violence.
2. The most people died of homicide in any race are White People being 60 deaths per 100000
3. The second most died of homicide are Black people being 48 deaths per 100000
