# Python: Dataset analysis without using Pandas

## Dataset discovery

In [1]:
import csv

In [2]:
# Load the firearm-deaths CSV fully into memory as a list of rows; every
# field is kept as a string and the header row is still element 0.
# newline="" follows the csv module's guidance: the reader then performs its
# own newline handling, so quoted fields containing line breaks parse correctly.
with open("guns.csv", newline="") as f:
    guns_csv_reader = csv.reader(f)
    # Materialise the rows before the file closes; the reader is lazy.
    guns = list(guns_csv_reader)

In [3]:
# Peek at the first five rows; at this point the header row is still element 0.
print(guns[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


In [4]:
# Split the header off the data rows in a single step.
# The slice guns[:1] keeps `headers` as a one-element list that wraps the
# header row (not the bare row itself).
# NOTE: `guns` is rebound here, so re-running this cell without reloading the
# file drops one more row each time.
headers, guns = guns[:1], guns[1:]

In [5]:
# headers is a one-element list wrapping the header row, because guns[:1] is a slice.
print(headers)

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']]


In [6]:
# First five rows after the header row has been sliced off.
print(guns[:5])

[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


## Count the number of firearm deaths each year

In [7]:
# Column 1 of every record holds the year, as a string.
years = [record[1] for record in guns]

# Tally how many records fall in each year; keys appear in first-seen order.
years_counts = {}
for year in years:
    years_counts[year] = years_counts.get(year, 0) + 1

In [8]:
# Number of records per year, keyed by the year string.
print(years_counts)

{'2012': 33563, '2013': 33636, '2014': 33599}


## Exploration of the number of deaths per month

In [9]:
import datetime

In [10]:
# Build one datetime per record from its year (column 1) and month (column 2).
# The day is pinned to 1, so all records of the same month share one value.
dates = [
    datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
    for record in guns
]
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [11]:
# Tally records per month; each key is the first-of-month datetime built above.
dates_counts = {}
for date in dates:
    dates_counts[date] = dates_counts.get(date, 0) + 1

In [12]:
# Number of records per month; each key is the first day of that month.
dates_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Exploration of the number of deaths by origin and gender

In [13]:
# Column 5 of every record holds the sex code.
sexes = [record[5] for record in guns]

# Tally records per sex code.
sexes_counts = {}
for sex in sexes:
    sexes_counts[sex] = sexes_counts.get(sex, 0) + 1

In [14]:
# Number of records per value of the sex column.
sexes_counts

{'M': 86349, 'F': 14449}

In [15]:
# Column 7 of every record holds the race category.
races = [record[7] for record in guns]

# Tally records per race category.
races_counts = {}
for race in races:
    races_counts[race] = races_counts.get(race, 0) + 1

In [16]:
# Number of records per value of the race column.
races_counts

{'Asian/Pacific Islander': 1326,
 'White': 66237,
 'Native American/Native Alaskan': 917,
 'Black': 23296,
 'Hispanic': 9022}

## Total US population by origin (census dataset)

In [17]:
# Load the census CSV fully into memory as a list of rows (all fields stay
# strings). newline="" follows the csv module's guidance: the reader then
# performs its own newline handling, so quoted fields containing line breaks
# parse correctly.
with open("census.csv", newline="") as f:
    census_csv_reader = csv.reader(f)
    # Materialise the rows before the file closes; the reader is lazy.
    census = list(census_csv_reader)

In [18]:
# Show the raw census rows: a header row followed by one data row.
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


## Calculate the rate of firearm deaths per 100,000 people by origin

In [19]:
# Recap: firearm-death counts per race category, used as numerators for the rates below.
races_counts

{'Asian/Pacific Islander': 1326,
 'White': 66237,
 'Native American/Native Alaskan': 917,
 'Black': 23296,
 'Hispanic': 9022}

In [20]:
# Row 1 of the census data is the single data row; column 10 is
# "Race Alone - White". The value is stored as a string, hence int().
white_population = int(census[1][10])

# Deaths per 100,000 people for the 'White' category.
white_ratio_per_100000 = races_counts['White'] / white_population * 100000
white_ratio_per_100000

33.56849303419181

In [21]:
# Population for each race category used in the firearm data, copied from the
# "Race Alone - ..." columns of the census row.
mapping = {
    # Asian alone + Native Hawaiian and Other Pacific Islander alone
    "Asian/Pacific Islander": 15159516 + 674625,
    # American Indian and Alaska Native alone
    "Native American/Native Alaskan": 3739506,
    # Black or African American alone
    "Black": 40250635,
    "Hispanic": 44618105,
    "White": 197318956,
}

In [22]:
# Firearm deaths per 100,000 people for each race category:
# (death count / category population) * 100000.
ratios = {
    race: (deaths / mapping[race]) * 100000
    for race, deaths in races_counts.items()
}

In [23]:
# Firearm deaths per 100,000 people for each race category.
ratios

{'Asian/Pacific Islander': 8.374309664161762,
 'White': 33.56849303419181,
 'Native American/Native Alaskan': 24.521955573811088,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907}

## Filter by homicide

In [24]:
# Column 3 of every record holds the intent.
intents = [record[3] for record in guns]

# Count homicides per race category. Every race seen gets an entry, even if
# it ends up with zero homicides. `races` and `intents` are walked in step,
# one pair per record.
homicides_counts = {}
for race, intent in zip(races, intents):
    homicides_counts.setdefault(race, 0)
    if intent == "Homicide":
        homicides_counts[race] += 1

In [25]:
# Number of records with intent "Homicide" per race category.
homicides_counts

{'Asian/Pacific Islander': 559,
 'White': 9147,
 'Native American/Native Alaskan': 326,
 'Black': 19510,
 'Hispanic': 5634}

In [26]:
# Firearm homicides per 100,000 people for each race category:
# (homicide count / category population) * 100000.
homicides_ratios = {
    race: (homicides / mapping[race]) * 100000
    for race, homicides in homicides_counts.items()
}

In [27]:
# Firearm homicides per 100,000 people for each race category.
homicides_ratios

{'Asian/Pacific Islander': 3.530346230970155,
 'White': 4.6356417981453335,
 'Native American/Native Alaskan': 8.717729026240365,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914}