# Processing

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC"
}

swaped_states = {value:key for key, value in us_state_to_abbrev.items()}

## Loading datasets

In [3]:
shootings = pd.read_csv('./datasets/shootings2.csv')
gun_possesion = pd.read_csv('./datasets/GunsOwnerShip_dataset.csv')
hdi = pd.read_csv('./datasets/HDI.csv')
shootings['date'] = pd.to_datetime(shootings['date'])

### Concat gun-ownership

In [4]:
def ownership_value(state_tag):
    """ Para cada estado devuelve el valor de posesion de armas de este"""
    try:
        state_name = swaped_states[state_tag]
        return gun_possesion[gun_possesion["State"] == state_name]["gunOwnership"].to_list()[0]
    except:
        return np.nan
    
shooting_v1 = shootings.copy()
shooting_v1["gun-ownership"] = shooting_v1.apply(lambda row : ownership_value(row["state"]), axis=1)
shooting_v1.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,arms_category,gun-ownership
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,Asian,Shelton,WA,True,attack,Not fleeing,False,Guns,
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,White,Aloha,OR,False,attack,Not fleeing,False,Guns,
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,Hispanic,Wichita,KS,False,other,Not fleeing,False,Unarmed,
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,White,San Francisco,CA,True,attack,Not fleeing,False,Other unusual objects,
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,Hispanic,Evans,CO,False,attack,Not fleeing,False,Piercing objects,


In [5]:
shooting_v1.to_csv('./datasets/shootings2_v1.csv', index=False)


### Concat HDI

In [6]:
selected_cols = ['Region', '2015', '2016', '2017', '2018', '2019']
hdi_v1 = hdi[selected_cols]
hdi_v1.head()

Unnamed: 0,Region,2015,2016,2017,2018,2019
0,Total,0.921,0.922,0.924,0.926,0.926
1,Alabama,0.88,0.882,0.884,0.885,0.886
2,Alaska,0.934,0.935,0.936,0.936,0.936
3,Arizona,0.908,0.909,0.911,0.912,0.913
4,Arkansas,0.881,0.882,0.884,0.885,0.886


In [7]:
hdi_v1.rename(columns={col: 'hdi_' + col for col in hdi_v1.columns if col in selected_cols[1::]}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [8]:
def value_hdi(row):
    anno = row['date'].year
    if anno != 2020:
        state = row["state"]
        state_name = swaped_states[state]
        return hdi_v1[hdi_v1['Region'] == state_name]['hdi_' + str(anno)].to_list()[0]
    return np.nan

shooting_v1["hdi"] = shooting_v1.apply(lambda row : value_hdi(row), axis=1)


## Concat gun-ownership carli

In [9]:
shooting_v1.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,arms_category,gun-ownership,hdi
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,Asian,Shelton,WA,True,attack,Not fleeing,False,Guns,,0.94
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,White,Aloha,OR,False,attack,Not fleeing,False,Guns,,0.93
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,Hispanic,Wichita,KS,False,other,Not fleeing,False,Unarmed,,0.922
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,White,San Francisco,CA,True,attack,Not fleeing,False,Other unusual objects,,0.93
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,Hispanic,Evans,CO,False,attack,Not fleeing,False,Piercing objects,,0.942


In [10]:
def ov(row):
    state_name = swaped_states[row["state"]]
    anno = row['date'].year
    return gun_possesion[(gun_possesion["state"] == state_name) & 
                         (gun_possesion["year"] == anno)]["ratio"].to_list()[0]


shooting_v2 = shooting_v1.copy()
shooting_v1["gun-ownership-ratio"] = shooting_v1.apply(lambda row : ov(row), axis=1)
shooting_v1.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,arms_category,gun-ownership,hdi,gun-ownership-ratio
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,Asian,Shelton,WA,True,attack,Not fleeing,False,Guns,,0.94,0.07008
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,White,Aloha,OR,False,attack,Not fleeing,False,Guns,,0.93,0.068713
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,Hispanic,Wichita,KS,False,other,Not fleeing,False,Unarmed,,0.922,0.063853
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,White,San Francisco,CA,True,attack,Not fleeing,False,Other unusual objects,,0.93,0.045267
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,Hispanic,Evans,CO,False,attack,Not fleeing,False,Piercing objects,,0.942,0.083248


## Dataset base: poblation

In [84]:
population = pd.read_csv('./datasets/Poblation_dataset.csv')
shootings = pd.read_csv('./datasets/shootings2.csv')
gun_possesion = pd.read_csv('./datasets/GunsOwnerShip_dataset.csv')
hdi = pd.read_csv('./datasets/HDI.csv')

shootings['date'] = pd.to_datetime(shootings['date'])

In [85]:
population = population[population.year != 2020]
population = population.drop('Unnamed: 0', axis=1)
population.head()

Unnamed: 0,state,year,poblation
0,Alabama,2015,4854803
1,Alaska,2015,738430
2,Arizona,2015,6832810
3,Arkansas,2015,2979732
4,California,2015,38904296


In [15]:
shootings.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,arms_category
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,Asian,Shelton,WA,True,attack,Not fleeing,False,Guns
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,White,Aloha,OR,False,attack,Not fleeing,False,Guns
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,Hispanic,Wichita,KS,False,other,Not fleeing,False,Unarmed
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,White,San Francisco,CA,True,attack,Not fleeing,False,Other unusual objects
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,Hispanic,Evans,CO,False,attack,Not fleeing,False,Piercing objects


In [69]:
gun_possesion = gun_possesion.drop('Unnamed: 0', axis=1)
gun_possesion.head()

Unnamed: 0,state,year,total of guns,poblation,ratio
0,Alabama,2015,737509,4854803,0.151913
1,Alabama,2016,616947,4866824,0.126766
2,Alabama,2017,477345,4877989,0.097857
3,Alabama,2018,474294,4891628,0.09696
4,Alabama,2019,690084,4907965,0.140605


In [58]:
selected_cols = ['Region', '2015', '2016', '2017', '2018', '2019']
hdi_v1 = hdi[selected_cols]
hdi_v1.head()


Unnamed: 0,Region,2015,2016,2017,2018,2019
0,Total,0.921,0.922,0.924,0.926,0.926
1,Alabama,0.88,0.882,0.884,0.885,0.886
2,Alaska,0.934,0.935,0.936,0.936,0.936
3,Arizona,0.908,0.909,0.911,0.912,0.913
4,Arkansas,0.881,0.882,0.884,0.885,0.886


### **Population <-- hdi + gun_possesion**

In [87]:
def value_hdi(row):
    anno = row['year']
    state = row["state"]
    #state_name = swaped_states[state]
    try:
        return hdi_v1[hdi_v1['Region'] == state][str(anno)].to_list()[0]
    except:
        return np.nan

population["hdi"] = population.apply(lambda row : value_hdi(row), axis=1)

In [88]:
def gunpos(row, col : str):
    anno = row['year']
    state = row["state"]
    #state_name = swaped_states[state]
    try:
        return gun_possesion[(gun_possesion['state'] == state) &
                            (gun_possesion['year'] == anno)][str(col)].to_list()[0]
    except:
        return np.nan


population["gp_ratio"] = population.apply(lambda row : gunpos(row, 'ratio'), axis=1)
population.head()

Unnamed: 0,state,year,poblation,gp_ratio,hdi
0,Alabama,2015,4854803,0.151913,0.88
1,Alaska,2015,738430,0.11595,0.934
2,Arizona,2015,6832810,0.048507,0.908
3,Arkansas,2015,2979732,0.086365,0.881
4,California,2015,38904296,0.045267,0.93
