# Gun Violence in the U.S. from 2013-2018

##### Team: Andres Baez, Edward Chen

## Project Overview:
***
- Research question, hypothesis
  - What do we hope to achieve?
- Datasets and Background & Prior Work
  - Brief summary of our datasets
  - Previous work that has been done using these datasets
- Data Cleaning and Analysis
  - Gun Violence Data 
- Discussion and Moving Forward
- Conclusion and Implications
  - What further questions we may have
  - How to improve our study


## Research Question
***
Short Answer:

Long Answer:

## Hypothesis
***

## Datasets:
***

Dataset Name:

Link to dataset:

Description: 


Dataset Name:

Link to dataset:

Description: 


Dataset Name:

Link to dataset:

Description: 

## Background and Prior Work
***

## Methods
***
Data Cleaning:

Data Analysis:

What to report:

#### Data Cleaning and Analysis per Dataset
***

In [None]:
# Import packages: numpy/pandas for data handling; matplotlib, plotly and folium for plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plotly import __version__
print ('plotly version: ', __version__) # requires version >= 1.9.0
from plotly.offline import init_notebook_mode, iplot, plot
# NOTE(review): `plotly.plotly` was split out into the separate `chart_studio`
# package in plotly 4, where this import is expected to raise ImportError.
# `py` is not referenced in the cells shown here - confirm it is still needed.
import plotly.plotly as py

# Switch plotly's offline mode on for this notebook; run this before any iplot() call.
init_notebook_mode(connected=True)

# NOTE(review): folium / HeatMap (and matplotlib above) are not used in the
# cells shown here - presumably reserved for later maps; verify.
import folium
from folium.plugins import HeatMap

In [None]:
# Load the gun-violence incident records from the CSV file in the working directory.
df = pd.read_csv('gv_data.csv')

# Print the summary statistics first, then the first five rows.
for preview in (df.describe(), df.head()):
    print(preview)

In [None]:
# Total casualties per incident = people killed + people injured.
# Plain column arithmetic is vectorised, so it avoids the Python-level
# function call per row that df.apply(..., axis=1) makes.
df['n_casualties'] = df['n_killed'] + df['n_injured']
print(df.describe())

In [None]:
def hex_to_rgb(value):
    """Split a hex colour string into a tuple of integer channel values.

    Leading '#' characters are stripped, then the remaining digits are cut
    into chunks of ``len(digits) // 3`` characters and each chunk is parsed
    as a base-16 integer.

    A 6-digit code yields three channels. An 8-digit code such as
    "#472D7BFF" is cut into four 2-digit chunks, so the result then has a
    fourth element. A 3-digit code yields single-digit values (0-15).

    Raises:
        ValueError: if fewer than three digits remain (chunk width of zero)
            or a chunk is not valid hexadecimal.
    """
    digits = value.lstrip('#')
    width = len(digits) // 3
    channels = []
    for start in range(0, len(digits), width):
        channels.append(int(digits[start:start + width], 16))
    return tuple(channels)
# Print the channel tuple for each of five 8-digit hex colour codes.
for hex_code in ("#472D7BFF", "#31688EFF", "#1F9A8AFF", "#5DC863FF", "#E3E418FF"):
    print(hex_to_rgb(hex_code))

In [None]:
#initial cleaning on data

# Keep only incidents whose latitude and longitude are both finite numbers.
# np.isfinite is False for NaN and for +/-inf, so rows with a missing
# coordinate are dropped here.
lat_ok = np.isfinite(df['latitude'])
lon_ok = np.isfinite(df['longitude'])
df = df[lat_ok & lon_ok]

In [None]:
#plot density map on U.S. for number of casualties by long/lat

# Inclusive casualty brackets, one legend trace per bracket. The brackets must
# not overlap: the last one starts at 51 so that incidents with exactly 50
# casualties are drawn once (in the 21-50 trace) instead of twice.
limits = [(0,2),(3,10),(11,20),(21,50),(51,200)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","lightgrey"]
density_cas = []
scale = 1  # divisor applied to n_casualties to get the marker size

for i in range(len(limits)):
    lim = limits[i]
    df_sub = df[(df['n_casualties'] >= lim[0]) & (df['n_casualties'] <= lim[1])]
    cas = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['longitude'],
        lat = df_sub['latitude'],
        # One hover label per incident. Converting the column element-wise keeps
        # the labels aligned with lon/lat; str(Series) would instead give every
        # marker the same printed summary of the whole column.
        text = "Number of guns involved: " + df_sub['n_guns_involved'].astype(str),
        marker = dict(
            size = df_sub['n_casualties']/scale,
            color = colors[i],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = '{0} - {1}'.format(lim[0],lim[1]) )
    density_cas.append(cas)

layout = dict(
        title = '2013-2018 Gun Violence<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

fig = dict(data=density_cas, layout=layout)
iplot(fig, show_link=False)

In [None]:
# Lookup table: full U.S. state name -> two-letter abbreviation.
# Holds the 50 states only; there is no entry for 'District of Columbia'.
us_state_abbrev = dict([
    ('Alabama', 'AL'), ('Alaska', 'AK'), ('Arizona', 'AZ'), ('Arkansas', 'AR'),
    ('California', 'CA'), ('Colorado', 'CO'), ('Connecticut', 'CT'), ('Delaware', 'DE'),
    ('Florida', 'FL'), ('Georgia', 'GA'), ('Hawaii', 'HI'), ('Idaho', 'ID'),
    ('Illinois', 'IL'), ('Indiana', 'IN'), ('Iowa', 'IA'), ('Kansas', 'KS'),
    ('Kentucky', 'KY'), ('Louisiana', 'LA'), ('Maine', 'ME'), ('Maryland', 'MD'),
    ('Massachusetts', 'MA'), ('Michigan', 'MI'), ('Minnesota', 'MN'), ('Mississippi', 'MS'),
    ('Missouri', 'MO'), ('Montana', 'MT'), ('Nebraska', 'NE'), ('Nevada', 'NV'),
    ('New Hampshire', 'NH'), ('New Jersey', 'NJ'), ('New Mexico', 'NM'), ('New York', 'NY'),
    ('North Carolina', 'NC'), ('North Dakota', 'ND'), ('Ohio', 'OH'), ('Oklahoma', 'OK'),
    ('Oregon', 'OR'), ('Pennsylvania', 'PA'), ('Rhode Island', 'RI'), ('South Carolina', 'SC'),
    ('South Dakota', 'SD'), ('Tennessee', 'TN'), ('Texas', 'TX'), ('Utah', 'UT'),
    ('Vermont', 'VT'), ('Virginia', 'VA'), ('Washington', 'WA'), ('West Virginia', 'WV'),
    ('Wisconsin', 'WI'), ('Wyoming', 'WY'),
])

In [None]:
#aggregate density by state

# Per-state totals of casualties, killed and injured.
# 'District of Columbia' is left out because us_state_abbrev has no code for it.
# sort=False keeps the states in order of first appearance in df.
count_cols = ['n_casualties', 'n_killed', 'n_injured']
state_totals = (
    df.loc[df['state'] != 'District of Columbia', ['state'] + count_cols]
      .groupby('state', sort=False)[count_cols]
      .sum()
      .reset_index()
      .rename(columns={'state': 'states'})
)

# Series.map would silently produce NaN codes for unknown names, so fail loudly
# with the same exception type a direct dict lookup would raise.
unmapped = [name for name in state_totals['states'] if name not in us_state_abbrev]
if unmapped:
    raise KeyError('No state abbreviation for: ' + ', '.join(map(str, unmapped)))

# Largest per-state casualty total; used to scale cas_density so its maximum is 1.
max_cas = state_totals['n_casualties'].max()

df_state = state_totals[['states', 'n_casualties']].copy()
df_state['cas_density'] = df_state['n_casualties'] / max_cas
df_state['code'] = df_state['states'].map(us_state_abbrev)

#get number killed and injured separated by state
df_state['n_killed'] = state_totals['n_killed']
df_state['n_injured'] = state_totals['n_injured']

# Hover label per state: name on the first line, killed/injured breakdown below.
df_state['text'] = (df_state['states'] + '<br>'
                    + 'Killed: ' + df_state['n_killed'].astype(str)
                    + ', Injured: ' + df_state['n_injured'].astype(str))


print(df_state.head())

In [None]:
#choropleth map for gun casualties by state

# Six-stop purple colour scale, lightest at 0.0 and darkest at 1.0.
scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

# Largest per-state casualty total, read from the data so the colour-bar label
# cannot go stale when the dataset or the cleaning steps change.
max_state_cas = df_state['n_casualties'].max()

data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df_state['code'],
        z = df_state['cas_density'].astype(float),
        text = df_state['text'],
        locationmode = 'USA-states',
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Casualties ~ max {}".format(max_state_cas))
        ) ]

layout = dict(
        # df_state aggregates every row of df with no year filter, so the title
        # uses the same 2013-2018 range as the rest of the notebook.
        title = '2013-2018 US Gun Violence Casualties by State<br>(Hover for breakdown)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )

fig = dict( data=data, layout=layout )
iplot(fig)

## Ethics and Privacy

## Discussion

## Moving Forward