In [1]:
#Author: Patrick Callahan
#pcallahan@labarchives.com
#LabArchives, LLC

# The two code lines below are only for display purposes in Jupyter Notebook
# (they stretch the notebook container to the full browser width). If using
# another IDE, you can delete them.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [1]:
#import needed python libraries
import numpy as np
import pandas as pd
import altair as alt
from altair import datum
from vega_datasets import data
# Use Altair's 'json' data transformer for every chart built in this notebook.
alt.data_transformers.enable('json')

# Load the raw incident data, parse the `date` column into datetimes, and
# replace every missing value with the "NO_DATA" placeholder.
df = (
    pd.read_csv("stage3.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .fillna("NO_DATA")
)

# Calendar-ordered month names. Used both to label each incident and as the
# explicit sort order for chart axes, so months appear in human-readable
# (not alphabetical) order.
months = ['January','February','March','April','May','June','July','August','September','October','November','December']

# Label every incident with the name of its month. `.dt.month` is 1-based, so
# the lookup table is numbered from 1; the mapping is vectorized instead of
# looping over every row in Python.
df['month'] = df['date'].dt.month.map(dict(enumerate(months, start=1)))

# Get the weekday of each gun violence incident.

# Monday-first weekday names. Used both to label each incident and as the
# explicit sort order for chart legends, so weekdays appear in human-readable
# (not alphabetical) order.
weekdays = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

# `.dt.weekday` is 0-based with Monday == 0, which matches the list positions;
# the mapping is vectorized instead of looping over every row in Python.
df['weekday'] = df['date'].dt.weekday.map(dict(enumerate(weekdays)))

# Normalize the delimiters in the gun_type column: every run of consecutive
# '|' characters is collapsed to a single '|', and every run of consecutive
# ':' characters to a single ':' (for illustration, '0::Handgun||1::Rifle'
# becomes '0:Handgun|1:Rifle'). Vectorized regex replacement gives the same
# result as comparing each character with its successor, without a Python loop.
norm_gun_types = (
    df['gun_type']
    .str.replace(r'\|+', '|', regex=True)
    .str.replace(r':+', ':', regex=True)
)

# Swap the raw column for the normalized one, placed as the last column.
# `columns=` is used because passing the axis positionally
# (`drop('gun_type', 1)`) is no longer accepted by pandas >= 2.0.
df = df.drop(columns='gun_type')
df.insert(len(df.columns), 'gun_type', norm_gun_types)
# Subset of incidents whose incident_characteristics text contains "Accident"
# (case-sensitive). `na=False` treats a missing match as "not accidental".
# The index is renumbered from 0 and the old index is discarded, which avoids
# the positional-axis `drop('index', 1)` that pandas >= 2.0 rejects.
acc_gun_dat = df[
    df['incident_characteristics'].str.contains("Accident", case=True, na=False)
].reset_index(drop=True)

In [5]:
# Display the accidental-incident subset (the whole frame is passed to the
# notebook, which shows a truncated preview of its rows and columns).
acc_gun_dat

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district,month,weekday,gun_type
0,95266,2014-01-01,Utah,Hurricane,2500 S. 3900 W.,0,1,http://www.gunviolencearchive.org/incident/95266,http://www.stgeorgeutah.com/news/archive/2014/...,False,...,NO_DATA,NO_DATA,0::Injured,0::Victim,http://www.stgeorgeutah.com/news/archive/2014/...,71,29,January,Wednesday,NO_DATA
1,92563,2014-01-01,Mississippi,Bogue Chitto,1347 Brumfield Rd SW,1,0,http://www.gunviolencearchive.org/incident/92563,http://www.wapt.com/news/central-mississippi/j...,False,...,0::Karlianna Celeste Brumfield,1::Family,0::Killed||1::Unharmed,0::Victim||1::Subject-Suspect,http://www.wapt.com/news/central-mississippi/j...,53,39,January,Wednesday,0:Unknown
2,92282,2014-01-01,South Carolina,Olanta,Highway 301,0,1,http://www.gunviolencearchive.org/incident/92282,http://www.carolinalive.com/news/story.aspx,False,...,NO_DATA,1::Family,0::Injured||1::Unharmed,0::Victim||1::Subject-Suspect,http://www.carolinalive.com/news/story.aspx,61,36,January,Wednesday,NO_DATA
3,92117,2014-01-01,Kentucky,Cynthiana,NO_DATA,0,1,http://www.gunviolencearchive.org/incident/92117,https://www.wtvq.com/2014/01/01/harrison-sheri...,False,...,0::Dustin Mullins||1::Timothy Turner,1::Friends,"0::Injured||1::Unharmed, Arrested",0::Victim||1::Subject-Suspect,http://www.lex18.com/news/man-arrested-in-conn...,78,27,January,Wednesday,0:Unknown
4,95264,2014-01-01,Kansas,Wichita,near I-135 and Pawnee,0,1,http://www.gunviolencearchive.org/incident/95264,http://www.kake.com/home/headlines/Man-critica...,False,...,NO_DATA,NO_DATA,0::Injured||1::Unharmed,0::Victim||1::Subject-Suspect,http://www.kake.com/home/headlines/Man-critica...,98,28,January,Wednesday,NO_DATA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8220,1083379,2018-03-31,South Dakota,Aberdeen,1600 Block of Melody Lane,0,2,http://www.gunviolencearchive.org/incident/108...,http://www.ksfy.com/content/news/Two-people-in...,False,...,NO_DATA,NO_DATA,0::Injured||1::Injured,0::Victim||1::Victim,https://www.aberdeennews.com/news/local/charge...,3,3,March,Saturday,0:Unknown
8221,1083070,2018-03-31,Pennsylvania,Temple,4400 block of Kutztown Rd,1,0,http://www.gunviolencearchive.org/incident/108...,http://www.wfmz.com/724191487,False,...,0::Daniel J Smith||1::Daniel Brymer,NO_DATA,"0::Killed||1::Unharmed, Arrested",0::Victim||1::Subject-Suspect,http://www.wfmz.com/724191487,126,11,March,Saturday,0:Unknown
8222,1082607,2018-03-31,California,Salinas,Harris Ct,0,0,http://www.gunviolencearchive.org/incident/108...,https://www.montereysheriff.org/mcsologs/DPL.PDF,False,...,0::Ronaldo Hernandez Lopez||1::Pablo Luis Perez,NO_DATA,"0::Unharmed, Arrested||1::Unharmed, Arrested",0::Subject-Suspect||1::Subject-Suspect,https://www.montereysheriff.org/mcsologs/DPL.PDF,30,12,March,Saturday,0:Unknown
8223,1081899,2018-03-31,New York,Spencerport (Ogden),Stony Point Rd,0,1,http://www.gunviolencearchive.org/incident/108...,https://twitter.com/mcfw/status/98023454942998...,False,...,NO_DATA,NO_DATA,0::Injured,0::Victim,https://twitter.com/mcfw/status/98023454942998...,NO_DATA,NO_DATA,March,Saturday,0:Unknown


In [15]:
# Display the full incident frame (the whole frame is passed to the notebook,
# which shows a truncated preview of its rows and columns).
df

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district,month,weekday,gun_type
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,...,0::Julian Sims,NO_DATA,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,NO_DATA,NO_DATA,January,Tuesday,NO_DATA
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,...,0::Bernard Gillis,NO_DATA,0::Killed||1::Injured||2::Injured||3::Injured,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://losangeles.cbslocal.com/2013/01/01/man-...,62,35,January,Tuesday,NO_DATA
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,http://www.gunviolencearchive.org/incident/478855,http://chronicle.northcoastnow.com/2013/02/14/...,False,...,0::Damien Bell||1::Desmen Noble||2::Herman Sea...,NO_DATA,"0::Injured, Unharmed, Arrested||1::Unharmed, A...",0::Subject-Suspect||1::Subject-Suspect||2::Vic...,http://www.morningjournal.com/general-news/201...,56,13,January,Tuesday,0:Unknown|1:Unknown
3,478925,2013-01-05,Colorado,Aurora,16000 block of East Ithaca Place,4,0,http://www.gunviolencearchive.org/incident/478925,http://www.dailydemocrat.com/20130106/aurora-s...,False,...,0::Stacie Philbrook||1::Christopher Ratliffe||...,NO_DATA,0::Killed||1::Killed||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://denver.cbslocal.com/2013/01/06/officer-...,40,28,January,Saturday,NO_DATA
4,478959,2013-01-07,North Carolina,Greensboro,307 Mourning Dove Terrace,2,2,http://www.gunviolencearchive.org/incident/478959,http://www.journalnow.com/news/local/article_d...,False,...,0::Danielle Imani Jameison||1::Maurice Eugene ...,3::Family,0::Injured||1::Injured||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://myfox8.com/2013/01/08/update-mother-sho...,62,27,January,Monday,0:Handgun|1:Handgun
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239672,1083142,2018-03-31,Louisiana,Rayne,North Riceland Road and Highway 90,0,0,http://www.gunviolencearchive.org/incident/108...,http://www.klfy.com/news/local/rayne-woman-cha...,False,...,0::Jhkeya Tezeno,NO_DATA,"0::Unharmed, Arrested",0::Subject-Suspect,http://www.klfy.com/news/local/rayne-woman-cha...,NO_DATA,NO_DATA,March,Saturday,0:Unknown
239673,1083139,2018-03-31,Louisiana,Natchitoches,247 Keyser Ave,1,0,http://www.gunviolencearchive.org/incident/108...,http://www.ksla.com/story/37854648/man-wanted-...,False,...,0::Jamal Haskett||1::Jaquarious Tyjuan Ardison,NO_DATA,"0::Killed||1::Unharmed, Arrested",0::Victim||1::Subject-Suspect,http://www.ksla.com/story/37854648/man-wanted-...,23,31,March,Saturday,0:Unknown
239674,1083151,2018-03-31,Louisiana,Gretna,1300 block of Cook Street,0,1,http://www.gunviolencearchive.org/incident/108...,http://www.nola.com/crime/index.ssf/2018/04/sh...,False,...,NO_DATA,NO_DATA,0::Injured,0::Victim,http://www.nola.com/crime/index.ssf/2018/04/sh...,85,7,March,Saturday,0:Unknown
239675,1082514,2018-03-31,Texas,Houston,12630 Ashford Point Dr,1,0,http://www.gunviolencearchive.org/incident/108...,https://www.chron.com/news/houston-texas/houst...,False,...,0::Leroy Ellis,NO_DATA,0::Killed,0::Victim,http://www.khou.com/article/news/hpd-investiga...,149,17,March,Saturday,0:Unknown


In [2]:
# Stacked bar chart of accidental incidents: one bar per month (calendar
# order), segmented and coloured by weekday, with the incident count on y.
dist_by_Month_and_Weekday = (
    alt.Chart(acc_gun_dat)
    .mark_bar()
    .encode(
        x=alt.X('month:N', sort=months),
        y=alt.Y('count():Q'),
        color=alt.Color('weekday:N', sort=weekdays),
        tooltip=['month:N', 'weekday:N', 'count():Q'],
    )
    .properties(
        title='Distribution of Accidental Gun Violence by Month, Weekday',
        width=400,
        height=267,
    )
)
dist_by_Month_and_Weekday

In [3]:
# Stacked bar chart of all incidents: one bar per month (calendar order),
# segmented and coloured by weekday, with the incident count on y.
dist_by_Month_and_Weekday = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('month:N', sort=months),
        y=alt.Y('count():Q'),
        color=alt.Color('weekday:N', sort=weekdays),
        tooltip=['month:N', 'weekday:N', 'count():Q'],
    )
    .properties(
        title='Distribution of All Gun Violence by Month, Weekday',
        width=400,
        height=267,
    )
)
dist_by_Month_and_Weekday

In [4]:
# State outlines from the vega_datasets US topology, used as the map backdrop.
states = alt.topo_feature(data.us_10m.url, feature='states')

# Light-gray US states layer with white borders, in the albersUsa projection.
background = (
    alt.Chart(states)
    .mark_geoshape(fill='lightgray', stroke='white')
    .project('albersUsa')
    .properties(width=700, height=500)
)

# One circle per (city_or_county, state) pair of accidental incidents, placed
# at the mean latitude/longitude of the group and sized by the group's
# incident count.
points = (
    alt.Chart(acc_gun_dat)
    .transform_aggregate(
        latitude='mean(latitude)',
        longitude='mean(longitude)',
        count='count()',
        groupby=['city_or_county', 'state'],
    )
    .mark_circle()
    .encode(
        longitude='longitude:Q',
        latitude='latitude:Q',
        size=alt.Size('count:Q', title='Incident Count'),
        color=alt.value('steelblue'),
        tooltip=['state:N', 'city_or_county:N', 'count:Q'],
    )
    .properties(title='Incidents of Accidental Gun Violence')
)

# Draw the circles on top of the state outlines.
alt.layer(background, points)

In [6]:
# One circle per (city_or_county, state) pair across all incidents, placed at
# the mean latitude/longitude of the group and sized by the group's incident
# count.
points = (
    alt.Chart(df)
    .transform_aggregate(
        latitude='mean(latitude)',
        longitude='mean(longitude)',
        count='count()',
        groupby=['city_or_county', 'state'],
    )
    .mark_circle()
    .encode(
        longitude='longitude:Q',
        latitude='latitude:Q',
        size=alt.Size('count:Q', title='Incident Count'),
        color=alt.value('steelblue'),
        tooltip=['state:N', 'city_or_county:N', 'count:Q'],
    )
    .properties(title='Incidents of Any Gun Violence')
)

# Draw the circles on top of the `background` state layer defined in an
# earlier cell.
alt.layer(background, points)

In [7]:
# Exclude rows labelled 'Guam' or 'Federal' before aggregating by state
# (presumably these have no matching shape on the US states map - confirm).
# NOTE: this rebinds `df`, so every later cell that reads `df` sees the
# filtered frame.
df = df[(df.state != 'Guam') & (df.state != 'Federal')]

# Get US State background from JSON data provided by Altair/Vega Datasets
from vega_datasets import data
states = alt.topo_feature(data.us_10m.url, feature='states')

# Lookup table providing a `vega_state_id` for each `state` name; that id is
# matched against the `id` of the map shapes below.
vega_state_dat = pd.read_csv('vega_state_ids.csv')

# Incident count per state joined with its map id. Only the three columns
# needed by the chart are kept; selecting them by name replaces the previous
# positional-axis `drop(..., 1, inplace=True)`, which pandas >= 2.0 rejects.
gBy_state = df.groupby(['state'])['incident_id'].count().reset_index()
gBy_state = pd.merge(
    gBy_state,
    vega_state_dat[['state', 'vega_state_id']],
    on='state',
    how='left',
)
gBy_state = gBy_state[['state', 'incident_id', 'vega_state_id']]

gBy_state = gBy_state.rename(columns={'state': 'State','incident_id': 'Total Gun Crime Reports (Any)','vega_state_id': 'id'})

# Choropleth: each state shape looks up its row in gBy_state by `id` and is
# coloured by the total number of reports.
US_hc_map = alt.Chart(states).mark_geoshape().encode(
    color='Total Gun Crime Reports (Any):Q',
    tooltip = ['State:N','Total Gun Crime Reports (Any):Q']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(gBy_state, 'id', ['Total Gun Crime Reports (Any)','State'])
).project(
    type='albersUsa'
).properties(
    title = 'Gun Crime Incidents by State (All)',
    width=800,
    height=450
)
US_hc_map

In [8]:
# Accidental-incident count per state joined with its map id (uses the
# `vega_state_dat` lookup table and `states` topology loaded in an earlier
# cell). Only the three columns needed by the chart are kept; selecting them
# by name replaces the previous positional-axis `drop(..., 1, inplace=True)`,
# which pandas >= 2.0 rejects.
acc_gBy_state = acc_gun_dat.groupby(['state'])['incident_id'].count().reset_index()
acc_gBy_state = pd.merge(
    acc_gBy_state,
    vega_state_dat[['state', 'vega_state_id']],
    on='state',
    how='left',
)
acc_gBy_state = acc_gBy_state[['state', 'incident_id', 'vega_state_id']]

acc_gBy_state = acc_gBy_state.rename(columns={'state': 'State','incident_id': 'Accidental Gun Crime Reports','vega_state_id': 'id'})

# Choropleth: each state shape looks up its row in acc_gBy_state by `id` and
# is coloured by the number of accidental reports.
US_hc_map = alt.Chart(states).mark_geoshape().encode(
    color='Accidental Gun Crime Reports:Q',
    tooltip = ['State:N','Accidental Gun Crime Reports:Q']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(acc_gBy_state, 'id', ['Accidental Gun Crime Reports','State'])
).project(
    type='albersUsa'
).properties(
    title = 'Incidents of Accidental Gun Violence by State',
    width=800,
    height=450
)
US_hc_map

In [16]:
# Interactive (pan/zoom) line chart of the number of accidental incidents
# recorded on each date.
chart_by_date = (
    alt.Chart(acc_gun_dat)
    .mark_line()
    .encode(
        x='date:T',
        y='count(date):Q',
        tooltip=['date:O', 'count(date)'],
    )
    .properties(width=1000, height=400)
    .interactive()
)
chart_by_date

In [102]:
# Accidental incidents whose gun_type contains neither the "NO_DATA"
# placeholder nor an "Unknown" entry. `na=True` makes a missing match count
# as "has placeholder", so such rows are excluded as before.
has_no_data = acc_gun_dat['gun_type'].str.contains("NO_DATA", na=True)
has_unknown = acc_gun_dat['gun_type'].str.contains("Unknown", na=True)
acc_guntype_source = acc_gun_dat[~has_no_data & ~has_unknown].reset_index()

# Strip the "<n>:" index prefix from every '|'-separated entry, e.g.
# '0:Handgun|1:Rifle' -> 'Handgun|Rifle'. The pattern is anchored to the start
# of an entry and matches any number of digits, so multi-digit indices such as
# '10:' are removed completely (the previous '.?:' left a stray '1').
# `regex=True` is explicit because pandas >= 2.0 treats the pattern as a
# literal string by default.
norm_gun_types = acc_guntype_source['gun_type'].str.replace(r'(^|\|)\d+:+', r'\1', regex=True)
# `columns=` replaces the positional axis argument rejected by pandas >= 2.0.
acc_guntype_source = acc_guntype_source.drop(columns='gun_type')
acc_guntype_source.insert(len(acc_guntype_source.columns), 'gun_type', norm_gun_types)

# One row per individual gun, then count the occurrences of each type.
acc_gun_types = acc_guntype_source.gun_type.str.split('|', expand=True).stack().value_counts().reset_index()
acc_gun_types.columns=['Gun Type','Gun Count',]

# Bar chart of gun-type counts for accidental incidents.
acc_gun_types_over_time = alt.Chart(acc_gun_types).mark_bar().encode(
    x = 'Gun Type:N',
    y = 'Gun Count:Q',
    tooltip = ['Gun Type:N','Gun Count:Q']
)
acc_gun_types_over_time

In [112]:
# Incidents whose gun_type contains neither an "Unknown" entry nor the
# "NO_DATA" placeholder. `na=True` makes a missing match count as "has
# placeholder", so such rows are excluded as before.
has_unknown = df['gun_type'].str.contains('Unknown', na=True)
has_no_data = df['gun_type'].str.contains('NO_DATA', na=True)
guntype_source = df[~has_unknown & ~has_no_data]

# Strip the "<n>:" index prefix from every '|'-separated entry, e.g.
# '0:Handgun|1:Rifle' -> 'Handgun|Rifle'. The previous greedy pattern '.+:'
# removed everything up to the LAST colon, so only the final gun of each
# incident survived and the counts below were too low. `regex=True` is
# explicit because pandas >= 2.0 treats the pattern as a literal string by
# default.
norm_gun_types = guntype_source['gun_type'].str.replace(r'(^|\|)\d+:+', r'\1', regex=True)
# `columns=` replaces the positional axis argument rejected by pandas >= 2.0.
guntype_source = guntype_source.drop(columns='gun_type')
guntype_source.insert(len(guntype_source.columns), 'gun_type', norm_gun_types)

# One row per individual gun, then count the occurrences of each type.
gun_types = guntype_source.gun_type.str.split('|', expand=True).stack().value_counts().reset_index()
gun_types.columns=['Gun Type','Gun Count',]

# Bar chart of gun-type counts across all incidents.
gun_types_chart = alt.Chart(gun_types).mark_bar().encode(
    x = 'Gun Type:N',
    y = 'Gun Count:Q',
    tooltip = ['Gun Type:N','Gun Count:Q']
)
gun_types_chart