In [1]:

import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
import plotly
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import calendar

In [2]:
# Load the incident-level gun violence dataset.
# NOTE(review): absolute, machine-specific path -- the notebook only runs on the
# author's machine; consider a configurable data directory.
path = "/Users/peiyu/Desktop/ECE 143/Group Project/gun-violence-data_01-2013_03-2018.csv"
df = pd.read_csv(path)

# Manually appended record for the 2017-10-01 Las Vegas shooting (presumably
# absent from the CSV, hence the name). Values are positional and follow the
# CSV column order; '-' is used as a placeholder for fields with no value.
# Latitude/longitude are given as floats (they were strings), consistent with
# the other numeric fields of this row, so the coordinate columns are not
# coerced to object dtype by this single row.
# NOTE(review): the '-' placeholders may still coerce other numeric columns
# (e.g. the district columns) to object dtype -- confirm whether NaN is wanted.
missing_row = [
    'sban_1',                                                   # incident_id
    '2017-10-01',                                               # date
    'Nevada',                                                   # state
    'Las Vegas',                                                # city_or_county
    'Mandalay Bay 3950 Blvd S',                                 # address
    59,                                                         # n_killed
    489,                                                        # n_injured
    'https://en.wikipedia.org/wiki/2017_Las_Vegas_shooting',    # incident_url
    'https://en.wikipedia.org/wiki/2017_Las_Vegas_shooting',    # source_url
    '-',                                                        # incident_url_fields_missing
    '-',                                                        # congressional_district
    '-',                                                        # gun_stolen
    '-',                                                        # gun_type
    '-',                                                        # incident_characteristics
    36.095,                                                     # latitude
    'Hotel',                                                    # location_description
    -115.171667,                                                # longitude
    47,                                                         # n_guns_involved
    'Route 91 Harvest Festiva; concert, open fire from 32nd floor. 47 guns seized; TOTAL:59 kill, 489 inj, number shot TBD,girlfriend Marilou Danley POI',  # notes
    '-',                                                        # participant_age
    '-',                                                        # participant_age_group
    '-',                                                        # participant_gender
    '-',                                                        # participant_name
    '-',                                                        # participant_relationship
    '-',                                                        # participant_status
    '-',                                                        # participant_type
    '-',                                                        # sources
    '-',                                                        # state_house_district
    '-',                                                        # state_senate_district
]
# len(df) is the next free label of the default integer index, so this appends.
df.loc[len(df)] = missing_row

In [3]:
# Show the column labels of the loaded frame (for a DataFrame, keys() is the columns index).
df.columns

Index(['incident_id', 'date', 'state', 'city_or_county', 'address', 'n_killed',
       'n_injured', 'incident_url', 'source_url',
       'incident_url_fields_missing', 'congressional_district', 'gun_stolen',
       'gun_type', 'incident_characteristics', 'latitude',
       'location_description', 'longitude', 'n_guns_involved', 'notes',
       'participant_age', 'participant_age_group', 'participant_gender',
       'participant_name', 'participant_relationship', 'participant_status',
       'participant_type', 'sources', 'state_house_district',
       'state_senate_district'],
      dtype='object')

In [163]:
# Parse the incident date, then derive the calendar columns used by the
# time-trend cells below.
df['date'] = pd.to_datetime(df['date'])
when = df['date'].dt
for new_col, dt_attr in [('year', 'year'), ('month', 'month'),
                         ('monthday', 'day'), ('weekday', 'weekday')]:
    df[new_col] = getattr(when, dt_attr)

# Total casualties per incident (killed + injured).
df['loss'] = df['n_killed'].add(df['n_injured'])

# Month-day key with a constant '00' year prefix, so the same calendar day of
# different years shares one value.
df['month_day_comb'] = when.strftime('00-%m-%d')

## Time-Related Trends in Gun Violence

In [5]:
# Number of recorded incidents per calendar year, largest count first
# (the value_counts defaults, spelled out).
year_stat = df['year'].value_counts(sort=True, ascending=False)
year_stat

2017    61402
2016    58763
2015    53579
2014    51854
2018    13802
2013      278
Name: year, dtype: int64

In [6]:
# Bar chart of the number of incidents per year.
x1 = year_stat.index.tolist()
y1 = year_stat.values.tolist()

# Pick each bar's colour from the year it represents instead of from its
# position in the list: the positional colour list only lined up because
# value_counts() happened to order the years 2017, 2016, 2015, 2014, 2018, 2013.
# 2014-2017 are highlighted; 2013 and 2018 (far fewer records) are greyed out.
highlighted_years = {2014, 2015, 2016, 2017}
highlight_color = 'rgba(222,45,38,0.8)'
muted_color = 'rgba(204,204,204,1)'
bar_colors = [highlight_color if bar_year in highlighted_years else muted_color
              for bar_year in x1]

trace1 = go.Bar(x = x1, y = y1, name = 'year count', opacity = 0.7,
                marker = dict(color = bar_colors))
layout = go.Layout(title = 'Gun Violence Incidents by year')
fig = go.Figure(data = [trace1], layout = layout)
# Re-initialises plotly's notebook mode (it is also initialised with the imports).
init_notebook_mode(connected=True)
iplot(fig)

In [7]:
# Average number of incidents per calendar month over 2014-2017
# (2013 and 2018 are left out of the average).
# Computed with vectorised pandas operations instead of a Python loop over
# every row; keys are in calendar order (1..12) regardless of row order.
full_years = [2014, 2015, 2016, 2017]
per_month = df.loc[df['year'].isin(full_years), 'month'].value_counts().sort_index()

# Kept as defaultdict(int) objects keyed by month number, as before, so cells
# that call .keys() / .values() on them keep working.
month_count = defaultdict(int, zip(per_month.index.tolist(), per_month.values.tolist()))
month_mean = defaultdict(int)
for month_number, total in month_count.items():
    month_mean[month_number] = total / len(full_years)

In [8]:
# Bar chart of the mean monthly incident count, labelled with abbreviated
# month names (calendar.month_abbr is indexed by month number).
x2 = [month_number for month_number in month_mean.keys()]
y2 = [mean_value for mean_value in month_mean.values()]
month_labels = [calendar.month_abbr[int(month_number)] for month_number in x2]

trace2 = go.Bar(
    x = month_labels,
    y = y2,
    name = 'month',
    opacity = 0.7,
    marker = {'color': '#b1ce10'},
)
layout = go.Layout(title = 'Average number of Gun Violence Incidents by Months')
fig = go.Figure(data = [trace2], layout = layout)
iplot(fig)

In [9]:
# Incidents per weekday number (0 = Monday ... 6 = Sunday, see `weekmap`),
# over the whole dataset and for 2018 only. Counted with vectorised pandas
# operations instead of two Python loops over every row.
all_by_weekday = df['weekday'].value_counts().sort_index()
y2018_by_weekday = df.loc[df['year'] == 2018, 'weekday'].value_counts().sort_index()

# Kept as defaultdict(int) so a weekday with no 2018 incidents still reads as 0.
weekday_count = defaultdict(int, zip(all_by_weekday.index.tolist(),
                                     all_by_weekday.values.tolist()))
weekday2018_count = defaultdict(int, zip(y2018_by_weekday.index.tolist(),
                                         y2018_by_weekday.values.tolist()))

weekmap = {0:'Mon', 1:'Tue', 2:'Wed', 3:'Thu', 4:'Fri', 5:'Sat', 6:'Sun'}

# Yearly average per weekday, keyed by weekday abbreviation. The data is
# treated as spanning 5 years and 3 months.
# NOTE(review): the 2018 counts are scaled by 0.25 *and* divided by the
# 5.25-year span, which down-weights them twice; 2013 is also counted as a
# full year. The formula is kept unchanged -- confirm it is the intended one.
years_covered = 5 + 3/12
weekday_mean = defaultdict(int)
for i in weekday_count.keys():
    before_2018 = weekday_count[i] - weekday2018_count[i]
    weekday_mean[weekmap[i]] = before_2018*1/years_covered + weekday2018_count[i]*(0.25/years_covered)

In [194]:
# Bar chart of the average incident count per weekday, in Mon..Sun order
# (the insertion order of `weekmap`).
x3 = [weekmap[day_number] for day_number in weekmap]
y3 = [weekday_mean[day_label] for day_label in x3]

trace3 = go.Bar(
    x = x3,
    y = y3,
    name = 'weekday',
    opacity = 0.7,
    marker = {'color': '#aedefc'},
)
layout = go.Layout(title = 'Average number of Gun Violence Incidents by weekdays')
fig = go.Figure(data = [trace3], layout = layout)
iplot(fig)

### Time series plot

In [126]:
# Daily totals for 2014: incidents (count of non-null `state` values per date),
# people killed and people injured.
daily_aggregations = {'state': 'count', 'n_killed': 'sum', 'n_injured': 'sum'}
temp = df[df['year'] == 2014].groupby('date').agg(daily_aggregations)
temp = temp.reset_index().rename(columns = {'state': 'incidents'})

# (column, legend label, colour) of the three line series.
series_specs = [
    ('incidents', 'Total number of Incidents', '#418bf4'),
    ('n_killed', 'Total number of killed', '#ce2812'),
    ('n_injured', 'Total number of injured', '#daa1e2'),
]
trace1, trace2, trace3 = [
    go.Scatter(x = temp['date'], y = temp[column], name = label,
               mode = 'lines', marker = dict(color = colour))
    for column, label, colour in series_specs
]

layout = dict(
    height = 350,
    title = 'Gun Violence Incidents - 2014',
    legend = dict(orientation = "h", x = -.01, y = 1.1),
    xaxis = dict(title='Date Time', ticklen = 1),
)
fig = go.Figure(data = [trace1, trace2, trace3], layout = layout)
iplot(fig)

In [195]:
# Daily totals for 2015: incidents (count of non-null `state` values per date),
# people killed and people injured.
daily_aggregations = {'state': 'count', 'n_killed': 'sum', 'n_injured': 'sum'}
temp = df[df['year'] == 2015].groupby('date').agg(daily_aggregations)
temp = temp.reset_index().rename(columns = {'state': 'incidents'})

# (column, legend label, colour) of the three line series.
series_specs = [
    ('incidents', 'Total number of Incidents', '#418bf4'),
    ('n_killed', 'Total number of killed', '#ce2812'),
    ('n_injured', 'Total number of injured', '#daa1e2'),
]
trace1, trace2, trace3 = [
    go.Scatter(x = temp['date'], y = temp[column], name = label,
               mode = 'lines', marker = dict(color = colour))
    for column, label, colour in series_specs
]

layout = dict(
    height = 350,
    title = 'Gun Violence Incidents - 2015',
    legend = dict(orientation = "h", x = -.01, y = 1.195),
    xaxis = dict(title='Date Time', ticklen = 1),
)
fig = go.Figure(data = [trace1, trace2, trace3], layout = layout)
iplot(fig)

In [84]:
# Daily totals for 2016: incidents (count of non-null `state` values per date),
# people killed and people injured.
daily_aggregations = {'state': 'count', 'n_killed': 'sum', 'n_injured': 'sum'}
temp = df[df['year'] == 2016].groupby('date').agg(daily_aggregations)
temp = temp.reset_index().rename(columns = {'state': 'incidents'})

# The ten 2016 dates with the most incidents.
temp16_max = temp.nlargest(10, 'incidents')

# (column, legend label, colour) of the three line series.
series_specs = [
    ('incidents', 'Total number of Incidents', '#418bf4'),
    ('n_killed', 'Total number of killed', '#ce2812'),
    ('n_injured', 'Total number of injured', '#daa1e2'),
]
trace1, trace2, trace3 = [
    go.Scatter(x = temp['date'], y = temp[column], name = label,
               mode = 'lines', marker = dict(color = colour))
    for column, label, colour in series_specs
]

layout = dict(
    height = 350,
    title = 'Gun Violence Incidents - 2016',
    legend = dict(orientation = "h", x = -.01, y = 1.1),
    xaxis = dict(title='Date Time', ticklen = 1),
)
fig = go.Figure(data = [trace1, trace2, trace3], layout = layout)
iplot(fig)

In [168]:
# Daily totals for 2017: incidents (count of non-null `state` values per date),
# people killed and people injured.
daily_aggregations = {'state': 'count', 'n_killed': 'sum', 'n_injured': 'sum'}
temp = df[df['year'] == 2017].groupby('date').agg(daily_aggregations)
temp = temp.reset_index().rename(columns = {'state': 'incidents'})

# The ten 2017 dates with the most incidents.
temp17_max = temp.nlargest(10, 'incidents')

# (column, legend label, colour) of the three line series.
series_specs = [
    ('incidents', 'Total number of Incidents', '#418bf4'),
    ('n_killed', 'Total number of killed', '#ce2812'),
    ('n_injured', 'Total number of injured', '#daa1e2'),
]
trace1, trace2, trace3 = [
    go.Scatter(x = temp['date'], y = temp[column], name = label,
               mode = 'lines', marker = dict(color = colour))
    for column, label, colour in series_specs
]

layout = dict(
    height = 350,
    title = 'Gun Violence Incidents - 2017',
    legend = dict(orientation = "h", x = -.01, y = 1.16),
    xaxis = dict(title='Date Time', ticklen = 1),
)
fig = go.Figure(data = [trace1, trace2, trace3], layout = layout)
iplot(fig)

In [169]:
# For each year 2014-2017: the ten month-day keys with the most incidents,
# stored per year in `holiday`.
holiday = {}
for year in (2014, 2015, 2016, 2017):
    temp = df[df['year'] == year].groupby('month_day_comb').agg(
        {'state': 'count', 'n_killed': 'sum', 'n_injured': 'sum'}
    )
    temp = temp.reset_index().rename(columns = {'state': 'incidents'})
    holiday[year] = temp.nlargest(10, 'incidents')

In [193]:
# Scatter of each year's ten busiest dates on a shared month-day axis,
# one marker series (and colour) per year.
year_colours = [
    (2014, '#418bf4'),
    (2015, '#FF8000'),
    (2016, '#a1c45a'),
    (2017, '#e62a76'),
]
trace1, trace2, trace3, trace4 = [
    go.Scatter(x = holiday[yr]['month_day_comb'], y = holiday[yr]['incidents'],
               name = str(yr), mode = 'markers',
               marker = dict(size = 8, color = colour))
    for yr, colour in year_colours
]

layout = dict(
    height = 350,
    title = 'Top 10 dates of Gun Violence Incidents',
    legend = dict(orientation = "h", x = -.01, y = 1.2),
    # Ticks show month and day only.
    xaxis = dict(title = 'Date Time', tickformat = '%b-%d'),
)
fig = go.Figure(data = [trace1,trace2, trace3, trace4], layout = layout)
iplot(fig)

## Impact of Gun Laws on Gun Violence Incidents

In [196]:
# Full state name -> two-letter postal abbreviation (50 states plus the
# District of Columbia). All codes are upper-case; 'District of Columbia' was
# previously mapped to lower-case 'dc', inconsistent with every other entry.
state_to_code = {
    'District of Columbia': 'DC', 'Mississippi': 'MS', 'Oklahoma': 'OK', 'Delaware': 'DE',
    'Minnesota': 'MN', 'Illinois': 'IL', 'Arkansas': 'AR', 'New Mexico': 'NM',
    'Indiana': 'IN', 'Maryland': 'MD', 'Louisiana': 'LA', 'Idaho': 'ID',
    'Wyoming': 'WY', 'Tennessee': 'TN', 'Arizona': 'AZ', 'Iowa': 'IA',
    'Michigan': 'MI', 'Kansas': 'KS', 'Utah': 'UT', 'Virginia': 'VA',
    'Oregon': 'OR', 'Connecticut': 'CT', 'Montana': 'MT', 'California': 'CA',
    'Massachusetts': 'MA', 'West Virginia': 'WV', 'South Carolina': 'SC', 'New Hampshire': 'NH',
    'Wisconsin': 'WI', 'Vermont': 'VT', 'Georgia': 'GA', 'North Dakota': 'ND',
    'Pennsylvania': 'PA', 'Florida': 'FL', 'Alaska': 'AK', 'Kentucky': 'KY',
    'Hawaii': 'HI', 'Nebraska': 'NE', 'Missouri': 'MO', 'Ohio': 'OH',
    'Alabama': 'AL', 'Rhode Island': 'RI', 'South Dakota': 'SD', 'Colorado': 'CO',
    'New Jersey': 'NJ', 'Washington': 'WA', 'North Carolina': 'NC', 'New York': 'NY',
    'Texas': 'TX', 'Nevada': 'NV', 'Maine': 'ME',
}

In [257]:
# Load the per-state gun-law table. The CSV's first column has no header
# (pandas labels it 'Unnamed: 0'); it is renamed to 'state'.
gun_laws = pd.read_csv('/Users/peiyu/Desktop/ECE 143/Group Project/Gun_laws_data.csv')
gun_laws = gun_laws.rename(columns = {'Unnamed: 0': 'state'})

In [258]:
# Replace full state names in `gun_laws` with their two-letter codes.
# Works for any number of rows (the row count used to be hard-coded to 50) and
# is safe to re-run: values that are already codes are left untouched, whereas
# re-running the cell used to fail with a KeyError.
known_codes = set(state_to_code.values())
unknown_states = [name for name in gun_laws['state']
                  if name not in state_to_code and name not in known_codes]
if unknown_states:
    # Same exception type the direct dictionary lookup raised for unknown names.
    raise KeyError('No state code for: {}'.format(unknown_states))
gun_laws['state'] = [state_to_code.get(name, name) for name in gun_laws['state']]

# Every row ordered by its '2017' value: ascending (min) and descending (max).
gun_laws_min = gun_laws.nsmallest(len(gun_laws), '2017')
gun_laws_max = gun_laws.nlargest(len(gun_laws), '2017')

In [250]:
# Stacked bars per state, one segment per year column ('2014'..'2017'),
# in the table's original row order.
year_palette = [
    ('2014', '#355c7d'),
    ('2015', '#6c5b7b'),
    ('2016', '#c06c84'),
    ('2017', '#f67280'),
]
trace1, trace2, trace3, trace4 = [
    go.Bar(x = gun_laws['state'], y = gun_laws[year_col], name = year_col,
           marker = dict(color = shade))
    for year_col, shade in year_palette
]
layout = go.Layout(barmode = 'stack', title = "Rise of Gun Violence Laws: 2014 - 2017")
fig = go.Figure(data = [trace1, trace2, trace3, trace4], layout = layout)
iplot(fig)

In [254]:
# Same stacked bars, with states ordered by descending '2017' value
# (`gun_laws_max`).
year_palette = [
    ('2014', '#27296d'),
    ('2015', '#5e63b6'),
    ('2016', '#a393eb'),
    ('2017', '#f5c7f7'),
]
trace1, trace2, trace3, trace4 = [
    go.Bar(x = gun_laws_max['state'], y = gun_laws_max[year_col], name = year_col,
           marker = dict(color = shade))
    for year_col, shade in year_palette
]
layout = go.Layout(barmode = 'stack', title = "Rise of Gun Violence Laws: 2014 - 2017")
fig = go.Figure(data = [trace1, trace2, trace3, trace4], layout = layout)
iplot(fig)

In [255]:
# Horizontal stacked bars, with states ordered by ascending '2017' value
# (`gun_laws_min`); the enlarged left margin and figure size leave room for
# one labelled bar per state.
year_palette = [
    ('2014', '#27296d'),
    ('2015', '#5e63b6'),
    ('2016', '#a393eb'),
    ('2017', '#f5c7f7'),
]
trace1, trace2, trace3, trace4 = [
    go.Bar(x = gun_laws_min[year_col], y = gun_laws_min['state'], name = year_col,
           marker = dict(color = shade), orientation = 'h')
    for year_col, shade in year_palette
]
layout = go.Layout(
    barmode = 'stack',
    title = "Rise of Gun Violence Laws: 2014 - 2017",
    margin = dict(l=150),
    width = 900,
    height = 900,
)
fig = go.Figure(data = [trace1, trace2, trace3, trace4], layout = layout)
iplot(fig)

In [242]:
# Number of 2014 incidents per state, largest count first.
df.loc[df['year'] == 2014, 'state'].value_counts()

California              3732
Florida                 3138
Texas                   3133
Illinois                3095
Ohio                    2368
Pennsylvania            2267
North Carolina          2165
Georgia                 2032
Louisiana               1906
New York                1903
South Carolina          1660
Tennessee               1590
New Jersey              1521
Michigan                1447
Alabama                 1318
Virginia                1273
Missouri                1272
Maryland                1266
Indiana                 1203
Wisconsin               1065
Kentucky                 977
Massachusetts            968
District of Columbia     838
Mississippi              784
Washington               656
Oklahoma                 642
Connecticut              583
Arkansas                 572
Iowa                     569
Colorado                 556
Arizona                  556
Delaware                 493
Minnesota                450
Oregon                   398
Kansas        