## Import libraries

In [14]:
import numpy as np
import pandas as pd
import numbers
import plotly
from plotly.offline import init_notebook_mode, iplot
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from plotly import tools
import folium 

init_notebook_mode(connected=True)

## Show The Data

In [15]:
# Read the incident-level CSV (file name suggests January 2013 - March 2018) from the working directory.
csv_path = "gun-violence-data_01-2013_03-2018.csv"
gun_violence_df = pd.read_csv(csv_path)

# Preview the first five rows to check that the columns parsed as expected.
gun_violence_df.head(5)

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male,0::Bernard Gillis,,0::Killed||1::Injured||2::Injured||3::Injured,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://losangeles.cbslocal.com/2013/01/01/man-...,62.0,35.0
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,http://www.gunviolencearchive.org/incident/478855,http://chronicle.northcoastnow.com/2013/02/14/...,False,...,0::25||1::31||2::33||3::34||4::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||2::Male||3::Male||4::Male,0::Damien Bell||1::Desmen Noble||2::Herman Sea...,,"0::Injured, Unharmed, Arrested||1::Unharmed, A...",0::Subject-Suspect||1::Subject-Suspect||2::Vic...,http://www.morningjournal.com/general-news/201...,56.0,13.0
3,478925,2013-01-05,Colorado,Aurora,16000 block of East Ithaca Place,4,0,http://www.gunviolencearchive.org/incident/478925,http://www.dailydemocrat.com/20130106/aurora-s...,False,...,0::29||1::33||2::56||3::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Female||1::Male||2::Male||3::Male,0::Stacie Philbrook||1::Christopher Ratliffe||...,,0::Killed||1::Killed||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://denver.cbslocal.com/2013/01/06/officer-...,40.0,28.0
4,478959,2013-01-07,North Carolina,Greensboro,307 Mourning Dove Terrace,2,2,http://www.gunviolencearchive.org/incident/478959,http://www.journalnow.com/news/local/article_d...,False,...,0::18||1::46||2::14||3::47,0::Adult 18+||1::Adult 18+||2::Teen 12-17||3::...,0::Female||1::Male||2::Male||3::Female,0::Danielle Imani Jameison||1::Maurice Eugene ...,3::Family,0::Injured||1::Injured||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://myfox8.com/2013/01/08/update-mother-sho...,62.0,27.0


## Data Cleaning

In [16]:
# Manually supplied row for the 1 October 2017 Las Vegas shooting (presumably absent from the
# CSV - hence the name `missing`). Values are positional and must line up one-to-one with the
# columns of gun_violence_df; "-" is used as a placeholder where no information is given.
# Latitude/longitude are written as floats (not strings) so those columns keep a numeric dtype.
LAS_VEGAS_INCIDENT_ID = "sban_1"
missing = [LAS_VEGAS_INCIDENT_ID, "2017-10-01", "Nevada", "Las Vegas", "Mandalay Bay 3950 Blvd S", 59, 489,
           "https://en.wikipedia.org/wiki/2017_Las_Vegas_shooting", "https://en.wikipedia.org/wiki/2017_Las_Vegas_shooting",
           "-", "-", "-", "-", "-", 36.095, "Hotel",
           -115.171667, 47, "Route 91 Harvest Festiva; concert, open fire from 32nd floor. 47 guns seized; TOTAL:59 kill, 489 inj, number shot TBD,girlfriend Marilou Danley POI",
           "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"]

# Append only once: without this guard every re-run of the cell adds another copy of the row.
# The ids are compared as strings because the column mixes numeric ids with the manual "sban_1".
already_added = gun_violence_df["incident_id"].astype(str).eq(LAS_VEGAS_INCIDENT_ID).any()
if not already_added:
    gun_violence_df.loc[len(gun_violence_df)] = missing

print(gun_violence_df.shape)

# Drop every column in which at least half of the rows are null.
null_counts = gun_violence_df.isnull().sum()
drop_columns = gun_violence_df.columns[null_counts >= (0.5 * len(gun_violence_df))]
gun_violence_filtered = gun_violence_df.drop(columns=drop_columns)
print(gun_violence_filtered.shape)
print("Dropped Columns:", list(drop_columns))

(239678, 29)
(239678, 26)
Dropped Columns: ['location_description', 'participant_name', 'participant_relationship']


## Number of Incidents per Year

In [17]:
# Parse the date strings once, then read the year off the datetime accessor.
gun_violence_filtered["date"] = pd.to_datetime(gun_violence_filtered["date"])
gun_violence_filtered = gun_violence_filtered.assign(year=gun_violence_filtered["date"].dt.year)

# Count incidents per year a single time and reuse the result for both axes.
incidents_per_year = gun_violence_filtered.groupby("year")["incident_id"].count()
x_yrs = incidents_per_year.index.values
y_yrs = incidents_per_year.values

trace1 = go.Bar(x=x_yrs, y=y_yrs)

# Use make_subplots from plotly.subplots (imported in the first cell);
# plotly.tools.make_subplots is deprecated and emits a warning.
fig = make_subplots(rows=1, cols=1, vertical_spacing=0.25)
fig.add_trace(trace1, row=1, col=1)

fig.update_xaxes(title_text="Years", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_layout(showlegend=False, height=500, width=800, title="Number of Incidents per Year")
iplot(fig)


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



## Number of Total Incidents

In [18]:
# Sum only the two casualty columns per day. Selecting them before aggregating avoids
# summing every column of the frame (including the text columns) three separate times.
daily_totals = gun_violence_filtered.groupby("date")[["n_killed", "n_injured"]].sum()

n_killed = daily_totals["n_killed"].values
n_injured = daily_totals["n_injured"].values
dates = daily_totals.index

trace1 = go.Scatter(x=dates, y=n_killed, name="Number Killed", line=dict(dash="dot"))
trace2 = go.Scatter(x=dates, y=n_injured, name="Number Injured", line=dict(dash="dot"))

data = [trace1, trace2]

layout = dict(height=500, width=1000, title="Number of Total Incidents",
              xaxis=dict(title="Time"), yaxis=dict(title="Count"))

fig = dict(data=data, layout=layout)
iplot(fig)

## Top States with Highest Number of Gun Violence Incidents

In [19]:
state = gun_violence_filtered.groupby("state")

# Aggregate only the column that is needed instead of counting/summing the whole frame.
state_incidents = state["incident_id"].count().sort_values(ascending=False)
state_killed = state["n_killed"].sum()
state_injured = state["n_injured"].sum()

# One bar per state, ordered from most to fewest incidents.
trace = go.Bar(x=state_incidents.index, y=state_incidents)

layout = dict(height=600, width=1000, title="Top States with Highest Number of Gun Violence Incidents",
              xaxis=dict(title="State of USA"),
              yaxis=dict(title="Number of Incidents"))

data = [trace]

fig = dict(data=data, layout=layout)
iplot(fig)

## Top Twenty Cities with Highest Number of Gun Violence Incidents

In [20]:
TOP_N_CITIES = 20

# NOTE(review): grouping on city_or_county alone merges places that share a name across
# different states - confirm whether state should be part of the key.
city = gun_violence_filtered.groupby("city_or_county")

# Count only incident_id (not every column) and keep the TOP_N_CITIES largest groups.
city_incidents = city["incident_id"].count().sort_values(ascending=False).head(TOP_N_CITIES)

# x and y come from the same Series, so no extra slice is needed (the old [:21] was off by one).
trace = go.Bar(x=city_incidents.index, y=city_incidents)

layout = dict(height=600, width=1000, title="Top Twenty Cities with Highest Number of Gun Violence Incidents",
              xaxis=dict(title="Cities of USA"),
              yaxis=dict(title="Number of Incidents"))

data = [trace]

fig = dict(data=data, layout=layout)
iplot(fig)

## Most Impactful Incidents

In [21]:
# Casualties per incident: people injured plus people killed.
casualties = gun_violence_filtered["n_injured"].add(gun_violence_filtered["n_killed"])
gun_violence_filtered["total_damage"] = casualties

# Show the ten incidents with the most casualties, restricted to the descriptive columns.
summary_columns = ["date", "year", "state", "city_or_county", "address", "total_damage"]
incident_summary = gun_violence_filtered[summary_columns]
incident_summary.sort_values("total_damage", ascending=False).head(10)

Unnamed: 0,date,year,state,city_or_county,address,total_damage
239677,2017-10-01,2017,Nevada,Las Vegas,Mandalay Bay 3950 Blvd S,548
130448,2016-06-12,2016,Florida,Orlando,1912 S Orange Avenue,103
217151,2017-11-05,2017,Texas,Sutherland Springs,216 4th St,47
101531,2015-12-02,2015,California,San Bernardino,1365 South Waterman Avenue,35
232745,2018-02-14,2018,Florida,Pompano Beach (Parkland),5901 Pine Island Rd,34
70511,2015-05-17,2015,Texas,Waco,4671 S Jack Kultgen Fwy,27
195845,2017-07-01,2017,Arkansas,Little Rock,220 W 6th St,25
137328,2016-07-25,2016,Florida,Fort Myers,3580 Evans Ave,21
11566,2014-04-02,2014,Texas,Fort Hood,Motor Pool Road and Tank Destroyer Boulevard,20
92624,2015-10-01,2015,Oregon,Roseburg,1140 Umpqua College Rd,19


In [22]:
# Recomputed here so the cell does not depend on the previous one having been run.
gun_violence_filtered["total_damage"] = gun_violence_filtered["n_injured"] + gun_violence_filtered["n_killed"]

MIN_CASUALTIES = 5           # only incidents with at least this many killed + injured are drawn
RADIUS_PER_CASUALTY = 0.30   # marker radius contributed by each casualty

# A dedicated name is used instead of the generic `df`, which other cells bind to the full frame.
mass_incidents = gun_violence_filtered.loc[
    gun_violence_filtered["total_damage"] >= MIN_CASUALTIES,
    ["latitude", "longitude", "total_damage", "n_killed"],
].dropna()

# NOTE(review): newer folium releases may no longer ship the "Stamen Toner" tile set - confirm
# against the installed version before relying on this basemap.
maps = folium.Map([38.0, -97.0], zoom_start=4, tiles="Stamen Toner")

# One circle per incident, sized in proportion to its casualty count.
for incident in mass_incidents.itertuples(index=False):
    folium.CircleMarker(
        [float(incident.latitude), float(incident.longitude)],
        radius=float(incident.total_damage * RADIUS_PER_CASUALTY),
        color="#ef4f61",
        fill=True,
    ).add_to(maps)
maps

## Data Filter

In [23]:
# Raw participant columns, encoded as "index::value||index::value", shown before parsing.
participant_columns = ["participant_age", "participant_type", "participant_gender"]
gun_violence_filtered.loc[:, participant_columns]


Unnamed: 0,participant_age,participant_type,participant_gender
0,0::20,0::Victim||1::Victim||2::Victim||3::Victim||4:...,0::Male||1::Male||3::Male||4::Female
1,0::20,0::Victim||1::Victim||2::Victim||3::Victim||4:...,0::Male
2,0::25||1::31||2::33||3::34||4::33,0::Subject-Suspect||1::Subject-Suspect||2::Vic...,0::Male||1::Male||2::Male||3::Male||4::Male
3,0::29||1::33||2::56||3::33,0::Victim||1::Victim||2::Victim||3::Subject-Su...,0::Female||1::Male||2::Male||3::Male
4,0::18||1::46||2::14||3::47,0::Victim||1::Victim||2::Victim||3::Subject-Su...,0::Female||1::Male||2::Male||3::Female
...,...,...,...
239673,1::21,0::Victim||1::Subject-Suspect,0::Male||1::Male
239674,0::21,0::Victim,0::Male
239675,0::42,0::Victim,0::Male
239676,0::58||1::62,0::Victim||1::Subject-Suspect,0::Female||1::Male


In [24]:
 def StringToDic(S1):
     """Parse an ``"index::value||index::value"`` string into a dict.

     Parameters
     ----------
     S1 : object
         Value to parse. It is converted with ``str()`` first, so non-string
         inputs such as NaN simply yield no entries.

     Returns
     -------
     dict
         Maps each index (as a string) to its value. Segments that contain no
         ``"::"`` separator are skipped. Only the first ``"::"`` of a segment
         splits it, so a value that itself contains ``"::"`` is kept whole.
     """
     parsed = {}
     for segment in str(S1).split("||"):
         index, separator, value = segment.partition("::")
         # Skip malformed segments explicitly instead of hiding every error behind a bare except.
         if not separator:
             continue
         parsed[index] = value

     return parsed

# Parse each raw participant column into a per-incident dict stored in "<column>_dic".
for raw_column in ("participant_age", "participant_type", "participant_gender"):
    gun_violence_filtered[raw_column + "_dic"] = gun_violence_filtered[raw_column].apply(StringToDic)

# Name of the parsed participant-type column.
mappingCol1 = "participant_type_dic"
def MapThroughRow(df,mappingCol1,mappingCol2):
    """Collect the values of ``mappingCol2`` grouped by participant role.

    Both columns must hold one dict per row. For every key in the
    ``mappingCol1`` dict that also exists in the ``mappingCol2`` dict of the
    same row, the ``mappingCol2`` value is appended to ``"Victim"`` when the
    ``mappingCol1`` value equals ``"Victim"``, or to ``"Suspect"`` when it
    contains the text ``"Suspect"``. Other entries are ignored.

    Returns a dict ``{"Victim": [...], "Suspect": [...]}``.
    """
    grouped = {"Victim": [], "Suspect": []}
    for roles, attributes in zip(df[mappingCol1], df[mappingCol2]):
        for participant, role in roles.items():
            # Participants without a matching attribute entry contribute nothing.
            if participant not in attributes:
                continue
            if role == "Victim":
                grouped["Victim"].append(attributes[participant])
            elif "Suspect" in role:
                grouped["Suspect"].append(attributes[participant])

    return grouped

In [25]:
mappingCol2 = "participant_age_dic"
mappingCol3 = "participant_gender_dic"
df = gun_violence_filtered

# Ages per role; the parsed ages are strings, so they are converted to int here.
MapTypeAge = {
    role: [int(age) for age in ages]
    for role, ages in MapThroughRow(df, mappingCol1, mappingCol2).items()
}

# Genders per role, kept as the raw strings.
MapTypeGender = MapThroughRow(df, mappingCol1, mappingCol3)


In [26]:
# Number of collected entries per role: ages first, then genders.
for role_map in (MapTypeAge, MapTypeGender):
    for role in ("Victim", "Suspect"):
        print(len(role_map[role]))

107429
110949
167025
179454


In [27]:
def countDic(L):
    """Return a dict mapping each distinct item of ``L`` to how often it occurs.

    Keys appear in the order in which the items are first seen.
    """
    occurrences = {}
    for item in L:
        occurrences[item] = occurrences.get(item, 0) + 1
    return occurrences

# Tally each role's ages once, then split the tally into parallel key/count lists.
victim_age_counts = countDic(MapTypeAge["Victim"])
suspect_age_counts = countDic(MapTypeAge["Suspect"])

VictimList, VictimCount = list(victim_age_counts), list(victim_age_counts.values())
SuspectList, SuspectCount = list(suspect_age_counts), list(suspect_age_counts.values())

## Age Distribution of Victims

In [28]:
# Victim ages on the x-axis, number of victims of that age on the y-axis.
trace1 = go.Bar(x=VictimList, y=VictimCount, name="Age distribution of Victim", marker=dict(color="darkcyan"))

data = [trace1]
# Axis titles use title=dict(text=..., font=...): the flat `titlefont` key is deprecated in
# plotly and removed in newer releases. The x range limits the view to ages 0-100.
layout = go.Layout(title="Age Distribution of Victims",
    xaxis=dict(title=dict(text="Ages", font=dict(size=16, color="black")), tickfont=dict(size=14, color="black"), range=[0,100]),
    yaxis=dict(title=dict(text="Count", font=dict(size=16, color="black")), tickfont=dict(size=14, color="black")),
    bargap=0.15, bargroupgap=0.1)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Age Distribution of Suspects

In [29]:
# Suspect ages on the x-axis, number of suspects of that age on the y-axis.
trace1 = go.Bar(x=SuspectList, y=SuspectCount, name="Age distribution of Suspects", marker=dict(color="darkred"))

data = [trace1]
# Axis titles use title=dict(text=..., font=...): the flat `titlefont` key is deprecated in
# plotly and removed in newer releases. The x range limits the view to ages 0-100.
layout = go.Layout(title="Age Distribution of Suspects",
    xaxis=dict(title=dict(text="Ages", font=dict(size=16, color="black")), tickfont=dict(size=14, color="black"), range=[0,100]),
    yaxis=dict(title=dict(text="Count", font=dict(size=16, color="black")), tickfont=dict(size=14, color="black")),
    bargap=0.2, bargroupgap=0.1)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Characteristics of Gender

In [30]:
# Tally each role's genders once, then split the tally into parallel label/count lists.
victim_gender_tally = countDic(MapTypeGender["Victim"])
suspect_gender_tally = countDic(MapTypeGender["Suspect"])

VicGenderList, VicGenderCount = list(victim_gender_tally), list(victim_gender_tally.values())
SusGenderList, SusGenderCount = list(suspect_gender_tally), list(suspect_gender_tally.values())

In [31]:
# For victims, then suspects: print the (labels, counts) pair followed by the overall total.
for gender_labels, gender_counts in ((VicGenderList, VicGenderCount), (SusGenderList, SusGenderCount)):
    print((gender_labels, gender_counts))
    print(sum(gender_counts))

(['Male', 'Female', 'Male, female'], [136394, 30630, 1])
167025
(['Female', 'Male'], [11746, 167708])
179454


In [32]:
labels = ["Male", "Female"]

# Look the slice sizes up from the computed gender counts instead of pasting numbers from a
# printed output, so the chart stays correct if the data changes. Only the two labels above
# are plotted; any other gender string is left out.
victim_gender_counts = dict(zip(VicGenderList, VicGenderCount))
suspect_gender_counts = dict(zip(SusGenderList, SusGenderCount))
victim_values = [victim_gender_counts.get(label, 0) for label in labels]
suspect_values = [suspect_gender_counts.get(label, 0) for label in labels]

# "domain" subplot cells are required for pie traces.
fig = make_subplots(rows=1, cols=2, specs=[[{"type":"domain"}, {"type":"domain"}]])
fig.add_trace(go.Pie(labels=labels, values=victim_values, name="Victims"), 1, 1)
fig.add_trace(go.Pie(labels=labels, values=suspect_values, name="Suspects"), 1, 2)

# A hole turns both pies into donuts.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Characteristics of Gender",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text="Victims", x=0.18, y=0.5, font_size=20, showarrow=False),
                 dict(text="Suspects", x=0.83, y=0.5, font_size=20, showarrow=False)])
fig.show()

## Data Cleaning & Data Filtering

In [33]:
# Raw gun_type column, shown before it is parsed.
gun_violence_filtered.loc[:, ["gun_type"]]

Unnamed: 0,gun_type
0,
1,
2,0::Unknown||1::Unknown
3,
4,0::Handgun||1::Handgun
...,...
239673,0::Unknown
239674,0::Unknown
239675,0::Unknown
239676,0::Handgun||1::Shotgun


In [34]:
# Parse the raw gun_type strings into one dict per incident.
raw_gun_types = gun_violence_filtered["gun_type"]
gun_violence_filtered["gun_type_dic"] = raw_gun_types.apply(StringToDic)

In [35]:
def CountDfValue(df,col="gun_type_dic"):
    """Count how often each value occurs across the dicts stored in ``df[col]``.

    ``df[col]`` must hold one dict per row; only the dict values are tallied.
    Returns a dict mapping each distinct value to its number of occurrences.
    """
    tally = {}
    for mapping in df[col]:
        for label in mapping.values():
            tally[label] = tally.get(label, 0) + 1

    return tally

dicGun = CountDfValue(gun_violence_filtered)
# Leave the "Unknown" bucket out of the ranking. pop() with a default does not raise when the
# key is absent (unlike del); the result is assigned so the cell does not echo it.
_unknown_gun_count = dicGun.pop("Unknown", None)

In [36]:
# Parsed gun types, one dict per incident.
gun_violence_filtered.loc[:, ["gun_type_dic"]]

Unnamed: 0,gun_type_dic
0,{}
1,{}
2,"{'0': 'Unknown', '1': 'Unknown'}"
3,{}
4,"{'0': 'Handgun', '1': 'Handgun'}"
...,...
239673,{'0': 'Unknown'}
239674,{'0': 'Unknown'}
239675,{'0': 'Unknown'}
239676,"{'0': 'Handgun', '1': 'Shotgun'}"


In [37]:
# Rank gun types from most to least frequent, then split into parallel name/count lists.
gun_ranking = sorted(dicGun.items(), key=lambda pair: pair[1], reverse=True)
gunList = [pair[0] for pair in gun_ranking]
gunCount = [pair[1] for pair in gun_ranking]

## Gun Involved in The Cases

In [38]:
# One bar per gun type, in the ranking order of gunList.
trace1 = go.Bar(x=gunList, y=gunCount, marker=dict(color="firebrick"))

data = [trace1]
# Axis titles use title=dict(text=..., font=...): the flat `titlefont` key is deprecated in
# plotly and removed in newer releases.
layout = go.Layout(title="Gun Involved in The Cases",
    xaxis=dict(title=dict(text="Gun Types", font=dict(size=16, color="black")), tickfont=dict(size=12, color="black")),
    yaxis=dict(title=dict(text="Count", font=dict(size=16, color="black")), tickfont=dict(size=14, color="black")),
    bargap=0.15, bargroupgap=0.1)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Data Filter

In [39]:
# Set of distinct gun types per incident. Written to gun_violence_filtered by name rather than
# through the `df` alias: `df` is rebound in other cells, while FurthurColCal reads this column
# from gun_violence_filtered, so the alias only worked for one particular execution order.
gun_violence_filtered["gun_type_appear"] = gun_violence_filtered["gun_type_dic"].apply(lambda guns: set(guns.values()))
def FurthurColCal(df=gun_violence_filtered,colToCal="n_injured",colToMap="gun_type_appear"):
    """Accumulate ``colToCal`` per item found in ``colToMap``.

    Each row's ``colToMap`` value is iterated; for every item in it, the row's
    ``colToCal`` value (converted with ``int``) is added to that item's total.

    Returns a dict mapping each item to a two-element list
    ``[number of rows containing the item, summed colToCal over those rows]``.
    Note that the default ``df`` is bound when the function is defined.
    """
    per_item = {}

    for items, amount in zip(df[colToMap], df[colToCal]):
        for item in items:
            # Start a fresh [row count, running total] pair the first time an item is seen.
            stats = per_item.setdefault(item, [0, 0])
            stats[0] += 1
            stats[1] += int(amount)
    return per_item

In [40]:
# Per gun type: [number of incidents, summed injured] and [number of incidents, summed killed].
dicGunInjured = FurthurColCal(colToCal="n_injured")
dicGunKilled = FurthurColCal(colToCal="n_killed")

# Leave the "Unknown" bucket out. pop() with a default does not raise when the key is absent
# (unlike del), so the cell also runs on data without an "Unknown" gun type.
for _per_gun in (dicGunInjured, dicGunKilled):
    _per_gun.pop("Unknown", None)

In [41]:
# Each dict value is [incident count, summed casualties]; turn it into
# (gun type, average per incident, total).
GunInjuredList = [(gun, total / count, total) for gun, (count, total) in dicGunInjured.items()]
GunKilledList = [(gun, total / count, total) for gun, (count, total) in dicGunKilled.items()]

# Injured and killed entries are paired by position - this assumes both dicts list the gun
# types in the same order. The gun type label is taken from the injured entry.
GunTotalList = [
    (inj_gun, inj_avg + kill_avg, inj_total + kill_total)
    for (inj_gun, inj_avg, inj_total), (kill_gun, kill_avg, kill_total) in zip(GunInjuredList, GunKilledList)
]

# Parallel lists for plotting.
GunType = [entry[0] for entry in GunTotalList]
GunInjuredAverage = [average for gun, average, total in GunInjuredList]
GunInjuredTotal = [total for gun, average, total in GunInjuredList]
GunKilledAverage = [average for gun, average, total in GunKilledList]
GunKilledTotal = [total for gun, average, total in GunKilledList]
GunTotalAverage = [average for gun, average, total in GunTotalList]
GunTotalTotal = [total for gun, average, total in GunTotalList]

## Number of Total Injured and Killed Caused by Each Gun Type

In [42]:
# Two bars per gun type: total injured and total killed.
trace1 = go.Bar(x=GunType, y=GunInjuredTotal, marker=dict(color="orange"), name="Total Injured")
trace2 = go.Bar(x=GunType, y=GunKilledTotal, marker=dict(color="red"), name="Total Killed")

data = [trace1, trace2]
# Axis titles use title=dict(text=..., font=...): the flat `titlefont` key is deprecated in
# plotly and removed in newer releases. The y range is fixed to 0-5000, so bars above 5000
# extend past the visible area.
layout = go.Layout(title="Number of Total Injured and Killed Caused by Each Gun Type",
    xaxis=dict(title=dict(text="Gun Types", font=dict(size=16, color="black")), tickfont=dict(size=10, color="black")),
    yaxis=dict(title=dict(text="Count", font=dict(size=16, color="black")), range=[0,5000], tickfont=dict(size=14, color="black")),
    bargap=0.15, bargroupgap=0.1)

fig = go.Figure(data=data, layout=layout)
iplot(fig)