In [None]:
import os

def scale_input_data(scale_factor):
  """Write a resized ``.scaled.csv`` copy of every input CSV.

  For each base path in ``file_bases`` the file ``<base>.csv`` is read and
  ``<base>.scaled.csv`` is written with ``int(scale_factor * n_rows)`` rows:

  * ``scale_factor == 1.0`` -- the file is copied as-is, without parsing.
  * ``scale_factor < 1.0``  -- only the leading rows are kept.
  * ``scale_factor > 1.0``  -- rows are repeated from the top until the
    target row count is reached.

  Args:
    scale_factor: Non-negative multiplier applied to the row count.

  Raises:
    ValueError: If ``scale_factor`` is negative or NaN. A negative factor
      previously reached ``iloc[:negative]`` and silently dropped trailing
      rows instead of scaling.
  """
  # Imported locally: nothing above this helper in the notebook has imported
  # pandas or shutil yet. Hoisted out of the per-file loop.
  import shutil
  import pandas as pd

  # `not x >= 0` is also true for NaN, so both bad cases are rejected before
  # any file is touched.
  if not scale_factor >= 0:
    raise ValueError(
        'scale_factor must be a non-negative number, got %r' % (scale_factor,))

  file_bases = ['./input/gun-violence-data_01-2013_03-2018']
  for file_base in file_bases:
    source_path = file_base + '.csv'
    scaled_path = file_base + '.scaled.csv'

    if scale_factor == 1.0:
      # Nothing to resize: a plain file copy avoids a parse/serialize round trip.
      shutil.copyfile(source_path, scaled_path)
      continue

    df_to_scale = pd.read_csv(source_path)
    new_num_rows = int(scale_factor * len(df_to_scale))
    if scale_factor < 1.0:
      df_to_scale = df_to_scale.iloc[:new_num_rows]
    else:
      # Each pass appends at most the current length, so the frame roughly
      # doubles until the last pass tops it up to exactly new_num_rows.
      while len(df_to_scale) < new_num_rows:
        rows_missing = new_num_rows - len(df_to_scale)
        rows_to_add = min(rows_missing, len(df_to_scale))
        df_to_scale = pd.concat([df_to_scale, df_to_scale.iloc[:rows_to_add]])
    df_to_scale.to_csv(scaled_path, index=False)

# Optional pre-processing step: the scaled input is (re)generated only when
# INPUT_SCALE_FACTOR is set in the environment; its value is parsed as a float.
# The data-loading cell reads the `.scaled.csv` file, so when the variable is
# unset that file must already exist.
if 'INPUT_SCALE_FACTOR' in os.environ:
  scale_input_data(float(os.environ['INPUT_SCALE_FACTOR']))

# **Gun Violence Data Exploration, Analysis and Plotting**

## Database Content

#### File Name : gun-violence-data_01-2013_03-2018.csv 

The CSV file contains data for all recorded gun violence incidents in the US between January 2013 and March 2018, inclusive. Gun Violence Archive (GVA) is a not-for-profit corporation formed in 2013 to provide free online public access to accurate information about gun-related violence in the United States. GVA collects comprehensive information about gun-related violence in the U.S., checks it for accuracy, and then posts and disseminates it online.

#### Columns Used

- incident_id - ID of the crime report
- date - Date of crime
- state - State of crime
- city_or_county - City/County of crime
- address - Address of the location of the crime
- n_killed - Number of people killed
- n_injured - Number of people injured
- incident_url - URL regarding the incident
- source_url - Reference to the reporting source
- incident_url_fields_missing - TRUE if the incident_url is present, FALSE otherwise
- congressional_district - Congressional district id
- gun_stolen - Status of guns involved in the crime (i.e. Unknown, Stolen, etc...)
- gun_type - Typification of guns used in the crime
- incident_characteristics - Characteristics of the incidence
- latitude - Location of the incident
- location_description
- longitude - Location of the incident
- n_guns_involved - Number of guns involved in incident
- notes - Additional information of the crime
- participant_age - Age of participant(s) at the time of crime
- participant_age_group - Age group of participant(s) at the time crime
- participant_gender - Gender of participant(s)
- participant_name - Name of participant(s) involved in crime
- participant_relationship - Relationship of participant to other participant(s)
- participant_status - Extent of harm done to the participant
- participant_type - Type of participant
- sources
- state_house_district
- state_senate_district

## Import all required Libraries

In [8]:
# Data manipulation modules
# import pandas as pd        # R-like data manipulation
# NOTE(review): this executes whatever Python source is stored in the
# IREWR_IMPORTS environment variable and raises KeyError when it is unset.
# It presumably binds `pd` (later cells call pd.read_csv / pd.to_datetime
# and the direct pandas import above is commented out) -- confirm, and only
# run this notebook in an environment where that variable is trusted.
exec(os.environ['IREWR_IMPORTS'])
import numpy as np         # n-dimensional arrays

# For plotting
# ALEX: remove plotting
# import matplotlib as mpl
# import matplotlib.pyplot as plt      # For base plotting
# # Seaborn is a library for making statistical graphics
# # in Python. It is built on top of matplotlib and 
# #  numpy and pandas data structures.
# import seaborn as sns                # Easier plotting

# # Misc
# import os

# ## To Show graphs in same window
# %matplotlib inline

# mpl.style.use("seaborn")
# plt.style.use("seaborn")


### Data Loading

In [9]:
######### Begin
# Read the scaled copy of the data file (the `.scaled.csv` written by
# scale_input_data) into the working frame.
data_gv = pd.read_csv("./input/gun-violence-data_01-2013_03-2018.scaled.csv")

# Explore data - first 5 records of the Gun Violence data
data_gv.head()                          # head() defaults to 5 rows




    import ray
    ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})

2025-01-06 22:52:48,778	INFO worker.py:1528 -- Started a local Ray instance.


Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male,0::Bernard Gillis,,0::Killed||1::Injured||2::Injured||3::Injured,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://losangeles.cbslocal.com/2013/01/01/man-...,62.0,35.0
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,http://www.gunviolencearchive.org/incident/478855,http://chronicle.northcoastnow.com/2013/02/14/...,False,...,0::25||1::31||2::33||3::34||4::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||2::Male||3::Male||4::Male,0::Damien Bell||1::Desmen Noble||2::Herman Sea...,,"0::Injured, Unharmed, Arrested||1::Unharmed, A...",0::Subject-Suspect||1::Subject-Suspect||2::Vic...,http://www.morningjournal.com/general-news/201...,56.0,13.0
3,478925,2013-01-05,Colorado,Aurora,16000 block of East Ithaca Place,4,0,http://www.gunviolencearchive.org/incident/478925,http://www.dailydemocrat.com/20130106/aurora-s...,False,...,0::29||1::33||2::56||3::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Female||1::Male||2::Male||3::Male,0::Stacie Philbrook||1::Christopher Ratliffe||...,,0::Killed||1::Killed||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://denver.cbslocal.com/2013/01/06/officer-...,40.0,28.0
4,478959,2013-01-07,North Carolina,Greensboro,307 Mourning Dove Terrace,2,2,http://www.gunviolencearchive.org/incident/478959,http://www.journalnow.com/news/local/article_d...,False,...,0::18||1::46||2::14||3::47,0::Adult 18+||1::Adult 18+||2::Teen 12-17||3::...,0::Female||1::Male||2::Male||3::Female,0::Danielle Imani Jameison||1::Maurice Eugene ...,3::Family,0::Injured||1::Injured||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://myfox8.com/2013/01/08/update-mother-sho...,62.0,27.0


In [3]:
# List the column labels of the loaded frame.
data_gv.columns

Index(['incident_id', 'date', 'state', 'city_or_county', 'address', 'n_killed',
       'n_injured', 'incident_url', 'source_url',
       'incident_url_fields_missing', 'congressional_district', 'gun_stolen',
       'gun_type', 'incident_characteristics', 'latitude',
       'location_description', 'longitude', 'n_guns_involved', 'notes',
       'participant_age', 'participant_age_group', 'participant_gender',
       'participant_name', 'participant_relationship', 'participant_status',
       'participant_type', 'sources', 'state_house_district',
       'state_senate_district'],
      dtype='object')

In [4]:
# The same column labels, exposed as a plain array instead of an Index.
data_gv.columns.values

array(['incident_id', 'date', 'state', 'city_or_county', 'address',
       'n_killed', 'n_injured', 'incident_url', 'source_url',
       'incident_url_fields_missing', 'congressional_district',
       'gun_stolen', 'gun_type', 'incident_characteristics', 'latitude',
       'location_description', 'longitude', 'n_guns_involved', 'notes',
       'participant_age', 'participant_age_group', 'participant_gender',
       'participant_name', 'participant_relationship',
       'participant_status', 'participant_type', 'sources',
       'state_house_district', 'state_senate_district'], dtype=object)

In [5]:
# Inspect the dtype inferred for each column when the CSV was read.
data_gv.dtypes

incident_id                      int64
date                            object
state                           object
city_or_county                  object
address                         object
n_killed                         int64
n_injured                        int64
incident_url                    object
source_url                      object
incident_url_fields_missing       bool
congressional_district         float64
gun_stolen                      object
gun_type                        object
incident_characteristics        object
latitude                       float64
location_description            object
longitude                      float64
n_guns_involved                float64
notes                           object
participant_age                 object
participant_age_group           object
participant_gender              object
participant_name                object
participant_relationship        object
participant_status              object
participant_type         

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data_gv.describe()

Unnamed: 0,incident_id,n_killed,n_injured,congressional_district,latitude,longitude,n_guns_involved,state_house_district,state_senate_district
count,239677.0,239677.0,239677.0,227733.0,231754.0,231754.0,140226.0,200905.0,207342.0
mean,559334.3,0.25229,0.494007,8.001265,37.546598,-89.338348,1.372442,55.447132,20.47711
std,293128.7,0.521779,0.729952,8.480835,5.130763,14.359546,4.678202,42.048117,14.20456
min,92114.0,0.0,0.0,0.0,19.1114,-171.429,1.0,1.0,1.0
25%,308545.0,0.0,0.0,2.0,33.9034,-94.158725,1.0,21.0,9.0
50%,543587.0,0.0,0.0,5.0,38.5706,-86.2496,1.0,47.0,19.0
75%,817228.0,0.0,1.0,10.0,41.437375,-80.048625,1.0,84.0,30.0
max,1083472.0,50.0,53.0,53.0,71.3368,97.4331,400.0,901.0,94.0


In [7]:
# Per-column non-null counts and dtypes; shows which columns have missing values.
data_gv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 239677 entries, 0 to 239676
Data columns (total 29 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   incident_id                  239677 non-null  int64  
 1   date                         239677 non-null  object 
 2   state                        239677 non-null  object 
 3   city_or_county               239677 non-null  object 
 4   address                      223180 non-null  object 
 5   n_killed                     239677 non-null  int64  
 6   n_injured                    239677 non-null  int64  
 7   incident_url                 239677 non-null  object 
 8   source_url                   239209 non-null  object 
 9   incident_url_fields_missing  239677 non-null  bool   
 10  congressional_district       227733 non-null  float64
 11  gun_stolen                   140179 non-null  object 
 12  gun_type                     140226 non-null  object 
 13 

In [8]:
# (rows, columns) of the frame before any column is dropped or added.
data_gv.shape

(239677, 29)

In [9]:
# Drop the columns that the analysis below never reads.
# The frame is rebound instead of mutated with inplace=True, and
# errors="ignore" skips labels that are already absent, so re-running this
# cell no longer raises KeyError on the second execution.
data_gv = data_gv.drop(
    columns=[
        "incident_characteristics",
        "latitude",
        "longitude",
        "incident_url",
        "sources",
        "source_url",
        "incident_url_fields_missing",
        "location_description",
        "participant_relationship",
        "notes",
    ],
    errors="ignore",
)

In [10]:
# Distinct raw values of gun_type; entries are "index::type" pairs joined by "||".
data_gv['gun_type'].unique()              # Which values occur

array([nan, '0::Unknown||1::Unknown', '0::Handgun||1::Handgun', ...,
       '0::Handgun||1::Handgun||2::Unknown||3::Unknown||4::Unknown||5::Unknown||6::Unknown||7::Unknown||8::Unknown||9::Unknown||10::Unknown||11::Unknown||12::Unknown||13::Unknown||14::Unknown||15::Unknown||16::Unknown||17::Unknown||18::Unknown||19::Unknown||20::Unknown||21::Unknown||22::Unknown||23::Unknown||24::Unknown',
       '0::25 Auto||1::Shotgun||2::Shotgun||3::Shotgun',
       '0::357 Mag||1::44 Mag||2::45 Auto||3::Rifle||4::Rifle||5::Rifle||6::Rifle||7::Rifle'],
      dtype=object)

In [11]:
## Convert the 'date' column from object (string) dtype to datetime so the
## .dt accessor can be used on it in the next cell.
data_gv['date'] = pd.to_datetime(data_gv['date']) 

In [12]:
# Derive month, year and weekday columns from the parsed date for the
# per-period aggregations further down; each one is stored with object dtype.
data_gv['f_month'] = data_gv['date'].dt.month.astype('object')
data_gv['f_year'] = data_gv['date'].dt.year.astype('object')
data_gv['f_weekday'] = data_gv['date'].dt.weekday.astype('object')

# Confirm the dtypes of the date column and the three new columns
data_gv.dtypes

incident_id                        int64
date                      datetime64[ns]
state                             object
city_or_county                    object
address                           object
n_killed                           int64
n_injured                          int64
congressional_district           float64
gun_stolen                        object
gun_type                          object
n_guns_involved                  float64
participant_age                   object
participant_age_group             object
participant_gender                object
participant_name                  object
participant_status                object
participant_type                  object
state_house_district             float64
state_senate_district            float64
f_month                           object
f_year                            object
f_weekday                         object
dtype: object

In [13]:
# Create a column with the total number of persons impacted per incident
# (killed + injured).
data_gv['total_impacted'] = data_gv['n_killed'] + data_gv['n_injured']

In [14]:
# Fill missing values in the gun columns: a missing gun count becomes 0 and
# a missing stolen-status becomes the single entry "0::Unknown".
data_gv["n_guns_involved"] = data_gv["n_guns_involved"].fillna(value =0)
data_gv["gun_stolen"] = data_gv["gun_stolen"].fillna(value = "0::Unknown")

In [15]:
## Creating multiple columns from the participant_gender column.
## Missing values are first replaced by a single "0::Unknown" entry, so such
## an incident is counted as one participant of unknown gender below.
data_gv["participant_gender"] = data_gv["participant_gender"].fillna("0::Unknown")
    
def gen(n):
    """Return the value part of every ``index::value`` entry in ``n``.

    ``n`` is converted with ``str`` and split on ``"||"``; each piece is then
    split on ``"::"``. Pieces that contain no ``"::"`` (for example the text
    ``"nan"`` produced by a missing value) are skipped; for every other piece
    the element right after the first ``"::"`` is collected.

    Returns:
        list of str, in the order the entries appear in ``n``.
    """
    split_entries = (str(entry).split("::") for entry in str(n).split("||"))
    return [parts[1] for parts in split_entries if len(parts) > 1]

# Parse each gender string once, then derive the per-incident head counts
# from the resulting lists.
gen_series = data_gv["participant_gender"].apply(gen)
data_gv["total_participant"] = gen_series.apply(len)
data_gv["male_participant"] = gen_series.apply(lambda genders: genders.count("Male"))
data_gv["female_participant"] = gen_series.apply(lambda genders: genders.count("Female"))
data_gv["unknown_participant"] = gen_series.apply(lambda genders: genders.count("Unknown"))


In [16]:
# Check the values of the newly added columns (date parts, total_impacted
# and the participant counts) on the first 5 rows.
data_gv.head()


Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,congressional_district,gun_stolen,gun_type,...,state_house_district,state_senate_district,f_month,f_year,f_weekday,total_impacted,total_participant,male_participant,female_participant,unknown_participant
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,14.0,0::Unknown,,...,,,1,2013,1,4,4,3,1,0
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,43.0,0::Unknown,,...,62.0,35.0,1,2013,1,4,1,1,0,0
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,9.0,0::Unknown||1::Unknown,0::Unknown||1::Unknown,...,56.0,13.0,1,2013,1,4,5,5,0,0
3,478925,2013-01-05,Colorado,Aurora,16000 block of East Ithaca Place,4,0,6.0,0::Unknown,,...,40.0,28.0,1,2013,5,4,4,3,1,0
4,478959,2013-01-07,North Carolina,Greensboro,307 Mourning Dove Terrace,2,2,6.0,0::Unknown||1::Unknown,0::Handgun||1::Handgun,...,62.0,27.0,1,2013,0,4,4,2,2,0


In [17]:
# (rows, columns) after dropping the unused columns and adding the derived ones.
data_gv.shape

(239677, 27)

### Start Plotting Graph

In [18]:

##As per assignment lets plot following graphs:
#i)  Joint Distribution plots
#ii)  Histograms
#iii) Kernel Density plots
#iv) Violin plots
#v) Box plots
#vi) FacetGrid

In [19]:
###########################Joint Distribution plots############################
# Draw a jointplot between Number of Person Killed Vs Injured in all incidences
# ALEX: remove plotting
# sns.jointplot("n_injured",
#               "n_killed",
#               data_gv,
#               kind='scatter',      # kind : { “scatter” | “reg” | “resid” | “kde” | “hex” }, optional
#               s=200, color='m', edgecolor="skyblue", linewidth=2)


In [20]:
# Draw a jointplot to identify Maximum Number of Person Injured in which incidence
# ALEX: remove plotting
# sns.jointplot("incident_id",
#               "n_injured",
#               data_gv,
#               kind='scatter'      # kind : { “scatter” | “reg” | “resid” | “kde” | “hex” }, optional
#               )

In [21]:
# Draw a jointplot to identify Maximum Number of Person Killed in which incidence
# ALEX: remove plotting
# sns.jointplot("incident_id",
#               "n_killed",
#               data_gv,
#               kind='scatter',      # kind : { “scatter” | “reg” | “resid” | “kde” | “hex” }, optional
#               color="Red",
#               marginal_kws={'color': 'red'})

In [22]:
###############################  Histograms  #########################

# Top 10 cities/counties by number of recorded incidents.
# The bar chart is disabled; only the ranked table is printed.
# Count incident_id per city_or_county, then keep the 10 largest counts.
ctwise_total = data_gv[["incident_id"]].groupby(data_gv["city_or_county"]).count()
top_ct = ctwise_total.sort_values(by='incident_id', ascending=False).head(10)
print(top_ct)
# ALEX: remove plotting
# top_ct.plot.barh()
# Only the top-10 slice is released; ctwise_total stays defined.
del(top_ct)

                incident_id
city_or_county             
Chicago               10814
Baltimore              3943
Washington             3279
New Orleans            3071
Philadelphia           2963
Houston                2501
Saint Louis            2501
Milwaukee              2487
Jacksonville           2448
Memphis                2386


In [23]:
# Top 10 states by number of recorded incidents.
# The bar chart is disabled; only the ranked table is printed.
# Count incident_id per state, then keep the 10 largest counts.
stwise_total = data_gv[["incident_id"]].groupby(data_gv["state"]).count()
top_st = stwise_total.sort_values(by='incident_id', ascending=False).head(10)
print(top_st)
# ALEX: remove plotting
# top_st.plot.barh()
# Only the top-10 slice is released; stwise_total stays defined.
del(top_st)


                incident_id
state                      
Illinois              17556
California            16306
Florida               15029
Texas                 13577
Ohio                  10244
New York               9712
Pennsylvania           8929
Georgia                8925
North Carolina         8739
Louisiana              8103


In [24]:
# Number of incidents per weekday (the bar chart is disabled, so the result
# is computed and then discarded without being displayed).
weekwise_total = data_gv[["incident_id"]].groupby(data_gv["f_weekday"]).count()
# ALEX: remove plotting
# weekwise_total.plot.barh()
del(weekwise_total)
# f_weekday comes from .dt.weekday: Monday is 0 and Sunday is 6.

In [25]:
############################  Kernel Density plots  #################################
# Density plot for gender-wise participants (plotting disabled).
# Sum the participant counts per year.
genderwise_total = data_gv[["total_participant", "male_participant", "female_participant", "unknown_participant"]].groupby(data_gv["f_year"]).sum()
# ALEX: remove plotting
# dp_gen_plot=sns.kdeplot(genderwise_total['male_participant'], shade=True, color="r")
# dp_gen_plot=sns.kdeplot(genderwise_total['female_participant'], shade=True, color="b")
# dp_gen_plot=sns.kdeplot(genderwise_total['unknown_participant'], shade=True, color="g")
# The three column selections below are assigned to `_` and never used;
# presumably they stand in for the column accesses of the disabled kdeplot calls.
_ = genderwise_total['male_participant']
_ = genderwise_total['female_participant']
_ = genderwise_total['unknown_participant']
del(genderwise_total)

In [26]:
# Density plot for persons injured vs killed per weekday (plotting disabled).
# Sum n_injured and n_killed for each weekday.
inj_kill_weektotal = data_gv[["n_injured","n_killed"]].groupby(data_gv["f_weekday"]).sum()
# ALEX: remove plotting
# dp_inj_kill_plot=sns.kdeplot(inj_kill_weektotal['n_injured'], shade=True, color="r")
# dp_inj_kill_plot=sns.kdeplot(inj_kill_weektotal['n_killed'], shade=True, color="b")
# The two column selections below are assigned to `_` and never used;
# presumably they stand in for the column accesses of the disabled kdeplot calls.
_ = inj_kill_weektotal['n_injured']
_ = inj_kill_weektotal['n_killed']
del(inj_kill_weektotal)

In [27]:
################################## Violin plots #################################
# Violin Plot for Yearwise Person Injured
# ALEX: remove plotting
# yr_injured_plot = sns.violinplot("f_year", "n_injured", data=data_gv,
#                                  split=True, inner="quartile")
# yr_injured_plot.set_title("Person killed in incidents per Year")

In [28]:
#  Violin Plot for  Yearwise Person killed
# ALEX: remove plotting
# yr_killed_plot = sns.violinplot("f_year", "n_killed",
#                data=data_gv,
#                split=True,         # If hue variable has two levels, draw half of a violin for each level.
#                inner="quartile"    #  Options: “box”, “quartile”, “point”, “stick”, None 
#                )


In [29]:
# Violin plot for persons impacted (killed/injured) during gun violence
# (plotting disabled; the per-year totals are printed instead).
Impacted_person_total = data_gv[["total_impacted", "n_injured", "n_killed"]].groupby(data_gv["f_year"]).sum()
print(Impacted_person_total)
# ALEX: remove plotting
# yr_impacted_plot = sns.violinplot(data=Impacted_person_total,
#                split=True,         # If hue variable has two levels, draw half of a violin for each level.
#                inner="quartile"    #  Options: “box”, “quartile”, “point”, “stick”, None 
#                )
del(Impacted_person_total)

        total_impacted  n_injured  n_killed
f_year                                     
2013              1296        979       317
2014             35559      23002     12557
2015             40451      26967     13484
2016             45646      30580     15066
2017             46214      30703     15511
2018              9704       6171      3533


In [30]:
# Violin plot for gender-wise persons involved/impacted during gun violence
# (plotting disabled; the per-year participant totals are printed instead).
genderwise_total = data_gv[["total_participant", "male_participant", "female_participant", "unknown_participant"]].groupby(data_gv["f_year"]).sum()
print(genderwise_total)
# ALEX: remove plotting
# yr_gender_plot = sns.violinplot(data=genderwise_total,
#                split=True,         # If hue variable has two levels, draw half of a violin for each level.
#                inner="quartile"    #  Options: “box”, “quartile”, “point”, “stick”, None 
#                )

del(genderwise_total)

        total_participant  male_participant  female_participant  \
f_year                                                            
2013                 1281               981                 283   
2014                83546             66809                9706   
2015                87081             68781                9821   
2016                94774             75707               10547   
2017                95159             75484                9781   
2018                21000             16340                2238   

        unknown_participant  
f_year                       
2013                     17  
2014                   7030  
2015                   8479  
2016                   8520  
2017                   9894  
2018                   2422  


In [31]:
###################################  Box plots ##################################
# Box Plot for Monthwise Person Killed
# ALEX: remove plotting
# mth_killed_plot = sns.boxplot("f_month", "n_killed", data= data_gv)
# mth_killed_plot.set_title("Person killed in incidents per month")


In [32]:
# Box Plot for Monthwise Person Injured
# ALEX: remove plotting
# mth_injured_plot = sns.boxplot("f_month", "n_injured", data= data_gv)
# mth_injured_plot.set_title("Person injured in incidents per month")


In [33]:
####################################### Count Plot #################################
# Count Plot for Statewise incidences of Gun Violence
# ALEX: remove plotting
# state_inc_plot = sns.countplot("state", data = data_gv)
# state_inc_plot.set_title("Staterwise incidence of Gun Violence")
# state_inc_plot.set_xticklabels(state_inc_plot.get_xticklabels(), rotation=90)

In [34]:
## Count Plot for State House District wise
# ALEX: remove plotting
# state_inc_plot = sns.countplot("state_house_district", data = data_gv)
# state_inc_plot.set_title("State House District wise incidence of Gun Violence")
# state_inc_plot.set_xticklabels(state_inc_plot.get_xticklabels())

In [35]:
# Count Plot for State Senate District wise
# ALEX: remove plotting
# state_inc_plot = sns.countplot("state_senate_district", data = data_gv)
# state_inc_plot.set_title("State Senate District wise incidence of Gun Violence")
# state_inc_plot.set_xticklabels(state_inc_plot.get_xticklabels())


In [36]:
# Count Plot for Weekwise incidences of Gun Violence
# ALEX: remove plotting
# wk_inc_plot = sns.countplot("f_weekday", data = data_gv)
# wk_inc_plot.set_title("Weekwise incidence of Gun Violence")

In [37]:
# Count Plot for Monthwise incidences of Gun Violence
# ALEX: remove plotting
# mth_inc_plot = sns.countplot("f_month", data = data_gv)
# mth_inc_plot.set_title("Monthwise incidence of Gun Violence")

In [38]:
# Count Plot for Yearwise incidences of Gun Violence
# ALEX: remove plotting
# yr_inc_plot = sns.countplot("f_year", data = data_gv)
# yr_inc_plot.set_title("Yearwise incidence of Gun Violence")

In [39]:
################################# FacetGrid ################################
# Facet Grid Graph for Male/Female Participants per Year
# ALEX: remove plotting
# g = sns.FacetGrid(data_gv, hue="f_year", palette="Set1", size=5, hue_kws={"marker": ["^", "v","*",">","<","o"]})
# g.map(plt.scatter, "male_participant", "female_participant", s=100, linewidth=.5, edgecolor="white")
# g.add_legend();

In [40]:
# Facet Grid Graph for Persons Killed and Injured per Year
# ALEX: remove plotting
# g = sns.FacetGrid(data_gv, hue="f_year", palette="Set1", size=5, hue_kws={"marker": ["^", "v","*",">","<","o"]})
# g.map(plt.scatter, "n_injured", "n_killed", s=100, linewidth=.5, edgecolor="white")
# g.add_legend();


In [41]:
# Facet Grid Graph for Persons Killed and Injured on Particular Days of the Week
# ALEX: remove plotting
# g = sns.FacetGrid(data_gv, hue="f_weekday", palette="Set1", size=5, hue_kws={"marker": ["^", "v","h","o",">","<","d"]})
# g.map(plt.scatter, "n_injured", "n_killed", s=100, linewidth=.5, edgecolor="white")
# g.add_legend();

In [42]:
## Please UPVOTE, if you Like the Data Exploration and Plotting