In [2]:
import pandas as pd
import numpy as np
import re
from tqdm import tqdm, tqdm_notebook
#gmaps.configure(api_key='AIzaSyARYOoJ7XMYhRr2TcGkt-0jhCLonrSwoeE')

## Data Source: https://www.kaggle.com/jameslko/gun-violence-data

## Tasks/Cool things to look at:
* Break out encoded elements
* Find number of men/women involved in each incident
    - Count the number of men/women who are suspects and number who are victims
    - Make some boolean columns for if men/women were suspects
* Find the number of people involved at each age group
    - Count the number of suspects/victims in each age group
    - Make boolean columns for which age group were suspects
    - Where possible (there are a lot of missing values) make better age groupings from participant_age feature.
* Correllation table and orbital visualization

# Read in the Data from the csv

In [3]:
df = pd.read_csv('Data/gun-violence-data_01-2013_03-2018.csv')
pd.options.display.max_columns = 1000

# Data Munging
## Grab a list of each of the types of incidents
The incidents have a unique set and we are putting in a True or False Value into a column set that represents the collection of incident types for each row.

In [4]:
n = df['incident_characteristics'].dropna()
arr = set()
for x in n:
    for s in str(x).split('||'):
        for ss in s.split('|'):
            arr.add(ss)

arr = sorted(arr)

for x in tqdm_notebook(arr):
    df[x] = df['incident_characteristics'].str.contains(x, regex=False)

HBox(children=(IntProgress(value=0, max=109), HTML(value='')))




## Break out categories encoded within dataframe

Some of the elements of the data are encoded across the features by number, so '0::Suspect' is related to all other elements with '0::' prefix.

In [15]:
def category_split(feature):
    age_group = df[str(feature)].copy().dropna()

    arr = set()

    for string in tqdm_notebook(age_group):
    
        if '::' in string:
            temp_dict = dict(x.split('::') for x in string.split('||'))
        
            for _, value in temp_dict.items():
                if ',' in value:
                    value = value.split(', ')
                    
                    for item in value:
                        arr.add(item.lower())
                        
                    continue
                        
                arr.add(value.lower())
        
    arr = sorted(arr)
    return arr

In [17]:
encoded_categories = ['participant_gender', 'participant_status', 'participant_type']

for x in tqdm_notebook(encoded_categories):
    categories_list = category_split(x)
    print(categories_list)
    for y in categories_list:
        df['is_' + y] = df[x].str.contains(y, regex=False)
        
df.head(n=3)

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

HBox(children=(IntProgress(value=0, max=203315), HTML(value='')))

['female', 'male']


HBox(children=(IntProgress(value=0, max=212051), HTML(value='')))

['arrested', 'injured', 'killed', 'unharmed']


HBox(children=(IntProgress(value=0, max=214814), HTML(value='')))

['subject-suspect', 'victim']



Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,congressional_district,gun_stolen,gun_type,incident_characteristics,latitude,location_description,longitude,n_guns_involved,notes,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district,ATF/LE Confiscation/Raid/Arrest,Accidental Shooting,Accidental Shooting - Death,Accidental Shooting - Injury,Accidental Shooting at a Business,Accidental/Negligent Discharge,Animal shot/killed,Armed robbery with injury/death and/or evidence of DGU found,"Assault weapon (AR-15, AK-47, and ALL variants defined by law enforcement)",Attempted Murder/Suicide (one variable unsuccessful),BB/Pellet/Replica gun,Bar/club incident - in or around establishment,Brandishing/flourishing/open carry/lost/found,Car-jacking,Child Involved Incident,Child injured (not child shooter),Child injured by child,Child injured self,Child killed (not child shooter),Child killed by child,Child killed self,Child picked up & fired gun,Child with gun - no shots fired,Cleaning gun,Concealed Carry License - Perpetrator,Concealed Carry License - Victim,Criminal act with stolen gun,Defensive Use,"Defensive Use - Crime occurs, victim shoots subject/suspect/perpetrator",Defensive Use - Good Samaritan/Third Party,"Defensive Use - Shots fired, no injury/death",Defensive Use - Stand Your Ground/Castle Doctrine established,Defensive Use - Victim stops crime,Defensive Use - WITHOUT a gun,Defensive use - No shots fired,Domestic Violence,"Drive-by (car to street, car to car)",Drug involvement,Gang involvement,Ghost gun,"Gun at school, no death/injury - elementary/secondary school","Gun at school, no death/injury - university/college",Gun buy back action,Gun range/gun shop/gun show shooting,Gun shop robbery or burglary,Gun(s) stolen from owner,Guns stolen from law enforcement,Hate crime,Home Invasion,Home Invasion - No death or injury,Home Invasion - Resident injured,Home Invasion - Resident killed,Home Invasion - subject/suspect/perpetrator injured,Home Invasion - subject/suspect/perpetrator killed,House party,Hunting accident,Implied Weapon,Institution/Group/Business,Kidnapping/abductions/hostage,LOCKDOWN/ALERT ONLY: No GV Incident Occurred Onsite,"Mass Murder (4+ deceased victims excluding the subject/suspect/perpetrator , one location)","Mass Shooting (4+ victims injured or killed excluding the subject/suspect/perpetrator, one location)","Mistaken ID (thought it was an intruder/threat, was friend/family)",Murder/Suicide,NAV,Non-Aggression Incident,Non-Shooting Incident,Officer Involved Incident,Officer Involved Incident - Weapon involved but no shots fired,Officer Involved Shooting - Accidental discharge - no injury required,Officer Involved Shooting - Bystander killed,Officer Involved Shooting - Bystander shot,Officer Involved Shooting - Officer killed,Officer Involved Shooting - Officer shot,"Officer Involved Shooting - Shots fired, no injury",Officer Involved Shooting - subject/suspect/perpetrator killed,Officer Involved Shooting - subject/suspect/perpetrator shot,Officer Involved Shooting - subject/suspect/perpetrator suicide at standoff,Officer Involved Shooting - subject/suspect/perpetrator suicide by cop,Officer Involved Shooting - subject/suspect/perpetrator surrender at standoff,Officer Involved Shooting - subject/suspect/perpetrator unarmed,Pistol-whipping,Playing with gun,Police Targeted,Political Violence,Possession (gun(s) found during commission of other crimes),Possession of gun by felon or prohibited person,Road rage,School Incident,School Shooting - elementary/secondary school,School Shooting - university/college,Self-Inflicted (not suicide or suicide attempt - NO PERP),Sex crime involving firearm,Shootout (where VENN diagram of shooters and victims overlap),"Shot - Dead (murder, accidental, suicide)",Shot - Wounded/Injured,ShotSpotter,Shots Fired - No Injuries,"Shots fired, no action (reported, no evidence found)","Spree Shooting (multiple victims, multiple locations)",Stolen/Illegally owned gun{s} recovered during arrest/warrant,Suicide - Attempt,Suicide^,TSA Action,Terrorism Involvement,Thought gun was unloaded,Under the influence of alcohol or drugs (only applies to the subject/suspect/perpetrator ),Unlawful purchase/sale,Workplace shooting (disgruntled employee),is_female,is_male,is_arrested,is_injured,is_killed,is_unharmed,is_subject-suspect,is_victim
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,14.0,,,Shot - Wounded/Injured||Mass Shooting (4+ vict...,40.3467,,-79.8559,,Julian Sims under investigation: Four Shot and...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,43.0,,,"Shot - Wounded/Injured||Shot - Dead (murder, a...",33.909,,-118.333,,Four Shot; One Killed; Unidentified shooter in...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male,0::Bernard Gillis,,0::Killed||1::Injured||2::Injured||3::Injured,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://losangeles.cbslocal.com/2013/01/01/man-...,62.0,35.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,http://www.gunviolencearchive.org/incident/478855,http://chronicle.northcoastnow.com/2013/02/14/...,False,9.0,0::Unknown||1::Unknown,0::Unknown||1::Unknown,"Shot - Wounded/Injured||Shot - Dead (murder, a...",41.4455,Cotton Club,-82.1377,2.0,,0::25||1::31||2::33||3::34||4::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||2::Male||3::Male||4::Male,0::Damien Bell||1::Desmen Noble||2::Herman Sea...,,"0::Injured, Unharmed, Arrested||1::Unharmed, A...",0::Subject-Suspect||1::Subject-Suspect||2::Vic...,http://www.morningjournal.com/general-news/201...,56.0,13.0,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [7]:
exp = df['participant_status'].isnull()
temp = df[~exp]
note = temp[temp['participant_status'].str.contains('Killed, Unharmed, Arrested')].head(n=3)

np.set_printoptions(linewidth=1000)
print(np.array(note.notes))

['Occured at "known narcotics house," suggesting it may have been robbery attempt or home invasion, but that is not confirmed in sources' 'mm; ms 4 killed. Drug activity. Perp guilty/sentenced.;\r\n39.115044, -85.892496' 'Short standoff at motel ends, after confrontation w/ armed robbery suspect; suspect shot, killed;']


In [1]:
temp[temp['participant_status'].str.contains('Killed, Unharmed, Arrested')].shape

NameError: name 'temp' is not defined

## Grabbing the number of victims and suspects
Using regex we make a count of each of the numbers of victims and numbers of suspects / subjects and add a column for each of these values

In [13]:
#n = df[['incident_id','participant_type']].dropna()
df['n_victims'] = None
df['n_suspects'] = None

r = re.compile("\|+")
rr = re.compile("[0-9]+\:+")
rrv = re.compile("[0-9]+\:+Vic+")
rrs = re.compile("[0-9]+\:+Subj+")
for i,row in tqdm_notebook(df.iterrows()):
    if(pd.notnull(row['participant_type'])):
        count_victims = len(rrv.findall(row['participant_type']))
        count_perps = len(rrs.findall(row['participant_type']))
        df.set_value(i,'n_victims', count_victims)
        df.set_value(i, 'n_suspects', count_perps)



HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [12]:
df.describe(include=['O'])

Unnamed: 0,date,state,city_or_county,address,incident_url,source_url,gun_stolen,gun_type,incident_characteristics,location_description,notes,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,ATF/LE Confiscation/Raid/Arrest,Accidental Shooting,Accidental Shooting - Death,Accidental Shooting - Injury,Accidental Shooting at a Business,Accidental/Negligent Discharge,Animal shot/killed,Armed robbery with injury/death and/or evidence of DGU found,"Assault weapon (AR-15, AK-47, and ALL variants defined by law enforcement)",Attempted Murder/Suicide (one variable unsuccessful),BB/Pellet/Replica gun,Bar/club incident - in or around establishment,Brandishing/flourishing/open carry/lost/found,Car-jacking,Child Involved Incident,Child injured (not child shooter),Child injured by child,Child injured self,Child killed (not child shooter),Child killed by child,Child killed self,Child picked up & fired gun,Child with gun - no shots fired,Cleaning gun,Concealed Carry License - Perpetrator,Concealed Carry License - Victim,Criminal act with stolen gun,Defensive Use,"Defensive Use - Crime occurs, victim shoots subject/suspect/perpetrator",Defensive Use - Good Samaritan/Third Party,"Defensive Use - Shots fired, no injury/death",Defensive Use - Stand Your Ground/Castle Doctrine established,Defensive Use - Victim stops crime,Defensive Use - WITHOUT a gun,Defensive use - No shots fired,Domestic Violence,"Drive-by (car to street, car to car)",Drug involvement,Gang involvement,Ghost gun,"Gun at school, no death/injury - elementary/secondary school","Gun at school, no death/injury - university/college",Gun buy back action,Gun range/gun shop/gun show shooting,Gun shop robbery or burglary,Gun(s) stolen from owner,Guns stolen from law enforcement,Hate crime,Home Invasion,Home Invasion - No death or injury,Home Invasion - Resident injured,Home Invasion - Resident killed,Home Invasion - subject/suspect/perpetrator injured,Home Invasion - subject/suspect/perpetrator killed,House party,Hunting accident,Implied Weapon,Institution/Group/Business,Kidnapping/abductions/hostage,LOCKDOWN/ALERT ONLY: No GV Incident Occurred Onsite,"Mass Murder (4+ deceased victims excluding the subject/suspect/perpetrator , one location)","Mass Shooting (4+ victims injured or killed excluding the subject/suspect/perpetrator, one location)","Mistaken ID (thought it was an intruder/threat, was friend/family)",Murder/Suicide,NAV,Non-Aggression Incident,Non-Shooting Incident,Officer Involved Incident,Officer Involved Incident - Weapon involved but no shots fired,Officer Involved Shooting - Accidental discharge - no injury required,Officer Involved Shooting - Bystander killed,Officer Involved Shooting - Bystander shot,Officer Involved Shooting - Officer killed,Officer Involved Shooting - Officer shot,"Officer Involved Shooting - Shots fired, no injury",Officer Involved Shooting - subject/suspect/perpetrator killed,Officer Involved Shooting - subject/suspect/perpetrator shot,Officer Involved Shooting - subject/suspect/perpetrator suicide at standoff,Officer Involved Shooting - subject/suspect/perpetrator suicide by cop,Officer Involved Shooting - subject/suspect/perpetrator surrender at standoff,Officer Involved Shooting - subject/suspect/perpetrator unarmed,Pistol-whipping,Playing with gun,Police Targeted,Political Violence,Possession (gun(s) found during commission of other crimes),Possession of gun by felon or prohibited person,Road rage,School Incident,School Shooting - elementary/secondary school,School Shooting - university/college,Self-Inflicted (not suicide or suicide attempt - NO PERP),Sex crime involving firearm,Shootout (where VENN diagram of shooters and victims overlap),"Shot - Dead (murder, accidental, suicide)",Shot - Wounded/Injured,ShotSpotter,Shots Fired - No Injuries,"Shots fired, no action (reported, no evidence found)","Spree Shooting (multiple victims, multiple locations)",Stolen/Illegally owned gun{s} recovered during arrest/warrant,Suicide - Attempt,Suicide^,TSA Action,Terrorism Involvement,Thought gun was unloaded,Under the influence of alcohol or drugs (only applies to the subject/suspect/perpetrator ),Unlawful purchase/sale,Workplace shooting (disgruntled employee),is_participant_gender,is_participant_status,is_participant_type,is_Female,is_Male,"is_Male, female",is_Arrested,is_Injured,"is_Injured, Arrested","is_Injured, Unharmed","is_Injured, Unharmed, Arrested",is_Killed,"is_Killed, Arrested","is_Killed, Injured","is_Killed, Unharmed","is_Killed, Unharmed, Arrested",is_Unharmed,"is_Unharmed, Arrested",is_Subject-Suspect,is_Victim
count,239677,239677,239677,223180,239677,239209,140179,140226,239351,42089,158660,147379,197558,203315,117424,15774,212051,214814,239068,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,239351,203315,212051,214814,203315,203315,203315,212051,212051,212051,212051,212051,212051,212051,212051,212051,212051,212051,212051,214814,214814
unique,1725,51,12898,198037,239677,213989,349,2502,18126,27595,136652,18951,898,873,113488,284,2150,259,217280,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
top,2017-01-01,Illinois,Chicago,2375 International Pkwy,http://www.gunviolencearchive.org/incident/186898,http://blog.tsa.gov,0::Unknown,0::Unknown,Shot - Wounded/Injured,Austin,man shot,0::24,0::Adult 18+,0::Male,0::Officer,1::Significant others - current or former,0::Injured,0::Victim,http://blog.tsa.gov,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True
freq,342,17556,10814,160,1,1092,121310,93559,47541,240,501,3814,94671,93496,144,2651,42293,58564,1092,221360,231138,237622,233985,238765,232929,238628,219628,237299,238882,237636,234761,220334,234407,237228,238666,239192,239137,239070,239263,239184,238744,239186,238850,238787,238727,237205,231960,236702,238813,238532,237959,236619,238343,238945,228510,225696,222252,233674,239270,237320,239075,239220,238917,238754,233038,239118,239249,228710,234199,236399,238387,238285,238500,238584,238847,238079,228305,236674,237873,239253,237714,239271,236439,239349,236301,194514,221363,235367,239121,239320,239211,239103,238260,236524,234696,235654,238375,239203,237063,239024,235600,238211,238776,239338,208488,222186,237211,236546,239119,239190,237464,238697,237020,185942,145425,238649,203601,237309,238953,231791,238749,234007,234315,239321,238948,235857,238572,239176,203314,148573,155263,165623,194136,203314,138602,114865,208757,212000,212029,158200,211999,212041,212016,212037,122645,148573,143172,155263


We need to check from here down, I don't think any of these values could be 0 if we are doing it right

**Solved by turning off regex flag in .contains().  Good catch!**

In [15]:
n = df['incident_characteristics']
incd_dict = {}
for c in arr:
    incd_dict[c] = df[c].sum()
print(incd_dict)   
#for key, value in sorted(incd_dict.iteritems(), key=lambda (k,v): (v,k)):
#    print "%s: %s" % (key, value)


{'ATF/LE Confiscation/Raid/Arrest': 17991, 'Accidental Shooting': 8213, 'Accidental Shooting - Death': 1729, 'Accidental Shooting - Injury': 5366, 'Accidental Shooting at a Business': 586, 'Accidental/Negligent Discharge': 6422, 'Animal shot/killed': 723, 'Armed robbery with injury/death and/or evidence of DGU found': 19723, 'Assault weapon (AR-15, AK-47, and ALL variants defined by law enforcement)': 2052, 'Attempted Murder/Suicide (one variable unsuccessful)': 469, 'BB/Pellet/Replica gun': 1715, 'Bar/club incident - in or around establishment': 4590, 'Brandishing/flourishing/open carry/lost/found': 19017, 'Car-jacking': 4944, 'Child Involved Incident': 2123, 'Child injured (not child shooter)': 685, 'Child injured by child': 159, 'Child injured self': 214, 'Child killed (not child shooter)': 281, 'Child killed by child': 88, 'Child killed self': 167, 'Child picked up & fired gun': 607, 'Child with gun - no shots fired': 165, 'Cleaning gun': 501, 'Concealed Carry License - Perpetrator

## Grabbing the number of men and women involved (maybe we can do this through iteration)
Using regex we make a count of each of the numbers of victims and numbers of suspects / subjects and add a column for each of these values

#n = df[['incident_id','participant_type']].dropna()
categoriesToSplit = ['participant_type', 'participant_gender']

df['n_men_involved'] = None
df['n_women_involved'] = None

r = re.compile("\|+")
rr = re.compile("[0-9]+\:+")
rrv = re.compile("[0-9]+\:+Vic+")
rrs = re.compile("[0-9]+\:+Subj+")
for i,row in tqdm_notebook(df.iterrows()):
    if(pd.notnull(row['participant_type'])):
        count_victims = len(rrv.findall(row['participant_type']))
        count_perps = len(rrs.findall(row['participant_type']))
        df.set_value(i,'n_victims', count_victims)
        df.set_value(i, 'n_suspects', count_perps)



## Non shooting vs total count

In [13]:
dfnna = df[['incident_characteristics']].dropna()
print(len(dfnna))
len(dfnna[dfnna['incident_characteristics'].str.contains('.*Non\-Shooting.*', regex=True)])


239351


44837

In [14]:
df_locs = df_locs.dropna()
df_locs_sm = df_locs[0:500]

NameError: name 'df_locs' is not defined

In [15]:
df.head(n=1)

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,congressional_district,gun_stolen,gun_type,incident_characteristics,latitude,location_description,longitude,n_guns_involved,notes,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district,ATF/LE Confiscation/Raid/Arrest,Accidental Shooting,Accidental Shooting - Death,Accidental Shooting - Injury,Accidental Shooting at a Business,Accidental/Negligent Discharge,Animal shot/killed,Armed robbery with injury/death and/or evidence of DGU found,"Assault weapon (AR-15, AK-47, and ALL variants defined by law enforcement)",Attempted Murder/Suicide (one variable unsuccessful),BB/Pellet/Replica gun,Bar/club incident - in or around establishment,Brandishing/flourishing/open carry/lost/found,Car-jacking,Child Involved Incident,Child injured (not child shooter),Child injured by child,Child injured self,Child killed (not child shooter),Child killed by child,Child killed self,Child picked up & fired gun,Child with gun - no shots fired,Cleaning gun,Concealed Carry License - Perpetrator,Concealed Carry License - Victim,Criminal act with stolen gun,Defensive Use,"Defensive Use - Crime occurs, victim shoots subject/suspect/perpetrator",Defensive Use - Good Samaritan/Third Party,"Defensive Use - Shots fired, no injury/death",Defensive Use - Stand Your Ground/Castle Doctrine established,Defensive Use - Victim stops crime,Defensive Use - WITHOUT a gun,Defensive use - No shots fired,Domestic Violence,"Drive-by (car to street, car to car)",Drug involvement,Gang involvement,Ghost gun,"Gun at school, no death/injury - elementary/secondary school","Gun at school, no death/injury - university/college",Gun buy back action,Gun range/gun shop/gun show shooting,Gun shop robbery or burglary,Gun(s) stolen from owner,Guns stolen from law enforcement,Hate crime,Home Invasion,Home Invasion - No death or injury,Home Invasion - Resident injured,Home Invasion - Resident killed,Home Invasion - subject/suspect/perpetrator injured,Home Invasion - subject/suspect/perpetrator killed,House party,Hunting accident,Implied Weapon,Institution/Group/Business,Kidnapping/abductions/hostage,LOCKDOWN/ALERT ONLY: No GV Incident Occurred Onsite,"Mass Murder (4+ deceased victims excluding the subject/suspect/perpetrator , one location)","Mass Shooting (4+ victims injured or killed excluding the subject/suspect/perpetrator, one location)","Mistaken ID (thought it was an intruder/threat, was friend/family)",Murder/Suicide,NAV,Non-Aggression Incident,Non-Shooting Incident,Officer Involved Incident,Officer Involved Incident - Weapon involved but no shots fired,Officer Involved Shooting - Accidental discharge - no injury required,Officer Involved Shooting - Bystander killed,Officer Involved Shooting - Bystander shot,Officer Involved Shooting - Officer killed,Officer Involved Shooting - Officer shot,"Officer Involved Shooting - Shots fired, no injury",Officer Involved Shooting - subject/suspect/perpetrator killed,Officer Involved Shooting - subject/suspect/perpetrator shot,Officer Involved Shooting - subject/suspect/perpetrator suicide at standoff,Officer Involved Shooting - subject/suspect/perpetrator suicide by cop,Officer Involved Shooting - subject/suspect/perpetrator surrender at standoff,Officer Involved Shooting - subject/suspect/perpetrator unarmed,Pistol-whipping,Playing with gun,Police Targeted,Political Violence,Possession (gun(s) found during commission of other crimes),Possession of gun by felon or prohibited person,Road rage,School Incident,School Shooting - elementary/secondary school,School Shooting - university/college,Self-Inflicted (not suicide or suicide attempt - NO PERP),Sex crime involving firearm,Shootout (where VENN diagram of shooters and victims overlap),"Shot - Dead (murder, accidental, suicide)",Shot - Wounded/Injured,ShotSpotter,Shots Fired - No Injuries,"Shots fired, no action (reported, no evidence found)","Spree Shooting (multiple victims, multiple locations)",Stolen/Illegally owned gun{s} recovered during arrest/warrant,Suicide - Attempt,Suicide^,TSA Action,Terrorism Involvement,Thought gun was unloaded,Under the influence of alcohol or drugs (only applies to the subject/suspect/perpetrator ),Unlawful purchase/sale,Workplace shooting (disgruntled employee),n_victims,n_suspects
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,14.0,,,Shot - Wounded/Injured||Mass Shooting (4+ vict...,40.3467,,-79.8559,,Julian Sims under investigation: Four Shot and...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,4,1


# I don't think this is right

### You're right, it's dumb and wrong.  :-D

In [16]:
dummy_columns = ['incident_characteristics', 'participant_age_group', 'participant_gender']

for x in tqdm_notebook(dummy_columns):
    n = df[x].dropna()
    arr = set()
    
    for y in n:
        for s in str(y).split('||'):
            for ss in s.split('|'):
                arr.add(ss)
    
    arr = sorted(arr)
    
    for z in arr:
        df[y] = df[x].str.contains(y)

df.head(n=1)

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

  from ipykernel import kernelapp as app





Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,congressional_district,gun_stolen,gun_type,incident_characteristics,latitude,location_description,longitude,n_guns_involved,notes,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district,ATF/LE Confiscation/Raid/Arrest,Accidental Shooting,Accidental Shooting - Death,Accidental Shooting - Injury,Accidental Shooting at a Business,Accidental/Negligent Discharge,Animal shot/killed,Armed robbery with injury/death and/or evidence of DGU found,"Assault weapon (AR-15, AK-47, and ALL variants defined by law enforcement)",Attempted Murder/Suicide (one variable unsuccessful),BB/Pellet/Replica gun,Bar/club incident - in or around establishment,Brandishing/flourishing/open carry/lost/found,Car-jacking,Child Involved Incident,Child injured (not child shooter),Child injured by child,Child injured self,Child killed (not child shooter),Child killed by child,Child killed self,Child picked up & fired gun,Child with gun - no shots fired,Cleaning gun,Concealed Carry License - Perpetrator,Concealed Carry License - Victim,Criminal act with stolen gun,Defensive Use,"Defensive Use - Crime occurs, victim shoots subject/suspect/perpetrator",Defensive Use - Good Samaritan/Third Party,"Defensive Use - Shots fired, no injury/death",Defensive Use - Stand Your Ground/Castle Doctrine established,Defensive Use - Victim stops crime,Defensive Use - WITHOUT a gun,Defensive use - No shots fired,Domestic Violence,"Drive-by (car to street, car to car)",Drug involvement,Gang involvement,Ghost gun,"Gun at school, no death/injury - elementary/secondary school","Gun at school, no death/injury - university/college",Gun buy back action,Gun range/gun shop/gun show shooting,Gun shop robbery or burglary,Gun(s) stolen from owner,Guns stolen from law enforcement,Hate crime,Home Invasion,Home Invasion - No death or injury,Home Invasion - Resident injured,Home Invasion - Resident killed,Home Invasion - subject/suspect/perpetrator injured,Home Invasion - subject/suspect/perpetrator killed,House party,Hunting accident,Implied Weapon,Institution/Group/Business,Kidnapping/abductions/hostage,LOCKDOWN/ALERT ONLY: No GV Incident Occurred Onsite,"Mass Murder (4+ deceased victims excluding the subject/suspect/perpetrator , one location)","Mass Shooting (4+ victims injured or killed excluding the subject/suspect/perpetrator, one location)","Mistaken ID (thought it was an intruder/threat, was friend/family)",Murder/Suicide,NAV,Non-Aggression Incident,Non-Shooting Incident,Officer Involved Incident,Officer Involved Incident - Weapon involved but no shots fired,Officer Involved Shooting - Accidental discharge - no injury required,Officer Involved Shooting - Bystander killed,Officer Involved Shooting - Bystander shot,Officer Involved Shooting - Officer killed,Officer Involved Shooting - Officer shot,"Officer Involved Shooting - Shots fired, no injury",Officer Involved Shooting - subject/suspect/perpetrator killed,Officer Involved Shooting - subject/suspect/perpetrator shot,Officer Involved Shooting - subject/suspect/perpetrator suicide at standoff,Officer Involved Shooting - subject/suspect/perpetrator suicide by cop,Officer Involved Shooting - subject/suspect/perpetrator surrender at standoff,Officer Involved Shooting - subject/suspect/perpetrator unarmed,Pistol-whipping,Playing with gun,Police Targeted,Political Violence,Possession (gun(s) found during commission of other crimes),Possession of gun by felon or prohibited person,Road rage,School Incident,School Shooting - elementary/secondary school,School Shooting - university/college,Self-Inflicted (not suicide or suicide attempt - NO PERP),Sex crime involving firearm,Shootout (where VENN diagram of shooters and victims overlap),"Shot - Dead (murder, accidental, suicide)",Shot - Wounded/Injured,ShotSpotter,Shots Fired - No Injuries,"Shots fired, no action (reported, no evidence found)","Spree Shooting (multiple victims, multiple locations)",Stolen/Illegally owned gun{s} recovered during arrest/warrant,Suicide - Attempt,Suicide^,TSA Action,Terrorism Involvement,Thought gun was unloaded,Under the influence of alcohol or drugs (only applies to the subject/suspect/perpetrator ),Unlawful purchase/sale,Workplace shooting (disgruntled employee),n_victims,n_suspects,"Shot - Dead (murder, accidental, suicide)||Suicide^||Murder/Suicide||Domestic Violence",0::Adult 18+||1::Adult 18+,0::Female||1::Male
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,14.0,,,Shot - Wounded/Injured||Mass Shooting (4+ vict...,40.3467,,-79.8559,,Julian Sims under investigation: Four Shot and...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,4,1,True,True,True
