# Import Libraries and Data

In [1]:
import quandl
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm # Using .api imports the public access version of statsmodels, which is a library that handles 
# statistical models.
import os
import warnings # This is a library that handles warnings.

warnings.filterwarnings("ignore") # Disable deprecation warnings that could indicate, for instance, a suspended library or 
# feature. These are more relevant to developers and very seldom to analysts.

# Apply the 'fivethirtyeight' style sheet so that all plots share the same look.
# More examples of styling options:
# https://matplotlib.org/3.2.1/tutorials/introductory/customizing.html
# https://matplotlib.org/3.1.0/gallery/style_sheets/fivethirtyeight.html
matplotlib.style.use('fivethirtyeight')

In [2]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Set Path
# Root folder of the project; every data file below is located relative to it.
# Defaults to the original local folder, but can be overridden by setting the
# GUN_VIOLENCE_PROJECT_DIR environment variable, so the notebook can run on another
# machine without editing this cell.
path = os.environ.get('GUN_VIOLENCE_PROJECT_DIR', r'C:\Users\bwink\Downloads\Data Analytics A6')

In [4]:
# Load the original gun-violence CSV (Data/Original Data) into `df`.
df = pd.read_csv(
    os.path.join(path, 'Data', 'Original Data', 'gun-violence-data.csv')
)

In [15]:
# Load the partly cleaned household-firearms CSV (Data/Prepared Data) into `df_time`;
# this is the frame intended for the time series analysis.
df_time = pd.read_csv(
    os.path.join(path, 'Data', 'Prepared Data', 'Household Firearms_partlycleaned.csv')
)

# Cleaning

In [5]:
# Preview the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male,0::Bernard Gillis,,0::Killed||1::Injured||2::Injured||3::Injured,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://losangeles.cbslocal.com/2013/01/01/man-...,62.0,35.0
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,http://www.gunviolencearchive.org/incident/478855,http://chronicle.northcoastnow.com/2013/02/14/...,False,...,0::25||1::31||2::33||3::34||4::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||2::Male||3::Male||4::Male,0::Damien Bell||1::Desmen Noble||2::Herman Sea...,,"0::Injured, Unharmed, Arrested||1::Unharmed, A...",0::Subject-Suspect||1::Subject-Suspect||2::Vic...,http://www.morningjournal.com/general-news/201...,56.0,13.0
3,478925,2013-01-05,Colorado,Aurora,16000 block of East Ithaca Place,4,0,http://www.gunviolencearchive.org/incident/478925,http://www.dailydemocrat.com/20130106/aurora-s...,False,...,0::29||1::33||2::56||3::33,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Female||1::Male||2::Male||3::Male,0::Stacie Philbrook||1::Christopher Ratliffe||...,,0::Killed||1::Killed||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://denver.cbslocal.com/2013/01/06/officer-...,40.0,28.0
4,478959,2013-01-07,North Carolina,Greensboro,307 Mourning Dove Terrace,2,2,http://www.gunviolencearchive.org/incident/478959,http://www.journalnow.com/news/local/article_d...,False,...,0::18||1::46||2::14||3::47,0::Adult 18+||1::Adult 18+||2::Teen 12-17||3::...,0::Female||1::Male||2::Male||3::Female,0::Danielle Imani Jameison||1::Maurice Eugene ...,3::Family,0::Injured||1::Injured||2::Killed||3::Killed,0::Victim||1::Victim||2::Victim||3::Subject-Su...,http://myfox8.com/2013/01/08/update-mother-sho...,62.0,27.0


In [7]:
# Build `df_drop`: a copy of `df` without the identifier, URL/source, participant-detail
# and state-district columns listed below. `df` itself is left unchanged.
df_drop = df.drop(
    columns=[
        'incident_id',
        'address',
        'incident_url',
        'source_url',
        'incident_url_fields_missing',
        'participant_name',
        'participant_age_group',
        'participant_gender',
        'participant_status',
        'participant_type',
        'sources',
        'state_house_district',
        'state_senate_district',
    ]
)

In [8]:
# Check which columns remain after the drop by previewing the first five rows.
df_drop.head(n=5)

Unnamed: 0,date,state,city_or_county,n_killed,n_injured,congressional_district,gun_stolen,gun_type,incident_characteristics,latitude,location_description,longitude,n_guns_involved,notes,participant_age,participant_relationship
0,2013-01-01,Pennsylvania,Mckeesport,0,4,14.0,,,Shot - Wounded/Injured||Mass Shooting (4+ vict...,40.3467,,-79.8559,,Julian Sims under investigation: Four Shot and...,0::20,
1,2013-01-01,California,Hawthorne,1,3,43.0,,,"Shot - Wounded/Injured||Shot - Dead (murder, a...",33.909,,-118.333,,Four Shot; One Killed; Unidentified shooter in...,0::20,
2,2013-01-01,Ohio,Lorain,1,3,9.0,0::Unknown||1::Unknown,0::Unknown||1::Unknown,"Shot - Wounded/Injured||Shot - Dead (murder, a...",41.4455,Cotton Club,-82.1377,2.0,,0::25||1::31||2::33||3::34||4::33,
3,2013-01-05,Colorado,Aurora,4,0,6.0,,,"Shot - Dead (murder, accidental, suicide)||Off...",39.6518,,-104.802,,,0::29||1::33||2::56||3::33,
4,2013-01-07,North Carolina,Greensboro,2,2,6.0,0::Unknown||1::Unknown,0::Handgun||1::Handgun,"Shot - Wounded/Injured||Shot - Dead (murder, a...",36.114,,-79.9569,2.0,Two firearms recovered. (Attempted) murder sui...,0::18||1::46||2::14||3::47,3::Family


In [11]:
# Write the reduced frame to Data/Prepared Data/gun_drop1.csv so it can be
# investigated in Excel.
df_drop.to_csv(
    os.path.join(path, 'Data', 'Prepared Data', 'gun_drop1.csv')
)

In [12]:
# Load gun_drop2.csv from Data/Prepared Data into `df_drop2`
# (presumably the Excel-edited version of gun_drop1.csv -- confirm).
# Only `df_drop2` is bound here. A chained assignment that also rebinds `df` would
# replace the raw data loaded at the top of the notebook, and re-running the
# column-dropping cell would then fail because this file no longer has those columns.
df_drop2 = pd.read_csv(os.path.join(path, 'Data', 'Prepared Data', 'gun_drop2.csv'))

In [13]:
# Preview the first five rows of the reloaded, reduced frame.
df_drop2.head(n=5)

Unnamed: 0,date,state,city_or_county,Killed,Injured,guns_involved
0,1/1/2013,Pennsylvania,Mckeesport,0,4,1.0
1,1/1/2013,California,Hawthorne,1,1,1.0
2,1/1/2013,Ohio,Lorain,1,3,2.0
3,1/5/2013,Colorado,Aurora,4,0,1.0
4,1/7/2013,North Carolina,Greensboro,2,2,2.0


In [14]:
# Preview the last five rows to see where the incident data ends.
df_drop2.tail(n=5)

Unnamed: 0,date,state,city_or_county,Killed,Injured,guns_involved
239672,3/31/2018,Louisiana,Rayne,0,0,1.0
239673,3/31/2018,Louisiana,Natchitoches,1,0,1.0
239674,3/31/2018,Louisiana,Gretna,0,1,1.0
239675,3/31/2018,Texas,Houston,1,0,1.0
239676,3/31/2018,Maine,Norridgewock,2,0,2.0


In [16]:
# Preview the first five rows of the time series frame.
df_time.head(n=5)

Unnamed: 0,Year,State,Background_Checks,Permit_Required,Female_Suicide_Rate,Male_Suicide_Rate
0,1980,Alabama,0,0,0.824324,0.833795
1,1981,Alabama,0,0,0.692308,0.831126
2,1982,Alabama,0,0,0.771739,0.821429
3,1983,Alabama,0,0,0.688172,0.819277
4,1984,Alabama,0,0,0.71,0.775956


In [17]:
# Preview the last five rows of the time series frame.
df_time.tail(n=5)

Unnamed: 0,Year,State,Background_Checks,Permit_Required,Female_Suicide_Rate,Male_Suicide_Rate
1845,2012,Wyoming,0,0,0.375,0.647482
1846,2013,Wyoming,0,0,0.529412,0.714286
1847,2014,Wyoming,0,0,0.583333,0.666667
1848,2015,Wyoming,0,0,0.393939,0.66129
1849,2016,Wyoming,0,0,0.375,0.669643


In [18]:
# Count how often each distinct full row occurs in the time series frame
# (sorted by count, which is the default behaviour made explicit here).
df_time.value_counts(sort=True)

Year  State           Background_Checks  Permit_Required  Female_Suicide_Rate  Male_Suicide_Rate
1980  Alabama         0                  0                0.824324             0.833795             1
2004  Texas           0                  0                0.436620             0.610649             1
      South Dakota    0                  0                0.277778             0.595745             1
      South Carolina  0                  0                0.527473             0.716113             1
      Rhode Island    1                  1                0.066667             0.300000             1
                                                                                                   ..
1992  Indiana         0                  0                0.409091             0.678632             1
      Illinois        0                  1                0.241803             0.521158             1
      Idaho           0                  0                0.428571             0.782313