In [None]:
# Import Dependencies
import pandas as pd
import numpy as np
import os
import glob

In [None]:
# Mount Google Drive at /content/Drive so the datasets stored there can be read
from google.colab import drive
drive.mount('/content/Drive')

Mounted at /content/Drive


In [None]:
# map drive to retrieve datasets
# NOTE(review): this cell repeats the mount cell directly above it; it can likely be removed.
from google.colab import drive
drive.mount('/content/Drive')

Drive already mounted at /content/Drive; to attempt to forcibly remount, call drive.mount("/content/Drive", force_remount=True).


In [None]:
# Import the data for inspection
# Each raw CSV is read from the Final_Project/Resources folder on the mounted Drive.
accidental_deaths = pd.read_csv('/content/Drive/MyDrive/Final_Project/Resources/accidental_deaths_2018_2022.csv')
gun_data = pd.read_csv('/content/Drive/MyDrive/Final_Project/Resources/Gun_Data_Codebook_and_OLS_Regression_Outputs_Gun_Data.csv')
gun_laws = pd.read_csv('/content/Drive/MyDrive/Final_Project/Resources/gun_law_provisions_by_year_state.csv')
gun_violence_2013_2018 = pd.read_csv('/content/Drive/MyDrive/Final_Project/Resources/gun_violence_data_01_2013_03_2018.csv')

## Clean the Accidental Deaths 2018 - 2022 CSV file

In [None]:
# Look at the columns of accidental_deaths
accidental_deaths.count()

Incident ID       2000
Incident Date     2000
State             2000
City Or County    2000
Address           1898
# Killed          2000
# Injured         2000
Operations           0
dtype: int64

In [None]:
# Inspect the data in the Incident Date column
accidental_deaths['Incident Date'].head(5)

0    October 21, 2022
1    October 19, 2022
2    October 19, 2022
3    October 19, 2022
4    October 18, 2022
Name: Incident Date, dtype: object

In [None]:
# Break each "Month D, YYYY" string into its three space-separated tokens.
# Note: the 'day' token keeps its trailing comma (e.g. "21,") and the column
# named 'date' receives the year; both are tidied up by later cells.
date_parts = accidental_deaths['Incident Date'].str.split(" ", expand=True)
accidental_deaths[['month', 'day', 'date']] = date_parts

In [None]:
# Confirm the values of the date columns
accidental_deaths['date'].head(5)

0    2022
1    2022
2    2022
3    2022
4    2022
Name: date, dtype: object

In [None]:
# Give the kept columns short snake_case names; the split-out 'date' column
# holds the year, so it becomes 'year'.
accidental_deaths = accidental_deaths.rename(
    columns={
        'Incident ID': 'incident_id',
        'State': 'state',
        '# Killed': 'deaths',
        '# Injured': 'injuries',
        'date': 'year',
    }
)

In [None]:
# Discard the empty 'Operations' column, the location details and the date
# pieces that are no longer needed once 'year' has been extracted.
accidental_deaths = accidental_deaths.drop(
    columns=['Operations', 'Address', 'Incident Date', 'month', 'day', 'City Or County']
)

In [None]:
# Order the incidents from the earliest year to the latest
accidental_deaths = accidental_deaths.sort_values(by='year')
accidental_deaths.head()

Unnamed: 0,incident_id,state,deaths,injuries,year
1999,1227183,Texas,1,0,2018
1920,1274244,New York,1,0,2018
1921,1273747,Iowa,1,0,2018
1922,1275310,Iowa,1,0,2018
1923,1273048,Oklahoma,1,0,2018


In [None]:
# Move 'year' so it sits directly after 'incident_id':
# pop() removes the column and hands it back, insert() puts it at position 1.
year = accidental_deaths.pop('year')
accidental_deaths.insert(1, 'year', year)
accidental_deaths.head()

Unnamed: 0,incident_id,year,state,deaths,injuries
1999,1227183,2018,Texas,1,0
1920,1274244,2018,New York,1,0
1921,1273747,2018,Iowa,1,0
1922,1275310,2018,Iowa,1,0
1923,1273048,2018,Oklahoma,1,0


In [None]:
# accidental_df is a second name for the same DataFrame object (plain assignment, not a copy)
accidental_df = accidental_deaths
# Persist the cleaned frame without its index so it can be picked up by the merge step
accidental_df.to_csv('/content/Drive/MyDrive/Final_Project/cleaned/cleaned_accidental_df.csv', index=False)

## Gun Violence Data

In [None]:
# Review all columns
gun_violence_2013_2018.columns.tolist()

['incident_id',
 'date',
 'state',
 'city_or_county',
 'address',
 'n_killed',
 'n_injured',
 'incident_url',
 'source_url',
 'incident_url_fields_missing',
 'congressional_district',
 'gun_stolen',
 'gun_type',
 'incident_characteristics',
 'latitude',
 'location_description',
 'longitude',
 'n_guns_involved',
 'notes',
 'participant_age',
 'participant_age_group',
 'participant_gender',
 'participant_name',
 'participant_relationship',
 'participant_status',
 'participant_type',
 'sources',
 'state_house_district',
 'state_senate_district']

In [None]:
# Split the dash-separated date string into three parts. The first part (the
# year, per the output below) is written back into 'date'; the other two go
# into the new 'month' and 'day' columns.
gun_violence_2013_2018[['date', 'month', 'day']] = gun_violence_2013_2018['date'].str.split("-", expand=True)

# Rename by exact column name. A substring replace over all column labels
# would also rewrite any other label that happens to contain "date".
gun_violence_2013_2018 = gun_violence_2013_2018.rename(columns={'date': 'year'})

In [None]:
# Create a new dataframe for latitude and longitude
# .copy() makes it independent of gun_violence_2013_2018, whose latitude/longitude
# columns are dropped in the next cell.
# NOTE(review): `location` is not referenced again in the cells visible here.
location = gun_violence_2013_2018[['latitude','longitude']].copy()

In [None]:
# Everything except incident_id, year, state, n_killed and n_injured is discarded
unneeded_columns = [
    'city_or_county', 'address', 'incident_url',
    'source_url', 'incident_url_fields_missing', 'congressional_district',
    'gun_stolen', 'gun_type', 'incident_characteristics',
    'location_description', 'n_guns_involved',
    'notes', 'participant_age', 'participant_age_group',
    'participant_gender', 'participant_name', 'participant_relationship',
    'participant_status', 'participant_type', 'sources',
    'state_house_district', 'state_senate_district',
    'month', 'day', 'latitude', 'longitude',
]
gun_violence_cleaned = gun_violence_2013_2018.drop(columns=unneeded_columns)

In [None]:
# Rename the casualty counts so they match the accidental-deaths frame
# ('deaths' / 'injuries'), which lets the two cleaned files be concatenated later.
gun_violence_cleaned = gun_violence_cleaned.rename(
    columns={'n_killed': 'deaths', 'n_injured': 'injuries'}
)

In [None]:
gun_violence_cleaned.head(5)

Unnamed: 0,incident_id,year,state,deaths,injuries
0,461105,2013,Pennsylvania,0,4
1,460726,2013,California,1,3
2,478855,2013,Ohio,1,3
3,478925,2013,Colorado,4,0
4,478959,2013,North Carolina,2,2


In [None]:
# 'year' was produced by a string split, so convert it to a numeric dtype
gun_violence_cleaned['year'] = pd.to_numeric(gun_violence_cleaned['year'])

# Only the last expression of a cell is displayed, so the dtype check goes
# after the conversion to show its result.
gun_violence_cleaned.dtypes

In [None]:
gun_violence_cleaned.to_csv('/content/Drive/MyDrive/Final_Project/cleaned/cleaned_gun_violence.csv', index=False)

## Clean the Gun Data CSV

In [None]:
# Convert the columns to a list
gun_data.columns.tolist()

['State',
 'Year',
 'Gini-Indx',
 'PopDens',
 'Pov.%',
 'GnLwRnk (Low is strict)',
 'Pop.',
 'StateSqMi',
 'AllVlntCrm',
 'AllMrdr',
 'GunHom.Rate100k',
 'RapeRvsd',
 'RapeLgcy',
 'Robbery',
 'AggvtdAsslt',
 'MassShoot',
 'MassShoot100k',
 'MasShootY',
 'FirearmSuic',
 'AllSuic',
 'House%OwnGuns',
 'PolicKilledGun',
 'PoliceKilledGun100k',
 'NumOfPoliceOfficers',
 'PoliceOfficers100k',
 'GunRnkCATO',
 'GunRnkEvryTwn',
 'GunRnkAVG',
 'PrcntBlck',
 'PrcntHispORLat',
 'PrtyRepub.',
 'PrtyDemoc.',
 'AlchlPerCap',
 'MntlHlthRnk',
 'PrcntHSchl',
 'PrcntBchlr',
 'RdntHntLic',
 'RdntHntLic100k',
 'GunShow100k',
 'PrcntPopVet',
 'PrcntPopUrbn',
 'PrcntEvang.',
 'CntryV.Rck',
 'B5E',
 'B5A',
 'B5C',
 'B5N',
 'B5O',
 'IATScore',
 'UnHyphAm.',
 'PrcntUnHyphAm.',
 'PrcntConsrv',
 'FxNwsCnsmp.',
 'HnrCultr',
 'CensS',
 'CensNE',
 'CensMW',
 'CensW',
 'MnMrdrWmn']

In [None]:
# Remove the columns that are not carried forward from the gun data codebook
gun_data_unneeded = [
    'Gini-Indx', 'Pov.%', 'PrcntBlck', 'PrcntHispORLat', 'PrtyRepub.', 'PrtyDemoc.',
    'AlchlPerCap', 'StateSqMi', 'B5E', 'B5A', 'B5C', 'B5N', 'B5O', 'IATScore',
    'HnrCultr', 'UnHyphAm.', 'PrcntUnHyphAm.', 'PrcntPopVet', 'PrcntPopUrbn',
    'PrcntEvang.', 'CntryV.Rck', 'AllMrdr', 'PrcntHSchl', 'PrcntBchlr',
    'PrcntConsrv', 'AggvtdAsslt', 'AllSuic', 'AllVlntCrm', 'CensMW', 'CensNE',
    'CensS', 'CensW', 'MnMrdrWmn', 'RapeRvsd', 'Robbery', 'FxNwsCnsmp.',
    'MntlHlthRnk', 'NumOfPoliceOfficers', 'PolicKilledGun', 'PoliceKilledGun100k',
    'PoliceOfficers100k', 'RapeLgcy',
]
gun_data = gun_data.drop(columns=gun_data_unneeded)

In [None]:
sorted(gun_data.columns.tolist())

['FirearmSuic',
 'GnLwRnk (Low is strict)',
 'GunHom.Rate100k',
 'GunRnkAVG',
 'GunRnkCATO',
 'GunRnkEvryTwn',
 'GunShow100k',
 'House%OwnGuns',
 'MasShootY',
 'MassShoot',
 'MassShoot100k',
 'Pop.',
 'PopDens',
 'RdntHntLic',
 'RdntHntLic100k',
 'State',
 'Year']

In [None]:
gun_data.head(5)

Unnamed: 0,State,Year,PopDens,GnLwRnk (Low is strict),Pop.,GunHom.Rate100k,MassShoot,MassShoot100k,MasShootY,FirearmSuic,House%OwnGuns,GunRnkCATO,GunRnkEvryTwn,GunRnkAVG,RdntHntLic,RdntHntLic100k,GunShow100k
0,Alabama,2016,95.839902,37,4863300,9.34,15,0.308433,1,11.31,69.81,9,16,12.5,567703,11673.20544,0.78
1,Alaska,2016,1.297129,44,741894,6.07,0,0.0,0,15.23,58.55,4,5,4.5,320848,43247.14851,0.33
2,Arizona,2016,60.994157,47,6931071,4.37,4,0.057711,1,10.89,59.38,3,13,8.0,408602,5895.221676,1.2
3,Arkansas,2016,57.391258,38,2988248,6.09,3,0.100393,1,11.08,59.67,32,22,27.0,395700,13241.87283,1.42
4,California,2016,251.668817,1,39250017,3.74,47,0.119745,1,4.06,37.11,49,46,47.5,1014445,2584.572129,0.37


In [None]:
# Strip gun_data down to State, Year and MassShoot by dropping every other remaining column
non_mass_shooting_columns = [
    'RdntHntLic100k', 'GunShow100k', 'House%OwnGuns', 'GunRnkCATO',
    'GunRnkEvryTwn', 'GunRnkAVG', 'RdntHntLic', 'PopDens',
    'GnLwRnk (Low is strict)', 'Pop.', 'GunHom.Rate100k', 'MassShoot100k',
    'MasShootY', 'FirearmSuic',
]
mass_shootings_df = gun_data.drop(columns=non_mass_shooting_columns)
mass_shootings_df.head()

Unnamed: 0,State,Year,MassShoot
0,Alabama,2016,15
1,Alaska,2016,0
2,Arizona,2016,4
3,Arkansas,2016,3
4,California,2016,47


##### Not saving mass_shootings_df for further integration, as its year range is not wide enough to match the rest of the datasets

## Inspect gun law provisions by year (through 2020) and state.csv

In [None]:
#Count the non-null values
gun_laws.count()

state                           1500
year                            1500
felony                          1500
invcommitment                   1500
invoutpatient                   1500
                                ... 
expartesurrendernoconditions    1500
expartesurrenderdating          1500
dvroremoval                     1500
stalking                        1500
lawtotal                        1500
Length: 137, dtype: int64

In [None]:
# Convert the columns into a list
gun_laws.columns.tolist()

['state',
 'year',
 'felony',
 'invcommitment',
 'invoutpatient',
 'danger',
 'drugmisdemeanor',
 'alctreatment',
 'alcoholism',
 'relinquishment',
 'violent',
 'violenth',
 'violentpartial',
 'dealer',
 'dealerh',
 'recordsall',
 'recordsallh',
 'recordsdealer',
 'recordsdealerh',
 'reportall',
 'reportallh',
 'reportdealer',
 'reportdealerh',
 'purge',
 'residential',
 'theft',
 'security',
 'inspection',
 'ammlicense',
 'ammrecords',
 'permit',
 'permith',
 'fingerprint',
 'training',
 'permitlaw',
 'registration',
 'registrationh',
 'defactoreg',
 'defactoregh',
 'ammpermit',
 'ammrestrict',
 'age21handgunsale',
 'age18longgunsale',
 'age21longgunsaled',
 'age21longgunsale',
 'age21handgunpossess',
 'age18longgunpossess',
 'age21longgunpossess',
 'loststolen',
 'amm18',
 'amm21h',
 'universal',
 'universalh',
 'gunshow',
 'gunshowh',
 'universalpermit',
 'universalpermith',
 'backgroundpurge',
 'ammbackground',
 'threedaylimit',
 'mentalhealth',
 'statechecks',
 'statechecksh',
 'w

In [None]:
# Keep only the 2013-2020 rows so the laws table spans the same years as the
# incident data. between() is inclusive at both ends, so the upper bound is
# enforced explicitly rather than relying on the file stopping at 2020.
gun_laws = gun_laws[gun_laws['year'].between(2013, 2020)]

# Check the data type of the year column (last expression, so it is displayed)
gun_laws['year'].dtypes

In [None]:
gun_laws.dtypes

state                           object
year                             int64
felony                           int64
invcommitment                    int64
invoutpatient                    int64
                                 ...  
expartesurrendernoconditions     int64
expartesurrenderdating           int64
dvroremoval                      int64
stalking                         int64
lawtotal                         int64
Length: 137, dtype: object

In [None]:
gun_laws.tail(5)

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
1495,Wyoming,2016,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
1496,Wyoming,2017,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
1497,Wyoming,2018,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
1498,Wyoming,2019,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
1499,Wyoming,2020,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7


## Merging cleaned incidents

In [None]:
# Pattern matching every cleaned incident CSV written by the earlier cells.
# Any other file in that folder whose name starts with "cleaned" is picked up too.
cleaned_pattern = '/content/Drive/MyDrive/Final_Project/cleaned/cleaned*.csv'

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the row order of the combined frame reproducible between runs.
files = sorted(glob.glob(cleaned_pattern))

# Read each file and stack them into one frame with a fresh 0..n-1 index
merged_yearly_incidents_df = pd.concat(map(pd.read_csv, files), ignore_index=True)

In [None]:
merged_yearly_incidents_df.head(5)

Unnamed: 0,incident_id,year,state,deaths,injuries
0,1227183,2018,Texas,1,0
1,1274244,2018,New York,1,0
2,1273747,2018,Iowa,1,0
3,1275310,2018,Iowa,1,0
4,1273048,2018,Oklahoma,1,0


In [None]:
# Order the combined incidents by year, then alphabetically by state within each year
merged_yearly_incidents_df = merged_yearly_incidents_df.sort_values(by=['year', 'state'])
merged_yearly_incidents_df.head()

Unnamed: 0,incident_id,year,state,deaths,injuries
2127,490395,2013,Alabama,0,4
2272,496668,2013,Alabama,3,5
2051,484307,2013,Arizona,2,4
2072,486153,2013,Arizona,0,5
2215,480897,2013,Arizona,0,4


In [None]:
# Filter the dataset to the years 2013-2020 to match the gun laws datasheet.
# between() is inclusive at both ends, so both bounds named above are enforced.
in_law_years = merged_yearly_incidents_df['year'].between(2013, 2020)
merged_yearly_incidents_df = merged_yearly_incidents_df[in_law_years]
merged_yearly_incidents_df.head(5)

Unnamed: 0,incident_id,year,state,deaths,injuries
2127,490395,2013,Alabama,0,4
2272,496668,2013,Alabama,3,5
2051,484307,2013,Arizona,2,4
2072,486153,2013,Arizona,0,5
2215,480897,2013,Arizona,0,4


In [None]:
merged_yearly_incidents_df.to_csv('/content/Drive/MyDrive/Final_Project/cleaned/merged_yearly_incidents_df.csv', index=False)

### Separate out merged_yearly_incidents_df and gun_laws by year

In [None]:
# Rows of the combined incident table that belong to 2013
is_2013 = merged_yearly_incidents_df['year'] == 2013
merged_2013_incidents_df = merged_yearly_incidents_df.loc[is_2013]
merged_2013_incidents_df.head()

Unnamed: 0,incident_id,year,state,deaths,injuries
2127,490395,2013,Alabama,0,4
2272,496668,2013,Alabama,3,5
2051,484307,2013,Arizona,2,4
2072,486153,2013,Arizona,0,5
2215,480897,2013,Arizona,0,4


In [None]:
# One incident frame per year, 2014 through 2020, unpacked into the
# per-year names used by the cells that follow.
(
    merged_2014_incidents_df,
    merged_2015_incidents_df,
    merged_2016_incidents_df,
    merged_2017_incidents_df,
    merged_2018_incidents_df,
    merged_2019_incidents_df,
    merged_2020_incidents_df,
) = (
    merged_yearly_incidents_df.loc[merged_yearly_incidents_df['year'] == incident_year]
    for incident_year in range(2014, 2021)
)

In [None]:
# Rows of the gun-law table that belong to 2013
is_law_2013 = gun_laws['year'] == 2013
gun_laws_2013_df = gun_laws.loc[is_law_2013]
gun_laws_2013_df.head()

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
22,Alabama,2013,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,10
52,Alaska,2013,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
82,Arizona,2013,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
112,Arkansas,2013,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
142,California,2013,1,1,0,1,0,0,1,0,...,1,1,1,1,1,1,1,1,1,99


In [None]:
# One gun-law frame per year, 2014 through 2020, unpacked into the
# per-year names used by the merge cells below.
(
    gun_laws_2014_df,
    gun_laws_2015_df,
    gun_laws_2016_df,
    gun_laws_2017_df,
    gun_laws_2018_df,
    gun_laws_2019_df,
    gun_laws_2020_df,
) = (
    gun_laws.loc[gun_laws['year'] == law_year]
    for law_year in range(2014, 2021)
)

In [None]:
gun_laws_2018_df.head()

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
27,Alabama,2018,0,1,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,10
57,Alaska,2018,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
87,Arizona,2018,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
117,Arkansas,2018,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
147,California,2018,1,1,0,1,0,0,1,1,...,1,1,1,1,1,1,1,1,1,109


### Merge incidents by state

In [None]:
# Total deaths and injuries per state for 2013 (state becomes the index).
# Only the two casualty columns are selected before summing, so incident_id
# and year are never added up and do not have to be dropped afterwards.
merged_2013_incidents_df = merged_2013_incidents_df.groupby('state')[['deaths', 'injuries']].sum()
merged_2013_incidents_df.head(5)

Unnamed: 0_level_0,deaths,injuries
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,3,9
Arizona,8,16
California,46,139
Colorado,4,9
Connecticut,2,13


In [None]:
def _casualties_by_state(incidents):
    """Return total deaths and injuries per state for one year's incidents.

    Only the 'deaths' and 'injuries' columns are summed, so incident_id and
    year are never aggregated. The result is indexed by state.
    """
    return incidents.groupby('state')[['deaths', 'injuries']].sum()


# Collapse each remaining year's incident rows to one row per state
merged_2014_incidents_df = _casualties_by_state(merged_2014_incidents_df)
merged_2015_incidents_df = _casualties_by_state(merged_2015_incidents_df)
merged_2016_incidents_df = _casualties_by_state(merged_2016_incidents_df)
merged_2017_incidents_df = _casualties_by_state(merged_2017_incidents_df)
merged_2018_incidents_df = _casualties_by_state(merged_2018_incidents_df)
merged_2019_incidents_df = _casualties_by_state(merged_2019_incidents_df)
merged_2020_incidents_df = _casualties_by_state(merged_2020_incidents_df)

## Merge merged_yearly_incidents_df and gun_laws datasets by year

In [None]:
# Attach each year's per-state casualty totals to that year's gun-law rows.
# 'state' is a column on the laws side and the index name on the incidents side.
# NOTE(review): an inner join drops every state that has no incident rows for
# the year (e.g. Alaska is in the 2013 laws table but absent from the merged
# 2013 rows shown further down); confirm that is intended rather than
# recording such states with zero deaths/injuries.
merged_law_incidents_2013 = gun_laws_2013_df.merge(merged_2013_incidents_df, on='state', how='inner')
merged_law_incidents_2014 = gun_laws_2014_df.merge(merged_2014_incidents_df, on='state', how='inner')
merged_law_incidents_2015 = gun_laws_2015_df.merge(merged_2015_incidents_df, on='state', how='inner')
merged_law_incidents_2016 = gun_laws_2016_df.merge(merged_2016_incidents_df, on='state', how='inner')
merged_law_incidents_2017 = gun_laws_2017_df.merge(merged_2017_incidents_df, on='state', how='inner')
merged_law_incidents_2018 = gun_laws_2018_df.merge(merged_2018_incidents_df, on='state', how='inner')
merged_law_incidents_2019 = gun_laws_2019_df.merge(merged_2019_incidents_df, on='state', how='inner')
merged_law_incidents_2020 = gun_laws_2020_df.merge(merged_2020_incidents_df, on='state', how='inner')


In [None]:
merged_law_incidents_2020.head()

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal,deaths,injuries
0,Alabama,2020,0,1,0,1,0,0,1,0,...,0,0,0,0,0,0,0,10,19,6
1,Alaska,2020,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,3,1,0
2,Arizona,2020,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,1,8,12,0
3,Arkansas,2020,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,11,6,1
4,California,2020,1,1,0,1,0,0,1,1,...,1,1,1,1,1,1,1,111,19,6


In [None]:
# Write each per-year law/incident table to the to_merge folder, one CSV per
# year and without the index, ready to be re-read by the final merge step.
law_incident_tables = {
    2013: merged_law_incidents_2013,
    2014: merged_law_incidents_2014,
    2015: merged_law_incidents_2015,
    2016: merged_law_incidents_2016,
    2017: merged_law_incidents_2017,
    2018: merged_law_incidents_2018,
    2019: merged_law_incidents_2019,
    2020: merged_law_incidents_2020,
}
for table_year, table in law_incident_tables.items():
    table.to_csv(f'/content/Drive/MyDrive/Final_Project/to_merge/merged_law_incidents_{table_year}.csv', index=False)

In [None]:
merged_law_incidents_2018.head()

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal,deaths,injuries
0,Alabama,2018,0,1,0,1,0,0,1,0,...,0,0,0,0,0,0,0,10,140,219
1,Alaska,2018,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,3,11,19
2,Arizona,2018,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,1,8,86,73
3,Arkansas,2018,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,11,47,83
4,California,2018,1,1,0,1,0,0,1,1,...,1,1,1,1,1,1,1,109,306,432


## Merge all back into one dataset

In [None]:
# Pattern matching the per-year law/incident CSVs in the to_merge folder.
# Any other file there whose name starts with "merged" is picked up too.
to_merge_pattern = '/content/Drive/MyDrive/Final_Project/to_merge/merged*.csv'

# glob returns matches in arbitrary, filesystem-dependent order; sorting the
# paths makes the row order of the final dataset reproducible between runs.
files = sorted(glob.glob(to_merge_pattern))

# Stack the yearly files into one frame with a fresh 0..n-1 index.
# Note: this rebinds merged_yearly_incidents_df, which until now held the
# incident-level table, to the combined law + casualty table.
merged_yearly_incidents_df = pd.concat(map(pd.read_csv, files), ignore_index=True)

In [None]:
merged_yearly_incidents_df.head()

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal,deaths,injuries
0,Alabama,2013,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,10,3,9
1,Arizona,2013,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,1,8,8,16
2,California,2013,1,1,0,1,0,0,1,0,...,1,1,1,1,1,1,1,99,46,139
3,Colorado,2013,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,30,4,9
4,Connecticut,2013,1,1,1,1,1,0,0,1,...,1,1,0,0,0,0,1,82,2,13


In [None]:
merged_yearly_incidents_df.tail()

Unnamed: 0,state,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal,deaths,injuries
368,Virginia,2020,1,1,1,1,0,0,0,0,...,1,0,0,0,0,0,0,25,18,0
369,Washington,2020,1,1,1,1,0,0,0,0,...,0,1,1,0,1,0,0,55,6,8
370,West Virginia,2020,1,1,1,1,0,0,1,0,...,0,0,0,0,0,0,0,18,4,1
371,Wisconsin,2020,1,1,1,1,0,1,0,0,...,1,1,0,0,0,0,1,23,9,0
372,Wyoming,2020,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,7,1,0


In [None]:
# Export the final dataset
# The same frame is written twice: as CSV (without the index) and as JSON.
# No orient is passed to to_json, so pandas' default JSON layout is used.
merged_yearly_incidents_df.to_csv('/content/Drive/MyDrive/Final_Project/merged_yearly_incidents_df.csv', index=False)
merged_yearly_incidents_df.to_json('/content/Drive/MyDrive/Final_Project/merged_yearly_incidents_df.json')