In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
mass_shootings_path = "Resources/mass_shootings.csv"

mass_shootings_df = pd.read_csv(mass_shootings_path)

mass_shootings_df.head()

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Killed,# Injured
0,271363,"December 29, 2014",Louisiana,New Orleans,Poydras and Bolivar,0,4
1,269679,"December 27, 2014",California,Los Angeles,8800 block of South Figueroa Street,1,3
2,270036,"December 27, 2014",California,Sacramento,4000 block of May Street,0,4
3,269167,"December 26, 2014",Illinois,East St. Louis,2500 block of Summit Avenue,1,3
4,268598,"December 24, 2014",Missouri,Saint Louis,18th and Pine,1,3


In [3]:
mass_shootings_df.shape

(3609, 7)

In [4]:
def convert_date(date_string):
    date_object = pd.to_datetime(date_string)
    new_date = date_object.strftime("%Y-%m-%d")
    return new_date

In [5]:
state_count = mass_shootings_df.groupby(['State']).count()
state_count.head()

Unnamed: 0_level_0,Incident ID,Incident Date,City Or County,Address,# Killed,# Injured
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,95,95,95,95,95,95
Alaska,5,5,5,5,5,5
Arizona,37,37,37,37,37,37
Arkansas,43,43,43,43,43,43
California,340,340,340,340,340,340


In [6]:
incident_dates = mass_shootings_df["Incident Date"]
incident_dates

0       December 29, 2014
1       December 27, 2014
2       December 27, 2014
3       December 26, 2014
4       December 24, 2014
              ...        
3604      January 6, 2015
3605      January 4, 2015
3606      January 4, 2015
3607      January 2, 2015
3608      January 1, 2015
Name: Incident Date, Length: 3609, dtype: object

In [7]:
#nj_dates is a series
incident_dates = incident_dates.apply(lambda date_string: convert_date(date_string))
incident_dates

0       2014-12-29
1       2014-12-27
2       2014-12-27
3       2014-12-26
4       2014-12-24
           ...    
3604    2015-01-06
3605    2015-01-04
3606    2015-01-04
3607    2015-01-02
3608    2015-01-01
Name: Incident Date, Length: 3609, dtype: object

In [9]:
mass_shootings_fixed = mass_shootings_df.copy()
mass_shootings_fixed["Incident Date"] = incident_dates
mass_shootings_fixed

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Killed,# Injured
0,271363,2014-12-29,Louisiana,New Orleans,Poydras and Bolivar,0,4
1,269679,2014-12-27,California,Los Angeles,8800 block of South Figueroa Street,1,3
2,270036,2014-12-27,California,Sacramento,4000 block of May Street,0,4
3,269167,2014-12-26,Illinois,East St. Louis,2500 block of Summit Avenue,1,3
4,268598,2014-12-24,Missouri,Saint Louis,18th and Pine,1,3
...,...,...,...,...,...,...,...
3604,274867,2015-01-06,Florida,Miami,1300 block of NW 62nd Street,1,3
3605,273535,2015-01-04,Virginia,Roanoke,3634 Shenandoah Ave NW,2,4
3606,273397,2015-01-04,Texas,Dallas,2000 block of Ben Hur St.,3,1
3607,273965,2015-01-02,Georgia,Savannah,500 block of W. 54th Street,1,4


In [10]:
all_incidents_df = pd.read_csv("Resources/all_incidents.csv")
all_incidents_df.head(10)

Unnamed: 0,incident_id,date,state,city,address,n_killed,n_injured
0,2314858,2022-05-28,Arkansas,Little Rock,W 9th St and Broadway St,0,1
1,2314409,2022-05-28,Colorado,Denver,3300 block of Clay St,0,1
2,2314498,2022-05-28,Missouri,Saint Louis,Page Blvd and Vandeventer Ave,0,1
3,2314485,2022-05-28,South Carolina,Florence,Old River Rd,0,2
4,2314483,2022-05-28,California,Carmichael,4400 block of Manzanita Ave,1,0
5,2314481,2022-05-28,Kentucky,Louisville,400 block of M St,0,1
6,2314479,2022-05-28,Kentucky,Louisville,2100 block of Hale Ave,0,1
7,2314476,2022-05-28,Louisiana,Baton Rouge,Coursey Blvd and Stumberg Ln,0,2
8,2314468,2022-05-28,South Carolina,Charleston,2363 Ashley River Rd,2,0
9,2314321,2022-05-28,Georgia,Thomaston,205 Park Ln,1,2


In [11]:
all_incidents_df.shape

(472820, 7)

In [12]:
all_incidents_df = all_incidents_df.rename(columns={
    'state':"State",
    "incident_id": "Incident ID",
    "date":"Incident Date",
    "city":"City or County",
    "address":"Address",
    "n_killed":"# Killed",
    "n_injured":"# Injured"
})
all_incidents_df.head()

Unnamed: 0,Incident ID,Incident Date,State,City or County,Address,# Killed,# Injured
0,2314858,2022-05-28,Arkansas,Little Rock,W 9th St and Broadway St,0,1
1,2314409,2022-05-28,Colorado,Denver,3300 block of Clay St,0,1
2,2314498,2022-05-28,Missouri,Saint Louis,Page Blvd and Vandeventer Ave,0,1
3,2314485,2022-05-28,South Carolina,Florence,Old River Rd,0,2
4,2314483,2022-05-28,California,Carmichael,4400 block of Manzanita Ave,1,0


In [13]:
# gun_violence_archive_df = pd.read_csv("Resources/GunViolenceArchiveDataJan2014toJun2022.csv")
# gun_violence_archive_df

In [14]:
# gvadata_df = pd.read_csv("Resources/GVAData 1.1.14 to 6.15.22.csv")
# gvadata_df.head(10)

In [15]:
combined_incidents_df = pd.concat([mass_shootings_fixed,all_incidents_df],axis=0)
combined_incidents_df = combined_incidents_df.sort_values(by="Incident Date")
combined_incidents_df

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Killed,# Injured,City or County
472819,461105,2013-01-01,Pennsylvania,,1506 Versailles Avenue and Coursin Street,0,4,Mckeesport
472817,478855,2013-01-01,Ohio,,1776 East 28th Street,1,3,Lorain
472818,460726,2013-01-01,California,,13500 block of Cerise Avenue,1,3,Hawthorne
472816,478925,2013-01-05,Colorado,,16000 block of East Ithaca Place,4,0,Aurora
472815,478959,2013-01-07,North Carolina,,307 Mourning Dove Terrace,2,2,Greensboro
...,...,...,...,...,...,...,...,...
27,2314738,2022-05-28,Virginia,,1300 block of George Washington Hwy N,0,1,Chesapeake
26,2314540,2022-05-28,Texas,,E Vickery Blvd and Belzsie Terrace,0,1,Fort Worth
25,2314279,2022-05-28,Pennsylvania,,N 50th St,1,0,Philadelphia
36,2314747,2022-05-28,New York,,769 Hendrix St,0,1,Brooklyn


In [16]:
nj_incidents = combined_incidents_df[combined_incidents_df["State"] == "New Jersey"]
nj_incidents = nj_incidents.sort_values(by="Incident Date")
nj_incidents

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Killed,# Injured,City or County
472755,486005,2013-05-02,New Jersey,,804 S. 16th St,0,5,Newark
472748,486244,2013-05-11,New Jersey,,Fremont Street,0,4,Jersey City
472744,1007785,2013-05-14,New Jersey,,Delaware Ave,0,2,Delanco
472696,1023908,2013-07-02,New Jersey,,Dwight St,1,0,Jersey City
472679,490932,2013-07-13,New Jersey,,1100 block of Deutz Ave,1,4,Trenton (Hamilton Township)
...,...,...,...,...,...,...,...,...
417,2312134,2022-05-24,New Jersey,,Communipaw Ave and W Side Ave,1,0,Jersey City
416,2311481,2022-05-24,New Jersey,,705 S New Rd,0,1,Absecon
277,2312575,2022-05-25,New Jersey,,195 1st St,0,1,Newark
246,2313228,2022-05-26,New Jersey,,1811 N Delsea Dr,1,0,Vineland


In [17]:
state_count_all_incidents = combined_incidents_df.groupby(['State']).count()
state_count_all_incidents

Unnamed: 0_level_0,Incident ID,Incident Date,City Or County,Address,# Killed,# Injured,City or County
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alabama,10818,10818,95,10265,10818,10818,10723
Alaska,1926,1926,5,1485,1926,1926,1921
Arizona,5280,5280,37,5009,5280,5280,5243
Arkansas,5628,5628,43,5213,5628,5628,5585
California,31085,31085,340,30205,31085,31085,30745
Colorado,6498,6498,52,6169,6498,6498,6446
Connecticut,6124,6124,27,5739,6124,6124,6097
Delaware,3162,3162,17,3084,3162,3162,3145
District of Columbia,6761,6761,59,6587,6761,6761,6702
Florida,26838,26838,218,25321,26838,26838,26620


In [18]:
new_jersey_counts = state_count_all_incidents[state_count_all_incidents.index=="New Jersey"]
new_jersey_counts

Unnamed: 0_level_0,Incident ID,Incident Date,City Or County,Address,# Killed,# Injured,City or County
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
New Jersey,10126,10126,87,9687,10126,10126,10039
