In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# location of the raw data; read_csv infers zip compression from the extension
file_path = "archive.zip"

# load the raw records into a dataframe and preview the first rows
kd_gun_violence_df = pd.read_csv(filepath_or_buffer=file_path)
kd_gun_violence_df.head()

In [None]:
# focus on only relevant columns

# full list of column names in the raw frame, kept for reference
columns = kd_gun_violence_df.columns.values.tolist()

# Select first, then copy: only the needed columns are duplicated (instead of
# copying the whole raw frame and discarding most of it), and the result is an
# independent frame, so adding columns to it later never touches the raw data.
kd_gv_timeseries = kd_gun_violence_df.loc[
    :,
    ['incident_id', 'date', 'state', 'city_or_county', 'n_killed', 'n_injured', 'latitude', 'longitude'],
].copy()

In [None]:
## prepare dataframe for timeseries analysis

# parse the date column to datetime, then derive the monthly period and the
# calendar year from it (new columns are appended in this order)
kd_gv_timeseries = kd_gv_timeseries.assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    **{
        "Year/Month": lambda frame: frame["date"].dt.to_period("M"),
        "Year": lambda frame: frame["date"].dt.year,
    },
)

# examine data for anomalies: number of non-null n_killed records per month
kd_grpby_count = kd_gv_timeseries.groupby(by=["Year/Month"])["n_killed"].count()
kd_count_yr_month = kd_grpby_count.to_frame()
kd_count_yr_month.head()

In [None]:
# remove 2013 (incomplete data).  Focus on the 4 year time period 2014-2017.
# NOTE: despite the "13_17" in its name, this frame holds 2014 through 2017 only.
kd_13_17 = kd_gv_timeseries.loc[kd_gv_timeseries['Year'].between(2014, 2017)].reset_index(drop=True)

# Label each incident with its quarter (fiscal year ending in December).
# 'Q-DEC' is the documented spelling of the anchored quarterly alias; the
# mixed-case form 'Q-Dec' relied on lenient case handling of frequency strings.
kd_13_17["Quarter"] = kd_13_17['date'].dt.to_period('Q-DEC')
kd_13_17.dtypes

In [None]:
# total number killed in each quarter
kd_gb = kd_13_17.groupby(by=['Quarter'])['n_killed'].sum()

# name the value and the index on the Series, then flatten it into a two-column frame
kd_total_deaths_timeseries = (
    kd_gb
    .rename('Gun Killings')
    .rename_axis('Date')
    .reset_index()
)
kd_total_deaths_timeseries.head()

In [None]:
# bar chart: number killed per quarter
kd_total_deaths_timeseries.plot.bar(
    x='Date',
    y='Gun Killings',
    figsize=(8, 5),
    color='red',
)
plt.xticks(rotation=60)
# y axis is restricted to 1000-4500 with a tick every 250
plt.yticks(np.arange(1000, 4500, 250))
plt.ylim(1000, 4500)
plt.grid()
plt.show()

In [None]:
# compare number of gun-violence incidents with number killed, month by month
kd_monthly_n_killed = kd_13_17.groupby(by=['Year/Month'])['n_killed']
kd_gb2 = kd_monthly_n_killed.count()  # records per month
kd_gb3 = kd_monthly_n_killed.sum()    # deaths per month

kd_n_killed_by_month = (
    kd_gb3.rename('Gun Killings').rename_axis('Date').reset_index()
)
kd_incidences_timeseries = (
    kd_gb2.rename('Incidences Involving Gun Violence').rename_axis('Date').reset_index()
)
kd_incidences_vs_killings_timeseries = kd_incidences_timeseries.merge(kd_n_killed_by_month, on="Date")

# the "Moving Average" columns hold an expanding (cumulative) mean from the
# first month onward, rounded to 2 decimals
kd_ave_incidences = (
    kd_incidences_vs_killings_timeseries['Incidences Involving Gun Violence']
    .expanding(1)
    .mean()
    .round(2)
)
kd_ave_killings = (
    kd_incidences_vs_killings_timeseries['Gun Killings']
    .expanding(1)
    .mean()
    .round(2)
)
kd_incidences_vs_killings_timeseries['Moving Average: Incidences'] = kd_ave_incidences
kd_incidences_vs_killings_timeseries['Moving Average: Gun Killings'] = kd_ave_killings

kd_incidences_vs_killings_timeseries.head()

In [None]:
# overlaid (unstacked) area chart of the two running-average columns per month
kd_incidences_vs_killings_timeseries.plot.area(
    x='Date',
    y=['Moving Average: Incidences', 'Moving Average: Gun Killings'],
    figsize=(10, 8),
    stacked=False,
)
plt.xticks(rotation=60)
# start the x axis at February 2014
plt.xlim(pd.Timestamp('2014-02'))
# y axis runs 0-5000 with a tick every 250
plt.yticks(np.arange(0, 5000, 250))
plt.ylim(0, 5000)
plt.show()

In [None]:
# pivot the quarterly death totals into one row per year, one column per quarter
kd_gb4 = kd_13_17.groupby(by=['Year', 'Quarter'])['n_killed'].sum()
kd_n_killed_comp_by_qrtr = kd_gb4.reset_index()

# the period is rendered as text and characters 4-5 are taken as the quarter label
kd_q = kd_n_killed_comp_by_qrtr['Quarter'].astype(str)
kd_n_killed_comp_by_qrtr['Date'] = kd_q
kd_n_killed_comp_by_qrtr['Q'] = kd_q.str.slice(4, 6)

kd_quarter_sums = (
    kd_n_killed_comp_by_qrtr[['Year', 'Q', 'n_killed']]
    .set_index(['Year', 'Q'])
    .unstack(level='Q')
)
kd_quarter_sums

In [None]:
# build a table of n_killed per quarter alongside its year-over-year percent change
kd_killed_by_q = kd_quarter_sums['n_killed']
kd_q1 = pd.Series(kd_killed_by_q['Q1'])
kd_q2 = list(kd_killed_by_q['Q2'])
kd_q3 = list(kd_killed_by_q['Q3'])
kd_q4 = list(kd_killed_by_q['Q4'])

# Q1 seeds the frame (its Year index becomes a regular column)
kd_qrtr_sum_pct_change = kd_q1.to_frame().reset_index()
kd_qrtr_sum_pct_change['Q1 pct change'] = kd_qrtr_sum_pct_change['Q1'].pct_change().fillna(0)

# the remaining quarters are added positionally from plain lists; the first
# year has no previous value, so its percent change is filled with 0
for _kd_col, _kd_pct_col, _kd_vals in (
    ('Q2', 'Q2 pct change', kd_q2),
    ('Q3', 'Q3 pct change', kd_q3),
    ('Q4', 'Q4 pct change', kd_q4),
):
    kd_qrtr_sum_pct_change[_kd_col] = _kd_vals
    kd_qrtr_sum_pct_change[_kd_pct_col] = kd_qrtr_sum_pct_change[_kd_col].pct_change().fillna(0)
kd_qrtr_sum_pct_change

In [None]:
# x axis: one integer position per row, labelled with that row's year
kd_year = kd_qrtr_sum_pct_change.loc[:, 'Year']
kd_x_axis = np.arange(kd_qrtr_sum_pct_change.shape[0])
tick_locations = list(kd_x_axis)

In [None]:
# plot line graphs showing the pct change in number of gun killings over the time period
# (one stacked panel per quarter, sharing both axes)

fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex=True, figsize=(5,7), sharey=True)

# (axes, y-axis label, column to plot, line colour) for each panel
_kd_panels = (
    (ax1, 'Q1', 'Q1 pct change', 'tab:red'),
    (ax2, 'Q2', 'Q2 pct change', 'tab:red'),
    (ax3, 'Q3', 'Q3 pct change', 'tab:green'),
    (ax4, 'Q4', 'Q4 pct change', 'tab:green'),
)
for _kd_ax, _kd_label, _kd_col, _kd_colour in _kd_panels:
    _kd_ax.plot(kd_qrtr_sum_pct_change[_kd_col], _kd_colour)
    _kd_ax.set_ylabel(_kd_label)
    _kd_ax.grid()

# ticks and limits are set on the bottom panel; sharex/sharey propagate them
ax4.set_xticks(kd_x_axis)
ax4.set_xticklabels(kd_year, rotation=60)
ax4.set_ylim(-0.10, 0.3)

# the layout must be adjusted BEFORE the figure is shown; calling it after
# plt.show() has no effect on the rendered output
fig.tight_layout()
plt.show()