# Gun Violence Dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Gun-violence incident records, January 2013 through March 2018.
# A local copy can be read instead from
# './dataset/gun-violence-data_01-2013_03-2018.csv'.
df_gun_violence = pd.read_csv(
    'https://media.githubusercontent.com/media/Cng000/Gun-violence-analysis'
    '/master/dataset/gun-violence-data_01-2013_03-2018.csv'
)

In [None]:
# Household income table read from the local dataset folder.
# skiprows=1 discards the first line of the file, so the second line
# supplies the column headers.
df_income = pd.read_csv(
    './dataset/ACS_17_1YR_S1901_with_ann.csv',
    skiprows=1,
)

In [None]:
# mean annual household income (2016)
#df_income = pd.read_csv('./dataset/2016_mean_income_raw_data.csv', skiprows=2)
#df_income = pd.read_csv('https://raw.githubusercontent.com/Cng000/Gun-violence-analysis/master/dataset/2016_mean_income_raw_data.csv', skiprows=2)

In [None]:
# State population estimates.
# A local copy can be read instead from './dataset/population_estimates.csv'.
# skiprows=1 discards the first line of the file, so the second line
# supplies the column headers.
df_state_populations = pd.read_csv(
    'https://raw.githubusercontent.com/Cng000/Gun-violence-analysis'
    '/master/dataset/population_estimates.csv',
    skiprows=1,
)

## Gun Violence Analysis

In [None]:
# Print column names, dtypes and non-null counts of the raw incident table.
df_gun_violence.info()

In [None]:
# Preview the first rows of the raw incident table.
df_gun_violence.head()

In [None]:
# Keep only the columns used in this analysis.
# The explicit .copy() makes the result an independent DataFrame, so adding
# columns to it later does not raise pandas' SettingWithCopyWarning.
df_gun_violence_new = df_gun_violence[[
    'incident_id',
    'date',
    'state',
    'city_or_county',
    'n_killed',
    'n_injured',
    'congressional_district',
    'gun_stolen',
    'gun_type',
    'incident_characteristics',
    'n_guns_involved',
    'participant_age',
    'participant_age_group',
    'participant_gender',
    'participant_status',
    'participant_type',
    'state_house_district',
    'state_senate_district',
]].copy()

In [None]:
# Extract the year from the date column: the first four characters of each
# date string are converted to an integer.
# assign() builds a new DataFrame instead of writing a column into what may be
# a slice of df_gun_violence, which is what triggered SettingWithCopyWarning.
df_gun_violence_new = df_gun_violence_new.assign(
    year=pd.to_numeric(df_gun_violence_new['date'].str[0:4], downcast='integer')
)

In [None]:
# Print column names, dtypes and non-null counts of the reduced incident table.
df_gun_violence_new.info()

In [None]:
# Preview the first 10 rows of the reduced incident table.
df_gun_violence_new.head(10)

#### Top 10 cities/counties by number of incidents

In [None]:
# Ten most frequent city_or_county values; value_counts() sorts by count,
# largest first.
(
    df_gun_violence_new['city_or_county']
    .value_counts()
    .head(10)
)

In [None]:
# Plot the ten cities/counties with the most incidents.
# value_counts() is computed once (it sorts by count, largest first) and then
# sliced, so this also works when there are fewer than ten distinct values
# instead of raising IndexError.
incident_counts = df_gun_violence_new['city_or_county'].value_counts()
y = incident_counts.index                 # every city/county, most incidents first
x = incident_counts.head(10).tolist()     # incident counts of the top ten
city = y[:10].tolist()                    # names of the top ten

plt.scatter(x=city, y=x)
plt.xticks(rotation=90)  # names are long; rotate so they do not overlap
plt.xlabel("city")
plt.ylabel("# of incidents")
plt.show()

#### Top 10 cities/counties by number of deaths

In [None]:
# Sum n_killed per city/county and show the ten largest totals.
(
    df_gun_violence_new
    .groupby('city_or_county')['n_killed']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

#### Top 10 cities/counties by number of injured

In [None]:
# Sum n_injured per city/county and show the ten largest totals.
(
    df_gun_violence_new
    .groupby('city_or_county')['n_injured']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

#### Top 10 states with the highest number of incidents

In [None]:
# Ten most frequent state values; value_counts() sorts by count, largest first.
(
    df_gun_violence_new['state']
    .value_counts()
    .head(10)
)

#### Top 10 states with the highest number of deaths

In [None]:
# Sum n_killed per state and show the ten largest totals.
(
    df_gun_violence_new
    .groupby('state')['n_killed']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

#### Top 10 states with the highest number of injuries

In [None]:
# Sum n_injured per state and show the ten largest totals.
(
    df_gun_violence_new
    .groupby('state')['n_injured']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

#### Can we correlate state population to number of violent incidents/deaths?

In [None]:
# Let's look at 2013 first: number of incidents per state in 2013.
# rename_axis() + reset_index(name=...) name both output columns explicitly.
# Renaming the columns that value_counts().reset_index() happens to produce
# breaks on pandas >= 2.0, where they are 'state' and 'count' rather than
# 'index' and 'state', leaving no 'state' column to merge on later.
df_gun_violence_2013 = (
    df_gun_violence_new.loc[df_gun_violence_new['year'] == 2013, 'state']
    .value_counts()
    .rename_axis('state')
    .reset_index(name='incident_counts')
)
df_gun_violence_2013.head(10)

In [None]:
# no data for 15 states in 2013. Also, incident counts appear to be too low for 2013
# Let's get the population estimate of each state now

In [None]:
# Print column names, dtypes and non-null counts of the population table.
df_state_populations.info()

In [None]:
# Preview the first 10 rows of the population table.
df_state_populations.head(10)

In [None]:
# Keep only the geography name and its 2013 population estimate.
df_state_populations_2013 = df_state_populations.loc[
    :,
    ['Geography', 'Population Estimate (as of July 1) - 2013'],
]

In [None]:
# Give the 2013 population table short column names so that 'state' matches
# the incident table; index=str converts the row labels to strings.
df_state_populations_2013 = df_state_populations_2013.rename(
    index=str,
    columns={
        'Geography': 'state',
        'Population Estimate (as of July 1) - 2013': 'population_estimate',
    },
)
df_state_populations_2013.columns

In [None]:
# Join the 2013 incident counts with the 2013 population estimates on 'state'.
# An inner join keeps only states present in both tables.
df_incident_population_2013 = pd.merge(
    df_gun_violence_2013,
    df_state_populations_2013,
    how='inner',
    on='state',
)
df_incident_population_2013.info()

In [None]:
# Preview the first 10 rows of the merged incident/population table.
df_incident_population_2013.head(10)

In [None]:
# Scatter plot: 2013 population estimate (x) against 2013 incident count (y),
# one point per state.
df_incident_population_2013.plot.scatter(
    x='population_estimate',
    y='incident_counts',
)

## Median Household Income 2017 Analysis

In [None]:
# Print column names, dtypes and non-null counts of the income table.
df_income.info()

In [None]:
# Keep only the geography name and the median household income estimate.
df_income = df_income.loc[
    :,
    ['Geography', 'Households; Estimate; Median income (dollars)'],
]
df_income

In [None]:
# Preview the first rows of the reduced income table.
df_income.head()

In [None]:
# Drop the row at position 51 (the 52nd row) of the income table.
# NOTE(review): the earlier note here said this removes the US average income
# and 'Notes'; which row actually sits at position 51 cannot be confirmed from
# this notebook — verify against the CSV.
# NOTE(review): df_income is rebound, so re-running this cell drops a further
# row each time (or raises IndexError once fewer than 52 rows remain).
df_income = df_income.drop(df_income.index[51])
df_income

In [None]:
# Rename the columns to short names ('state' matches the incident table) and
# convert the row labels to strings via index=str.
df_income = df_income.rename(
    index=str,
    columns={
        "Geography": "state",
        "Households; Estimate; Median income (dollars)": "median_annual_household_income_2017",
    },
)
# Make the income column numeric, downcast to the smallest integer dtype when
# possible. No characters are stripped first, so any non-numeric value raises.
df_income['median_annual_household_income_2017'] = pd.to_numeric(
    df_income['median_annual_household_income_2017'],
    downcast='integer',
)
df_income.info()

In [None]:
# Keep only the incidents whose year is 2017.
df_gun_violence_new_2017 = df_gun_violence_new[df_gun_violence_new['year'] == 2017]
df_gun_violence_new_2017

In [None]:
# Attach each state's 2017 median household income to every 2017 incident.
# The outer join also keeps states that appear in only one of the two tables;
# those rows have missing values on the other side.
df_gun_income = df_gun_violence_new_2017.merge(
    df_income,
    on="state",
    how="outer",
)
df_gun_income

In [None]:
# Count 2017 incidents per state and attach each state's 2017 median annual
# household income.
# Counting non-null incident_id values (rather than rows) gives 0 instead of 1
# for states that are in df_gun_income only because of the outer merge with
# df_income. Assumes incident_id is populated for every real incident —
# TODO confirm.
# reset_index(name=...) names the count column explicitly, so the result has
# 'state' and 'incident_count' columns on every pandas version (renaming the
# output of value_counts() breaks on pandas >= 2.0).
incident_count_per_state = (
    df_gun_income
    .groupby('state')['incident_id']
    .count()
    .sort_values(ascending=False)  # most incidents first, as value_counts() did
    .reset_index(name='incident_count')
)
state_incidentCount_medianIncome = pd.merge(
    incident_count_per_state,
    df_income,
    on="state",
    how="outer",
)
state_incidentCount_medianIncome


In [None]:
# Scatter plot: 2017 median household income (x) against incident count (y),
# one point per state.
state_incidentCount_medianIncome.plot.scatter(
    x='median_annual_household_income_2017',
    y='incident_count',
)