# Module Import Cell

In [1]:
%matplotlib inline
import matplotlib
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd
plt.style.use('ggplot') 

## Data Import Cell

In [2]:
#Available population density by state and year meassured in persons per square mile
pop_df = pd.read_csv('data/Historical_Population_Density_States_only_RAND_US_1.csv')
#---------------------------------------------------------------------------------------
#Available firearm background check data
nics_df = pd.read_csv('data/nics-firearm-background-checks.csv')

In [3]:
#Change month to y-m for year by month
nics_df = nics_df.rename(columns={'month': 'y&m'})
#Add year column to group by
nics_df['year'] = list(map(lambda x: x.split('-')[0], nics_df['y&m']))
#Defining columns to combine
handgun_list = ['handgun', 'prepawn_handgun', 'redemption_handgun', 'returned_handgun', 'rentals_handgun', 'private_sale_handgun', 'return_to_seller_handgun']
long_gun_list = ['long_gun', 'prepawn_long_gun', 'redemption_long_gun', 'returned_long_gun', 'rentals_long_gun', 'private_sale_long_gun', 'return_to_seller_long_gun']
#Create combined columns
nics_df['total_handgun'] = nics_df[handgun_list].sum(axis=1)
nics_df['total_long_gun'] = nics_df[long_gun_list].sum(axis=1)
#Group by year then state
year_state_group = nics_df.groupby(('year', 'state')).sum()
#Collect the columns we want and reset index
clean_nics_df = year_state_group[['total_handgun', 'total_long_gun']]
clean_nics_df = clean_nics_df.reset_index()
#Remove any year not between 1999 and 2018 and assign to my_nics_df
clean_nics_df['year'] = pd.to_numeric(clean_nics_df['year'])
my_nics_df = clean_nics_df.loc[(clean_nics_df['year'] < 2019) & (clean_nics_df['year'] > 1998)] 
#Test: Expect 1100 rows (50 states + DC, Guam, Puerto Rico, Mariana Islands, and Virgin Islands X 20 years) and 4 columns
#my_nics_df.head()

In [4]:
#Droping NA Values
cleaner_pop_df = pop_df.dropna(axis=1)
#Keeping area and years from 1999 - 2018
cleaner_pop_df = cleaner_pop_df.loc[:, ['Area'] + list(map(str, range(1999, 2019)))]
#Rename 'Area' column to be more descriptive
my_pop_df = cleaner_pop_df.rename(columns={'Area': 'State'})
#test: expect 52 rows(50 states, DC, and US), 21 columns(State name + data from 1999 - 2018)
#my_pop_df
#my_pop_df.shape

In [5]:
new_my_pop_df = my_pop_df.loc[:, ['State', '2018']]
new_my_pop_df = new_my_pop_df[new_my_pop_df['State'] != 'United States']
my_2018_pop_df = new_my_pop_df

In [6]:
nics_2018_df = my_nics_df[my_nics_df['year'] == 2018]
rows_to_drop = ['Guam', 'Mariana Islands', 'Puerto Rico', 'Virgin Islands']
for row in rows_to_drop:
    nics_2018_df = nics_2018_df[nics_2018_df['state'] != row]
nics_2018_df.shape

(51, 4)

In [7]:
temp = np.array(my_2018_pop_df['2018'])
nics_2018_df['pop_density'] = temp
nics_2018_df['handgun/long_gun'] = nics_2018_df['total_handgun'] / nics_2018_df['total_long_gun']
#nics_2018_df

pop_sorted_nics_2018_df = nics_2018_df.sort_values('pop_density')
#pop_sorted_nics_2018_df

In [8]:
#nics_2018_df.head()

In [9]:
#nics_2018_df['total_handgun'].median()
#nics_2018_df.describe()

In [10]:
#nics_2018_df[nics_2018_df['state'] == 'District of Columbia']

In [11]:
#nics_2018_df[nics_2018_df['state'] == 'Montana']

In [12]:
#nics_2018_df['pop_density'].median()

In [13]:
#pop_sorted_nics_2018_df
# fig, ax = plt.subplots(figsize=(25,15))
# labels = np.array(pop_sorted_nics_2018_df['state'])
# h_l_r = np.array(pop_sorted_nics_2018_df['handgun/long_gun'])
# x = np.arange(len(labels))
# ax.bar(x, h_l_r, label=('State ratios'))
# ax.margins(x=0)
# ax.set_ylabel('Ratio of handgun to rifle applications', fontdict={'fontsize': 25, 'fontweight': 5})
# ax.set_xlabel('States from least to most population density', fontdict={'fontsize': 25, 'fontweight': 5})
# #ax.set_title('Scores by group and gender')
# ax.set_xticks(x)
# ax.set_xticklabels(labels, rotation=270, fontsize=14, fontweight=4)
# ax.axhline(1, color='black', alpha=.5, label='1-1 ratio')
# ax.set_title('Ratio of handguns to rifles by population density', fontdict={'fontsize': 33, 'fontweight': 7})
# ax.legend()
# fig.tight_layout()
# #V Generates graph image if missing V
# #fig.savefig('graphs/bar.png')

In [14]:
#C = pop_sorted_nics_2018_df[pop_sorted_nics_2018_df['state'] != 'District of Columbia']
# fig, ax = plt.subplots(figsize=(25,15))
# labels = np.array(noDC_pop_sorted_nics_2018_df['state'])
# h_l_r = np.array(noDC_pop_sorted_nics_2018_df['handgun/long_gun'])
# x = np.arange(len(labels))
# ax.bar(x, h_l_r, label=('State ratios'))
# ax.margins(x=0)
# ax.set_ylabel('Ratio of handgun to rifle applications', fontdict={'fontsize': 25, 'fontweight': 5})
# ax.set_xlabel('States from least to most population density', fontdict={'fontsize': 25, 'fontweight': 5})
# #ax.set_title('Scores by group and gender')
# ax.set_xticks(x)
# ax.set_xticklabels(labels, rotation=270, fontsize=14, fontweight=4)
# ax.axhline(1, color='black', alpha=.5, label='1-1 ratio')
# ax.set_title('Ratio of handguns to rifles by population density excluding DC', fontdict={'fontsize': 33, 'fontweight': 7})
# ax.legend()
# fig.tight_layout()
# #V Generates graph image if missing V
# #fig.savefig('graphs/bar.png')

In [20]:
#Setup
fig, ax = plt.subplots(figsize=(25, 15))
xlabels = pop_sorted_nics_2018_df['pop_density']
x = np.arange(len(xlabels))
#Plot 
ax.scatter(x, handgun_y, c='black', label='Handguns', alpha=1, s=200)
ax.scatter(x, long_gun_y, c='firebrick', label='Rifles', alpha=.75, s=200)
ax.set_xticks(x)
#Label
ax.set_xticklabels(xlabels, rotation=270)
ax.set_xlabel('States from least to most population density', fontdict={'fontsize': 25, 'fontweight': 5})
ax.set_ylabel('Number of applications', fontdict={'fontsize': 25, 'fontweight': 5})

NameError: name 'handgun_y' is not defined

In [15]:
pop_sorted_nics_2018_df


Unnamed: 0,year,state,total_handgun,total_long_gun,pop_density,handgun/long_gun
1101,2018,Alaska,35344.0,34748.0,1.28934,1.017152
1154,2018,Wyoming,23556.0,27399.0,5.94992,0.859739
1128,2018,Montana,38393.0,58497.0,7.29846,0.656324
1136,2018,North Dakota,19277.0,32577.0,11.01944,0.591737
1144,2018,South Dakota,29625.0,44193.0,11.62595,0.670355
1133,2018,New Mexico,75103.0,59389.0,17.26679,1.264594
1113,2018,Idaho,51196.0,61045.0,21.19966,0.83866
1129,2018,Nebraska,1633.0,23162.0,25.09715,0.070503
1130,2018,Nevada,58817.0,32619.0,27.62909,1.803152
1117,2018,Kansas,73792.0,66397.0,35.58645,1.111376


In [16]:
#C

NameError: name 'C' is not defined

In [17]:
# #Thanks to Bunji from https://stackoverflow.com/questions/35634238/how-to-save-a-pandas-dataframe-table-as-a-png for this code
# from pandas.plotting import table
# nics_describe = nics_2018_df.describe()
# fig, ax = plt.subplots(figsize=(10,5)) 
# ax.xaxis.set_visible(False)
# ax.yaxis.set_visible(False) 

# table(ax, nics_describe)  # where df is your data frame

In [18]:
#nics_2018_describe = nics_2018_df.describe()
nics_2018_describe.to_csv('data/nics_2018_describe.csv')

In [19]:
#nics_2018_df_noDC_describe = nics_2018_df_noDC.describe()
nics_2018_df_noDC_describe.to_csv('data/nics_2018_noDC_describe.csv')

NameError: name 'nics_2018_df_noDC' is not defined

\\aprox

Example:
    
    District of Columbia = 976 handgun applications and 40 rifle applications equate to a 24.4
    Montana = 38393 handgun applications and 58497 rifle applications for a proportion of .66