# Module Import Cell

In [1]:
%matplotlib inline
import matplotlib
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd
plt.style.use('ggplot') 

## Data Import Cell

In [3]:
# Population density by state and year, measured in persons per square mile.
pop_df = pd.read_csv(
    filepath_or_buffer='data/Historical_Population_Density_States_only_RAND_US_1.csv',
)
# ---------------------------------------------------------------------------------------
# Firearm background check (NICS) data.
nics_df = pd.read_csv(
    filepath_or_buffer='data/nics-firearm-background-checks.csv',
)

In [5]:
# Number of columns in the raw NICS frame (second entry of the (rows, columns) shape).
nics_df.shape[1]

27

In [6]:
# Build yearly, per-state handgun and long-gun check totals from the raw NICS frame.

# The 'month' column is split on '-' below to get the year, so rename it to
# reflect that it carries year and month together.
nics_df = nics_df.rename(columns={'month': 'y&m'})
# Year is the text before the first '-'. It stays a string here and is
# converted to a number after grouping.
nics_df['year'] = nics_df['y&m'].str.split('-').str[0]

# Source columns that are combined into a single handgun / long-gun total.
handgun_list = [
    'handgun', 'prepawn_handgun', 'redemption_handgun', 'returned_handgun',
    'rentals_handgun', 'private_sale_handgun', 'return_to_seller_handgun',
]
long_gun_list = [
    'long_gun', 'prepawn_long_gun', 'redemption_long_gun', 'returned_long_gun',
    'rentals_long_gun', 'private_sale_long_gun', 'return_to_seller_long_gun',
]

# Row-wise totals across the related columns.
nics_df['total_handgun'] = nics_df[handgun_list].sum(axis=1)
nics_df['total_long_gun'] = nics_df[long_gun_list].sum(axis=1)

# Group by year, then state. The keys must be a list: a tuple is treated as a
# single key by current pandas and raises KeyError. numeric_only=True keeps
# text columns such as 'y&m' out of the sum.
year_state_group = nics_df.groupby(['year', 'state']).sum(numeric_only=True)

# Keep only the two combined totals and turn the (year, state) index back into columns.
clean_nics_df = year_state_group[['total_handgun', 'total_long_gun']].reset_index()

# Keep 1999 through 2018 inclusive. The explicit copy makes my_nics_df an
# independent frame, so later column assignments do not warn about writing to a slice.
clean_nics_df['year'] = pd.to_numeric(clean_nics_df['year'])
my_nics_df = clean_nics_df.loc[clean_nics_df['year'].between(1999, 2018)].copy()
#Test: Expect 1100 rows (50 states + DC, Guam, Puerto Rico, Mariana Islands, and Virgin Islands X 20 years) and 4 columns
#my_nics_df.head()

In [8]:
# Preview the first rows of the yearly per-state totals.
my_nics_df.head()

Unnamed: 0,year,state,total_handgun,total_long_gun
55,1999,Alabama,94544.0,149017.0
56,1999,Alaska,14339.0,27790.0
57,1999,Arizona,78103.0,71365.0
58,1999,Arkansas,50523.0,126875.0
59,1999,California,371893.0,410119.0


In [4]:
# Drop every column that contains a missing value.
cleaner_pop_df = pop_df.dropna(axis=1)
# Keep 'Area' plus one column per year from 1999 through 2018 (year labels are strings).
cleaner_pop_df = cleaner_pop_df.loc[:, ['Area', *(str(year) for year in range(1999, 2019))]]
# Rename 'Area' to the more descriptive 'State', then remove the national
# 'United States' row so only state-level rows remain.
my_pop_df = (
    cleaner_pop_df
    .rename(columns={'Area': 'State'})
    .loc[lambda frame: frame['State'] != 'United States']
)
# Check: expect 51 rows (50 states + DC) and 21 columns (State name + 1999 - 2018).
my_pop_df
# my_pop_df.shape

Unnamed: 0,State,1999,2000,2001,2002,2003,2004,2005,2006,2007,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Alabama,86.11584,87.73153,87.97166,88.13692,88.49501,88.92066,89.56821,90.60555,91.39808,...,92.79339,94.30569,94.56949,94.89918,95.19273,95.42963,95.64008,95.86838,96.07284,96.32412
1,Alaska,1.08313,1.09712,1.10729,1.12368,1.13801,1.15669,1.17053,1.18424,1.19293,...,1.22121,1.24819,1.26241,1.27703,1.28865,1.28736,1.28953,1.29645,1.29344,1.28934
2,Arizona,42.04999,45.46765,46.67961,47.97931,49.20339,50.68374,52.57937,54.49134,55.9886,...,58.04376,56.38909,56.96746,57.69903,58.38869,59.2585,60.13637,61.12071,62.03085,63.11124
3,Arkansas,49.00063,51.43811,51.68355,51.94598,52.28321,52.74165,53.31897,54.0656,54.58602,...,55.4936,56.1185,56.47244,56.69718,56.84007,56.99712,57.20225,57.43278,57.67452,57.88248
4,California,212.52412,217.97073,221.11932,223.62363,226.02755,227.99801,229.51658,230.69608,232.27927,...,236.99551,239.29945,241.35717,243.40232,245.45441,247.66214,249.76527,251.40663,252.62632,253.63746
5,Colorado,39.1075,41.7294,42.74174,43.42819,43.85734,44.34815,44.93724,45.82681,46.68699,...,48.44647,48.67314,49.3817,50.07541,50.8155,51.59392,52.56664,53.42294,54.14588,54.91394
6,Connecticut,677.43374,704.20368,707.65212,711.76973,715.75153,717.18337,717.76255,719.36138,720.07782,...,726.19881,738.72549,740.56202,741.87719,741.98452,741.95728,740.45593,738.6324,737.64293,737.39216
7,Delaware,385.72555,402.55278,406.75485,411.6234,417.13845,423.14493,429.93612,436.65001,442.72815,...,453.08155,460.38639,464.33777,468.36643,472.69089,477.27533,481.78762,485.78096,489.8045,494.96981
8,District of Columbia,8452.76873,9311.79153,9414.36482,9439.49511,9410.04886,9442.9316,9479.62541,9511.04235,9550.63518,...,9766.40065,9919.42623,10157.4098,10405.32787,10662.80328,10860.86885,11069.7377,11255.32787,11404.77049,11515.65574
9,Florida,280.2176,297.57212,303.2604,309.31379,314.89309,322.2007,329.7778,335.42688,338.93873,...,343.76158,349.46845,354.05923,358.37762,362.77126,368.28175,375.03011,382.55386,388.98533,394.96588


In [5]:
# new_my_pop_df = my_pop_df.loc[:, ['State', '2018']]
# #new_my_pop_df = new_my_pop_df[new_my_pop_df['State'] != 'United States']
# my_2018_pop_df = new_my_pop_df

In [6]:
# nics_2018_df = my_nics_df[my_nics_df['year'] == 2018]
# rows_to_drop = ['Guam', 'Mariana Islands', 'Puerto Rico', 'Virgin Islands']
# for row in rows_to_drop:
#     nics_2018_df = nics_2018_df[nics_2018_df['state'] != row]
# nics_2018_df.shape

In [7]:
# temp = np.array(my_2018_pop_df['2018'])
# nics_2018_df['pop_density'] = temp
# nics_2018_df['handgun/long_gun'] = nics_2018_df['total_handgun'] / nics_2018_df['total_long_gun']
# #nics_2018_df

# pop_sorted_nics_2018_df = nics_2018_df.sort_values('pop_density')
# #pop_sorted_nics_2018_df

In [8]:
# Preview the first rows of the yearly per-state totals.
my_nics_df.head()

Unnamed: 0,year,state,total_handgun,total_long_gun
55,1999,Alabama,94544.0,149017.0
56,1999,Alaska,14339.0,27790.0
57,1999,Arizona,78103.0,71365.0
58,1999,Arkansas,50523.0,126875.0
59,1999,California,371893.0,410119.0


In [9]:
# Preview the last rows of the yearly per-state totals.
my_nics_df.tail()

Unnamed: 0,year,state,total_handgun,total_long_gun
1150,2018,Virginia,254216.0,168947.0
1151,2018,Washington,233901.0,170797.0
1152,2018,West Virginia,88740.0,91200.0
1153,2018,Wisconsin,158189.0,149239.0
1154,2018,Wyoming,23556.0,27399.0


In [19]:
# (rows, columns) of the yearly per-state totals.
my_nics_df.shape

(1020, 4)

In [12]:
# Preview the first rows of the per-state population density table.
my_pop_df.head()

Unnamed: 0,State,1999,2000,2001,2002,2003,2004,2005,2006,2007,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Alabama,86.11584,87.73153,87.97166,88.13692,88.49501,88.92066,89.56821,90.60555,91.39808,...,92.79339,94.30569,94.56949,94.89918,95.19273,95.42963,95.64008,95.86838,96.07284,96.32412
1,Alaska,1.08313,1.09712,1.10729,1.12368,1.13801,1.15669,1.17053,1.18424,1.19293,...,1.22121,1.24819,1.26241,1.27703,1.28865,1.28736,1.28953,1.29645,1.29344,1.28934
2,Arizona,42.04999,45.46765,46.67961,47.97931,49.20339,50.68374,52.57937,54.49134,55.9886,...,58.04376,56.38909,56.96746,57.69903,58.38869,59.2585,60.13637,61.12071,62.03085,63.11124
3,Arkansas,49.00063,51.43811,51.68355,51.94598,52.28321,52.74165,53.31897,54.0656,54.58602,...,55.4936,56.1185,56.47244,56.69718,56.84007,56.99712,57.20225,57.43278,57.67452,57.88248
4,California,212.52412,217.97073,221.11932,223.62363,226.02755,227.99801,229.51658,230.69608,232.27927,...,236.99551,239.29945,241.35717,243.40232,245.45441,247.66214,249.76527,251.40663,252.62632,253.63746


In [13]:
# Preview the last rows of the per-state population density table.
my_pop_df.tail()

Unnamed: 0,State,1999,2000,2001,2002,2003,2004,2005,2006,2007,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
47,Virginia,173.58438,179.43427,181.62578,183.95535,186.23228,188.63719,191.03585,193.13488,194.97235,...,199.08512,202.64889,204.5883,206.72902,208.44201,209.93272,211.21652,212.42981,213.80025,215.12565
48,Washington,86.50451,88.8302,89.98226,91.01018,91.86788,92.93525,94.09228,95.75976,97.15336,...,100.14711,101.32998,102.51345,103.58373,104.63612,105.98159,107.65122,109.6219,111.5868,113.24223
49,West Virginia,75.04561,75.04702,74.69899,74.73342,74.85083,74.89502,74.92068,75.05845,75.22295,...,75.57926,77.00864,77.08589,77.11454,76.99448,76.81149,76.5012,76.04157,75.46507,74.99925
50,Wisconsin,96.67531,98.95496,99.59048,100.29011,100.84305,101.47993,102.03338,102.59013,103.1405,...,104.12012,104.77774,105.0406,105.31863,105.63344,105.91003,106.0837,106.29641,106.64797,107.04415
51,Wyoming,4.93924,5.08709,5.07703,5.11912,5.14096,5.18008,5.21359,5.28155,5.39044,...,5.60523,5.81342,5.84165,5.93481,5.99509,5.99946,6.0316,6.0174,5.96225,5.94992


In [11]:
# (rows, columns) of the per-state population density table.
my_pop_df.shape

(51, 21)

In [14]:
# (rows, columns) of the NICS totals for a single sample year (2005).
my_nics_df.loc[my_nics_df['year'].eq(2005)].shape

(55, 4)

In [15]:
# Length of the 2005 density column, for comparison with the NICS row count for 2005.
my_pop_df.loc[:, '2005'].shape

(51,)

In [18]:
# Remove the listed territories from the NICS totals with a single membership mask.
rows_to_drop = ['Guam', 'Mariana Islands', 'Puerto Rico', 'Virgin Islands']
my_nics_df = my_nics_df[~my_nics_df['state'].isin(rows_to_drop)]


In [43]:
# Build one frame per year (keyed by the year as a string) holding that year's
# NICS totals plus the matching population density for each state.
df_dict = {}
year_list = list(range(1999, 2019))
s_year_list = [str(year) for year in year_list]

# Index the density table by state name so densities are matched to NICS rows
# by state rather than by row position. Positional assignment silently depends
# on both frames having the same states in the same order.
density_by_state = my_pop_df.set_index('State')

for year, year_s in zip(year_list, s_year_list):
    # Copy the filtered rows so adding a column does not write into a slice
    # of my_nics_df (avoids SettingWithCopyWarning).
    year_df = my_nics_df[my_nics_df['year'] == year].copy()
    # A state without a density entry gets NaN instead of a misaligned value.
    year_df['pop_density'] = year_df['state'].map(density_by_state[year_s])
    df_dict[year_s] = year_df

In [23]:
# Per-year frames in chronological order (s_year_list runs 1999 through 2018).
df_list = [df_dict[year_label] for year_label in s_year_list]

In [24]:
# Stack the per-year frames row-wise into one long table.
all_df = pd.concat(df_list, axis=0)

In [25]:
# Display the combined table of yearly totals and population density.
all_df

Unnamed: 0,year,state,total_handgun,total_long_gun,pop_density
55,1999,Alabama,94544.0,149017.0,86.11584
56,1999,Alaska,14339.0,27790.0,1.08313
57,1999,Arizona,78103.0,71365.0,42.04999
58,1999,Arkansas,50523.0,126875.0,49.00063
59,1999,California,371893.0,410119.0,212.52412
...,...,...,...,...,...
1150,2018,Virginia,254216.0,168947.0,215.12565
1151,2018,Washington,233901.0,170797.0,113.24223
1152,2018,West Virginia,88740.0,91200.0,74.99925
1153,2018,Wisconsin,158189.0,149239.0,107.04415


In [35]:
# plotly.express is imported once here; the original cell repeated the same import.
import plotly.express as px

# 3D line plot with one line per state: year on x, population density on y,
# total handgun checks on z. Includes the District of Columbia.
fig = px.line_3d(all_df, x="year", y="pop_density", z="total_handgun", color='state')
fig.show()

In [33]:
# Same table without the District of Columbia rows.
all_df_noDC = all_df.loc[all_df['state'].ne('District of Columbia')]
all_df_noDC.shape

(1000, 5)

In [41]:
# 3D line plot with one line per state (DC excluded): population density on x,
# total handgun checks on y, year on z.
fig = px.line_3d(
    all_df_noDC,
    x="pop_density",
    y="total_handgun",
    z="year",
    color="state",
)
fig.show()

In [42]:
# 3D line plot with one line per state (DC excluded): population density on x,
# total long-gun checks on y, year on z.
fig = px.line_3d(
    all_df_noDC,
    x="pop_density",
    y="total_long_gun",
    z="year",
    color="state",
)
fig.show()

Don't Touch!
<!--
===========================================================V TEMPLATEING IDEAS V===================================================

Table 1 = Description of data including DC

|FIELD1|year  |total_handgun     |total_long_gun    |pop_density       |handgun/long_gun   |
|------|------|------------------|------------------|------------------|-------------------|
|count |51.0  |51.0              |51.0              |51.0              |51.0               |
|mean  |2018.0|138054.07843137256|103376.60784313726|423.35861745098043|1.6540754923677627 |
|std   |0.0   |149504.35134601843|85730.34706452048 |1605.7997009585968|3.297578803277349  |
|min   |2018.0|10.0              |31.0              |1.28934           |0.07050341075900181|
|25%   |2018.0|40816.0           |36366.0           |49.28428          |0.9026160053394    |
|50%   |2018.0|80350.0           |91200.0           |107.04415         |1.1951771451060367 |
|75%   |2018.0|174336.5          |151466.0          |225.18950999999998|1.494311656277248  |
|max   |2018.0|643595.0          |452932.0          |11515.65574       |24.4               |


Table 2 = Description of data excluding DC

|FIELD1|year  |total_handgun     |total_long_gun    |pop_density       |handgun/long_gun   |
|------|------|------------------|------------------|------------------|-------------------|
|count |50.0  |50.0              |50.0              |50.0              |50.0               |
|mean  |2018.0|140795.64         |105443.34         |201.51267500000003|1.199157002215119  |
|std   |0.0   |149721.60191989527|85307.67804992583 |264.5673177684518 |0.5710118989817893 |
|min   |2018.0|10.0              |31.0              |1.28934           |0.07050341075900181|
|25%   |2018.0|44137.5           |38391.5           |46.46945          |0.8928885698273171 |
|50%   |2018.0|84545.0           |92793.0           |107.008805        |1.1934184019619565 |
|75%   |2018.0|177639.75         |152579.5          |219.6491175       |1.4740263060688625 |
|max   |2018.0|643595.0          |452932.0          |1201.0947800000001|3.1627863509456446 |



















I'm choosing to look at data from 1999 through 2018.  This is because my population density data ends in 2018 and my NICS data begins at the end of 1998.  It is also a 20-year span, and I like round numbers.
### Installing  

A step by step series of examples that tell you how to get a development env running

Say what the step will be

```
Give the example
```

And repeat

```
until finished
```

End with an example of getting some data out of the system or using it for a little demo

## Running the tests

Explain how to run the automated tests for this system

### Break down into end to end tests

Explain what these tests test and why

```
Give an example
```

### And coding style tests

Explain what these tests test and why

```
Give an example
```

## Deployment

Add additional notes about how to deploy this on a live system

## Built With

* [Dropwizard](http://www.dropwizard.io/1.0.2/docs/) - The web framework used
* [Maven](https://maven.apache.org/) - Dependency Management
* [ROME](https://rometools.github.io/rome/) - Used to generate RSS Feeds

## Contributing

Please read [CONTRIBUTING.md](https://gist.github.com/PurpleBooth/b24679402957c63ec426) for details on our code of conduct, and the process for submitting pull requests to us.

## Versioning

We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://github.com/your/project/tags). 

## Authors

* **Billie Thompson** - *Initial work* - [PurpleBooth](https://github.com/PurpleBooth)

See also the list of [contributors](https://github.com/your/project/contributors) who participated in this project.

## License

This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details

## Acknowledgments

* Hat tip to anyone whose code was used
* Inspiration
* etc
-->
