In [None]:
# colab imports
!pip install geopandas
!pip install mapclassify

import geopandas as gpd
from shapely.geometry import Point, Polygon
import matplotlib.pyplot as plt
import pandas as pd
import mapclassify as mc

from google.colab import drive
import pandas as pd
import numpy as np
from datetime import datetime as dt
import requests
import urllib.parse

Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/f7/a4/e66aafbefcbb717813bf3a355c8c4fc3ed04ea1dd7feb2920f2f4f868921/geopandas-0.8.1-py2.py3-none-any.whl (962kB)
[K     |████████████████████████████████| 972kB 6.5MB/s 
[?25hCollecting pyproj>=2.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/e4/ab/280e80a67cfc109d15428c0ec56391fc03a65857b7727cf4e6e6f99a4204/pyproj-3.0.0.post1-cp36-cp36m-manylinux2010_x86_64.whl (6.4MB)
[K     |████████████████████████████████| 6.5MB 13.7MB/s 
[?25hCollecting fiona
[?25l  Downloading https://files.pythonhosted.org/packages/37/94/4910fd55246c1d963727b03885ead6ef1cd3748a465f7b0239ab25dfc9a3/Fiona-1.8.18-cp36-cp36m-manylinux1_x86_64.whl (14.8MB)
[K     |████████████████████████████████| 14.8MB 248kB/s 
Collecting click-plugins>=1.0
  Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl
Collecting mu

In [None]:
# Build the per-county template frame from the 2019 TIGER/Line county table.
dbPath = "/content/drive/My Drive/Junior Year/IW 07/tl_2019_us_county.dbf"

# attribute columns that are discarded
removal_list = ['COUNTYNS', 'NAMELSAD', 'LSAD', 'CLASSFP', 'MTFCC', 'CSAFP', 'CBSAFP', 'METDIVFP', 'FUNCSTAT', 'INTPTLAT', 'INTPTLON', 'ALAND', 'AWATER']

# final column layout: the kept columns plus the two incident columns
header_list = ['STATEFP', 'COUNTYFP', 'NAME', 'geometry', 'numIncidents', 'incidentList']

# read -> index by GEOID -> drop unused attributes -> lay out the final columns
dbGDF = (
    gpd.read_file(dbPath)
    .set_index('GEOID')
    .drop(columns=removal_list)
    .reindex(columns=header_list)
)

# every county starts with a zero count and its own empty incident list
county_count = len(dbGDF)
dbGDF['numIncidents'] = np.zeros(county_count)
dbGDF['incidentList'] = [[] for _ in range(county_count)]

In [None]:
# Pickle the zero-count county frame so it can be reloaded as a clean template.
base_template_out = "/content/drive/MyDrive/Junior Year/IW 07/dbGDFbase.pkl"
dbGDF.to_pickle(base_template_out)

In [None]:
# Round-trip check: reload the exported pickle and preview its first rows.
base_template_in = "/content/drive/MyDrive/Junior Year/IW 07/dbGDFbase.pkl"
testGDF = pd.read_pickle(base_template_in)
testGDF.head(n=5)

Unnamed: 0_level_0,STATEFP,COUNTYFP,NAME,geometry,numIncidents,incidentList
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
31039,31,39,Cuming,"POLYGON ((-97.01952 42.00410, -97.01952 42.004...",0.0,[]
53069,53,69,Wahkiakum,"POLYGON ((-123.43639 46.23820, -123.44759 46.2...",0.0,[]
35011,35,11,De Baca,"POLYGON ((-104.56739 33.99757, -104.56772 33.9...",0.0,[]
31109,31,109,Lancaster,"POLYGON ((-96.91075 40.78494, -96.91075 40.790...",0.0,[]
31129,31,129,Nuckolls,"POLYGON ((-98.27367 40.08940, -98.27367 40.089...",0.0,[]


In [None]:
# Load the Gun Archive incident table.
gaPath = "/content/drive/My Drive/Junior Year/IW 07/stage3.csv"
gaDF = pd.read_csv(gaPath)

# A missing gun count is replaced with 1; the state and city_or_county
# columns are dropped.
gaDF = gaDF.assign(
    n_guns_involved=gaDF['n_guns_involved'].fillna(1)
).drop(columns=['state', 'city_or_county'])

In [None]:
# Reset dbGDF to the pickled base template.
base_template_reload = "/content/drive/MyDrive/Junior Year/IW 07/dbGDFbase.pkl"
dbGDF = pd.read_pickle(base_template_reload)

In [None]:
# Tally Gun Archive incidents per county and write one pickle per year.
#
# For every row of gaDF the FCC block-find API is queried with the row's
# latitude/longitude to obtain the county FIPS code. The matching county in
# dbGDF gets numIncidents incremented and the row appended to incidentList.
# Whenever a row's year stops matching years[currentYear], the current frame
# is pickled as "<year>.pkl" and dbGDF is reset from the base template.
#
# Assumes gaDF is ordered chronologically, starts in years[0] and skips none
# of the listed years -- TODO confirm against stage3.csv. If a rollover moves
# past the end of `years`, the next years[currentYear] lookup raises IndexError.


def _lookup_block(session, lat, long, yr, timeout):
  """Query the FCC block-find endpoint for one coordinate pair.

  Returns the decoded JSON payload, or None when a coordinate is missing,
  the request fails, or the response body is not valid JSON. Returning None
  (rather than leaving the previous payload in place) guarantees a failed
  lookup can never be credited to the previous row's county.
  """
  if pd.isna(lat) or pd.isna(long):
    # nothing to geocode; skip the network round trip
    return None

  # NOTE(review): confirm which censusYear values the FCC API accepts;
  # the incident year is passed through unchanged here.
  params = urllib.parse.urlencode({'latitude': lat, 'longitude': long, 'censusYear': yr, 'format':'json'})
  url = 'https://geo.fcc.gov/api/census/block/find?' + params

  try:
    # JSON decode errors raised by requests/json are ValueError subclasses
    return session.get(url, timeout=timeout).json()
  except (requests.RequestException, ValueError) as err:
    print("FCC lookup failed: {}".format(err))
    return None


def _save_year(frame, directory, year):
  """Pickle `frame` to '<directory><year>.pkl' and report the save."""
  path = "{}{:d}{}".format(directory, year, ".pkl")
  frame.to_pickle(path)
  print("incremental save successful for year {:d}".format(year))


length = len(gaDF)
years = [2013, 2014, 2015, 2016, 2017, 2018]

parent_directory = "/content/drive/MyDrive/Junior Year/IW 07/Processed Gun Archive + County Data/"
base_template_path = "/content/drive/MyDrive/Junior Year/IW 07/dbGDFbase.pkl"

request_timeout = 30   # seconds; without a timeout a stalled request blocks forever
progress_every = 1000  # rows between progress lines, to avoid flooding the output

currentRow = 0
currentYear = 0

# one Session reuses the HTTPS connection across all lookups
with requests.Session() as session:
  while currentRow < length:
    row = gaDF.loc[currentRow]

    # get long lat
    lat = row['latitude']
    long = row['longitude']
    yr = dt.strptime(row['date'], '%m/%d/%Y').year

    # year rollover: persist the finished year and start from a clean template
    if yr != years[currentYear]:
      _save_year(dbGDF, parent_directory, years[currentYear])
      currentYear += 1
      dbGDF = pd.read_pickle(base_template_path)

    data = _lookup_block(session, lat, long, yr, request_timeout)

    # get FIPS code and update dbGDF
    try:
      fips = data['County']['FIPS']

      # update county info
      dbGDF.loc[fips, 'numIncidents'] += 1
      dbGDF.loc[fips, 'incidentList'].append(row)
    except KeyError:
      # payload lacks County/FIPS, or the FIPS code is not a GEOID in dbGDF
      print("KeyError")
    except TypeError:
      # lookup returned None (failed request) or a null County entry
      print("TypeError")

    currentRow += 1
    if currentRow % progress_every == 0 or currentRow == length:
      print("processed {:d} of {:d} rows".format(currentRow, length))

# The rollover above only fires when the NEXT year begins, so the last year
# in the data has to be saved explicitly once the loop is done.
if length > 0:
  _save_year(dbGDF, parent_directory, years[currentYear])


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
runthrough no# 77103 successful
runthrough no# 77104 successful
runthrough no# 77105 successful
runthrough no# 77106 successful
runthrough no# 77107 successful
runthrough no# 77108 successful
runthrough no# 77109 successful
runthrough no# 77110 successful
runthrough no# 77111 successful
runthrough no# 77112 successful
runthrough no# 77113 successful
runthrough no# 77114 successful
runthrough no# 77115 successful
runthrough no# 77116 successful
runthrough no# 77117 successful
runthrough no# 77118 successful
runthrough no# 77119 successful
runthrough no# 77120 successful
runthrough no# 77121 successful
runthrough no# 77122 successful
runthrough no# 77123 successful
runthrough no# 77124 successful
runthrough no# 77125 successful
runthrough no# 77126 successful
runthrough no# 77127 successful
runthrough no# 77128 successful
runthrough no# 77129 successful
runthrough no# 77130 successful
runthrough no# 77131 successful
runthro

In [None]:
# Load the pickled county frame saved as 2013.pkl.
pkl_2013_path = "/content/drive/MyDrive/Junior Year/IW 07/Processed Gun Archive + County Data/2013.pkl"
GDF13 = pd.read_pickle(pkl_2013_path)

In [None]:
# Show the five counties with the highest incident counts.
GDF13.nlargest(n=5, columns='numIncidents')

Unnamed: 0_level_0,STATEFP,COUNTYFP,NAME,geometry,numIncidents,incidentList
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
17031,17,31,Cook,"POLYGON ((-88.23819 42.06698, -88.23819 42.067...",687.0,"[[1/1/2017, 0, 1, 41.714, -87.6433, 1.0, 0::15..."
12031,12,31,Duval,"POLYGON ((-81.83121 30.46745, -81.82340 30.473...",336.0,"[[1/1/2017, 0, 0, 30.2196, -81.5867, 1.0, 0::2..."
24510,24,510,Baltimore,"POLYGON ((-76.71151 39.36621, -76.71151 39.366...",217.0,"[[1/1/2017, 0, 1, 39.2814, -76.6937, 1.0, nan,..."
22071,22,71,Orleans,"POLYGON ((-90.14007 29.94790, -90.13765 29.951...",214.0,"[[1/1/2017, 0, 1, 30.0352, -90.0081, 1.0, nan,..."
55079,55,79,Milwaukee,"POLYGON ((-87.99417 43.19255, -87.99331 43.192...",208.0,"[[1/1/2017, 0, 1, 43.0346, -87.9221, 1.0, 0::2..."
