# CDE - Explore Endpoints

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_colwidth', -1)

## Import Lookups

In [3]:
# load the lookup data

STATE = 'data/state.csv'
AGENCY = 'data/agency.csv'
OFFENSE_SHORT = 'data/offense_short.csv'
YEAR_SINCE = 2010
YEAR_UNTIL = 2011

In [4]:
state_df = pd.read_csv(STATE)
state_df.head()

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [5]:
offense_df = pd.read_csv(OFFENSE_SHORT)
offense_df.head()

Unnamed: 0,offense_type
0,aggravated-assault
1,burglary
2,larceny
3,motor-vehicle-theft
4,homicide


## Get API Data

In [6]:
# examine data for a random row

random_state = state_df.loc[10]['state_abbr']
random_offense = offense_df.loc[4]['offense_type']
random_state
random_offense

'DE'

'homicide'

In [7]:
config = {
    '{offense}': random_offense,
    '{stateAbbr}': random_state,
}
offense = ch.get_json(ch.OFFENSE_STATE, config)

In [8]:
offense

{'ui_type': 'text',
 'noun': 'offense',
 'category': None,
 'title': 'Offense Count',
 'short_title': None,
 'ui_restriction': None,
 'keys': ['Incident Count', 'Offense Count'],
 'data': [{'value': 25,
   'data_year': 2001,
   'month_num': 0,
   'key': 'Incident Count'},
  {'value': 29, 'data_year': 2002, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 21, 'data_year': 2003, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 27, 'data_year': 2004, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 38, 'data_year': 2005, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 39, 'data_year': 2006, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 39, 'data_year': 2007, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 54, 'data_year': 2008, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 40, 'data_year': 2009, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 51, 'data_year': 2010, 'month_num': 0, 'key': 'Incident Count'},
  {'value': 46, 'data_year': 2011,

In [9]:
offense['data'][0]['value']
offense['data'][0]['data_year']

25

2001

In [10]:
# loop over all lookup values and aggregate full results

offense_ls = []

for idx_off, row_off in offense_df.iterrows():
    offense = row_off['offense_type']
    print(offense)
    
    for idx_stt, row_stt in state_df.iterrows():
        state = row_stt['state_abbr']
        
        config = {
            '{offense}': offense,
            '{stateAbbr}': state,
        }
        offense_result = ch.get_json(ch.OFFENSE_STATE, config)
        
        for res in offense_result['data']:
            new_dx = {
                'state': state,
                'offense': offense,
                'year': res['data_year'],
                'count': res['value'],
            }
            offense_ls.append(new_dx)
            
offense_ls[:5]

aggravated-assault
burglary
larceny
motor-vehicle-theft
homicide
rape
robbery
arson
violent-crime
property-crime


[{'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 1991,
  'count': 21909},
 {'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 1992,
  'count': 17637},
 {'state': 'AL', 'offense': 'aggravated-assault', 'year': 2006, 'count': 30},
 {'state': 'AL', 'offense': 'aggravated-assault', 'year': 2007, 'count': 31},
 {'state': 'AL', 'offense': 'aggravated-assault', 'year': 2008, 'count': 32}]

## Create DataFrame from API

In [29]:
offense_df = pd.DataFrame(offense_ls)
offense_df = offense_df[['state', 'year', 'offense', 'count']]
offense_df = offense_df.sort_values(['state', 'year', 'offense'])
offense_df.head()
offense_df.info()

Unnamed: 0,state,year,offense,count
0,AL,1991,aggravated-assault,21909
15,AL,1991,aggravated-assault,21909
10238,AL,1991,arson,92
10252,AL,1991,arson,92
1474,AL,1991,burglary,47915


<class 'pandas.core.frame.DataFrame'>
Int64Index: 14646 entries, 0 to 13151
Data columns (total 4 columns):
state      14646 non-null object
year       14646 non-null int64
offense    14646 non-null object
count      14646 non-null int64
dtypes: int64(2), object(2)
memory usage: 572.1+ KB


In [32]:
# not sure why the api returns duplicate rows

doop_df = offense_df.loc[offense_df.duplicated(subset=['state', 'year', 'offense'], keep=False) == True]
doop_df.head()

Unnamed: 0,state,year,offense,count
0,AL,1991,aggravated-assault,21909
15,AL,1991,aggravated-assault,21909
10238,AL,1991,arson,92
10252,AL,1991,arson,92
1474,AL,1991,burglary,47915


In [34]:
offense_df = offense_df.drop_duplicates(subset=['state', 'year', 'offense'])
offense_df = offense_df.reset_index()
offense_df = offense_df[['state', 'year', 'offense', 'count']]
offense_df.head()
offense_df.info()

Unnamed: 0,state,year,offense,count
0,AL,1991,aggravated-assault,21909
1,AL,1991,arson,92
2,AL,1991,burglary,47915
3,AL,1991,homicide,434
4,AL,1991,larceny,107142


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7323 entries, 0 to 7322
Data columns (total 4 columns):
state      7323 non-null object
year       7323 non-null int64
offense    7323 non-null object
count      7323 non-null int64
dtypes: int64(2), object(2)
memory usage: 228.9+ KB


## Export to csv

In [27]:
save_path = r'data/offense_state.csv'
offense_df.to_csv(save_path, index=False)
f'{round(os.path.getsize(save_path) /1e6, 2)} mb'

'0.21 mb'