# CDE - Explore Endpoints

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_colwidth', -1)

## Import Data

In [3]:
# load the lookup data

STATE = 'data/state.csv'
OFFENSE_SHORT = 'data/offense_short.csv'
YEAR_SINCE = 2010
YEAR_UNTIL = 2011

In [25]:
state_df = pd.read_csv(STATE)
state_df.state_abbr.to_list()
state_df.head()

['AK',
 'AL',
 'AR',
 'AS',
 'AZ',
 'CA',
 'CO',
 'CT',
 'CZ',
 'DC',
 'DE',
 'FL',
 'GA',
 'GM',
 'HI',
 'IA',
 'ID',
 'IL',
 'IN',
 'KS',
 'KY',
 'LA',
 'MA',
 'MD',
 'ME',
 'MI',
 'MN',
 'MO',
 'MS',
 'MT',
 'NE',
 'NC',
 'ND',
 'NH',
 'NJ',
 'NM',
 'NV',
 'NY',
 'OH',
 'OK',
 'OR',
 'PA',
 'PR',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VA',
 'VT',
 'WA',
 'WI',
 'WV',
 'WY',
 'MP',
 'OT',
 'VI']

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [16]:
offense_df = pd.read_csv(OFFENSE_SHORT)
offense_df.offense_type.to_list()

['aggravated-assault',
 'burglary',
 'larceny',
 'motor-vehicle-theft',
 'homicide',
 'rape',
 'robbery',
 'arson',
 'violent-crime',
 'property-crime']

In [6]:
# examine data for a random row

random_state = state_df.loc[10]['state_abbr']
random_offense = offense_df.loc[4]['offense_type']
random_state
random_offense

'DE'

'homicide'

In [10]:
config = {
    '{offense}': random_offense,
    '{stateAbbr}': random_state,
    '{variable}' : 'sex'
}
offender = ch.get_json(ch.OFFENDER_TKM_STATE, config)
offender['data']

[{'value': 31, 'data_year': 2001, 'month_num': 0, 'key': 'Male'},
 {'value': 28, 'data_year': 2002, 'month_num': 0, 'key': 'Male'},
 {'value': 25, 'data_year': 2003, 'month_num': 0, 'key': 'Male'},
 {'value': 17, 'data_year': 2004, 'month_num': 0, 'key': 'Male'},
 {'value': 39, 'data_year': 2005, 'month_num': 0, 'key': 'Male'},
 {'value': 74, 'data_year': 2006, 'month_num': 0, 'key': 'Male'},
 {'value': 49, 'data_year': 2007, 'month_num': 0, 'key': 'Male'},
 {'value': 60, 'data_year': 2008, 'month_num': 0, 'key': 'Male'},
 {'value': 42, 'data_year': 2009, 'month_num': 0, 'key': 'Male'},
 {'value': 68, 'data_year': 2010, 'month_num': 0, 'key': 'Male'},
 {'value': 55, 'data_year': 2011, 'month_num': 0, 'key': 'Male'},
 {'value': 47, 'data_year': 2012, 'month_num': 0, 'key': 'Male'},
 {'value': 26, 'data_year': 2013, 'month_num': 0, 'key': 'Male'},
 {'value': 47, 'data_year': 2014, 'month_num': 0, 'key': 'Male'},
 {'value': 53, 'data_year': 2015, 'month_num': 0, 'key': 'Male'},
 {'value':

In [17]:
offender

{'ui_type': 'pie_chart',
 'noun': 'Offender',
 'category': 'Offender demographic',
 'title': 'Offender Sex',
 'short_title': 'Sex',
 'ui_restriction': None,
 'keys': ['Male', 'Female', 'Unknown'],
 'data': [{'value': 31,
   'data_year': 2001,
   'month_num': 0,
   'key': 'Male',
   'state': 'DE',
   'offense': 'homicide'},
  {'value': 28,
   'data_year': 2002,
   'month_num': 0,
   'key': 'Male',
   'state': 'DE',
   'offense': 'homicide'},
  {'value': 25,
   'data_year': 2003,
   'month_num': 0,
   'key': 'Male',
   'state': 'DE',
   'offense': 'homicide'},
  {'value': 17,
   'data_year': 2004,
   'month_num': 0,
   'key': 'Male',
   'state': 'DE',
   'offense': 'homicide'},
  {'value': 39,
   'data_year': 2005,
   'month_num': 0,
   'key': 'Male',
   'state': 'DE',
   'offense': 'homicide'},
  {'value': 74,
   'data_year': 2006,
   'month_num': 0,
   'key': 'Male',
   'state': 'DE',
   'offense': 'homicide'},
  {'value': 49,
   'data_year': 2007,
   'month_num': 0,
   'key': 'Male',


In [13]:
offender_data = offender['data']
for item in offender_data:
    item['state'] = random_state
    item['offense'] = random_offense

offender_data[0:5]

[{'value': 31,
  'data_year': 2001,
  'month_num': 0,
  'key': 'Male',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 28,
  'data_year': 2002,
  'month_num': 0,
  'key': 'Male',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 25,
  'data_year': 2003,
  'month_num': 0,
  'key': 'Male',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 17,
  'data_year': 2004,
  'month_num': 0,
  'key': 'Male',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 39,
  'data_year': 2005,
  'month_num': 0,
  'key': 'Male',
  'state': 'DE',
  'offense': 'homicide'}]

In [26]:
# loop over all states and offense types and retrieve info via API
# aggregate the info in offender_list, which will be the later converted into a dataframe

offender_list =[]
for state in state_df.state_abbr.to_list():
    for offense in offense_df.offense_type.to_list():
        config = {
            '{offense}': offense,
            '{stateAbbr}': state,
            '{variable}' : 'sex'
            }
        offender = ch.get_json(ch.OFFENDER_TKM_STATE, config)
        offender = offender['data']
        
        for item in offender:
            item['state'] = state
            item['offense'] = offense
        offender_list += offender


In [27]:

offender_df = pd.DataFrame(offender_list)
offender_df.head()


Unnamed: 0,data_year,key,month_num,offense,state,value
0,1991,Male,0,aggravated-assault,AL,12152
1,1992,Male,0,aggravated-assault,AL,14974
2,2006,Male,0,aggravated-assault,AL,34
3,2007,Male,0,aggravated-assault,AL,35
4,2008,Male,0,aggravated-assault,AL,34


In [30]:
# remove unnecessery column
offender_df.drop(['month_num'], inplace = True, axis = 1)
offender_df.head(2)

Unnamed: 0,data_year,key,offense,state,value
0,1991,Male,aggravated-assault,AL,12152
1,1992,Male,aggravated-assault,AL,14974


In [31]:
# rename colum 'key' to 'gender', 'data_year' to 'year'
offender_df.rename(columns={'key': 'gender', 'data_year': 'year'}, inplace=True)
offender_df.head(2)

Unnamed: 0,year,gender,offense,state,value
0,1991,Male,aggravated-assault,AL,12152
1,1992,Male,aggravated-assault,AL,14974


In [36]:
# remove data before 2010
offender_df = offender_df.loc[offender_df.year>=2010]
offender_df.head(2)

Unnamed: 0,year,gender,offense,state,value
6,2010,Male,aggravated-assault,AL,13
7,2011,Male,aggravated-assault,AL,11


## Export to csv

In [37]:
save_path = r'data/offender_state.csv'
offender_df.to_csv(save_path, index=False)
f'{round(os.path.getsize(save_path) /1e6, 2)} mb'

'0.31 mb'