# Offender TKM Endpoints

Workflow: 2 <br>
Goal: Create a CSV of offender counts by race, state, and offense from the offender_tkm endpoint. <br>

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression in a cell, not just the last one
# (later cells rely on this to show several results at once).
InteractiveShell.ast_node_interactivity = "all"
# Show full column contents instead of truncating them. pandas 1.0 deprecated
# the negative sentinel in favour of None; older releases reject None with a
# ValueError, so fall back to the legacy -1 there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

## Load Lookup Data

In [3]:
# Locations of the lookup tables (state codes and short offense names);
# the files themselves are read in the following cells.
OFFENSE_SHORT = 'data/lookup_offense_short.csv'
STATE = 'data/lookup_state.csv'

In [4]:
# Read the state lookup table, then show every abbreviation and the first rows.
state_df = pd.read_csv(STATE)
state_df['state_abbr'].tolist()
state_df.head(5)

['AK',
 'AL',
 'AR',
 'AS',
 'AZ',
 'CA',
 'CO',
 'CT',
 'CZ',
 'DC',
 'DE',
 'FL',
 'GA',
 'GM',
 'HI',
 'IA',
 'ID',
 'IL',
 'IN',
 'KS',
 'KY',
 'LA',
 'MA',
 'MD',
 'ME',
 'MI',
 'MN',
 'MO',
 'MS',
 'MT',
 'NE',
 'NC',
 'ND',
 'NH',
 'NJ',
 'NM',
 'NV',
 'NY',
 'OH',
 'OK',
 'OR',
 'PA',
 'PR',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VA',
 'VT',
 'WA',
 'WI',
 'WV',
 'WY',
 'MP',
 'OT',
 'VI']

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [5]:
# Read the short offense lookup table and list the offense types it contains.
offense_df = pd.read_csv(OFFENSE_SHORT)
offense_df['offense_type'].tolist()

['aggravated-assault',
 'burglary',
 'larceny',
 'motor-vehicle-theft',
 'homicide',
 'rape',
 'robbery',
 'arson',
 'violent-crime',
 'property-crime']

## Get Data from API

In [6]:
# Pick one sample state/offense pair to inspect. Despite the variable names,
# the rows are fixed: index 10 of the states and index 4 of the offenses.

random_state = state_df.loc[10, 'state_abbr']
random_offense = offense_df.loc[4, 'offense_type']
random_state
random_offense

'DE'

'homicide'

In [7]:
# Placeholder -> value mapping for the sample request.
# NOTE(review): presumably ch.get_json substitutes these placeholders into the
# ch.OFFENDER_TKM_STATE endpoint template - confirm in crime_helper.
config = {'{offense}': random_offense, '{stateAbbr}': random_state, '{variable}': 'race'}
offender = ch.get_json(ch.OFFENDER_TKM_STATE, config)
# Show only the records of the response.
offender['data']

[{'value': 0, 'data_year': 2001, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2002, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2003, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2004, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2005, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2006, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2007, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2008, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2009, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2010, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2011, 'month_num': 0, 'key': 'Asian'},
 {'value': 1, 'data_year': 2012, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2013, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2014, 'month_num': 0, 'key': 'Asian'},
 {'value': 0, 'data_year': 2015, 'month_num': 0, 'key': 'Asian'},
 {'value':

In [8]:
# Display the complete response, including the metadata that surrounds 'data'.
offender

{'ui_type': 'basic_table',
 'noun': 'Offender',
 'category': 'Offender demographic',
 'title': 'Offender Race',
 'short_title': 'Race',
 'ui_restriction': None,
 'keys': ['Asian',
  'Native Hawaiian',
  'Black or African American',
  'American Indian or Alaska Native',
  'White',
  'Unknown'],
 'data': [{'value': 0, 'data_year': 2001, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2002, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2003, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2004, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2005, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2006, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2007, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2008, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2009, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year': 2010, 'month_num': 0, 'key': 'Asian'},
  {'value': 0, 'data_year':

In [9]:
# The returned records carry no state or offense, so tag each one with the
# pair that was requested before previewing the first five.
offender_data = offender['data']
for record in offender_data:
    record.update(state=random_state, offense=random_offense)

offender_data[:5]

[{'value': 0,
  'data_year': 2001,
  'month_num': 0,
  'key': 'Asian',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 0,
  'data_year': 2002,
  'month_num': 0,
  'key': 'Asian',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 0,
  'data_year': 2003,
  'month_num': 0,
  'key': 'Asian',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 0,
  'data_year': 2004,
  'month_num': 0,
  'key': 'Asian',
  'state': 'DE',
  'offense': 'homicide'},
 {'value': 0,
  'data_year': 2005,
  'month_num': 0,
  'key': 'Asian',
  'state': 'DE',
  'offense': 'homicide'}]

In [10]:
# Query the API once per (state, offense) combination and collect every
# returned record in offender_list, which is later converted into a dataframe.

all_states = state_df['state_abbr'].tolist()
all_offenses = offense_df['offense_type'].tolist()

offender_list = []
for state in all_states:
    print(state)  # progress indicator: one line per state
    for offense in all_offenses:
        config = {'{offense}': offense, '{stateAbbr}': state, '{variable}': 'race'}
        offender = ch.get_json(ch.OFFENDER_TKM_STATE, config)['data']

        # Tag each record with the request it came from.
        for record in offender:
            record.update(state=state, offense=offense)
        offender_list.extend(offender)

offender_list[:5]

AK
AL
AR
AS
AZ
CA
CO
CT
CZ
DC
DE
FL
GA
GM
HI
IA
ID
IL
IN
KS
KY
LA
MA
MD
ME
MI
MN
MO
MS
MT
NE
NC
ND
NH
NJ
NM
NV
NY
OH
OK
OR
PA
PR
RI
SC
SD
TN
TX
UT
VA
VT
WA
WI
WV
WY
MP
OT
VI


[{'value': 17,
  'data_year': 1991,
  'month_num': 0,
  'key': 'Asian',
  'state': 'AL',
  'offense': 'aggravated-assault'},
 {'value': 18,
  'data_year': 1992,
  'month_num': 0,
  'key': 'Asian',
  'state': 'AL',
  'offense': 'aggravated-assault'},
 {'value': 0,
  'data_year': 2006,
  'month_num': 0,
  'key': 'Asian',
  'state': 'AL',
  'offense': 'aggravated-assault'},
 {'value': 0,
  'data_year': 2007,
  'month_num': 0,
  'key': 'Asian',
  'state': 'AL',
  'offense': 'aggravated-assault'},
 {'value': 0,
  'data_year': 2008,
  'month_num': 0,
  'key': 'Asian',
  'state': 'AL',
  'offense': 'aggravated-assault'}]

## Create DataFrame

In [13]:
# One dataframe row per collected API record; preview the first rows.
offender_df = pd.DataFrame(data=offender_list)
offender_df.head(5)

Unnamed: 0,data_year,key,month_num,offense,state,value
0,1991,Asian,0,aggravated-assault,AL,17
1,1992,Asian,0,aggravated-assault,AL,18
2,2006,Asian,0,aggravated-assault,AL,0
3,2007,Asian,0,aggravated-assault,AL,0
4,2008,Asian,0,aggravated-assault,AL,0


In [14]:
# drop the 'month_num' column, which is not needed in the export
offender_df = offender_df.drop(columns=['month_num'])
offender_df.head(2)

Unnamed: 0,data_year,key,offense,state,value
0,1991,Asian,aggravated-assault,AL,17
1,1992,Asian,aggravated-assault,AL,18


In [15]:
# rename column 'key' to 'race' and 'data_year' to 'year'
offender_df = offender_df.rename(columns={'key': 'race', 'data_year': 'year'})
offender_df.head(2)

Unnamed: 0,year,race,offense,state,value
0,1991,Asian,aggravated-assault,AL,17
1,1992,Asian,aggravated-assault,AL,18


In [16]:
# keep only the rows for 2010 and later
from_2010 = offender_df['year'] >= 2010
offender_df = offender_df[from_2010]
offender_df.head(2)

Unnamed: 0,year,race,offense,state,value
6,2010,Asian,aggravated-assault,AL,0
7,2011,Asian,aggravated-assault,AL,0


In [17]:
# Put the columns in export order and rename the count column to 'offenders'.
export_columns = ['state', 'year', 'race', 'offense', 'value']
offender_df = offender_df[export_columns].rename(columns={'value': 'offenders'})
offender_df.head()

Unnamed: 0,state,year,race,offense,offenders
6,AL,2010,Asian,aggravated-assault,0
7,AL,2011,Asian,aggravated-assault,0
8,AL,2012,Asian,aggravated-assault,0
9,AL,2013,Asian,aggravated-assault,0
10,AL,2014,Asian,aggravated-assault,0


## Export to csv

In [18]:
# Write the result without the index column, then report the size of the
# written file in units of 1e6 bytes.
save_path = r'data/offender_race_state.csv'
offender_df.to_csv(save_path, index=False)
size = os.path.getsize(save_path) / 1e6
f'{round(size, 2)} mb'

'0.79 mb'