# Arrest TKM Endpoint

Workflow: 2 <br>
Goal: Create a CSV for the arrest_tkm endpoint, containing arrest counts by state, year, gender, and offense. <br>

In [3]:
import os
import pandas as pd

import crime_helper as ch

In [4]:
# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Do not truncate long cell contents when displaying DataFrames.
# None is the supported "no limit" value on pandas >= 1.0 (a negative width is
# deprecated there and rejected by newer releases); older releases only accept
# an int, so fall back to the legacy -1 sentinel when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

## Import Lookups

In [5]:
# Lookup tables that drive the API requests (paths relative to the notebook).
OFFENSE_MEDIUM, STATE = 'data/lookup_offense_medium.csv', 'data/lookup_state.csv'

# Inclusive year range requested from the API; kept as strings because they
# are substituted into the request as-is.
YEAR_SINCE, YEAR_UNTIL = '2000', '2018'

In [6]:
# Load the state lookup table and preview its first rows.
state_df = pd.read_csv(filepath_or_buffer=STATE)
state_df.head(n=5)

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [7]:
# Load the offense lookup table and preview its first rows.
offense_df = pd.read_csv(filepath_or_buffer=OFFENSE_MEDIUM)
offense_df.head(n=5)

Unnamed: 0,offense_type
0,aggravated-assault
1,arson
2,burglary
3,curfew
4,disorderly-conduct


## Get API Data

In [8]:
# examine data for a random row

# Pick one arbitrary state/offense pair to try the endpoint with before
# running the full loop (single label-based lookup per value).
state_test = state_df.loc[25, 'state_abbr']
offense_test = offense_df.loc[6, 'offense_type']

state_test
offense_test

'MI'

'drug-grand-total'

In [7]:
# Placeholder -> value mapping for the sample request (male arrests for the
# sample state and offense over the configured year range).
config = {
    '{stateAbbr}': state_test,
    '{offense}': offense_test,
    '{variable}': 'male',
    '{since}': YEAR_SINCE,
    '{until}': YEAR_UNTIL,
}

# Fetch the sample response and display it to inspect its structure.
endpoint_test = ch.get_json(ch.ARREST_TKM_STATE_OFFENSE, config)
endpoint_test

{'ui_type': 'basic_table',
 'noun': 'Arrestee',
 'category': 'Arrestee demographic',
 'title': 'Male Arrests By Age',
 'short_title': 'age',
 'ui_restriction': None,
 'keys': ['10-12',
  '13-14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20',
  '21',
  '22',
  '23',
  '24',
  '25-29',
  '30-34',
  '35-39',
  '40-44',
  '45-49',
  '50-54',
  '55-59',
  '60-64',
  '65 and over',
  'Under 10'],
 'data': [{'value': 35, 'data_year': 2000, 'month_num': 0, 'key': '10-12'},
  {'value': 50, 'data_year': 2001, 'month_num': 0, 'key': '10-12'},
  {'value': 35, 'data_year': 2002, 'month_num': 0, 'key': '10-12'},
  {'value': 32, 'data_year': 2003, 'month_num': 0, 'key': '10-12'},
  {'value': 28, 'data_year': 2004, 'month_num': 0, 'key': '10-12'},
  {'value': 19, 'data_year': 2005, 'month_num': 0, 'key': '10-12'},
  {'value': 32, 'data_year': 2006, 'month_num': 0, 'key': '10-12'},
  {'value': 28, 'data_year': 2007, 'month_num': 0, 'key': '10-12'},
  {'value': 24, 'data_year': 2008, 'month_num': 0, '

In [8]:
# loop over all lookup values and aggregate full results

# Query the endpoint for every offense x state x gender combination and
# flatten each response into one record per (state, offense, year, gender, age).
# Records are appended in the order: offense -> state -> male, then female.

results_ls = []

for offense in offense_df['offense_type']:
    print(offense)  # progress marker: one line per offense

    for state in state_df['state_abbr']:
        # Male and female counts come from the same endpoint; only the
        # '{variable}' placeholder differs between the two requests.
        for gender in ('male', 'female'):
            config = {
                '{stateAbbr}': state,
                '{offense}': offense,
                '{variable}': gender,
                '{since}': YEAR_SINCE,
                '{until}': YEAR_UNTIL,
            }
            result = ch.get_json(ch.ARREST_TKM_STATE_OFFENSE, config)

            # Some combinations come back without a 'data' key; report them
            # and move on instead of failing the whole run.
            if 'data' not in result:
                print(f'no data: {state} {offense} {gender}')
                continue

            results_ls.extend(
                {
                    'state': state,
                    'offense': offense,
                    'year': res['data_year'],
                    'gender': gender,
                    'age': res['key'],
                    'count': res['value'],
                }
                for res in result['data']
            )

results_ls[:5]

aggravated-assault
arson
burglary
curfew
disorderly-conduct
dui
drug-grand-total
drunkenness
embezzlement
forgery
no data: NY forgery female
fraud
gambling-total
human-trafficking-commerical
human-trafficking-servitude
larceny
liqour-laws
motor-vehcile-theft
murder
offense-against-family
no data: TN offense-against-family male
prostitution
prostitution-assisting
prostitution-prostitution
prostitution-purchasing
rape
robbery
runaway
sex-offenses
simple-assault
stolen-property
suspicion
vagrancy
vandalism
weapons


[{'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2000,
  'gender': 'male',
  'age': '10-12',
  'count': 12},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2001,
  'gender': 'male',
  'age': '10-12',
  'count': 7},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2002,
  'gender': 'male',
  'age': '10-12',
  'count': 11},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2003,
  'gender': 'male',
  'age': '10-12',
  'count': 7},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2004,
  'gender': 'male',
  'age': '10-12',
  'count': 2}]

## Create DataFrame from API

In [9]:
# Turn the flat list of records into a DataFrame, ordered by
# state, year, offense and gender, then inspect it.
endpoint_df = (
    pd.DataFrame(results_ls)
    .sort_values(by=['state', 'year', 'offense', 'gender'])
)
endpoint_df.head(10)
endpoint_df.info()

Unnamed: 0,age,count,gender,offense,state,year
418,10-12,3,female,aggravated-assault,AK,2000
437,13-14,8,female,aggravated-assault,AK,2000
456,15,4,female,aggravated-assault,AK,2000
475,16,5,female,aggravated-assault,AK,2000
494,17,9,female,aggravated-assault,AK,2000
513,18,7,female,aggravated-assault,AK,2000
532,19,5,female,aggravated-assault,AK,2000
551,20,4,female,aggravated-assault,AK,2000
570,21,8,female,aggravated-assault,AK,2000
589,22,9,female,aggravated-assault,AK,2000


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1124244 entries, 418 to 1123781
Data columns (total 6 columns):
age        1124244 non-null object
count      1124244 non-null int64
gender     1124244 non-null object
offense    1124244 non-null object
state      1124244 non-null object
year       1124244 non-null int64
dtypes: int64(2), object(4)
memory usage: 60.0+ MB


In [10]:
# no duplicates in this endpoint

# Collect every row whose (state, year, offense, gender, age) key occurs more
# than once; the run recorded here returned no such rows.
dup_keys = ['state', 'year', 'offense', 'gender', 'age']
is_dup = endpoint_df.duplicated(subset=dup_keys, keep=False)
doop_df = endpoint_df[is_dup]
doop_df.head()

Unnamed: 0,age,count,gender,offense,state,year


In [11]:
# group the different ages into one count

# Collapse the age buckets: one total count per state/year/gender/offense.
# 'count' is selected explicitly so the string 'age' column never takes part
# in the sum (newer pandas no longer drops non-numeric columns silently and
# would concatenate the age labels into the result instead).
endpoint_age_df = (
    endpoint_df
    .groupby(['state', 'year', 'gender', 'offense'])['count']
    .sum()
    .reset_index()
)
endpoint_age_df.head(30)

Unnamed: 0,state,year,gender,offense,count
0,AK,2000,female,aggravated-assault,194
1,AK,2000,female,arson,3
2,AK,2000,female,burglary,49
3,AK,2000,female,curfew,1
4,AK,2000,female,disorderly-conduct,221
5,AK,2000,female,drug-grand-total,312
6,AK,2000,female,drunkenness,5
7,AK,2000,female,dui,1000
8,AK,2000,female,embezzlement,1
9,AK,2000,female,forgery,41


## Export to csv

In [12]:
# Write the aggregated counts to disk (without the index column) and report
# the resulting file size in megabytes.
save_path = 'data/arrest_tkm_state.csv'
endpoint_age_df.to_csv(save_path, index=False)

size_mb = os.path.getsize(save_path) / 1e6
f'{round(size_mb, 2)} mb'

'1.52 mb'