# Arrest TKM Endpoint

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one;
# later cells rely on this to show several results (e.g. head() and info()) at once.
InteractiveShell.ast_node_interactivity = "all"
# Never truncate wide cell contents when rendering DataFrames.
# `None` is the supported "no limit" value since pandas 1.0 (the old `-1`
# sentinel raises on pandas 2.x); very old pandas only accepts an int, so
# fall back to the legacy sentinel there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

## Import Lookups

In [3]:
# Paths to the lookup CSVs that drive the API queries (offense slugs and states).
OFFENSE_MEDIUM = 'data/lookup_offense_medium.csv'
STATE = 'data/state.csv'
# Year range substituted into the API request as strings.
# NOTE(review): the sample response below contains both 2010 and 2018, so the
# range looks inclusive on both ends — confirm against the API docs.
YEAR_SINCE = '2010'
YEAR_UNTIL = '2018'

In [4]:
# Load the state lookup table and preview its first rows.
state_df = pd.read_csv(STATE)
state_df.head(n=5)

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [5]:
# Load the offense lookup table and preview its first rows.
offense_df = pd.read_csv(OFFENSE_MEDIUM)
offense_df.head(n=5)

Unnamed: 0,offense_type
0,aggravated-assault
1,arson
2,burglary
3,curfew
4,disorderly-conduct


## Get API Data

In [6]:
# Pick one fixed state/offense pair (row labels 25 and 6 of the lookups)
# to probe the endpoint with before running the full crawl.

state_test = state_df.loc[25, 'state_abbr']
offense_test = offense_df.loc[6, 'offense_type']

state_test
offense_test

'MI'

'drug-grand-total'

In [7]:
# Single trial request for the test state/offense pair, to inspect the raw
# response shape before looping over every combination.
# NOTE(review): the keys look like URL-template placeholders that ch.get_json
# substitutes into ch.ARREST_TKM_STATE_OFFENSE — confirm in crime_helper.
config = {
    '{stateAbbr}': state_test,
    '{offense}': offense_test,
    '{variable}': 'male',   # the response below is titled 'Male Arrests By Age'
    '{since}': YEAR_SINCE,
    '{until}': YEAR_UNTIL,
}
endpoint_test = ch.get_json(ch.ARREST_TKM_STATE_OFFENSE, config)
endpoint_test

{'ui_type': 'basic_table',
 'noun': 'Arrestee',
 'category': 'Arrestee demographic',
 'title': 'Male Arrests By Age',
 'short_title': 'age',
 'ui_restriction': None,
 'keys': ['10-12',
  '13-14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20',
  '21',
  '22',
  '23',
  '24',
  '25-29',
  '30-34',
  '35-39',
  '40-44',
  '45-49',
  '50-54',
  '55-59',
  '60-64',
  '65 and over',
  'Under 10'],
 'data': [{'value': 36, 'data_year': 2010, 'month_num': 0, 'key': '10-12'},
  {'value': 28, 'data_year': 2011, 'month_num': 0, 'key': '10-12'},
  {'value': 35, 'data_year': 2012, 'month_num': 0, 'key': '10-12'},
  {'value': 27, 'data_year': 2013, 'month_num': 0, 'key': '10-12'},
  {'value': 27, 'data_year': 2014, 'month_num': 0, 'key': '10-12'},
  {'value': 17, 'data_year': 2015, 'month_num': 0, 'key': '10-12'},
  {'value': 18, 'data_year': 2016, 'month_num': 0, 'key': '10-12'},
  {'value': 28, 'data_year': 2017, 'month_num': 0, 'key': '10-12'},
  {'value': 21, 'data_year': 2018, 'month_num': 0, '

In [8]:
# Loop over every offense / state / gender combination and flatten the API
# responses into one list of records (one record per returned data point).

def fetch_arrest_records(state, offense, gender):
    """Query the arrest endpoint once and return its data points as flat records.

    Parameters
    ----------
    state : str
        State abbreviation taken from the state lookup (e.g. 'AK').
    offense : str
        Offense slug taken from the offense lookup (e.g. 'arson').
    gender : str
        Value sent as the endpoint's '{variable}'; 'male' or 'female' here.

    Returns
    -------
    list of dict
        One dict per entry of the response's 'data' list, with keys
        'state', 'offense', 'year', 'gender', 'age' and 'count'.
    """
    config = {
        '{stateAbbr}': state,
        '{offense}': offense,
        '{variable}': gender,
        '{since}': YEAR_SINCE,
        '{until}': YEAR_UNTIL,
    }
    result = ch.get_json(ch.ARREST_TKM_STATE_OFFENSE, config)

    return [
        {
            'state': state,
            'offense': offense,
            'year': res['data_year'],
            'gender': gender,
            'age': res['key'],      # the response keys are age brackets
            'count': res['value'],
        }
        for res in result['data']
    ]


results_ls = []

for offense in offense_df['offense_type']:
    print(offense)  # progress marker: one line per offense

    for state in state_df['state_abbr']:
        # males first, then females — same record order as two separate requests
        for gender in ('male', 'female'):
            results_ls.extend(fetch_arrest_records(state, offense, gender))

results_ls[:5]

aggravated-assault
arson
burglary
curfew
disorderly-conduct
dui
drug-grand-total
drunkenness
embezzlement
forgery
fraud
gambling-total
human-trafficking-commerical
human-trafficking-servitude
larceny
liqour-laws
motor-vehcile-theft
murder
offense-against-family
prostitution
prostitution-assisting
prostitution-prostitution
prostitution-purchasing
rape
robbery
runaway
sex-offenses
simple-assault
stolen-property
suspicion
vagrancy
vandalism
weapons


[{'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2010,
  'gender': 'male',
  'age': '10-12',
  'count': 11},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2011,
  'gender': 'male',
  'age': '10-12',
  'count': 16},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2012,
  'gender': 'male',
  'age': '10-12',
  'count': 6},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2013,
  'gender': 'male',
  'age': '10-12',
  'count': 9},
 {'state': 'AK',
  'offense': 'aggravated-assault',
  'year': 2014,
  'gender': 'male',
  'age': '10-12',
  'count': 11}]

## Create DataFrame from API

In [9]:
# Turn the collected records into a DataFrame, ordered for easier inspection.
endpoint_df = (
    pd.DataFrame(results_ls)
    .sort_values(by=['state', 'year', 'offense', 'gender'])
)
endpoint_df.head(n=10)
endpoint_df.info()

Unnamed: 0,age,count,gender,offense,state,year
198,10-12,5,female,aggravated-assault,AK,2010
207,13-14,7,female,aggravated-assault,AK,2010
216,15,10,female,aggravated-assault,AK,2010
225,16,6,female,aggravated-assault,AK,2010
234,17,15,female,aggravated-assault,AK,2010
243,18,25,female,aggravated-assault,AK,2010
252,19,21,female,aggravated-assault,AK,2010
261,20,20,female,aggravated-assault,AK,2010
270,21,29,female,aggravated-assault,AK,2010
279,22,16,female,aggravated-assault,AK,2010


<class 'pandas.core.frame.DataFrame'>
Int64Index: 541244 entries, 198 to 541001
Data columns (total 6 columns):
age        541244 non-null object
count      541244 non-null int64
gender     541244 non-null object
offense    541244 non-null object
state      541244 non-null object
year       541244 non-null int64
dtypes: int64(2), object(4)
memory usage: 28.9+ MB


In [10]:
# Sanity check: rows sharing the same state/year/offense/gender/age key.
# An empty preview means this endpoint returned no duplicates.

doop_df = endpoint_df[
    endpoint_df.duplicated(
        subset=['state', 'year', 'offense', 'gender', 'age'],
        keep=False,
    )
]
doop_df.head()

Unnamed: 0,age,count,gender,offense,state,year


In [11]:
# Collapse the age brackets into a single arrest count per
# state / year / gender / offense.
# 'count' is selected explicitly so the string-valued 'age' column is never
# aggregated: pandas >= 2.0 no longer drops non-numeric columns in
# groupby().sum() and would otherwise concatenate the age labels into the
# result (and into the exported CSV).

endpoint_age_df = (
    endpoint_df
    .groupby(['state', 'year', 'gender', 'offense'])['count']
    .sum()
    .reset_index()
)
endpoint_age_df.head(30)

Unnamed: 0,state,year,gender,offense,count
0,AK,2010,female,aggravated-assault,415
1,AK,2010,female,arson,11
2,AK,2010,female,burglary,78
3,AK,2010,female,curfew,3
4,AK,2010,female,disorderly-conduct,266
5,AK,2010,female,drug-grand-total,599
6,AK,2010,female,drunkenness,133
7,AK,2010,female,dui,1371
8,AK,2010,female,embezzlement,47
9,AK,2010,female,forgery,51


## Export to csv

In [12]:
# Write the aggregated counts to disk and report the resulting file size.
save_path = 'data/arrest_tkm_state.csv'
endpoint_age_df.to_csv(save_path, index=False)
f'{round(os.stat(save_path).st_size / 1e6, 2)} mb'

'0.73 mb'