# Victim TKM Endpoint

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression in a cell, not only the last one.
# Later cells depend on this to show several results from a single cell
# (e.g. two sample values, or head() followed by info()).
InteractiveShell.ast_node_interactivity = "all"
# Never truncate cell contents when rendering DataFrames.
# `None` is the supported "no limit" value on pandas >= 1.0; the old `-1`
# sentinel is deprecated there and rejected by newer releases. Older pandas
# only accepts an int for this option, so fall back to -1 when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

## Import Lookups

In [3]:
# Relative paths of the lookup CSVs loaded by the cells that follow.
OFFENSE_SHORT, STATE, AGENCY = (
    'data/offense_short.csv',
    'data/state.csv',
    'data/agency.csv',
)

# Year bounds; not referenced by any cell visible in this notebook.
YEAR_SINCE, YEAR_UNTIL = 2010, 2018

In [4]:
# Offense lookup: the offense_type values are substituted into API requests below.
offense_df = pd.read_csv(filepath_or_buffer=OFFENSE_SHORT)
offense_df.head(n=5)

Unnamed: 0,offense_type
0,aggravated-assault
1,burglary
2,larceny
3,motor-vehicle-theft
4,homicide


In [5]:
# State lookup: only the state_abbr column is used by the API cells below.
state_df = pd.read_csv(filepath_or_buffer=STATE)
state_df.head(n=5)

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [6]:
# Agency lookup: the ori column supplies the agency identifier for the agency-level request.
agency_df = pd.read_csv(filepath_or_buffer=AGENCY)
agency_df.head(n=5)

Unnamed: 0,agency_name,agency_type_name,county_name,division_name,latitude,longitude,nibrs,nibrs_start_date,ori,region_desc,region_name,state_abbr,state_name
0,Anchorage Police Department,Municipality,ANCHORAGE,Pacific,61.17425,-149.284329,False,,AK0010100,Region IV,West,AK,Alaska
1,Fairbanks Police Department,Borough,FAIRBANKS NORTH STAR,Pacific,64.83945,-147.71942,False,,AK0010200,Region IV,West,AK,Alaska
2,Juneau Police Department,City and Borough,JUNEAU,Pacific,58.356556,-134.50731,False,,AK0010300,Region IV,West,AK,Alaska
3,Ketchikan Police Department,Borough,KETCHIKAN GATEWAY,Pacific,55.449938,-131.106685,False,,AK0010400,Region IV,West,AK,Alaska
4,Kodiak Police Department,Borough,KODIAK ISLAND,Pacific,57.8049,-152.37332,False,,AK0010500,Region IV,West,AK,Alaska


## Get API Data

In [7]:
# Spot-check one fixed offense/agency pair. The row labels (4 and 41) are
# hard-coded, not sampled, so re-running the cell always picks the same pair.
# A single .loc[row, column] lookup replaces the chained .loc[row][column] form,
# which builds a throwaway row Series before indexing into it.
random_offense = offense_df.loc[4, 'offense_type']
random_agency = agency_df.loc[41, 'ori']

random_offense
random_agency

'homicide'

'AL0010200'

In [8]:
# Placeholder -> value mapping handed to the helper together with the
# agency-level endpoint constant.
# NOTE(review): presumably get_json fills these placeholders into the endpoint
# URL template and returns the parsed JSON - confirm in crime_helper.
config = {'{offense}': random_offense, '{ori}': random_agency, '{variable}': 'sex'}
victim_test = ch.get_json(ch.VICTIM_TKM_AGENCY, config)
victim_test

{'ui_type': 'pie_chart',
 'noun': 'Victim',
 'category': 'Victim demographic',
 'title': 'Victim Sex',
 'short_title': 'Sex',
 'ui_restriction': None,
 'keys': ['Male', 'Female', 'Unknown'],
 'data': [{'value': 119, 'data_year': 1991, 'month_num': 0, 'key': 'Male'},
  {'value': 64, 'data_year': 1992, 'month_num': 0, 'key': 'Male'},
  {'value': 22, 'data_year': 1991, 'month_num': 0, 'key': 'Female'},
  {'value': 16, 'data_year': 1992, 'month_num': 0, 'key': 'Female'},
  {'value': 1, 'data_year': 1991, 'month_num': 0, 'key': 'Unknown'},
  {'value': 1, 'data_year': 1992, 'month_num': 0, 'key': 'Unknown'}],
 'precise_data': []}

In [9]:
# Show the three fields of the first record that the aggregation loop later
# copies into its result rows.
first_record = victim_test['data'][0]
first_record['data_year']
first_record['key']
first_record['value']

1991

'Male'

119

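### Not enough data at agency level

The sample agency-level response above only covers 1991–1992, so the rest of the notebook queries the state-level endpoint instead.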

In [10]:
# Spot-check one fixed offense/state pair. The row labels (5 and 41) are
# hard-coded, not sampled, so re-running the cell always picks the same pair.
# A single .loc[row, column] lookup replaces the chained .loc[row][column] form,
# which builds a throwaway row Series before indexing into it.
offense_test = offense_df.loc[5, 'offense_type']
state_test = state_df.loc[41, 'state_abbr']

offense_test
state_test

'rape'

'PA'

In [11]:
# Same request as the agency-level check, but keyed by state abbreviation and
# sent to the state-level endpoint constant.
# NOTE(review): presumably get_json fills these placeholders into the endpoint
# URL template and returns the parsed JSON - confirm in crime_helper.
config = {'{offense}': offense_test, '{stateAbbr}': state_test, '{variable}': 'sex'}
victim_test = ch.get_json(ch.VICTIM_TKM_STATE, config)
victim_test

{'ui_type': 'pie_chart',
 'noun': 'Victim',
 'category': 'Victim demographic',
 'title': 'Victim Sex',
 'short_title': 'Sex',
 'ui_restriction': None,
 'keys': ['Male', 'Female', 'Unknown'],
 'data': [{'value': 3, 'data_year': 2013, 'month_num': 0, 'key': 'Male'},
  {'value': 5, 'data_year': 2014, 'month_num': 0, 'key': 'Male'},
  {'value': 5, 'data_year': 2015, 'month_num': 0, 'key': 'Male'},
  {'value': 5, 'data_year': 2016, 'month_num': 0, 'key': 'Male'},
  {'value': 1, 'data_year': 2017, 'month_num': 0, 'key': 'Male'},
  {'value': 4, 'data_year': 2018, 'month_num': 0, 'key': 'Male'},
  {'value': 12, 'data_year': 2013, 'month_num': 0, 'key': 'Female'},
  {'value': 43, 'data_year': 2014, 'month_num': 0, 'key': 'Female'},
  {'value': 52, 'data_year': 2015, 'month_num': 0, 'key': 'Female'},
  {'value': 39, 'data_year': 2016, 'month_num': 0, 'key': 'Female'},
  {'value': 33, 'data_year': 2017, 'month_num': 0, 'key': 'Female'},
  {'value': 36, 'data_year': 2018, 'month_num': 0, 'key': 'F

In [12]:
# Query the state-level victim endpoint for every offense/state pair and
# flatten each response's records into one list of row dicts (results_ls).

# Only one column of each lookup is needed, so read the values directly instead
# of walking the frames with iterrows(). The state list is the same for every
# offense, so it is extracted once rather than rebuilt inside the outer loop.
offenses = offense_df['offense_type'].tolist()
states = state_df['state_abbr'].tolist()

results_ls = []

for offense in offenses:
    print(offense)  # progress marker: one line per offense

    for state in states:
        config = {
            '{offense}': offense,
            '{stateAbbr}': state,
            '{variable}': 'sex',
        }
        result = ch.get_json(ch.VICTIM_TKM_STATE, config)

        # One output row per record in the response; a pair whose 'data' list
        # is empty contributes nothing. A response without a 'data' key still
        # raises KeyError, exactly as before.
        results_ls.extend(
            {
                'state': state,
                'offense': offense,
                'year': res['data_year'],
                'gender': res['key'],
                'count': res['value'],
            }
            for res in result['data']
        )

results_ls[:5]

aggravated-assault
burglary
larceny
motor-vehicle-theft
homicide
rape
robbery
arson
violent-crime
property-crime


[{'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 1991,
  'gender': 'Male',
  'count': 11230},
 {'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 1992,
  'gender': 'Male',
  'count': 9008},
 {'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 2006,
  'gender': 'Male',
  'count': 22},
 {'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 2007,
  'gender': 'Male',
  'count': 22},
 {'state': 'AL',
  'offense': 'aggravated-assault',
  'year': 2008,
  'gender': 'Male',
  'count': 26}]

## Create DataFrame from API

In [15]:
# Turn the collected row dicts into a frame ordered by state, then year, then offense.
endpoint_df = (
    pd.DataFrame(results_ls)
    .sort_values(by=['state', 'year', 'offense'])
)
endpoint_df.head(n=5)
endpoint_df.info()

Unnamed: 0,count,gender,offense,state,year
0,11230,Male,aggravated-assault,AL,1991
15,9592,Female,aggravated-assault,AL,1991
30,1087,Unknown,aggravated-assault,AL,1991
15357,32,Male,arson,AL,1991
15371,26,Female,arson,AL,1991


<class 'pandas.core.frame.DataFrame'>
Int64Index: 21969 entries, 0 to 19727
Data columns (total 5 columns):
count      21969 non-null int64
gender     21969 non-null object
offense    21969 non-null object
state      21969 non-null object
year       21969 non-null int64
dtypes: int64(2), object(3)
memory usage: 1.0+ MB


In [16]:
# Look for rows that share the same (state, year, offense, gender) key.
# keep=False flags every member of a duplicate group, not just the repeats.
# The recorded output is empty: no duplicates in this endpoint.
key_cols = ['state', 'year', 'offense', 'gender']
is_duplicate = endpoint_df.duplicated(subset=key_cols, keep=False)
doop_df = endpoint_df.loc[is_duplicate]
doop_df.head(n=5)

Unnamed: 0,count,gender,offense,state,year


In [17]:
# Replace the scrambled post-sort index with 0..n-1 (discarding the old labels)
# and pin the column order used for the export.
export_cols = ['state', 'year', 'offense', 'gender', 'count']
endpoint_df = endpoint_df.reset_index(drop=True).loc[:, export_cols]
endpoint_df.head(n=5)
endpoint_df.info()

Unnamed: 0,state,year,offense,gender,count
0,AL,1991,aggravated-assault,Male,11230
1,AL,1991,aggravated-assault,Female,9592
2,AL,1991,aggravated-assault,Unknown,1087
3,AL,1991,arson,Male,32
4,AL,1991,arson,Female,26


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21969 entries, 0 to 21968
Data columns (total 5 columns):
state      21969 non-null object
year       21969 non-null int64
offense    21969 non-null object
gender     21969 non-null object
count      21969 non-null int64
dtypes: int64(2), object(3)
memory usage: 858.2+ KB


## Export to CSV

In [18]:
# Write the frame without its index, then report the file size in units of 1e6 bytes.
save_path = 'data/victim_tkm_state.csv'
endpoint_df.to_csv(save_path, index=False)
size_mb = round(os.path.getsize(save_path) / 1e6, 2)
f'{size_mb} mb'

'0.65 mb'