# CDE - Explore Endpoints

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_colwidth', -1)

## Import Data

In [3]:
# load the lookup data

STATE = 'data/state.csv'
OFFENSE_SHORT = 'data/offense_short.csv'
YEAR_SINCE = 2010
YEAR_UNTIL = 2011

In [4]:
state_df = pd.read_csv(STATE)
state_df.head()

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [5]:
offense_df = pd.read_csv(OFFENSE_SHORT)
offense_df.head()

Unnamed: 0,offense_type
0,aggravated-assault
1,burglary
2,larceny
3,motor-vehicle-theft
4,homicide


In [6]:
# examine data for a random row

random_state = state_df.loc[10]['state_abbr']
random_offense = offense_df.loc[4]['offense_type']
random_state
random_offense

'DE'

'homicide'

In [7]:
config = {
    '{offense}': random_offense,
    '{stateAbbr}': random_state,
    '{variable}' : 'sex'
}
offender = ch.get_json(ch.OFFENDER_TKM_STATE, config)
offender

{'ui_type': 'pie_chart',
 'noun': 'Offender',
 'category': 'Offender demographic',
 'title': 'Offender Sex',
 'short_title': 'Sex',
 'ui_restriction': None,
 'keys': ['Male', 'Female', 'Unknown'],
 'data': [{'value': 31, 'data_year': 2001, 'month_num': 0, 'key': 'Male'},
  {'value': 28, 'data_year': 2002, 'month_num': 0, 'key': 'Male'},
  {'value': 25, 'data_year': 2003, 'month_num': 0, 'key': 'Male'},
  {'value': 17, 'data_year': 2004, 'month_num': 0, 'key': 'Male'},
  {'value': 39, 'data_year': 2005, 'month_num': 0, 'key': 'Male'},
  {'value': 74, 'data_year': 2006, 'month_num': 0, 'key': 'Male'},
  {'value': 49, 'data_year': 2007, 'month_num': 0, 'key': 'Male'},
  {'value': 60, 'data_year': 2008, 'month_num': 0, 'key': 'Male'},
  {'value': 42, 'data_year': 2009, 'month_num': 0, 'key': 'Male'},
  {'value': 68, 'data_year': 2010, 'month_num': 0, 'key': 'Male'},
  {'value': 55, 'data_year': 2011, 'month_num': 0, 'key': 'Male'},
  {'value': 47, 'data_year': 2012, 'month_num': 0, 'key': 

In [8]:
offender

{'ui_type': 'pie_chart',
 'noun': 'Offender',
 'category': 'Offender demographic',
 'title': 'Offender Sex',
 'short_title': 'Sex',
 'ui_restriction': None,
 'keys': ['Male', 'Female', 'Unknown'],
 'data': [{'value': 31, 'data_year': 2001, 'month_num': 0, 'key': 'Male'},
  {'value': 28, 'data_year': 2002, 'month_num': 0, 'key': 'Male'},
  {'value': 25, 'data_year': 2003, 'month_num': 0, 'key': 'Male'},
  {'value': 17, 'data_year': 2004, 'month_num': 0, 'key': 'Male'},
  {'value': 39, 'data_year': 2005, 'month_num': 0, 'key': 'Male'},
  {'value': 74, 'data_year': 2006, 'month_num': 0, 'key': 'Male'},
  {'value': 49, 'data_year': 2007, 'month_num': 0, 'key': 'Male'},
  {'value': 60, 'data_year': 2008, 'month_num': 0, 'key': 'Male'},
  {'value': 42, 'data_year': 2009, 'month_num': 0, 'key': 'Male'},
  {'value': 68, 'data_year': 2010, 'month_num': 0, 'key': 'Male'},
  {'value': 55, 'data_year': 2011, 'month_num': 0, 'key': 'Male'},
  {'value': 47, 'data_year': 2012, 'month_num': 0, 'key': 

In [9]:
offense['data'][0]['value']
offense['data'][0]['data_year']

NameError: name 'offense' is not defined

In [None]:
# loop over all lookup values and aggregate full results

offense_ls = []

for idx_off, row_off in offense_df.iterrows():
    offense = row_off['offense_type']
    print(offense)
    
    for idx_stt, row_stt in state_df.iterrows():
        state = row_stt['state_abbr']
        
        config = {
            '{offense}': offense,
            '{stateAbbr}': state,
        }
        offense_result = ch.get_json(ch.OFFENSE_STATE, config)
        
        for res in offense_result['data']:
            new_dx = {
                'state': state,
                'offense': offense,
                'year': res['data_year'],
                'count': res['value'],
            }
            offense_ls.append(new_dx)
            
offense_ls[:5]

In [None]:
offense_df = pd.DataFrame(offense_ls)
offense_df = offense_df[['state', 'year', 'offense', 'count']]
offense_df = offense_df.sort_values(['state', 'year', 'offense'])
offense_df.head()
offense_df.info()

In [None]:
# not sure why the api returns duplicate rows

doop_df = offense_df.loc[offense_df.duplicated(subset=['state', 'year', 'offense'], keep=False) == True]
doop_df.head()

In [None]:
offense_df = offense_df.drop_duplicates(subset=['state', 'year', 'offense'])
offense_df = offense_df.reset_index()
offense_df = offense_df[['state', 'year', 'offense', 'count']]
offense_df.head()
offense_df.info()

## Export to csv

In [None]:
save_path = r'data/offense_state.csv'
offense_df.to_csv(save_path, index=False)
f'{round(os.path.getsize(save_path) /1e6, 2)} mb'