# Employment Endpoint

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one, so that cells below can show several results at once.
InteractiveShell.ast_node_interactivity = "all"
# Show full cell contents without truncating long values. `None` is the
# supported "no limit" value: -1 was deprecated in pandas 1.0 and is rejected
# by pandas 2.x. Pre-1.0 pandas only accepts integers for this option, so fall
# back to the legacy -1 sentinel there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

## Import Data

In [3]:
# locations of the lookup tables, and the first/last year used in the API requests

STATE = 'data/lookup_state.csv'
OFFENSE_SHORT = 'data/lookup_offense_short.csv'
YEAR_SINCE, YEAR_UNTIL = '2000', '2018'

In [4]:
# read the state lookup table, then preview the first abbreviations and rows
state_df = pd.read_csv(filepath_or_buffer=STATE)
state_df['state_abbr'].head().to_list()
state_df.head()

['AK', 'AL', 'AR', 'AS', 'AZ']

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [5]:
# read the offense lookup table and list every offense type it contains
offense_df = pd.read_csv(filepath_or_buffer=OFFENSE_SHORT)
offense_df['offense_type'].to_list()

['aggravated-assault',
 'burglary',
 'larceny',
 'motor-vehicle-theft',
 'homicide',
 'rape',
 'robbery',
 'arson',
 'violent-crime',
 'property-crime']

In [6]:
# pick a single state (row label 10 of the lookup table) to try the endpoint with
random_state = state_df.at[10, 'state_abbr']
random_state

'DE'

In [7]:
# request parameters for the sample state
# NOTE(review): the keys look like URL-template placeholders that ch.get_json
# fills in — confirm in crime_helper
config = {'{stateAbbr}': random_state, '{since}': YEAR_SINCE, '{until}': YEAR_UNTIL}
employment = ch.get_json(ch.EMPLOYMENT_STATE, config)

{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19


In [8]:
# query the endpoint once per state in the lookup table and collect every
# returned record in employment_list, which is turned into a dataframe later

employment_list = []
for state in state_df['state_abbr'].to_list():
    config = {'{stateAbbr}': state, '{since}': YEAR_SINCE, '{until}': YEAR_UNTIL}
    employment = ch.get_json(ch.EMPLOYMENT_STATE, config)
    employment_list.extend(employment)


{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: le

In [9]:
# build the dataframe from the collected records, preview it and show the row count
employment_df = pd.DataFrame(data=employment_list)
employment_df.head()
employment_df.shape[0]

Unnamed: 0,agency_count_pe_submitting,civilian_ct,csv_header,data_year,female_civilian_ct,female_officer_ct,female_total_ct,male_civilian_ct,male_officer_ct,male_total_ct,pe_ct_per_1000,population,state_abbr,state_id,state_name,total_pe_ct
0,32,675.0,,2000,475.0,103.0,578.0,200.0,1055.0,1255.0,2.92,626932,AK,1,Alaska,1833.0
1,33,661.0,,2001,463.0,107.0,570.0,198.0,1097.0,1295.0,2.94,634892,AK,1,Alaska,1865.0
2,33,701.0,,2002,482.0,112.0,594.0,219.0,1091.0,1310.0,2.96,643893,AK,1,Alaska,1904.0
3,33,682.0,,2003,470.0,111.0,581.0,212.0,1063.0,1275.0,2.86,648921,AK,1,Alaska,1856.0
4,34,688.0,,2004,470.0,127.0,597.0,218.0,1125.0,1343.0,2.96,655932,AK,1,Alaska,1940.0


1102

In [10]:
# remove the columns that are not needed; reassigning instead of dropping in
# place, and ignoring columns that are already gone, keeps this cell safe to
# re-run (the in-place drop raised KeyError on a second execution)
employment_df = employment_df.drop(columns=['csv_header', 'state_id'], errors='ignore')


In [11]:
# preview the first rows of the current frame
employment_df.head(n=5)

Unnamed: 0,agency_count_pe_submitting,civilian_ct,data_year,female_civilian_ct,female_officer_ct,female_total_ct,male_civilian_ct,male_officer_ct,male_total_ct,pe_ct_per_1000,population,state_abbr,state_name,total_pe_ct
0,32,675.0,2000,475.0,103.0,578.0,200.0,1055.0,1255.0,2.92,626932,AK,Alaska,1833.0
1,33,661.0,2001,463.0,107.0,570.0,198.0,1097.0,1295.0,2.94,634892,AK,Alaska,1865.0
2,33,701.0,2002,482.0,112.0,594.0,219.0,1091.0,1310.0,2.96,643893,AK,Alaska,1904.0
3,33,682.0,2003,470.0,111.0,581.0,212.0,1063.0,1275.0,2.86,648921,AK,Alaska,1856.0
4,34,688.0,2004,470.0,127.0,597.0,218.0,1125.0,1343.0,2.96,655932,AK,Alaska,1940.0


In [12]:
# shorten 'agency_count_pe_submitting' to 'agency_ct' and 'data_year' to 'year'
column_names = {'agency_count_pe_submitting': 'agency_ct', 'data_year': 'year'}
employment_df = employment_df.rename(columns=column_names)
employment_df.head(2)

Unnamed: 0,agency_ct,civilian_ct,year,female_civilian_ct,female_officer_ct,female_total_ct,male_civilian_ct,male_officer_ct,male_total_ct,pe_ct_per_1000,population,state_abbr,state_name,total_pe_ct
0,32,675.0,2000,475.0,103.0,578.0,200.0,1055.0,1255.0,2.92,626932,AK,Alaska,1833.0
1,33,661.0,2001,463.0,107.0,570.0,198.0,1097.0,1295.0,2.94,634892,AK,Alaska,1865.0


In [13]:
# find every state that has at least one row containing a null value;
# these states are removed afterwards
rows_with_null = employment_df.isnull().any(axis='columns')
states_to_remove = employment_df.loc[rows_with_null, 'state_name'].unique()
states_to_remove

array(['American Samoa', 'Canal Zone', 'Guam', 'Puerto Rico',
       'Mariana Islands', 'Other', 'U.S. Virgin Islands'], dtype=object)

In [14]:
# remove every row that belongs to a state listed in states_to_remove; a single
# isin() mask replaces one full-frame filter per state and, unlike `!=`, also
# matches a missing (NaN) state_name if one was flagged
employment_df = employment_df.loc[~employment_df['state_name'].isin(states_to_remove)]


In [15]:
# verify the cleanup explicitly instead of only by reading the summary:
# no null values may remain in the frame at this point
assert not employment_df.isna().any().any(), 'null values remain in employment_df'
employment_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 969 entries, 0 to 1044
Data columns (total 14 columns):
agency_ct             969 non-null int64
civilian_ct           969 non-null float64
year                  969 non-null int64
female_civilian_ct    969 non-null float64
female_officer_ct     969 non-null float64
female_total_ct       969 non-null float64
male_civilian_ct      969 non-null float64
male_officer_ct       969 non-null float64
male_total_ct         969 non-null float64
pe_ct_per_1000        969 non-null float64
population            969 non-null int64
state_abbr            969 non-null object
state_name            969 non-null object
total_pe_ct           969 non-null float64
dtypes: float64(9), int64(3), object(2)
memory usage: 113.6+ KB


## Export to CSV

In [16]:
# write the frame to disk without the index column and report the file size
save_path = 'data/employment_state.csv'
employment_df.to_csv(save_path, index=False)
size_mb = round(os.path.getsize(save_path) / 1e6, 2)
f'{size_mb} mb'

'0.09 mb'