# Employment Endpoint

In [1]:
import os
import pandas as pd

import crime_helper as ch

In [2]:
# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Show full cell contents instead of truncating long strings.
# `None` is the supported "no limit" value since pandas 1.0; the legacy `-1`
# sentinel is rejected by pandas 2.x, so it is kept only as a fallback for
# old pandas releases that do not accept `None`.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

## Import Data

In [3]:
# Paths of the lookup CSV files, plus the year range passed as the
# '{since}' / '{until}' request parameters further down.

YEAR_SINCE, YEAR_UNTIL = '2000', '2018'
STATE = 'data/lookup_state.csv'
OFFENSE_SHORT = 'data/lookup_offense_short.csv'

In [4]:
# Read the state lookup table, list every abbreviation, then preview the frame.
state_df = pd.read_csv(STATE)
state_df['state_abbr'].tolist()
state_df.head(5)

['AK',
 'AL',
 'AR',
 'AS',
 'AZ',
 'CA',
 'CO',
 'CT',
 'CZ',
 'DC',
 'DE',
 'FL',
 'GA',
 'GM',
 'HI',
 'IA',
 'ID',
 'IL',
 'IN',
 'KS',
 'KY',
 'LA',
 'MA',
 'MD',
 'ME',
 'MI',
 'MN',
 'MO',
 'MS',
 'MT',
 'NE',
 'NC',
 'ND',
 'NH',
 'NJ',
 'NM',
 'NV',
 'NY',
 'OH',
 'OK',
 'OR',
 'PA',
 'PR',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VA',
 'VT',
 'WA',
 'WI',
 'WV',
 'WY',
 'MP',
 'OT',
 'VI']

Unnamed: 0,region_code,state_abbr,state_fips_code,state_id,state_name
0,4,AK,2.0,1,Alaska
1,3,AL,1.0,2,Alabama
2,3,AR,5.0,3,Arkansas
3,99,AS,60.0,4,American Samoa
4,4,AZ,4.0,5,Arizona


In [5]:
# Read the offense lookup table and list the offense types it contains.
offense_df = pd.read_csv(OFFENSE_SHORT)
offense_df['offense_type'].tolist()

['aggravated-assault',
 'burglary',
 'larceny',
 'motor-vehicle-theft',
 'homicide',
 'rape',
 'robbery',
 'arson',
 'violent-crime',
 'property-crime']

In [6]:
# Pick one state (row label 10 of the lookup table) to try the endpoint on.
# The row is fixed rather than randomly sampled, despite the variable name.

random_state = state_df.loc[10, 'state_abbr']
random_state

'DE'

In [7]:
# Request the employment records of the sample state for the configured years.
# The keys look like placeholder tokens; presumably ch.get_json substitutes
# them into the endpoint template -- TODO confirm against crime_helper.
config = {'{stateAbbr}': random_state, '{since}': YEAR_SINCE, '{until}': YEAR_UNTIL}
employment = ch.get_json(ch.EMPLOYMENT_STATE, config)

{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19


In [8]:
# Query the endpoint once per state abbreviation and pool every returned
# record in employment_list, which is turned into a DataFrame afterwards.

employment_list = []
for state in state_df['state_abbr'].tolist():
    config = {'{stateAbbr}': state, '{since}': YEAR_SINCE, '{until}': YEAR_UNTIL}
    employment = ch.get_json(ch.EMPLOYMENT_STATE, config)
    employment_list.extend(employment)


{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: len 19
{'count': 19, 'page': 0, 'pages': 1, 'per_page': 0}
page 0: le

In [9]:
# Build one DataFrame from the pooled records, preview it and show the row count.
# NOTE(review): in the saved output the first rows are Delaware ('DE') although
# the state loop starts at 'AK' -- confirm the output is not stale and that each
# request really returns its own state's records.
employment_df = pd.DataFrame(employment_list)
employment_df.head(5)
employment_df.shape[0]

Unnamed: 0,agency_count_pe_submitting,civilian_ct,csv_header,data_year,female_civilian_ct,female_officer_ct,female_total_ct,male_civilian_ct,male_officer_ct,male_total_ct,pe_ct_per_1000,population,state_abbr,state_id,state_name,total_pe_ct
0,53,1013,,2000,631,278,909,382,2371,2753,4.54,806621,DE,11,Delaware,3662
1,62,1120,,2001,694,323,1017,426,2468,2894,4.76,820963,DE,11,Delaware,3911
2,64,1150,,2002,714,338,1052,436,2761,3197,5.06,839193,DE,11,Delaware,4249
3,60,1257,,2003,813,399,1212,444,2625,3069,5.08,843255,DE,11,Delaware,4281
4,54,1161,,2004,697,348,1045,464,2561,3025,4.76,854779,DE,11,Delaware,4070


1102

In [10]:
# Remove unnecessary columns. Reassigning (instead of dropping in place) and
# ignoring labels that are already gone keeps this cell safe to re-run; the
# in-place version raised KeyError on a second execution.
employment_df = employment_df.drop(columns=['csv_header', 'state_id'], errors='ignore')


In [11]:
# Preview the first rows to confirm which columns remain.
employment_df.head(5)

Unnamed: 0,agency_count_pe_submitting,civilian_ct,data_year,female_civilian_ct,female_officer_ct,female_total_ct,male_civilian_ct,male_officer_ct,male_total_ct,pe_ct_per_1000,population,state_abbr,state_name,total_pe_ct
0,53,1013,2000,631,278,909,382,2371,2753,4.54,806621,DE,Delaware,3662
1,62,1120,2001,694,323,1017,426,2468,2894,4.76,820963,DE,Delaware,3911
2,64,1150,2002,714,338,1052,436,2761,3197,5.06,839193,DE,Delaware,4249
3,60,1257,2003,813,399,1212,444,2625,3069,5.08,843255,DE,Delaware,4281
4,54,1161,2004,697,348,1045,464,2561,3025,4.76,854779,DE,Delaware,4070


In [12]:
# Shorten two column names:
# 'agency_count_pe_submitting' -> 'agency_ct' and 'data_year' -> 'year'.
column_aliases = {'agency_count_pe_submitting': 'agency_ct', 'data_year': 'year'}
employment_df = employment_df.rename(columns=column_aliases)
employment_df.head(2)

Unnamed: 0,agency_ct,civilian_ct,year,female_civilian_ct,female_officer_ct,female_total_ct,male_civilian_ct,male_officer_ct,male_total_ct,pe_ct_per_1000,population,state_abbr,state_name,total_pe_ct
0,53,1013,2000,631,278,909,382,2371,2753,4.54,806621,DE,Delaware,3662
1,62,1120,2001,694,323,1017,426,2468,2894,4.76,820963,DE,Delaware,3911


In [13]:
# Collect the names of the states that have a missing value in any column.
rows_with_nulls = employment_df.isnull().any(axis=1)
states_to_remove = employment_df.loc[rows_with_nulls, 'state_name'].unique()
states_to_remove

array([], dtype=object)

In [14]:
# Drop every row that belongs to a state listed in states_to_remove.
for state in states_to_remove:
    keep_rows = employment_df['state_name'].ne(state)
    employment_df = employment_df[keep_rows]


In [15]:
# Summarize the column dtypes and non-null counts of the cleaned frame.
employment_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1102 entries, 0 to 1101
Data columns (total 14 columns):
agency_ct             1102 non-null int64
civilian_ct           1102 non-null int64
year                  1102 non-null int64
female_civilian_ct    1102 non-null int64
female_officer_ct     1102 non-null int64
female_total_ct       1102 non-null int64
male_civilian_ct      1102 non-null int64
male_officer_ct       1102 non-null int64
male_total_ct         1102 non-null int64
pe_ct_per_1000        1102 non-null float64
population            1102 non-null int64
state_abbr            1102 non-null object
state_name            1102 non-null object
total_pe_ct           1102 non-null int64
dtypes: float64(1), int64(11), object(2)
memory usage: 120.6+ KB


## Export to csv

In [16]:
# Write the cleaned frame to disk (without the index) and show the file size,
# computed as bytes / 1e6 and rounded to two decimals.
save_path = 'data/employment_state.csv'
employment_df.to_csv(save_path, index=False)
size_mb = round(os.path.getsize(save_path) / 1e6, 2)
f'{size_mb} mb'

'0.08 mb'