# Analysis: Arrests By Population

Workflow: 3 <br>
Goal: Analyze the ratio of arrests to population. <br>

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import crime_helper as ch

In [None]:
# Notebook display setting: with ast_node_interactivity set to "all", IPython
# shows the result of every top-level expression in a cell rather than only the
# last one. The cells below rely on this to display several previews
# (e.g. both .head() and .shape) from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Do not truncate long cell values when DataFrames are displayed.
# `None` is the supported "no limit" value; the older -1 sentinel was deprecated
# in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)

## Initial Setup

In [None]:
# Relative paths to the CSV inputs, grouped by role.

# Lookup tables (read below to build the state/region reference frame).
STATE_FILE = 'data/lookup_state.csv'
REGION_FILE = 'data/lookup_region.csv'

# Data sets read by the cells below.
ARREST_FILE = 'data/arrest_tkm_state.csv'
EMPLOYMENT_FILE = 'data/employment_state.csv'

# Declared for completeness; not read by any cell visible in this notebook.
OFFENDER_FILE = 'data/offender_tkm_state.csv'
OFFENSE_FILE = 'data/offense_tkm_state.csv'
VICTIM_FILE = 'data/victim_tkm_state.csv'

In [None]:
# Load the state lookup table, then preview the first rows and print the
# column/dtype summary.
state_df = pd.read_csv(STATE_FILE)
state_df.head()
state_df.info()

In [None]:
# Load the region lookup table and display it in full.
region_df = pd.read_csv(REGION_FILE)
region_df

In [None]:
# Attach the region columns to each state by joining on the shared
# 'region_code' key. The inner join keeps only states whose region_code
# also appears in the region lookup.
state_full_df = state_df.merge(
    region_df,
    how='inner',
    on='region_code',
)
state_full_df.head()

In [None]:
# Order the states alphabetically by abbreviation and keep only the four
# columns needed downstream. Selecting the columns also discards the extra
# 'index' column that reset_index() adds.
state_clean_df = (
    state_full_df
    .sort_values('state_abbr')
    .reset_index()
    [['state_abbr', 'state_name', 'region_code', 'region_name']]
)
state_clean_df.head(10)
state_clean_df.info()

## Explore Arrests

In [None]:
# Load the arrest data and rename its 'count' column to 'arrests' in one step,
# then preview the rows and print the column/dtype summary.
arrest_df = (
    pd.read_csv(ARREST_FILE)
    .rename(columns={'count': 'arrests'})
)
arrest_df.head()
arrest_df.info()

In [None]:
# Collapse the arrest rows to one row per (state, year) by summing the
# remaining columns, order by the group keys, and turn the keys back into
# ordinary columns.
# NOTE(review): sum() runs over every non-key column of arrest_df; if any of
# them is non-numeric the result depends on the pandas version - confirm
# against the CSV schema.
arrest_total_df = (
    arrest_df
    .groupby(['state', 'year'])
    .sum()
    .sort_values(['state', 'year'])
    .reset_index()
)
arrest_total_df.head()

In [None]:
# Count the non-null entries per year. arrest_total_df holds one row per
# (state, year), so the renamed 'row_count' column shows how many of those
# rows exist for each year.
arrest_year_df = (
    arrest_total_df
    .groupby(['year'])
    .count()
    .sort_values(['year'])
    .rename(columns={'arrests': 'row_count'})
)
arrest_year_df

## Explore Employment

In [None]:
# Load the employment data, then preview the first rows and print the
# column/dtype summary.
employment_df = pd.read_csv(EMPLOYMENT_FILE)
employment_df.head()
employment_df.info()

In [None]:
# Only the state, year and population columns of the employment data are
# needed for the arrests-by-population analysis.
population_df = employment_df.loc[:, ['state_abbr', 'year', 'population']]
population_df.head()

In [None]:
# Number of non-null values in each column per year, to check how many
# population rows are available for each year.
population_df.groupby('year').count()

## Merge DataFrames

In [None]:
# Pair each (state, year) arrest total with the population for the same state
# and year. The state key is named 'state' on the arrest side and 'state_abbr'
# on the population side; the inner join drops pairs missing from either frame.
merge_1_df = arrest_total_df.merge(
    population_df,
    how='inner',
    left_on=['state', 'year'],
    right_on=['state_abbr', 'year'],
)
merge_1_df.head()

In [None]:
# Add the state name and region columns by joining on the state abbreviation
# ('state' on the left, 'state_abbr' in the lookup frame).
merge_2_df = merge_1_df.merge(
    state_clean_df,
    how='inner',
    left_on=['state'],
    right_on=['state_abbr'],
)
merge_2_df.head()

In [None]:
# Final analysis table: one row per state and year with its region, arrest
# total and population. All other merged columns are dropped.
merge_df = merge_2_df.loc[:, ['region_name', 'state', 'year', 'arrests', 'population']]
merge_df.head()
merge_df.shape

In [None]:
# Box plot of the 'arrests' column across all rows of merge_df, to look at
# its spread and outliers.
merge_df.boxplot(column=['arrests'])

## Create 3 Period DataFrames

In [None]:
# Number of non-null 'state' entries for every region/year combination.
merge_df.groupby(['region_name', 'year'])[['state']].count()

In [None]:
# Rows for the 2004-2005 period (Series.between includes both endpoints).
# The filter reads merge_df, the table built above; the previous reference to
# `combined_df` pointed at a name that is not defined in the visible notebook
# and would raise NameError.
period_0405_df = merge_df.loc[merge_df['year'].between(2004, 2005)]
period_0405_df.head()
period_0405_df.shape

In [None]:
# Rows for the 2011-2012 period (Series.between includes both endpoints).
# The filter reads merge_df, the table built above; the previous reference to
# `combined_df` pointed at a name that is not defined in the visible notebook
# and would raise NameError.
period_1112_df = merge_df.loc[merge_df['year'].between(2011, 2012)]
period_1112_df.head()
period_1112_df.shape

In [None]:
# Rows for the 2017-2018 period (Series.between includes both endpoints).
# The filter reads merge_df, the table built above; the previous reference to
# `combined_df` pointed at a name that is not defined in the visible notebook
# and would raise NameError.
period_1718_df = merge_df.loc[merge_df['year'].between(2017, 2018)]
period_1718_df.head(10)
period_1718_df.shape

In [None]:
# For each period, count the non-null 'state' and 'year' entries per region
# to see how many rows each region contributes.

period_0405_df.groupby(['region_name'])[['state', 'year']].count()
period_1112_df.groupby(['region_name'])[['state', 'year']].count()
period_1718_df.groupby(['region_name'])[['state', 'year']].count()

In [None]:
# Write the merged table to CSV without the row index, then show the size of
# the written file.
save_path = 'data/crime_data_full.csv'
merge_df.to_csv(save_path, index=False)
size_mb = round(os.path.getsize(save_path) / 1e6, 2)  # bytes / 1e6, 2 decimals
f'{size_mb} mb'