In [21]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

# Never truncate long cell text when DataFrames are rendered below.
pd.options.display.max_colwidth = None

# Indicator key, and the configuration looked up for it
# (presumably read from the project-level TOML file -- confirm in `config`).
INDICATOR = 'crime_rate'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Show the raw-data directory as a Markdown link; backslashes are swapped for
# forward slashes so a Windows-style path still forms a usable link target.
display(Markdown('## Raw data path'))
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
raw_dir_link = Markdown(f"[{raw_dir_path}]({raw_dir_path})")
display(raw_dir_link)


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/qol/cj/crime_rate](C:/Users/tan/src/regional-pm-2023/data/raw/qol/cj/crime_rate)

# Quality of Life: Criminal Justice

## Crime Rate

In [22]:
# Render the description recorded for this indicator in the indicators workbook.
docs.describe_indicator(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Rate of crimes per 1,000 people.

nan

In [23]:
# List the output columns (name, description, type) defined for this indicator.
docs.list_schema(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
crime_rate,Crime Rate,"Rate of crimes per 1,000 people",float


In [24]:
# List the data sources registered for this indicator.
docs.list_sources(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
crime_report,42 years of crime report,SANDAG,True,Rate pulled from report (ODP data didn't have single crime rate number)


In [25]:
# Keep the update steps in a variable: later cells display them one at a time.
steps = docs.list_update_steps(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)
steps

Unnamed: 0,step
0,Download newest crime report and copy the values to legacy workbook.
1,Extract from legacy PM data.


In [26]:
# Show any author remarks recorded for this indicator.
docs.list_remarks(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0,author,note
0,TAN,


### Step 0: Download and copy data to legacy workbook

In [27]:
# Show the instruction text for update step 0 (row label 0 of `steps`).
display(steps.loc[0])

step    Download newest crime report and copy the values to legacy workbook.
Name: 0, dtype: object

* New years: 2021 and 2022.
* New data comes from:
  * Crime: page 20 of CJ's *43 Years of Crime* report. Related ODP dataset: [FBI Index Crime Totals 1980-2021](https://opendata.sandag.org/Criminal-Justice-Public-Safety/FBI-Index-Crime-Totals-1980-2021/9i4f-3bid)


### Step 1: Extract legacy data

In [28]:
# Show the instruction text for update step 1 (row label 1 of `steps`).
display(steps.loc[1])

step    Extract from legacy PM data.
Name: 1, dtype: object

In [29]:
# Read the (year, crime rate) pairs from columns A:B of the legacy PM workbook.
# `header=None` together with `names=` labels the two columns directly, so no
# rename step is needed afterwards.
crime_rate = (
    pd.read_excel(
        CONFIG['legacy_xlsx_path'],
        sheet_name=CONFIG['legacy_sheet'],
        usecols='A:B',
        header=None,
        names=['year', 'crime_rate'],
        # Presumably the first three rows are title/header rows -- confirm
        # against the workbook.
        skiprows=3,
        # NOTE(review): fixed window of 18 data rows. The displayed tail ends
        # at 2021 while the notes for step 0 mention 2021 and 2022 as new
        # years -- confirm whether this needs to grow when a year is added.
        nrows=18,
    )
    # Years are stored as plain numbers; convert them to timestamps
    # (January 1st of each year) to match the schema's datetime `year` column.
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y'))
    .set_index('year')
)
display(crime_rate.tail(3))

Unnamed: 0_level_0,crime_rate
year,Unnamed: 1_level_1
2019-01-01,19.88
2020-01-01,18.2
2021-01-01,20.09


### Save Data

In [30]:
# Show the clean-data directory as a Markdown link; backslashes are swapped
# for forward slashes so a Windows-style path still forms a usable link target.
display(Markdown('#### Clean data path'))
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
clean_dir_link = Markdown(f"[{clean_dir_path}]({clean_dir_path})")
display(clean_dir_link)

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/qol/cj/crime_rate](C:/Users/tan/src/regional-pm-2023/data/clean/qol/cj/crime_rate)

In [31]:
# Write the year-indexed crime-rate table into the clean-data directory.
clean_csv_path = CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv'
crime_rate.to_csv(clean_csv_path)