In [22]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

pd.set_option('display.max_colwidth', None)

INDICATOR = 'drug_use_rates'
CONFIG = config.get_config(INDICATOR, '../config.toml')

display(Markdown('## Raw data path'))
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(Markdown(f"[{raw_dir_path}]({raw_dir_path})"))


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/qol/cj/drug_use_rates](C:/Users/tan/src/regional-pm-2023/data/raw/qol/cj/drug_use_rates)

# Quality of Life: Criminal Justice

## Drug Use Rate

In [23]:
docs.describe_indicator(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Drug use rates among arrested individuals.

nan

In [24]:
docs.list_schema(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
drug,Drug,Drug of record.,string
gender,Gender,Gender of record,string
positive_rate,Positive Rate,Percentage of tested arrestees testing positive in a given year for a given drug.,float


In [25]:
docs.list_sources(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
drug_use_odp,Drug use ODP data,SANDAG,True,


In [26]:
steps = docs.list_update_steps(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)
steps

Unnamed: 0,step
0,Download CJ ODP data.
1,Transform data.


In [27]:
docs.list_remarks(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0,author,note
0,,


### Step 0: Download data

In [28]:
display(steps.loc[0])

step    Download CJ ODP data.
Name: 0, dtype: object

* [Total](https://opendata.sandag.org/Criminal-Justice-Public-Safety/Gender-Drug-Testing-Positive-Rates/3rqm-uxyv)
* [Marijuana](https://opendata.sandag.org/Criminal-Justice-Public-Safety/Gender-Marijuana-Positive-Rates/bkm7-th78)
* [Cocaine](https://opendata.sandag.org/Criminal-Justice-Public-Safety/Gender-Cocaine-Positive-Rates/ratu-zv8t)
* [Methamphetamine](https://opendata.sandag.org/Criminal-Justice-Public-Safety/Gender-Methamphetamine-Positive-Rates/atan-8ecq)
* [Opiod](https://opendata.sandag.org/Criminal-Justice-Public-Safety/Gender-Opioid-Positive-Rates/m5x5-9pdw)

In [29]:
def parse_drug_table(drug: str, url: str) -> pd.DataFrame:
    """Parse an individaul drug table.
    """
    return (
        pd.read_csv(url)
        .assign(drug=drug)
        .assign(year=lambda df: pd.to_datetime(df.year, format='%Y'))
        .set_index(['year', 'drug', 'gender'])
    )

In [30]:
drug_use_rates = pd.concat(
    [
        parse_drug_table(drug, url)
        for drug, url in (
            {
                'Any': 'https://opendata.sandag.org/resource/wmbt-fb6g.csv',
                'Marijuana': 'https://opendata.sandag.org/resource/79p7-i9i2.csv',
                'Cocaine': 'https://opendata.sandag.org/resource/civ6-nstp.csv',
                'Methamphetamine': 'https://opendata.sandag.org/resource/tj9f-hqpw.csv',
                'Opiod': 'https://opendata.sandag.org/resource/6ke3-rkbn.csv',
            }
            .items()
        )
    ]
)
drug_use_rates.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,percentage
year,drug,gender,Unnamed: 3_level_1
2020-01-01,Opiod,Females,6.0
2021-01-01,Opiod,Males,7.0
2021-01-01,Opiod,Females,9.0


### Step 1: Combine data

In [31]:
display(steps.loc[1])

step    Transform data.
Name: 1, dtype: object

### Save Data

In [32]:
display(Markdown('#### Clean data path'))
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(Markdown(f"[{clean_dir_path}]({clean_dir_path})"))

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/qol/cj/drug_use_rates](C:/Users/tan/src/regional-pm-2023/data/clean/qol/cj/drug_use_rates)

In [33]:
drug_use_rates.to_csv(
    CONFIG['clean_dir']
    / f'{INDICATOR}_odp.csv'
)