In [1]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests
import db


# Let pandas render full cell contents instead of truncating long values.
pd.set_option('display.max_colwidth', None)

# Indicator key for this notebook, and the settings config.get_config
# returns for it from ../config.toml.
INDICATOR = 'injuries'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Heading followed by a Markdown link to the raw-data directory
# (backslashes swapped for forward slashes).
display(Markdown('## Raw data path'))
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
raw_dir_link = f"[{raw_dir_path}]({raw_dir_path})"
display(Markdown(raw_dir_link))


  from .autonotebook import tqdm as notebook_tqdm


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/transportation/safety/injuries](C:/Users/tan/src/regional-pm-2023/data/raw/transportation/safety/injuries)

# Transportation Planning: Safety

## Injuries

In [2]:
# Show what docs.describe_indicator produces for this notebook's indicator.
docs.describe_indicator(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Serious injuries from road collisions.

nan

In [3]:
# Show the column schema docs.list_schema returns for this indicator.
docs.list_schema(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
mode,Mode,Transportation mode of record (CATEGORIES).,string
injuries,Serious Injuries,"Serious injuries from road collisions, in a given year for a given mode.",int
injury_rate,Serious Injury Rate,"Serious injuries per 100,000,000 VMT, in a given year for a given mode.",float


In [4]:
# Show the data sources docs.list_sources returns for this indicator.
docs.list_sources(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
switrs_db,SWITRS,California Highway Patrol,1.0,"We ingest this for the safety dashboard, so we'll query for it on the database server."


In [5]:
# Keep the update steps in `steps` (a later cell indexes into it with .loc),
# then echo the result as this cell's output.
steps = docs.list_update_steps(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])
steps

Unnamed: 0,step
0,Query data from prepared data on SQL server.


In [6]:
# Show the remarks docs.list_remarks returns for this indicator.
docs.list_remarks(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0,author,note
0,TAN,Extract from query on SANDAG's SQL Server.


### Step 0: Query data from server

In [7]:
# Pull the row labelled 0 out of the update steps and render it.
step_0 = steps.loc[0]
display(step_0)

step    Query data from prepared data on SQL server.
Name: 0, dtype: object

In [8]:
# Connection for the 'WS' database on server 'SQL2014b8', obtained through
# the project's db helper; used as `con` by the read_sql cell below.
con = db.get_db_connection(server='SQL2014b8', db='WS')

In [9]:
# Yearly sum of COUNT_SEVERE_INJ from the collisions table, restricted to rows
# where sandag_region = 'True'.
# ORDER BY is needed: a GROUP BY alone does not fix the order in which the
# server returns rows, and the `.tail()` preview and the exported CSV
# downstream are only meaningful when the years arrive chronologically.
# The leading `--sql` is a SQL comment (presumably an editor highlighting hint).
query = """--sql
    SELECT 
        [ACCIDENT_YEAR] AS [year],
        SUM([COUNT_SEVERE_INJ]) AS [injuries]
    FROM [WS].[dbo].[switrs_safety_collisions]
    WHERE [sandag_region] = 'True'
    GROUP BY [ACCIDENT_YEAR]
    ORDER BY [ACCIDENT_YEAR]
"""

In [10]:
# Run the query and build a year-indexed table of injury counts.
injuries = (
    pd.read_sql(sql=query, con=con)
    # Parse the year column with '%Y', which yields a timestamp on
    # 1 January of each year.
    .assign(year=lambda frame: pd.to_datetime(frame['year'], format='%Y'))
    .set_index('year')
    # Do not rely on the order in which the server hands rows back: sort by
    # year so the preview below and the saved CSV are chronological.
    .sort_index()
)
# Preview the three most recent years.
injuries.tail(3)

Unnamed: 0_level_0,injuries
year,Unnamed: 1_level_1
2020-01-01,985
2021-01-01,1223
2022-01-01,1103


### Save Data

In [11]:
# Heading followed by a Markdown link to the clean-data directory
# (backslashes swapped for forward slashes).
display(Markdown('#### Clean data path'))
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
clean_dir_link = f"[{clean_dir_path}]({clean_dir_path})"
display(Markdown(clean_dir_link))

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/transportation/safety/injuries](C:/Users/tan/src/regional-pm-2023/data/clean/transportation/safety/injuries)

In [12]:
# Write the injuries table to '<INDICATOR>_odp.csv' inside CONFIG['clean_dir'].
clean_csv_path = CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv'
injuries.to_csv(clean_csv_path)