In [7]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

# Do not truncate long cell text when frames are rendered.
pd.set_option('display.max_colwidth', None)

# Indicator key and the configuration resolved for it from the project TOML.
INDICATOR = 'water_use'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Normalise backslashes to forward slashes so the directory renders as a
# Markdown link, then show the heading and the link in a single display call.
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(
    Markdown('## Raw data path'),
    Markdown(f"[{raw_dir_path}]({raw_dir_path})"),
)


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/utilities/water/water_use](C:/Users/tan/src/regional-pm-2023/data/raw/utilities/water/water_use)

# Environment: Water

## Water Use

In [8]:
# Show the description recorded for this indicator in the indicators workbook.
docs.describe_indicator(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Amount of water delievered.

nan

In [9]:
# Tabulate the output columns (name, description, type) for this indicator.
docs.list_schema(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Fiscal Year,Fiscal year of record.,datetime
water_use,Water Use,"Amount of water delievered in acre-feet, for a given year.",float


In [10]:
# Tabulate the data sources registered for this indicator.
docs.list_sources(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cwa_budget,Annual Budget Reports,San Diego County Water Authority,True,Based on fiscal year instead of calendar year.


In [11]:
# Keep the update steps in `steps`: later cells index into it by step number.
steps = docs.list_update_steps(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)
# Bare last expression so the notebook renders the full step table.
steps

Unnamed: 0,step
0,Update legacy sheet based on newest CWA fiscal year report.
1,Extract legacy data from legacy PM sheet.


In [12]:
# Tabulate the author remarks recorded for this indicator.
docs.list_remarks(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0,author,note
0,TAN,We have not identified a current SANDAG SME for this.


## Step 0: Update legacy sheet

The 2022 value comes from the San Diego County Water Authority's [Fiscal Year report](https://www.sdcwa.org/about-us/budgets-financial-reports/#); the figure appears in the text on page 40.

In [13]:
# Echo the update step stored under row label 0 of `steps` for reference.
display(steps.loc[0])

step    Update legacy sheet based on newest CWA fiscal year report.
Name: 0, dtype: object

## Step 1: Extract from legacy sheet

In [14]:
# Read the year / water-use pair from the legacy PM workbook into a
# year-indexed frame.
# NOTE(review): nrows=18 caps the read at 18 data rows -- presumably this has
# to grow whenever Step 0 appends a new fiscal year to the sheet; confirm.
water_use = (
    pd.read_excel(
        io=CONFIG['legacy_xlsx_path'],
        sheet_name=CONFIG['legacy_sheet'],
        header=None,  # no header row is read; column names are supplied below
        names=['year', 'water_use'],
        usecols='A:B',
        skiprows=4,
        nrows=18,
    )
    # Parse the four-digit year so the index is a datetime (1 January of that year).
    .assign(year=lambda frame: pd.to_datetime(frame['year'], format='%Y'))
    .set_index('year')
)
# Spot-check the most recent rows.
display(water_use.tail(3))

Unnamed: 0_level_0,water_use
year,Unnamed: 1_level_1
2020-01-01,354007.0
2021-01-01,384165.0
2022-01-01,409514.0


### Save Data

In [15]:
# Normalise backslashes to forward slashes so the directory renders as a
# Markdown link, then show the heading and the link in a single display call.
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(
    Markdown('#### Clean data path'),
    Markdown(f"[{clean_dir_path}]({clean_dir_path})"),
)

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/utilities/water/water_use](C:/Users/tan/src/regional-pm-2023/data/clean/utilities/water/water_use)

In [16]:
# Write the year-indexed frame to `<clean_dir>/<INDICATOR>_odp.csv`.
water_use.to_csv(CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv')