In [20]:
# (Re)load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell executes so edits to local modules such as
# `config` and `docs` are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

# Never truncate cell text when rendering DataFrames (long descriptions/notes).
pd.options.display.max_colwidth = None

# Indicator handled by this notebook and its settings from the project config.
INDICATOR = 'electricity_use'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Render a heading followed by a link to the raw-data directory; backslashes
# are swapped for forward slashes so Windows paths form a usable link target.
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(
    Markdown('## Raw data path'),
    Markdown(f"[{raw_dir_path}]({raw_dir_path})"),
)


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/utilities/energy/electricity_use](C:/Users/tan/src/regional-pm-2023/data/raw/utilities/energy/electricity_use)

# Energy and Water: Energy

## Electricity Use

In [21]:
# Show the description recorded for this indicator (rendered below the cell).
docs.describe_indicator(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Amount of electricity consumed by sector.

nan

In [22]:
# Tabulate the output columns (name, description, type) for this indicator.
docs.list_schema(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
sector,Sector,"Sector (""Residential"" or ""Non-Residential"") of record.",string
electricity_use,Electricity Use,"Amount of electricity consumed by sector in gigawatt hours (GWh), in a given year by a given sector.",float


In [23]:
# Tabulate the data sources registered for this indicator.
docs.list_sources(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
energy_consumption_db,California Energy Consumption Database,California Energy Commission,True,"Generated based on ""Consumption by County"" report."


In [24]:
# Keep the update steps in `steps`; later cells display individual rows of it.
steps = docs.list_update_steps(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)
steps

Unnamed: 0,step
0,Download annual county data from CEC Energy Consumption Database
1,Extract residential and non-residential consumption from the raw data.


In [25]:
# Tabulate the free-form remarks (author, note) recorded for this indicator.
docs.list_remarks(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Unnamed: 0,author,note
0,TAN,Current SME is Jeff Hoyos <Jeff.Hoyos@sandag.org>.
1,TAN,I'm not sure if any source was used consistently in the past. This source was easy to download for all needed years and covers the whole county (CEC forecasts use SDGE's area which goes into Orange County).
2,TAN,Note that the legacy PM data sheet is not used as input.


### Step 0: Download data

Data can be downloaded from the California Energy Commission's [Electricity Consumption by County](http://www.ecdms.energy.ca.gov/elecbycounty.aspx) report.

In [26]:
# Show the row labelled 0 of the `steps` table, i.e. the text of update step 0.
display(steps.loc[0])

step    Download annual county data from CEC Energy Consumption Database
Name: 0, dtype: object

### Step 1: Extract data

In [27]:
# Show the row labelled 1 of the `steps` table, i.e. the text of update step 1.
display(steps.loc[1])

step    Extract residential and non-residential consumption from the raw data.
Name: 1, dtype: object

In [28]:
# Reshape the raw export from wide form (one row per sector, one column per
# year) into a tidy frame indexed by (sector, year) with a single
# `electricity_use` value column.
electricity_use = (
    pd.read_csv(CONFIG['raw_dir'] / 'ElectricityByCounty.csv')
    .rename(columns={'Sector': 'sector'})
    .set_index('sector')
    # 'County' is redundant (San Diego is implicit); the 'Total' row and the
    # 'Total Usage' column are aggregates that are not needed.
    .drop(index='Total', columns=['County', 'Total Usage'])
    .reset_index()
    # Every remaining non-sector column is a year label.
    .melt(id_vars='sector', var_name='year', value_name='electricity_use')
    .assign(year=lambda long_df: pd.to_datetime(long_df['year'], format='%Y'))
    .set_index(['sector', 'year'])
)
electricity_use

Unnamed: 0_level_0,Unnamed: 1_level_0,electricity_use
sector,year,Unnamed: 2_level_1
Non-Residential,2021-01-01,12285.326331
Residential,2021-01-01,7480.166978
Non-Residential,2020-01-01,11657.679451
Residential,2020-01-01,7387.046267
Non-Residential,2019-01-01,12415.797864
Residential,2019-01-01,6573.152838
Non-Residential,2018-01-01,12768.021612
Residential,2018-01-01,6711.739591
Non-Residential,2017-01-01,12614.446916
Residential,2017-01-01,6747.412499


### Save Data

In [29]:
# Render a heading followed by a link to the clean-data directory; backslashes
# are swapped for forward slashes so Windows paths form a usable link target.
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(
    Markdown('#### Clean data path'),
    Markdown(f"[{clean_dir_path}]({clean_dir_path})"),
)

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/utilities/energy/electricity_use](C:/Users/tan/src/regional-pm-2023/data/clean/utilities/energy/electricity_use)

In [30]:
# Write the tidy table (with its sector/year index) to the clean-data directory.
electricity_use.to_csv(CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv')