In [26]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
import config
import docs
import pandas as pd

# Let pandas render full cell text in tables (None disables truncation).
pd.set_option('display.max_colwidth', None)

# Indicator key; passed to the config and docs helpers throughout the notebook.
INDICATOR = 'electricity_sources'
CONFIG = config.get_config(INDICATOR, '../config.toml')

display(Markdown('## Raw data path'))
# Swap backslashes for forward slashes before using the directory as both the
# link text and the link target.
raw_dir_path = '/'.join(str(CONFIG['raw_dir']).split('\\'))
raw_dir_link = f"[{raw_dir_path}]({raw_dir_path})"
display(Markdown(raw_dir_link))


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/utilities/energy/electricity_sources](C:/Users/tan/src/regional-pm-2023/data/raw/utilities/energy/electricity_sources)

# Energy and Water: Energy

## Electricity Sources

In [27]:
# Render the description of this indicator; the docs helper is handed the
# indicators workbook path taken from CONFIG.
docs.describe_indicator(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Percentages of different electricity sources.

nan

In [28]:
# Show the column schema (name, description, type) for this indicator.
docs.list_schema(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
source,Source,Energy source of record.,string
percentage,Percentage,Percentage of electricity source used.,float


In [29]:
# Show the data sources registered for this indicator, with their notes.
docs.list_sources(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
content_labels,Power Content Labels,California Energy Commission,True,"Community Choice Aggregators are new utilities with separate content labels, these haven't been integrated yet with reporting."


In [30]:
# Keep the update steps in `steps`: the step sections below display
# individual rows from it by label.
steps = docs.list_update_steps(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)
steps

Unnamed: 0,step
0,Update legacy sheet based on newest power content labels.
1,Extract legacy data from legacy PM sheet.


In [31]:
# Show the free-form remarks (author and note) recorded for this indicator.
docs.list_remarks(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'], indicator=INDICATOR
)

Unnamed: 0,author,note
0,TAN,Current SME is Jeff Hoyos <Jeff.Hoyos@sandag.org>.
1,TAN,"Extra data was collected for Community Choice Aggregator energy sources, but we have not integrated these into reporting yet."


### Step 0: Download new data and update legacy sheet

New power content labels can be downloaded from the [California Energy Commission's Power Content Label page](https://www.energy.ca.gov/programs-and-topics/programs/power-source-disclosure/power-content-label).

Note that on the legacy sheet, 2006-2011 were not archived, and public CEC data only goes back to 2016.

However, we found an old correspondence/workbook containing the 2009-2011 power mixes, so those years were added to the sheet as well.

In [32]:
# Echo the recorded text of update step 0 (the row labelled 0 in `steps`).
display(steps.loc[0])

step    Update legacy sheet based on newest power content labels.
Name: 0, dtype: object

### Step 1: Extract data from legacy sheet

In [33]:
# Echo the recorded text of update step 1 (the row labelled 1 in `steps`).
display(steps.loc[1])

step    Extract legacy data from legacy PM sheet.
Name: 1, dtype: object

In [35]:
# Read the wide legacy table: spreadsheet columns A-O, skipping the first
# three rows and keeping six data rows below the header row.
legacy_wide = pd.read_excel(
    CONFIG['legacy_xlsx_path'],
    sheet_name=CONFIG['legacy_sheet'],
    usecols='A:O',
    skiprows=3,
    nrows=6,
)

# The first column has no header (pandas names it 'Unnamed: 0') and holds the
# source labels; every other column header is a year. Unpivot to one row per
# (source, year) pair.
# NOTE(review): the schema listed for this indicator names the value column
# `percentage`, but this cell emits it as `supply` -- confirm which is intended.
legacy_long = legacy_wide.rename(columns={'Unnamed: 0': 'source'}).melt(
    id_vars='source',
    var_name='year',
    value_name='supply',
)

# Drop leading/trailing asterisks from the source labels (presumably footnote
# markers -- verify against the sheet) and turn each year into a Jan-1 timestamp.
electricity_sources = legacy_long.assign(
    source=lambda frame: frame['source'].str.strip('*'),
    year=lambda frame: pd.to_datetime(frame['year'], format='%Y'),
).set_index(['year', 'source'])

display(electricity_sources.tail(3))

Unnamed: 0_level_0,Unnamed: 1_level_0,supply
year,source,Unnamed: 2_level_1
2021-01-01,Coal,0.0
2021-01-01,Large Hydroelectric,0.018
2021-01-01,Other,0.239


### Save Data

In [36]:
display(Markdown('#### Clean data path'))
# Swap backslashes for forward slashes before using the directory as both the
# link text and the link target.
clean_dir_path = '/'.join(str(CONFIG['clean_dir']).split('\\'))
clean_dir_link = f"[{clean_dir_path}]({clean_dir_path})"
display(Markdown(clean_dir_link))

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/utilities/energy/electricity_sources](C:/Users/tan/src/regional-pm-2023/data/clean/utilities/energy/electricity_sources)

In [37]:
# Write the indexed table (year, source) to '<INDICATOR>_odp.csv' inside the
# clean-data directory from CONFIG.
odp_csv_path = CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv'
electricity_sources.to_csv(odp_csv_path)