In [1]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

pd.set_option('display.max_colwidth', None)

INDICATOR = 'gas_use'
CONFIG = config.get_config(INDICATOR, '../config.toml')

display(Markdown('## Raw data path'))
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(Markdown(f"[{raw_dir_path}]({raw_dir_path})"))


  from .autonotebook import tqdm as notebook_tqdm


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/utilities/energy/gas_use](C:/Users/tan/src/regional-pm-2023/data/raw/utilities/energy/gas_use)

# Energy and Water: Energy

## Gas Use

In [2]:
docs.describe_indicator(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Amount of gas consumed by sector.

nan

In [3]:
docs.list_schema(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
sector,Sector,"Sector (""Residential"" or ""Non-Residential"") of record.",string
gas_use,Gas Use,"Amount of gas consumed by sector in millions of therms, in a given year by a given sector.",float


In [4]:
docs.list_sources(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
energy_consumption_db,California Energy Consumption Database,California Energy Commission,True,"Generated based on ""Consumption by County"" report."


In [5]:
steps = docs.list_update_steps(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)
steps

Unnamed: 0,step
0,Download annual county data from CEC Energy Consumption Database
1,Extract residential and non-residential consumption from the raw data.


In [6]:
docs.list_remarks(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0,author,note
0,TAN,Current SME is Jeff Hoyos <Jeff.Hoyos@sandag.org>.
1,TAN,I'm not sure if any source was used consistently in the past. This source was easy to download for all needed years and covers the whole county (CEC forecasts use SDGE's area which goes into Orange County).
2,TAN,Note that the legacy PM data sheet is not used as input.


### Step 0: Download data

Data can be downloaded using [this](http://www.ecdms.energy.ca.gov/gasbycounty.aspxx) CEC report.

In [7]:
display(steps.loc[0])

step    Download annual county data from CEC Energy Consumption Database
Name: 0, dtype: object

### Step 1:  Extract data

In [8]:
display(steps.loc[1])

step    Extract residential and non-residential consumption from the raw data.
Name: 1, dtype: object

In [9]:
gas_use = (
    pd.read_csv(
        CONFIG['raw_dir']/'GasByCounty.csv'
    )
    .rename(columns={'Sector': 'sector'})
    .set_index('sector')
    .drop(columns='County') # San Diego implicit, don't need this
    .drop('Total') # Don't need aggregate columns/rows
    .drop(columns='Total Usage') # Don't need aggregate columns/rows
    .reset_index()
    .melt(
        id_vars=['sector'],
        var_name='year',
        value_name='electricity_use',
    )
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y'))
    .set_index(['sector', 'year'])
)
gas_use

Unnamed: 0_level_0,Unnamed: 1_level_0,electricity_use
sector,year,Unnamed: 2_level_1
Non-Residential,2021-01-01,227.549467
Residential,2021-01-01,295.965443
Non-Residential,2020-01-01,202.366603
Residential,2020-01-01,302.849797
Non-Residential,2019-01-01,230.14062
Residential,2019-01-01,303.771611
Non-Residential,2018-01-01,217.997747
Residential,2018-01-01,264.52674
Non-Residential,2017-01-01,207.039431
Residential,2017-01-01,272.796531


### Save Data

In [10]:
display(Markdown('#### Clean data path'))
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(Markdown(f"[{clean_dir_path}]({clean_dir_path})"))

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/utilities/energy/gas_use](C:/Users/tan/src/regional-pm-2023/data/clean/utilities/energy/gas_use)

In [11]:
gas_use.to_csv(
    CONFIG['clean_dir']
    / f'{INDICATOR}_odp.csv'
)