# Produce Western Demand Profile Using EIA Data

In [1]:
import getpass
import pandas as pd

from prereise.gather.demanddata.eia.clean_data import (fix_dataframe_outliers, 
                                                       replace_with_shifted_demand)
from prereise.gather.demanddata.eia.get_eia_data import get_ba_demand
from prereise.gather.demanddata.eia.map_ba import aggregate_ba_demand

Loadzone to BA mapping is based comes from [here](https://www.wecc.org/Administrative/Balancing_Authorities_JAN17.pdf)

In [2]:
loadzone2ba = {
    'WA' : ['AVA','BPAT','CHPD','DOPD','GCPD','PSEI','SCL','TPWR'],
    'OR' : ['BPAT','GRID','IPCO','PACW','PGE'],
    'CAnorth' : ['BANC','CISO','PACW','BPAT'],
    'CABayArea' : ['CISO'],
    'CAcentral' : ['CISO', 'TIDC'],
    'CAsw' : ['CISO', 'LDWP'],
    'CAse' : ['CISO', 'IID'],
    'NV' : ['NEVP'],
    'AZ' : ['AZPS','DEAA','GRIF','GRMA','HGMA','PNM','SRP','TEPC','WALC'],
    'UT' : ['PACE'],
    'NM' : ['EPE','PNM'],
    'CO' : ['PSCO','WACM'],
    'WY' : ['PACE','WACM'],
    'ID' : ['AVA','BPAT','IPCO','PACE'],
    'MT' : ['BPAT','GWA','NWMT','WAUW','WWA'],
    'ElPaso' : ['EPE']
}

In [3]:
ba = set()
for v in loadzone2ba.values():
    ba.update(v)

## 1. Download Data

Demand data from EIA is imported using an API Key. Note that data for some BAs will not be found and this will be reported.

In [4]:
start = pd.to_datetime('2016-01-01 00:00:00')
end = pd.to_datetime('2016-12-31 23:00:00')

key = getpass.getpass(prompt='api_key=')
western_demand = get_ba_demand(ba, start, end, key)
western_demand_2016 = western_demand.loc[start:end]
western_demand_2016

api_key=········
Downloading EBA.IID-ALL.D.H
Downloading EBA.BPAT-ALL.D.H
Downloading EBA.GRID-ALL.D.H
ERROR: EBA.GRID-ALL.D.H not found. invalid series_id. For key registration, documentation, and examples see https://www.eia.gov/developer/
Downloading EBA.AZPS-ALL.D.H
Downloading EBA.PACE-ALL.D.H
Downloading EBA.PSEI-ALL.D.H
Downloading EBA.GWA-ALL.D.H
ERROR: EBA.GWA-ALL.D.H not found. invalid series_id. For key registration, documentation, and examples see https://www.eia.gov/developer/
Downloading EBA.GCPD-ALL.D.H
Downloading EBA.IPCO-ALL.D.H
Downloading EBA.WACM-ALL.D.H
Downloading EBA.GRMA-ALL.D.H
ERROR: EBA.GRMA-ALL.D.H not found. invalid series_id. For key registration, documentation, and examples see https://www.eia.gov/developer/
Downloading EBA.CHPD-ALL.D.H
Downloading EBA.TPWR-ALL.D.H
Downloading EBA.SCL-ALL.D.H
Downloading EBA.NWMT-ALL.D.H
Downloading EBA.AVA-ALL.D.H
Downloading EBA.WALC-ALL.D.H
Downloading EBA.WAUW-ALL.D.H
Downloading EBA.DOPD-ALL.D.H
Downloading EBA.DEAA

Unnamed: 0,IID,BPAT,AZPS,PACE,PSEI,GCPD,IPCO,WACM,CHPD,TPWR,...,EPE,TIDC,PNM,PSCO,BANC,LDWP,TEPC,PACW,CISO,SRP
2016-01-01 00:00:00+00:00,,7689.0,2982.0,5703.0,4238.0,540.0,2063.0,3254.0,325.0,732.0,...,848.0,245.0,1716.0,5809.0,1909.0,3020.0,1624.0,2852.0,23745.0,2730.0
2016-01-01 01:00:00+00:00,,8143.0,3363.0,6170.0,4712.0,573.0,2248.0,3472.0,353.0,784.0,...,948.0,256.0,1911.0,6345.0,2028.0,3146.0,1801.0,3036.0,24213.0,3026.0
2016-01-01 02:00:00+00:00,,8634.0,3642.0,6274.0,5085.0,592.0,2328.0,3448.0,371.0,839.0,...,988.0,284.0,1967.0,6312.0,2271.0,3518.0,1788.0,3296.0,25174.0,3262.0
2016-01-01 03:00:00+00:00,,8598.0,3615.0,5914.0,5030.0,594.0,2289.0,3417.0,366.0,837.0,...,959.0,285.0,1940.0,6112.0,2301.0,3547.0,1763.0,3290.0,28423.0,3220.0
2016-01-01 04:00:00+00:00,,8390.0,3568.0,6136.0,4845.0,593.0,2249.0,3374.0,365.0,816.0,...,929.0,276.0,1922.0,5932.0,2242.0,3452.0,1739.0,3189.0,28784.0,3142.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,,8429.0,2871.0,5421.0,4713.0,623.0,2271.0,3019.0,369.0,799.0,...,802.0,249.0,1668.0,4874.0,1975.0,2870.0,1496.0,2941.0,23816.0,2703.0
2016-12-31 20:00:00+00:00,,8257.0,2819.0,5326.0,4717.0,613.0,2206.0,2967.0,355.0,800.0,...,804.0,249.0,1609.0,4809.0,1977.0,2937.0,1490.0,2863.0,23971.0,2648.0
2016-12-31 21:00:00+00:00,,8023.0,2764.0,5206.0,4664.0,595.0,2147.0,2912.0,346.0,785.0,...,789.0,248.0,1602.0,4761.0,1913.0,2987.0,1457.0,2734.0,23901.0,2619.0
2016-12-31 22:00:00+00:00,,7865.0,2726.0,5183.0,4583.0,589.0,2110.0,2886.0,335.0,767.0,...,789.0,240.0,1584.0,4745.0,1876.0,3000.0,1433.0,2610.0,23671.0,2632.0


## 2. Impute Missing Values

Use adjacent demand data to fill missing values using a series of rules:
1. Monday: look forward one day
2. Tues - Thurs: average of look forward one day and look back one day
3. Fri: look back one day
4. Sat: look forward one day
5. Sun: look back one day

If data is still missing after applying the above rules, week ahead and week behind data is used
1. Monday: look forward two days 
2. Tues: look forward two days
3. Wed: average of look forward two days and look back two days
4. Thurs: look back two days
5. Fri: look back two days
6. Sat - Sun: average of look back one week and look forward one week

If data is still missing after applying the above rules, week ahead and week behind data is used:
1. Mon - Sun: average of look back one week and look forward one week

In [5]:
filled_western_demand_2016 = replace_with_shifted_demand(western_demand_2016, start, end)

Check for the presence of NaNs in the dataset:   

In [6]:
for name in filled_western_demand_2016.columns:
    print(name)
    print(f'Num missing values: {filled_western_demand_2016.loc[filled_western_demand_2016[name].isnull()].shape[0]}')
    print()
    

IID
Num missing values: 0

BPAT
Num missing values: 0

AZPS
Num missing values: 0

PACE
Num missing values: 16

PSEI
Num missing values: 0

GCPD
Num missing values: 0

IPCO
Num missing values: 0

WACM
Num missing values: 0

CHPD
Num missing values: 0

TPWR
Num missing values: 0

SCL
Num missing values: 0

NWMT
Num missing values: 0

AVA
Num missing values: 0

WALC
Num missing values: 0

WAUW
Num missing values: 50

DOPD
Num missing values: 365

NEVP
Num missing values: 0

PGE
Num missing values: 0

EPE
Num missing values: 0

TIDC
Num missing values: 0

PNM
Num missing values: 0

PSCO
Num missing values: 0

BANC
Num missing values: 0

LDWP
Num missing values: 0

TEPC
Num missing values: 0

PACW
Num missing values: 40

CISO
Num missing values: 0

SRP
Num missing values: 0



## 3. Aggregate BAs to Load Zone

Some BAs are very small. In these cases, BAs are agregated into BA areas as follows:   

In [7]:
western_agg_demand = aggregate_ba_demand(filled_western_demand_2016, loadzone2ba)
western_agg_demand

WA regional demand was summed from ['BPAT', 'CHPD', 'DOPD', 'TPWR', 'PSEI', 'SCL', 'AVA', 'GCPD']


******************************
Missing BA columns for OR!
Original columns: ['BPAT', 'GRID', 'IPCO', 'PACW', 'PGE']
******************************
OR regional demand was summed from ['PACW', 'PGE', 'BPAT', 'IPCO']

CAnorth regional demand was summed from ['BANC', 'BPAT', 'PACW', 'CISO']

CABayArea regional demand was summed from ['CISO']

CAcentral regional demand was summed from ['TIDC', 'CISO']

CAsw regional demand was summed from ['LDWP', 'CISO']

CAse regional demand was summed from ['IID', 'CISO']

NV regional demand was summed from ['NEVP']


******************************
Missing BA columns for AZ!
Original columns: ['AZPS', 'DEAA', 'GRIF', 'GRMA', 'HGMA', 'PNM', 'SRP', 'TEPC', 'WALC']
******************************
AZ regional demand was summed from ['WALC', 'TEPC', 'AZPS', 'SRP', 'PNM']

UT regional demand was summed from ['PACE']

NM regional demand was summed from ['EPE', 'PN

Unnamed: 0,WA,OR,CAnorth,CABayArea,CAcentral,CAsw,CAse,NV,AZ,UT,NM,CO,WY,ID,MT,ElPaso
2016-01-01 00:00:00+00:00,16980.0,15491.0,36195.0,23745.0,23990.0,26765.0,24011.0,3910.0,9890.0,5703.0,2564.0,9063.0,8957.0,17244.0,9296.0,848.0
2016-01-01 01:00:00+00:00,18265.0,16459.0,37420.0,24213.0,24469.0,27359.0,24500.0,4220.0,11005.0,6170.0,2859.0,9817.0,9642.0,18491.0,9855.0,948.0
2016-01-01 02:00:00+00:00,19398.0,17543.0,39375.0,25174.0,25458.0,28692.0,25494.0,4593.0,11607.0,6274.0,2955.0,9760.0,9722.0,19227.0,10356.0,988.0
2016-01-01 03:00:00+00:00,19239.0,17410.0,42612.0,28423.0,28708.0,31970.0,28752.0,4607.0,11475.0,5914.0,2899.0,9529.0,9331.0,18771.0,10272.0,959.0
2016-01-01 04:00:00+00:00,18692.0,16927.0,42605.0,28784.0,29060.0,32236.0,29108.0,4523.0,11293.0,6136.0,2851.0,9306.0,9510.0,18680.0,10024.0,929.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,18550.0,16629.0,37161.0,23816.0,24065.0,26686.0,24098.0,3757.0,9555.0,5421.0,2470.0,7893.0,8440.0,17995.0,10012.0,802.0
2016-12-31 20:00:00+00:00,18338.0,16329.0,37068.0,23971.0,24220.0,26908.0,24249.0,3727.0,9373.0,5326.0,2413.0,7776.0,8293.0,17637.0,9803.0,804.0
2016-12-31 21:00:00+00:00,17930.0,15888.0,36571.0,23901.0,24149.0,26888.0,24173.0,3706.0,9242.0,5206.0,2391.0,7673.0,8118.0,17164.0,9537.0,789.0
2016-12-31 22:00:00+00:00,17559.0,15495.0,36022.0,23671.0,23911.0,26671.0,23953.0,3701.0,9167.0,5183.0,2373.0,7631.0,8069.0,16899.0,9352.0,789.0


Check for the presence of NaNs in the dataset:

In [8]:
for name in western_agg_demand.columns:
    print(name)
    print(f'Num missing values: {western_agg_demand.loc[western_agg_demand[name].isnull()].shape[0]}')
    print()

WA
Num missing values: 0

OR
Num missing values: 0

CAnorth
Num missing values: 0

CABayArea
Num missing values: 0

CAcentral
Num missing values: 0

CAsw
Num missing values: 0

CAse
Num missing values: 0

NV
Num missing values: 0

AZ
Num missing values: 0

UT
Num missing values: 16

NM
Num missing values: 0

CO
Num missing values: 0

WY
Num missing values: 0

ID
Num missing values: 0

MT
Num missing values: 0

ElPaso
Num missing values: 0



## 4. Detect and Fix Outliers

Indentify and fix outliers. Outliers defined as demand hours for which the ramp rate (difference between two hours) is greater than 3 standard deviations of the ramp rates for the BA across all hours in the year. 
Demand data for hours identified as outliers are replaced with the average of the neighboring non-outlier hour's demand.
Stretches of >5 consecutive outlier hours are flagged.

In [9]:
western_fix_outliers = fix_dataframe_outliers(western_agg_demand)

824 17375.0 17375.0
825 13215.0 13215.0
1735 13200.0 13200.0
1736 17519.0 12800.0
1737 12400.0 12400.0
1758 13735.0 13735.0
1759 18122.0 12922.0
1760 12109.0 12109.0
1782 14632.0 14632.0
1783 19262.0 13753.0
1784 12874.0 12874.0
3099 12053.0 12053.0
3100 8999.0 12105.5
3101 12158.0 12158.0
3198 12042.0 12042.0
3199 16410.0 11153.5
3200 10265.0 10265.0
239 13523.0 13523.0
240 2541113.0 13994.5
241 14466.0 14466.0
1484 12941.0 12941.0
1485 3115051.0 3115051.0
1486 3114898.0 3114898.0
1487 12496.0 12496.0
6883 12850.0 12850.0
6884 2638872.0 12796.0
6885 12742.0 12742.0
7676 12598.0 12598.0
7677 3350277.0 12566.5
7678 12535.0 12535.0
239 33372.0 33372.0
240 2561136.0 34050.0
241 34728.0 34728.0
1484 35935.0 35935.0
1485 3138307.0 3138307.0
1486 3138142.0 3138142.0
1487 36272.0 36272.0
6883 37322.0 37322.0
6884 2663613.0 37649.0
6885 37976.0 37976.0
7676 36861.0 36861.0
7677 3375151.0 37208.5
7678 37556.0 37556.0
3360 5073.0 5073.0
3361 6993.0 5015.0
3362 4957.0 4957.0
4847 6344.0 6344.0
48

In [10]:
western_fix_outliers

Unnamed: 0,WA,OR,CAnorth,CABayArea,CAcentral,CAsw,CAse,NV,AZ,UT,NM,CO,WY,ID,MT,ElPaso
2016-01-01 00:00:00+00:00,16980.0,15491.0,36195.0,23745.0,23990.0,26765.0,24011.0,3910.0,9890.0,5703.0,2564.0,9063.0,8957.0,17244.0,9296.0,848.0
2016-01-01 01:00:00+00:00,18265.0,16459.0,37420.0,24213.0,24469.0,27359.0,24500.0,4220.0,11005.0,6170.0,2859.0,9817.0,9642.0,18491.0,9855.0,948.0
2016-01-01 02:00:00+00:00,19398.0,17543.0,39375.0,25174.0,25458.0,28692.0,25494.0,4593.0,11607.0,6274.0,2955.0,9760.0,9722.0,19227.0,10356.0,988.0
2016-01-01 03:00:00+00:00,19239.0,17410.0,42612.0,28423.0,28708.0,31970.0,28752.0,4607.0,11475.0,5914.0,2899.0,9529.0,9331.0,18771.0,10272.0,959.0
2016-01-01 04:00:00+00:00,18692.0,16927.0,42605.0,28784.0,29060.0,32236.0,29108.0,4523.0,11293.0,6136.0,2851.0,9306.0,9510.0,18680.0,10024.0,929.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,18550.0,16629.0,37161.0,23816.0,24065.0,26686.0,24098.0,3757.0,9555.0,5421.0,2470.0,7893.0,8440.0,17995.0,10012.0,802.0
2016-12-31 20:00:00+00:00,18338.0,16329.0,37068.0,23971.0,24220.0,26908.0,24249.0,3727.0,9373.0,5326.0,2413.0,7776.0,8293.0,17637.0,9803.0,804.0
2016-12-31 21:00:00+00:00,17930.0,15888.0,36571.0,23901.0,24149.0,26888.0,24173.0,3706.0,9242.0,5206.0,2391.0,7673.0,8118.0,17164.0,9537.0,789.0
2016-12-31 22:00:00+00:00,17559.0,15495.0,36022.0,23671.0,23911.0,26671.0,23953.0,3701.0,9167.0,5183.0,2373.0,7631.0,8069.0,16899.0,9352.0,789.0
