# Produce Eastern Demand Profile Using EIA Data

In [1]:
import getpass
import os
import pandas as pd
import pickle

from powersimdata.input.grid import Grid

from prereise.gather.demanddata.eia.clean_data import (fix_dataframe_outliers, 
                                                       replace_with_shifted_demand)
from prereise.gather.demanddata.eia.get_eia_data import get_ba_demand
from prereise.gather.demanddata.eia.map_ba import (aggregate_ba_demand,
                                                   get_demand_in_loadzone)

In [2]:
ba = ['SWPP',
      'MISO',
      'AECI',
      'SPA',
      'EEI',
      'LGEE',
      'OVEC',
      'PJM',
      'NYIS',
      'ISNE',
      'TVA',
      'CPLW',
      'DUK',
      'YAD',
      'CPLE',
      'AEC',
      'SOCO',
      'SEPA',
      'SC',
      'TAL',
      'SCEG',
      'GVL',
      'JEA',
      'SEC',
      'FPC',
      'NSB',
      'TEC',
      'FMPP',
      'FPL',
      'HST']

## 1. Download Data
Demand data from EIA is imported using an API Key. Note that data for some BAs will not be found and this will be reported.

In [3]:
start = pd.to_datetime('2016-01-01 00:00:00')
end = pd.to_datetime('2016-12-31 23:00:00')

key = getpass.getpass(prompt='api_key=')
eastern_demand = get_ba_demand(ba, start, end, key)
eastern_demand_2016 = eastern_demand.loc[start:end]
eastern_demand_2016

api_key=········
Downloading EBA.SWPP-ALL.D.H
Downloading EBA.MISO-ALL.D.H
Downloading EBA.AECI-ALL.D.H
Downloading EBA.SPA-ALL.D.H
Downloading EBA.EEI-ALL.D.H
ERROR: EBA.EEI-ALL.D.H was found but has no data
Downloading EBA.LGEE-ALL.D.H
Downloading EBA.OVEC-ALL.D.H
Downloading EBA.PJM-ALL.D.H
Downloading EBA.NYIS-ALL.D.H
Downloading EBA.ISNE-ALL.D.H
Downloading EBA.TVA-ALL.D.H
Downloading EBA.CPLW-ALL.D.H
Downloading EBA.DUK-ALL.D.H
Downloading EBA.YAD-ALL.D.H
ERROR: EBA.YAD-ALL.D.H not found. invalid series_id. For key registration, documentation, and examples see https://www.eia.gov/developer/
Downloading EBA.CPLE-ALL.D.H
Downloading EBA.AEC-ALL.D.H
Downloading EBA.SOCO-ALL.D.H
Downloading EBA.SEPA-ALL.D.H
ERROR: EBA.SEPA-ALL.D.H not found. invalid series_id. For key registration, documentation, and examples see https://www.eia.gov/developer/
Downloading EBA.SC-ALL.D.H
Downloading EBA.TAL-ALL.D.H
Downloading EBA.SCEG-ALL.D.H
Downloading EBA.GVL-ALL.D.H
Downloading EBA.JEA-ALL.D.H
Do

Unnamed: 0,SWPP,MISO,AECI,SPA,LGEE,OVEC,PJM,NYIS,ISNE,TVA,...,SCEG,GVL,JEA,SEC,FPC,NSB,TEC,FMPP,FPL,HST
2016-01-01 00:00:00+00:00,30875.0,78481.0,2917.0,68.0,4280.0,,95072.0,19726,16063.0,18742.0,...,2637.0,270.0,,175.0,7389.0,65.0,2969.0,2507.0,18036.0,84.0
2016-01-01 01:00:00+00:00,31457.0,77902.0,2946.0,83.0,4245.0,,92298.0,19008,15273.0,18555.0,...,2558.0,258.0,,164.0,6931.0,61.0,2778.0,2386.0,16990.0,79.0
2016-01-01 02:00:00+00:00,30969.0,75821.0,2910.0,58.0,4078.0,,89416.0,18340,14568.0,18180.0,...,2487.0,244.0,,152.0,0.0,53.0,2575.0,2226.0,15650.0,73.0
2016-01-01 03:00:00+00:00,30397.0,73863.0,2878.0,80.0,3935.0,,86496.0,17557,13850.0,17744.0,...,2374.0,229.0,,140.0,5830.0,51.0,2397.0,2049.0,14406.0,66.0
2016-01-01 04:00:00+00:00,29818.0,71919.0,2829.0,69.0,3793.0,,83405.0,16772,13108.0,17306.0,...,2256.0,213.0,,128.0,5405.0,48.0,2223.0,1908.0,13389.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,26723.0,69959.0,2178.0,70.0,4090.0,80.0,89370.0,17863,14716.0,18673.0,...,2727.0,183.0,1275.0,124.0,5087.0,,2099.0,,12686.0,53.0
2016-12-31 20:00:00+00:00,26109.0,68689.0,2094.0,58.0,4038.0,80.0,90063.0,17847,14714.0,18408.0,...,2676.0,182.0,1234.0,115.0,4998.0,,2047.0,,12760.0,54.0
2016-12-31 21:00:00+00:00,25618.0,68160.0,2035.0,60.0,3957.0,80.0,90826.0,18117,14965.0,18052.0,...,2651.0,182.0,1220.0,115.0,4949.0,,2061.0,,12858.0,56.0
2016-12-31 22:00:00+00:00,25441.0,68204.0,2035.0,64.0,4014.0,80.0,92672.0,19122,15996.0,18122.0,...,2714.0,185.0,1230.0,116.0,4981.0,,2089.0,,13248.0,57.0


## 2. Impute Missing Values

Use adjacent demand data to fill missing values using a series of rules:
1. Monday: look forward one day
2. Tues - Thurs: average of look forward one day and look back one day
3. Fri: look back one day
4. Sat: look forward one day
5. Sun: look back one day

If data is still missing after applying the above rules, week ahead and week behind data is used
1. Monday: look forward two days 
2. Tues: look forward two days
3. Wed: average of look forward two days and look back two days
4. Thurs: look back two days
5. Fri: look back two days
6. Sat - Sun: average of look back one week and look forward one week

If data is still missing after applying the above rules, week ahead and week behind data is used:
1. Mon - Sun: average of look back one week and look forward one week

In [4]:
filled_eastern_demand_2016 = replace_with_shifted_demand(eastern_demand_2016, start, end)

In [5]:
for name in filled_eastern_demand_2016.columns:
    print(name)
    print(filled_eastern_demand_2016.loc[filled_eastern_demand_2016[name].isnull()])

SWPP
Empty DataFrame
Columns: [SWPP, MISO, AECI, SPA, LGEE, OVEC, PJM, NYIS, ISNE, TVA, CPLW, DUK, CPLE, AEC, SOCO, SC, TAL, SCEG, GVL, JEA, SEC, FPC, NSB, TEC, FMPP, FPL, HST]
Index: []

[0 rows x 27 columns]
MISO
Empty DataFrame
Columns: [SWPP, MISO, AECI, SPA, LGEE, OVEC, PJM, NYIS, ISNE, TVA, CPLW, DUK, CPLE, AEC, SOCO, SC, TAL, SCEG, GVL, JEA, SEC, FPC, NSB, TEC, FMPP, FPL, HST]
Index: []

[0 rows x 27 columns]
AECI
Empty DataFrame
Columns: [SWPP, MISO, AECI, SPA, LGEE, OVEC, PJM, NYIS, ISNE, TVA, CPLW, DUK, CPLE, AEC, SOCO, SC, TAL, SCEG, GVL, JEA, SEC, FPC, NSB, TEC, FMPP, FPL, HST]
Index: []

[0 rows x 27 columns]
SPA
Empty DataFrame
Columns: [SWPP, MISO, AECI, SPA, LGEE, OVEC, PJM, NYIS, ISNE, TVA, CPLW, DUK, CPLE, AEC, SOCO, SC, TAL, SCEG, GVL, JEA, SEC, FPC, NSB, TEC, FMPP, FPL, HST]
Index: []

[0 rows x 27 columns]
LGEE
Empty DataFrame
Columns: [SWPP, MISO, AECI, SPA, LGEE, OVEC, PJM, NYIS, ISNE, TVA, CPLW, DUK, CPLE, AEC, SOCO, SC, TAL, SCEG, GVL, JEA, SEC, FPC, NSB, TEC, 

## 3. Aggregate BAs
Some BAs are very small. In these cases, BAs are agregated into BA areas as follows:

In [6]:
ba_area ={
    'ISNE':['ISNE'],
    'NYIS':['NYIS'],
    'PJM':['PJM','OVEC'],
    'AEC':['AEC'],
    'SOCO':['SOCO'],
    'TVA_LGEE':['TVA','LGEE'],
    'Florida':['TAL','JEA','GVL','SEC','FPC','NSB','FMPP','TEC','FPL','HST'],
    'MISO':['MISO','AECI'],
    'SWPP':['SWPP','SPA'],
    'Carolina':['CPLW','DUK','SCEG','SC','CPLE']
}

In [7]:
eastern_agg_demand = aggregate_ba_demand(eastern_demand_2016, ba_area)
eastern_agg_demand

ISNE regional demand was summed from ['ISNE']

NYIS regional demand was summed from ['NYIS']

PJM regional demand was summed from ['PJM', 'OVEC']

AEC regional demand was summed from ['AEC']

SOCO regional demand was summed from ['SOCO']

TVA_LGEE regional demand was summed from ['LGEE', 'TVA']

Florida regional demand was summed from ['TEC', 'HST', 'NSB', 'TAL', 'SEC', 'FMPP', 'FPC', 'GVL', 'JEA', 'FPL']

MISO regional demand was summed from ['MISO', 'AECI']

SWPP regional demand was summed from ['SPA', 'SWPP']

Carolina regional demand was summed from ['CPLW', 'CPLE', 'DUK', 'SCEG', 'SC']



Unnamed: 0,ISNE,NYIS,PJM,AEC,SOCO,TVA_LGEE,Florida,MISO,SWPP,Carolina
2016-01-01 00:00:00+00:00,16063.0,19726,95072.0,492.0,23927.0,23022.0,31808.0,81398.0,30943.0,22878.0
2016-01-01 01:00:00+00:00,15273.0,19008,92298.0,488.0,24347.0,22800.0,29950.0,80848.0,31540.0,22097.0
2016-01-01 02:00:00+00:00,14568.0,18340,89416.0,465.0,23574.0,22258.0,21262.0,78731.0,31027.0,21357.0
2016-01-01 03:00:00+00:00,13850.0,17557,86496.0,444.0,22762.0,21679.0,25439.0,76741.0,30477.0,20466.0
2016-01-01 04:00:00+00:00,13108.0,16772,83405.0,429.0,21857.0,21099.0,23625.0,74748.0,29887.0,19469.0
...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,14716.0,17863,89450.0,545.0,26496.0,22763.0,21783.0,72137.0,26793.0,24925.0
2016-12-31 20:00:00+00:00,14714.0,17847,90143.0,528.0,26294.0,22446.0,21659.0,70783.0,26167.0,24653.0
2016-12-31 21:00:00+00:00,14965.0,18117,90906.0,513.0,26043.0,22009.0,21707.0,70195.0,25678.0,24634.0
2016-12-31 22:00:00+00:00,15996.0,19122,92752.0,521.0,26012.0,22136.0,22178.0,70239.0,25505.0,25303.0


## 4. Detect and Fix Outliers

Indentify and fix outliers. Outliers defined as demand hours for which the ramp rate (difference between two hours) is greater than 3 standard deviations of the ramp rates for the BA across all hours in the year. 
Demand data for hours identified as outliers are replaced with the average of the neighboring non-outlier hour's demand.
Stretches of >5 consecutive outlier hours are flagged.

In [8]:
eastern_fix_outliers = fix_dataframe_outliers(eastern_agg_demand)

1357 104873.0 104873.0
1358 137033.0 104409.0
1359 103945.0 103945.0
1734 67360.0 67360.0
1735 67.0 66320.0
1736 65280.0 65280.0
1756 76763.0 76763.0
1757 52.0 52.0
1780 49.0 49.0
1781 74584.0 74584.0
7468 70188.0 70188.0
7469 0.0 69438.5
7470 68689.0 68689.0
2410 19206.0 19206.0
2411 26343.0 19742.0
2412 20278.0 20278.0
701 23378.0 23378.0
702 4517.0 22913.0
703 22448.0 22448.0
704 22324.0 22324.0
705 3964.0 22365.0
706 22406.0 22406.0
1735 14850.0 14850.0
1736 2638.0 2638.0
1757 2897.0 2897.0
1758 15124.0 15124.0
7469 16362.0 16362.0
7470 4106.0 15976.0
7471 15590.0 15590.0
8237 25897.0 25897.0
8238 5649.0 25647.5
8239 25398.0 25398.0
8477 28120.0 28120.0
8478 7081.0 27567.5
8479 27015.0 27015.0
1564 18918.0 18918.0
1565 28308.0 18278.0
1566 17638.0 17638.0
1876 31575.0 31575.0
1877 21050.0 21050.0
3912 33901.0 33901.0
3913 16354.0 16354.0
3916 13220.0 13220.0
3917 24792.0 24792.0
4194 42510.0 42510.0
4195 21544.0 21544.0
4204 15518.0 15518.0
4205 29081.0 29081.0
4850 38735.0 38735.0

In [9]:
eastern_fix_outliers 

Unnamed: 0,ISNE,NYIS,PJM,AEC,SOCO,TVA_LGEE,Florida,MISO,SWPP,Carolina
2016-01-01 00:00:00+00:00,16063.0,19726,95072.0,492.0,23927.0,23022.0,31808.0,81398.0,30943.0,22878.0
2016-01-01 01:00:00+00:00,15273.0,19008,92298.0,488.0,24347.0,22800.0,29950.0,80848.0,31540.0,22097.0
2016-01-01 02:00:00+00:00,14568.0,18340,89416.0,465.0,23574.0,22258.0,21262.0,78731.0,31027.0,21357.0
2016-01-01 03:00:00+00:00,13850.0,17557,86496.0,444.0,22762.0,21679.0,25439.0,76741.0,30477.0,20466.0
2016-01-01 04:00:00+00:00,13108.0,16772,83405.0,429.0,21857.0,21099.0,23625.0,74748.0,29887.0,19469.0
...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,14716.0,17863,89450.0,545.0,26496.0,22763.0,21783.0,72137.0,26793.0,24925.0
2016-12-31 20:00:00+00:00,14714.0,17847,90143.0,528.0,26294.0,22446.0,21659.0,70783.0,26167.0,24653.0
2016-12-31 21:00:00+00:00,14965.0,18117,90906.0,513.0,26043.0,22009.0,21707.0,70195.0,25678.0,24634.0
2016-12-31 22:00:00+00:00,15996.0,19122,92752.0,521.0,26012.0,22136.0,22178.0,70239.0,25505.0,25303.0


## 5. Get bus to BA area mapping via county

### A. Create File if not Available
If the file is snot available, it will be created by the `bus_ba_map.ipynb` notebook. Note that it takes around 3h to run.

In [10]:
if not os.path.exists("bus_ba_map.csv"):
    %run ./bus_ba_map.ipynb

### B. Read the File

In [11]:
bus_map = pd.read_csv('bus_ba_map.csv')
bus_map.set_index('bus_id')

Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,9.97,43.976116,-70.221052,Androscoggin__ME,ISNE,Maine
9,11.48,44.100992,-70.305323,Androscoggin__ME,ISNE,Maine
10,9.97,44.179913,-70.185998,Androscoggin__ME,ISNE,Maine
11,9.97,44.143322,-70.224057,Androscoggin__ME,ISNE,Maine
12,3.06,44.020747,-70.180492,Androscoggin__ME,ISNE,Maine
...,...,...,...,...,...,...
69946,1.70,48.307278,-106.272727,Valley__MT,MISO,Montana Eastern
69948,0.85,48.222633,-106.316038,Valley__MT,MISO,Montana Eastern
69949,0.66,48.932800,-106.642307,Valley__MT,MISO,Montana Eastern
69950,0.95,48.408811,-106.524730,Valley__MT,MISO,Montana Eastern


Transform to load zone using the demand that has missing data filled and outliers fixed, prepared from the first steps in the demo notebook.

In [12]:
eastern_demand_v5 = get_demand_in_loadzone(eastern_fix_outliers, bus_map)

Load in Eastern Grid and transform column names into corresponding loadzone id.

In [13]:
grid = Grid(['Eastern'])
eastern_demand_v5.columns = [grid.zone2id[col] for col in eastern_demand_v5.columns]
eastern_demand_v5 = eastern_demand_v5.sort_index(axis=1)
eastern_demand_v5

Reading bus.csv
Reading plant.csv
Reading gencost.csv
Reading branch.csv
Reading dcline.csv
Reading sub.csv
Reading bus2sub.csv
Reading zone.csv


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,43,44,45,46,47,48,49,50,51,52
2016-01-01 00:00:00+00:00,1342.820998,1440.655575,600.763115,7581.562212,910.213388,4209.789191,13682.014219,6569.107335,11713.791325,11126.541138,...,10853.450041,2371.772581,1168.394506,316.149204,8405.379423,6110.042669,4447.529440,1373.158280,1366.952944,149.880227
2016-01-01 01:00:00+00:00,1276.779250,1369.802192,571.235771,7208.690759,865.447866,4003.605235,13191.418166,6330.139172,11367.967452,10801.826942,...,10798.385096,2382.624748,1190.936972,322.248841,8567.548945,6227.927020,4529.357442,1377.295717,1374.289820,149.433565
2016-01-01 02:00:00+00:00,1217.843260,1306.572273,544.883366,6875.938387,825.498888,3819.512768,12731.570544,6107.744102,11010.890784,10464.506373,...,10522.721761,2330.668354,1171.566310,317.007444,8428.197245,6126.629412,4454.181626,1346.438528,1344.736901,145.740370
2016-01-01 03:00:00+00:00,1157.820508,1242.176413,518.037295,6537.050155,784.813262,3631.675821,12198.174558,5847.184973,10646.093831,10122.689492,...,10262.059399,2279.568946,1150.798545,311.388013,8278.794838,6018.025739,4374.108813,1316.304481,1315.563397,142.221137
2016-01-01 04:00:00+00:00,1095.791423,1175.628045,490.295684,6186.834183,742.767670,3437.642565,11661.456256,5585.918592,10261.177675,9760.875341,...,10000.116213,2227.086383,1128.520396,305.359896,8118.526802,5901.523616,4288.478577,1285.473076,1285.540547,138.669091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,1230.215638,1319.846072,550.369002,6945.792786,833.885340,3856.056040,12429.282223,5949.516634,11001.349786,10468.263763,...,9604.338286,2080.936274,1011.692273,273.748041,7278.070352,5290.578588,3854.146105,1206.451556,1198.486470,132.385658
2016-12-31 20:00:00+00:00,1230.048444,1319.666696,550.293181,6944.848807,833.772010,3855.485732,12427.158302,5944.373335,11082.201766,10549.294293,...,9421.276151,2037.772776,988.054780,267.352106,7108.023248,5166.967861,3764.718808,1181.757877,1173.460099,129.814354
2016-12-31 21:00:00+00:00,1251.031328,1342.178341,559.677808,7063.318092,847.994979,3921.137495,12608.264844,6034.163572,11179.378799,10638.641470,...,9336.855999,2011.787603,969.590348,262.355921,6975.190926,5070.409322,3695.744353,1167.420144,1158.127294,128.545226
2016-12-31 22:00:00+00:00,1337.219989,1434.646491,598.217981,7549.938937,906.416818,4190.452183,13271.103871,6368.157596,11424.466326,10854.968898,...,9338.422445,2006.743406,963.057941,260.588354,6928.197078,5036.248530,3671.815456,1165.004751,1154.965366,128.493010
