### Create 2016 demand profile for input to simulation
* Use input hourly balancing authority (BA) demand data from downloaded spreadsheets to use the already imputed data columns
* Distribute BA demand counts among 16 predefined WECC regions; for BA's that service multiple regions, assign population fraction of BA count to the region
* BA demand data comes only from BAs that submit info to EIA, Population counts come from US and CA census

CA has 5 regions composed of counties
* Northern California: Butte, Colusa, Del Norte, El Dorado, Glenn, Humboldt, Lake, Lassen, Mendocino, Modoc, Nevada, Placer, Plumas, Sacramento, Shasta, Sierra, Siskiyou, Sutter, Tehama, Trinity, Yolo, Yuba
* Bay Area: Alameda, Contra Costa, Marin, Napa, San Francisco, San Mateo, Santa Clara, Santa Cruz, Solano, Sonoma
* Central California: Alpine, Amador, Calaveras, Fresno, Inyo, Kings, Madera, Mariposa, Merced, Mono, Monterey, San Benito, San Joaquin, Stanislaus, Tulare, Tuolumne
* Southeast California: Imperial, Orange, Riverside, San Bernardino, San Diego
* Southwest California: Kern, Los Angeles, San Luis Obispo, Santa Barbara, Ventura


In [1]:
import numpy as np
import pandas as pd

from datetime import datetime
from dateutil.parser import parse

In [7]:
#Bus areas 1-16 in numerical order as case10K file
WA = ['AVA', 'BPAT','CHPD','DOPD','GCPD','PSEI','SCL','TPWR','AVA', 'BPAT']
OR = ['BPAT','GRIP','IPCO','PACW','PGE']
CAnorth = ['BANC','CISO','PACW','BPAT']
CABayArea = ['CISO']
CAcentral = ['CISO', 'TIDC']
CAsw = ['CISO', 'LWDP']
CAse = ['CISO', 'IID']
NV = ['NEVP']
AZ = ['AZPS','DEAA','GRIF','GRMA','HGMA','PNM','SRP','TEPC','WALC']
UT = ['PACE']
NM = ['EPE','PNM']
CO = ['PSCO','WACM']
WY = ['PACE','WACM']
ID = ['AVA','BPAT','IPCO','PACE']
MT = ['BPAT','GWA','NWMT','WAUW','WWA'] 
ElPaso = ['EPE']

BA_all = sorted(list(set().union(WA,OR,CAnorth,CABayArea,CAcentral,CAsw,CAse,NV,AZ,UT,NM,CO,WY,ID,MT,ElPaso)))


In [8]:
print(BA_all)

['AVA', 'AZPS', 'BANC', 'BPAT', 'CHPD', 'CISO', 'DEAA', 'DOPD', 'EPE', 'GCPD', 'GRIF', 'GRIP', 'GRMA', 'GWA', 'HGMA', 'IID', 'IPCO', 'LWDP', 'NEVP', 'NWMT', 'PACE', 'PACW', 'PGE', 'PNM', 'PSCO', 'PSEI', 'SCL', 'SRP', 'TEPC', 'TIDC', 'TPWR', 'WACM', 'WALC', 'WAUW', 'WWA']


In [10]:
#Data dirs
dir1 = 'U:\\Data\\WECC_demand_2015-2018'
dir1 = 'C:\\Users\\mlamherr\\Data\WECC_demand_2015-2018'
dir2 = 'C:\\Users\\mlamherr\\Data'


In [None]:
#Get population data and create weights

In [None]:
def collectBAdata(directory, serieslist):
    df={}
    
    for x in serieslist:
        print(x)
        filename = x + '.xlsx'
        df[BA] = pd.read_excel(io = directory + "/" + filename, header = 0)
    

    #### revise this for excel data
    
    for x in [[i] for i in serieslist]:
        BA = x[0]
        print(BA)
        d = EIAgov(tok, x)
        df[BA] = d.GetData()
        
        df[BA].drop(columns =['Date'], inplace=True)
        df[BA] = df[BA].resample('H').asfreq()   #fills in missing hours

    timespan = pd.date_range(startdate, enddate - DateOffset(days=offsetdays), freq='H')
    
    df_all = pd.DataFrame(index = timespan)
    for x in serieslist:
        df_all = pd.concat([df_all,df[x]], axis=1)

    return df_all


In [18]:
BA_all = ['AVA','BPAT','SCL']
BA_all = ['AVA']

start = pd.to_datetime('2016-01-01 00:00:00')
end = pd.to_datetime('2017-01-01 00:00:00')
df = {}
for x in BA_all:
    filename = x + '.xlsx'
    df[x] = pd.read_excel(io = dir1 + "/" + filename, header = 0)
    df[x].drop(columns= ['BA', 'Date', 'Hour', 'Local Time', 'Time Zone',
       'Generation Only', 'DF', 'D', 'NG', 'TI', 'Missing DIBA',
       'Missing NG by Energy Source', 'Missing D by Subregion',
       'DF Range Error', 'D Range Error', 'NG Range Error', 'TI Range Error',
       'Imputed D', 'Imputed NG',  'Published NG',
       'Balance NG D TI', 'Balance TI DIBA', 'Balance NG', 'Balance D',
       'Sum (NG) over Energy Sources', 'NG: COL', 'NG: NG', 'NG: NUC',
       'NG: OIL', 'NG: WAT', 'NG: SUN', 'NG: WND', 'NG: OTH', 'NG: UNK',
       'Sum (D) over Subregions', 'Sum (DIBA)', 'BPAT', 'CHPD', 'GCPD', 'IPCO',
       'NWMT', 'PACW', 'Active BPAT', 'Active CHPD', 'Active GCPD',
       'Active IPCO', 'Active NWMT', 'Active PACW', 'From BPAT', 'From CHPD',
       'From GCPD', 'From IPCO', 'From NWMT', 'From PACW', 'Balance BPAT',
       'Balance CHPD', 'Balance GCPD', 'Balance IPCO', 'Balance NWMT',
       'Balance PACW', 'Pair Mismatch', 'File 0', 'File 1', 'File 2',
       'File D'],  inplace=True)
    df[x].index = pd.to_datetime(df[x]['UTC Time']

In [20]:
df['AVA'].head()

Unnamed: 0,UTC Time,Published D
0,2015-07-01 08:00:00,1192.0
1,2015-07-01 09:00:00,1108.0
2,2015-07-01 10:00:00,1058.0
3,2015-07-01 11:00:00,1024.0
4,2015-07-01 12:00:00,1031.0


In [22]:
df['AVA'].rename(columns={"Published D": 'AVA'}, inplace=True)

In [24]:
df['AVA'].head()

Unnamed: 0,UTC Time,AVA
0,2015-07-01 08:00:00,1192.0
1,2015-07-01 09:00:00,1108.0
2,2015-07-01 10:00:00,1058.0
3,2015-07-01 11:00:00,1024.0
4,2015-07-01 12:00:00,1031.0
