#### Assign BA counts by County

The current version of assigning BA counts to a bus uses state-level population fractions, for example, if a BA is known to offer service in both WA and OR, e.g., BA=BPAT, then the BA counts, for example, are

BPATcount(WA) = BPATtotal * pop(WA)/(pop(WA) + pop(OR)
demandCount(WA) = sum(all fractional BA counts assigned to WA)

However, we actually know what counties each BA serves, so instead of using state-level population, we can use county level population. So 

* Demand in LoadZone = sumOverCountiesInLoadZone(demandPerCounty)
* demandPerCounty = sumOverBAs(BAcount(County))
* BAcounty(County) = BAtotal * CountyPopulation/sumOverCountyPopulationServedByBA

So we need to make a list of Counties per LoadZone and the corresponing populations for those Counties.
                   
<img src="BA-county_array.png">

Note that the array values have to be updated as county census data are updated anda as BA-county assignments change.

In [1]:
import pandas as pd
import numpy as np

In [2]:
WA = ['AVA', 'BPAT','CHPD','DOPD','GCPD','PSEI','SCL','TPWR','AVA', 'BPAT']
OR = ['BPAT','GRID','IPCO','PACW','PGE']
CAnorth = ['BANC','CISO','PACW','BPAT']
CABayArea = ['CISO']
CAcentral = ['CISO', 'TIDC']
CAsw = ['CISO', 'LDWP']
CAse = ['CISO', 'IID']
NV = ['NEVP']
AZ = ['AZPS','DEAA','GRIF','GRMA','HGMA','PNM','SRP','TEPC','WALC']
UT = ['PACE']
NM = ['EPE','PNM']
CO = ['PSCO','WACM']
WY = ['PACE','WACM']
ID = ['AVA','BPAT','IPCO','PACE']
MT = ['BPAT','GWA','NWMT','WAUW','WWA'] 
ElPaso = ['EPE']

ba_all = sorted(list(set().union(WA,OR,CAnorth,CABayArea,CAcentral,CAsw,CAse,NV,AZ,UT,NM,CO,WY,ID,MT,ElPaso)))

In [3]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [4]:
dir_861 = 'U:\\src\\PreREISE\\prereise\\gather\\demanddata\\EIA\\demo\\data\\f8612016\\'
dir_pop = 'U:\\src\\PreREISE\\prereise\\gather\\demanddata\\EIA\\demo\\data\\US_Counties_Population\\'
dir_BA =  'U:\\src\\PreREISE\\prereise\\gather\\demanddata\\EIA\\test\\data\\

service_territory_file = 'Service_Territory_2016.xlsx'  #get utility-county mapping
ba_counts_file = 'Sales_Ult_Cust_2016.xlsx'  #Get utility-BAname mapping
ba_id_map_file = 'Balancing_Authority_2016.xlsx'  #use for checking final join: which BAs by state

#This assumes data has been through outlier fixing for now
ba_demand_file = 'BA_2016.csv'

#This file is edited from original due to mysterious read problems
us_counties_pop_file = 'USCountyPop.csv'

service_territory =  pd.read_excel(io = dir_861 + '/' + service_territory_file, 
                                   header = 0,
                                   dtype = {'Utility Number': str, 'County': str, 'State': str}
                                  )
ba_counts = pd.read_excel(io = dir_861 + '/' + ba_counts_file, 
                          header = 0, usecols = 'B,G,I,W', skiprows = [0,1], 
                          dtype = {'Utility Number': str, 'BA_CODE': str, 'State': str}
                          #sheet_name = ['States']
                         )
counties_pop = pd.read_csv(dir_pop + '/' + us_counties_pop_file, header=0)
ba_id_map = pd.read_excel(io = dir_861 + '/' + ba_id_map_file, header = 0)
ba_demand = pd.read_csv(dir_BA + '/' + ba_demand_file, header = 0, parse_dates = True, index_col = 'UTC Time')

SyntaxError: invalid syntax (<ipython-input-4-679734615c73>, line 4)

In [None]:
service_territory.head()

In [None]:
ba_counts.head()

In [None]:
ba_county = pd.merge(service_territory, ba_counts, how='outer', on ='Utility Number')

In [None]:
ba_county.head(10)

In [None]:
ba_county.loc[ba_county['BA_CODE'].isin(ba_all)].sort_values('BA_CODE').head(20)

In [None]:
counties_pop.drop(columns = ['GEO.id', 'GEO.id2', 'rescen42010', 'resbase42010',
       'respop72010', 'respop72011', 'respop72012', 'respop72013',
       'respop72014', 'respop72015','respop72017'], inplace = True)

In [None]:
counties_pop.columns

In [None]:
counties_pop.head()

In [None]:
counties_pop['County'] = counties_pop['GEO.display-label'].str.split(',',expand=True)[0].str.replace(' County','')
counties_pop['State'] = counties_pop['GEO.display-label'].str.split(',',expand=True)[1].str.replace('^ ','')
counties_pop['State'] = counties_pop['State'].apply(lambda x: us_state_abbrev[x])
counties_pop.drop(columns = 'GEO.display-label', inplace = True)
counties_pop.rename(columns = {'respop72016':'pop2016'}, inplace=True)

In [None]:
counties_pop.head()

In [None]:
# Match BA to county: make table :  BAname-County-State-Population

In [None]:
ba_county.head()


In [None]:
ba_county_uniq = ba_county[['BA_CODE','County','State_x','State_y']].drop_duplicates(keep='first')
ba_county_uniq.drop(columns = ['State_y'], inplace=True)
ba_county_uniq.rename(columns = {'State_x':'State'}, inplace=True)
ba_county_uniq.dropna(axis = 0, how = 'any', inplace=True)
ba_county_uniq.drop_duplicates(keep = 'first', inplace=True)

In [None]:
ba_county_uniq.head()

In [None]:
counties_pop.head()

In [None]:
ba_counties_pop = pd.merge(ba_county_uniq, counties_pop, how = 'inner', on = ['County','State'])
ba_counties_pop['County_State'] = ba_counties_pop['County'] + ',' + ba_counties_pop['State']

In [None]:
ba_counties_pop.head()

In [None]:
ba_county_uniq['BA_CODE'].unique()

In [None]:
ba_counties_pop['BA_CODE'].unique()

In [None]:
print(len(ba_county_uniq['BA_CODE'].unique()))
print(len(ba_counties_pop['BA_CODE'].unique()))

In [None]:
# Get WECC BAs

In [None]:
wecc_ba_counties_pop = ba_counties_pop.loc[ba_counties_pop['BA_CODE'].isin(ba_all)]

In [None]:
wecc_ba_counties_pop.shape

In [None]:
wecc_ba_counties_pop.loc[wecc_ba_counties_pop['State']=='NV'].sort_values('BA_CODE')


In [None]:
wecc_ba_counties_pop.loc[wecc_ba_counties_pop['BA_CODE']=='IID']

* Comparing with Balancing_Authority_2016.xlsx, BA='IID' is missing in NV list
* Comparing with Balancing_Authority_JAN17.pdf (which can arguably be lower resolution): only NEVP serves NV
* Document inconsistencies for later checking: which are real and which are due to the joins? (if any of BA, County, or State are missing, they drop off the list); How many counties are covered? For example, 'Fond du Lac, WI' is only present in county_pop file.

In [None]:
wecc_ba_counties_pop.loc[wecc_ba_counties_pop['State']=='UT']['BA_CODE'].unique()

In [None]:
wecc_ba_counties_pop['BA_CODE'].unique()

In [None]:
len(wecc_ba_counties_pop['BA_CODE'].unique())

In [None]:
wecc_ba_counties_pop.loc[wecc_ba_counties_pop['BA_CODE']=='TEPC']

In [None]:
counties_pop.head()

In [None]:
ba_counties_pop.loc[ba_counties_pop['BA_CODE']== 'EPE']['County'].tolist()

In [None]:
# Load Zone mapping: use county-pop file; except CA and ElPaso
wecc_states_list = ['WA','OR','CA','NV','AZ','UT','NM','CO','WY','MT']

zone_county = {}
for r in wecc_states_list:
    zone_county[r] = counties_pop.loc[counties_pop['State'] == r]['County'].tolist()

zone_county['Northern_California'] = ['Butte','Colusa','Del Norte','El Dorado','Glenn','Humboldt',
                       'Lake','Lassen','Mendocino','Modoc','Nevada','Placer','Plumas',
                       'Sacramento','Shasta','Sierra','Siskiyou','Sutter','Tehama',
                       'Trinity','Yolo','Yuba']
zone_county['Bay_Area'] = ['Alameda','Contra Costa','Marin','Napa','San Francisco','San Mateo',
            'Santa Clara','Santa Cruz','Solano','Sonoma']
zone_county['Central_California'] = ['Alpine','Amador','Calaveras','Fresno','Inyo','Kings','Madera',
                      'Mariposa','Merced','Mono','Monterey','San Benito','San Joaquin',
                      'Stanislaus','Tulare','Tuolumne']
zone_county['Southeast_California'] = ['Imperial','Orange','Riverside','San Bernardino','San Diego']
zone_county['Southwest_California'] = ['Kern','Los Angeles','San Luis Obispo','Santa Barbara','Ventura']

#El Paso is defined by BA= EPE, so counties served by EPE
zone_county['El_Paso'] = ba_counties_pop.loc[ba_counties_pop['BA_CODE']== 'EPE']['County'].tolist()

* WA = ['AVA', 'BPAT','CHPD','DOPD','GCPD','PSEI','SCL','TPWR','AVA', 'BPAT']
* OR = ['BPAT','GRID','IPCO','PACW','PGE']
* CAnorth = ['BANC','CISO','PACW','BPAT']
* CABayArea = ['CISO']
* CAcentral = ['CISO', 'TIDC']
* CAsw = ['CISO', 'LDWP']
* CAse = ['CISO', 'IID']
* NV = ['NEVP']
* AZ = ['AZPS','DEAA','GRIF','GRMA','HGMA','PNM','SRP','TEPC','WALC']
* UT = ['PACE']
* NM = ['EPE','PNM']
* CO = ['PSCO','WACM']
* WY = ['PACE','WACM']
* ID = ['AVA','BPAT','IPCO','PACE']
* MT = ['BPAT','GWA','NWMT','WAUW','WWA'] 
* ElPaso = ['EPE']

In [None]:
wecc_ba_counties_pop.head()

In [None]:
def assign_demand_by_county(ba_county_pop, zone_county_dict, ba_demand):
    '''
    ba_county_pop = dataframe containing BA-county-population mapping
    zone_county_dict = dict containing zone-counties mapping
    ba_demand = dataframe of counts of all BAs for 2016
    '''
    
    
    #for each load zone:
    

    
    
    
    
    
    
    

In [None]:
BA_list = ba_counties_pop['BA_CODE'].unique()
ba_county_dict = {}

for ba in BA_list:
    ba_county_dict[ba] = 

In [None]:
ba_counties_pop.head()

In [None]:
ba_counties_pop['County'] + ',' + ba_counties_pop['State']