## Format MISO demand from data files

In [1]:
import pandas as pd

Read in 2016 MISO demand data

In [2]:
LRZ_demand_2016 = pd.read_excel('20161231_dfal_hist.xls', skiprows=5)
LRZ_demand_2016

Unnamed: 0,MarketDay,HourEnding,LoadResource Zone,MTLF (MWh),ActualLoad (MWh)
0,2016-01-01 00:00:00,1,LRZ1,10692,10911.1
1,2016-01-01 00:00:00,1,LRZ2_7,15766,15824.5
2,2016-01-01 00:00:00,1,LRZ3_5,9728,10006.7
3,2016-01-01 00:00:00,1,LRZ4,5043,5198.81
4,2016-01-01 00:00:00,1,LRZ6,10046,9963.71
...,...,...,...,...,...
61537,2016-12-31 00:00:00,24,LRZ6,9415,
61538,2016-12-31 00:00:00,24,LRZ8_9_10,16828,
61539,2016-12-31 00:00:00,24,MISO,67316,
61540,,,,,


Replace nans with predicted values

In [3]:
LRZ_demand_2016.loc[LRZ_demand_2016['ActualLoad (MWh)'].isna() , 'ActualLoad (MWh)'] = LRZ_demand_2016['MTLF (MWh)']
LRZ_demand_2016

Unnamed: 0,MarketDay,HourEnding,LoadResource Zone,MTLF (MWh),ActualLoad (MWh)
0,2016-01-01 00:00:00,1,LRZ1,10692,10911.1
1,2016-01-01 00:00:00,1,LRZ2_7,15766,15824.5
2,2016-01-01 00:00:00,1,LRZ3_5,9728,10006.7
3,2016-01-01 00:00:00,1,LRZ4,5043,5198.81
4,2016-01-01 00:00:00,1,LRZ6,10046,9963.71
...,...,...,...,...,...
61537,2016-12-31 00:00:00,24,LRZ6,9415,9415
61538,2016-12-31 00:00:00,24,LRZ8_9_10,16828,16828
61539,2016-12-31 00:00:00,24,MISO,67316,67316
61540,,,,,


Read in 2015 MISO demand data

In [4]:
LRZ_demand_2015 = pd.read_excel('20151231_dfal_hist.xls', skiprows=5)
LRZ_demand_2015

Unnamed: 0,MarketDay,HourEnding,LoadResource Zone,MTLF (MWh),ActualLoad (MWh)
0,2015-01-01 00:00:00,1,LRZ1,11881,11566.6
1,2015-01-01 00:00:00,1,LRZ2_7,16777,16866.8
2,2015-01-01 00:00:00,1,LRZ3_5,10544,10673.9
3,2015-01-01 00:00:00,1,LRZ4,5620,5687.05
4,2015-01-01 00:00:00,1,LRZ6,11476,11594.3
...,...,...,...,...,...
61369,2015-12-31 00:00:00,24,LRZ6,10611,10198.9
61370,2015-12-31 00:00:00,24,LRZ8_9_10,17711,17926.4
61371,2015-12-31 00:00:00,24,MISO,72655,71431.1
61372,,,,,


Function converts MISO file format to a dataframe with subarea demand columns 

In [5]:
miso_zones = ['LRZ1','LRZ2_7','LRZ3_5','LRZ4','LRZ6','LRZ8_9_10']

def CreateDemandProfile(data, miso_zones):
    demand = pd.DataFrame(data[data['LoadResource Zone'] == miso_zones[0]]['ActualLoad (MWh)'])
    for zone in range(1,len(miso_zones)):
        demand[miso_zones[zone]] = data[data['LoadResource Zone'] == miso_zones[zone]]['ActualLoad (MWh)'].to_list()
    demand.columns = miso_zones
    return demand

Create subareas dataframe for 2015 demand data

In [6]:
MISO_loadzones_2015 = CreateDemandProfile(LRZ_demand_2015, miso_zones)
MISO_loadzones_2015

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
0,11566.6,16866.75,10673.85,5687.05,11594.30,18887.39
7,11128.9,16232.87,10440.79,5540.74,11449.76,18462.71
14,10744.2,15752.56,10206.73,5436.40,11292.13,18250.65
21,10516.3,15413.05,10086.26,5361.72,11198.64,18014.31
28,10369.1,15325.18,10041.79,5324.75,11125.65,17934.87
...,...,...,...,...,...,...
61337,12645,19112.33,11559.97,5871.35,11323.68,19621.66
61344,12225,18441.55,11249.20,5727.29,11011.60,19286.61
61351,11868.5,17831.50,10982.81,5560.36,10746.58,18881.53
61358,11525.2,17238.04,10760.08,5403.62,10484.22,18448.14


Set the 2015 demand datetime index to be the next hour in the Eastern Timezone.

In [8]:
start_date = pd.to_datetime('2015-01-01 01:00:00')
end_date = pd.to_datetime('2016-01-01 00:00:00')
time_interval = pd.date_range(start_date, end_date, tz = "US/Eastern", freq='H')
MISO_loadzones_2015.index = time_interval
MISO_loadzones_2015

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
2015-01-01 01:00:00-05:00,11566.6,16866.75,10673.85,5687.05,11594.30,18887.39
2015-01-01 02:00:00-05:00,11128.9,16232.87,10440.79,5540.74,11449.76,18462.71
2015-01-01 03:00:00-05:00,10744.2,15752.56,10206.73,5436.40,11292.13,18250.65
2015-01-01 04:00:00-05:00,10516.3,15413.05,10086.26,5361.72,11198.64,18014.31
2015-01-01 05:00:00-05:00,10369.1,15325.18,10041.79,5324.75,11125.65,17934.87
...,...,...,...,...,...,...
2015-12-31 20:00:00-05:00,12645,19112.33,11559.97,5871.35,11323.68,19621.66
2015-12-31 21:00:00-05:00,12225,18441.55,11249.20,5727.29,11011.60,19286.61
2015-12-31 22:00:00-05:00,11868.5,17831.50,10982.81,5560.36,10746.58,18881.53
2015-12-31 23:00:00-05:00,11525.2,17238.04,10760.08,5403.62,10484.22,18448.14


Create subareas dataframe for 2016 demand data

In [9]:
MISO_loadzones_2016 = CreateDemandProfile(LRZ_demand_2016, miso_zones)
MISO_loadzones_2016

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
0,10911.1,15824.53,10006.74,5198.81,9963.71,17483.79
7,10560.7,15271.17,9879.14,4968.12,9842.67,17121.95
14,10211.5,14792.03,9751.33,4858.49,9740.47,16969.39
21,9977.08,14462.12,9602.58,4831.48,9629.32,16820.69
28,9870.24,14392.40,9622.67,4804.95,9701.47,16797.15
...,...,...,...,...,...,...
61505,12300,18113.00,9887.00,5442.00,10315.00,18062.00
61512,11976,17624.00,9737.00,5352.00,10069.00,17683.00
61519,11651,17156.00,9645.00,5294.00,9892.00,17476.00
61526,11351,16556.00,9522.00,5203.00,9663.00,17293.00


Set the 2016 demand datetime index to be the next hour in the Eastern Timezone.

In [10]:
start_date = pd.to_datetime('2016-01-01 01:00:00')
end_date = pd.to_datetime('2017-01-01 00:00:00')
time_interval = pd.date_range(start_date, end_date, tz = "US/Eastern", freq='H')
MISO_loadzones_2016.index = time_interval
MISO_loadzones_2016

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
2016-01-01 01:00:00-05:00,10911.1,15824.53,10006.74,5198.81,9963.71,17483.79
2016-01-01 02:00:00-05:00,10560.7,15271.17,9879.14,4968.12,9842.67,17121.95
2016-01-01 03:00:00-05:00,10211.5,14792.03,9751.33,4858.49,9740.47,16969.39
2016-01-01 04:00:00-05:00,9977.08,14462.12,9602.58,4831.48,9629.32,16820.69
2016-01-01 05:00:00-05:00,9870.24,14392.40,9622.67,4804.95,9701.47,16797.15
...,...,...,...,...,...,...
2016-12-31 20:00:00-05:00,12300,18113.00,9887.00,5442.00,10315.00,18062.00
2016-12-31 21:00:00-05:00,11976,17624.00,9737.00,5352.00,10069.00,17683.00
2016-12-31 22:00:00-05:00,11651,17156.00,9645.00,5294.00,9892.00,17476.00
2016-12-31 23:00:00-05:00,11351,16556.00,9522.00,5203.00,9663.00,17293.00


Concatenate 2015 and 2016 together, shift datetimes to utc and then select 2016 data

In [11]:
demand_15_16 = pd.concat([MISO_loadzones_2015, MISO_loadzones_2016])
utc_demand_15_16 = demand_15_16.tz_convert('utc')

start = pd.to_datetime('2016-01-01 00:00:00')
end = pd.to_datetime('2016-12-31 23:00:00')
MISO_demand_2016 = utc_demand_15_16.loc[start:end]
MISO_demand_2016

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
2016-01-01 00:00:00+00:00,12863.1,19590.03,11604.49,6035.28,11606.52,19492.58
2016-01-01 01:00:00+00:00,12645,19112.33,11559.97,5871.35,11323.68,19621.66
2016-01-01 02:00:00+00:00,12225,18441.55,11249.20,5727.29,11011.60,19286.61
2016-01-01 03:00:00+00:00,11868.5,17831.50,10982.81,5560.36,10746.58,18881.53
2016-01-01 04:00:00+00:00,11525.2,17238.04,10760.08,5403.62,10484.22,18448.14
...,...,...,...,...,...,...
2016-12-31 19:00:00+00:00,11589,16830.00,9135.00,5014.00,10025.00,17571.00
2016-12-31 20:00:00+00:00,11432,16640.00,8933.00,4962.00,9926.00,17107.00
2016-12-31 21:00:00+00:00,11297,16572.00,8776.00,4920.00,9892.00,16764.00
2016-12-31 22:00:00+00:00,11301,16827.00,8785.00,4933.00,9980.00,16688.00


Write out to csv file

In [None]:
MISO_demand_2016.to_csv('miso_subarea_demand_profile_2016.csv')

## Create MISO subarea to bus mapping

In [None]:
Dictionary of State Abbreviations

In [12]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

Invert dictionary of abbreviations for lookup

In [13]:
inverted_us_state_abbrev = {val:key for key,val in us_state_abbrev.items()}
inverted_us_state_abbrev

{'AL': 'Alabama',
 'AK': 'Alaska',
 'AZ': 'Arizona',
 'AR': 'Arkansas',
 'CA': 'California',
 'CO': 'Colorado',
 'CT': 'Connecticut',
 'DE': 'Delaware',
 'DC': 'District of Columbia',
 'FL': 'Florida',
 'GA': 'Georgia',
 'HI': 'Hawaii',
 'ID': 'Idaho',
 'IL': 'Illinois',
 'IN': 'Indiana',
 'IA': 'Iowa',
 'KS': 'Kansas',
 'KY': 'Kentucky',
 'LA': 'Louisiana',
 'ME': 'Maine',
 'MD': 'Maryland',
 'MA': 'Massachusetts',
 'MI': 'Michigan',
 'MN': 'Minnesota',
 'MS': 'Mississippi',
 'MO': 'Missouri',
 'MT': 'Montana',
 'NE': 'Nebraska',
 'NV': 'Nevada',
 'NH': 'New Hampshire',
 'NJ': 'New Jersey',
 'NM': 'New Mexico',
 'NY': 'New York',
 'NC': 'North Carolina',
 'ND': 'North Dakota',
 'OH': 'Ohio',
 'OK': 'Oklahoma',
 'OR': 'Oregon',
 'PA': 'Pennsylvania',
 'RI': 'Rhode Island',
 'SC': 'South Carolina',
 'SD': 'South Dakota',
 'TN': 'Tennessee',
 'TX': 'Texas',
 'UT': 'Utah',
 'VT': 'Vermont',
 'VA': 'Virginia',
 'WA': 'Washington',
 'WV': 'West Virginia',
 'WI': 'Wisconsin',
 'WY': 'Wyoming

Preliminary mapping of states to subareas

In [14]:
state_to_MISO_demand_zones = {'North Dakota':'LRZ1', 'Montana':'LRZ1', 'South Dakota':'LRZ1','Minnesota':'LRZ1','Iowa':'LRZ3_5', 'Missouri':'LRZ3_5',\
'Wisconsin': 'LRZ2_7', 'Michigan': 'LRZ2_7', 'Illinois':'LRZ4','Indiana': 'LRZ6', 'Texas': 'LRZ8_9_10',\
'Louisiana': 'LRZ8_9_10','Arkansas':'LRZ8_9_10','Alabama':'LRZ8_9_10','Mississippi': 'LRZ8_9_10','Kentucky':'LRZ6'}

Read in original bus_ba_map file

In [15]:
bus_ba_map = pd.read_csv("../bus_ba_map.csv", index_col = 0)
bus_ba_map

Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,9.97,43.976116,-70.221052,Androscoggin__ME,ISNE,Maine
9,11.48,44.100992,-70.305323,Androscoggin__ME,ISNE,Maine
10,9.97,44.179913,-70.185998,Androscoggin__ME,ISNE,Maine
11,9.97,44.143322,-70.224057,Androscoggin__ME,ISNE,Maine
12,3.06,44.020747,-70.180492,Androscoggin__ME,ISNE,Maine
...,...,...,...,...,...,...
69946,1.70,48.307278,-106.272727,Valley__MT,MISO,Montana Eastern
69948,0.85,48.222633,-106.316038,Valley__MT,MISO,Montana Eastern
69949,0.66,48.932800,-106.642307,Valley__MT,MISO,Montana Eastern
69950,0.95,48.408811,-106.524730,Valley__MT,MISO,Montana Eastern


Select out only MISO specific mappings

In [16]:
miso = bus_ba_map[bus_ba_map['BA'] == 'MISO']
miso

Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
29396,15.37,30.445044,-88.409612,Jackson__MS,MISO,Alabama
30015,30.56,31.540745,-91.381848,Adams__MS,MISO,Mississippi
30017,30.56,31.519409,-91.305079,Adams__MS,MISO,Mississippi
30018,30.56,31.523599,-91.358243,Adams__MS,MISO,Mississippi
30019,30.56,31.442345,-91.296654,Adams__MS,MISO,Mississippi
...,...,...,...,...,...,...
69946,1.70,48.307278,-106.272727,Valley__MT,MISO,Montana Eastern
69948,0.85,48.222633,-106.316038,Valley__MT,MISO,Montana Eastern
69949,0.66,48.932800,-106.642307,Valley__MT,MISO,Montana Eastern
69950,0.95,48.408811,-106.524730,Valley__MT,MISO,Montana Eastern


Create a new column that only contains the state abbrevation from the 'County' column

In [17]:
miso['state'] = miso['County'].str.split('__').str[-1]
miso

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name,state
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
29396,15.37,30.445044,-88.409612,Jackson__MS,MISO,Alabama,MS
30015,30.56,31.540745,-91.381848,Adams__MS,MISO,Mississippi,MS
30017,30.56,31.519409,-91.305079,Adams__MS,MISO,Mississippi,MS
30018,30.56,31.523599,-91.358243,Adams__MS,MISO,Mississippi,MS
30019,30.56,31.442345,-91.296654,Adams__MS,MISO,Mississippi,MS
...,...,...,...,...,...,...,...
69946,1.70,48.307278,-106.272727,Valley__MT,MISO,Montana Eastern,MT
69948,0.85,48.222633,-106.316038,Valley__MT,MISO,Montana Eastern,MT
69949,0.66,48.932800,-106.642307,Valley__MT,MISO,Montana Eastern,MT
69950,0.95,48.408811,-106.524730,Valley__MT,MISO,Montana Eastern,MT


Map state column to MISO demand zones use state abbrevation lookup table and MISO demand zone mapping

In [18]:
def get_MISO_demand_zone(inverted_us_state_abbrev, state_to_MISO_demand_zones, state_abbrev):
    mapped_zone = np.nan
    valid_state_abbrev = state_abbrev in inverted_us_state_abbrev.keys()
    if valid_state_abbrev:
        in_MISO_mapping = inverted_us_state_abbrev[state_abbrev] in state_to_MISO_demand_zones
        if in_MISO_mapping:
            mapped_zone =  state_to_MISO_demand_zones[inverted_us_state_abbrev[state_abbrev]]
    return mapped_zone

Create a partial function that only has one input for dataframe.apply

In [19]:
from functools import partial
MISO_demand_zone = partial(get_MISO_demand_zone,inverted_us_state_abbrev,state_to_MISO_demand_zones)

Map state column to MISO demand zone

In [20]:
miso['miso_zone'] = miso.apply(lambda x: MISO_demand_zone(x['state']), axis=1)
miso

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name,state,miso_zone
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
29396,15.37,30.445044,-88.409612,Jackson__MS,MISO,Alabama,MS,LRZ8_9_10
30015,30.56,31.540745,-91.381848,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30017,30.56,31.519409,-91.305079,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30018,30.56,31.523599,-91.358243,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30019,30.56,31.442345,-91.296654,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
...,...,...,...,...,...,...,...,...
69946,1.70,48.307278,-106.272727,Valley__MT,MISO,Montana Eastern,MT,LRZ1
69948,0.85,48.222633,-106.316038,Valley__MT,MISO,Montana Eastern,MT,LRZ1
69949,0.66,48.932800,-106.642307,Valley__MT,MISO,Montana Eastern,MT,LRZ1
69950,0.95,48.408811,-106.524730,Valley__MT,MISO,Montana Eastern,MT,LRZ1


Check to make sure all states are mapped

In [21]:
miso[miso['miso_zone'].isnull()].state.unique()

array([nan], dtype=object)

Load in correction to state mapping using county-based map website

In [None]:
import json
data = json.load(open('C:\\Users\\dmuldrew\\Downloads\\mapchartSave__usa_counties__.txt'))
data

Turn JSON structure into dictionary mapping

In [None]:
MISO_minority_report = {}
for val in data['groups'].values():
    MISO_minority_report[val['label']] = set(val['paths'])
MISO_minority_report

Further correction to state mapping using county-based map data

In [None]:
miso_copy = miso

for zone,counties in MISO_minority_report.items():
    for county in counties:
        print(county)
        print(zone)
        miso_copy.loc[miso['County'] == county,'miso_zone'] = zone
        print(miso_copy[miso['County'] == county]['miso_zone'])
        
miso_copy

Output to disk

In [None]:
miso_copy.to_csv('MISO_mapping.csv')