## Format MISO demand from data files

In [1]:
import pandas as pd

Read in 2016 MISO demand data

In [2]:
LRZ_demand_2016 = pd.read_excel('20161231_dfal_hist.xls', skiprows=5)
LRZ_demand_2016

Unnamed: 0,MarketDay,HourEnding,LoadResource Zone,MTLF (MWh),ActualLoad (MWh)
0,2016-01-01 00:00:00,1,LRZ1,10692,10911.1
1,2016-01-01 00:00:00,1,LRZ2_7,15766,15824.5
2,2016-01-01 00:00:00,1,LRZ3_5,9728,10006.7
3,2016-01-01 00:00:00,1,LRZ4,5043,5198.81
4,2016-01-01 00:00:00,1,LRZ6,10046,9963.71
5,2016-01-01 00:00:00,1,LRZ8_9_10,16748,17483.8
6,2016-01-01 00:00:00,1,MISO,68023,69388.7
7,2016-01-01 00:00:00,2,LRZ1,10403,10560.7
8,2016-01-01 00:00:00,2,LRZ2_7,15313,15271.2
9,2016-01-01 00:00:00,2,LRZ3_5,9579,9879.14


Replace nans with predicted values

In [5]:
LRZ_demand_2016.loc[LRZ_demand_2016['ActualLoad (MWh)'].isna() , 'ActualLoad (MWh)'] = LRZ_demand_2016['MTLF (MWh)']
LRZ_demand_2016

Unnamed: 0,MarketDay,HourEnding,LoadResource Zone,MTLF (MWh),ActualLoad (MWh)
0,2016-01-01 00:00:00,1,LRZ1,10692,10911.1
1,2016-01-01 00:00:00,1,LRZ2_7,15766,15824.5
2,2016-01-01 00:00:00,1,LRZ3_5,9728,10006.7
3,2016-01-01 00:00:00,1,LRZ4,5043,5198.81
4,2016-01-01 00:00:00,1,LRZ6,10046,9963.71
5,2016-01-01 00:00:00,1,LRZ8_9_10,16748,17483.8
6,2016-01-01 00:00:00,1,MISO,68023,69388.7
7,2016-01-01 00:00:00,2,LRZ1,10403,10560.7
8,2016-01-01 00:00:00,2,LRZ2_7,15313,15271.2
9,2016-01-01 00:00:00,2,LRZ3_5,9579,9879.14


Read in 2015 MISO demand data

In [7]:
LRZ_demand_2015 = pd.read_excel('20151231_dfal_hist.xls', skiprows=5)
LRZ_demand_2015

Unnamed: 0,MarketDay,HourEnding,LoadResource Zone,MTLF (MWh),ActualLoad (MWh)
0,2015-01-01 00:00:00,1,LRZ1,11881,11566.6
1,2015-01-01 00:00:00,1,LRZ2_7,16777,16866.8
2,2015-01-01 00:00:00,1,LRZ3_5,10544,10673.9
3,2015-01-01 00:00:00,1,LRZ4,5620,5687.05
4,2015-01-01 00:00:00,1,LRZ6,11476,11594.3
5,2015-01-01 00:00:00,1,LRZ8_9_10,19013,18887.4
6,2015-01-01 00:00:00,1,MISO,75311,75275.9
7,2015-01-01 00:00:00,2,LRZ1,11445,11128.9
8,2015-01-01 00:00:00,2,LRZ2_7,16096,16232.9
9,2015-01-01 00:00:00,2,LRZ3_5,10283,10440.8


Function converts MISO file format to a dataframe with subarea demand columns 

In [8]:
miso_zones = ['LRZ1','LRZ2_7','LRZ3_5','LRZ4','LRZ6','LRZ8_9_10']

def CreateDemandProfile(data, miso_zones):
    demand = pd.DataFrame(data[data['LoadResource Zone'] == miso_zones[0]]['ActualLoad (MWh)'])
    for zone in range(1,len(miso_zones)):
        demand[miso_zones[zone]] = data[data['LoadResource Zone'] == miso_zones[zone]]['ActualLoad (MWh)'].to_list()
    demand.columns = miso_zones
    return demand

Create subareas dataframe for 2015 demand data

In [9]:
MISO_loadzones_2015 = CreateDemandProfile(LRZ_demand_2015, miso_zones)
MISO_loadzones_2015

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
0,11566.6,16866.75,10673.85,5687.05,11594.30,18887.39
7,11128.9,16232.87,10440.79,5540.74,11449.76,18462.71
14,10744.2,15752.56,10206.73,5436.40,11292.13,18250.65
21,10516.3,15413.05,10086.26,5361.72,11198.64,18014.31
28,10369.1,15325.18,10041.79,5324.75,11125.65,17934.87
35,10335.9,15472.11,10154.53,5354.31,11255.00,18008.60
42,10501.8,15799.27,10318.95,5434.42,11418.44,18302.87
49,10702.6,16185.31,10519.45,5537.95,11564.77,18668.21
56,10847.4,16334.72,10527.70,5524.95,11501.08,18806.34
63,11065.2,16781.55,10649.18,5593.12,11611.09,19275.53


Set the 2015 demand datetime index to be the next hour in the Eastern Timezone.

In [13]:
start_date = pd.to_datetime('2015-01-01 01:00:00')
end_date = pd.to_datetime('2016-01-01 00:00:00')
time_interval = pd.date_range(start_date, end_date, tz = "US/Eastern", freq='H')
MISO_loadzones_2015.index = time_interval
MISO_loadzones_2015

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
2015-01-01 01:00:00-05:00,11566.6,16866.75,10673.85,5687.05,11594.30,18887.39
2015-01-01 02:00:00-05:00,11128.9,16232.87,10440.79,5540.74,11449.76,18462.71
2015-01-01 03:00:00-05:00,10744.2,15752.56,10206.73,5436.40,11292.13,18250.65
2015-01-01 04:00:00-05:00,10516.3,15413.05,10086.26,5361.72,11198.64,18014.31
2015-01-01 05:00:00-05:00,10369.1,15325.18,10041.79,5324.75,11125.65,17934.87
2015-01-01 06:00:00-05:00,10335.9,15472.11,10154.53,5354.31,11255.00,18008.60
2015-01-01 07:00:00-05:00,10501.8,15799.27,10318.95,5434.42,11418.44,18302.87
2015-01-01 08:00:00-05:00,10702.6,16185.31,10519.45,5537.95,11564.77,18668.21
2015-01-01 09:00:00-05:00,10847.4,16334.72,10527.70,5524.95,11501.08,18806.34
2015-01-01 10:00:00-05:00,11065.2,16781.55,10649.18,5593.12,11611.09,19275.53


Create subareas dataframe for 2016 demand data

In [15]:
MISO_loadzones_2016 = CreateDemandProfile(LRZ_demand_2016, miso_zones)
MISO_loadzones_2016

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
0,10911.1,15824.53,10006.74,5198.81,9963.71,17483.79
7,10560.7,15271.17,9879.14,4968.12,9842.67,17121.95
14,10211.5,14792.03,9751.33,4858.49,9740.47,16969.39
21,9977.08,14462.12,9602.58,4831.48,9629.32,16820.69
28,9870.24,14392.40,9622.67,4804.95,9701.47,16797.15
35,9897.06,14541.61,9692.37,4856.01,9888.91,16965.46
42,10096.7,14886.83,9853.55,4970.98,10131.83,17392.22
49,10296.6,15351.13,10111.77,5070.41,10315.24,17941.20
56,10506.7,15622.51,10188.25,5061.51,10352.28,18293.29
63,10617.4,16011.85,10070.44,5181.92,10481.42,18843.33


Set the 2016 demand datetime index to be the next hour in the Eastern Timezone.

In [16]:
start_date = pd.to_datetime('2016-01-01 01:00:00')
end_date = pd.to_datetime('2017-01-01 00:00:00')
time_interval = pd.date_range(start_date, end_date, tz = "US/Eastern", freq='H')
MISO_loadzones_2016.index = time_interval
MISO_loadzones_2016

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
2016-01-01 06:00:00+00:00,10911.1,15824.53,10006.74,5198.81,9963.71,17483.79
2016-01-01 07:00:00+00:00,10560.7,15271.17,9879.14,4968.12,9842.67,17121.95
2016-01-01 08:00:00+00:00,10211.5,14792.03,9751.33,4858.49,9740.47,16969.39
2016-01-01 09:00:00+00:00,9977.08,14462.12,9602.58,4831.48,9629.32,16820.69
2016-01-01 10:00:00+00:00,9870.24,14392.40,9622.67,4804.95,9701.47,16797.15
2016-01-01 11:00:00+00:00,9897.06,14541.61,9692.37,4856.01,9888.91,16965.46
2016-01-01 12:00:00+00:00,10096.7,14886.83,9853.55,4970.98,10131.83,17392.22
2016-01-01 13:00:00+00:00,10296.6,15351.13,10111.77,5070.41,10315.24,17941.20
2016-01-01 14:00:00+00:00,10506.7,15622.51,10188.25,5061.51,10352.28,18293.29
2016-01-01 15:00:00+00:00,10617.4,16011.85,10070.44,5181.92,10481.42,18843.33


Concatenate 2015 and 2016 together, shift datetimes to utc and then select 2016 data

In [22]:
demand_15_16 = pd.concat([MISO_loadzones_2015, MISO_loadzones_2016])
utc_demand_15_16 = demand_15_16.tz_convert('utc')

start = pd.to_datetime('2016-01-01 00:00:00')
end = pd.to_datetime('2016-12-31 23:00:00')
MISO_demand_2016 = utc_demand_15_16.loc[start:end]
MISO_demand_2016

Unnamed: 0,LRZ1,LRZ2_7,LRZ3_5,LRZ4,LRZ6,LRZ8_9_10
2016-01-01 00:00:00+00:00,12863.1,19590.03,11604.49,6035.28,11606.52,19492.58
2016-01-01 01:00:00+00:00,12645,19112.33,11559.97,5871.35,11323.68,19621.66
2016-01-01 02:00:00+00:00,12225,18441.55,11249.20,5727.29,11011.60,19286.61
2016-01-01 03:00:00+00:00,11868.5,17831.50,10982.81,5560.36,10746.58,18881.53
2016-01-01 04:00:00+00:00,11525.2,17238.04,10760.08,5403.62,10484.22,18448.14
2016-01-01 05:00:00+00:00,11104,16538.11,10281.16,5382.65,10198.86,17926.37
2016-01-01 06:00:00+00:00,10911.1,15824.53,10006.74,5198.81,9963.71,17483.79
2016-01-01 07:00:00+00:00,10560.7,15271.17,9879.14,4968.12,9842.67,17121.95
2016-01-01 08:00:00+00:00,10211.5,14792.03,9751.33,4858.49,9740.47,16969.39
2016-01-01 09:00:00+00:00,9977.08,14462.12,9602.58,4831.48,9629.32,16820.69


Write out to csv file

In [None]:
MISO_demand_2016.to_csv('miso_subarea_demand_profile_2016.csv')

## Create MISO subarea to bus mapping

In [None]:
Dictionary of State Abbreviations

In [24]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

Invert dictionary of abbreviations for lookup

In [25]:
inverted_us_state_abbrev = {val:key for key,val in us_state_abbrev.items()}
inverted_us_state_abbrev

{'AL': 'Alabama',
 'AK': 'Alaska',
 'AZ': 'Arizona',
 'AR': 'Arkansas',
 'CA': 'California',
 'CO': 'Colorado',
 'CT': 'Connecticut',
 'DE': 'Delaware',
 'DC': 'District of Columbia',
 'FL': 'Florida',
 'GA': 'Georgia',
 'HI': 'Hawaii',
 'ID': 'Idaho',
 'IL': 'Illinois',
 'IN': 'Indiana',
 'IA': 'Iowa',
 'KS': 'Kansas',
 'KY': 'Kentucky',
 'LA': 'Louisiana',
 'ME': 'Maine',
 'MD': 'Maryland',
 'MA': 'Massachusetts',
 'MI': 'Michigan',
 'MN': 'Minnesota',
 'MS': 'Mississippi',
 'MO': 'Missouri',
 'MT': 'Montana',
 'NE': 'Nebraska',
 'NV': 'Nevada',
 'NH': 'New Hampshire',
 'NJ': 'New Jersey',
 'NM': 'New Mexico',
 'NY': 'New York',
 'NC': 'North Carolina',
 'ND': 'North Dakota',
 'OH': 'Ohio',
 'OK': 'Oklahoma',
 'OR': 'Oregon',
 'PA': 'Pennsylvania',
 'RI': 'Rhode Island',
 'SC': 'South Carolina',
 'SD': 'South Dakota',
 'TN': 'Tennessee',
 'TX': 'Texas',
 'UT': 'Utah',
 'VT': 'Vermont',
 'VA': 'Virginia',
 'WA': 'Washington',
 'WV': 'West Virginia',
 'WI': 'Wisconsin',
 'WY': 'Wyoming

Preliminary mapping of states to subareas

In [26]:
state_to_MISO_demand_zones = {'North Dakota':'LRZ1', 'Montana':'LRZ1', 'South Dakota':'LRZ1','Minnesota':'LRZ1','Iowa':'LRZ3_5', 'Missouri':'LRZ3_5',\
'Wisconsin': 'LRZ2_7', 'Michigan': 'LRZ2_7', 'Illinois':'LRZ4','Indiana': 'LRZ6', 'Texas': 'LRZ8_9_10',\
'Louisiana': 'LRZ8_9_10','Arkansas':'LRZ8_9_10','Alabama':'LRZ8_9_10','Mississippi': 'LRZ8_9_10','Kentucky':'LRZ6'}

Read in original bus_ba_map file

In [23]:
bus_ba_map = pd.read_csv("../bus_ba_map.csv", index_col = 0)
bus_ba_map

Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,9.97,43.976116,-70.221052,Androscoggin__ME,ISNE,Maine
9,11.48,44.100992,-70.305323,Androscoggin__ME,ISNE,Maine
10,9.97,44.179913,-70.185998,Androscoggin__ME,ISNE,Maine
11,9.97,44.143322,-70.224057,Androscoggin__ME,ISNE,Maine
12,3.06,44.020747,-70.180492,Androscoggin__ME,ISNE,Maine
13,2.51,44.248216,-70.198762,Androscoggin__ME,ISNE,Maine
15,3.04,44.230101,-70.122191,Androscoggin__ME,ISNE,Maine
16,2.51,44.256746,-70.089404,Androscoggin__ME,ISNE,Maine
18,5.61,44.339760,-70.099794,Kennebec__ME,ISNE,Maine
20,15.83,44.098853,-70.154980,Androscoggin__ME,ISNE,Maine


Select out only MISO specific mappings

In [27]:
miso = bus_ba_map[bus_ba_map['BA'] == 'MISO']
miso

Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
29396,15.37,30.445044,-88.409612,Jackson__MS,MISO,Alabama
30015,30.56,31.540745,-91.381848,Adams__MS,MISO,Mississippi
30017,30.56,31.519409,-91.305079,Adams__MS,MISO,Mississippi
30018,30.56,31.523599,-91.358243,Adams__MS,MISO,Mississippi
30019,30.56,31.442345,-91.296654,Adams__MS,MISO,Mississippi
30021,11.33,31.539737,-91.021922,Franklin__MS,MISO,Mississippi
30031,7.46,31.153957,-91.175500,Wilkinson__MS,MISO,Mississippi
30033,9.40,31.034530,-91.109517,Wilkinson__MS,MISO,Mississippi
30034,13.98,31.241578,-90.937497,Amite__MS,MISO,Mississippi
30035,6.32,31.110875,-90.725897,Amite__MS,MISO,Mississippi


Create a new column that only contains the state abbrevation from the 'County' column

In [30]:
miso['state'] = miso['County'].str.split('__').str[-1]
miso

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name,state
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
29396,15.37,30.445044,-88.409612,Jackson__MS,MISO,Alabama,MS
30015,30.56,31.540745,-91.381848,Adams__MS,MISO,Mississippi,MS
30017,30.56,31.519409,-91.305079,Adams__MS,MISO,Mississippi,MS
30018,30.56,31.523599,-91.358243,Adams__MS,MISO,Mississippi,MS
30019,30.56,31.442345,-91.296654,Adams__MS,MISO,Mississippi,MS
30021,11.33,31.539737,-91.021922,Franklin__MS,MISO,Mississippi,MS
30031,7.46,31.153957,-91.175500,Wilkinson__MS,MISO,Mississippi,MS
30033,9.40,31.034530,-91.109517,Wilkinson__MS,MISO,Mississippi,MS
30034,13.98,31.241578,-90.937497,Amite__MS,MISO,Mississippi,MS
30035,6.32,31.110875,-90.725897,Amite__MS,MISO,Mississippi,MS


Map state column to MISO demand zones use state abbrevation lookup table and MISO demand zone mapping

In [32]:
def get_MISO_demand_zone(inverted_us_state_abbrev, state_to_MISO_demand_zones, state_abbrev):
    mapped_zone = np.nan
    valid_state_abbrev = state_abbrev in inverted_us_state_abbrev.keys()
    if valid_state_abbrev:
        in_MISO_mapping = inverted_us_state_abbrev[state_abbrev] in state_to_MISO_demand_zones
        if in_MISO_mapping:
            mapped_zone =  state_to_MISO_demand_zones[inverted_us_state_abbrev[state_abbrev]]
    return mapped_zone

Create a partial function that only has one input for dataframe.apply

In [34]:
from functools import partial
MISO_demand_zone = partial(get_MISO_demand_zone,inverted_us_state_abbrev,state_to_MISO_demand_zones)

Map state column to MISO demand zone

In [35]:
miso['miso_zone'] = miso.apply(lambda x: MISO_demand_zone(x['state']), axis=1)
miso

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Pd,lat,lon,County,BA,zone_name,state,miso_zone
bus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
29396,15.37,30.445044,-88.409612,Jackson__MS,MISO,Alabama,MS,LRZ8_9_10
30015,30.56,31.540745,-91.381848,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30017,30.56,31.519409,-91.305079,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30018,30.56,31.523599,-91.358243,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30019,30.56,31.442345,-91.296654,Adams__MS,MISO,Mississippi,MS,LRZ8_9_10
30021,11.33,31.539737,-91.021922,Franklin__MS,MISO,Mississippi,MS,LRZ8_9_10
30031,7.46,31.153957,-91.175500,Wilkinson__MS,MISO,Mississippi,MS,LRZ8_9_10
30033,9.40,31.034530,-91.109517,Wilkinson__MS,MISO,Mississippi,MS,LRZ8_9_10
30034,13.98,31.241578,-90.937497,Amite__MS,MISO,Mississippi,MS,LRZ8_9_10
30035,6.32,31.110875,-90.725897,Amite__MS,MISO,Mississippi,MS,LRZ8_9_10


Check to make sure all states are mapped

In [36]:
miso[miso['miso_zone'].isnull()].state.unique()

array([nan], dtype=object)

Load in correction to state mapping using county-based map website

In [None]:
import json
data = json.load(open('C:\\Users\\dmuldrew\\Downloads\\mapchartSave__usa_counties__.txt'))
data

Turn JSON structure into dictionary mapping

In [None]:
MISO_minority_report = {}
for val in data['groups'].values():
    MISO_minority_report[val['label']] = set(val['paths'])
MISO_minority_report

Further correction to state mapping using county-based map data

In [None]:
miso_copy = miso

for zone,counties in MISO_minority_report.items():
    for county in counties:
        print(county)
        print(zone)
        miso_copy.loc[miso['County'] == county,'miso_zone'] = zone
        print(miso_copy[miso['County'] == county]['miso_zone'])
        
miso_copy

Output to disk

In [None]:
miso_copy.to_csv('MISO_mapping.csv')