In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
def load_data(base_path='/Users/adam/git/research/AEMO/data/Hilary/'
                        'processed_data/MMSDM/PUBLIC_'):
    """Load the processed MMSDM tables into a dict of DataFrames.

    For every table name, the file
    ``<base_path><name>/3_processed_numeric.csv`` is read with its first
    column as a date-parsed index.  A short summary (shape, first and last
    index value) is printed for each table.

    Parameters
    ----------
    base_path : str, optional
        Path *prefix* that the table name is appended to in order to form
        the table's directory.  Defaults to the original author's local
        data location; pass another prefix to run the notebook elsewhere.

    Returns
    -------
    dict
        Maps table name to the DataFrame read from that table's CSV,
        in the order DISPATCHPRICE, TRADINGPRICE, P5MIN_REGIONSOLUTION.
    """
    names = ['DVD_DISPATCHPRICE', 'DVD_TRADINGPRICE',
             'DVD_P5MIN_REGIONSOLUTION']

    csvs = {}
    for name in names:
        # base_path is a prefix, so the name is concatenated (not joined)
        # to build the directory name, e.g. '.../PUBLIC_DVD_DISPATCHPRICE'.
        path = os.path.join(base_path + name, '3_processed_numeric.csv')
        csvs[name] = pd.read_csv(path, index_col=0, parse_dates=True)

    for name, frame in csvs.items():
        print('{} shape {}'.format(name, frame.shape))
        # An empty table has no first/last timestamp; skip instead of
        # raising IndexError.
        if len(frame.index) > 0:
            print('start {}'.format(frame.index[0]))
            print('end {}'.format(frame.index[-1]))

    return csvs

In [3]:
# Load the three processed tables; load_data() prints each table's shape
# and first/last index value as it goes.
csvs = load_data()

DVD_DISPATCHPRICE shape (493056, 5)
start 2012-01-01 00:00:00
end 2016-09-07 23:55:00
DVD_TRADINGPRICE shape (493051, 5)
start 2012-01-01 00:25:00
end 2016-09-07 23:55:00
DVD_P5MIN_REGIONSOLUTION shape (493056, 84)
start 2012-01-01 00:00:00
end 2016-09-07 23:55:00


In [4]:
# Dispatch price table (one RRP column per region) and its summary statistics.
dispatch = csvs['DVD_DISPATCHPRICE']
dispatch.describe()

Unnamed: 0,RRP NSW1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE,RRP QLD1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE,RRP SA1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE,RRP TAS1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE,RRP VIC1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE
count,493056.0,493056.0,493056.0,493056.0,493056.0
mean,46.781051,56.102234,58.075399,55.161194,44.63955
std,88.168931,320.162549,288.099275,119.887482,138.007515
min,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0
25%,32.73,30.386845,32.53355,34.050465,29.648098
50%,45.78524,44.85394,45.79,41.53309,41.97
75%,52.603623,53.308243,57.441795,50.68,50.666888
max,13800.0,13800.0,14000.0,14000.0,13800.0


In [5]:
# SA dispatch price series; this is the input for the cumulative average
# dispatch price computed in the following cell.
# Selected by column name rather than position so that a change in column
# order cannot silently pick a different region.
dispatch_sa = dispatch.loc[:, 'RRP SA1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE']

In [6]:
# Number of consecutive rows averaged together.  The index is spaced at
# 5 minutes, so 6 rows cover 30 minutes (presumably one trading interval --
# confirm against the data provider's definition).
ROWS_PER_BLOCK = 6


def _cumulative_mean_in_blocks(values, block_size):
    """Running mean that restarts every ``block_size`` rows.

    Row ``i`` of the result is the mean of ``values`` from the start of the
    block containing ``i`` up to and including ``i``.  Blocks are formed by
    position (rows 0..block_size-1, block_size..2*block_size-1, ...), so the
    input is assumed to have no missing rows.  The final block may be short.
    NaN values propagate to the rest of their block.

    Parameters
    ----------
    values : array-like of float
        One-dimensional input values.
    block_size : int
        Number of rows per block; must be positive.

    Returns
    -------
    numpy.ndarray
        Array with the same length as ``values``.
    """
    values = np.asarray(values, dtype=float)
    n_rows = values.shape[0]
    n_blocks = -(-n_rows // block_size)  # ceiling division

    # Pad the tail with NaN so the data reshapes into whole blocks; the
    # padding is sliced off again before returning.
    padded = np.full(n_blocks * block_size, np.nan)
    padded[:n_rows] = values

    running_sum = np.cumsum(padded.reshape(n_blocks, block_size), axis=1)
    counts = np.arange(1, block_size + 1)
    return (running_sum / counts).ravel()[:n_rows]


# Vectorised replacement for the per-row Python loop: same values, but one
# NumPy pass instead of ~500k np.mean calls on growing lists.
cum_mean = _cumulative_mean_in_blocks(dispatch_sa.values,
                                      ROWS_PER_BLOCK).tolist()

In [7]:
# Wrap the cumulative means in a Series aligned to the dispatch timestamps.
# NOTE: this rebinds `cum_mean` from a plain sequence to a Series; later
# cells rely on the Series form, so run this cell exactly once after the
# cumulative means are computed.
cum_mean = pd.Series(cum_mean, index=dispatch.index)
cum_mean.head()

SETTLEMENTDATE
2012-01-01 00:00:00    26.885070
2012-01-01 00:05:00    26.542155
2012-01-01 00:10:00    26.323057
2012-01-01 00:15:00    25.825630
2012-01-01 00:20:00    25.151972
dtype: float64

In [8]:
# Raw SA dispatch prices for the same timestamps, to sanity-check the
# cumulative means by eye.
dispatch_sa.head()

SETTLEMENTDATE
2012-01-01 00:00:00    26.88507
2012-01-01 00:05:00    26.19924
2012-01-01 00:10:00    25.88486
2012-01-01 00:15:00    24.33335
2012-01-01 00:20:00    22.45734
Name: RRP SA1 offset 0 HH PUBLIC_DVD_DISPATCHPRICE, dtype: float64

In [9]:
# Peek at the trading price table; its first timestamp (00:25) is later
# than the dispatch table's (00:00), as printed by load_data().
csvs['DVD_TRADINGPRICE'].head()

Unnamed: 0_level_0,RRP NSW1 offset 0 HH PUBLIC_DVD_TRADINGPRICE,RRP QLD1 offset 0 HH PUBLIC_DVD_TRADINGPRICE,RRP SA1 offset 0 HH PUBLIC_DVD_TRADINGPRICE,RRP TAS1 offset 0 HH PUBLIC_DVD_TRADINGPRICE,RRP VIC1 offset 0 HH PUBLIC_DVD_TRADINGPRICE
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-01 00:25:00,24.51,24.95,24.93,60.3,23.07
2012-01-01 00:30:00,23.05,23.84,22.71,60.3,20.98
2012-01-01 00:35:00,23.05,23.84,22.71,60.3,20.98
2012-01-01 00:40:00,23.05,23.84,22.71,60.3,20.98
2012-01-01 00:45:00,23.05,23.84,22.71,60.3,20.98


In [10]:
# SA trading price column from the trading price table.
trad_sa = csvs['DVD_TRADINGPRICE']['RRP SA1 offset 0 HH PUBLIC_DVD_TRADINGPRICE']

# Column labels for the combined frame, in the same order as the series
# passed to concat below.
output_columns = ['C_SA_dispatch_price_[$/MWh]',
                  'C_cumulative_mean_dispatch_[$/MWh]',
                  'C_electricity_price_[$/MWh]']

# Align the three series on their timestamps, label them via `keys`, and
# drop timestamps that are missing from any of the series.
combined = pd.concat([dispatch_sa, cum_mean, trad_sa],
                     axis=1, keys=output_columns)
output = combined.dropna()

output.head()

Unnamed: 0_level_0,C_SA_dispatch_price_[$/MWh],C_cumulative_mean_dispatch_[$/MWh],C_electricity_price_[$/MWh]
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01 00:25:00,23.80364,24.92725,24.93
2012-01-01 00:30:00,24.32653,24.32653,22.71
2012-01-01 00:35:00,24.53312,24.429825,22.71
2012-01-01 00:40:00,23.75187,24.20384,22.71
2012-01-01 00:45:00,23.57818,24.047425,22.71


In [11]:
# Write the combined frame (index included) to 'state.csv' in the current
# working directory.
output.to_csv('state.csv')

In [13]:
# Timestamps where the cumulative mean dispatch price exceeds 200
# ($/MWh, going by the column labels used for `output`).
# NOTE(review): this displays every matching row; consider .head() or a
# count if only the size of the set matters.
cum_mean[cum_mean > 200]

SETTLEMENTDATE
2012-02-25 13:00:00      320.955970
2012-02-25 14:00:00      209.252590
2012-02-25 14:05:00      207.964715
2012-07-02 11:50:00      201.272556
2012-07-02 11:55:00      211.622782
2012-07-02 12:00:00      299.537750
2012-07-02 12:05:00      296.180340
2012-07-02 12:10:00      329.542170
2012-07-02 12:15:00     3472.156628
2012-07-02 12:20:00     4665.226074
2012-07-02 12:25:00     3937.555062
2012-07-02 12:35:00     6142.392145
2012-07-02 12:40:00     8201.278993
2012-07-02 12:45:00     6177.406785
2012-07-02 12:50:00     4955.863428
2012-07-02 12:55:00     4140.148072
2012-07-02 13:30:00      258.932330
2012-07-02 13:35:00      263.980705
2012-07-02 13:40:00      214.172953
2012-07-02 14:00:00      275.483840
2012-07-02 14:05:00      262.491920
2012-07-02 14:10:00      213.967363
2012-07-02 17:30:00      249.500000
2012-07-02 17:40:00      209.945583
2012-07-02 17:45:00      226.850967
2012-07-02 17:50:00      236.899324
2012-07-02 17:55:00      238.999437
2012-07-02 18

In [14]:
# Rows and columns remaining in the combined frame after dropna().
output.shape

(493051, 3)

In [16]:
# Percentage of rows: 493051 matches output.shape[0] shown in the cell above.
# NOTE(review): the origin of 8540 is not visible in this notebook
# (execution count 15 is missing) -- presumably a row count from a
# filter such as a price threshold; confirm and replace both literals with
# the expressions that produce them.
100*(8540 / 493051)

1.7320723414007884