## This code merges various tables in the EAs data warehouse together and then calculates correlations between variables
## Some data are converted to daily data (from trading period data), as storage data is only available daily

This notebook presents the correlation analysis as discussed in the supplementary consultation paper (SCP). It includes the analysis discussed but not shown in the SCP (e.g. correlations calculated for past periods of high South Island storage).
The csv contains the data used to calculate the correlations.


In [3]:
import pandas as pd

# first get hydro and thermal generation

### this section gets data on generation from our data warehouse - including thermal generation, NI hydro generation, and SI hydro generation

In [7]:
table.head()

Unnamed: 0,Date,Period,Tech_Code,kWh
0,2010-01-01,1,Thrml,309609.0
1,2010-01-01,2,Thrml,295331.0
2,2010-01-01,3,Thrml,273477.0
3,2010-01-01,4,Thrml,271233.0
4,2010-01-01,5,Thrml,250347.0


In [8]:
table.tail(10)

Unnamed: 0,Date,Period,Tech_Code,kWh
188438,2020-09-30,39,NIHydro,450916.61
188439,2020-09-30,40,NIHydro,474979.47
188440,2020-09-30,41,NIHydro,470763.18
188441,2020-09-30,42,NIHydro,446315.83
188442,2020-09-30,43,NIHydro,409610.47
188443,2020-09-30,44,NIHydro,393991.85
188444,2020-09-30,45,NIHydro,350691.85
188445,2020-09-30,46,NIHydro,334535.49
188446,2020-09-30,47,NIHydro,312013.18
188447,2020-09-30,48,NIHydro,293357.89


In [10]:
table['Date']=pandas.to_datetime(table.Date)
table.tail()

Unnamed: 0,Date,Period,Tech_Code,kWh
188446,2020-09-30,47,SIHydro,1054405.0
188446,2020-09-30,47,Thrml,83454.0
188447,2020-09-30,48,NIHydro,293357.89
188447,2020-09-30,48,SIHydro,1031591.0
188447,2020-09-30,48,Thrml,61970.0


# match in hydro storage

### this is done separately for NI and SI hydro storage

In [18]:
Storage = SIStorage.join(NIStorage)
Storage.head()

Unnamed: 0_level_0,SIstorage,NIstorage
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1926-01-01,2136.505,303.765
1926-01-02,1929.549,290.143
1926-01-03,1722.593,276.52
1926-01-04,1515.637,262.898
1926-01-05,1419.116,256.246


In [19]:
testitout=DataFrame.join(Storage, testitout, sort=True, how='right')
testitout.head()

Unnamed: 0_level_0,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-01,2541.198,420.146,11770000.0,39755000.0,12425000.0
2010-01-02,2574.492,418.095,11953000.0,39255000.0,12881000.0
2010-01-03,2609.314,416.864,11616000.0,42900000.0,10957000.0
2010-01-04,2700.553,417.684,13431000.0,42415000.0,11706000.0
2010-01-05,2755.363,410.709,15713000.0,45483000.0,16724000.0


In [20]:
testitout.tail()

Unnamed: 0_level_0,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-26,1042.836,180.659,11224000.0,45299000.0,830174.0
2020-09-27,1082.243,204.022,12127000.0,40542000.0,870484.0
2020-09-28,1119.154,226.977,16995000.0,45632000.0,1656331.0
2020-09-29,1136.669,244.195,18290000.0,49157000.0,2982481.0
2020-09-30,1144.595,254.034,17714000.0,54812000.0,3403466.0


# match in price

### price is load weighted average price

In [23]:
testitout=DataFrame.join(Pricetable, testitout, sort=True, how='right')
testitout.head()

Unnamed: 0_level_0,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,41.7713,2541.198,420.146,11770000.0,39755000.0,12425000.0
2010-01-02,42.8017,2574.492,418.095,11953000.0,39255000.0,12881000.0
2010-01-03,41.6221,2609.314,416.864,11616000.0,42900000.0,10957000.0
2010-01-04,29.6972,2700.553,417.684,13431000.0,42415000.0,11706000.0
2010-01-05,41.5289,2755.363,410.709,15713000.0,45483000.0,16724000.0


In [24]:
testitout.tail()

Unnamed: 0_level_0,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-09-26,76.9624,1042.836,180.659,11224000.0,45299000.0,830174.0
2020-09-27,60.3817,1082.243,204.022,12127000.0,40542000.0,870484.0
2020-09-28,100.5965,1119.154,226.977,16995000.0,45632000.0,1656331.0
2020-09-29,134.0174,1136.669,244.195,18290000.0,49157000.0,2982481.0
2020-09-30,163.3076,1144.595,254.034,17714000.0,54812000.0,3403466.0


# match in HVDC flows

In [27]:
testitout=DataFrame.join(HVDC_Northflow, testitout, sort=True, how='right')
testitout.head()

Unnamed: 0_level_0,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-01,,41.7713,2541.198,420.146,11770000.0,39755000.0,12425000.0
2010-01-02,,42.8017,2574.492,418.095,11953000.0,39255000.0,12881000.0
2010-01-03,,41.6221,2609.314,416.864,11616000.0,42900000.0,10957000.0
2010-01-04,,29.6972,2700.553,417.684,13431000.0,42415000.0,11706000.0
2010-01-05,,41.5289,2755.363,410.709,15713000.0,45483000.0,16724000.0


# match in price separation

In [8]:
# Load Data from above
testitout = pd.read_csv('data/data_for_correlations.csv', index_col=0, parse_dates=True)

In [9]:
testitout.head()

Unnamed: 0_level_0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-01,,,,41.771299,2541.198,420.146,11770393.37,39755136.0,12424968.0
2010-01-02,,,,42.801725,2574.492,418.095,11953441.23,39255165.0,12881006.0
2010-01-03,,,,41.622063,2609.314,416.864,11615501.15,42899744.0,10957223.0
2010-01-04,,,,29.697153,2700.553,417.684,13431336.67,42414585.0,11706179.0
2010-01-05,,,,41.528936,2755.363,410.709,15713171.64,45483290.0,16723682.0


# CORRELATIONS

In [11]:
#correlation from 1 June 2011 to Nov 2019
#(1 june 2011 when transfer of tekapo A and B to Genesis was completed)
testitout[(testitout.index>=pd.datetime(2011,6,1))&(testitout.index<pd.datetime(2019,11,10))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,0.37,-0.5,0.11,-0.33,0.11,0.12,-0.45,0.31
Ratio_INVBEN,0.37,1.0,-0.58,0.19,-0.35,-0.09,0.02,-0.56,0.5
Northflow_GWh,-0.5,-0.58,1.0,-0.17,0.56,-0.16,-0.07,0.91,-0.47
Price,0.11,0.19,-0.17,1.0,-0.26,-0.25,-0.01,-0.1,0.35
SIstorage,-0.33,-0.35,0.56,-0.26,1.0,-0.14,-0.34,0.53,-0.37
NIstorage,0.11,-0.09,-0.16,-0.25,-0.14,1.0,0.53,0.01,-0.44
NIHydro,0.12,0.02,-0.07,-0.01,-0.34,0.53,1.0,-0.05,-0.05
SIHydro,-0.45,-0.56,0.91,-0.1,0.53,0.01,-0.05,1.0,-0.52
Thrml,0.31,0.5,-0.47,0.35,-0.37,-0.44,-0.05,-0.52,1.0


In [12]:
#sample size, 1 June 2011 to 9 Nov 2019
testitout[(testitout.index>=pd.datetime(2011,6,1))&(testitout.index<pd.datetime(2019,11,10))].count()

Ratio            2139
Ratio_INVBEN     2139
Northflow_GWh    2139
Price            3084
SIstorage        3084
NIstorage        3084
NIHydro          3084
SIHydro          3084
Thrml            3084
dtype: int64

In [13]:
#correlations nov/dec (same period as UTS) 2011-2018
testitout[((testitout.index>=pd.datetime(2011,11,10)) & (testitout.index<=pd.datetime(2012,1,6)))|
          ((testitout.index>=pd.datetime(2012,11,10)) & (testitout.index<=pd.datetime(2013,1,6)))|
          ((testitout.index>=pd.datetime(2013,11,10)) & (testitout.index<=pd.datetime(2014,1,6)))|
          ((testitout.index>=pd.datetime(2014,11,10)) & (testitout.index<=pd.datetime(2015,1,6)))|
          ((testitout.index>=pd.datetime(2015,11,10)) & (testitout.index<=pd.datetime(2016,1,6)))|
          ((testitout.index>=pd.datetime(2016,11,10)) & (testitout.index<=pd.datetime(2017,1,6)))|
          ((testitout.index>=pd.datetime(2017,11,10)) & (testitout.index<=pd.datetime(2018,1,6)))|
          ((testitout.index>=pd.datetime(2018,11,10)) & (testitout.index<=pd.datetime(2019,1,6)))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,0.29,-0.23,-0.01,-0.12,0.02,-0.19,-0.19,0.26
Ratio_INVBEN,0.29,1.0,-0.52,0.42,-0.37,-0.35,-0.18,-0.51,0.54
Northflow_GWh,-0.23,-0.52,1.0,-0.09,0.27,-0.13,0.26,0.83,-0.63
Price,-0.01,0.42,-0.09,1.0,-0.42,-0.38,0.15,-0.01,0.33
SIstorage,-0.12,-0.37,0.27,-0.42,1.0,0.07,-0.18,0.33,-0.44
NIstorage,0.02,-0.35,-0.13,-0.38,0.07,1.0,0.26,-0.07,-0.2
NIHydro,-0.19,-0.18,0.26,0.15,-0.18,0.26,1.0,0.23,0.0
SIHydro,-0.19,-0.51,0.83,-0.01,0.33,-0.07,0.23,1.0,-0.29
Thrml,0.26,0.54,-0.63,0.33,-0.44,-0.2,0.0,-0.29,1.0


In [14]:
#sample size nov/dec (same period as UTS) 2011-2018
testitout[((testitout.index>=pd.datetime(2011,11,10)) & (testitout.index<=pd.datetime(2012,1,6)))|
          ((testitout.index>=pd.datetime(2012,11,10)) & (testitout.index<=pd.datetime(2013,1,6)))|
          ((testitout.index>=pd.datetime(2013,11,10)) & (testitout.index<=pd.datetime(2014,1,6)))|
          ((testitout.index>=pd.datetime(2014,11,10)) & (testitout.index<=pd.datetime(2015,1,6)))|
          ((testitout.index>=pd.datetime(2015,11,10)) & (testitout.index<=pd.datetime(2016,1,6)))|
          ((testitout.index>=pd.datetime(2016,11,10)) & (testitout.index<=pd.datetime(2017,1,6)))|
          ((testitout.index>=pd.datetime(2017,11,10)) & (testitout.index<=pd.datetime(2018,1,6)))|
          ((testitout.index>=pd.datetime(2018,11,10)) & (testitout.index<=pd.datetime(2019,1,6)))].count()

Ratio            296
Ratio_INVBEN     296
Northflow_GWh    296
Price            464
SIstorage        464
NIstorage        464
NIHydro          464
SIHydro          464
Thrml            464
dtype: int64

In [15]:
#correlations when storage high, after 1 June 2011 (and excluding april/may/june 2019)
testitout[(testitout.SIstorage>2750)&(testitout.index<=pd.datetime(2019,4,1))&(testitout.index>=pd.datetime(2011,6,1))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,-0.08,-0.65,-0.04,0.09,-0.22,0.23,-0.46,0.34
Ratio_INVBEN,-0.08,1.0,0.15,0.1,-0.3,-0.22,0.03,0.06,0.02
Northflow_GWh,-0.65,0.15,1.0,0.27,-0.02,0.15,-0.26,0.9,-0.23
Price,-0.04,0.1,0.27,1.0,-0.14,0.0,0.26,0.31,0.18
SIstorage,0.09,-0.3,-0.02,-0.14,1.0,-0.22,-0.27,-0.08,0.2
NIstorage,-0.22,-0.22,0.15,0.0,-0.22,1.0,0.41,0.45,-0.74
NIHydro,0.23,0.03,-0.26,0.26,-0.27,0.41,1.0,0.25,-0.14
SIHydro,-0.46,0.06,0.9,0.31,-0.08,0.45,0.25,1.0,-0.39
Thrml,0.34,0.02,-0.23,0.18,0.2,-0.74,-0.14,-0.39,1.0


In [16]:
#sample size when storage high, after 1 June 2011 (and excluding april/may/june 2019)
testitout[(testitout.SIstorage>2750)&(testitout.index<=pd.datetime(2019,4,1))&(testitout.index>=pd.datetime(2011,6,1))].count()

Ratio             94
Ratio_INVBEN      94
Northflow_GWh     94
Price            140
SIstorage        140
NIstorage        140
NIHydro          140
SIHydro          140
Thrml            140
dtype: int64

In [17]:
#correlations for the April-June 2019 period
testitout[(testitout.index>=pd.datetime(2019,4,1))&(testitout.index<=pd.datetime(2019,6,30))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,0.26,-0.72,-0.24,-0.19,-0.08,-0.31,-0.6,-0.05
Ratio_INVBEN,0.26,1.0,-0.34,0.17,-0.37,-0.58,-0.67,-0.41,0.03
Northflow_GWh,-0.72,-0.34,1.0,0.32,0.24,0.24,0.37,0.9,0.17
Price,-0.24,0.17,0.32,1.0,-0.25,0.01,0.34,0.44,0.59
SIstorage,-0.19,-0.37,0.24,-0.25,1.0,0.02,0.02,0.21,-0.53
NIstorage,-0.08,-0.58,0.24,0.01,0.02,1.0,0.68,0.42,0.16
NIHydro,-0.31,-0.67,0.37,0.34,0.02,0.68,1.0,0.61,0.41
SIHydro,-0.6,-0.41,0.9,0.44,0.21,0.42,0.61,1.0,0.4
Thrml,-0.05,0.03,0.17,0.59,-0.53,0.16,0.41,0.4,1.0


In [18]:
#sample size (April-June 2019)
testitout[(testitout.index>=pd.datetime(2019,4,1))&(testitout.index<=pd.datetime(2019,6,30))].count()

Ratio            91
Ratio_INVBEN     91
Northflow_GWh    91
Price            91
SIstorage        91
NIstorage        91
NIHydro          91
SIHydro          91
Thrml            91
dtype: int64

In [19]:
#UTS investigation period to 6 Jan (ie excluding HVDC outage)
testitout[(testitout.index>=pd.datetime(2019,11,10))&(testitout.index<=pd.datetime(2020,1,6))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,0.35,-0.71,-0.4,0.24,0.13,-0.09,-0.54,-0.19
Ratio_INVBEN,0.35,1.0,-0.42,-0.41,0.35,0.09,-0.05,-0.29,-0.05
Northflow_GWh,-0.71,-0.42,1.0,0.64,-0.39,-0.37,0.27,0.82,0.4
Price,-0.4,-0.41,0.64,1.0,-0.46,-0.41,0.54,0.79,0.67
SIstorage,0.24,0.35,-0.39,-0.46,1.0,-0.36,-0.22,-0.16,0.0
NIstorage,0.13,0.09,-0.37,-0.41,-0.36,1.0,-0.17,-0.6,-0.63
NIHydro,-0.09,-0.05,0.27,0.54,-0.22,-0.17,1.0,0.44,0.35
SIHydro,-0.54,-0.29,0.82,0.79,-0.16,-0.6,0.44,1.0,0.71
Thrml,-0.19,-0.05,0.4,0.67,0.0,-0.63,0.35,0.71,1.0


In [20]:
#sample size (UTS investigation period to 6 Jan)
testitout[(testitout.index>=pd.datetime(2019,11,10))&(testitout.index<=pd.datetime(2020,1,6))].count()

Ratio            58
Ratio_INVBEN     58
Northflow_GWh    58
Price            58
SIstorage        58
NIstorage        58
NIHydro          58
SIHydro          58
Thrml            58
dtype: int64

In [21]:
#correlations 3 Dec to 27 Dec
testitout[(testitout.index>=pd.datetime(2019,12,3))&(testitout.index<=pd.datetime(2019,12,27))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,0.32,-0.72,-0.1,-0.25,0.26,0.06,-0.4,-0.02
Ratio_INVBEN,0.32,1.0,-0.46,-0.17,-0.14,0.34,0.05,-0.31,0.02
Northflow_GWh,-0.72,-0.46,1.0,0.55,0.31,-0.73,0.21,0.84,0.43
Price,-0.1,-0.17,0.55,1.0,-0.07,-0.71,0.63,0.84,0.83
SIstorage,-0.25,-0.14,0.31,-0.07,1.0,-0.33,-0.29,0.12,0.07
NIstorage,0.26,0.34,-0.73,-0.71,-0.33,1.0,-0.28,-0.88,-0.65
NIHydro,0.06,0.05,0.21,0.63,-0.29,-0.28,1.0,0.52,0.52
SIHydro,-0.4,-0.31,0.84,0.84,0.12,-0.88,0.52,1.0,0.75
Thrml,-0.02,0.02,0.43,0.83,0.07,-0.65,0.52,0.75,1.0


In [22]:
#correlations 10 Nov to 2 Dec
testitout[(testitout.index>=pd.datetime(2019,11,10))&(testitout.index<=pd.datetime(2019,12,2))].corr().round(decimals=2)

Unnamed: 0,Ratio,Ratio_INVBEN,Northflow_GWh,Price,SIstorage,NIstorage,NIHydro,SIHydro,Thrml
Ratio,1.0,0.13,-0.51,-0.34,-0.11,-0.08,-0.06,-0.37,-0.23
Ratio_INVBEN,0.13,1.0,-0.04,-0.3,-0.13,0.18,0.08,-0.03,-0.05
Northflow_GWh,-0.51,-0.04,1.0,0.14,-0.44,0.05,0.44,0.35,0.17
Price,-0.34,-0.3,0.14,1.0,0.33,-0.27,0.38,0.71,0.53
SIstorage,-0.11,-0.13,-0.44,0.33,1.0,0.38,-0.01,0.3,-0.08
NIstorage,-0.08,0.18,0.05,-0.27,0.38,1.0,0.34,-0.18,-0.64
NIHydro,-0.06,0.08,0.44,0.38,-0.01,0.34,1.0,0.5,0.15
SIHydro,-0.37,-0.03,0.35,0.71,0.3,-0.18,0.5,1.0,0.73
Thrml,-0.23,-0.05,0.17,0.53,-0.08,-0.64,0.15,0.73,1.0


# Testing if correlations are statistically different from each other

In [23]:
#Fisher's z transformation for testing significance between 2 independent correlation coefficients
#from:
#https://medium.com/@ph_singer/statistical-significance-tests-on-correlation-coefficients-b9397380be55
#https://github.com/psinger/CorrelationStats/blob/master/corrstats.py

#This use case applies when you have two correlations that come from different samples and are independent to each other. 
#An example would be that you want to know whether height and weight are correlated in the same way for two distinct social groups.
#so here we use the independent correlations test to test the correlation between the same 2 variables at different points in time
#(ie, different samples)

from __future__ import division
import numpy as np
from scipy.stats import t, norm
from math import atanh, pow
from numpy import tanh

In [24]:
def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'):
    """
    Calculates the statistic significance between two independent correlation coefficients
    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between a and b
    @param n: number of elements in xy
    @param n2: number of elements in ab (if distinct from n)
    @param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'fisher' or 'zou'
    @return: z and p-val
    """

    if method == 'fisher':
        xy_z = 0.5 * np.log((1 + xy)/(1 - xy))
        ab_z = 0.5 * np.log((1 + ab)/(1 - ab))
        if n2 is None:
            n2 = n

        se_diff_r = np.sqrt(1/(n - 3) + 1/(n2 - 3))
        diff = xy_z - ab_z
        z = abs(diff / se_diff_r)
        p = (1 - norm.cdf(z))
        if twotailed:
            p *= 2

        return z, p
    else:
        raise Exception('Wrong method!')

In [25]:
#tests between June 2011-9 Nov 2019: 
#and UTS investigation period to 6 Jan
#and april-june 2019 period

In [26]:
#correlation between SI hydro gen and SI hydro storage
print(independent_corr(0.53 , -0.16, 3084, 58, method='fisher', twotailed=True),
      independent_corr(0.53 , 0.21, 3084, 91, method='fisher', twotailed=True))

(5.524418374852503, 3.3057887760534754e-08) (3.4868819818541668, 0.0004886869571769648)


In [27]:
#correlation between SI hydro gen and thermal gen
print(independent_corr(-0.52 , 0.71, 3084, 58, method='fisher', twotailed=True),
      independent_corr(-0.52 , 0.40, 3084, 91, method='fisher', twotailed=True))

(10.758182376847888, 0.0) (9.249561655351025, 0.0)


In [28]:
#correlation between SI hydro storage and thermal gen
print(independent_corr(-0.37 , 0.00, 3084, 58, method='fisher', twotailed=True),
      independent_corr(-0.37 , -0.53, 3084, 91, method='fisher', twotailed=True))

(2.855250502353728, 0.004300289579611816) (1.8658617442730039, 0.06206071109005773)


In [29]:
#correlation between the spot price and SI hydro gen
print(independent_corr(-0.10 , 0.79, 3084, 58, method='fisher', twotailed=True),
      independent_corr(-0.10 , 0.44, 3084, 91, method='fisher', twotailed=True))

(8.613515541649447, 0.0) (5.296045849989032, 1.1833713697839698e-07)


In [30]:
#correlation between the spot price and SI hydro storage
print(independent_corr(-0.26 , -0.46, 3084, 58, method='fisher', twotailed=True),
      independent_corr(-0.26 , -0.25, 3084, 91, method='fisher', twotailed=True))

(1.6995439811340718, 0.08921673507819095) (0.0989306846913536, 0.9211933055463628)


In [31]:
#correlation between the spot price and NI hydro gen
print(independent_corr(-0.01 , 0.54, 3084, 58, method='fisher', twotailed=True),
      independent_corr(-0.01 , 0.34, 3084, 91, method='fisher', twotailed=True))

(4.514584861097145, 6.344091538323937e-06) (3.367737483662547, 0.0007578772267031741)


In [32]:
#correlation between northwards HVDC flow and SI hydro storage
print(independent_corr(0.56 , -0.39, 2139, 58, method='fisher', twotailed=True),
      independent_corr(0.56 , 0.24, 2139, 91, method='fisher', twotailed=True))

(7.649351416710907, 2.020605904817785e-14) (3.5675692489524553, 0.00036030817542109794)


In [33]:
#correlation between NI hydro gen and NI hydro storage
print(independent_corr(0.53 , -0.17, 3084, 58, method='fisher', twotailed=True),
      independent_corr(0.53 , 0.68, 3084, 91, method='fisher', twotailed=True))

(5.599985152461656, 2.1437016606284942e-08) (2.2103823851480486, 0.02707863525632348)


In [34]:
#correlation between SI hydro storage and ratio of benmore to haywards price
print(independent_corr(-0.33 , 0.24, 2139, 58, method='fisher', twotailed=True),
      independent_corr(-0.33 , -0.19, 2139, 91, method='fisher', twotailed=True))

(4.302732201185559, 1.6870471499119688e-05) (1.3835196848856524, 0.1665055757395737)


In [35]:
#correlation between SI hydro storage and ratio of invercargill to benmore price
print(independent_corr(-0.35 , 0.35, 2139, 58, method='fisher', twotailed=True),
      independent_corr(-0.35 , -0.37, 2139, 91, method='fisher', twotailed=True))

(5.3519410313255555, 8.701575748837342e-08) (0.21125754213413414, 0.8326863116732384)


In [36]:
#statistical significance betwen correlations during UTS investigation period (to 6 Jan) and April-June period
print('SI hydro gen & storage:',independent_corr(-0.16 , 0.21, 58, 91, method='fisher', twotailed=True),
     'SI hydro gen & thermal:',independent_corr(0.71 , 0.40, 58, 91, method='fisher', twotailed=True),
     'SI hydro storage & thermal:',independent_corr(0.00 , -0.53, 58, 91, method='fisher', twotailed=True),
     'SI hydro gen & price:',independent_corr(0.79 , 0.44, 58, 91, method='fisher', twotailed=True),
     'SI storage & price:',independent_corr(-0.46 , -0.25, 58, 91, method='fisher', twotailed=True),
     'NI hydro gen & price:',independent_corr(0.54 , 0.34, 58, 91, method='fisher', twotailed=True),
     'SI hydro storage & HVDC northflow:',independent_corr(-0.39 , 0.24, 58, 91, method='fisher', twotailed=True),
     'NI hydro gen & storage:',independent_corr(-0.17 , 0.68, 58, 91, method='fisher', twotailed=True),
     'SI hydro storage and ben/hay price ratio:',independent_corr(0.24 , -0.19, 58, 91, method='fisher', twotailed=True),
     'SI hydro storage and inv/ben price ratio:',independent_corr(0.35 , -0.37, 58, 91, method='fisher', twotailed=True))

SI hydro gen & storage: (2.1790830822823826, 0.029325496795106876) SI hydro gen & thermal: (2.696727918098652, 0.00700244592132937) SI hydro storage & thermal: (3.4333138988105274, 0.0005962513663728686) SI hydro gen & price: (3.485997765016122, 0.0004903051012195014) SI storage & price: (1.4073035842702537, 0.1593373869634629) NI hydro gen & price: (1.454803133138646, 0.14572377056426689) SI hydro storage & HVDC northflow: (3.819780790162436, 0.0001335703281040157) NI hydro gen & storage: (5.822286662630583, 5.804787317487126e-09) SI hydro storage and ben/hay price ratio: (2.543001860433059, 0.010990464392355737) SI hydro storage and inv/ben price ratio: (4.385804923574893, 1.155576516276291e-05)


In [37]:
#statistical significance between correlations during UTS investigation period (to 6 jan), and periods of high storage from 1 June 2011-1 April 2019

print('SI hydro gen & storage:',independent_corr(-0.16 , -0.08, 58, 140, method='fisher', twotailed=True),
     'SI hydro gen & thermal:',independent_corr(0.71 , -0.39, 58, 140, method='fisher', twotailed=True),
     'SI hydro storage & thermal:',independent_corr(0.00 , 0.20, 58, 140, method='fisher', twotailed=True),
     'SI hydro gen & price:',independent_corr(0.79 , 0.31, 58, 140, method='fisher', twotailed=True),
     'SI storage & price:',independent_corr(-0.46 , -0.14, 58, 140, method='fisher', twotailed=True),
     'NI hydro gen & price:',independent_corr(0.54 , 0.26, 58, 140, method='fisher', twotailed=True),
     'SI hydro storage & HVDC northflow:',independent_corr(-0.39 , -0.02, 58, 94, method='fisher', twotailed=True),
     'NI hydro gen & storage:',independent_corr(-0.17 , 0.41, 58, 140, method='fisher', twotailed=True),
     'SI hydro storage and ben/hay price ratio:',independent_corr(0.24 , 0.09, 58, 94, method='fisher', twotailed=True),
     'SI hydro storage and inv/ben price ratio:',independent_corr(0.35 , -0.30, 58, 94, method='fisher', twotailed=True))

SI hydro gen & storage: (0.5087790815910541, 0.6109070835453132) SI hydro gen & thermal: (8.137570826681058, 4.440892098500626e-16) SI hydro storage & thermal: (1.2700315380070502, 0.2040733962943535) SI hydro gen & price: (4.703976896296885, 2.5514213282118448e-06) SI storage & price: (2.2326019391108254, 0.025575206121114746) NI hydro gen & price: (2.117719092380503, 0.03419886493387936) SI hydro storage & HVDC northflow: (2.293966789984475, 0.021792406406239806) NI hydro gen & storage: (3.8043326198330596, 0.00014218701694224123) SI hydro storage and ben/hay price ratio: (0.9047700297635657, 0.36558723223456413) SI hydro storage and inv/ben price ratio: (3.951898758211436, 7.75335437839253e-05)


In [38]:
#statistical significance between correlations during April-June period, and periods of high storage from 1 June 2011-1 April 2019

print('SI hydro gen & storage:',independent_corr(0.21 , -0.08, 91, 140, method='fisher', twotailed=True),
     'SI hydro gen & thermal:',independent_corr(0.40 , -0.39, 91, 140, method='fisher', twotailed=True),
     'SI hydro storage & thermal:',independent_corr(-0.53 , 0.20, 91, 140, method='fisher', twotailed=True),
     'SI hydro gen & price:',independent_corr(0.44 , 0.31, 91, 140, method='fisher', twotailed=True),
     'SI storage & price:',independent_corr(-0.25 , -0.14, 91, 140, method='fisher', twotailed=True),
     'NI hydro gen & price:',independent_corr(0.34 , 0.26, 91, 140, method='fisher', twotailed=True),
     'SI hydro storage & HVDC northflow:',independent_corr(0.24 , -0.02, 91, 94, method='fisher', twotailed=True),
     'NI hydro gen & storage:',independent_corr(0.68 , 0.41, 91, 140, method='fisher', twotailed=True),
     'SI hydro storage and ben/hay price ratio:',independent_corr(-0.19 , 0.09, 58, 94, method='fisher', twotailed=True),
     'SI hydro storage and inv/ben price ratio:',independent_corr(-0.37 , -0.30, 58, 94, method='fisher', twotailed=True))

SI hydro gen & storage: (2.1472647939816008, 0.03177220694236693) SI hydro gen & thermal: (6.115476276293011, 9.62690149819423e-10) SI hydro storage & thermal: (5.803855237560721, 6.48072351339124e-09) SI hydro gen & price: (1.1103352503058743, 0.2668545885352851) SI storage & price: (0.8380451758945562, 0.4020053318753507) NI hydro gen & price: (0.6440427052708015, 0.5195477418359631) SI hydro storage & HVDC northflow: (1.770987162178897, 0.0765628365706128) NI hydro gen & storage: (2.880435827856677, 0.003971257864481936) SI hydro storage and ben/hay price ratio: (1.6545089449671317, 0.09802414737370646) SI hydro storage and inv/ben price ratio: (0.4619785978452867, 0.6440966684182752)
