In [2]:
# Loading
import pandas as pd

In [3]:
# Function to normalise for minimised variables
def normalisationmin(i):
    """Min-max scale ``i`` so that its minimum maps to 0 and its maximum to 1.

    Used for minimised variables: the smallest value scores 0.

    Parameters
    ----------
    i : object providing ``min()`` / ``max()`` and element-wise arithmetic
        (the notebook passes pandas Series).

    Returns
    -------
    The element-wise result of ``(i - min) / (max - min)``.

    Notes
    -----
    There is no guard for ``max == min``; the denominator is then zero.
    """
    lowest = i.min()
    value_range = i.max() - lowest
    return (i - lowest) / value_range

In [4]:
# Function to normalise for maximised variables
def normalisationmax(i):
    """Min-max scale ``i`` so that its maximum maps to 0 and its minimum to 1.

    Used for maximised variables: the largest value scores 0.

    Parameters
    ----------
    i : object providing ``min()`` / ``max()`` and element-wise arithmetic
        (the notebook passes pandas Series).

    Returns
    -------
    The element-wise result of ``(i - max) / (min - max)``.

    Notes
    -----
    There is no guard for ``max == min``; the denominator is then zero.
    """
    highest = i.max()
    negative_range = i.min() - highest
    return (i - highest) / negative_range

# User Input

# 1. Optimisation results

## 1.1 Import results

In [5]:
# The data file given here is for the water grid case study (5 year planning period):
# the non-dominated results of 5 seeds, optimised using the NSGA-II algorithm.
results_csv = "Data/optimisation results 5 combined seeds.csv"
opdata = pd.read_csv(results_csv)

In [6]:
# Remove the first column of the imported file, modifying opdata in place.
# NOTE(review): this cell is not idempotent - every re-run drops one more column,
# so reload opdata from the CSV before running it again.
opdata.drop(opdata.columns[0], axis = 1, inplace = True)

## 1.2 Cleanup data

In [7]:
# Strip every '$' and '.' from the column names (in place)
opdata.rename(columns = lambda label: label.replace('$', '').replace('.', ''), inplace = True)

In [8]:
# Column groups are selected by position, so they depend on the column order of the input file.
# NOTE(review): re-running this cell rescales the values again.
spill_and_storage_cols = opdata.columns[16:18]   # total spill volume, (negated) minimum system storage
cost_cols = opdata.columns[18:24]                # the six cost columns
volume_cols = opdata.columns[24:27]              # the three columns following the costs

# convert spill volume and storage to GL and take the absolute value
opdata[spill_and_storage_cols] = (opdata[spill_and_storage_cols]/1000).abs()

# convert costs to $ million
opdata[cost_cols] = opdata[cost_cols]/1000000

# convert the remaining volume columns to GL
opdata[volume_cols] = opdata[volume_cols]/1000

In [9]:
# Show the cleaned column names so the replacement names below can be matched by position
print(opdata.columns.tolist())

['XTwoWayPipelinesNPI2NPI2Threshold', 'XTwoWayPipelinesNPI2NPI2FlowThreshold', 'XTwoWayPipelinesNPINPIThreshold', 'XTwoWayPipelinesNPINPIFlowThreshold', 'XTwoWayPipelinesBrisbanetoNthPineInterconnectorBrisbanetoNthPineThreshold', 'XTwoWayPipelinesBrisbanetoNthPineInterconnectorBrisbanetoNthPineFlowThreshold', 'XTwoWayPipelinesMaroochytoBaroonInterconnectorMaroochytoBaroonThreshold', 'XTwoWayPipelinesEwenMaddocktoBaroonInterconnectorEwenMaddocktoBaroonThreshold', 'XTwoWayPipelinesEPIEPIThreshold', 'XTwoWayPipelinesEPIEPIFlowThreshold', 'XTwoWayPipelinesSPISPIThreshold', 'XTwoWayPipelinesSPISPIFlowThreshold', 'XWCRWSPRWThreshold', 'XTugunDesalinationTugunDesalOneThirdThreshold', 'XTugunDesalinationTugunDesalTwoThirdsThreshold', 'XTugunDesalinationTugunDesalFullThreshold', 'XSpillVolumesTotalSpillVolume', 'XMinimumSystemStorageNegation', 'XCostsTotalCost', 'XCostsSwitchingCost', 'XCostsPumpingCost', 'XCostsTugunDesalCost', 'XCostsTreatmentCost', 'XCostsPRWCost', 'XTugunDesalinationTugunDe

In [10]:
# Human-readable column names, listed in the same order as the columns of opdata
# (they are applied by position, not matched by name).
replacementcolumns = ['NPI2 Threshold', 'NPI2 Flow Threshold', 'NPI Threshold', 'NPI Flow Threshold', 
                      'Brisbane to Nth Pine Threshold', 'Brisbane to Nth Pine Flow Threshold', 'Maroochy to Baroon Threshold', 
                      'Ewen Maddock to Baroon Threshold', 'EPI Threshold', 'EPI Flow Threshold', 'SPI Threshold', 
                      'SPI Flow Threshold', 'PRW Threshold', 'Desalination 1/3 Production Threshold', 
                      'Desalination 2/3 Production Threshold', 'Desalination Full Production Threshold', 
                      'Total Spill Volume (GL)', 'Minimum System Storage (GL)', 'Total Cost ($ million)', 
                      'Switching Cost ($ million)', 'Pumping Cost ($ million)', 'Desalination Cost ($ million)', 
                      'Treatment Cost ($ million)', 'PRW Cost ($ million)', 'Desalination Total Production Volume (GL)', 
                      'PRW Inflow (GL)', 'Two Way Pipeline Flow (GL)', 'Total Switch Count', 'Brisbane to Nth Pine Switch Count', 
                      'EPI Switch Count', 'Ewen to Baroon Switch Count', 'Maroochy to Baroon Switch Count', 'NPI2 Switch Count', 
                      'NPI Switch Count', 'SPI Switch Count', 'Environmental Flow Deficit', 'Total Volumetric Reliability']

# Because the renaming is positional, a length mismatch means the mapping cannot be trusted:
# stop here instead of printing a message and renaming only part of the columns.
if len(replacementcolumns) != len(opdata.columns):
    raise ValueError('replacement columns do not match length of original columns')

# Replace all column labels in one step (same in-place effect as renaming them one by one)
opdata.columns = replacementcolumns

## 1.3 Save formatted Pareto set (inc. to pickle for use by other notebooks)

In [11]:
# Pickle the formatted Pareto set so that other notebooks can load it
opdata.to_pickle("Data/opdata")

In [12]:
# Also write the formatted Pareto set to CSV, without the row index
opdata.to_csv("Data/opdata.csv", index = False)

In [13]:
# Column names of the three objectives
obj1name = 'Minimum System Storage (GL)'
obj2name = 'Total Cost ($ million)'
obj3name = 'Total Spill Volume (GL)'

# The objective values for every option in the Pareto set
obj1 = opdata[obj1name]
obj2 = opdata[obj2name]
obj3 = opdata[obj3name]

In [14]:
# Map each objective's column name to its series, then assemble the three into one frame
objdict = dict(zip((obj1name, obj2name, obj3name), (obj1, obj2, obj3)))
objdf = pd.DataFrame(objdict)

In [17]:
# Pickle each objective series and the combined objectives frame
for _item, _path in ((obj1, "Data/obj1"), (obj2, "Data/obj2"), (obj3, "Data/obj3"),
                     (objdf, "Data/objdf")):
    _item.to_pickle(_path)

In [18]:
# Enter the titles of the decision variables, in the order they should appear in the data frame
decision_variable_columns = [
    'NPI2 Threshold', 'NPI2 Flow Threshold', 'NPI Threshold', 'NPI Flow Threshold',
    'Brisbane to Nth Pine Threshold', 'Brisbane to Nth Pine Flow Threshold',
    'Maroochy to Baroon Threshold', 'Ewen Maddock to Baroon Threshold',
    'EPI Threshold', 'EPI Flow Threshold', 'SPI Threshold',
    'SPI Flow Threshold', 'PRW Threshold', 'Desalination 1/3 Production Threshold',
    'Desalination 2/3 Production Threshold', 'Desalination Full Production Threshold',
]

# Frame holding only the decision variable columns of opdata
dvdf = pd.DataFrame(opdata, columns = decision_variable_columns)

In [19]:
# Pickle the decision variable frame
dvdf.to_pickle("Data/dvdf")

In [20]:
# Column/chart names for the normalised objectives: units removed and
# newlines inserted into the longer names
normcols = ['Minimum\nSystem Storage', 'Total Cost', 'Total\nSpill']

In [21]:
# Normalise each objective so that 0 is its best value in the Pareto set:
# obj1 is scaled as a maximised objective, obj2 and obj3 as minimised objectives.
normobj1 = normalisationmax(obj1)
normobj2 = normalisationmin(obj2)
normobj3 = normalisationmin(obj3)

# Create a dataframe that preserves the order of the objectives, labelled with normcols.
# pd.concat is used because DataFrame.from_items was removed in pandas 1.0.
normobjdf = pd.concat([normobj1, normobj2, normobj3], axis = 1, keys = normcols)

In [25]:
# Pickle the normalised objectives frame
normobjdf.to_pickle("Data/normobjdf")

In [23]:
# Normalise each objective in the opposite direction, so that 1 is its best value
# in the Pareto set (the maximised obj1 peaks at 1; the minimised obj2/obj3 bottom out at 1).
onenormobj1 = normalisationmin(obj1)
onenormobj2 = normalisationmax(obj2)
onenormobj3 = normalisationmax(obj3)

# Create a dataframe that preserves the order of the objectives, keeping their original names.
# pd.concat is used because DataFrame.from_items was removed in pandas 1.0.
onenormobjdf = pd.concat([onenormobj1, onenormobj2, onenormobj3], axis = 1,
                         keys = [obj1name, obj2name, obj3name])

In [26]:
# Pickle each "1 = best" normalised objective and the combined frame
for _item, _path in ((onenormobj1, "Data/onenormobj1"),
                     (onenormobj2, "Data/onenormobj2"),
                     (onenormobj3, "Data/onenormobj3"),
                     (onenormobjdf, "Data/onenormobjdf")):
    _item.to_pickle(_path)

# 3. Import and format cluster results

## 3.1 Medoids

In [27]:
# Load the cluster medoids; the first row of the file holds the column names
medoids = pd.read_csv('Data/medoids.csv', header = 0)

In [29]:
# Objective values of the medoids
medobj1 = medoids['Minimum System Storage (GL)']
medobj2 = medoids['Total Cost ($ million)']
medobj3 = medoids['Total Spill Volume (GL)']

# Combine them into one frame, preserving the order of the objectives.
# pd.concat is used because DataFrame.from_items was removed in pandas 1.0.
medobjdf = pd.concat([medobj1, medobj2, medobj3], axis = 1,
                     keys = [medobj1.name, medobj2.name, medobj3.name])

# Identify option number (index) of medoids within wider Pareto set
medindex = medoids['Option']-1 # converting R index of medoids (starts at 1) to python index

In [30]:
# Pickle the medoid objective series, their combined frame and the medoid row indices
for _item, _path in ((medobj1, "Data/medobj1"),
                     (medobj2, "Data/medobj2"),
                     (medobj3, "Data/medobj3"),
                     (medobjdf, "Data/medobjdf"),
                     (medindex, "Data/medindex")):
    _item.to_pickle(_path)

## 3.2 Normalise medoids

In [31]:
# Scale the medoid objectives with the min/max of the full Pareto set (obj1..obj3),
# not with the range of the medoids themselves.
_obj1_max, _obj1_min = obj1.max(), obj1.min()
_obj2_max, _obj2_min = obj2.max(), obj2.min()
_obj3_max, _obj3_min = obj3.max(), obj3.min()

normmedobj1 = (medobj1 - _obj1_max) / (_obj1_min - _obj1_max)  # maximised obj: 0 at the set maximum
normmedobj2 = (medobj2 - _obj2_min) / (_obj2_max - _obj2_min)  # minimised obj: 0 at the set minimum
normmedobj3 = (medobj3 - _obj3_min) / (_obj3_max - _obj3_min)  # minimised obj: 0 at the set minimum

In [32]:
# Frame of the normalised medoid objectives, keyed by the original objective names
normmedoidobjdf = pd.DataFrame(dict(zip([obj1name, obj2name, obj3name],
                                        [normmedobj1, normmedobj2, normmedobj3])))

In [33]:
# Pickle the normalised medoid objectives
normmedoidobjdf.to_pickle("Data/normmedoidobjdf")

In [34]:
# Min-max scale the decision variables of each medoid against the range that each
# variable takes over the whole Pareto set (dvdf).
# Vectorised with .loc: the .ix indexer was removed in pandas 1.0, and the former
# cell-by-cell loop recomputed every column's min/max for each cell.
_dv_min = dvdf.min()
_dv_range = dvdf.max() - _dv_min

# Rows are selected by the medoids' row labels in dvdf, then renumbered 0..n-1 so the
# frame lines up with the medoid objective frames. Values are numeric (float), not object.
normmedoiddvdf = ((dvdf.loc[medindex.tolist()] - _dv_min) / _dv_range).reset_index(drop = True)

In [35]:
# Place the normalised decision variables and normalised objectives of the medoids
# side by side (column-wise), aligned on the row index
normmedoiddf = pd.concat([normmedoiddvdf, normmedoidobjdf], axis = 1)

In [36]:
# Pickle the combined normalised medoid frame
normmedoiddf.to_pickle("Data/normmedoiddf")

## 3.3 Cluster mapping (cluster membership of each option)

In [37]:
# Import the cluster numbers from the R results: only the second column of the file
# is read, and it is named 'Cluster'.
# NOTE: this file needs to be updated whenever opdata is updated.
clusternos = pd.read_csv('Data/clusternos.csv', usecols = [1], header = 0, names = ['Cluster']) 

In [39]:
# Pickle the cluster membership of each option
clusternos.to_pickle("Data/clusternos")

## 3.4 Set cluster colours for plotting

In [40]:
# Plotting colours for the clusters: position k holds the colour used for cluster k+1
cluster_colour_names = [
    'brown', 'red', 'magenta', 'yellow', 'aqua', 'Green',
    'SteelBlue', 'Navy', 'purple', 'Gray', 'Black',
]
clustcols = pd.Series(cluster_colour_names, name = 'Cluster Colours')

In [41]:
# Pickle the cluster colour palette
clustcols.to_pickle("Data/clustercolours")

In [42]:
# Colour of every option: look up the colour of the cluster it belongs to
# (cluster numbers start at 1, positions in clustcols start at 0)
clustcollist = [clustcols[clusternos['Cluster'][i]-1] for i in clusternos.index]
clustcolseries = pd.Series(clustcollist, name = 'Cluster Colour List')

In [43]:
# Pickle the per-option colour list
clustcolseries.to_pickle("Data/clustcollist")

# 4. Create a shortlist of options based on visualisation of the results and post-optimisation analysis

## Enter the row numbers of the shortlisted solutions

In [44]:
# Enter the row numbers of the shortlisted solutions; remember that in Python they start at 0.
# medindex[k-1] is the row of the k-th medoid, obj2.idxmin() is the row with the lowest
# obj2 value, and the remaining entries are row numbers entered by hand.

shortlist_rows = [medindex[2-1], medindex[9-1], medindex[10-1], obj2.idxmin(), 471, 672, 670, 295, 348]

In [45]:
# Show the selected rows. The call form of print runs on both Python 2 and Python 3
# and matches the print(...) call used earlier in this notebook.
print(shortlist_rows)

[218, 509, 138, 405, 471, 672, 670, 295, 348]


## Create category to identify shortlist and save to pickle

In [46]:
def shortlist_decider(value):
    """Return the shortlist category of the option stored in row ``value``.

    Parameters
    ----------
    value : int
        Row number (starting at 0) of an option in ``opdata``.

    Returns
    -------
    str
        ``"Option <value + 1>"`` when the row is in ``shortlist_rows``,
        otherwise ``"Other option"``.
    """
    # The label is built from the function's own argument. The previous lambda read a
    # global ``x`` that only existed because Python 2 leaks list-comprehension variables.
    return "Option %d" % (value+1) if value in shortlist_rows else "Other option"

# One category per row of opdata; range() works on both Python 2 and Python 3
shortlist = pd.DataFrame({'Shortlist':[shortlist_decider(row) for row in range(len(opdata))]})

In [47]:
# Pickle the shortlist category of every option
shortlist.to_pickle("Data/shortlist")

## Create a dataframe of shortlist data and save to pickle

In [48]:
# Rows of opdata for the shortlisted options, selected by row label and in shortlist order.
# .loc replaces the .ix indexer, which was removed in pandas 1.0.
shortlistdf = opdata.loc[shortlist_rows]

In [49]:
# Pickle the shortlist data
shortlistdf.to_pickle("Data/shortlistdf")

## Save to csv, with decision options as columns instead of rows and converted to start from 1 (instead of python 0-index). Used for presentation purposes.

In [53]:
# Transposed copy for presentation: one row per variable and one column per shortlisted
# option (numbered from 1), with every value written as text with two decimal places
shortlistdftf = pd.DataFrame(index = shortlistdf.columns, columns = shortlistdf.index+1)
for option in shortlistdf.index:
    for variable in shortlistdf.columns:
        shortlistdftf.loc[variable, option+1] = "%.2f" % shortlistdf.loc[option, variable]
shortlistdftf.to_csv("Data/shortlist.csv")

## Normalise shortlist objective functions and save to pickle

In [54]:
# Scale the shortlist objectives with the min/max of the full Pareto set (obj1..obj3):
# obj1 as a maximised objective (0 at the set maximum), obj2 and obj3 as minimised
# objectives (0 at the set minimum).
_short1 = shortlistdf['Minimum System Storage (GL)']
_short2 = shortlistdf['Total Cost ($ million)']
_short3 = shortlistdf['Total Spill Volume (GL)']

normshortobj1 = (_short1 - obj1.max()) / (obj1.min() - obj1.max())
normshortobj2 = (_short2 - obj2.min()) / (obj2.max() - obj2.min())
normshortobj3 = (_short3 - obj3.min()) / (obj3.max() - obj3.min())

In [55]:
# Frame of the normalised shortlist objectives, in objective order and keyed by the
# original objective names. pd.concat is used because DataFrame.from_items was
# removed in pandas 1.0.
normshortlistobjdf = pd.concat([normshortobj1, normshortobj2, normshortobj3], axis = 1,
                               keys = [obj1name, obj2name, obj3name])

In [56]:
# Pickle the normalised shortlist objectives
normshortlistobjdf.to_pickle("Data/normshortlistobjdf")