In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pylab as p

# 1. User input

## 1.1 Input and format Pareto optimal set

In [2]:
# Enter file name containing results. 1 row of headers only
# Loads the pickled results object stored at Data/opdata. Later cells pass
# opdata to clusterdf() and read its 'Desalination Full Production Threshold'
# column.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
opdata = pd.read_pickle("Data/opdata")

In [3]:
# Loads the pickled decision-variable table from Data/dvdf. Section 3.1 draws
# one histogram per column of dvdf, binned over the range 0 to 1.
dvdf = pd.read_pickle("Data/dvdf")

In [4]:
# x-axis tick positions applied to every decision-variable histogram in section 3.1
dvticks = [0,0.2,0.4,0.6,0.8,1.0]

## 1.2 Input cluster membership

In [5]:
# Enter cluster mapping. Rows must correspond with rows of objective values
# In this case study, k-medoids was used to cluster, using R
# Only the second column of the CSV is read (usecols = [1]); its header row is
# replaced by the single column name 'Cluster'. The plotting cells treat the
# cluster numbers as 1-based (they request clusters 1 .. maximum).
clusternos = pd.read_csv('Data/clusternos.csv', usecols = [1], header = 0, names = ['Cluster'])

## 1.3 Specify cluster colours for plotting

In [6]:
# Create colour pool for clusters (add colours to taste, depending on number of clusters)
# Loaded from a pickle; the KDE cell looks colours up as clustcols[i], where i
# is the zero-based cluster position, so the pool needs one entry per cluster.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
clustcols = pd.read_pickle("Data/clustercolours")

# 2. Functions

## 2.1 Function to determine number of rows and columns of subplots

In [7]:
def dimensions(numplots):
    """Return a near-square subplot grid large enough for ``numplots`` panels.

    Parameters
    ----------
    numplots : int
        Number of subplots to lay out. Must be non-negative.

    Returns
    -------
    list
        ``[rows, cols]`` where ``cols`` is the square root of ``numplots``
        rounded up and ``rows`` is ``numplots / cols`` rounded up, so that
        ``rows * cols >= numplots``. ``[0, 0]`` is returned for zero plots.

    Raises
    ------
    ValueError
        If ``numplots`` is negative.
    """
    if numplots < 0:
        raise ValueError("numplots must be non-negative, got %r" % (numplots,))
    if numplots == 0:
        # Nothing to lay out; a float modulo by sqrt(0) would divide by zero.
        return [0, 0]

    cols = int(numplots ** 0.5)
    if cols * cols < numplots:
        # Not a perfect square (or the float root was truncated): round up.
        cols += 1
    # Ceiling division using integer arithmetic only.
    rows = int(-(-numplots // cols))
    return [rows, cols]

## 2.2 Function to separate a decision variable into dataframes for each cluster

In [8]:
def clusterdf(data, clusterno, column, clusters=None):
    """Extract the values of one column for the rows that belong to one cluster.

    Rows of ``data`` and rows of the cluster mapping are matched by position
    (row 0 with row 0, and so on), so both must have the same length.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the column to extract.
    clusterno : scalar
        Cluster label to select.
    column : str
        Name of the column in ``data`` to extract.
    clusters : pandas.DataFrame, pandas.Series or sequence, optional
        Cluster label for every row of ``data``. A DataFrame contributes its
        first column. Defaults to the notebook-level ``clusternos`` mapping.

    Returns
    -------
    pandas.DataFrame
        Single-column frame named ``column`` with a fresh 0..k-1 index,
        containing the selected values in their original row order.

    Raises
    ------
    ValueError
        If the cluster mapping and ``data`` have different numbers of rows.
    """
    if clusters is None:
        clusters = clusternos  # cluster mapping loaded in section 1.2
    if isinstance(clusters, pd.DataFrame):
        clusters = clusters.iloc[:, 0]

    labels = np.asarray(clusters)
    values = np.asarray(data[column])
    if len(labels) != len(values):
        raise ValueError(
            "cluster mapping has %d rows but data has %d rows"
            % (len(labels), len(values))
        )

    # Vectorised selection; sized from `data` itself, not the global opdata.
    return pd.DataFrame({column: values[labels == clusterno]})

# 3. Execution

## 3.1 Plot histograms of values for each decision variable

In [9]:
# Work out a near-square grid with one panel per decision variable.
dims = dimensions(len(dvdf.columns))
nrows, ncols = dims

# Each panel is 5 x 5 inches; the width is scaled by 1.25 to leave extra
# horizontal space between plots for long x-axis labels.
fig = plt.figure(figsize = [ncols*5*1.25, nrows*5])

# One 10-bin histogram over [0, 1] per column, labelled with the column name.
for position, column in enumerate(dvdf.columns, start = 1):
    ax = fig.add_subplot(nrows, ncols, position)
    ax.hist(dvdf[column], bins = 10, range = (0,1.0))
    ax.set_xticks(dvticks)
    ax.set_xlabel(column)

# save and close figure
fig.savefig('Histograms/histograms dvs.png', dpi = 150, bbox_inches='tight')
plt.close(fig)

## 3.2 Plot histograms of decision variable values for each cluster

In [10]:
# Histogram of one decision variable, drawn separately for every cluster.
desal_column = 'Desalination Full Production Threshold'

# clusternos is a one-column DataFrame, so clusternos.max() is a Series, which
# range() cannot use on Python 3. Reduce the column itself to a plain int.
n_clusters = int(clusternos['Cluster'].max())

fig = plt.figure(figsize = [16,12])
dxticks = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8] #change to suit
dyticks = [0,2,4,6,8,10,12] #change to suit
for i in range(0, n_clusters):
    # Cluster numbers are 1-based, loop positions are 0-based.
    cluster = clusterdf(opdata, (i+1), desal_column)
    clustlab = 'Cluster %d' % (i+1)
    # Fixed 3 x 4 grid: there is room for at most 12 clusters.
    ax = fig.add_subplot(3,4,(i+1))
    ax.hist(cluster[desal_column],
            bins = 32,
            range = (0,0.8)
            )
    ax.set_xticks(dxticks)
    ax.set_yticks(dyticks)
    ax.set_xlabel(clustlab)
# save and close figure
fig.savefig('Histograms/histograms desal full.png', dpi=150, bbox_inches='tight')
plt.close(fig)

## 3.3 Plot KDE of decision variable values for each cluster

In [11]:
# Kernel density estimate of one decision variable, one curve per cluster,
# all drawn on a single shared axes.
desal_column = 'Desalination Full Production Threshold'

# clusternos is a one-column DataFrame, so clusternos.max() is a Series, which
# range() cannot use on Python 3. Reduce the column itself to a plain int.
n_clusters = int(clusternos['Cluster'].max())

fig, axes = plt.subplots(1, 1, figsize = [9,7])
for i in range(0, n_clusters):
    # Cluster numbers are 1-based; the colour pool is indexed from 0.
    cluster = clusterdf(opdata, (i+1), desal_column)
    clustlab = 'Cluster %d' % (i+1)
    # Pass the axes explicitly instead of relying on pyplot's current axes.
    cluster[desal_column].plot(kind = 'kde', color = clustcols[i], label = clustlab, ax = axes)

# Decorate once, after every curve exists, so the legend lists all clusters.
axes.set_xlim(0,0.8)
axes.legend()
axes.set_xlabel(desal_column)
# A KDE curve shows estimated probability density, not a count.
axes.set_ylabel('Density')
# save and close figure
fig.savefig('Histograms/histogram kde desal full.png', dpi=150, bbox_inches='tight')
fig.savefig('Histograms/histogram kde desal full.eps', bbox_inches='tight')
plt.close(fig)