# Daily Load Profile Timeseries Clustering Evaluation

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
from math import ceil

import plotly.plotly as py
import plotly.offline as po
import plotly.graph_objs as go
import plotly.tools as tools
import colorlover as cl
#from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
cf.go_offline()

import matplotlib. pyplot as plt
from matplotlib import colors
from matplotlib.colors import LinearSegmentedColormap

import evaluation.evalClusters as ec
import evaluation.plotClusters as pc

## Analyse Cluster Scores

### Davies-Bouldin Index

In [None]:
# Plot the 'dbi' (Davies-Bouldin Index) scores with the project's plotting helper.
# For the Davies-Bouldin Index itself, lower values mean better-separated clusters.
# NOTE(review): the second argument is presumably the plot title — confirm in plotClusters.
pc.plotClusterIndex('dbi', 'Davies-Bouldin Index')

### Mean Index Adequacy

In [None]:
# Plot the 'mia' (Mean Index Adequacy) scores with the project's plotting helper.
# NOTE(review): the second argument is presumably the plot title — confirm in plotClusters.
pc.plotClusterIndex('mia','Mean Index Adequacy')

### Silhouette Score

The best value is 1 and the worst value is -1. Values near 0 indicate overlapping clusters. Negative values generally indicate that a sample has been assigned to the wrong cluster, as a different cluster is more similar.

In [None]:
# Plot the 'silhouette' scores with the project's plotting helper.
# NOTE(review): the second argument is presumably the plot title — confirm in plotClusters.
pc.plotClusterIndex('silhouette', 'Silhouette Score')

### Combined Cluster Score

In [None]:
# Plot the combined 'score' index with the project's plotting helper.
# NOTE(review): ylog=True presumably switches the y-axis to a log scale — confirm in plotClusters.
pc.plotClusterIndex('score','Combined Cluster Score',ylog=True)

## Explore Cluster Centroids and Sizes

### Select best clusters for different algorithms

In [None]:
# Load the stored clustering results once; every selection below reuses them.
cluster_results = ec.readResults()

# One selection per k-means experiment, in the order the names are listed.
# NOTE(review): the numeric argument (5 here, 10 below) is presumably the
# number of top-ranked runs to keep — confirm in evalClusters.selectClusters.
demin_clusters, zerone_clusters, sanorm_clusters = (
    ec.selectClusters(cluster_results, 5, experiment)
    for experiment in (
        'exp2_kmeans_demin',
        'exp2_kmeans_zero-one',
        'exp2_kmeans_sa_norm',
    )
)

# No experiment name is passed here, so the helper's default applies.
best_clusters = ec.selectClusters(cluster_results, 10)

### Get normalised cluster centroids

In [None]:
# Fetch the centroids for each cluster selection made in the previous cell.
# Every call is unpacked into three values; the leading "N" marks these as the
# normalised variants (see the heading of this section).
# NOTE(review): the *_cs and *meta items are presumably cluster sizes and
# experiment metadata — confirm in evalClusters.getCentroids.
Nd_centroids, Nd_cs, Ndmeta = ec.getCentroids(demin_clusters)
Nzoo_centroids, Nzoo_cs, Nzmeta = ec.getCentroids(zerone_clusters)
Nsan_centroids, Nsan_cs, Nsmeta = ec.getCentroids(sanorm_clusters)
Nnorm_centroids, Nnorm_cs, Nnmeta = ec.getCentroids(best_clusters)

### Get denormalised (real) cluster centroids

In [None]:
# Fetch the denormalised ("real") centroids, looked up by experiment name.
# Every call is unpacked into three values, mirroring the normalised cell.
# NOTE(review): the norm_* triple is read from 'exp2_norm_kmeans', whereas the
# matching normalised triple comes from best_clusters, which was selected
# without an experiment name — verify that both refer to the same experiment.
d_centroids, d_cs, dmeta = ec.realCentroids('exp2_kmeans_demin')
zoo_centroids, zoo_cs, zmeta = ec.realCentroids('exp2_kmeans_zero-one')
san_centroids, san_cs, smeta = ec.realCentroids('exp2_kmeans_sa_norm')
norm_centroids, norm_cs, nmeta = ec.realCentroids('exp2_norm_kmeans')

### Visualise Centroids

In [None]:
# Plot the real centroids of the 'exp2_kmeans_demin' experiment.
# The zoo_*, san_* and norm_* triples come from the same realCentroids helper,
# so they can presumably be passed here instead to inspect other experiments.
pc.plotClusterCentroids(d_centroids, d_cs, dmeta)

## Explore Patterns in Cluster Labels

In [None]:
# Fetch the best cluster labels for each experiment; the names on the left
# line up one-to-one with the experiment names listed below.
demin_labels, zerone_labels, sanorm_labels, norm_labels = map(
    ec.bestLabels,
    (
        'exp2_kmeans_demin',
        'exp2_kmeans_zero-one',
        'exp2_kmeans_sa_norm',
        'exp2_norm_kmeans',
    ),
)

### Visualise Cluster Label Assignment

In [None]:
# Plot the cluster label assignment for the 'exp2_kmeans_demin' labels.
# NOTE(review): 2014 is presumably the calendar year to display — confirm in plotClusters.
pc.plotClusterLabels(demin_labels, 2014)

### Visualise TEMPORAL Cluster Specificity

In [None]:
# Plot cluster specificity for the 'exp2_kmeans_demin' labels against each
# temporal feature named in corr_list.
pc.plotClusterSpecificity(demin_labels, corr_list=['daytype','weekday','monthly','season','yearly'])

### Visualise CONTEXTUAL Cluster Specificity (Daily Demand Assignment)

In [None]:
# Compute the two daily-demand likelihood tables for 'exp2_norm_kmeans'; the
# next cell plots int100_likelihood (equally spaced bins) and q100_likelihood
# (quantile bins) as heatmaps.
# NOTE(review): the meaning of the second argument (1) is not visible here —
# confirm in evalClusters.demandCorr.
int100_likelihood, q100_likelihood = ec.demandCorr('exp2_norm_kmeans', 1)

In [None]:
# --- Heatmap 1: equally spaced daily demand intervals ---
# Reshape the likelihood table to long format: one row per (bin, cluster) pair.
i = int100_likelihood.stack().reset_index()
i.columns = ['int100_bins', 'cluster', 'values']
fig = i.iplot(
    kind='heatmap',
    x='int100_bins',
    y='cluster',
    z='values',
    colorscale='Reds',
    title='Heatmap of relative likelihood of Cluster k being used in consumption bin',
    asFigure=True,
)
# Put a tick at every tenth bin position and label the ticks 0, 100, ..., 900.
fig['layout']['xaxis'].update(
    title='total daily demand bins (Amps)',
    tickmode='array',
    tickvals=list(range(0, 100, 10)),
    ticktext=list(range(0, 1000, 100)),
)
fig['layout']['yaxis'].update(title='Cluster k')
po.iplot(fig)

# --- Heatmap 2: equally sized daily demand intervals (quantiles) ---
# NOTE(review): 'Cluster 33' is excluded by name and the remaining values are
# divided by 0.01 — presumably to express each likelihood relative to a uniform
# 1% share per quantile; confirm both choices against evalClusters.demandCorr.
rel_q100 = q100_likelihood.drop(columns='Cluster 33').div(0.01)

# Three-colour scale built by the project helpers around ref_point=1.0.
slatered = ['#232c2e', '#ffffff', '#c34513']
label_cmap, label_cs = pc.colorscale_from_list(slatered, 'label_cmap')
colorscl = pc.asymmetric_colorscale(rel_q100, label_cmap, ref_point=1.0)

# Quantiles run along x and clusters along y, hence the transposed z matrix.
heatmap = go.Heatmap(
    z=rel_q100.T.values,
    x=rel_q100.index,
    y=rel_q100.columns,
    name='corr',
    colorscale=colorscl,
)
layout = go.Layout(
    title='Heatmap of relative likelihood of Cluster k being used in consumption quantile',
    xaxis={'title': 'total daily demand quantiles (Amps) - log scale', 'type': 'log'},
    yaxis={'title': 'Cluster k'},
)
fig = dict(data=[heatmap], layout=layout)
po.iplot(fig)

## Analyse Cluster Representativity and Specificity

### Consumption Error - total

### Consumption Error - max

### Peak Coincidence

### Cluster Entropy - TEMPORAL
#### weekday, month

### Cluster Entropy - CONTEXTUAL
#### total daily demand, max daily demand