# Daily Load Profile Timeseries Clustering Evaluation

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import os
from math import ceil

import plotly.plotly as py
import plotly.offline as po
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tools
import colorlover as cl
#from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
cf.go_offline()

import matplotlib. pyplot as plt
from matplotlib import colors
from matplotlib.colors import LinearSegmentedColormap

import evaluation.eval_clusters as ec
import evaluation.eval_cluster_plot as pc
from support import data_dir
# Directory under data_dir that holds the cluster evaluation files read by later cells.
eval_dir = os.path.join(data_dir,'cluster_evaluation')

In [None]:
# Experiments to evaluate, as returned by evaluation.eval_clusters.getExperiments.
# NOTE(review): 'exp' is presumably a name prefix/filter for experiments — confirm in eval_clusters.
experiments = ec.getExperiments('exp')

## Analyse Cluster Scores

### Davies-Bouldin Index

In [None]:
# Davies-Bouldin Index ('dbi') for the loaded experiments, grouped by algorithm.
pc.plotClusterIndex(
    'dbi',
    'Davies-Bouldin Index',
    experiments,
    groupby='algorithm',
)

### Mean Index Adequacy

In [None]:
# Mean Index Adequacy ('mia') for the loaded experiments.
# NOTE(review): unlike the other index plots in this notebook, no
# groupby='algorithm' is passed here — confirm this is intentional.
pc.plotClusterIndex(
    'mia',
    'Mean Index Adequacy',
    experiments,
)

### Silhouette Score

The silhouette score ranges from -1 (worst) to 1 (best). Values near 0 indicate overlapping clusters. Negative values generally indicate that a sample has been assigned to the wrong cluster, because a different cluster is more similar to it.

In [None]:
# Silhouette score ('silhouette') for the loaded experiments, grouped by algorithm.
pc.plotClusterIndex(
    'silhouette',
    'Silhouette Score',
    experiments,
    groupby='algorithm',
)

### Combined Cluster Score

In [None]:
# Combined cluster score ('score') for the loaded experiments, grouped by algorithm.
# ylog=True presumably switches the y-axis to a log scale — confirm in eval_cluster_plot.
pc.plotClusterIndex(
    'score',
    'Combined Cluster Score',
    experiments,
    groupby='algorithm',
    ylog=True,
)

## Explore Cluster Labels, Centroids and Sizes

### Select best clusters for different algorithms

In [None]:
# Load the stored clustering results once; every selection below reads from them.
cluster_results = ec.readResults()

# One selection per experiment (second argument 5 — presumably the number of
# top-ranked cluster sets to keep; confirm in eval_clusters.selectClusters).
selected_clusters = {
    exp: ec.selectClusters(cluster_results, 5, experiment=exp)
    for exp in experiments
}

# Selection without an experiment filter, stored under the key 'best'.
selected_clusters['best'] = ec.selectClusters(cluster_results, 10)

In [None]:
# Re-label the overall best selection with 1-based ranks.
# A copy plus index assignment is used instead of
# set_axis(..., inplace=False): the `inplace` keyword was removed from
# set_axis in pandas 2.0, and sizing the range from the data avoids a length
# mismatch if the selection does not contain exactly 10 rows.
experiment_rank = selected_clusters['best'].copy()
experiment_rank.index = range(1, len(experiment_rank) + 1)
experiment_rank

In [None]:
# Hand-picked experiments analysed in the remainder of the notebook.
# Order matters: later cells index into this list by position.
best_exp = [
    'exp2_kmeans_unit_norm',
    'exp5_kmeans_zero-one',
    'exp4_kmeans_zero-one',
    'exp6_kmeans_unit_norm',
    'exp5_kmeans_unit_norm',
    'exp7_kmeans_unit_norm',
]

### Get denormalised (real) cluster centroids

In [None]:
# Map each selected experiment to its centroid table, read from
# <eval_dir>/best_centroids/<experiment>BEST1_centroids.csv and indexed by column 'k'.
centroid_dir = os.path.join(eval_dir, 'best_centroids')
real_cluster_centroids = {
    exp: pd.read_csv(
        os.path.join(centroid_dir, exp + 'BEST1_centroids.csv'),
        index_col='k',
    )
    for exp in best_exp
}

### Visualise Centroids

In [None]:
# Position within best_exp of the experiment to explore; change i to inspect another one.
i = 5
chosen_experiment = best_exp[i]
ec.exploreAMDBins(chosen_experiment)

In [None]:
# Plot the centroids of every selected experiment.
# Iterating over best_exp directly (instead of range(0, 6)) keeps this cell
# correct when experiments are added to or removed from the list.
for exp_name in best_exp:
    pc.plotClusterCentroids(real_cluster_centroids[exp_name])

## Explore Patterns in Cluster Labels

### Visualise TEMPORAL Cluster Specificity

In [None]:
# For every selected experiment, draw three specificity plots:
# daytype/weekday, season/monthly, and yearly.
# Iterating over best_exp directly (instead of range(0, 6)) keeps this cell
# correct when the list of experiments changes.
# NOTE(review): threshold=1200 matches the value passed to ec.getMeasures
# later in this notebook; its meaning and that of `relative` are defined in
# eval_cluster_plot.plotClusterSpecificity — confirm there.
for exp_name in best_exp:
    pc.plotClusterSpecificity(exp_name, corr_list=['daytype', 'weekday'], threshold=1200, relative=[[5, 1, 1], 1])
    pc.plotClusterSpecificity(exp_name, corr_list=['season', 'monthly'], threshold=1200, relative=[[8, 4], 1])
    pc.plotClusterSpecificity(exp_name, corr_list=['yearly'], threshold=1200)

### Visualise CONTEXTUAL Cluster Specificity (Daily Demand Assignment)

In [None]:
# Experiment whose demand correlations are visualised in the next cell.
experiment = 'exp7_kmeans_unit_norm'

corr_path = os.path.join(data_dir, 'cluster_evaluation', 'k_correlations')
best_run = experiment + 'BEST1'  # label used in the 'experiment' column of the CSVs

# NOTE(review): drop_duplicates() compares column values only and ignores the
# index, so rows with identical values but different index labels are
# collapsed too — confirm that this is intended.

# Correlations for equally spaced demand intervals.
dif = pd.read_csv(
    os.path.join(corr_path, 'demandi_corr.csv'),
    index_col=[0, 1, 2],
    header=[0],
).drop_duplicates()
# Move the last two index levels into columns so they can be filtered on.
dif_temp = dif.reset_index(level=[-2, -1])
is_total_i = (dif_temp['experiment'] == best_run) & (dif_temp['compare'] == 'total')
int100_total = dif_temp.loc[is_total_i].drop(columns=['experiment', 'compare'])

# Correlations for demand quantiles; same processing as above.
dqf = pd.read_csv(
    os.path.join(corr_path, 'demandq_corr.csv'),
    index_col=[0, 1, 2],
    header=[0],
).drop_duplicates()
dqf_temp = dqf.reset_index(level=[-2, -1])
is_total_q = (dqf_temp['experiment'] == best_run) & (dqf_temp['compare'] == 'total')
q100_total = dqf_temp.loc[is_total_q].drop(columns=['experiment', 'compare'])

In [None]:
# --- Heatmap 1: equally spaced daily demand intervals ---
# Reshape to long form: one row per (demand bin, cluster) pair with its value.
int100_long = int100_total.T.stack().reset_index()
int100_long.columns = ['int100_bins', 'cluster', 'values']

heatmap = go.Heatmap(
    z=int100_long['values'],
    x=int100_long['int100_bins'],
    y=int100_long['cluster'],
    colorscale='Reds',
)
layout = go.Layout(
    title='Relative likelihood that cluster k is used in particular consumption bin',
    xaxis=dict(
        title='total daily demand bins (Amps)',
        tickmode='array',
        # Ticks at every 10th bin position, labelled 0, 100, ..., 900.
        tickvals=list(range(0, 100, 10)),
        ticktext=list(range(0, 1000, 100)),
    ),
    # Derive the axis title from `experiment` so it cannot go stale when the
    # selected experiment changes.
    yaxis=dict(title='k clusters for ' + experiment),
)
fig = {'data': [heatmap], 'layout': layout}
po.iplot(fig)

# --- Heatmap 2: equally sized daily demand intervals (quantiles) ---
# Skip the first row of the transposed table.
# (A previous variant additionally applied .drop(columns=37)/0.01.)
rel_q100 = q100_total.T[1::]

# Three-colour scale built from the list below.
slatered = ['#232c2e', '#ffffe0', '#c34513']
label_cmap, label_cs = pc.colorscale_from_list(slatered, 'label_cmap')
# NOTE(review): ref_point=1/49 is presumably the uniform likelihood over 49
# clusters — confirm against the experiment's cluster count.
colorscl = pc.asymmetric_colorscale(rel_q100, label_cmap, ref_point=1/49)

heatmap = go.Heatmap(
    z=rel_q100.T.values,
    x=rel_q100.index,
    y=rel_q100.columns,
    name='corr',
    colorscale=colorscl,
)
layout = go.Layout(
    title='Heatmap of relative likelihood of Cluster k being used in consumption quantile',
    xaxis=dict(title='total daily demand quantiles (Amps) - log scale', type='log'),
    yaxis=dict(title='Cluster k'),
)
fig = {'data': [heatmap], 'layout': layout}
po.iplot(fig)

## Analyse Cluster Representativity and Specificity

In [None]:
# Fetch the five evaluation measure tables for the selected experiments.
# NOTE(review): 1200 is the same value passed as `threshold` to
# plotClusterSpecificity above — presumably the same threshold; confirm.
(
    total_consE,
    peak_consE,
    peak_coincR,
    temporal_entropy,
    demand_entropy,
) = ec.getMeasures(best_exp, 1200)

### Consumption Error - total

In [None]:
# Total consumption error metrics for the selected experiments.
pc.plotClusterMetrics(
    total_consE,
    'TOTAL consumption error evaluation metrics',
)

### Consumption Error - peak (max)

In [None]:
# Peak consumption error metrics for the selected experiments.
pc.plotClusterMetrics(
    peak_consE,
    'PEAK consumption error evaluation metrics',
)

### Peak Coincidence Ratio

In [None]:
# Daily peak coincidence ratios, restricted to the 'coincidence_ratio' metric
# and rendered with make_area_plot enabled.
pc.plotClusterMetrics(
    peak_coincR,
    'daily peak coincidence ratios',
    metric='coincidence_ratio',
    make_area_plot=True,
)

### Cluster Entropy - TEMPORAL
#### weekday, month

In [None]:
# Time-derived cluster entropy with the plotting defaults.
# Optional arguments tried previously: metric='weekday_entropy', make_area_plot=False
pc.plotClusterMetrics(
    temporal_entropy,
    'time-derived cluster entropy',
)

### Cluster Entropy - ENERGY DEMAND
#### total daily demand, max daily demand

In [None]:
# Demand-based cluster entropy with the plotting defaults.
pc.plotClusterMetrics(
    demand_entropy,
    'demand-based cluster entropy',
)

In [None]:
# Disabled: ec.saveMeasures(best_exp, 1200)
# NOTE(review): presumably this call writes the CSV read below — confirm before re-enabling.

# Read the measures table: two index columns, three header rows.
entropy_path = os.path.join(eval_dir, 'cluster_entropy.csv')
df = pd.read_csv(entropy_path, index_col=[0, 1], header=[0, 1, 2])

# Discard the first index level, keeping only the second as row labels.
df = df.reset_index(level=0, drop=True)

# Replace the first two underscores of every row label with spaces.
df = df.rename(index=lambda label: label.replace('_', ' ', 2))

In [None]:
# Rank the experiments (rows of df) on each measure, then average the ranks
# within each group of columns. The frames are transposed BEFORE grouping
# because DataFrame.groupby(axis=1) is deprecated since pandas 2.1; grouping
# the transposed rows yields the same table as grouping columns and
# transposing afterwards.

# Coincidence ratio: ranked descending (largest value gets rank 1),
# averaged per (measure, metric).
rank_coincR = (
    df[['coincR']]
    .rank(ascending=False)
    .T
    .groupby(level=['measure', 'metric'])
    .mean()
)

# Consumption error: ranked ascending (smallest value gets rank 1),
# averaged per measure.
rank_consE = df[['consE']].rank().T.groupby(level=['measure']).mean()
# Add a constant 'metric' index level so this table has the same
# (measure, metric) index layout as rank_coincR.
rank_consE.insert(loc=0, column='metric', value='mean_error')
rank_consE.set_index('metric', append=True, inplace=True)

# Entropy: ranked ascending, one row per remaining column-level combination.
rank_entropy = df['entropy'].rank().T

In [None]:
# Stack the three rank tables into one table (rows: measures, columns: experiments).
# The former `levels=` argument was dropped: pd.concat only uses it together
# with `keys=`, so it had no effect here.
ranked_results = pd.concat([rank_coincR, rank_consE, rank_entropy])

# One weight per row of the stacked table; the list length must match the
# number of rows (8), otherwise insert() raises.
ranked_results.insert(loc=0, column='weights', value=[4, 7, 7, 6, 6, 5, 5, 2])

# Weighted sum of ranks per experiment (every column except 'weights').
score_results = (
    ranked_results.drop(columns='weights')
    .multiply(ranked_results['weights'], axis='index')
    .sum()
)
score = pd.DataFrame(score_results, columns=['score']).T
# Three-level label so the score row lines up with the index built below.
score.index = pd.MultiIndex.from_tuples([('', '', 'SCORE')])

# Move the weights into the index and append the score row.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
ranked_results.set_index('weights', append=True, inplace=True)
score_results = pd.concat([ranked_results, score])
score_results