### New heatmaps

In [None]:
from sklearn.preprocessing import minmax_scale

def scale(group, col):
    """Return a copy of ``group`` in which column ``col`` is min-max scaled.

    Intended as the callable for ``DataFrame.groupby(...).apply``.

    Parameters
    ----------
    group : pandas.DataFrame
        The rows of one group.
    col : str
        Name of the column to rescale with ``minmax_scale``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and columns as ``group``; only ``col``
        is replaced by its scaled values.
    """
    # Work on a copy: pandas does not support applied functions that mutate
    # the group object they receive, and the caller's data must stay intact.
    scaled = group.copy()
    scaled[col] = minmax_scale(scaled[col])
    return scaled

In [None]:
import pandas as pd

# Load the load profiles and min-max scale the consumed energy separately for each profile.
ENERCOOP_df = pd.read_parquet(r'data/ENERCOOP_load_profiles.parquet.gzip')
# group_keys = False keeps the original row index: since pandas 2.0 the group keys are otherwise
# prepended to the index of a transform-like apply, which would make 'Profile' both an index level
# and a column and break the later groupby / pivot_table calls on 'Profile'.
ENERCOOPNorm_df = ENERCOOP_df.groupby('Profile', group_keys = False).apply(scale, col = 'Consumed energy [Wh]')

##### Create average days

In [None]:
import numpy as np

# Mean scaled consumption per profile for every (season, hour of the day, weekend flag) combination.
# The aggregation is named by string because passing the NumPy callable to `agg` is deprecated in recent pandas.
average_day_df = (
    ENERCOOPNorm_df
    .groupby(['Profile', 'Meteorological season', 'Hour of the day', 'Weekend'])
    .agg({'Consumed energy [Wh]': 'mean'})
    .reset_index()
)

# Build one text label per combination, e.g. "Winter, weekday, 3rd hour".
ordinal_suffixes = {'1': 'st', '2': 'nd', '3': 'rd', '21': 'st', '22': 'nd', '23': 'rd'}
hour_text = average_day_df['Hour of the day'].astype(str)
# Every hour without an explicit entry in the lookup gets the "th" suffix.
hour_label = hour_text.map(lambda hour: f"{hour}{ordinal_suffixes.get(hour, 'th')} hour")
day_type = np.where(average_day_df['Weekend'] == True, ', weekend, ', ', weekday, ')
average_day_df['Mean scaled consumed energy'] = average_day_df['Meteorological season'] + day_type + hour_label

# Wide feature matrix: one row per profile, one column per label.
df = average_day_df.pivot_table(values = 'Consumed energy [Wh]', index = 'Profile', columns = 'Mean scaled consumed energy')

In [None]:
# Put the feature columns in a fixed order: season first, then day type, then hour (1st ... 24th).
seasons = ['Spring', 'Fall', 'Summer', 'Winter']
dayTypes = ['weekday', 'weekend']

# Ordinal hour names; every hour without an explicit suffix below ends in "th".
suffixByHour = {1: 'st', 2: 'nd', 3: 'rd', 21: 'st', 22: 'nd', 23: 'rd'}
hours = [f"{number}{suffixByHour.get(number, 'th')}" for number in range(1, 25)]

cols = [
    f'{season}, {dayType}, {hour} hour'
    for season in seasons
    for dayType in dayTypes
    for hour in hours
]
# Label-based selection; raises KeyError if any expected column is absent.
df = df.loc[:, cols]

In [None]:
# Wide matrix of the scaled consumption: one row per profile, one column per 'Date' value.
energyNorm_df = pd.pivot_table(
    ENERCOOPNorm_df,
    index = 'Profile',
    columns = 'Date',
    values = 'Consumed energy [Wh]',
)

##### Cluster by average days and create heatmaps of full time series

In [None]:
from tqdm.notebook import tqdm
from tslearn.clustering import TimeSeriesKMeans
import matplotlib.pyplot as plt
from math import ceil
import seaborn as sns

# One array of cluster labels per tested cluster count (4 to 20), in loop order.
all_labels = []

for clusterCount in tqdm(range(4, 21)):
    # Cluster the profiles on their average-day features with DTW k-means.
    model = TimeSeriesKMeans(n_clusters = clusterCount, metric = 'dtw', max_iter = 10, n_jobs = -1, random_state = 42)
    labels = model.fit_predict(df)
    all_labels.append(labels)

    rowCount = ceil(clusterCount/3)
    # squeeze = False keeps `axes` two-dimensional whatever the grid size.
    fig, axes = plt.subplots(nrows = rowCount, ncols = 3, figsize = (36, rowCount*8), facecolor = 'w', squeeze = False)
    for idx, ax in enumerate(axes.flat):
        if idx >= clusterCount:
            # Spare grid cells hold no cluster; hide their empty frames.
            ax.set_axis_off()
            continue
        # Boolean row mask; assumes energyNorm_df has its rows in the same order as df.
        inCluster = labels == idx
        memberCount = int(inCluster.sum())
        temp_df = energyNorm_df[inCluster]
        if memberCount:
            # Heatmap of the full scaled time series of the profiles in this cluster
            # (skipped for an empty cluster, which seaborn cannot draw).
            sns.heatmap(temp_df, cbar = False, ax = ax)
        ax.set_title(f'Cluster Nr. {idx}: {round(100*memberCount/len(labels), 2)} % of all profiles ({memberCount})', fontsize = 24)
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_xticks([])
        ax.set_yticks([])
    # Lay out and save once per figure, and close it only afterwards: closing the current
    # figure inside the axes loop detaches it from pyplot while it is still being drawn on.
    fig.tight_layout()
    fig.savefig(f'{clusterCount} clusters')
    plt.close(fig)

In [None]:
import pickle

# Persist the label arrays collected in `all_labels` so they can be reloaded later.
with open('all_labels.pickle', mode = 'wb') as labelFile:
    pickle.dump(all_labels, file = labelFile)

#### Moving average test

In [None]:
# Plot the scaled time series of the first three profiles, one figure each.
for position in range(min(3, len(energyNorm_df))):
    profile_series = energyNorm_df.iloc[position]
    fig, ax = plt.subplots()
    # Drawing on an explicit Axes avoids echoing the Axes repr into the cell output.
    profile_series.plot(ax = ax, title = f'Profile {profile_series.name}')
    plt.show()
    plt.close(fig)

In [None]:
# Moving average over 24 consecutive columns of each profile row. The frame is transposed
# around the call because the `axis` argument of DataFrame.rolling is deprecated.
# The first 23 columns are dropped: their window is incomplete, so they are NaN.
energyNormSmooth_df = energyNorm_df.T.rolling(window = 24).mean().T.iloc[:, 23:]

In [None]:
# Plot the smoothed series (energyNormSmooth_df) of the first three profiles, one figure each.
for position in range(min(3, len(energyNormSmooth_df))):
    profile_series = energyNormSmooth_df.iloc[position]
    fig, ax = plt.subplots()
    # Drawing on an explicit Axes avoids echoing the Axes repr into the cell output.
    profile_series.plot(ax = ax, title = f'Profile {profile_series.name}, moving average')
    plt.show()
    plt.close(fig)

In [None]:
# Moving average over 7*24 consecutive columns of each profile row. The frame is transposed
# around the call because the `axis` argument of DataFrame.rolling is deprecated.
# The first 7*24 - 1 columns are dropped: their window is incomplete, so they are NaN.
energyNormSmooth_2_df = energyNorm_df.T.rolling(window = 7*24).mean().T.iloc[:, 7*24 - 1:]

In [None]:
# Plot the smoothed series (energyNormSmooth_2_df) of the first three profiles, one figure each.
for position in range(min(3, len(energyNormSmooth_2_df))):
    profile_series = energyNormSmooth_2_df.iloc[position]
    fig, ax = plt.subplots()
    # Drawing on an explicit Axes avoids echoing the Axes repr into the cell output.
    profile_series.plot(ax = ax, title = f'Profile {profile_series.name}, moving average')
    plt.show()
    plt.close(fig)

In [None]:
from tqdm.notebook import tqdm
from tslearn.clustering import TimeSeriesKMeans
import matplotlib.pyplot as plt
from math import ceil
import seaborn as sns

# One array of cluster labels per tested cluster count (4 to 20), in loop order.
all_labels_2 = []

for clusterCount in tqdm(range(4, 21)):
    # Cluster the profiles on the smoothed series in energyNormSmooth_df (no metric passed, so the estimator's default is used).
    # NOTE(review): energyNormSmooth_2_df is computed above but not clustered here - confirm this is intended.
    model = TimeSeriesKMeans(n_clusters = clusterCount, max_iter = 10, n_jobs = -1, random_state = 42)
    labels = model.fit_predict(energyNormSmooth_df)
    all_labels_2.append(labels)

    rowCount = ceil(clusterCount/3)
    # squeeze = False keeps `axes` two-dimensional whatever the grid size.
    fig, axes = plt.subplots(nrows = rowCount, ncols = 3, figsize = (36, rowCount*8), facecolor = 'w', squeeze = False)
    for idx, ax in enumerate(axes.flat):
        if idx >= clusterCount:
            # Spare grid cells hold no cluster; hide their empty frames.
            ax.set_axis_off()
            continue
        inCluster = labels == idx
        memberCount = int(inCluster.sum())
        # The heatmap shows the unsmoothed scaled series of the cluster members.
        temp_df = energyNorm_df[inCluster]
        if memberCount:
            # Skipped for an empty cluster, which seaborn cannot draw.
            sns.heatmap(temp_df, cbar = False, ax = ax)
        ax.set_title(f'Cluster Nr. {idx}: {round(100*memberCount/len(labels), 2)} % of all profiles ({memberCount})', fontsize = 24)
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_xticks([])
        ax.set_yticks([])
    # Lay out and save once per figure, and close it only afterwards: closing the current
    # figure inside the axes loop detaches it from pyplot while it is still being drawn on.
    fig.tight_layout()
    fig.savefig(f'Moving average {clusterCount} clusters')
    plt.close(fig)