In [None]:
from pathlib import Path

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the load-profile records from the parquet file.
enercoop_df = pd.read_parquet(r'data/enercoop_load_profiles.parquet.gzip')

# Moving-average window length, in number of 'Date' columns.
# NOTE(review): 24*7 presumably means one week of hourly samples - confirm the sampling rate.
windowSize = 24*7
#windowSize = 24*7*30

# Wide layout: one row per profile, one column per date.
profileByDate_df = enercoop_df.pivot_table(values = 'Consumed energy [Wh]', index = 'Profile', columns = 'Date')

# Moving average along the date axis. `rolling(axis = 1)` is deprecated in recent
# pandas, so roll down the rows of the transposed frame and transpose back.
movAvg_df = profileByDate_df.T.rolling(window = windowSize).mean().T

# The first windowSize - 1 columns have no complete window behind them; drop them.
movAvg_df = movAvg_df.iloc[:, windowSize - 1:]

# MinMaxScaler rescales column by column, so transpose to scale each profile
# independently, then transpose back and restore the row/column labels.
movAvgNorm_df = pd.DataFrame(
    MinMaxScaler().fit_transform(movAvg_df.T).T,
    index = movAvg_df.index,
    columns = movAvg_df.columns,
)

In [None]:
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(font_scale = 1.25)

# savefig does not create missing folders, so make sure the target exists first.
profilePlot_dir = Path('Profiles')
profilePlot_dir.mkdir(parents = True, exist_ok = True)

# Write one PNG per profile showing its normalised moving average.
for item in tqdm(movAvgNorm_df.index):
    fig, ax = plt.subplots(figsize = (9.6, 7.2), facecolor = 'w')
    movAvgNorm_df.loc[item].plot(ax = ax, title = f'Profile {item}', ylabel = 'Relative consumed energy', rot = 0)
    fig.savefig(profilePlot_dir / f'{item}.png')
    # Close this figure explicitly so figures do not pile up in memory across iterations.
    plt.close(fig)

In [None]:
from sklearn.model_selection import GridSearchCV
from tslearn.clustering import TimeSeriesKMeans

In [None]:
# Cluster the normalised moving-average series into four groups.
# random_state is fixed so repeated runs use the same seed.
movAvgClusterSettings = dict(n_clusters = 4, n_jobs = -1, random_state = 42)
model = TimeSeriesKMeans(**movAvgClusterSettings)

# One cluster label per row (profile) of movAvgNorm_df.
labels = model.fit_predict(movAvgNorm_df)

##### Test area

In [None]:
# Build a combined text label from season, hour and weekend flag, and store it in a
# column whose name is the ' | '-joined entries of featureSelection.
# NOTE(review): avgDay_df and featureSelection are not defined in the visible cells.
seasonPart = avgDay_df['Meteorological season']
hourPart = avgDay_df['Hour of the day'].astype(str)
weekendPart = avgDay_df['Weekend'].astype(str)
avgDay_df[' | '.join(featureSelection)] = seasonPart + ' | ' + hourPart + ' | ' + weekendPart

In [None]:
# Row-wise ' | ' concatenation of the selected feature columns, using agg.
# NOTE(review): featureSelection is not defined in the visible cells.
featureText_df = enercoop_df[featureSelection].astype(str)
featureText_df.agg(' | '.join, axis = 1)

In [None]:
# Row-wise ' | ' concatenation of the selected feature columns, using apply.
# NOTE(review): featureSelection is not defined in the visible cells.
def _joinRowAsText(row):
    """Return the row's values cast to str and joined with ' | '."""
    return ' | '.join(row.astype(str))

enercoop_df[featureSelection].apply(_joinRowAsText, axis = 1)

In [None]:
# Mean consumed energy per (profile, hour of the day).
# The aggregation is named by string: this avoids relying on `np` (imported only in a
# later cell) and the deprecated practice of passing np.mean to groupby.agg.
# NOTE(review): enercoopNorm_df is not defined in the visible cells.
enercoopNorm_df.groupby(['Profile', 'Hour of the day']).agg({'Consumed energy [Wh]': 'mean'})

In [None]:
import numpy as np

# Long-form "average day": mean consumed energy per (profile, hour of the day),
# with the grouping keys turned back into columns and the value column renamed.
# Built as one chain (no inplace edits) so re-running the cell gives the same frame;
# the aggregation is named by string because passing np.mean to groupby.agg is deprecated.
# NOTE(review): enercoopNorm_df is not defined in the visible cells.
avgDayNorm_df = (
    enercoopNorm_df
    .groupby(['Profile', 'Hour of the day'])
    .agg({'Consumed energy [Wh]': 'mean'})
    .reset_index()
    .rename(columns = {'Consumed energy [Wh]': 'Consumed energy'})
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(font_scale = 1.25)

# Average-day curves, one line per profile, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize = (9.6, 7.2))
sns.lineplot(
    data = avgDayNorm_df,
    x = 'Hour of the day',
    y = 'Consumed energy',
    hue = 'Profile',
    legend = False,
    ax = ax,
);

In [None]:
from tslearn.clustering import TimeSeriesKMeans

# Number of clusters to split the average-day profiles into.
clusterCount = 2

# Wide layout: one row per profile, one column per hour of the day.
avgDayEnergyNorm_df = avgDayNorm_df.pivot_table(
    index = 'Profile',
    columns = 'Hour of the day',
    values = 'Consumed energy',
)

# Soft-DTW k-means with a capped iteration count and a fixed random_state.
softDtwSettings = dict(
    n_clusters = clusterCount,
    metric = 'softdtw',
    max_iter = 10,
    n_jobs = -1,
    random_state = 42,
)
model = TimeSeriesKMeans(**softDtwSettings)

# One cluster label per row (profile) of avgDayEnergyNorm_df.
labels = model.fit_predict(avgDayEnergyNorm_df)

In [None]:
# Profiles (row labels of the wide frame) assigned to each cluster.
cluster_list = [avgDayEnergyNorm_df.index[labels == item] for item in range(clusterCount)]

# One panel per cluster. The panel count and figure width follow clusterCount instead of
# being fixed at two; squeeze = False keeps `axes` an array even for a single cluster.
fig, axes = plt.subplots(ncols = clusterCount, sharey = True, squeeze = False, figsize = (9.6 * clusterCount, 7.2))
axes = axes.ravel()
for item in range(clusterCount):
    # Long-form rows belonging to the profiles of this cluster.
    clusterProfiles_df = avgDayNorm_df[avgDayNorm_df['Profile'].isin(cluster_list[item])]
    sns.lineplot(data = clusterProfiles_df, x = 'Hour of the day', y = 'Consumed energy', hue = 'Profile', legend = False, ax = axes[item])
    axes[item].set_title(f'Cluster {item}')
fig.subplots_adjust(wspace = 0.025);

In [None]:
# Side-by-side average-day curves per cluster, with shared x and y axes.
# avgDayEnergyNorm_df is wide (one column per hour) and has no 'Hour of the day' or
# 'Consumed energy' columns, so the long-form avgDayNorm_df is filtered to the profiles
# of each cluster and plotted instead.
fig, axes = plt.subplots(ncols = clusterCount, sharex = True, sharey = True, squeeze = False)
axes = axes.ravel()
for clusterId in range(clusterCount):
    clusterProfiles = avgDayEnergyNorm_df.index[labels == clusterId]
    clusterLong_df = avgDayNorm_df[avgDayNorm_df['Profile'].isin(clusterProfiles)]
    sns.lineplot(data = clusterLong_df, x = 'Hour of the day', y = 'Consumed energy', hue = 'Profile', legend = False, ax = axes[clusterId])
    axes[clusterId].set_title(f'Cluster {clusterId}')

In [None]:
# Wide-form average-day rows of the profiles labelled as cluster 0.
avgDayEnergyNorm_df.loc[labels == 0]

In [None]:
# Wide-form average-day rows of the profiles labelled as cluster 1.
avgDayEnergyNorm_df.loc[labels == 1]