# tsam - 1. Example
Example usage of the time series aggregation module (tsam)
Date: 08.05.2017
Update: 28.06.2023

Author: Leander Kotzur

Import pandas and the relevant time series aggregation class

In [None]:
%load_ext autoreload
%autoreload 2
import copy
import os

import matplotlib.pyplot as plt
import pandas as pd

import tsam.timeseriesaggregation as tsam

%matplotlib inline

### Input data 

Read in time series from testdata.csv with pandas

In [None]:
# Load the test data set; the first CSV column is used as the DataFrame index.
raw = pd.read_csv("testdata.csv", index_col=0)

Show a slice of the dataset

In [None]:
# Preview the first rows of the input data.
raw.head()

Show the shape of the raw input data: 4 types of timeseries (GHI, Temperature, Wind and Load) for every hour in a year

In [None]:
# (number of rows, number of columns) of the input data.
raw.shape

Create a plot function for the temperature for a visual comparison of the time series

In [None]:
def plotTS(data, periodlength, vmin, vmax):
    """Plot a time series as a heat map of periods versus time steps.

    The series is reshaped with ``tsam.unstackToPeriods`` and the transposed
    result is drawn as an image, so the x axis runs over the periods and the
    y axis over the time steps inside a period. The axis and colour-bar
    labels are fixed to "Day", "Hour" and temperature in degrees Celsius,
    i.e. the function is tailored to daily periods of hourly temperatures.

    Args:
        data: Time series to plot. A deep copy is handed to
            ``tsam.unstackToPeriods`` so the caller's object is not touched.
        periodlength: Number of time steps per period (24 for hourly data
            and daily periods).
        vmin: Lower limit of the colour scale.
        vmax: Upper limit of the colour scale.

    Returns:
        None. The figure is created as a side effect (and shown by the
        notebook's inline backend).
    """
    fig, axes = plt.subplots(figsize=[6, 2], dpi=100, nrows=1, ncols=1)
    # Only the reshaped frame is needed; the second return value is unused.
    stacked, _ = tsam.unstackToPeriods(copy.deepcopy(data), periodlength)
    cax = axes.imshow(stacked.values.T, interpolation="nearest", vmin=vmin, vmax=vmax)
    axes.set_aspect("auto")
    axes.set_ylabel("Hour")
    # Label the axes object directly instead of relying on pyplot's
    # "current axes" state.
    axes.set_xlabel("Day")

    # Widen the drawing area so the colour bar fits next to the image.
    fig.subplots_adjust(right=1.2)
    cbar = fig.colorbar(cax, ax=axes)
    # Named escape keeps the degree sign intact regardless of how the source
    # file is (re-)encoded; the previous literal was mojibake ("Â°").
    cbar.set_label("T [\N{DEGREE SIGN}C]")

Plot an example series - in this case the temperature

In [None]:
# Colour limits span the full temperature range of the raw data.
plotTS(raw["T"], 24, vmin=raw["T"].min(), vmax=raw["T"].max())

### Simple k-means aggregation

Initialize an aggregation class object with k-means as the method for eight typical days, without any integration of extreme periods. Alternative values for clusterMethod are 'averaging', 'hierarchical' and 'k_medoids'.

In [None]:
# k-means clustering into 8 typical periods of 24 hours each; no extreme
# period options are passed.
aggregation = tsam.TimeSeriesAggregation(
    raw, noTypicalPeriods=8, hoursPerPeriod=24, clusterMethod="k_means"
)

Create the typical periods

In [None]:
# Run the aggregation; the result is inspected and exported in the next cells.
typPeriods = aggregation.createTypicalPeriods()

Show shape of typical periods: 4 types of timeseries for 8*24 hours

In [None]:
# (number of rows, number of columns) of the typical periods.
typPeriods.shape

Save typical periods to .csv file

In [None]:
# Make sure the output folder exists: to_csv does not create missing
# directories and would otherwise fail with an OSError on a fresh checkout.
os.makedirs("results", exist_ok=True)
typPeriods.to_csv(os.path.join("results", "testperiods_kmeans.csv"))

Repredict the original time series based on the typical periods

In [None]:
# Rebuild a full-length series from the k-means typical periods; it is
# reused in the comparison plots at the end of the notebook.
predictedPeriods = aggregation.predictOriginalData()

Plot the repredicted data

In [None]:
# Colour limits are taken from the raw data so this image is directly
# comparable with the plot of the original series.
plotTS(predictedPeriods["T"], 24, vmin=raw["T"].min(), vmax=raw["T"].max())

As seen, the days with the minimal temperature are excluded. If they are required, they can be added to the aggregation as follows.

### Hierarchical aggregation including extreme periods

Initialize a time series aggregation which integrates the day with the minimal temperature and the day with the maximal load as periods.

In [None]:
# Hierarchical clustering into 8 typical days, additionally requesting the
# period with the minimal temperature ("T") and the period with the maximal
# load ("Load") as extreme periods.
aggregation = tsam.TimeSeriesAggregation(
    raw,
    noTypicalPeriods=8,
    hoursPerPeriod=24,
    clusterMethod="hierarchical",
    # NOTE(review): presumably adds each extreme period as its own cluster
    # center - confirm against the tsam documentation.
    extremePeriodMethod="new_cluster_center",
    addPeakMin=["T"],
    addPeakMax=["Load"],
)

Create the typical periods

In [None]:
# Run the aggregation; note that this overwrites the k-means typPeriods.
typPeriods = aggregation.createTypicalPeriods()

The aggregation can also be evaluated by indicators

In [None]:
# Display the accuracy indicators computed for the current aggregation.
aggregation.accuracyIndicators()

Save typical periods to .csv file

In [None]:
# Make sure the output folder exists: to_csv does not create missing
# directories and would otherwise fail with an OSError on a fresh checkout.
os.makedirs("results", exist_ok=True)
typPeriods.to_csv(os.path.join("results", "testperiods_hierarchical.csv"))

Repredict the original time series based on the typical periods

In [None]:
# Rebuild a full-length series from the typical periods that include the
# extreme periods; reused in the comparison plots at the end of the notebook.
predictedPeriodsWithEx = aggregation.predictOriginalData()

Plot repredicted data

In [None]:
# Colour limits are taken from the raw data so this image is directly
# comparable with the plot of the original series.
plotTS(predictedPeriodsWithEx["T"], 24, vmin=raw["T"].min(), vmax=raw["T"].max())

Now also the days with the minimal temperature are integrated into the typical periods.

### Advanced aggregation method

Combining hierarchical clustering of periods, segments inside the periods, and representation reproducing the distribution of the original data

In [None]:
# 24 typical days from hierarchical clustering, each further divided into
# 8 segments, with a distribution-preserving representation.
aggregation = tsam.TimeSeriesAggregation(
    raw,
    noTypicalPeriods=24,
    clusterMethod="hierarchical",
    segmentation=True,
    noSegments=8,
    representationMethod="distributionAndMinMaxRepresentation",
    # NOTE(review): presumably False means the distribution is reproduced
    # over the whole series rather than per period - confirm in the tsam docs.
    distributionPeriodWise=False,
    hoursPerPeriod=24,
)

In [None]:
# NOTE(review): unlike the sections above, createTypicalPeriods() is not
# called explicitly before this line - presumably predictOriginalData()
# triggers the aggregation itself; confirm against the tsam documentation.
predictedPeriodsAdvanced = aggregation.predictOriginalData()

In [None]:
# Colour limits are taken from the raw data so this image is directly
# comparable with the plot of the original series.
plotTS(predictedPeriodsAdvanced["T"], 24, vmin=raw["T"].min(), vmax=raw["T"].max())

### Comparison of the aggregations 
The plots above showed only the temperature, but in every case all four time series were aggregated. Therefore, we also compare the duration curves of the electrical load for the original time series, the aggregation with k-means, the hierarchical aggregation including peak periods, and the advanced aggregation with segmentation.

In [None]:
# Duration curves of the load: each series is sorted in descending order and
# drawn against its rank, so the original and all aggregations share one axis.
fig, axes = plt.subplots(figsize=[10, 6], dpi=100, nrows=1, ncols=1)
durationCurves = [
    (raw, {"label": "Original", "lw": 3}),
    (predictedPeriods, {"label": "8 typ days"}),
    (predictedPeriodsWithEx, {"label": "8 typ days peak period"}),
    (predictedPeriodsAdvanced, {"label": "24 typ days with 8 segments"}),
]
for frame, plotArgs in durationCurves:
    sortedLoad = frame["Load"].sort_values(ascending=False)
    # Drop the time index so the x axis is simply the rank in hours.
    sortedLoad.reset_index(drop=True).plot(**plotArgs)
plt.legend()
plt.xlabel("Hours [h]")
plt.ylabel("Duration Load [MW]")

Or as unsorted time series for an example week

In [None]:
# Load of the original and the three reconstructed series over the same
# date window, drawn in their natural (unsorted) time order.
fig, axes = plt.subplots(figsize=[10, 6], dpi=100, nrows=1, ncols=1)
windowStart, windowEnd = "20100210", "20100218"
loadCurves = [
    (raw, {"label": "Original", "lw": 3}),
    (predictedPeriods, {"label": "8 typ days"}),
    (predictedPeriodsWithEx, {"label": "8 typ days \n + peak period"}),
    (predictedPeriodsAdvanced, {"label": "24 typ days \n + 8 segments "}),
]
for frame, plotArgs in loadCurves:
    frame["Load"][windowStart:windowEnd].plot(**plotArgs)
plt.legend()
plt.ylabel("Load [MW]")

In [None]:
# Solar irradiance (GHI) of the original and the three reconstructed series
# over the same date window, drawn in their natural (unsorted) time order.
fig, axes = plt.subplots(figsize=[10, 6], dpi=100, nrows=1, ncols=1)
windowStart, windowEnd = "20100210", "20100218"
ghiCurves = [
    (raw, {"label": "Original", "lw": 3}),
    (predictedPeriods, {"label": "8 typ days"}),
    (predictedPeriodsWithEx, {"label": "8 typ days \n + peak period"}),
    (predictedPeriodsAdvanced, {"label": "24 typ days \n + 8 segments "}),
]
for frame, plotArgs in ghiCurves:
    frame["GHI"][windowStart:windowEnd].plot(**plotArgs)
plt.legend()
plt.ylabel("Solar irradiance [W/m2]")