# tsam - Segmentation
Example usage of the time series aggregation module (tsam)
Date: 31.10.2019

Author: Maximilian Hoffmann

Import pandas and the relevant time series aggregation class

In [None]:
%load_ext autoreload
%autoreload 2
import copy

import matplotlib.pyplot as plt
import pandas as pd

import tsam.timeseriesaggregation as tsam

%matplotlib inline

### Input data 

Read in time series from testdata.csv with pandas

In [None]:
# Parse the index column as timestamps while loading, so that the date-string
# slices applied to this frame later (e.g. "20100210":"20100218") select by
# calendar date instead of depending on a later conversion of the index.
raw = pd.read_csv("testdata.csv", index_col=0, parse_dates=True)

Create a plot function that draws a time series as a day-by-hour heat map for a visual comparison of the time series (the default colour-bar label is the one for temperature)

In [None]:
def plotTS(data, periodlength, vmin, vmax, label="T [\u00b0C]"):
    """Draw a time series as a heat map built from its stacked periods.

    Parameters
    ----------
    data
        Time series to plot. A deep copy is handed to
        ``tsam.unstackToPeriods``, so the caller's object is never passed on
        directly.
    periodlength
        Forwarded unchanged to ``tsam.unstackToPeriods`` as the period length.
    vmin, vmax
        Lower and upper limits of the colour scale.
    label
        Caption of the colour bar. The default is "T [°C]", written with an
        escape so the degree sign survives re-encoding of the file.

    Returns
    -------
    None
        The figure is created as a side effect only.
    """
    fig, axes = plt.subplots(figsize=[6, 2], dpi=100, nrows=1, ncols=1)
    # Only the stacked frame is used; the second return value is discarded.
    stacked, _ = tsam.unstackToPeriods(copy.deepcopy(data), periodlength)
    # The stacked values are transposed before drawing.
    # NOTE(review): presumably rows of `stacked` are periods, so that after the
    # transpose days run along x and hours along y - confirm against tsam.
    image = axes.imshow(
        stacked.values.T, interpolation="nearest", vmin=vmin, vmax=vmax
    )
    axes.set_aspect("auto")
    axes.set_ylabel("Hour")
    axes.set_xlabel("Day")

    # Move the right edge of the subplot area past the figure width
    # (right=1.2) before the colour bar is attached.
    fig.subplots_adjust(right=1.2)
    colorbar = fig.colorbar(image, ax=axes)
    colorbar.set_label(label)

### Hierarchical aggregation with medoid representation and 10 typical days with 24 hourly segments

Initialize an aggregation class object with hierarchical clustering as the method for ten typical days

In [None]:
# Configure an aggregation of the raw series into 10 typical periods of
# 24 hours each, using the "hierarchical" cluster method. No segmentation
# arguments are passed here.
aggregation = tsam.TimeSeriesAggregation(
    raw, noTypicalPeriods=10, hoursPerPeriod=24, clusterMethod="hierarchical"
)

Create the typical periods

In [None]:
typPeriods = aggregation.createTypicalPeriods()

Predict original data

In [None]:
predictedPeriods = aggregation.predictOriginalData()

Get accuracy indicators

In [None]:
aggregation.accuracyIndicators()

### Hierarchical aggregation with medoid representation and 20 typical days with 12 irregular segments

In [None]:
# Configure a second aggregation of the raw series: 20 typical periods of
# 24 hours each, "hierarchical" cluster method, with segmentation switched on
# and the number of segments set to 12.
aggregationSeg = tsam.TimeSeriesAggregation(
    raw,
    noTypicalPeriods=20,
    hoursPerPeriod=24,
    clusterMethod="hierarchical",
    segmentation=True,
    noSegments=12,
)

Create the typical periods

In [None]:
typPeriodsSeg = aggregationSeg.createTypicalPeriods()

Predict original data

In [None]:
predictedPeriodsSeg = aggregationSeg.predictOriginalData()

Get accuracy indicators

In [None]:
aggregationSeg.accuracyIndicators()

### Comparison of the aggregations 
Both aggregations were applied to all four time series, not just to a single one. Here we first compare the duration curves of the electrical load for the original time series, the aggregation with 10 typical days at 24 hourly time steps, and the aggregation with 20 typical days at 12 segments each.

In [None]:
fig, axes = plt.subplots(figsize=[6, 2], dpi=100, nrows=1, ncols=1)
# A duration curve is the series sorted in descending order and plotted against
# its rank. Every curve is drawn on the Axes created above rather than on
# whatever pyplot currently regards as the active Axes.
for curveLabel, frame in (
    ("Original", raw),
    ("10 with 24 hours", predictedPeriods),
    ("20 with 12 Seg", predictedPeriodsSeg),
):
    frame["Load"].sort_values(ascending=False).reset_index(drop=True).plot(
        ax=axes, label=curveLabel
    )
axes.legend()
axes.set_xlabel("Hours [h]")
axes.set_ylabel("Duration Load [MW]")

In [None]:
# Name of the column that the following heat-map cells pass to plotTS.
param = "GHI"

In [None]:
# Heat map of the original series; the colour limits are its own min and max.
plotTS(raw[param], 24, vmin=raw[param].min(), vmax=raw[param].max(), label=param)

In [None]:
# Heat map of the series predicted by the unsegmented aggregation. The colour
# limits are taken from the original data (raw), not from the prediction, so
# this panel uses the same scale as the heat map of the original series.
plotTS(
    predictedPeriods[param],
    24,
    vmin=raw[param].min(),
    vmax=raw[param].max(),
    label=param,
)

In [None]:
# Heat map of the series predicted by the segmented aggregation. The colour
# limits are again taken from the original data (raw), so this panel uses the
# same scale as the heat map of the original series.
plotTS(
    predictedPeriodsSeg[param],
    24,
    vmin=raw[param].min(),
    vmax=raw[param].max(),
    label=param,
)

In [None]:
fig, axes = plt.subplots(figsize=[6, 2], dpi=100, nrows=1, ncols=1)
# Compare the three load series on the label range "20100210" to "20100218".
# .loc makes the label-based slicing explicit, and every line is drawn on the
# Axes created above instead of relying on pyplot's current-Axes state.
for curveLabel, frame in (
    ("Original", raw),
    ("10 with 24 hours", predictedPeriods),
    ("20 with 12 seg", predictedPeriodsSeg),
):
    frame["Load"].loc["20100210":"20100218"].plot(ax=axes, label=curveLabel)
axes.legend()
axes.set_ylabel("Load [MW]")

### Validation

Check that the means of the original time series and the predicted ones are the same.

In [None]:
raw.mean()

In [None]:
predictedPeriods.mean()

In [None]:
predictedPeriodsSeg.mean()

Check that a segmented period has the same column-wise means as a non-segmented period if the periods are the same.

In [None]:
# Column-wise mean over the rows whose first index label is 0 in the
# unsegmented result (presumably typical period 0 - verify the index layout).
aggregation.createTypicalPeriods().loc[0, :].mean()

In [None]:
# Same settings as `aggregation` (10 typical periods, 24 hours per period,
# "hierarchical" cluster method), but with segmentation switched on and 12
# segments, so both results can be compared.
aggregationSegTest = tsam.TimeSeriesAggregation(
    raw,
    noTypicalPeriods=10,
    hoursPerPeriod=24,
    clusterMethod="hierarchical",
    segmentation=True,
    noSegments=12,
)

In [None]:
# Select the rows whose first index label is 0, drop the first of the remaining
# index levels, and keep the values of what is left of the index as an array.
# NOTE(review): presumably the remaining index level holds the duration of each
# segment in time steps - confirm against the tsam result layout.
segmentDurations = (
    aggregationSegTest.createTypicalPeriods()
    .loc[0, :]
    .reset_index(0, drop=True)
    .index.values
)

In [None]:
# Weighted column means for the rows whose first index label is 0: each row is
# multiplied by its entry in segmentDurations, the products are summed per
# column, and the sums are divided by the total of segmentDurations.
aggregationSegTest.createTypicalPeriods().loc[0, :].mul(
    segmentDurations, axis=0
).sum() / segmentDurations.sum()

Print out the (segmented) typical periods.

In [None]:
aggregationSeg.createTypicalPeriods()

In [None]:
aggregation.createTypicalPeriods()