In [None]:
%load_ext autoreload
%autoreload 2

import os

import warnings
import functools
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras

warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
import tsgm

Let's generate a real `d_real` and a synthetic `d_syn` dataset.

In [None]:
# Offset added to every real sample to obtain the "synthetic" samples.
eps = 1e-5

# Real data and its dataset wrapper.
Xr, yr = tsgm.utils.gen_sine_vs_const_dataset(10, 100, 20, max_value=2, const=1)
d_real = tsgm.dataset.Dataset(Xr, yr)

# The synthetic data reuses the real labels and shifts the features by `eps`.
Xs = Xr + eps
ys = yr
d_syn = tsgm.dataset.Dataset(Xs, ys)

## Distance metric

First, we define a list of summary statistics that reflect the distance between the datasets. The module `tsgm.metrics.statistics` defines a set of handy statistics.

In [None]:
# Max and min statistics, first with axis=None and then with axis=1
# (order: max/None, min/None, max/1, min/1).
statistics = [
    functools.partial(stat_fn, axis=axis)
    for axis in (None, 1)
    for stat_fn in (
        tsgm.metrics.statistics.axis_max_s,
        tsgm.metrics.statistics.axis_min_s,
    )
]

Next, we define a discrepancy function. In our case, it is simply the Euclidean norm.

In [None]:
def discrepancy_func(x, y):
    """Return ``np.linalg.norm(x - y)``, the Euclidean norm of the difference.

    A named function replaces the original lambda assignment so that the
    callable carries a real name in tracebacks and reprs.
    """
    return np.linalg.norm(x - y)

Finally, we put everything together using a `tsgm.metrics.DistanceMetric` object.

In [None]:
# Combine the summary statistics and the discrepancy into one distance metric.
sim_metric = tsgm.metrics.DistanceMetric(statistics=statistics, discrepancy=discrepancy_func)

In [None]:
# Evaluate the distance metric on the real and the synthetic dataset.
sim_metric(d_real, d_syn)

## Consistency Metric

The consistency metric measures whether a family of models shows consistent performance on real and synthetic datasets. First, we define an evaluator that returns the predictive performance on a downstream task.

In [None]:
# A bare `import sklearn` does not guarantee that its subpackages are loaded,
# so the ones used below are imported explicitly.
import sklearn.metrics
import sklearn.model_selection


class EvaluatorConvLSTM:
    """
    NB an oversimplified classifier, for educational purposes only.

    Wraps a Keras model and scores it on a dataset: the model is fitted on a
    train split and its accuracy is measured on the remaining test split.

    Note: the wrapped model is fitted in place, so successive ``evaluate``
    calls on the same instance keep training the same model object.
    """

    def __init__(self, model):
        """Store the model that ``evaluate`` will fit and score."""
        self._model = model

    def evaluate(self, D: tsgm.dataset.Dataset) -> float:
        """Fit the model on a train split of ``D`` and return the test accuracy.

        Args:
            D: dataset whose ``Xy`` attribute yields the features and labels.

        Returns:
            Accuracy of the arg-max predictions on the held-out split.
        """
        X, y = D.Xy

        # A fixed seed keeps the split identical across calls.
        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            X, y, random_state=0
        )
        # One-hot encode the training labels for the two classes.
        y_train = keras.utils.to_categorical(y_train, 2)
        self._model.fit(X_train, y_train)

        # Predicted class = index of the largest output per sample.
        y_pred = np.argmax(self._model.predict(X_test), axis=1)
        # accuracy_score is documented as (y_true, y_pred).
        return sklearn.metrics.accuracy_score(y_test, y_pred)


# Input dimensions come from the real data; the task has two classes.
seq_len, feat_dim = Xr.shape[1:]
n_classes = 2

# One classifier per value of `n_conv_lstm_blocks` (1, 2, 3); each one is
# compiled and then wrapped in an evaluator.
models = []
evaluators = []
for n_blocks in range(1, 4):
    m = tsgm.models.zoo["clf_cl_n"](seq_len, feat_dim, n_classes, n_conv_lstm_blocks=n_blocks)
    m.model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    models.append(m)
    evaluators.append(EvaluatorConvLSTM(m.model))

Instantiate a consistency metric object using the set of models, each wrapped in the evaluator class.

In [None]:
# Build the consistency metric from the list of evaluators defined above.
consistency_metric = tsgm.metrics.ConsistencyMetric(evaluators=evaluators)

In [None]:
# Evaluate the consistency metric on the real and the synthetic dataset.
consistency_metric(d_real, d_syn)

## Downstream Performance

The downstream performance metric measures the quality of the generated time series by **evaluating a particular downstream model on the real dataset and on the real dataset augmented with synthetically generated data**.

In [None]:
# Build and compile a single-block classifier for the downstream task.
downstream_arch = tsgm.models.zoo["clf_cl_n"](seq_len, feat_dim, n_classes, n_conv_lstm_blocks=1)
downstream_model = downstream_arch.model
downstream_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Wrap the model in an evaluator and hand it to the metric.
evaluator = EvaluatorConvLSTM(downstream_model)
downstream_perf_metric = tsgm.metrics.DownstreamPerformanceMetric(evaluator)

In [None]:
# Print the downstream performance metric for the real and synthetic datasets.
print(downstream_perf_metric(d_real, d_syn))

## Privacy: Membership Inference Attack Metric

`tsgm.metrics.PrivacyMembershipInferenceMetric` measures the possibility of membership inference attacks using synthetic data.
The evaluation procedure is as follows:  
    1. Split the historical data into training and hold-out sets ($D_{tr}$ and $D_{test}$),  
    2. Train a generative model on $D_{tr}$ and generate a synthetic dataset $\hat{D}$,  
    3. Train a one-class classification (OCC) model on synthetic data $\hat{D}$ and evaluate it on $D_{tr}$ and $D_{test}$,  
    4. Use the precision of the OCC model as the target score.

Let's define an attacker model. For demonstration purposes, we will define a one-class SVM classifier.

In [None]:
class FlattenTSOneClassSVM:
    """Adapter that feeds flattened samples to a wrapped classifier.

    Every input keeps its first axis and has all remaining axes collapsed
    into one before it is passed to the wrapped classifier.
    """

    def __init__(self, clf):
        """Store the classifier that receives the flattened data."""
        self._clf = clf

    @staticmethod
    def _flatten(X):
        """Collapse every axis after the first into a single axis."""
        n_samples = X.shape[0]
        return X.reshape(n_samples, -1)

    def fit(self, X):
        """Fit the wrapped classifier on the flattened samples."""
        self._clf.fit(self._flatten(X))

    def predict(self, X):
        """Return the wrapped classifier's predictions for the flattened samples."""
        return self._clf.predict(self._flatten(X))

In [None]:
# Load the `svm` subpackage explicitly: a bare `import sklearn` does not
# guarantee that `sklearn.svm` is available as an attribute.
import sklearn.svm

# The attacker is a one-class SVM wrapped so that it receives flattened input.
attacker = FlattenTSOneClassSVM(sklearn.svm.OneClassSVM())
privacy_metric = tsgm.metrics.PrivacyMembershipInferenceMetric(
    attacker=attacker
)

In [None]:
# Hold-out data produced by the same generator call as the real data.
# Dedicated names are used so that `Xr` / `yr` (the arrays behind `d_real`)
# are not overwritten, which would corrupt earlier cells if they are re-run.
Xt, yt = tsgm.utils.gen_sine_vs_const_dataset(10, 100, 20, max_value=2, const=1)
d_test = tsgm.dataset.Dataset(Xt, yt)

privacy_metric(d_real, d_syn, d_test)