# An example of early classification using RNNs

Let's start by loading data using `tslearn`.

In [None]:
import tensorflow as tf
import numpy as np
from tslearn.datasets import CachedDatasets
from model import DualOutputRNN

# Load the train/test split of the "Trace" dataset from tslearn's cached datasets.
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# One-hot encode the training labels; y_test is left exactly as returned by the loader.
y_train = tf.keras.utils.to_categorical(y_train)

In [None]:
# Seed NumPy and TensorFlow so that repeated runs of this cell are comparable.
np.random.seed(0)
tf.set_random_seed(0)

# Size the network from the data: one output per one-hot column of y_train,
# and as many time steps as the second axis of X_train.
model = DualOutputRNN(
    n_classes=y_train.shape[1],
    ts_size=X_train.shape[1],
    batch_size=10,
    epochs=200,
    lr=.001,
    reg=.01,
    earliness_factor=.01,
)
model.fit(X_train, y_train)

In [None]:
# predict() returns two aligned sequences: the predictions and the taus
# (presumably the decision times discussed in the text below).
y_pred, tau_pred = model.predict(X_test)

# One line per test series: true label, prediction, tau.
for triple in zip(y_test, y_pred, tau_pred):
    print(*triple)

## Sanity tests

Some tests to check whether our assumptions are correct.

### 1. Insert random values at the initial timeframes.

Inserting random values at the beginning of the time series should delay the classification decision.
Ideally, each `taui` should equal the corresponding value above plus the number of prepended random time steps, `npad`.

This should make the taus larger, since the classification-relevant information now arrives later in the time series.

In [None]:
def one_hot(a, num_classes):
    """
    One-hot encode an array of integer class indices.

    `a` is flattened first, so the result has one row per element of `a`
    and `num_classes` columns. Axes of length one are then squeezed away,
    e.g. a single label yields a 1-D vector of length `num_classes`.
    """
    flat_labels = a.reshape(-1)
    # Row i of the identity matrix is the one-hot vector of class i.
    identity = np.eye(num_classes)
    encoded = identity[flat_labels]
    return np.squeeze(encoded)

# Re-seed both libraries so this experiment does not depend on earlier cells.
np.random.seed(0)
tf.set_random_seed(0)

# number of random time steps prepended to every series
npad = 10

# Prepend `npad` uniform random time steps along axis 1 of each set.
# The noise for the training set is drawn first, then the noise for the test set.
randompadded_X_train = np.concatenate(
    [np.random.rand(X_train.shape[0], npad, X_train.shape[2]), X_train],
    axis=1,
)
randompadded_X_test = np.concatenate(
    [np.random.rand(X_test.shape[0], npad, X_test.shape[2]), X_test],
    axis=1,
)

# Same hyperparameters as the baseline model; only ts_size grows by npad.
randompadded_model = DualOutputRNN(
    n_classes=y_train.shape[1],
    ts_size=randompadded_X_train.shape[1],
    batch_size=10,
    epochs=200,
    lr=.001,
    reg=.01,
    earliness_factor=.01,
)

randompadded_model.fit(randompadded_X_train, y_train)

In [None]:
# Predictions and taus of the model trained on the randomly padded series.
y_pred, tau_pred = randompadded_model.predict(randompadded_X_test)

# One line per test series: true label, prediction, tau.
for triple in zip(y_test, y_pred, tau_pred):
    print(*triple)

In [None]:
# Quick check of one_hot: encode 100 random class indices drawn from 0..4.
one_hot(np.random.randint(0, 5, size=100), num_classes=5)

### 2. Insert a new class that always has the same representation through time

For this class, the optimal classification time should always be the first time step, since later observations bring no further information about this class.

In [None]:
np.random.seed(0)


def insert_prototype_as_new_class_to_dataset(X, y, prototype_array, ncopies):
    """
    Insert copies of a prototype time series into a dataset as a new class.

    Parameters
    ----------
    X : np.ndarray
        Samples; the first axis indexes samples and the remaining axes must
        match those of `prototype_array`.
    y : np.ndarray
        Labels belonging to `X`: either one-hot encoded with shape
        (n_samples, n_classes), or 1-D integer class indices, which are
        one-hot encoded here using `max(y) + 1` columns.
    prototype_array : np.ndarray
        Prototype sample(s) with a leading sample axis; every row is
        repeated `ncopies` times.
    ncopies : int
        Number of copies of the prototype to insert.

    Returns
    -------
    X_, y_ : np.ndarray
        The augmented samples and their one-hot labels, shuffled in unison
        with NumPy's global random state. `y_` has one more column than the
        one-hot `y`; the last column marks the new prototype class.
    """
    # Use the labels that were passed in, not a module-level array, so that
    # the returned labels always belong to the given X.
    y = np.asarray(y)
    if y.ndim == 1:
        class_ids = y.astype(int)
        y = np.eye(class_ids.max() + 1)[class_ids]

    # repeat and append the prototype array at the beginning of X
    new_samples = np.repeat(prototype_array, ncopies, axis=0)
    X_ = np.concatenate([new_samples, X], axis=0)

    # extend the old one hot class labels by a zero column for the new class
    y_extended = np.pad(y, ((0, 0), (0, 1)), mode='constant', constant_values=0)

    # one hot labels of the inserted samples: only the new (last) column is set
    y_newclass = np.zeros((new_samples.shape[0], y_extended.shape[1]))
    y_newclass[:, -1] = 1

    # append the new class labels at the beginning of y
    y_ = np.concatenate([y_newclass, y_extended], axis=0)

    # shuffle X_ and y_ in unison
    randomize = np.arange(y_.shape[0])
    np.random.shuffle(randomize)
    X_ = X_[randomize]
    y_ = y_[randomize]

    return X_, y_

# A single random prototype series with the same number of time steps and
# feature dimensions as the training series.
prototype_array = np.random.rand(1, X_train.shape[1], X_train.shape[2])

# Add 25 copies of the prototype as an extra class to the training set.
X_train_aug, y_train_aug = insert_prototype_as_new_class_to_dataset(
    X_train, y_train, prototype_array, ncopies=25)

# y_test holds raw labels while the helper works on one-hot labels, so encode
# it with the same number of columns as y_train before augmenting the test set.
X_test_aug, y_test_aug = insert_prototype_as_new_class_to_dataset(
    X_test,
    tf.keras.utils.to_categorical(y_test, num_classes=y_train.shape[1]),
    prototype_array,
    ncopies=25)

In [None]:
# Re-seed both libraries so this experiment does not depend on earlier cells.
np.random.seed(0)
tf.set_random_seed(0)

# Rebinds `model` to a fresh network sized for the augmented labels
# (one output more than before, for the prototype class).
model = DualOutputRNN(
    n_classes=y_train_aug.shape[1],
    ts_size=X_train_aug.shape[1],
    batch_size=10,
    epochs=200,
    lr=.001,
    reg=.01,
    earliness_factor=.01,
)
model.fit(X_train_aug, y_train_aug)

In [None]:
# Predictions and taus of the model trained on the augmented dataset.
y_pred, tau_pred = model.predict(X_test_aug)

# One line per augmented test series: label, prediction, tau.
for triple in zip(y_test_aug, y_pred, tau_pred):
    print(*triple)