In [2]:
import numpy as np


def readucr(filename):
    """Load a tab-separated dataset whose first column holds the class label.

    Parameters
    ----------
    filename : str or path-like
        Location of the ``.tsv`` file; passed unchanged to ``np.loadtxt``.

    Returns
    -------
    x : np.ndarray
        All columns after the first, shape ``(n_rows, n_columns - 1)``.
    y : np.ndarray of int
        The first column cast to ``int``, shape ``(n_rows,)``.
    """
    # ndmin=2 keeps a one-row file two-dimensional. Without it np.loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(filename, delimiter="\t", ndmin=2)
    y = data[:, 0]
    x = data[:, 1:]
    return x, y.astype(int)


root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"

x_train, y_train = readucr(root_url + "FordA_TRAIN.tsv")
x_test, y_test = readucr(root_url + "FordA_TEST.tsv")

# Append a trailing axis of size 1: (samples, time steps) -> (samples, time steps, 1).
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

n_classes = len(np.unique(y_train))

# Shuffle the training set with a fixed seed so that re-running the notebook
# yields the same ordering. The later `model.fit(..., validation_split=0.2)`
# call slices its validation subset by position, so an unseeded shuffle would
# change the train/validation split on every run.
SHUFFLE_SEED = 42
idx = np.random.default_rng(SHUFFLE_SEED).permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]

# Remap the label -1 to 0 so labels are non-negative integer class indices,
# as required by the sparse categorical cross-entropy loss used for training.
y_train[y_train == -1] = 0
y_test[y_test == -1] = 0


In [6]:
# x_train is a numpy.ndarray; show its shape next to the label array's shape.
# The header line below mirrors the argument layout of the value line after it.
# (The previous version had two adjacent string literals that were silently
# concatenated, printing "x_train.shape, , y_train.shape".)
print('x_train.shape', ' ', 'y_train.shape')
print(x_train.shape, ' ', y_train.shape)
print('idx : ', idx, 'len : ' , len(idx))
x_train[0:1]

x_train.shape, , y_train.shape
(3601, 500, 1)   (3601,)
idx :  [2627  205  505 ...  588 3275 3286]


array([[[-1.4028552e+00],
        [-1.0306994e+00],
        [-5.9277512e-01],
        [-1.6164558e-01],
        [ 2.0414627e-01],
        [ 4.5454831e-01],
        [ 5.7315732e-01],
        [ 5.7054647e-01],
        [ 4.8993483e-01],
        [ 3.8316207e-01],
        [ 2.9393190e-01],
        [ 2.4968453e-01],
        [ 2.6859470e-01],
        [ 3.3670422e-01],
        [ 4.3638198e-01],
        [ 5.3859472e-01],
        [ 6.2363574e-01],
        [ 6.7545856e-01],
        [ 6.8563011e-01],
        [ 6.5896958e-01],
        [ 5.9884221e-01],
        [ 5.0950878e-01],
        [ 4.0442601e-01],
        [ 2.9338613e-01],
        [ 1.7078689e-01],
        [ 4.2282175e-02],
        [-1.0534091e-01],
        [-2.6246651e-01],
        [-4.2622056e-01],
        [-5.8086722e-01],
        [-7.1552692e-01],
        [-8.1927793e-01],
        [-8.8689436e-01],
        [-9.1749538e-01],
        [-9.0812458e-01],
        [-8.6457891e-01],
        [-7.8705224e-01],
        [-6.7295900e-01],
        [-5.

In [2]:
# Build the model
# Our model processes a tensor of shape (batch size, sequence length, features), where sequence length is the number of time steps and features is each input timeseries.

# You can replace your classification RNN layers with this one: the inputs are fully compatible!
from tensorflow import keras
from tensorflow.keras import layers

# We include residual connections, layer normalization, and dropout. The resulting layer can be stacked multiple times.

# The projection layers are implemented through keras.layers.Conv1D.

# Popular time series preprocessing techniques include:
# - Scaling to [0, 1] or [-1, 1]
# - Standard scaling (removing the mean, dividing by the standard deviation)
# - Power transforming (using a power function to push the data towards a more normal distribution; typically used on skewed data or where outliers are present)
# - Outlier removal
# - Pairwise differencing or calculating percentage differences
# - Seasonal decomposition (trying to make the time series stationary)
# - Engineering more features (automated feature extractors, bucketing to percentiles, etc.)
# - Resampling in the time dimension
# - Resampling in a feature dimension (instead of using the time interval, use a predicate on a feature to re-arrange the time steps, for example when a recorded quantity exceeds N units)
# - Rolling values
# - Aggregations
# - Combinations of these techniques


In [9]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention followed by a position-wise feed-forward part.

    Each of the two halves ends in layer normalization and is then added back
    to its own input (residual connection).

    Parameters
    ----------
    inputs : tensor
        Block input; it is used as both query and value of the attention layer.
    head_size : int
        ``key_dim`` of the multi-head attention layer.
    num_heads : int
        Number of attention heads.
    ff_dim : int
        Number of filters of the first (ReLU) kernel-size-1 convolution.
    dropout : float, optional
        Rate shared by the attention layer and both ``Dropout`` layers.

    Returns
    -------
    tensor
        Output whose last dimension equals ``inputs.shape[-1]``.

    NOTE(review): ``LayerNormalization`` is constructed with its default axis.
    If that is the last axis and the input has a single feature channel (as in
    this notebook's ``(500, 1)`` samples), the normalized value is constant and
    only the learned offset passes through -- confirm this is intended.
    """
    # --- Self-attention half -------------------------------------------------
    self_attention = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )
    attended = self_attention(inputs, inputs)
    attended = layers.Dropout(dropout)(attended)
    attended = layers.LayerNormalization(epsilon=1e-6)(attended)
    res = attended + inputs

    # --- Feed-forward half ---------------------------------------------------
    # Two kernel-size-1 convolutions act as per-time-step projections; the
    # second one maps back to the input's feature count so the residual add works.
    expand = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")
    hidden = expand(res)
    hidden = layers.Dropout(dropout)(hidden)
    project = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)
    hidden = project(hidden)
    hidden = layers.LayerNormalization(epsilon=1e-6)(hidden)
    return hidden + res



In [10]:
'''
The main part of our model is now complete. We can stack multiple of those transformer_encoder blocks and
we can also proceed to add the final Multi-Layer Perceptron classification head. Apart from a stack of Dense layers,
we need to reduce the output tensor of the TransformerEncoder part of our model 
    down to a vector of features for each data point in the current batch.
A common way to achieve this is to use a pooling layer. For this example, a GlobalAveragePooling1D layer is sufficient.
'''
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
    num_classes=None,
):
    """Build the classifier: stacked encoder blocks, pooling, then an MLP head.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample (without the batch axis).
    head_size, num_heads, ff_dim :
        Forwarded to every ``transformer_encoder`` block.
    num_transformer_blocks : int
        How many encoder blocks to stack.
    mlp_units : iterable of int
        Width of each ReLU ``Dense`` layer in the classification head.
    dropout : float, optional
        Dropout rate used inside the encoder blocks.
    mlp_dropout : float, optional
        Dropout rate applied after each ``Dense`` layer of the head.
    num_classes : int, optional
        Size of the softmax output layer. When omitted, the notebook-level
        global ``n_classes`` is used, which keeps existing calls working.

    Returns
    -------
    keras.Model
        Uncompiled model mapping ``input_shape`` samples to class probabilities.
    """
    if num_classes is None:
        # Backward-compatible fallback to the global computed in the data cell.
        num_classes = n_classes

    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # Reduce the encoder output to one feature vector per sample.
    # NOTE(review): with data_format="channels_first" Keras documents the input
    # layout as (batch, features, steps), so pooling runs over the LAST axis
    # here rather than over the time axis -- confirm this is intended.
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    return keras.Model(inputs, outputs)

In [11]:

# Train and evaluate

# Shape of a single sample: everything after the batch axis of x_train.
input_shape = x_train.shape[1:]

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    dropout=0.25,
    mlp_dropout=0.4,
)

# Labels are integer class indices, hence the "sparse" loss and metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

# Stop once the monitored quantity has not improved for 10 epochs and
# roll the model back to the best weights seen.
callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

# Hold out 20% of the training data for validation while fitting.
fit_options = dict(
    validation_split=0.2,
    epochs=200,
    batch_size=64,
    callbacks=callbacks,
)
model.fit(x_train, y_train, **fit_options)

model.evaluate(x_test, y_test, verbose=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 500, 1)]     0           []                               
                                                                                                  
 multi_head_attention (MultiHea  (None, 500, 1)      7169        ['input_1[0][0]',                
 dAttention)                                                      'input_1[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 500, 1)       0           ['multi_head_attention[0][0]']   
                                                                                                  
 layer_normalization (LayerNorm  (None, 500, 1)      2           ['dropout[0][0]']            

KeyboardInterrupt: 