<a href="https://colab.research.google.com/github/Janina712/PUS2024_JBoecher/blob/main/Transformers_class_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Transformer: a type of neural network built around multi-head attention layers

In [2]:
import numpy as np
import keras
from keras import layers

In [3]:
def readucr(filename):
    """Load a tab-separated dataset file into features and integer labels.

    The first column of every row is taken as the class label; all remaining
    columns are taken as the feature values of that row.

    Args:
        filename: Anything ``np.loadtxt`` accepts as a source (this notebook
            passes URLs of ``.tsv`` files).

    Returns:
        A tuple ``(x, y)`` where ``x`` is a 2-D float array with one row per
        input line and ``y`` is a 1-D integer array of the matching labels.
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single row; otherwise loadtxt squeezes it to 1-D and the column slicing
    # below raises an IndexError.
    data = np.loadtxt(filename, delimiter="\t", ndmin=2)
    y = data[:, 0]
    x = data[:, 1:]
    return x, y.astype(int)


# Base location of the FordA train/test TSV files.
root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"

# Fetch the training split first, then the test split.
(x_train, y_train), (x_test, y_test) = (
    readucr(root_url + "FordA_TRAIN.tsv"),
    readucr(root_url + "FordA_TEST.tsv"),
)

In [4]:
# Shape of the training features: (number of rows, feature values per row).
x_train.shape

(3601, 500)

In [5]:
# Shape of the training labels: one label per row of x_train.
y_train.shape

(3601,)

In [6]:
# Distinct label values present in the training set.
np.unique(y_train)

array([-1,  1])

In [7]:
# reshape: give every sample a trailing axis of length 1
x_train = x_train.reshape(x_train.shape[:2] + (1,))
x_test = x_test.reshape(x_test.shape[:2] + (1,))

In [8]:
# Check that both splits now carry the extra trailing axis.
x_train.shape, x_test.shape

((3601, 500, 1), (1320, 500, 1))

In [9]:
# Count the distinct labels found in the training set and display the count.
n_classes = np.unique(y_train).size
n_classes

2

In [10]:
# shuffle data
# A fixed seed makes the permutation reproducible after a kernel restart, so
# any later split that depends on sample order sees the same rows every run.
idx = np.random.default_rng(42).permutation(x_train.shape[0])
# Apply the same index order to features and labels to keep them aligned.
x_train = x_train[idx]
y_train = y_train[idx]

In [11]:
# Rewrite the -1 label as 0 in place; every other label is left untouched.
np.putmask(y_train, y_train == -1, 0)
np.putmask(y_test, y_test == -1, 0)

In [24]:
def trandformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
  """Apply one pre-normalised Transformer encoder block to ``inputs``.

  The block consists of a self-attention sub-block followed by a position-wise
  feed-forward sub-block, each wrapped in a residual connection.

  Args:
    inputs: Keras tensor to transform; its last dimension is preserved.
    head_size: ``key_dim`` given to ``MultiHeadAttention``.
    num_heads: Number of attention heads.
    ff_dim: Number of filters in the hidden (ReLU) layer of the feed-forward
      sub-block.
    dropout: Dropout rate passed to ``MultiHeadAttention``.

  Returns:
    A Keras tensor with the same last dimension as ``inputs``.
  """
  # Self-attention sub-block: normalise, attend over itself, add the input back.
  x = layers.LayerNormalization(epsilon=1e-6)(inputs)
  x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
  res = x + inputs # residual connection

  # Feed-forward sub-block: expand to ff_dim filters, then project back to the
  # input's channel count so the second residual addition is shape-compatible.
  x = layers.LayerNormalization(epsilon=1e-6)(res)
  x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
  x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)

  return x + res

def build_model(input_shape, n_classes, head_size=8, num_heads=8, ff_dim=2000,
                num_transformer_blocks=2, mlp_units=(256, 50), dropout=0):
  """Build a Transformer-encoder classifier as a Keras functional model.

  The keyword defaults reproduce the architecture this notebook trains: two
  encoder blocks with 8 heads of size 8 and a 2000-filter feed-forward layer,
  followed by dense layers of 256 and 50 units.

  Args:
    input_shape: Shape of one sample, without the batch axis.
    n_classes: Number of units in the softmax output layer.
    head_size: ``key_dim`` for every attention layer.
    num_heads: Number of attention heads per encoder block.
    ff_dim: Hidden width of each encoder block's feed-forward sub-block.
    num_transformer_blocks: How many encoder blocks to stack.
    mlp_units: Sizes of the ReLU dense layers placed before the output layer.
    dropout: Dropout rate forwarded to every encoder block.

  Returns:
    An uncompiled ``keras.Model`` mapping inputs to class probabilities.
  """
  inputs = keras.Input(shape=input_shape)
  x = inputs

  for _ in range(num_transformer_blocks):
    x = trandformer_encoder(x, head_size, num_heads, ff_dim, dropout)

  # NOTE(review): with data_format="channels_first" Keras documents the input
  # layout as (batch, features, steps) and pools over the last axis — confirm
  # this is the intended reduction for (steps, 1)-shaped samples.
  x = layers.GlobalMaxPool1D(data_format = "channels_first")(x)

  for units in mlp_units:
    x = layers.Dense(units, activation="relu")(x)

  outputs = layers.Dense(n_classes, activation="softmax")(x)

  model = keras.Model(inputs=inputs, outputs=outputs)
  return model

In [25]:
# Per-sample input shape: everything after the leading sample axis.
input_shape = x_train.shape[1:]
model = build_model(input_shape=input_shape, n_classes=n_classes)

In [26]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [28]:
# compile model: Adam with a small learning rate, sparse integer-label loss/metric
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

In [29]:
# Early-stopping callback list: tolerate 10 epochs without improvement in the
# monitored quantity (Keras default), then restore the best weights seen.
callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

In [30]:
# fit model
# Pass the `callbacks` list defined above; without it the EarlyStopping
# callback is never attached and training always runs the full 100 epochs.
model.fit(x_train, y_train, validation_split=0.2, epochs=100, batch_size=32, verbose = 2, callbacks=callbacks)

Epoch 1/100
90/90 - 21s - 238ms/step - loss: 0.6836 - sparse_categorical_accuracy: 0.5910 - val_loss: 0.6366 - val_sparse_categorical_accuracy: 0.6505
Epoch 2/100
90/90 - 9s - 104ms/step - loss: 0.5495 - sparse_categorical_accuracy: 0.7260 - val_loss: 0.5790 - val_sparse_categorical_accuracy: 0.6990
Epoch 3/100
90/90 - 6s - 64ms/step - loss: 0.4798 - sparse_categorical_accuracy: 0.7844 - val_loss: 0.5593 - val_sparse_categorical_accuracy: 0.7226
Epoch 4/100
90/90 - 10s - 116ms/step - loss: 0.4306 - sparse_categorical_accuracy: 0.8299 - val_loss: 0.5376 - val_sparse_categorical_accuracy: 0.7184
Epoch 5/100
90/90 - 6s - 65ms/step - loss: 0.3912 - sparse_categorical_accuracy: 0.8556 - val_loss: 0.5237 - val_sparse_categorical_accuracy: 0.7351
Epoch 6/100
90/90 - 10s - 112ms/step - loss: 0.3562 - sparse_categorical_accuracy: 0.8799 - val_loss: 0.5081 - val_sparse_categorical_accuracy: 0.7503
Epoch 7/100
90/90 - 6s - 66ms/step - loss: 0.3262 - sparse_categorical_accuracy: 0.8931 - val_loss:

<keras.src.callbacks.history.History at 0x7d5e9ebc82e0>