In [None]:
import tensorflow as tf
import pickle
from data import load_data
from tensorflow import keras
from embedding_layers import Conv1DEmbed, LinearEmbed
from encoder_layers import LSTMEncoder, TransformerEncoder, MLPEncoder
from tensorflow.keras import layers
from data import load_data
from keras.models import Model
from keras import backend as K

import cv2
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Lookup tables from the short names used in the experiment cells to the
# embedding / encoder layer classes imported at the top of the notebook.
EMBEDDING_REGISTRY = dict(conv1d=Conv1DEmbed, linear=LinearEmbed)
ENCODER_REGISTRY = dict(
    lstm=LSTMEncoder,
    transformer=TransformerEncoder,
    mlp=MLPEncoder,
)

In [None]:
# Disabled code kept as a bare string literal (nothing inside it is executed).
# The text would build the three splits with load_data(data_dir) and dump each
# one to a .pkl file in the working directory.
"""train_data, val_data, test_data = load_data(data_dir)
pickle.dump(train_data, open("train_data.pkl", "wb"))
pickle.dump(test_data, open("test_data.pkl", "wb"))
pickle.dump(val_data, open("val_data.pkl", "wb"))"""

In [None]:
# Alternative (slow) path that rebuilds the splits from the raw dataset:
# train_data, val_data, test_data = load_data(data_dir)

# Load the cached splits. Each file is opened in a `with` block so the handle
# is closed as soon as the object has been read.
# NOTE(review): pickle.load can execute arbitrary code - only load .pkl files
# that were produced locally by this project.
with open("train_data.pkl", "rb") as train_file:
    train_data = pickle.load(train_file)
with open("test_data.pkl", "rb") as test_file:
    test_data = pickle.load(test_file)
with open("val_data.pkl", "rb") as val_file:
    val_data = pickle.load(val_file)

In [None]:
# pickle.dump(train_data, open("train_data.pkl", "wb"))
# pickle.dump(test_data, open("test_data.pkl", "wb"))
# pickle.dump(val_data, open("val_data.pkl", "wb"))

In [None]:
# Each cached split is indexable: element 0 is used as the model input and
# element 1 as the target in the fit / evaluate calls further down.
x_train = train_data[0]
y_train = train_data[1]

x_val = val_data[0]
y_val = val_data[1]

x_test = test_data[0]
y_test = test_data[1]

In [None]:
# Disabled code kept as a bare string literal (nothing inside it is executed).
# The text is a functional-API version of the model: input -> embedding ->
# encoder -> global average pooling -> dropout -> softmax prediction layer.
"""inputs = layers.Input(shape=(d_input,max_len, ))
embed_layer = EMBEDDING_REGISTRY[embedding_layer](**EMBED_PARAMS)
encoder_layer = ENCODER_REGISTRY[encoder_layer](**ENCODER_PARAMS)
avg_pool_layer = layers.GlobalAveragePooling1D()
dropout_layer = layers.Dropout(0.1)
prediction_layer = layers.Dense(num_classes, activation="softmax")

x = embed_layer(inputs)
x = encoder_layer(x)
x = avg_pool_layer(x)
x = dropout_layer(x)
outputs = prediction_layer(x)

model = keras.Model(inputs=inputs, outputs=outputs)"""

In [None]:
def positional_signal(hidden_size: int, length: int,
                      min_timescale: float = 1.0, max_timescale: float = 1e4):
    """
    Helper function, constructing basic positional encoding.
    The code is partially based on implementation from Tensor2Tensor library
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py

    Args:
        hidden_size: size of the feature axis of the signal; must be even.
        length: number of positions to encode.
        min_timescale: smallest timescale of the geometric progression.
        max_timescale: largest timescale of the geometric progression.

    Returns:
        A backend tensor of shape (1, length, hidden_size). Along the last
        axis the first half holds the sines and the second half the cosines.

    Raises:
        ValueError: if `hidden_size` is odd.
    """

    if hidden_size % 2 != 0:
        raise ValueError(
            f"The hidden dimension of the model must be divisible by 2. "
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    # With hidden_size == 2 there is a single timescale and
    # `num_timescales - 1` is 0; clamp the denominator to 1 (as Tensor2Tensor
    # does) so the increment is finite instead of producing a NaN signal.
    timescale_steps = max(num_timescales - 1, 1)
    log_timescale_increment = K.constant(
        (np.log(float(max_timescale) / float(min_timescale)) /
         timescale_steps),
        dtype=K.floatx())
    # Geometric progression of inverse timescales, starting at min_timescale.
    inv_timescales = (
            min_timescale *
            K.exp(K.arange(num_timescales, dtype=K.floatx()) *
                  -log_timescale_increment))
    # Outer product: one row per position, one column per timescale.
    scaled_time = K.expand_dims(position, 1) * K.expand_dims(inv_timescales, 0)
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    # Leading axis of size 1 so the signal broadcasts over a batch.
    return K.expand_dims(signal, axis=0)


class AddPositionalEncoding(layers.Layer):
    """
    Adds the sinusoidal positional signal from section 3.5 of
    "Attention is all you need" to its input. It can also act as a base class
    for the richer coordinate encoding of "Universal Transformers".
    """

    def __init__(self, min_timescale: float = 1.0,
                 max_timescale: float = 1.0e4, **kwargs):
        super().__init__(**kwargs)
        self.min_timescale = min_timescale
        self.max_timescale = max_timescale
        # Filled in by build(), once the input shape is known.
        self.signal = None

    def get_config(self):
        """Return the base layer config extended with both timescale settings."""
        config = super().get_config()
        config.update(
            min_timescale=self.min_timescale,
            max_timescale=self.max_timescale,
        )
        return config

    def build(self, input_shape):
        """Precompute the signal for a three-element (batch, length, hidden) shape."""
        _batch, seq_len, width = input_shape
        self.signal = positional_signal(
            width, seq_len, self.min_timescale, self.max_timescale)
        return super().build(input_shape)

    def call(self, inputs, **kwargs):
        """Return `inputs` with the precomputed positional signal added."""
        encoded = inputs + self.signal
        return encoded

In [None]:
# Sizes are set locally so this cell also runs on a fresh kernel; they mirror
# the max_len / d_model values used by the experiment cells further down.
n, d = 1000, 256
pos_encoding = positional_signal(d, n)
print(pos_encoding.shape)
pos_encoding = pos_encoding[0]

# positional_signal concatenates the sine half and the cosine half along the
# feature axis (it does not interleave them), so a plain transpose already
# gives a (depth, position) grid: sines in the first d // 2 rows, cosines in
# the rest.
pos_encoding = tf.transpose(pos_encoding)

plt.pcolormesh(pos_encoding, cmap='RdBu')
plt.ylabel('Depth')
plt.xlabel('Position')
plt.colorbar()
plt.show()

In [None]:
# EXPERIMENT PARAMETERS
# conv1d embedding + sinusoidal positional encoding + transformer encoder
d_input = 12  # number of channels in input
max_len = 1000
d_model = 256
data_dir = "/home/thomasjiang/cs271-project/CS271/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/"
embedding_layer = "conv1d"
encoder_layer = "transformer"
kernel_size = 4
num_classes = 5
num_heads = 6
ff_dim = 128
rate = 0.1
patch_size = 16

# Keyword arguments forwarded to the embedding layer constructor.
EMBED_PARAMS = dict(
    d_input=d_input,
    d_model=d_model,
    kernel_size=kernel_size,
    patch_size=patch_size,
)

# Keyword arguments forwarded to the encoder layer constructor.
ENCODER_PARAMS = dict(
    d_input=d_input,
    d_model=d_model,
    kernel_size=kernel_size,
    num_heads=num_heads,
    ff_dim=ff_dim,
    rate=rate,
    max_len=max_len,
)

# input -> embedding -> positional encoding -> encoder -> pooling -> dropout -> softmax
model_layers = [
    layers.Input(shape=(d_input, max_len)),
    EMBEDDING_REGISTRY[embedding_layer](**EMBED_PARAMS),
    AddPositionalEncoding(),
    ENCODER_REGISTRY[encoder_layer](**ENCODER_PARAMS),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.1),
    layers.Dense(num_classes, activation="softmax"),
]
model = keras.Sequential(model_layers)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=20,
    validation_data=(x_val, y_val),
)
# The "_positional" suffix keeps this run apart from the non-positional one.
model.save(f"{embedding_layer}_{encoder_layer}_positional")

In [None]:
# Evaluate whichever `model` is currently bound on the training split and
# unpack the two values that evaluate() returns.
train_metrics = model.evaluate(x_train, y_train, batch_size=128)
score, acc = train_metrics

In [None]:
# EXPERIMENT PARAMETERS
# conv1d embedding + transformer encoder, without a positional layer
d_input = 12 # number of channels in input
max_len = 1000
d_model = 256
data_dir = "/home/thomasjiang/cs271-project/CS271/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/"
embedding_layer = "conv1d"
encoder_layer = "transformer"
kernel_size=4
num_classes=5
num_heads = 6
ff_dim = 128
rate=0.1
patch_size=16

# Keyword arguments forwarded to the embedding layer constructor.
EMBED_PARAMS = {
    'd_input' : d_input,
    'd_model' : d_model,
    'kernel_size': kernel_size,
    'patch_size': patch_size
}

# Keyword arguments forwarded to the encoder layer constructor.
ENCODER_PARAMS = {
    'd_input' : d_input,
    'd_model' : d_model,
    'kernel_size': kernel_size,
    'num_heads': num_heads,
    'ff_dim' : ff_dim ,
    'rate' : rate,
    'max_len' : max_len
}

# The stray `PositionEmbedding(maxlen)` entry was removed: it had no trailing
# comma (a SyntaxError) and neither name is defined in this notebook. This run
# is saved as the non-positional model and the analysis cells read its encoder
# at layers[1], so the stack is embedding -> encoder with nothing in between.
model = keras.Sequential([
    layers.Input(shape=(d_input, max_len, )),
    EMBEDDING_REGISTRY[embedding_layer](**EMBED_PARAMS),
    ENCODER_REGISTRY[encoder_layer](**ENCODER_PARAMS),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.1),
    layers.Dense(num_classes, activation="softmax")
    ])

model.compile(optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
history = model.fit(
    x_train, y_train, batch_size=128, epochs=10, validation_data=(x_val, y_val)
)
model.save(f"{embedding_layer}_{encoder_layer}")

In [None]:
# EXPERIMENT PARAMETERS
# linear embedding + MLP encoder
d_input = 12  # number of channels in input
max_len = 1000
d_model = 256
data_dir = "/home/thomasjiang/cs271-project/CS271/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/"
embedding_layer = "linear"
encoder_layer = "mlp"
kernel_size = 4
num_classes = 5
num_heads = 6
ff_dim = 128
rate = 0.1
patch_size = 16

# Keyword arguments forwarded to the embedding layer constructor.
EMBED_PARAMS = dict(
    d_input=d_input,
    d_model=d_model,
    kernel_size=kernel_size,
    patch_size=patch_size,
)

# The MLP encoder is given only these three settings; the input-size,
# kernel-size and attention-related keys are deliberately left out.
ENCODER_PARAMS = dict(
    d_model=d_model,
    dropout_rate=rate,
    max_len=max_len,
)

# input -> embedding -> encoder -> pooling -> dropout -> softmax
model_layers = [
    layers.Input(shape=(d_input, max_len)),
    EMBEDDING_REGISTRY[embedding_layer](**EMBED_PARAMS),
    ENCODER_REGISTRY[encoder_layer](**ENCODER_PARAMS),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.1),
    layers.Dense(num_classes, activation="softmax"),
]
model = keras.Sequential(model_layers)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=20,
    validation_data=(x_val, y_val),
)
model.save(f"{embedding_layer}_{encoder_layer}")

In [None]:
# EXPERIMENT PARAMETERS
# conv1d embedding + MLP encoder
d_input = 12  # number of channels in input
max_len = 1000
d_model = 256
data_dir = "/home/thomasjiang/cs271-project/CS271/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/"
embedding_layer = "conv1d"
encoder_layer = "mlp"
kernel_size = 4
num_classes = 5
num_heads = 6
ff_dim = 128
rate = 0.1
patch_size = 16

# Keyword arguments forwarded to the embedding layer constructor.
EMBED_PARAMS = dict(
    d_input=d_input,
    d_model=d_model,
    kernel_size=kernel_size,
    patch_size=patch_size,
)

# The MLP encoder is given only these three settings; the input-size,
# kernel-size and attention-related keys are deliberately left out.
ENCODER_PARAMS = dict(
    d_model=d_model,
    dropout_rate=rate,
    max_len=max_len,
)

# input -> embedding -> encoder -> pooling -> dropout -> softmax
model_layers = [
    layers.Input(shape=(d_input, max_len)),
    EMBEDDING_REGISTRY[embedding_layer](**EMBED_PARAMS),
    ENCODER_REGISTRY[encoder_layer](**ENCODER_PARAMS),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.1),
    layers.Dense(num_classes, activation="softmax"),
]
model = keras.Sequential(model_layers)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=20,
    validation_data=(x_val, y_val),
)
model.save(f"{embedding_layer}_{encoder_layer}")

In [None]:
# Reload every saved experiment from the working directory.
# NOTE(review): the visible cells of this notebook only save 'conv1d_transformer',
# 'conv1d_transformer_positional', 'linear_mlp' and 'conv1d_mlp'; the other
# directories must already exist from earlier runs.
load_saved = keras.models.load_model

model_conv1d_lstm = load_saved('conv1d_lstm')
model_conv1d_transformer = load_saved('conv1d_transformer')
model_conv1d_mlp = load_saved('conv1d_mlp')

model_linear_mlp = load_saved('linear_mlp')
model_linear_lstm = load_saved('linear_lstm')
model_linear_transformer = load_saved('linear_transformer')

positional_linear = load_saved('linear_transformer_positional')
positional_conv1d = load_saved('conv1d_transformer_positional')

In [None]:
# Print the architecture summary of every loaded model, linear before conv1d
# for each encoder type, with the two positional variants last.
loaded_models = (
    model_linear_mlp,
    model_conv1d_mlp,
    model_linear_lstm,
    model_conv1d_lstm,
    model_linear_transformer,
    model_conv1d_transformer,
    positional_linear,
    positional_conv1d,
)
for loaded_model in loaded_models:
    loaded_model.summary()

In [None]:
# Helper function for visualization (from CS224W Colab 2)
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np

def visualize(h, color, title, sample=False):
    """Project `h` to 2-D with t-SNE and scatter-plot it coloured by `color`.

    Args:
        h: 2-D array-like with one row per sample. It is converted with
            np.asarray so that non-NumPy inputs can be indexed below.
        color: one label per row of `h`, used to colour the points.
        title: figure title.
        sample: when True, plot a random subset of at most 100 rows instead of
            every row.
    """
    h = np.asarray(h)
    color = np.asarray(color)

    if sample:
        n_rows = h.shape[0]
        # Draw without replacement (no duplicated points) and never ask for
        # more rows than are available.
        random_idx = np.random.choice(n_rows, size=min(100, n_rows), replace=False)
        print(h.shape, color.shape)
        h = h[random_idx, :]
        color = color[random_idx]

    z = TSNE(n_components=2).fit_transform(h)
    plt.figure(figsize=(10,10))
    plt.xticks([])
    plt.yticks([])
    plt.title(title)
    classes = ['NORM', 'MI', 'STTC', 'CD', 'HYP']
    scatter = plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")

    # A bare plt.legend() finds no labelled artists and draws nothing, so the
    # legend entries are built from the scatter's own colour mapping.
    handles, default_labels = scatter.legend_elements()
    present = np.unique(color)
    # NOTE(review): assumes integer label i corresponds to classes[i] - confirm
    # against the label encoding produced by data.load_data.
    labels_are_class_ids = len(handles) == len(present) and all(
        float(c).is_integer() and 0 <= int(c) < len(classes) for c in present
    )
    if labels_are_class_ids:
        legend_labels = [classes[int(c)] for c in present]
    else:
        # Fall back to the raw values when they cannot be mapped to class names.
        legend_labels = default_labels
    plt.legend(handles, legend_labels, title="Class")
    plt.show()

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

def heatmap2d(arr: np.ndarray, title):
    """Show `arr` as an image on a wide 20x5 figure, with a colorbar and `title`."""
    fig, ax = plt.subplots(figsize=(20, 5))
    image = ax.imshow(arr)
    fig.colorbar(image, ax=ax)
    ax.set_title(title)
    plt.show()

def multi_heatmap_attention(arr, title):
    """Draw arr[0] .. arr[5] as seaborn heatmaps on a 2x3 grid of panels.

    Panels are filled row by row and titled "<title> 1" to "<title> 6".
    The shape of `arr` is printed first.
    """
    print(arr.shape)
    fig, axes = plt.subplots(2, 3, figsize=(15, 15))
    panels = list(axes.flat)  # row-major: top row first, then bottom row

    for panel_idx, panel in enumerate(panels):
        sns.heatmap(arr[panel_idx], ax=panel)
    for panel_number, panel in enumerate(panels, start=1):
        panel.title.set_text(f"{title} {panel_number}")
    plt.show()
    
def get_embedding_weights(model, i):
    """Return the first two weight arrays of layer `i` of `model`.

    The values are read with the layer's get_weights(), not from its symbolic
    `.output` tensor, so the result is actual parameter data that can be
    plotted.

    Args:
        model: object exposing a `layers` sequence.
        i: index of the layer to read.

    Returns:
        Tuple (layer_W, layer_b): the first and second entries of the layer's
        weight list - presumably kernel and bias; confirm for custom layers.

    Raises:
        ValueError: if the layer has fewer than two weight arrays.
    """
    layer_weights = model.layers[i].get_weights()
    if len(layer_weights) < 2:
        raise ValueError(
            f"Layer {i} has {len(layer_weights)} weight array(s); "
            f"expected at least 2")
    layer_W = layer_weights[0]
    layer_b = layer_weights[1]
    return layer_W, layer_b

In [None]:
# Flatten axes 1 and 2 of every test record into a single feature vector and
# plot the raw inputs with t-SNE.
flat_width = x_test.shape[1] * x_test.shape[2]
ins = tf.reshape(x_test, (-1, flat_width))
visualize(ins, y_test, "Initial embedding space")

In [None]:
# Read the weight pair of the first layer (index 0) of each non-positional model.
embed_layer_idx = 0

# Linear embedding variants.
linear_mlp_w, linear_mlp_b = get_embedding_weights(
    model_linear_mlp, embed_layer_idx)
linear_lstm_w, linear_lstm_b = get_embedding_weights(
    model_linear_lstm, embed_layer_idx)
linear_transformer_w, linear_transformer_b = get_embedding_weights(
    model_linear_transformer, embed_layer_idx)

# Conv1d embedding variants.
conv1d_mlp_w, conv1d_mlp_b = get_embedding_weights(
    model_conv1d_mlp, embed_layer_idx)
conv1d_lstm_w, conv1d_lstm_b = get_embedding_weights(
    model_conv1d_lstm, embed_layer_idx)
conv1d_transformer_w, conv1d_transformer_b = get_embedding_weights(
    model_conv1d_transformer, embed_layer_idx)

In [None]:
# Heatmap of the first weight array taken from the Linear+MLP model.
heatmap2d(
    linear_mlp_w,
    title="embedding heatmap for linear embedding layer",
)

In [None]:
# One figure for each of the first four leading-axis slices of the
# Conv1d+MLP embedding weights.
for slice_idx in range(4):
    weight_slice = conv1d_mlp_w[slice_idx]
    heatmap2d(weight_slice, "one embedding heatmap for conv1d embedding layers")

In [None]:

# t-SNE view of the Linear+MLP model's predictions on the test split.
s1 = model_linear_mlp.predict(x_test)
visualize(h=s1, color=y_test, title="Linear_MLP final embedding space")

In [None]:
# Echo the Linear+MLP predictions computed above as this cell's output.
s1

In [None]:
# Linear+LSTM: heatmap of the first weight array, then a t-SNE view of the
# model's predictions on the test split.
heatmap2d(linear_lstm_w, title="embedding heatmap for linear_lstm")

s2 = model_linear_lstm.predict(x_test)
visualize(h=s2, color=y_test, title="Linear_LSTM final embedding space")

In [None]:

# t-SNE view of the Linear+Transformer model's predictions on the test split.
s3 = model_linear_transformer.predict(x_test)
visualize(h=s3, color=y_test, title="Linear_Transformer final embedding space")

In [None]:

# t-SNE view of the Conv1d+MLP model's predictions on the test split.
s4 = model_conv1d_mlp.predict(x_test)
visualize(h=s4, color=y_test, title="Conv1d_MLP final embedding space")

In [None]:

# t-SNE view of the Conv1d+LSTM model's predictions on the test split.
s5 = model_conv1d_lstm.predict(x_test)
visualize(h=s5, color=y_test, title="Conv1d_LSTM final embedding space")

In [None]:
# t-SNE view of the Conv1d+Transformer model's predictions on the test split.
s6 = model_conv1d_transformer.predict(x_test)
visualize(h=s6, color=y_test, title="Conv1d_Transformer final embedding space")

In [None]:
# Evaluate the Conv1d+LSTM model on the test split and unpack the two values
# that evaluate() returns.
test_metrics = model_conv1d_lstm.evaluate(x_test, y_test, batch_size=128)
score, acc = test_metrics

In [None]:
# Predictions of the positional Linear+Transformer model on the test split;
# they are plotted in the next cell.
s7 = positional_linear.predict(
    x_test,
)

In [None]:
# t-SNE view of the positional Linear+Transformer predictions held in s7.
visualize(
    h=s7,
    color=y_test,
    title="Positional_linear_transformer final embedding space",
)

In [None]:
# t-SNE view of the positional Conv1d+Transformer model's predictions on the
# test split.
s8 = positional_conv1d.predict(x_test)
visualize(
    h=s8,
    color=y_test,
    title="Positional_conv1d_transformer final embedding space",
)

In [None]:
# First weight array of the `att` sub-layer of layer 1 (the encoder in the
# non-positional stack), with its first two axes swapped so axis 0 selects
# the panel to draw.
# NOTE(review): presumably one slice per attention head - confirm against
# the encoder implementation.
linear_attention_layer = model_linear_transformer.layers[1].att
attention_linear = linear_attention_layer.get_weights()[0]
attention_linear = np.transpose(attention_linear, (1, 0, 2))
multi_heatmap_attention(attention_linear, "Linear Transformer Attention Head ")




In [None]:
# Same plot for the positional linear model, whose `att` sub-layer is read
# from layer 2 instead of layer 1.
pos_linear_attention_layer = positional_linear.layers[2].att
attention_pos_linear = pos_linear_attention_layer.get_weights()[0]
attention_pos_linear = np.transpose(attention_pos_linear, (1, 0, 2))
multi_heatmap_attention(attention_pos_linear, "Linear + positional Attention Head ")

In [None]:
# First weight array of the `att` sub-layer of layer 1 of the
# Conv1d+Transformer model, with its first two axes swapped so axis 0 selects
# the panel to draw.
conv1d_attention_layer = model_conv1d_transformer.layers[1].att
attention_conv1d = conv1d_attention_layer.get_weights()[0]
attention_conv1d = np.transpose(attention_conv1d, (1, 0, 2))
multi_heatmap_attention(attention_conv1d, "Conv1d Attention Head ")

In [None]:
# Same plot for the positional conv1d model, whose `att` sub-layer is read
# from layer 2 instead of layer 1.
pos_conv1d_attention_layer = positional_conv1d.layers[2].att
attention_pos_conv1d = pos_conv1d_attention_layer.get_weights()[0]
attention_pos_conv1d = np.transpose(attention_pos_conv1d, (1, 0, 2))
multi_heatmap_attention(attention_pos_conv1d, "Conv1d + positional Attention Head ")