---
# Transformer XL
---

---
# Import Libraries

In [1]:
import os
import sys

In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [3]:
sys.path.append("../../deep-learning-dna")

In [4]:
import wandb

In [5]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
import math
import string

import Set_Transformer 
from common.models import dnabert
from common import dna
from lmdbm import Lmdb
from common.data import DnaSequenceGenerator, DnaLabelType, DnaSampleGenerator, find_dbs
import wandb

import tf_utils as tfu

In [6]:
strategy = tfu.devices.select_gpu(0, use_dynamic_memory=True)

---
# Load Data

In [7]:
#Import pretrained model
api = wandb.Api()
model_path = api.artifact("sirdavidludwig/dnabert-pretrain/dnabert-pretrain-8dim:latest").download()
pretrained_model = dnabert.DnaBertModel.load(model_path)
pretrained_model.load_weights(model_path + "/model.h5")
pretrained_model

<common.models.dnabert.DnaBertPretrainModel at 0x7fdf70c1fdc0>

In [8]:
#Load datafiles
dataset_path = api.artifact("sirdavidludwig/nachusa-dna/dnasamples-complete:latest").download('/data/dna_samples:v1')
samples = find_dbs(dataset_path + '/train')
samples[13]

[34m[1mwandb[0m: Downloading large artifact dnasamples-complete:latest, 4079.09MB. 420 files... Done. 0:0:0.1


'/data/dna_samples:v1/train/WS-CCW-Jul2015_S82_L001_R1_001.db'

---
# Create Dataset

In [9]:
split_ratios = [0.8, 0.2]
set_len = 1000
sequence_len = 150
kmer = 3
batch_size = [20,5]
batches_per_epoch = 20
augument = True
labels = DnaLabelType.SampleIds
seed = 0
rng = np.random.default_rng(seed)
random_samples = samples.copy()

In [10]:
rng.shuffle(random_samples)

In [11]:
trimmed_samples, (train_dataset, val_dataset) = DnaSampleGenerator.split(samples=random_samples[0:50], split_ratios=split_ratios, subsample_length=set_len, sequence_length=sequence_len,kmer=kmer,batch_size=batch_size,batches_per_epoch=batches_per_epoch,augment=augument,labels=labels, rng=rng)

Sample '/data/dna_samples:v1/train/Wes7-PCRblank1_S8_L001_R1_001.db' does not contain enough sequences. This sample will be ignored.
Sample '/data/dna_samples:v1/train/Wes7-PCRblank1_S8_L001_R1_001.db' does not contain enough sequences. This sample will be ignored.


In [12]:
random_samples[0:50]

['/data/dna_samples:v1/train/WS-CCE-Apr2016_S6_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wes52-10-TC_S53_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-WH-Jul2016_S46_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wes41-10-HN_S42_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wesley026-Ag-072820_S165_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-MU-Apr2016_S84_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wes5-5-CCE_S6_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-MR-Apr2016_S13_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-MU-Sep2015_S43_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wesley012-HN-051120_S151_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-HPN-Sep2015_S91_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-TCR-Sep2015_S52_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wes26-8-AG_S27_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-SB-Jul2016_S22_L001_R1_001.db',
 '/data/dna_samples:v1/train/Wes25-8-MU_S26_L001_R1_001.db',
 '/data/dna_samples:v1/train/WS-SB-Oct

--- 
# Batch Parameters

In [13]:
block_size = 200
seq_len = set_len
maxlen = set_len
vocab_size = 5
num_chars_data = set_len*sequence_len
max_files = len(train_dataset.samples)
max_files

49

In [14]:
if block_size-2 > seq_len:
    raise ValueError("Block size should not be bigger than sequence length")

In [15]:
print(maxlen)
print(vocab_size)
print(num_chars_data)

1000
5
150000


---
# Create Embeddings

In [16]:
#Create 8 dimensional embeddings
pretrained_encoder = dnabert.DnaBertEncoderModel(pretrained_model.base)
pretrained_encoder.trainable = False

In [17]:
class Create_Embeddings(keras.layers.Layer):
    def __init__(self, encoder):
        super(Create_Embeddings, self).__init__()
        self.encoder = encoder
        
    def subbatch_predict(self, model, batch, subbatch_size, concat=lambda old, new: tf.concat((old, new), axis=0)):
        def predict(i, result=None):
            n = i + subbatch_size
            pred = tf.stop_gradient(model(batch[i:n]))
            if result is None:
                return [n, pred]
            return [n, concat(result, pred)]
        i, result = predict(0)
        batch_size = tf.shape(batch)[0]
        i, result = tf.while_loop(
            cond=lambda i, _: i < batch_size,
            body=predict,
            loop_vars=[i, result],
            parallel_iterations=1)

        return result
    
    def modify_data_for_input(self, data):
        batch_size = tf.shape(data)[0]
        subsample_size = tf.shape(data)[1]
        flat_data = tf.reshape(data, (batch_size*subsample_size, -1))
        encoded = self.subbatch_predict(self.encoder, flat_data, 128)
        return tf.reshape(encoded, (batch_size, subsample_size, -1))
    
    def call(self, data):
        return  self.modify_data_for_input(data)

---
# Setup

In [18]:
def clone_initializer(initializer):
    if isinstance(initializer, tf.keras.initializers.Initializer):
        return initializer.__class__.from_config(initializer.get_config())
    return initializer

---
# Cache Memory

In [19]:
def _cache_memory(current_state, previous_state, memory_length, reuse_length=0):
    if memory_length is None or memory_length == 0:
        return None
    else:
        if reuse_length > 0:
            current_state = current_state[:, :reuse_length, :]

        if previous_state is None:
            new_mem = current_state[:, -memory_length:, :]
        else:
            new_mem = tf.concat(
                    [previous_state, current_state], 1)[:, -memory_length:, :]

    return tf.stop_gradient(new_mem)

---
# MultiHead Relative Attention

In [20]:
class MultiHeadRelativeAttention(tf.keras.layers.MultiHeadAttention):
    def __init__(self,
                 kernel_initializer="variance_scaling",
                 **kwargs):
        super().__init__(kernel_initializer=kernel_initializer,
                                         **kwargs)

    def _build_from_signature(self, query, value, key=None):
        super(MultiHeadRelativeAttention, self)._build_from_signature(
                query=query,
                value=value,
                key=key)
        if hasattr(value, "shape"):
            value_shape = tf.TensorShape(value.shape)
        else:
            value_shape = value
        if key is None:
            key_shape = value_shape
        elif hasattr(key, "shape"):
            key_shape = tf.TensorShape(key.shape)
        else:
            key_shape = key

        common_kwargs = dict(
                kernel_initializer=self._kernel_initializer,
                bias_initializer=self._bias_initializer,
                kernel_regularizer=self._kernel_regularizer,
                bias_regularizer=self._bias_regularizer,
                activity_regularizer=self._activity_regularizer,
                kernel_constraint=self._kernel_constraint,
                bias_constraint=self._bias_constraint)

        with tf.init_scope():
            einsum_equation, _, output_rank = _build_proj_equation(
                    key_shape.rank - 1, bound_dims=1, output_dims=2)
            self._encoding_dense = tf.keras.layers.experimental.EinsumDense(
                    einsum_equation,
                    output_shape=_get_output_shape(
                        output_rank - 1,
                        [self._num_heads, self._key_dim]),
                        bias_axes=None,
                        name="encoding",
                        **common_kwargs)

    def compute_attention(
        self,
        query,
        key,
        value,
        position,
        content_attention_bias,
        positional_attention_bias,
        attention_mask=None
    ):
        attention_mask = None
        
        #AC
        content_attention = tf.einsum(self._dot_product_equation, key, query + content_attention_bias)
        
        attention_sum = content_attention

        attention_scores = tf.multiply(attention_sum, 1.0 / math.sqrt(float(self._key_dim)))

        attention_scores = self._masked_softmax(attention_scores, attention_mask)

        attention_output = self._dropout_layer(attention_scores)

        attention_output = tf.einsum(self._combine_equation, attention_output, value)
        
        return attention_output

    def call(self,
             query,
             value,
             content_attention_bias,
             positional_attention_bias,
             key=None,
             relative_position_encoding=None,
             state=None,
             attention_mask=None):
        
        attention_mask = None
        
        
        if not self._built_from_signature:
            self._build_from_signature(query, value, key=key)
        if key is None:
            key = value
        if state is not None and state.shape.ndims > 1:
            value = tf.concat([state, value], 1)
            key = tf.concat([state, key], 1)

        query = self._query_dense(query)

        key = self._key_dense(key)

        value = self._value_dense(value)
        position = None
        
        attention_output = self.compute_attention(
                query=query,
                key=key,
                value=value,
                position=position,
                content_attention_bias=content_attention_bias,
                positional_attention_bias=positional_attention_bias,
                attention_mask=attention_mask)

        attention_output = self._output_dense(attention_output)

        return attention_output

---
# Build Einsum Equation

In [21]:
_CHR_IDX = string.ascii_lowercase

def _build_proj_equation(free_dims, bound_dims, output_dims):
    input_str = ""
    kernel_str = ""
    output_str = ""
    bias_axes = ""
    letter_offset = 0
    for i in range(free_dims):
        char = _CHR_IDX[i + letter_offset]
        input_str += char
        output_str += char

    letter_offset += free_dims
    for i in range(bound_dims):
        char = _CHR_IDX[i + letter_offset]
        input_str += char
        kernel_str += char

    letter_offset += bound_dims
    for i in range(output_dims):
        char = _CHR_IDX[i + letter_offset]
        kernel_str += char
        output_str += char
        bias_axes += char
    equation = "%s,%s->%s" % (input_str, kernel_str, output_str)

    return equation, bias_axes, len(output_str)


def _get_output_shape(output_rank, known_last_dims):
    return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims)

---
# XL Block

In [22]:
class TransformerXLBlock(tf.keras.layers.Layer):
    def __init__(self,
                 vocab_size,
                 hidden_size,
                 num_attention_heads,
                 head_size,
                 inner_size,
                 dropout_rate,
                 attention_dropout_rate,
                 norm_epsilon=1e-12,
                 inner_activation="relu",
                 kernel_initializer="variance_scaling",
                 inner_dropout=0.0,
                 **kwargs):

        super(TransformerXLBlock, self).__init__(**kwargs)
        self._vocab_size = vocab_size
        self._num_heads = num_attention_heads
        self._head_size = head_size
        self._hidden_size = hidden_size
        self._inner_size = inner_size
        self._dropout_rate = dropout_rate
        self._attention_dropout_rate = attention_dropout_rate
        self._inner_activation = inner_activation
        self._norm_epsilon = norm_epsilon
        self._kernel_initializer = kernel_initializer
        self._inner_dropout = inner_dropout
        self._attention_layer_type = MultiHeadRelativeAttention
        
    def build(self, input_shape):
        input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape
        input_tensor_shape = tf.TensorShape(input_tensor)
        if len(input_tensor_shape.as_list()) != 3:
            raise ValueError("TransformerLayer expects a three-dimensional input of "
                                             "shape [batch, sequence, width].")
        batch_size, sequence_length, hidden_size = input_tensor_shape

        if hidden_size % self._num_heads != 0:
            raise ValueError(
                    "The input size (%d) is not a multiple of the number of attention "
                    "heads (%d)" % (hidden_size, self._num_heads))
            

        self._attention_layer = self._attention_layer_type(
                num_heads=self._num_heads,
                key_dim=self._head_size,
                value_dim=self._head_size,
                dropout=self._attention_dropout_rate,
                use_bias=False,
                kernel_initializer=clone_initializer(self._kernel_initializer),
                name="rel_attn")
        
        self._attention_dropout = tf.keras.layers.Dropout(
                rate=self._attention_dropout_rate)
        self._attention_layer_norm = tf.keras.layers.LayerNormalization(
                name="self_attention_layer_norm",
                axis=-1,
                epsilon=self._norm_epsilon,
                dtype=tf.float32)
        self._inner_dense = tf.keras.layers.experimental.EinsumDense(
                "abc,cd->abd",
                output_shape=(None, self._inner_size),
                bias_axes="d",
                kernel_initializer=clone_initializer(self._kernel_initializer),
                name="inner")

        self._inner_activation_layer = tf.keras.layers.Activation(
                self._inner_activation)
        self._inner_dropout_layer = tf.keras.layers.Dropout(
                rate=self._inner_dropout)
        self._output_dense = tf.keras.layers.experimental.EinsumDense(
                "abc,cd->abd",
                output_shape=(None, hidden_size),
                bias_axes="d",
                name="output",
                kernel_initializer=clone_initializer(self._kernel_initializer))
        self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
        self._output_layer_norm = tf.keras.layers.LayerNormalization(
                name="output_layer_norm",
                axis=-1,
                epsilon=self._norm_epsilon)

        super(TransformerXLBlock, self).build(input_shape)


    def call(self,
             content_stream,
             content_attention_bias,
             positional_attention_bias,
             relative_position_encoding=None,
             state=None,
             content_attention_mask=None,
             query_attention_mask=None,
             target_mapping=None):
        
        attention_kwargs = dict(
                query=content_stream,
                value=content_stream,
                key=content_stream,
                attention_mask=content_attention_mask)

        common_attention_kwargs = dict(
                content_attention_bias=content_attention_bias,
                relative_position_encoding=relative_position_encoding,
                positional_attention_bias=positional_attention_bias,
                state=state)

        attention_kwargs.update(common_attention_kwargs)
        attention_output = self._attention_layer(**attention_kwargs)
        
        attention_stream = attention_output
        input_stream = content_stream
        attention_key = "content_attention"
        attention_output = {}
        
        attention_stream = self._attention_dropout(attention_stream)
        attention_stream = self._attention_layer_norm(attention_stream + input_stream)
        inner_output = self._inner_dense(attention_stream)
        inner_output = self._inner_activation_layer(
                inner_output)
        inner_output = self._inner_dropout_layer(
                inner_output)
        layer_output = self._output_dense(inner_output)
        layer_output = self._output_dropout(layer_output)
        layer_output = self._output_layer_norm(layer_output + attention_stream)
        attention_output[attention_key] = layer_output

        return attention_output

---
# Transformer XL

In [23]:
class TransformerXL(tf.keras.layers.Layer):
    def __init__(self,
                 vocab_size,
                 num_layers,
                 hidden_size,
                 maxlen,
                 embed_dim,
                 num_attention_heads,
                 head_size,
                 inner_size,
                 dropout_rate,
                 attention_dropout_rate,
                 initializer,
                 tie_attention_biases=True,
                 memory_length=None,
                 reuse_length=None,
                 inner_activation="relu",
                 **kwargs):
        super(TransformerXL, self).__init__(**kwargs)

        self._vocab_size = vocab_size
        self._initializer = initializer
        self._num_layers = num_layers
        self._hidden_size = hidden_size
        self._num_attention_heads = num_attention_heads
        self._head_size = head_size
        self._inner_size = inner_size
        self._inner_activation = inner_activation
        self._dropout_rate = dropout_rate
        self._attention_dropout_rate = attention_dropout_rate
        self._tie_attention_biases = tie_attention_biases

        self._memory_length = memory_length
        self._reuse_length = reuse_length

        if self._tie_attention_biases:
            attention_bias_shape = [self._num_attention_heads, self._head_size]
        else:
            attention_bias_shape = [self._num_layers, self._num_attention_heads, self._head_size]

        self.content_attention_bias = self.add_weight(
                "content_attention_bias",
                shape=attention_bias_shape,
                dtype=tf.float32,
                initializer=clone_initializer(self._initializer))
        self.positional_attention_bias = self.add_weight(
                "positional_attention_bias",
                shape=attention_bias_shape,
                dtype=tf.float32,
                initializer=clone_initializer(self._initializer))

        self.transformer_xl_layers = []
        for i in range(self._num_layers):
            self.transformer_xl_layers.append(
                    TransformerXLBlock(
                            vocab_size=self._vocab_size,
                            hidden_size=self._head_size * self._num_attention_heads,
                            num_attention_heads=self._num_attention_heads,
                            head_size=self._head_size,
                            inner_size=self._inner_size,
                            dropout_rate=self._dropout_rate,
                            attention_dropout_rate=self._attention_dropout_rate,
                            norm_epsilon=1e-12,
                            inner_activation=self._inner_activation,
                            kernel_initializer="variance_scaling",
                            name="layer_%d" % i))

        self.output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)

    def call(self,
             content_stream,
             relative_position_encoding,
             state=None,
             content_attention_mask=None,
             query_attention_mask=None,
             target_mapping=None):
        
        new_mems = []

        content_attention_mask = None
        query_attention_mask = None
        
        if state is None:
            state = [None] * self._num_layers
        for i in range(self._num_layers):
            # cache new mems
            new_mems.append( _cache_memory(content_stream, state[i], self._memory_length, self._reuse_length))

            if self._tie_attention_biases:
                content_attention_bias = self.content_attention_bias
            else:
                content_attention_bias = self.content_attention_bias[i]

            transformer_xl_layer = self.transformer_xl_layers[i]
            
            transformer_xl_output = transformer_xl_layer(
                    content_stream=content_stream,
                    content_attention_bias=content_attention_bias,
                    positional_attention_bias=None,
                    relative_position_encoding=None,
                    state=state[i],
                    content_attention_mask=None,
                    query_attention_mask=None,
                    target_mapping=target_mapping)
            content_stream = transformer_xl_output["content_attention"]
            
        output_stream = content_stream
        return output_stream, new_mems

---
# Xl Model Class

In [24]:
class XlModel(keras.Model):
    def __init__(self, max_files, encoder, block_size, seq_len_padded, embed_dim, vocab_size, num_layers, hidden_size, num_attention_heads, maxlen, memory_length, reuse_length, head_size, inner_size, dropout_rate, attention_dropout_rate, initializer):
        super(XlModel, self).__init__()
        
        self.block_size = block_size
        self.seq_len_padded = seq_len_padded
        self.embed_dim = embed_dim
        self.num_heads = num_attention_heads
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.maxlen = maxlen
        self.memory_length = memory_length
        self.max_files = max_files
        self.encoder = encoder
        self.isabs = []
        
        self.embedding_layer = Create_Embeddings(encoder)

        self.linear_layer = keras.layers.Dense(embed_dim)
        
        self.transformer_xl = TransformerXL(
                vocab_size=vocab_size,
                num_layers=num_layers,
                hidden_size=hidden_size,
                num_attention_heads=num_attention_heads,
                maxlen=maxlen,
                embed_dim=embed_dim,
                memory_length=memory_length,
                reuse_length=reuse_length,
                head_size=head_size,
                inner_size=inner_size,
                dropout_rate=dropout_rate,
                attention_dropout_rate=attention_dropout_rate,
                initializer=initializer, 
            )
        

        self.pooling_layer = Set_Transformer.PoolingByMultiHeadAttention(num_seeds=1,embed_dim=embed_dim,num_heads=1,use_layernorm=True,pre_layernorm=True, use_keras_mha=True,is_final_block=True)
    
        self.reshape_layer = keras.layers.Reshape((embed_dim,))
   
        self.output_layer = keras.layers.Dense(self.max_files, activation=keras.activations.softmax)
        
    
    def call(self, x, training=None):        
 
        mems = tf.zeros((self.num_layers, tf.shape(x)[0], self.memory_length, self.embed_dim))
        
        embeddings = self.embedding_layer(x)
            
        linear_transform = self.linear_layer(embeddings)    
            
        for i in range(0, self.seq_len_padded, self.block_size):
            block = embeddings[:,i:i+self.block_size]
            
            output, mems = self.transformer_xl(content_stream=block, relative_position_encoding=None, state=mems)
                
        pooling = self.pooling_layer(output)

        reshape = self.reshape_layer(pooling)

        output = self.output_layer(reshape)          
        
        return output

---
# Xl Parameters

In [25]:
#Xl Parameters 
embed_dim = 8
num_layers = 8
hidden_size = 32
num_attention_heads = 8
memory_length = 200
reuse_length = 0
head_size = 8
inner_size = 32
dropout_rate = 0.01
attention_dropout_rate = 0.01
initializer = keras.initializers.RandomNormal(stddev=0.1) 

encoder = pretrained_encoder
epochs = 10000

In [28]:
Parameters = dict(
    embed_dim = 8,
    num_layers = 8,
    hidden_size = 32,
    num_attention_heads = 8,
    memory_length = 200,
    reuse_length = 0,
    head_size = 8,
    inner_size = 32,
    dropout_rate = 0.01,
    attention_dropout_rate = 0.01)

In [29]:
run = wandb.init(project="Str_XL_Relative_Mha", config=Parameters)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkendragivens[0m. Use [1m`wandb login --relogin`[0m to force relogin


---
# Create Models

In [30]:
model = XlModel(max_files, encoder, block_size, seq_len, embed_dim, vocab_size, num_layers, hidden_size, num_attention_heads, maxlen, memory_length, reuse_length, head_size, inner_size, dropout_rate, attention_dropout_rate, initializer)
model.compile(loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False), optimizer = keras.optimizers.Nadam(1e-4), metrics = keras.metrics.SparseCategoricalAccuracy())

In [31]:
history = model.fit(x=train_dataset, validation_data=val_dataset, epochs=epochs, verbose=1, callbacks=[wandb.keras.WandbCallback(save_weights_only=True)])



Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000
Epoch 62/10000
Epoch 63/10000
Epoch 64/10000
Epoch 65/10000
Epoch 66/10000
Epoch 67/10000
Epoc

KeyboardInterrupt: 

In [32]:
run.finish()

VBox(children=(Label(value='0.656 MB of 0.656 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▇▇▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁
sparse_categorical_accuracy,▁▁▂▂▁▃▂▂▂▄▂▂▃▃▅▄▅▅▅▅▅▅▆▅▅▅▆▅▅▇▆▇▆▇███▇▇█
val_loss,█▇▇▇▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▃▂▂▁▂▂▂▁▁▂▁▁▂
val_sparse_categorical_accuracy,▂▂▂▃▁▂▃▃▄▄▆▅▃▇▆▇▅▄▇▅▃▂▄▃▆▆▅▃▃▆▇▄▃▄█▇▅▅▆▃

0,1
best_epoch,646.0
best_val_loss,2.42284
epoch,662.0
loss,2.38979
sparse_categorical_accuracy,0.23
val_loss,2.45343
val_sparse_categorical_accuracy,0.2


In [33]:
previous_history = history

NameError: name 'history' is not defined

In [34]:
model.save_weights("./Saved_Models/Str_Xl_Relative.h5")

In [35]:
model.load_weights("./Saved_Models/Str_Xl_Relative.h5")

In [None]:
u = model.evaluate(train_dataset[3][0], train_dataset[3][1])

In [None]:
u = model.evaluate(val_dataset)

In [None]:
u = model.predict(val_dataset)

In [None]:
u = u.argmax(-1)

In [None]:
u

In [None]:
trimmed_samples[0:5]