In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras import layers
import sys
sys.path.insert(1, "Informer-Tensorflow/models")
sys.path.insert(1, "Informer-Tensorflow")

2023-07-08 19:39:35.590054: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from attn import ProbAttention, AttentionLayer

In [3]:
class MultiHeadProbAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadProbAttention, self).__init__()
        self.d_model = d_model
        self.num_heads = num_heads
        self.attn = AttentionLayer(ProbAttention(False), self.d_model, self.num_heads)
    def call(self, inputs):
        query = inputs
        key = inputs
        value = inputs
        x = self.attn([query, key, value])
        return x


In [4]:
def positional_encoding(length, depth):
    depth = depth/2

    positions = np.arange(length)[:, np.newaxis]     # (seq, 1)
    depths = np.arange(depth)[np.newaxis, :]/depth   # (1, depth)

    angle_rates = 1 / (10000**depths)         # (1, depth)
    angle_rads = positions * angle_rates      # (pos, depth)

    pos_encoding = np.concatenate(
        [np.sin(angle_rads), np.cos(angle_rads)],
        axis=-1)

    return tf.cast(pos_encoding, dtype=tf.float32)

class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super().__init__()
        self.d_model = d_model
        # self.embedding = tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True)
        self.pos_encoding = positional_encoding(length=2048, depth=d_model)

    # def compute_mask(self, *args, **kwargs):
    #     return self.embedding.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        # x = self.embedding(x)
        # This factor sets the relative scale of the embedding and positonal_encoding.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x = x + self.pos_encoding[tf.newaxis, :length, :]
        return x

In [5]:
class ConvLayer(keras.layers.Layer):
    def __init__(self, c_in):
        super(ConvLayer, self).__init__()
        self.downConv = tf.keras.layers.Conv1D(
                                  filters=c_in,
                                  kernel_size=3,
                                  padding='causal')
        self.norm = tf.keras.layers.BatchNormalization()
        self.activation = tf.keras.layers.ELU()
        self.maxPool = tf.keras.layers.MaxPool1D(pool_size=3)#, strides=2)

    def call(self, x, **kargs):
        x = self.downConv(x)
        x = self.norm(x)
        x = self.activation(x)
        x = self.maxPool(x)
        return x

In [6]:
BATCH_SIZE = 6
def models():
    inputs = keras.layers.Input(shape=(2000,1), batch_size=BATCH_SIZE)
    # x = keras.layers.Dense(32, activation="tanh")(inputs)
    x = keras.layers.Conv1D(32, 1, activation='tanh')(inputs)
    x = PositionalEmbedding(32)(x)
    x = MultiHeadProbAttention(128, 32)(x)
    x = ConvLayer(64)(x)
    x = MultiHeadProbAttention(64, 32)(x)
    x = ConvLayer(32)(x)
    x = keras.layers.LayerNormalization()(x)
    x = keras.layers.GlobalAveragePooling1D(data_format='channels_first')(x)
    x = keras.layers.Dense(5, activation='softmax')(x)
    return keras.Model(inputs, x)

In [7]:
model = models()
model.summary()
# del model

2023-07-08 19:39:50.226864: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-08 19:39:50.262455: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-08 19:39:50.262800: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

[TensorShape([6, 2000, 32]), TensorShape([6, 2000, 32]), TensorShape([6, 2000, 32])]
[TensorShape([6, 666, 64]), TensorShape([6, 666, 64]), TensorShape([6, 666, 64])]
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(6, 2000, 1)]            0         
                                                                 
 conv1d (Conv1D)             (6, 2000, 32)             64        
                                                                 
 positional_embedding (Posit  (6, 2000, 32)            0         
 ionalEmbedding)                                                 
                                                                 
 multi_head_prob_attention (  (6, 2000, 128)           29184     
 MultiHeadProbAttention)                                         
                                                                 
 conv_layer (ConvLayer)   

In [8]:
x = tf.random.normal((BATCH_SIZE, 2000, 1))
model(x)

2023-07-08 19:39:59.257040: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-07-08 19:39:59.790329: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8800


<tf.Tensor: shape=(6, 5), dtype=float32, numpy=
array([[0.20000002, 0.20000002, 0.20000002, 0.19999997, 0.19999999],
       [0.2       , 0.19999997, 0.2       , 0.2       , 0.19999997],
       [0.20000003, 0.19999999, 0.19999999, 0.19999999, 0.20000003],
       [0.19999997, 0.2       , 0.2       , 0.2       , 0.2       ],
       [0.19999999, 0.19999999, 0.19999997, 0.20000002, 0.20000002],
       [0.19999999, 0.20000002, 0.20000002, 0.19999999, 0.19999999]],
      dtype=float32)>

In [9]:
data = np.load("data_shuffled.npz")
X = data["X"]
Y = data["Y"]

x_train = tf.reshape(X[0:846], (-1, 2000, 1))
y_train = Y[0:846]
x_test = tf.reshape(X[850:], (-1, 2000, 1))
y_test = Y[850:]

In [10]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=100000).batch(BATCH_SIZE)

val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(BATCH_SIZE)

In [11]:
x = x_train[0:BATCH_SIZE]
x_rank = tf.rank(x).numpy()
x_norm_resize_shape = [BATCH_SIZE] + list(tf.ones(tf.rank(x), dtype=tf.int32).numpy())[1:]

In [12]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()

        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, dtype=tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [13]:
def perturbation_loss(x, y, from_logits=False):
    return keras.losses.CategoricalCrossentropy(from_logits=from_logits)(x, y)

loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
learning_rate = CustomSchedule(32)
optimizer = keras.optimizers.experimental.AdamW(learning_rate=learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)

train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

In [14]:
eps = 8.     # the perturbation parameter
sig = 1e-5   # initial perturbation StdDev
zeta = 1e-6  # differentiation constant
lamd = 1     # regularization parameter

@tf.function
def training_step(x, label):
    x_p = tf.random.normal((BATCH_SIZE,2000, 1), stddev=sig)
    x_norm = x_p
    for i in range(x_rank-1, 0, -1):
        x_norm = tf.norm(x_norm, ord=2, axis=int(i))
    x_p /= tf.reshape(x_norm, (BATCH_SIZE, 1, 1))
    x_p *= zeta


    with tf.GradientTape() as model_tape, tf.GradientTape() as perturbation_tape:
        perturbation_tape.watch(x_p)
        y_p = model(x + x_p, training=True)
        y = model(x, training=True)
        l = perturbation_loss(y, y_p)
        g = perturbation_tape.gradient(l, x_p)

        g_norm = g
        for i in range(x_rank-1, 0, -1):
            g_norm = tf.norm(g_norm, ord=2, axis=int(i))

        x_p = eps * g / tf.reshape(g_norm, x_norm_resize_shape)

        logits = model(x, training=True)
        loss = loss_fn(label, logits) + lamd * l / BATCH_SIZE
        grads = model_tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

    train_acc_metric.update_state(label, logits)
    return loss, train_acc_metric.result(), l, x_p, g

In [None]:
train_loss = []
train_metric = []
val_metric = []
p_loss = []
for epoch in tqdm(range(1000)):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x, label) in enumerate(train_dataset):
        loss, train_acc, l, x_p, g = training_step(x, label)

    print(
        "Training loss: %.4f\nTraining metric: %.4f"
        % (float(loss), float(train_acc))
    )
    print("perturbation loss: %.4f" % float(l))

    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        # Update val metrics
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc)))

    train_loss.append(loss)
    train_metric.append(train_acc)
    val_metric.append(val_acc)
    p_loss.append(l)
    
    tl = np.array(train_loss)
    tm = np.array(train_metric)
    vm = np.array(val_metric)
    pl = np.array(p_loss)
    
    np.savez("logs.npz", train_loss=tl, train_acc=tm, val_acc=vm, p_loss=pl, x_p=x_p)

  0%|          | 0/1000 [00:00<?, ?it/s]


Start of epoch 0
Training loss: 1.8677
Training metric: 0.3022
perturbation loss: 1.6093


2023-07-08 19:44:12.209798: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [150]
	 [[{{node Placeholder/_1}}]]
  0%|          | 1/1000 [00:28<7:47:49, 28.10s/it]

Validation acc: 0.3867

Start of epoch 1
Training loss: 1.7331
Training metric: 0.3417
perturbation loss: 1.5921


  0%|          | 2/1000 [00:56<7:46:56, 28.07s/it]

Validation acc: 0.4133

Start of epoch 2
Training loss: 1.5783
Training metric: 0.3580
perturbation loss: 1.4706


  0%|          | 3/1000 [01:24<7:45:06, 27.99s/it]

Validation acc: 0.4133

Start of epoch 3
Training loss: 1.4152
Training metric: 0.3755
perturbation loss: 1.2811


  0%|          | 4/1000 [01:52<7:46:03, 28.08s/it]

Validation acc: 0.3733

Start of epoch 4
Training loss: 1.6300
Training metric: 0.3747
perturbation loss: 1.4078


  0%|          | 5/1000 [02:20<7:44:09, 27.99s/it]

Validation acc: 0.4067

Start of epoch 5
Training loss: 1.3498
Training metric: 0.3804
perturbation loss: 1.1685


  1%|          | 6/1000 [02:48<7:44:02, 28.01s/it]

Validation acc: 0.3933

Start of epoch 6
Training loss: 0.9009
Training metric: 0.3803
perturbation loss: 0.7549


  1%|          | 7/1000 [03:15<7:42:36, 27.95s/it]

Validation acc: 0.4533

Start of epoch 7
Training loss: 1.2825
Training metric: 0.3850
perturbation loss: 1.1636


  1%|          | 8/1000 [03:43<7:41:28, 27.91s/it]

Validation acc: 0.4000

Start of epoch 8
Training loss: 0.8472
Training metric: 0.3885
perturbation loss: 0.8577


  1%|          | 9/1000 [04:11<7:40:38, 27.89s/it]

Validation acc: 0.4000

Start of epoch 9
Training loss: 1.3936
Training metric: 0.3949
perturbation loss: 1.1385


  1%|          | 10/1000 [04:39<7:40:08, 27.89s/it]

Validation acc: 0.4867

Start of epoch 10
Training loss: 1.6654
Training metric: 0.4013
perturbation loss: 1.3782


  1%|          | 11/1000 [05:07<7:39:27, 27.87s/it]

Validation acc: 0.4000

Start of epoch 11
Training loss: 0.9858
Training metric: 0.4086
perturbation loss: 0.8823


  1%|          | 12/1000 [05:35<7:39:33, 27.91s/it]

Validation acc: 0.4000

Start of epoch 12
Training loss: 1.6884
Training metric: 0.4122
perturbation loss: 1.3715


  1%|▏         | 13/1000 [06:03<7:38:36, 27.88s/it]

Validation acc: 0.2933

Start of epoch 13
Training loss: 1.7969
Training metric: 0.4144
perturbation loss: 1.3403


  1%|▏         | 14/1000 [06:31<7:38:12, 27.88s/it]

Validation acc: 0.4200

Start of epoch 14
Training loss: 1.3952
Training metric: 0.4164
perturbation loss: 1.1949


  2%|▏         | 15/1000 [06:58<7:37:32, 27.87s/it]

Validation acc: 0.3000

Start of epoch 15
Training loss: 1.6214
Training metric: 0.4177
perturbation loss: 1.3261


  2%|▏         | 16/1000 [07:26<7:37:14, 27.88s/it]

Validation acc: 0.3867

Start of epoch 16
Training loss: 1.3118
Training metric: 0.4201
perturbation loss: 1.1000


  2%|▏         | 17/1000 [07:54<7:36:49, 27.88s/it]

Validation acc: 0.4333

Start of epoch 17
Training loss: 1.0593
Training metric: 0.4206
perturbation loss: 0.9196


  2%|▏         | 18/1000 [08:22<7:36:19, 27.88s/it]

Validation acc: 0.2133

Start of epoch 18
Training loss: 0.9709
Training metric: 0.4226
perturbation loss: 0.8282


  2%|▏         | 19/1000 [08:50<7:36:03, 27.89s/it]

Validation acc: 0.4133

Start of epoch 19
Training loss: 1.4715
Training metric: 0.4245
perturbation loss: 1.3729


  2%|▏         | 20/1000 [09:18<7:35:30, 27.89s/it]

Validation acc: 0.4000

Start of epoch 20
Training loss: 1.5467
Training metric: 0.4262
perturbation loss: 1.3104


  2%|▏         | 21/1000 [09:46<7:34:58, 27.88s/it]

Validation acc: 0.4667

Start of epoch 21
Training loss: 0.9078
Training metric: 0.4300
perturbation loss: 0.9363


  2%|▏         | 22/1000 [10:14<7:34:41, 27.90s/it]

Validation acc: 0.2333

Start of epoch 22
Training loss: 1.1256
Training metric: 0.4366
perturbation loss: 0.8103


  2%|▏         | 23/1000 [10:42<7:34:22, 27.90s/it]

Validation acc: 0.4667

Start of epoch 23
Training loss: 0.8202
Training metric: 0.4480
perturbation loss: 0.9166


  2%|▏         | 24/1000 [11:09<7:33:55, 27.90s/it]

Validation acc: 0.2067

Start of epoch 24
Training loss: 0.3185
Training metric: 0.4605
perturbation loss: 0.3939


  2%|▎         | 25/1000 [11:37<7:33:23, 27.90s/it]

Validation acc: 0.4067

Start of epoch 25
Training loss: 0.2712
Training metric: 0.4730
perturbation loss: 0.4563


  3%|▎         | 26/1000 [12:05<7:32:49, 27.89s/it]

Validation acc: 0.4133

Start of epoch 26
Training loss: 0.3623
Training metric: 0.4850
perturbation loss: 0.4850


  3%|▎         | 27/1000 [12:33<7:32:19, 27.89s/it]

Validation acc: 0.3000

Start of epoch 27
Training loss: 0.7106
Training metric: 0.4968
perturbation loss: 0.2833


  3%|▎         | 28/1000 [13:01<7:31:56, 27.90s/it]

Validation acc: 0.6133

Start of epoch 28
Training loss: 0.1765
Training metric: 0.5086
perturbation loss: 0.2353


  3%|▎         | 29/1000 [13:29<7:31:24, 27.89s/it]

Validation acc: 0.2000

Start of epoch 29
Training loss: 0.8100
Training metric: 0.5200
perturbation loss: 0.4071


  3%|▎         | 30/1000 [13:57<7:30:47, 27.88s/it]

Validation acc: 0.7200

Start of epoch 30
Training loss: 0.3172
Training metric: 0.5312
perturbation loss: 0.2920


  3%|▎         | 31/1000 [14:25<7:30:29, 27.89s/it]

Validation acc: 0.6467

Start of epoch 31
Training loss: 0.3498
Training metric: 0.5418
perturbation loss: 0.4008


  3%|▎         | 32/1000 [14:53<7:30:07, 27.90s/it]

Validation acc: 0.7200

Start of epoch 32
Training loss: 0.9618
Training metric: 0.5523
perturbation loss: 0.4494


  3%|▎         | 33/1000 [15:21<7:29:51, 27.91s/it]

Validation acc: 0.7800

Start of epoch 33
Training loss: 0.5705
Training metric: 0.5624
perturbation loss: 0.2159


  3%|▎         | 34/1000 [15:49<7:29:29, 27.92s/it]

Validation acc: 0.8400

Start of epoch 34
Training loss: 0.0459
Training metric: 0.5714
perturbation loss: 0.1257


  4%|▎         | 35/1000 [16:16<7:28:57, 27.91s/it]

Validation acc: 0.8333

Start of epoch 35
Training loss: 0.3013
Training metric: 0.5805
perturbation loss: 0.2263


  4%|▎         | 36/1000 [16:44<7:28:25, 27.91s/it]

Validation acc: 0.8533

Start of epoch 36
Training loss: 0.8091
Training metric: 0.5892
perturbation loss: 0.3390


  4%|▎         | 37/1000 [17:12<7:27:59, 27.91s/it]

Validation acc: 0.7133

Start of epoch 37
Training loss: 0.0952
Training metric: 0.5978
perturbation loss: 0.2642


  4%|▍         | 38/1000 [17:40<7:27:27, 27.91s/it]

Validation acc: 0.4267

Start of epoch 38
Training loss: 0.7293
Training metric: 0.6056
perturbation loss: 0.4351


  4%|▍         | 39/1000 [18:08<7:27:03, 27.91s/it]

Validation acc: 0.7200

Start of epoch 39
Training loss: 0.0638
Training metric: 0.6137
perturbation loss: 0.1877


  4%|▍         | 40/1000 [18:36<7:26:35, 27.91s/it]

Validation acc: 0.6400

Start of epoch 40
Training loss: 0.0800
Training metric: 0.6211
perturbation loss: 0.1399


  4%|▍         | 41/1000 [19:04<7:26:09, 27.91s/it]

Validation acc: 0.2000

Start of epoch 41
Training loss: 0.0319
Training metric: 0.6280
perturbation loss: 0.0473


  4%|▍         | 42/1000 [19:32<7:25:48, 27.92s/it]

Validation acc: 0.9000

Start of epoch 42
Training loss: 0.1561
Training metric: 0.6347
perturbation loss: 0.2246


  4%|▍         | 43/1000 [20:00<7:25:21, 27.92s/it]

Validation acc: 0.3667

Start of epoch 43
Training loss: 0.9386
Training metric: 0.6412
perturbation loss: 0.3906


  4%|▍         | 44/1000 [20:28<7:25:31, 27.96s/it]

Validation acc: 0.9000

Start of epoch 44
Training loss: 0.0094
Training metric: 0.6475
perturbation loss: 0.0309


  4%|▍         | 45/1000 [20:56<7:24:44, 27.94s/it]

Validation acc: 0.8533

Start of epoch 45
Training loss: 0.2244
Training metric: 0.6534
perturbation loss: 0.2295


  5%|▍         | 46/1000 [21:24<7:24:11, 27.94s/it]

Validation acc: 0.8067

Start of epoch 46
Training loss: 1.3087
Training metric: 0.6591
perturbation loss: 0.3670


  5%|▍         | 47/1000 [21:52<7:23:44, 27.94s/it]

Validation acc: 0.5600

Start of epoch 47
Training loss: 0.0094
Training metric: 0.6646
perturbation loss: 0.0198


  5%|▍         | 48/1000 [22:20<7:23:46, 27.97s/it]

Validation acc: 0.4000

Start of epoch 48
Training loss: 0.1788
Training metric: 0.6700
perturbation loss: 0.2818


  5%|▍         | 49/1000 [22:48<7:23:15, 27.97s/it]

Validation acc: 0.4067

Start of epoch 49
Training loss: 0.1053
Training metric: 0.6753
perturbation loss: 0.1911


  5%|▌         | 50/1000 [23:15<7:22:34, 27.95s/it]

Validation acc: 0.8333

Start of epoch 50
Training loss: 0.0297
Training metric: 0.6802
perturbation loss: 0.0768


  5%|▌         | 51/1000 [23:43<7:21:56, 27.94s/it]

Validation acc: 0.8800

Start of epoch 51
Training loss: 0.1703
Training metric: 0.6850
perturbation loss: 0.1905


  5%|▌         | 52/1000 [24:11<7:21:25, 27.94s/it]

Validation acc: 0.7667

Start of epoch 52
Training loss: 0.1174
Training metric: 0.6898
perturbation loss: 0.2437


  5%|▌         | 53/1000 [24:39<7:20:49, 27.93s/it]

Validation acc: 0.4200

Start of epoch 53
Training loss: 0.2362
Training metric: 0.6941
perturbation loss: 0.2097


  5%|▌         | 54/1000 [25:07<7:20:17, 27.93s/it]

Validation acc: 0.8333

Start of epoch 54
Training loss: 0.0191
Training metric: 0.6986
perturbation loss: 0.0486


  6%|▌         | 55/1000 [25:35<7:19:48, 27.92s/it]

Validation acc: 0.7933

Start of epoch 55
Training loss: 0.1844
Training metric: 0.7029
perturbation loss: 0.1744


  6%|▌         | 56/1000 [26:03<7:19:15, 27.92s/it]

Validation acc: 0.9200

Start of epoch 56
Training loss: 0.0358
Training metric: 0.7071
perturbation loss: 0.0731


  6%|▌         | 57/1000 [26:31<7:18:46, 27.92s/it]

Validation acc: 0.9067

Start of epoch 57
Training loss: 0.0079
Training metric: 0.7112
perturbation loss: 0.0270


  6%|▌         | 58/1000 [26:59<7:18:24, 27.92s/it]

Validation acc: 0.8733

Start of epoch 58
Training loss: 0.0397
Training metric: 0.7151
perturbation loss: 0.0821


  6%|▌         | 59/1000 [27:27<7:17:52, 27.92s/it]

Validation acc: 0.6200

Start of epoch 59
Training loss: 0.1311
Training metric: 0.7188
perturbation loss: 0.1781


  6%|▌         | 60/1000 [27:55<7:17:16, 27.91s/it]

Validation acc: 0.3800

Start of epoch 60
Training loss: 1.7945
Training metric: 0.7223
perturbation loss: 0.0134


  6%|▌         | 61/1000 [28:23<7:16:45, 27.91s/it]

Validation acc: 0.7067

Start of epoch 61
Training loss: 0.0503
Training metric: 0.7259
perturbation loss: 0.1120


  6%|▌         | 62/1000 [28:50<7:16:20, 27.91s/it]

Validation acc: 0.8467

Start of epoch 62
Training loss: 0.2296
Training metric: 0.7294
perturbation loss: 0.3184


  6%|▋         | 63/1000 [29:18<7:15:49, 27.91s/it]

Validation acc: 0.8933

Start of epoch 63
Training loss: 0.0027
Training metric: 0.7326
perturbation loss: 0.0106


  6%|▋         | 64/1000 [29:46<7:15:24, 27.91s/it]

Validation acc: 0.8133

Start of epoch 64
Training loss: 0.9364
Training metric: 0.7358
perturbation loss: 0.2067


  6%|▋         | 65/1000 [30:14<7:15:12, 27.93s/it]

Validation acc: 0.8933

Start of epoch 65
Training loss: 0.0078
Training metric: 0.7390
perturbation loss: 0.0248


  7%|▋         | 66/1000 [30:42<7:14:54, 27.94s/it]

Validation acc: 0.7267

Start of epoch 66
Training loss: 0.2664
Training metric: 0.7420
perturbation loss: 0.2549


  7%|▋         | 67/1000 [31:10<7:14:18, 27.93s/it]

Validation acc: 0.7400

Start of epoch 67
Training loss: 0.0269
Training metric: 0.7449
perturbation loss: 0.0814


  7%|▋         | 68/1000 [31:38<7:14:03, 27.94s/it]

Validation acc: 0.8733

Start of epoch 68
Training loss: 0.2490
Training metric: 0.7477
perturbation loss: 0.3634


  7%|▋         | 69/1000 [32:06<7:13:26, 27.93s/it]

Validation acc: 0.7467

Start of epoch 69
Training loss: 0.0090
Training metric: 0.7505
perturbation loss: 0.0338


  7%|▋         | 70/1000 [32:34<7:12:55, 27.93s/it]

Validation acc: 0.4200

Start of epoch 70
Training loss: 0.0045
Training metric: 0.7531
perturbation loss: 0.0137


  7%|▋         | 71/1000 [33:02<7:12:11, 27.91s/it]

Validation acc: 0.8867

Start of epoch 71
Training loss: 0.3510
Training metric: 0.7558
perturbation loss: 0.4234


  7%|▋         | 72/1000 [33:30<7:11:39, 27.91s/it]

Validation acc: 0.6600

Start of epoch 72
Training loss: 0.0108
Training metric: 0.7584
perturbation loss: 0.0400


  7%|▋         | 73/1000 [33:58<7:11:14, 27.91s/it]

Validation acc: 0.7867

Start of epoch 73
Training loss: 0.0447
Training metric: 0.7609
perturbation loss: 0.0789


  7%|▋         | 74/1000 [34:26<7:10:49, 27.92s/it]

Validation acc: 0.8400

Start of epoch 74
Training loss: 0.0208
Training metric: 0.7633
perturbation loss: 0.0580


  8%|▊         | 75/1000 [34:53<7:10:16, 27.91s/it]

Validation acc: 0.8800

Start of epoch 75
Training loss: 0.0027
Training metric: 0.7658
perturbation loss: 0.0133


  8%|▊         | 76/1000 [35:21<7:10:01, 27.92s/it]

Validation acc: 0.9000

Start of epoch 76
Training loss: 0.4518
Training metric: 0.7682
perturbation loss: 0.3238


  8%|▊         | 77/1000 [35:49<7:09:39, 27.93s/it]

Validation acc: 0.7800

Start of epoch 77
Training loss: 0.7504
Training metric: 0.7704
perturbation loss: 0.1673


  8%|▊         | 78/1000 [36:17<7:09:11, 27.93s/it]

Validation acc: 0.7133

Start of epoch 78
Training loss: 0.1697
Training metric: 0.7726
perturbation loss: 0.2254


  8%|▊         | 79/1000 [36:45<7:08:46, 27.93s/it]

Validation acc: 0.8800

Start of epoch 79
Training loss: 0.0381
Training metric: 0.7749
perturbation loss: 0.0870


  8%|▊         | 80/1000 [37:13<7:08:35, 27.95s/it]

Validation acc: 0.9067

Start of epoch 80
Training loss: 0.0399
Training metric: 0.7770
perturbation loss: 0.1324


  8%|▊         | 81/1000 [37:41<7:07:55, 27.94s/it]

Validation acc: 0.8867

Start of epoch 81
Training loss: 0.0152
Training metric: 0.7790
perturbation loss: 0.0430


  8%|▊         | 82/1000 [38:09<7:07:25, 27.94s/it]

Validation acc: 0.8400

Start of epoch 82
Training loss: 0.0442
Training metric: 0.7811
perturbation loss: 0.0774


  8%|▊         | 83/1000 [38:37<7:07:17, 27.96s/it]

Validation acc: 0.8400

Start of epoch 83
Training loss: 0.0295
Training metric: 0.7831
perturbation loss: 0.0579


  8%|▊         | 84/1000 [39:05<7:07:00, 27.97s/it]

Validation acc: 0.8867

Start of epoch 84
Training loss: 0.2159
Training metric: 0.7852
perturbation loss: 0.1333


  8%|▊         | 85/1000 [39:33<7:06:31, 27.97s/it]

Validation acc: 0.8533

Start of epoch 85
Training loss: 0.1222
Training metric: 0.7872
perturbation loss: 0.1662


  9%|▊         | 86/1000 [40:01<7:05:56, 27.96s/it]

Validation acc: 0.5000

Start of epoch 86
Training loss: 0.0923
Training metric: 0.7890
perturbation loss: 0.1962


  9%|▊         | 87/1000 [40:29<7:05:24, 27.96s/it]

Validation acc: 0.7267

Start of epoch 87
Training loss: 0.0250
Training metric: 0.7908
perturbation loss: 0.0604


  9%|▉         | 88/1000 [40:57<7:04:48, 27.95s/it]

Validation acc: 0.9267

Start of epoch 88
Training loss: 0.5007
Training metric: 0.7925
perturbation loss: 0.2052


  9%|▉         | 89/1000 [41:25<7:04:20, 27.95s/it]

Validation acc: 0.6333

Start of epoch 89
Training loss: 0.0689
Training metric: 0.7942
perturbation loss: 0.1436


  9%|▉         | 90/1000 [41:53<7:03:44, 27.94s/it]

Validation acc: 0.7333

Start of epoch 90
Training loss: 0.0168
Training metric: 0.7959
perturbation loss: 0.0309


  9%|▉         | 91/1000 [42:21<7:03:24, 27.95s/it]

Validation acc: 0.8800

Start of epoch 91
Training loss: 1.3865
Training metric: 0.7976
perturbation loss: 0.0301


  9%|▉         | 92/1000 [42:49<7:02:39, 27.93s/it]

Validation acc: 0.9067

Start of epoch 92
Training loss: 0.2873
Training metric: 0.7993
perturbation loss: 0.2857


  9%|▉         | 93/1000 [43:16<7:02:13, 27.93s/it]

Validation acc: 0.8600

Start of epoch 93
Training loss: 0.0712
Training metric: 0.8009
perturbation loss: 0.1493


  9%|▉         | 94/1000 [43:44<7:01:50, 27.94s/it]

Validation acc: 0.7533

Start of epoch 94
Training loss: 0.0019
Training metric: 0.8025
perturbation loss: 0.0067


 10%|▉         | 95/1000 [44:12<7:01:08, 27.92s/it]

Validation acc: 0.5200

Start of epoch 95
Training loss: 0.3706
Training metric: 0.8040
perturbation loss: 0.2536


 10%|▉         | 96/1000 [44:40<7:00:31, 27.91s/it]

Validation acc: 0.8133

Start of epoch 96
Training loss: 0.4913
Training metric: 0.8056
perturbation loss: 0.1977


 10%|▉         | 97/1000 [45:08<6:59:58, 27.91s/it]

Validation acc: 0.9133

Start of epoch 97
Training loss: 0.0206
Training metric: 0.8071
perturbation loss: 0.0525


 10%|▉         | 98/1000 [45:36<6:59:32, 27.91s/it]

Validation acc: 0.7667

Start of epoch 98
Training loss: 0.0676
Training metric: 0.8086
perturbation loss: 0.1742


 10%|▉         | 99/1000 [46:04<6:59:04, 27.91s/it]

Validation acc: 0.9000

Start of epoch 99
Training loss: 0.4230
Training metric: 0.8101
perturbation loss: 0.3894


 10%|█         | 100/1000 [46:32<6:58:30, 27.90s/it]

Validation acc: 0.9200

Start of epoch 100
Training loss: 0.6194
Training metric: 0.8115
perturbation loss: 0.2113


 10%|█         | 101/1000 [47:00<6:58:14, 27.91s/it]

Validation acc: 0.8467

Start of epoch 101
Training loss: 0.0343
Training metric: 0.8128
perturbation loss: 0.0571


 10%|█         | 102/1000 [47:28<6:57:39, 27.91s/it]

Validation acc: 0.8067

Start of epoch 102
Training loss: 0.1110
Training metric: 0.8142
perturbation loss: 0.1195


 10%|█         | 103/1000 [47:56<6:57:00, 27.89s/it]

Validation acc: 0.9333

Start of epoch 103
Training loss: 0.0905
Training metric: 0.8156
perturbation loss: 0.2572


 10%|█         | 104/1000 [48:23<6:56:34, 27.90s/it]

Validation acc: 0.8933

Start of epoch 104
Training loss: 0.0425
Training metric: 0.8170
perturbation loss: 0.0899


 10%|█         | 105/1000 [48:51<6:56:03, 27.89s/it]

Validation acc: 0.9133

Start of epoch 105
Training loss: 0.5547
Training metric: 0.8183
perturbation loss: 0.2632


 11%|█         | 106/1000 [49:19<6:55:22, 27.88s/it]

Validation acc: 0.8800

Start of epoch 106
Training loss: 0.1085
Training metric: 0.8195
perturbation loss: 0.1859


 11%|█         | 107/1000 [49:47<6:55:20, 27.91s/it]

Validation acc: 0.8400

Start of epoch 107
Training loss: 0.0229
Training metric: 0.8207
perturbation loss: 0.0951


 11%|█         | 108/1000 [50:15<6:54:56, 27.91s/it]

Validation acc: 0.8667

Start of epoch 108
Training loss: 0.0200
Training metric: 0.8219
perturbation loss: 0.0675


 11%|█         | 109/1000 [50:43<6:54:22, 27.90s/it]

Validation acc: 0.7933

Start of epoch 109
Training loss: 0.0517
Training metric: 0.8231
perturbation loss: 0.1432


 11%|█         | 110/1000 [51:11<6:53:40, 27.89s/it]

Validation acc: 0.9267

Start of epoch 110
Training loss: 0.0033
Training metric: 0.8243
perturbation loss: 0.0098


 11%|█         | 111/1000 [51:39<6:53:18, 27.89s/it]

Validation acc: 0.6067

Start of epoch 111
Training loss: 0.0604
Training metric: 0.8255
perturbation loss: 0.1695


 11%|█         | 112/1000 [52:07<6:53:02, 27.91s/it]

Validation acc: 0.8933

Start of epoch 112
Training loss: 0.0059
Training metric: 0.8266
perturbation loss: 0.0221


 11%|█▏        | 113/1000 [52:35<6:52:33, 27.91s/it]

Validation acc: 0.8867

Start of epoch 113
Training loss: 0.0233
Training metric: 0.8278
perturbation loss: 0.0416


 11%|█▏        | 114/1000 [53:02<6:51:58, 27.90s/it]

Validation acc: 0.9067

Start of epoch 114
Training loss: 0.0270
Training metric: 0.8289
perturbation loss: 0.0550


 12%|█▏        | 115/1000 [53:30<6:51:34, 27.90s/it]

Validation acc: 0.4400

Start of epoch 115
Training loss: 0.0001
Training metric: 0.8299
perturbation loss: 0.0006


 12%|█▏        | 116/1000 [53:58<6:51:02, 27.90s/it]

Validation acc: 0.8800

Start of epoch 116
Training loss: 0.3066
Training metric: 0.8310
perturbation loss: 0.1399


 12%|█▏        | 117/1000 [54:26<6:50:44, 27.91s/it]

Validation acc: 0.6667

Start of epoch 117
Training loss: 0.0732
Training metric: 0.8321
perturbation loss: 0.1227


 12%|█▏        | 118/1000 [54:54<6:50:14, 27.91s/it]

Validation acc: 0.7867

Start of epoch 118
Training loss: 0.0319
Training metric: 0.8331
perturbation loss: 0.0880


 12%|█▏        | 119/1000 [55:22<6:49:42, 27.90s/it]

Validation acc: 0.8933

Start of epoch 119
Training loss: 0.0094
Training metric: 0.8341
perturbation loss: 0.0175


 12%|█▏        | 120/1000 [55:50<6:49:19, 27.91s/it]

Validation acc: 0.9000

Start of epoch 120
Training loss: 0.3086
Training metric: 0.8351
perturbation loss: 0.3163


 12%|█▏        | 121/1000 [56:18<6:48:55, 27.91s/it]

Validation acc: 0.6533

Start of epoch 121
Training loss: 0.0790
Training metric: 0.8361
perturbation loss: 0.2074


 12%|█▏        | 122/1000 [56:46<6:48:24, 27.91s/it]

Validation acc: 0.6400

Start of epoch 122
Training loss: 0.0611
Training metric: 0.8371
perturbation loss: 0.1013


 12%|█▏        | 123/1000 [57:14<6:47:53, 27.91s/it]

Validation acc: 0.8733

Start of epoch 123
Training loss: 0.2566
Training metric: 0.8381
perturbation loss: 0.2695


 12%|█▏        | 124/1000 [57:41<6:47:24, 27.90s/it]

Validation acc: 0.9067

Start of epoch 124
Training loss: 0.0873
Training metric: 0.8390
perturbation loss: 0.1769


 12%|█▎        | 125/1000 [58:09<6:46:57, 27.91s/it]

Validation acc: 0.9267

Start of epoch 125
Training loss: 0.0030
Training metric: 0.8399
perturbation loss: 0.0087


 13%|█▎        | 126/1000 [58:38<6:48:08, 28.02s/it]

Validation acc: 0.8600

Start of epoch 126
Training loss: 0.1614
Training metric: 0.8409
perturbation loss: 0.2517


 13%|█▎        | 127/1000 [59:06<6:47:11, 27.99s/it]

Validation acc: 0.8933

Start of epoch 127
Training loss: 0.0137
Training metric: 0.8418
perturbation loss: 0.0483


 13%|█▎        | 128/1000 [59:34<6:46:59, 28.00s/it]

Validation acc: 0.4467

Start of epoch 128
Training loss: 0.1504
Training metric: 0.8427
perturbation loss: 0.1526


 13%|█▎        | 129/1000 [1:00:02<6:46:06, 27.97s/it]

Validation acc: 0.8933

Start of epoch 129
Training loss: 0.0167
Training metric: 0.8436
perturbation loss: 0.0360


 13%|█▎        | 130/1000 [1:00:30<6:46:28, 28.03s/it]

Validation acc: 0.9133

Start of epoch 130
Training loss: 0.0250
Training metric: 0.8445
perturbation loss: 0.0683


 13%|█▎        | 131/1000 [1:00:58<6:45:23, 27.99s/it]

Validation acc: 0.8733

Start of epoch 131
Training loss: 0.3940
Training metric: 0.8453
perturbation loss: 0.0959


 13%|█▎        | 132/1000 [1:01:26<6:44:56, 27.99s/it]

Validation acc: 0.8667

Start of epoch 132
Training loss: 0.0957
Training metric: 0.8462
perturbation loss: 0.0976


 13%|█▎        | 133/1000 [1:01:54<6:44:14, 27.97s/it]

Validation acc: 0.4867

Start of epoch 133


 14%|█▎        | 137/1000 [1:03:45<6:42:05, 27.96s/it]

Validation acc: 0.4000

Start of epoch 137
Training loss: 0.0024
Training metric: 0.8505
perturbation loss: 0.0085


 14%|█▍        | 138/1000 [1:04:13<6:41:33, 27.95s/it]

Validation acc: 0.8667

Start of epoch 138
Training loss: 0.1167
Training metric: 0.8513
perturbation loss: 0.0995


 14%|█▍        | 139/1000 [1:04:41<6:42:12, 28.03s/it]

Validation acc: 0.9267

Start of epoch 139
Training loss: 0.0097
Training metric: 0.8520
perturbation loss: 0.0279


 14%|█▍        | 140/1000 [1:05:09<6:41:14, 27.99s/it]

Validation acc: 0.9267

Start of epoch 140
Training loss: 0.3457
Training metric: 0.8528
perturbation loss: 0.2112


 14%|█▍        | 141/1000 [1:05:37<6:40:50, 28.00s/it]

Validation acc: 0.9333

Start of epoch 141
Training loss: 0.0024
Training metric: 0.8536
perturbation loss: 0.0118


 14%|█▍        | 142/1000 [1:06:05<6:40:03, 27.98s/it]

Validation acc: 0.8067

Start of epoch 142
Training loss: 0.0037
Training metric: 0.8544
perturbation loss: 0.0138


 14%|█▍        | 143/1000 [1:06:33<6:39:03, 27.94s/it]

Validation acc: 0.9333

Start of epoch 143
