In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

# Load data

In [2]:
def color_preprocessing(x_train, x_test):
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    mean = [125.307, 122.95, 113.865]
    std = [62.9932, 62.0887, 66.7048]
    for i in range(3):
        x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
        x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
    return x_train, x_test


# load data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)
x_train, x_test = color_preprocessing(x_train, x_test)

# mobilnet-v2.py

## 定义Block

In [2]:
class Block(layers.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 expansion,
                 stride,
                 weight_decay=1e-4):
        super(Block, self).__init__()
        self.stride = stride
        self.in_planes = in_planes
        self.out_planes = out_planes
        planes = in_planes * expansion
        self.conv1 = layers.Conv2D(
            filters=planes,
            kernel_size=1,
            strides=1,
            padding='valid',
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=keras.regularizers.l2(weight_decay))
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.DepthwiseConv2D(
            kernel_size=3,
            strides=stride,
            padding='same',
            use_bias=False,
            depthwise_initializer='he_normal',
            depthwise_regularizer=keras.regularizers.l2(weight_decay))
        self.bn2 = layers.BatchNormalization()
        self.conv3 = layers.Conv2D(
            filters=out_planes,
            kernel_size=1,
            strides=1,
            padding='valid',
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=keras.regularizers.l2(weight_decay))
        self.bn3 = layers.BatchNormalization()

        if stride == 1 and in_planes != out_planes:
            self.shortcut = keras.Sequential()
            self.shortcut.add(
                layers.Conv2D(
                    filters=out_planes,
                    kernel_size=1,
                    strides=1,
                    padding='valid',
                    use_bias=False,
                    kernel_initializer='he_normal',
                    kernel_regularizer=keras.regularizers.l2(weight_decay)))
            self.shortcut.add(layers.BatchNormalization())

    def call(self, inputs):
        outputs = self.conv1(inputs)
        outputs = self.bn1(outputs)
        outputs = tf.nn.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = self.bn2(outputs)
        outputs = tf.nn.relu(outputs)
        outputs = self.conv3(outputs)
        outputs = self.bn3(outputs)
        if self.stride == 1:
            if self.in_planes == self.out_planes:
                outputs += inputs
            else:
                outputs += self.shortcut(inputs)
        return outputs

## 定义网络架构

In [3]:
class MobileNet(keras.Model):
    # (expansion, out_planes, num_blocks, stride)
    cfg = [
        (1, 16, 1, 1),
        (6, 24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
        (6, 32, 3, 2),
        (6, 64, 4, 2),
        (6, 96, 3, 1),
        (6, 160, 3, 2),
        (6, 320, 1, 1)
    ]

    def __init__(self, num_classes=10, weight_decay=1e-4):
        super(MobileNet, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = layers.Conv2D(
            filters=32,
            kernel_size=3,
            strides=1,
            padding='same',
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=keras.regularizers.l2(weight_decay))
        self.bn1 = layers.BatchNormalization()
        self.sequential = self._make_layers(in_planes=32)
        self.conv2 = layers.Conv2D(
            filters=1280,
            kernel_size=3,
            strides=1,
            padding='same',
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=keras.regularizers.l2(weight_decay))
        self.bn2 = layers.BatchNormalization()
        self.pooling = layers.GlobalAveragePooling2D()
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(
            units=num_classes,
            activation='softmax',
            kernel_initializer='he_normal',
            kernel_regularizer=keras.regularizers.l2(weight_decay))
    
    def _make_layers(self, in_planes):
        sequential = keras.Sequential()
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1] * (num_blocks - 1) # 下采样只在每个bottlenect的第一次
            for stride in strides:
                sequential.add(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return sequential
    
    def call(self, inputs):
        outputs = self.conv1(inputs)
        outputs = self.bn1(outputs)
        outputs = tf.nn.relu(outputs)
        outputs = self.sequential(outputs)
        outputs = self.conv2(outputs)
        outputs = self.bn2(outputs)
        outputs = tf.nn.relu(outputs)
        outputs = self.pooling(outputs)
        outputs = self.flatten(outputs)
        outputs = self.dense(outputs)
        return outputs

## 构建Mobilenet 网络

In [4]:
# def MobileNetV2(input_shape, num_classes=10, weight_decay=1e-4):
#     model = MobileNet(num_classes=num_classes, weight_decay=weight_decay)
#     model.build(input_shape=input_shape)
#     return model

def MobileNetV2(input_shape, num_classes=10, weight_decay=1e-4):
    inputs = keras.Input(shape=input_shape)
    outputs = MobileNet(num_classes=num_classes, weight_decay=weight_decay)(inputs)
    return keras.Model(inputs=inputs, outputs=outputs)

In [5]:
# model = MobileNetV2(input_shape=(None, 224, 224, 3))
# model = MobileNetV2(input_shape=(224, 224, 3))
# model.summary()

# 数据处理 parse.py

In [6]:
import pickle
import numpy as np
import os
import cv2

In [7]:
def unpickle(file):
    with open(file, 'rb') as f:
        dict = pickle.load(f, encoding='bytes')
        x = dict[b'data']
        y = dict[b'labels']
        x = np.reshape(x, (10000, 3, 32, 32))
        x = np.transpose(x, (0, 2, 3, 1))
        y = np.array(y)
    return x, y

In [8]:
def save(dir, x, y):
    assert x.shape[0] == y.shape[0], 'x num is not equal with y'
    for i, label in enumerate(y):
        sub_dir = os.path.join(dir, str(label))
        if os.path.exists(sub_dir):
            pass
        else:
            os.makedirs(sub_dir)
        path = os.path.join(sub_dir, str(i) + '.png')
        image = cv2.cvtColor(x[i], cv2.COLOR_RGB2BGR)
        cv2.imwrite(path, image)
        if (i + 1) % 100 == 0:
            print(i)

In [9]:
def parse(input_dir, output_dir, file_names):
    for name in file_names:
        file = os.path.join(input_dir, name)
        x, y = unpickle(file)
        save(output_dir, x, y)

## main函数

In [11]:
train = [
    'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
    'data_batch_5'
]
test = ['test_batch']
input_dir = '/home/kang/CV/DATASETS/cifar10/original'
train_dir = '/home/kang/CV/DATASETS/cifar10/train'
test_dir = '/home/kang/CV/DATASETS/cifar10/test'

parse(input_dir, train_dir, train)

99
199
299
399
499
599
699
799
899
999
1099
1199
1299
1399
1499
1599
1699
1799
1899
1999
2099
2199
2299
2399
2499
2599
2699
2799
2899
2999
3099
3199
3299
3399
3499
3599
3699
3799
3899
3999
4099
4199
4299
4399
4499
4599
4699
4799
4899
4999
5099
5199
5299
5399
5499
5599
5699
5799
5899
5999
6099
6199
6299
6399
6499
6599
6699
6799
6899
6999
7099
7199
7299
7399
7499
7599
7699
7799
7899
7999
8099
8199
8299
8399
8499
8599
8699
8799
8899
8999
9099
9199
9299
9399
9499
9599
9699
9799
9899
9999
99
199
299
399
499
599
699
799
899
999
1099
1199
1299
1399
1499
1599
1699
1799
1899
1999
2099
2199
2299
2399
2499
2599
2699
2799
2899
2999
3099
3199
3299
3399
3499
3599
3699
3799
3899
3999
4099
4199
4299
4399
4499
4599
4699
4799
4899
4999
5099
5199
5299
5399
5499
5599
5699
5799
5899
5999
6099
6199
6299
6399
6499
6599
6699
6799
6899
6999
7099
7199
7299
7399
7499
7599
7699
7799
7899
7999
8099
8199
8299
8399
8499
8599
8699
8799
8899
8999
9099
9199
9299
9399
9499
9599
9699
9799
9899
9999
99
199
299
399
499
599

In [12]:
parse(input_dir, test_dir, test)

99
199
299
399
499
599
699
799
899
999
1099
1199
1299
1399
1499
1599
1699
1799
1899
1999
2099
2199
2299
2399
2499
2599
2699
2799
2899
2999
3099
3199
3299
3399
3499
3599
3699
3799
3899
3999
4099
4199
4299
4399
4499
4599
4699
4799
4899
4999
5099
5199
5299
5399
5499
5599
5699
5799
5899
5999
6099
6199
6299
6399
6499
6599
6699
6799
6899
6999
7099
7199
7299
7399
7499
7599
7699
7799
7899
7999
8099
8199
8299
8399
8499
8599
8699
8799
8899
8999
9099
9199
9299
9399
9499
9599
9699
9799
9899
9999


# 数据生成 generator.py

In [10]:
from functools import partial
import numpy as np
import random
import cv2
import traceback
import os

## 定义生成类

In [11]:
class Generator(object):
    def __init__(self, num_classes, is_shuffle, is_horizontal_flip,
                 is_random_crop, is_random_cutout):
        self.num_classes = num_classes
        self.is_shuffle = is_shuffle
        self.is_horizontal_flip = is_horizontal_flip
        self.is_random_crop = is_random_crop
        self.is_random_cutout = is_random_cutout

    def random_horizontal_flip(self, image):
        if random.random() <= 0.5:
            return cv2.flip(image, 1)
        else:
            return image

    def random_crop(self, image, padding=4):
        if random.random() <= 0.5:
            image = cv2.copyMakeBorder(image,
                                       padding,
                                       padding,
                                       padding,
                                       padding,
                                       cv2.BORDER_CONSTANT,
                                       value=[0, 0, 0])
            x = np.random.randint(0, padding * 2)
            y = np.random.randint(0, padding * 2)
            image = image[y:y + 32, x:x + 32]
            return image
        else:
            return image
    
    def random_cutout(self, image, offset=8):
        if random.random() <= 0.5:
            h, w, _ = image.shape
            x = np.random.randint(0, w - offset)
            y = np.random.randint(0, h - offset)
            image[y:y + offset, x: x + offset, :] = 0
            return image
        else:
            return image
    
    def preprocess(self, x, y):
        x = x.astype('float32') / 255
        x -= (0.4914, 0.4822, 0.4465)
        x /= (0.2023, 0.1994, 0.2010)
        return x, y
    
    def load(self, path):
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
    
    def sample(self, image_path, label):
        image = self.load(image_path)
        image = np.array(image, dtype=np.float32)
        one_hot = np.zeros(self.num_classes, dtype=np.int32)
        one_hot[label] = 1
        label = one_hot
        image, label = self.preprocess(image, one_hot)
        
        if self.is_horizontal_flip:
            image = self.random_horizontal_flip(image)
        if self.is_random_crop:
            image = self.random_crop(image)
        if self.is_random_cutout:
            image = self.random_cutout(image)
        
        return image, label
    
    def generate(self, dataset):
        if self.is_shuffle:
            random.shuffle(dataset)
        for image_path, label in dataset:
            try:
                data = self.sample(image_path, label)
                yield data
            except Exception as e:
                traceback.print_tb(e.__traceback__)

## 读取数据集

In [12]:
def get_data(dir):
    dataset = list()
    for lable in os.listdir(dir):
        sub_dir = os.path.join(dir, lable)
        for name in os.listdir(sub_dir):
            path = os.path.join(sub_dir, name)
            dataset.append((path, int(lable)))
    return dataset

## 生成数据

In [13]:
def generator(dir,
              num_classes,
              is_shuffle=False,
              is_horizontal_flip=False,
              is_random_crop=False,
              is_random_cutout=False):
    dataset = get_data(dir)
    gt = Generator(num_classes=num_classes,
                   is_shuffle=is_shuffle,
                   is_horizontal_flip=is_horizontal_flip,
                   is_random_crop=is_random_crop,
                   is_random_cutout=is_random_cutout)
    return partial(gt.generate, dataset=dataset)

# 定义学习率类 cb_learning_rate.py

In [14]:
from tensorflow import keras
import tensorflow as tf

In [15]:
RANGE = 'range'
EXPONENT = 'exponent'


class LearningRate(object):
    def __init__(self,
                 optimizer=None,
                 method=None,
                 lr_range=None,
                 initial_lr=None):
        self.optimizer = optimizer
        self.method = method
        self.lr_range = lr_range
        self.initial_lr = initial_lr

    def __call__(self, epoch, logs=None):
        if self.optimizer is None:
            raise ValueError('optimizer is none.')
        if not hasattr(self.optimizer, 'learning_rate'):
            raise ValueError(
                'Optimizer must have a "learning_rate" attribute.')
        # 从模型的优化器中获取当前的学习率。
        lr = float(keras.backend.get_value(self.optimizer.learning_rate))

        # 调用调度函数来获取预定的学习率。
        if self.method == 'range':
            scheduled_lr = self.adjust_range(epoch, lr)
        elif self.method == 'exponent':
            scheduled_lr = self.adjust_exponent(epoch)
        else:
            scheduled_lr = lr
        # 在这个周期开始之前，将值设置为优化器的值。
        keras.backend.set_value(self.optimizer.learning_rate, scheduled_lr)
    
    def adjust_range(self, epoch, lr):
        if self.lr_range is None:
            raise ValueError('lr_ranges is none.')
        if epoch < self.lr_range[0][0] or epoch > self.lr_range[-1][0]:
            return lr
        for i in range(len(self.lr_range)-1, -1, -1):
            if epoch >= self.lr_range[i][0]:
                return self.lr_range[i][1]
        return lr
    
    def adjust_exponent(self, epoch, lr):
        if self.initial_lr is None:
            raise ValueError('initial_lr is none.')
        if epoch < 10:
            return self.initial_lr
        else:
            return self.initial_lr * tf.math.exp(0.01 * (10 - epoch))

# 超参数设置 config.py

In [16]:
width = 32
height = 32
num_channels = 3

num_classes = 10
batch_size = 64
epochs = 400
show_every_steps = 100
start_epoch = 0

num_train_samples = 50000
lr = 1e-1
lr_range = [(150, 1e-2), (250, 1e-3), (350, 1e-4)]

weight_decay = 5e-4
label_smoothing = 0.2

model_name = 'mobilenet_v2'
resnet_version = 2
resnet_depth = 164

model_dir = './models/' + model_name + '/'
summary_dir = './summaries/' + model_name + '/'

train_dir = '/home/kang/CV/DATASETS/cifar10/train/'
test_dir = '/home/kang/CV/DATASETS/cifar10/test'

# 训练 train.py

In [17]:
from tensorflow import keras
from tqdm import tqdm
import tensorflow as tf
import datetime
import numpy as np
import os

## 定义训练类

In [18]:
class Trainer(object):
    def __init__(self,
                 model_dir,
                 summary_dir,
                 model,
                 optimizer,
                 lr_range=None,
                 label_smoothing=0.2):
        self.model_dir = model_dir
        self.summary_dir = summary_dir
        self.model = model
        self.optimizer = optimizer
        self.lr_range = lr_range
        self.label_smoothing = label_smoothing
        self.cb_lr = self.load_cb_lr()
        self.summary_writer = self.get_summary_writer()
        self.create_model_dir()

    def load_cb_lr(self):
        return LearningRate(optimizer=self.optimizer,
                            method=RANGE,
                            lr_range=lr_range)

    def create_model_dir(self):
        if self.model_dir is None:
            return None
        self.model_dir = os.path.join(
            self.model_dir,
            datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
        os.makedirs(self.model_dir)

    def get_summary_writer(self):
        if self.summary_dir is None:
            return None
        else:
            log_dir = os.path.join(
                self.summary_dir,
                datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
            return tf.summary.create_file_writer(logdir=log_dir)

    def update_summary(self, **kwargs):
        if self.summary_writer is None:
            pass
        else:
            with self.summary_writer.as_default():
                for name in kwargs:
                    tf.summary.scalar(name,
                                      kwargs[name],
                                      step=self.optimizer.iterations)

    def save_weights(self, filepath, save_format=None):
        self.model.save_weights(filepath=filepath, save_format=save_format)

    def load_weights(self, filepath):
        self.model.load_weights(filepath=filepath)

    @tf.function
    def accuracy(self, y_true, y_pred):
        res = keras.metrics.categorical_accuracy(y_true, y_pred)
        acc = tf.reduce_mean(res)
        sum = tf.reduce_sum(res)
        num = res.shape[0]
        return num, sum, acc

    @tf.function
    def train_on_batch(self, x, y):
        keras.backend.set_learning_phase(1)
        with tf.GradientTape() as tape:
            y_pred = self.model(x)
            loss = tf.math.reduce_mean(
                keras.losses.categorical_crossentropy(
                    y_true=y,
                    y_pred=y_pred,
                    label_smoothing=self.label_smoothing))
            if len(self.model.losses) == 0:
                total_loss = loss
            else:
                regularization_loss = tf.math.add_n(self.model.losses)
                total_loss = loss + regularization_loss
        variables = self.model.trainable_variables
        gradients = tape.gradient(total_loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        self.update_summary(loss=loss)
        return total_loss, loss, y_pred

    def train_on_epoch(self, epochs, epoch, dataset, show_every_steps):
        total_n = 0
        total_s = 0
        losses = list()
        stats = dict()
        pbar = tqdm(dataset)
        pbar.set_description('Epoch %03d / %03d' % (epoch, epochs))
        stats['lr'] = self.optimizer.learning_rate.numpy()
        for x, y in pbar:
            total_loss, loss, y_pred = self.train_on_batch(x, y)
            n, s, acc = self.accuracy(y_true=y, y_pred=y_pred)
            total_n += n.numpy()
            total_s += s.numpy()
            stats['iterations'] = self.optimizer.iterations.numpy()
            stats['loss'] = loss.numpy()
            stats['total loss'] = total_loss.numpy()
            losses.append(total_loss.numpy())
            if self.optimizer.iterations % show_every_steps == 0:
                avg_acc = total_s / (total_n + 1e-5)
                avg_loss = np.mean(losses)
                stats['avg loss'] = avg_loss
                stats['train acc'] = avg_acc
                self.update_summary(avg_acc=avg_acc, avg_loss=avg_loss)
            pbar.set_postfix(stats)
        pbar.close()

    @tf.function
    def val_on_batch(self, x):
        keras.backend.set_learning_phase(0)
        y_pred = self.model(x)
        return y_pred

    def val_on_epoch(self, epochs, epoch, dataset):
        total_n = 0
        total_s = 0
        avg_acc = 0
        pbar = tqdm(dataset)
        pbar.set_description('Epoch %03d / %03d' % (epoch, epochs))
        for x, y in pbar:
            y_pred = self.val_on_batch(x)
            n, s, acc = self.accuracy(y_true=y, y_pred=y_pred)
            total_n += n.numpy()
            total_s += s.numpy()
            avg_acc = total_s / (total_n + 1e-5)
            pbar.set_postfix({'val acc': avg_acc})
        pbar.close()
        self.update_summary(val_acc=avg_acc)

    def train(self,
              train_dataset,
              val_dataset,
              show_every_steps,
              epochs,
              start_epoch=0):
        for epoch in range(start_epoch, epochs):
            self.cb_lr(epoch=epoch)
            self.train_on_epoch(epochs, epoch, train_dataset, show_every_steps)
            self.val_on_epoch(epochs, epoch, val_dataset)
            if self.model_dir is None:
                pass
            else:
                self.save_weights(
                    os.path.join(self.model_dir, 'snapshot-%d.h5') % epoch)

## main函数

In [19]:
def main():
    train_generator = generator(dir=train_dir,
                                num_classes=num_classes,
                                is_shuffle=True,
                                is_horizontal_flip=True,
                                is_random_crop=True,
                                is_random_cutout=True)
    train_generator = tf.data.Dataset.from_generator(train_generator,
                                                     output_types=(tf.float32,
                                                                   tf.int32))
    train_generator = train_generator.batch(batch_size=batch_size)
    train_generator = train_generator.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_generator = generator(dir=test_dir, num_classes=num_classes)
    val_generator = tf.data.Dataset.from_generator(val_generator,
                                                   output_types=(tf.float32,
                                                                 tf.int32))
    val_generator = val_generator.batch(batch_size=batch_size)
    val_generator = val_generator.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    model = MobileNetV2(input_shape=(height, width, num_channels),
                        num_classes=num_classes,
                        weight_decay=weight_decay)
    trainer = Trainer(model_dir=model_dir,
                      summary_dir=summary_dir,
                      model=model,
                      optimizer=keras.optimizers.SGD(learning_rate=lr, momentum=0.9),
                      lr_range=lr_range,
                      label_smoothing=label_smoothing)
    trainer.train(train_dataset=train_generator,
                  val_dataset=val_generator,
                  show_every_steps=show_every_steps,
                  epochs=epochs,
                  start_epoch=start_epoch)

In [20]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
main()

Epoch 000 / 400: : 643it [01:42,  6.29it/s, lr=0.1, iterations=643, loss=1.86, total loss=3.83, avg loss=4.58, train acc=0.377]
Epoch 000 / 400: : 157it [00:06, 25.41it/s, val acc=0.404]
Epoch 001 / 400: : 643it [01:33,  6.86it/s, lr=0.1, iterations=1286, loss=1.54, total loss=2.92, avg loss=3.35, train acc=0.545]
Epoch 001 / 400: : 157it [00:05, 29.63it/s, val acc=0.59] 
Epoch 002 / 400: : 643it [01:34,  6.82it/s, lr=0.1, iterations=1929, loss=1.57, total loss=2.64, avg loss=2.76, train acc=0.628]
Epoch 002 / 400: : 157it [00:06, 25.91it/s, val acc=0.664]
Epoch 003 / 400: : 643it [01:34,  6.81it/s, lr=0.1, iterations=2572, loss=1.44, total loss=2.3, avg loss=2.42, train acc=0.684] 
Epoch 003 / 400: : 157it [00:04, 34.21it/s, val acc=0.684]
Epoch 004 / 400: : 643it [01:34,  6.77it/s, lr=0.1, iterations=3215, loss=1.42, total loss=2.13, avg loss=2.16, train acc=0.727]
Epoch 004 / 400: : 157it [00:04, 33.94it/s, val acc=0.729]
Epoch 005 / 400: : 643it [01:34,  6.79it/s, lr=0.1, iteration

Epoch 043 / 400: : 643it [01:30,  7.11it/s, lr=0.1, iterations=28292, loss=1.22, total loss=1.49, avg loss=1.41, train acc=0.853] 
Epoch 043 / 400: : 157it [00:04, 33.84it/s, val acc=0.807]
Epoch 044 / 400: : 643it [01:30,  7.09it/s, lr=0.1, iterations=28935, loss=1.2, total loss=1.46, avg loss=1.42, train acc=0.852] 
Epoch 044 / 400: : 157it [00:04, 33.89it/s, val acc=0.824]
Epoch 045 / 400: : 643it [01:30,  7.13it/s, lr=0.1, iterations=29578, loss=1.09, total loss=1.35, avg loss=1.41, train acc=0.853] 
Epoch 045 / 400: : 157it [00:04, 33.74it/s, val acc=0.821]
Epoch 046 / 400: : 643it [01:30,  7.12it/s, lr=0.1, iterations=30221, loss=1.21, total loss=1.47, avg loss=1.41, train acc=0.852] 
Epoch 046 / 400: : 157it [00:04, 33.59it/s, val acc=0.822]
Epoch 047 / 400: : 643it [01:30,  7.14it/s, lr=0.1, iterations=30864, loss=1.15, total loss=1.42, avg loss=1.41, train acc=0.852] 
Epoch 047 / 400: : 157it [00:04, 33.62it/s, val acc=0.809]
Epoch 048 / 400: : 643it [01:30,  7.14it/s, lr=0.1,

Epoch 086 / 400: : 643it [01:30,  7.13it/s, lr=0.1, iterations=55941, loss=1.18, total loss=1.46, avg loss=1.4, train acc=0.864]  
Epoch 086 / 400: : 157it [00:04, 33.93it/s, val acc=0.813]
Epoch 087 / 400: : 643it [01:28,  7.25it/s, lr=0.1, iterations=56584, loss=1.3, total loss=1.58, avg loss=1.41, train acc=0.863]  
Epoch 087 / 400: : 157it [00:04, 33.59it/s, val acc=0.844]
Epoch 088 / 400: : 643it [01:30,  7.13it/s, lr=0.1, iterations=57227, loss=1.22, total loss=1.5, avg loss=1.41, train acc=0.861]  
Epoch 088 / 400: : 157it [00:04, 33.82it/s, val acc=0.831]
Epoch 089 / 400: : 643it [01:30,  7.10it/s, lr=0.1, iterations=57870, loss=1.4, total loss=1.67, avg loss=1.41, train acc=0.863]  
Epoch 089 / 400: : 157it [00:04, 33.86it/s, val acc=0.783]
Epoch 090 / 400: : 643it [01:30,  7.11it/s, lr=0.1, iterations=58513, loss=1.05, total loss=1.32, avg loss=1.41, train acc=0.862]
Epoch 090 / 400: : 157it [00:04, 33.89it/s, val acc=0.81] 
Epoch 091 / 400: : 643it [01:31,  7.06it/s, lr=0.1,

Epoch 129 / 400: : 643it [01:30,  7.14it/s, lr=0.1, iterations=83590, loss=1.31, total loss=1.59, avg loss=1.4, train acc=0.868]  
Epoch 129 / 400: : 157it [00:04, 33.50it/s, val acc=0.781]
Epoch 130 / 400: : 643it [01:30,  7.14it/s, lr=0.1, iterations=84233, loss=1.26, total loss=1.54, avg loss=1.41, train acc=0.869] 
Epoch 130 / 400: : 157it [00:04, 33.53it/s, val acc=0.842]
Epoch 131 / 400: : 643it [01:30,  7.11it/s, lr=0.1, iterations=84876, loss=1.12, total loss=1.41, avg loss=1.41, train acc=0.866] 
Epoch 131 / 400: : 157it [00:04, 33.84it/s, val acc=0.84] 
Epoch 132 / 400: : 643it [01:30,  7.12it/s, lr=0.1, iterations=85519, loss=1.18, total loss=1.47, avg loss=1.41, train acc=0.866] 
Epoch 132 / 400: : 157it [00:04, 33.67it/s, val acc=0.813]
Epoch 133 / 400: : 643it [01:30,  7.09it/s, lr=0.1, iterations=86162, loss=1.23, total loss=1.51, avg loss=1.41, train acc=0.867] 
Epoch 133 / 400: : 157it [00:04, 33.93it/s, val acc=0.84] 
Epoch 134 / 400: : 643it [01:30,  7.13it/s, lr=0.1

Epoch 171 / 400: : 157it [00:04, 33.88it/s, val acc=0.922]
Epoch 172 / 400: : 643it [01:30,  7.07it/s, lr=0.01, iterations=111239, loss=0.919, total loss=1.09, avg loss=1.09, train acc=0.976]
Epoch 172 / 400: : 157it [00:04, 33.74it/s, val acc=0.921]
Epoch 173 / 400: : 643it [01:31,  7.01it/s, lr=0.01, iterations=111882, loss=0.9, total loss=1.07, avg loss=1.08, train acc=0.977]  
Epoch 173 / 400: : 157it [00:04, 33.97it/s, val acc=0.923]
Epoch 174 / 400: : 643it [01:31,  7.06it/s, lr=0.01, iterations=112525, loss=0.903, total loss=1.07, avg loss=1.08, train acc=0.977]
Epoch 174 / 400: : 157it [00:04, 33.89it/s, val acc=0.926]
Epoch 175 / 400: : 643it [01:30,  7.09it/s, lr=0.01, iterations=113168, loss=0.93, total loss=1.09, avg loss=1.08, train acc=0.975] 
Epoch 175 / 400: : 157it [00:04, 33.71it/s, val acc=0.922]
Epoch 176 / 400: : 643it [01:31,  7.05it/s, lr=0.01, iterations=113811, loss=0.876, total loss=1.03, avg loss=1.07, train acc=0.975]
Epoch 176 / 400: : 157it [00:04, 33.69it

Epoch 214 / 400: : 643it [01:30,  7.12it/s, lr=0.01, iterations=138245, loss=0.926, total loss=1.04, avg loss=1.03, train acc=0.974] 
Epoch 214 / 400: : 157it [00:04, 33.97it/s, val acc=0.919]
Epoch 215 / 400: : 643it [01:31,  7.02it/s, lr=0.01, iterations=138888, loss=0.92, total loss=1.03, avg loss=1.03, train acc=0.975]  
Epoch 215 / 400: : 157it [00:04, 33.89it/s, val acc=0.919]
Epoch 216 / 400: : 643it [01:31,  7.05it/s, lr=0.01, iterations=139531, loss=0.889, total loss=0.999, avg loss=1.03, train acc=0.975]
Epoch 216 / 400: : 157it [00:04, 33.80it/s, val acc=0.92] 
Epoch 217 / 400: : 643it [01:31,  7.01it/s, lr=0.01, iterations=140174, loss=0.984, total loss=1.09, avg loss=1.03, train acc=0.976] 
Epoch 217 / 400: : 157it [00:04, 33.85it/s, val acc=0.916]
Epoch 218 / 400: : 643it [01:30,  7.09it/s, lr=0.01, iterations=140817, loss=0.934, total loss=1.04, avg loss=1.03, train acc=0.976] 
Epoch 218 / 400: : 157it [00:04, 34.02it/s, val acc=0.91] 
Epoch 219 / 400: : 643it [01:31,  7

Epoch 256 / 400: : 643it [01:30,  7.10it/s, lr=0.001, iterations=165251, loss=0.894, total loss=0.997, avg loss=0.984, train acc=0.994]
Epoch 256 / 400: : 157it [00:04, 33.96it/s, val acc=0.938]
Epoch 257 / 400: : 643it [01:30,  7.12it/s, lr=0.001, iterations=165894, loss=0.869, total loss=0.972, avg loss=0.984, train acc=0.994]
Epoch 257 / 400: : 157it [00:04, 33.86it/s, val acc=0.938]
Epoch 258 / 400: : 643it [01:31,  7.05it/s, lr=0.001, iterations=166537, loss=0.886, total loss=0.989, avg loss=0.984, train acc=0.994]
Epoch 258 / 400: : 157it [00:04, 33.92it/s, val acc=0.937]
Epoch 259 / 400: : 643it [01:30,  7.11it/s, lr=0.001, iterations=167180, loss=0.88, total loss=0.983, avg loss=0.983, train acc=0.993] 
Epoch 259 / 400: : 157it [00:04, 31.66it/s, val acc=0.938]
Epoch 260 / 400: : 643it [01:31,  7.00it/s, lr=0.001, iterations=167823, loss=0.876, total loss=0.978, avg loss=0.981, train acc=0.995]
Epoch 260 / 400: : 157it [00:04, 33.54it/s, val acc=0.939]
Epoch 261 / 400: : 643it 

Epoch 298 / 400: : 643it [01:31,  7.05it/s, lr=0.001, iterations=192257, loss=0.875, total loss=0.968, avg loss=0.966, train acc=0.997]
Epoch 298 / 400: : 157it [00:05, 30.96it/s, val acc=0.942]
Epoch 299 / 400: : 643it [01:31,  7.03it/s, lr=0.001, iterations=192900, loss=0.871, total loss=0.962, avg loss=0.966, train acc=0.997]
Epoch 299 / 400: : 157it [00:04, 33.80it/s, val acc=0.942]
Epoch 300 / 400: : 643it [01:30,  7.07it/s, lr=0.001, iterations=193543, loss=0.871, total loss=0.963, avg loss=0.966, train acc=0.996]
Epoch 300 / 400: : 157it [00:04, 33.88it/s, val acc=0.941]
Epoch 301 / 400: : 643it [01:30,  7.07it/s, lr=0.001, iterations=194186, loss=0.868, total loss=0.959, avg loss=0.965, train acc=0.997]
Epoch 301 / 400: : 157it [00:04, 33.74it/s, val acc=0.94] 
Epoch 302 / 400: : 643it [01:30,  7.10it/s, lr=0.001, iterations=194829, loss=0.871, total loss=0.962, avg loss=0.965, train acc=0.997]
Epoch 302 / 400: : 157it [00:04, 33.71it/s, val acc=0.941]
Epoch 303 / 400: : 643it 

Epoch 340 / 400: : 643it [01:31,  7.06it/s, lr=0.001, iterations=219263, loss=0.868, total loss=0.951, avg loss=0.955, train acc=0.998]
Epoch 340 / 400: : 157it [00:04, 33.87it/s, val acc=0.939]
Epoch 341 / 400: : 643it [01:30,  7.13it/s, lr=0.001, iterations=219906, loss=0.869, total loss=0.952, avg loss=0.955, train acc=0.998]
Epoch 341 / 400: : 157it [00:04, 33.22it/s, val acc=0.941]
Epoch 342 / 400: : 643it [01:31,  7.05it/s, lr=0.001, iterations=220549, loss=0.868, total loss=0.95, avg loss=0.955, train acc=0.998] 
Epoch 342 / 400: : 157it [00:04, 33.68it/s, val acc=0.94] 
Epoch 343 / 400: : 643it [01:31,  7.06it/s, lr=0.001, iterations=221192, loss=0.923, total loss=1, avg loss=0.955, train acc=0.997]    
Epoch 343 / 400: : 157it [00:04, 33.89it/s, val acc=0.94] 
Epoch 344 / 400: : 643it [01:31,  7.03it/s, lr=0.001, iterations=221835, loss=0.868, total loss=0.949, avg loss=0.954, train acc=0.998]
Epoch 344 / 400: : 157it [00:04, 33.95it/s, val acc=0.939]
Epoch 345 / 400: : 643it 

Epoch 382 / 400: : 643it [01:31,  7.00it/s, lr=1e-04, iterations=246269, loss=0.869, total loss=0.949, avg loss=0.952, train acc=0.998]
Epoch 382 / 400: : 157it [00:04, 33.67it/s, val acc=0.943]
Epoch 383 / 400: : 643it [01:30,  7.10it/s, lr=1e-04, iterations=246912, loss=0.868, total loss=0.948, avg loss=0.951, train acc=0.998]
Epoch 383 / 400: : 157it [00:04, 33.45it/s, val acc=0.942]
Epoch 384 / 400: : 643it [01:31,  7.04it/s, lr=1e-04, iterations=247555, loss=0.868, total loss=0.947, avg loss=0.951, train acc=0.998]
Epoch 384 / 400: : 157it [00:04, 33.63it/s, val acc=0.942]
Epoch 385 / 400: : 643it [01:31,  7.02it/s, lr=1e-04, iterations=248198, loss=0.888, total loss=0.968, avg loss=0.951, train acc=0.999]
Epoch 385 / 400: : 157it [00:05, 29.32it/s, val acc=0.941]
Epoch 386 / 400: : 643it [01:30,  7.08it/s, lr=1e-04, iterations=248841, loss=0.884, total loss=0.964, avg loss=0.951, train acc=0.998]
Epoch 386 / 400: : 157it [00:04, 32.88it/s, val acc=0.943]
Epoch 387 / 400: : 643it 