# Лабораторная работа 4

Tensorflow 2.x

1) Подготовка данных

2) Использование Keras Model API

3) Использование Keras Sequential + Functional API

https://www.tensorflow.org/tutorials

Для выполнения лабораторной работы необходимо установить tensorflow версии 2.0 или выше .

Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.



In [1]:
import tensorflow as tf
import numpy as np
import os
import math
import timeit
import matplotlib.pyplot as plt

%matplotlib inline


USE_GPU = True

if USE_GPU:
    device = '/device:GPU:0'
else:
    device = '/cpu:0'

# Constant to control how often we print when training models.
print_every = 100
print('Using device: ', device)

Using device:  /device:GPU:0


# Подготовка данных
Загрузите набор данных из предыдущей лабораторной работы.

In [2]:
def load_mnist(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch the CIFAR-10 dataset from the web and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 dataset and use appropriate data types and shapes
    mnist = tf.keras.datasets.mnist.load_data()
    (X_train, y_train), (X_test, y_test) = mnist
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean pixel and divide by std
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_mnist()
X_train = X_train.reshape(-1, 28, 28, 1)
X_val = X_val.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (49000, 28, 28, 1)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 28, 28, 1)
Validation labels shape:  (1000,)
Test data shape:  (10000, 28, 28, 1)
Test labels shape:  (10000,)


In [3]:
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))


train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)

In [4]:
# We can iterate through a dataset like this:
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t > 5: break

0 (64, 28, 28, 1) (64,)
1 (64, 28, 28, 1) (64,)
2 (64, 28, 28, 1) (64,)
3 (64, 28, 28, 1) (64,)
4 (64, 28, 28, 1) (64,)
5 (64, 28, 28, 1) (64,)
6 (64, 28, 28, 1) (64,)


#  Keras Model Subclassing API


Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:

1) Определить новый класс, который является наследником tf.keras.Model.

2) В методе __init__() определить все необходимые слои из модуля tf.keras.layer

3) Реализовать прямой проход в методе call() на основе слоев, объявленных в __init__()

Ниже приведен пример использования keras API для определения двухслойной полносвязной сети.

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras

In [5]:
class TwoLayerFC(tf.keras.Model):
    def __init__(self, hidden_size, num_classes):
        super(TwoLayerFC, self).__init__()
        initializer = tf.initializers.VarianceScaling(scale=2.0)
        self.fc1 = tf.keras.layers.Dense(hidden_size, activation='relu',
                                   kernel_initializer=initializer)
        self.fc2 = tf.keras.layers.Dense(num_classes, activation='softmax',
                                   kernel_initializer=initializer)
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x, training=False):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


def test_TwoLayerFC():
    """ A small unit test to exercise the TwoLayerFC model above. """
    input_size, hidden_size, num_classes = 50, 42, 10
    x = tf.zeros((64, input_size))
    model = TwoLayerFC(hidden_size, num_classes)
    # with tf.device(device):
    scores = model(x)
    print(scores.shape)

test_TwoLayerFC()

(64, 10)


Реализуйте трехслойную CNN для вашей задачи классификации.

Архитектура сети:
    
1. Сверточный слой (5 x 5 kernels, zero-padding = 'same')
2. Функция активации ReLU
3. Сверточный слой (3 x 3 kernels, zero-padding = 'same')
4. Функция активации ReLU
5. Полносвязный слой
6. Функция активации Softmax

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense

In [6]:
class ThreeLayerConvNet(tf.keras.Model):
    def __init__(self, channel_1, channel_2, num_classes):
        super(ThreeLayerConvNet, self).__init__()
        ########################################################################
        # Определение слоев для сверточной нейронной сети.
        ########################################################################
        self.conv1 = tf.keras.layers.Conv2D(channel_1, (5, 5), padding='same', activation='relu')
        self.conv2 = tf.keras.layers.Conv2D(channel_2, (3, 3), padding='same', activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, x, training=False):
        ########################################################################
        # Прямой проход для сверточной нейронной сети.
        ########################################################################
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

In [7]:
def test_ThreeLayerConvNet():
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        x = tf.zeros((64, 3, 32, 32))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)


Пример реализации процесса обучения:

In [10]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for one epoch on the CIFAR-10 training set and periodically checks
    accuracy on the CIFAR-10 validation set.

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for

    Returns: Nothing, but prints progress during trainingn
    """
    with tf.device(device):

        # Compute the loss like we did in Part II
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        t = 0
        for epoch in range(num_epochs):


            for x_np, y_np in train_dset:
                with tf.GradientTape() as tape:

                    # Use the model function to build the forward pass.
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                    gradients = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                    # Update the metrics
                    train_loss.update_state(loss)
                    train_accuracy.update_state(y_np, scores)

                    if t % print_every == 0:
                        for test_x, test_y in val_dset:
                            # During validation at end of epoch, training set to False
                            prediction = model(test_x, training=False)
                            t_loss = loss_fn(test_y, prediction)

                            val_loss.update_state(t_loss)
                            val_accuracy.update_state(test_y, prediction)

                        template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                        print (template.format(t, epoch+1,
                                             train_loss.result(),
                                             train_accuracy.result()*100,
                                             val_loss.result(),
                                             val_accuracy.result()*100))
                    t += 1

In [11]:
hidden_size, num_classes = 4000, 10
learning_rate = 1e-2

def model_init_fn():
    return TwoLayerFC(hidden_size, num_classes)

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.881380319595337, Accuracy: 4.6875, Val Loss: 2.4567816257476807, Val Accuracy: 16.399999618530273
Iteration 100, Epoch 1, Loss: 0.6470337510108948, Accuracy: 80.151611328125, Val Loss: 1.5014357566833496, Val Accuracy: 49.54999923706055
Iteration 200, Epoch 1, Loss: 0.5125266909599304, Accuracy: 84.67817687988281, Val Loss: 1.1509863138198853, Val Accuracy: 61.966670989990234
Iteration 300, Epoch 1, Loss: 0.4545842111110687, Accuracy: 86.47737121582031, Val Loss: 0.9624605178833008, Val Accuracy: 68.5250015258789
Iteration 400, Epoch 1, Loss: 0.40970030426979065, Accuracy: 87.7844467163086, Val Loss: 0.8429557085037231, Val Accuracy: 72.53999328613281
Iteration 500, Epoch 1, Loss: 0.3858059346675873, Accuracy: 88.55414581298828, Val Loss: 0.760904848575592, Val Accuracy: 75.3499984741211
Iteration 600, Epoch 1, Loss: 0.36307987570762634, Accuracy: 89.22888946533203, Val Loss: 0.7005277276039124, Val Accuracy: 77.44285583496094
Iteration 700, Epoch 1, Loss:

Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9 .

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD

Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50% .

In [12]:
learning_rate = 3e-3
channel_1, channel_2, num_classes = 32, 16, 10

def model_init_fn():
    model = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.300961971282959, Accuracy: 7.8125, Val Loss: 2.2755849361419678, Val Accuracy: 9.800000190734863
Iteration 100, Epoch 1, Loss: 0.6715565919876099, Accuracy: 80.81682586669922, Val Loss: 1.404179334640503, Val Accuracy: 46.20000076293945
Iteration 200, Epoch 1, Loss: 0.5201778411865234, Accuracy: 85.05130767822266, Val Loss: 1.0794507265090942, Val Accuracy: 59.96666717529297
Iteration 300, Epoch 1, Loss: 0.43750956654548645, Accuracy: 87.37022399902344, Val Loss: 0.8807223439216614, Val Accuracy: 68.0
Iteration 400, Epoch 1, Loss: 0.3750922381877899, Accuracy: 89.11315155029297, Val Loss: 0.7519369721412659, Val Accuracy: 73.05999755859375
Iteration 500, Epoch 1, Loss: 0.33468759059906006, Accuracy: 90.3349609375, Val Loss: 0.6620727777481079, Val Accuracy: 76.43333435058594
Iteration 600, Epoch 1, Loss: 0.3021031618118286, Accuracy: 91.24375915527344, Val Loss: 0.599862277507782, Val Accuracy: 78.69999694824219
Iteration 700, Epoch 1, Loss: 0.276764839887

# Использование Keras Sequential API для реализации последовательных моделей.

Пример для полносвязной сети:

In [13]:
learning_rate = 1e-2

def model_init_fn():
    input_shape = (28,28,1)
    hidden_layer_size, num_classes = 4000, 10
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    layers = [
        tf.keras.layers.Flatten(input_shape=input_shape),
        tf.keras.layers.Dense(hidden_layer_size, activation='relu',
                              kernel_initializer=initializer),
        tf.keras.layers.Dense(num_classes, activation='softmax',
                              kernel_initializer=initializer),
    ]
    model = tf.keras.Sequential(layers)
    return model

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

  super().__init__(**kwargs)


Iteration 0, Epoch 1, Loss: 2.787783145904541, Accuracy: 15.625, Val Loss: 2.76202130317688, Val Accuracy: 17.100000381469727
Iteration 100, Epoch 1, Loss: 0.6589286923408508, Accuracy: 79.81126403808594, Val Loss: 1.673017144203186, Val Accuracy: 49.29999923706055
Iteration 200, Epoch 1, Loss: 0.5193970203399658, Accuracy: 84.53824615478516, Val Loss: 1.2731164693832397, Val Accuracy: 61.400001525878906
Iteration 300, Epoch 1, Loss: 0.4595524072647095, Accuracy: 86.3943099975586, Val Loss: 1.0615370273590088, Val Accuracy: 67.69999694824219
Iteration 400, Epoch 1, Loss: 0.41477879881858826, Accuracy: 87.8039321899414, Val Loss: 0.9254206418991089, Val Accuracy: 71.97999572753906
Iteration 500, Epoch 1, Loss: 0.3903908431529999, Accuracy: 88.55414581298828, Val Loss: 0.8312861323356628, Val Accuracy: 74.96666717529297
Iteration 600, Epoch 1, Loss: 0.3671102821826935, Accuracy: 89.23928833007812, Val Loss: 0.7614436745643616, Val Accuracy: 77.1142807006836
Iteration 700, Epoch 1, Loss: 

Альтернативный менее гибкий способ обучения:

In [14]:
model = model_init_fn()
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)

[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.5401 - sparse_categorical_accuracy: 0.8362 - val_loss: 0.2995 - val_sparse_categorical_accuracy: 0.9020
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.2639 - sparse_categorical_accuracy: 0.9253


[0.22451354563236237, 0.9358000159263611]

Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API . Обучите модель двумя способами.

In [15]:
def model_init_fn():
    model = None
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################
    return model

learning_rate = 5e-4
def optimizer_init_fn():
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(learning_rate)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.3250391483306885, Accuracy: 6.25, Val Loss: 2.317898750305176, Val Accuracy: 9.700000762939453
Iteration 100, Epoch 1, Loss: 2.2475337982177734, Accuracy: 20.018564224243164, Val Loss: 2.2563745975494385, Val Accuracy: 19.850000381469727
Iteration 200, Epoch 1, Loss: 2.170219659805298, Accuracy: 32.361629486083984, Val Loss: 2.1805479526519775, Val Accuracy: 31.0
Iteration 300, Epoch 1, Loss: 2.0760531425476074, Accuracy: 41.777408599853516, Val Loss: 2.076848030090332, Val Accuracy: 39.75
Iteration 400, Epoch 1, Loss: 1.9336026906967163, Accuracy: 49.00249099731445, Val Loss: 1.9395687580108643, Val Accuracy: 45.939998626708984
Iteration 500, Epoch 1, Loss: 1.7789456844329834, Accuracy: 54.484779357910156, Val Loss: 1.7924151420593262, Val Accuracy: 50.93333053588867
Iteration 600, Epoch 1, Loss: 1.6264835596084595, Accuracy: 58.83943557739258, Val Loss: 1.6583442687988281, Val Accuracy: 54.77143096923828
Iteration 700, Epoch 1, Loss: 1.4966870546340942, 

In [16]:
model = model_init_fn()
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)

[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.6701 - sparse_categorical_accuracy: 0.8020 - val_loss: 0.2219 - val_sparse_categorical_accuracy: 0.9390
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1699 - sparse_categorical_accuracy: 0.9526


[0.1477421373128891, 0.9584000110626221]

# Использование Keras Functional API

Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры.

Ниже представлен пример для полносвязной сети.

In [17]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    inputs = tf.keras.Input(shape=input_shape)
    flattened_inputs = tf.keras.layers.Flatten()(inputs)
    fc1_output = tf.keras.layers.Dense(hidden_size, activation='relu',
                                 kernel_initializer=initializer)(flattened_inputs)
    scores = tf.keras.layers.Dense(num_classes, activation='softmax',
                             kernel_initializer=initializer)(fc1_output)

    # Instantiate the model given inputs and outputs.
    model = tf.keras.Model(inputs=inputs, outputs=scores)
    return model

def test_two_layer_fc_functional():
    """ A small unit test to exercise the TwoLayerFC model above. """
    input_size, hidden_size, num_classes = 50, 42, 10
    input_shape = (50,)

    x = tf.zeros((64, input_size))
    model = two_layer_fc_functional(input_shape, hidden_size, num_classes)

    with tf.device(device):
        scores = model(x)
        print(scores.shape)

test_two_layer_fc_functional()

(64, 10)


In [18]:
input_shape = (28, 28, 1)
hidden_size, num_classes = 4000, 10
learning_rate = 1e-2

def model_init_fn():
    return two_layer_fc_functional(input_shape, hidden_size, num_classes)

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.0077171325683594, Accuracy: 4.6875, Val Loss: 2.6899092197418213, Val Accuracy: 19.799999237060547
Iteration 100, Epoch 1, Loss: 0.6498576402664185, Accuracy: 80.33724975585938, Val Loss: 1.6222048997879028, Val Accuracy: 51.30000305175781
Iteration 200, Epoch 1, Loss: 0.5125072002410889, Accuracy: 84.86473846435547, Val Loss: 1.237444281578064, Val Accuracy: 62.66666793823242
Iteration 300, Epoch 1, Loss: 0.45453721284866333, Accuracy: 86.4617919921875, Val Loss: 1.0344390869140625, Val Accuracy: 68.69999694824219
Iteration 400, Epoch 1, Loss: 0.4109973907470703, Accuracy: 87.80782318115234, Val Loss: 0.9021774530410767, Val Accuracy: 72.77999877929688
Iteration 500, Epoch 1, Loss: 0.38661324977874756, Accuracy: 88.56973266601562, Val Loss: 0.8118723034858704, Val Accuracy: 75.5999984741211
Iteration 600, Epoch 1, Loss: 0.3635864555835724, Accuracy: 89.2470932006836, Val Loss: 0.7450323104858398, Val Accuracy: 77.74285888671875
Iteration 700, Epoch 1, Los

Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут).

Эспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.  

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods

In [19]:
class _IdentityBlock(tf.keras.Model):
    """Identity block utilizing skip connections."""

    def __init__(self, out_channels):
        super().__init__()
        """Initializes the identiy block.

        Here we simply initialize 2 layers which process the input and after the output
        is produces it is added together with the input whcih is the final output.

        Args:
            out_channels (int): The number of activation maps this block should produce
        """
        # Acts as Kaiming weight initalization
        initializer = tf.initializers.VarianceScaling(scale=2.0)

        # Part 1 of the convolution, normalization and non-linearity
        self.conv1 = tf.keras.layers.Conv2D(out_channels, 3, padding='same', use_bias=False, kernel_initializer=initializer)
        self.norm1 = tf.keras.layers.BatchNormalization(axis=3)
        self.relu1 = tf.keras.layers.Activation('relu')

        # Part 2 of the convolution, normalization and non-linearity
        self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, padding='same', use_bias=False, kernel_initializer=initializer)
        self.norm2 = tf.keras.layers.BatchNormalization(axis=3)
        self.relu2 = tf.keras.layers.Activation('relu')

        # Add layer will add together the input and the output
        self.add = tf.keras.layers.Add()

    def call(self, x, training=False):
        """Performs forward pass on the given input.

        Args:
            x (Tensor):      The input of dimensions (N, H, W, C)
            training (bool): Indicates whether the forward pass happens in the training mode

        Returns:
              out (Tensor): Output data of dim (N, H, W, C)
        """
        x_skip = tf.identity(x)                   # prepare to add the input to the output
        x = self.relu1(self.norm1(self.conv1(x))) # pass input through the first layer
        x = self.norm2(self.conv2(x))             # pass input through the second layer (without ReLU)
        out = self.relu2(self.add([x, x_skip]))   # perform ReLU on the processed input added with the raw input

        return out


class ResNet(tf.keras.Model):

    def __init__(self, in_channels=32, block_config=(2, 2, 2, 2), num_classes=10):
        """Initializes the residual network.

        The first layer produces `in_channels` activation maps which are then fed to a
        sequence of blocks containing a specified number of identity sub-blocks (first
        block is always *_BottleneckBlock*). At the end the _global average pooling_
        layer is used to flatten the activations for the linear softmax classifier.

        Args:
            in_channels (int):    The number of channels to extract after the first convolution
            block_config (tuple): The number of layers each bloack should have in sequence
            num_classes (int):    The total number of classes
        """
        super().__init__()

        # Acts as Kaiming weight initalization
        initializer = tf.initializers.VarianceScaling(scale=2.0)

        # Prepare the input for the chains of identity blocks
        self.features = tf.keras.Sequential([
            tf.keras.layers.Conv2D(in_channels, 5, padding='same', use_bias=False, kernel_initializer=initializer),
            tf.keras.layers.BatchNormalization(axis=3),
            tf.keras.layers.Activation('relu'),
        ])

        num_features = in_channels # num feaure maps to produce after each group of identity blocks

        # Loop through every group of blocks
        for i, num_layers in enumerate(block_config):
            # Use bottleneck block as the first block in every group (except first)
            if i != 0:
                self.features.add(_BottleneckBlock(num_features))
            else:
                self.features.add(_IdentityBlock(num_features))

            # Create the specified number of identity blocks for i'th group
            for j in range(num_layers-1):
                self.features.add(_IdentityBlock(num_features))

            num_features *= 2 # increase the nuber of features to be produced

        # Flatten the final activation maps using global average pooling
        self.features.add(tf.keras.layers.GlobalAveragePooling2D())

        # Softmax classifier is used as the final layer
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax', kernel_initializer=initializer)

    def call(self, x, training=False):
        """Performs forward pass on the given input.

        Args:
            x (Tensor):      The input of dimensions (N, H, W, C)
            training (bool): Indicates whether the forward pass happens in the training mode

        Returns:
            out (Tensor): Output data of dim (N, 10)
        """
        out = self.features(x)     # Get the extracted features for the linear classfier
        out = self.classifier(out) # Perform classification with softmax activation

        return out
class _BottleneckBlock(tf.keras.Model):
    """Same as identity block except it reduces the spacial area before processing the input."""

    def __init__(self, out_channels):
        """Initializes the bottleneck block.

        Unlike *_IdentityBlock*, the first convolution here reduces the spacial size of the input
        by a factor of `2`. Then, it performs the main convolution after which the output maps
        are added together with the input maps to produce final activations.

        Args:
            out_channels (int): The number of activation maps this block should produce
        """
        super().__init__()

        # Acts as Kaiming weight initalization
        initializer = tf.initializers.VarianceScaling(scale=2.0)

        # Reduce the input size by 2 to match output size
        self.skip1 = tf.keras.layers.Conv2D(out_channels, 1, strides=2, use_bias=False, kernel_initializer=initializer)

        # Part 1 of the convolution which reduces the spacial area
        self.conv1 = tf.keras.layers.Conv2D(out_channels, 3, strides=2, padding='same', use_bias=False, kernel_initializer=initializer)
        self.norm1 = tf.keras.layers.BatchNormalization(axis=3)
        self.relu1 = tf.keras.layers.Activation('relu')

        # Part 2 of the convolution which extracts features from the reduced input
        self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, padding='same', use_bias=False, kernel_initializer=initializer)
        self.norm2 = tf.keras.layers.BatchNormalization(axis=3)
        self.relu2 = tf.keras.layers.Activation('relu')

        # Add layer will add together the input and the output
        self.add = tf.keras.layers.Add()

    def call(self, x, training=False):
        """Performs forward pass on the given input.

        Args:
            x (Tensor):      The input of dimensions (N, H, W, C)
            training (bool): Indicates whether the forward pass happens in the training mode

        Returns:
            out (Tensor): Output data of dim (N, H/2, W/2, out_channels)
        """
        x_skip = self.skip1(x)                    # prepare to add the input to the output
        x = self.relu1(self.norm1(self.conv1(x))) # pass input through the first layer
        x = self.norm2(self.conv2(x))             # pass input through the second layer (without ReLU)
        out = self.relu2(self.add([x, x_skip]))   # perform ReLU on the processed input added with the raw input

        return out


In [22]:
class CustomConvNet(tf.keras.Model):
    def __init__(self):
        super(CustomConvNet, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        self.model = ResNet()

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        x = self.model.call(input_tensor, training)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x


print_every = 100
num_epochs = 3

model = CustomConvNet()

def model_init_fn():
    return CustomConvNet()

def optimizer_init_fn():
    learning_rate = 1e-3
    return tf.keras.optimizers.Adam(learning_rate)

train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epochs, is_training=True)



Iteration 0, Epoch 1, Loss: 3.767016887664795, Accuracy: 14.0625, Val Loss: 131.228515625, Val Accuracy: 10.699999809265137
Iteration 100, Epoch 1, Loss: 0.3972124457359314, Accuracy: 88.07240295410156, Val Loss: 67.78279876708984, Val Accuracy: 25.099998474121094
Iteration 200, Epoch 1, Loss: 0.26396840810775757, Accuracy: 91.8998794555664, Val Loss: 45.44111251831055, Val Accuracy: 44.16666793823242
Iteration 300, Epoch 1, Loss: 0.2082356959581375, Accuracy: 93.5994644165039, Val Loss: 34.16777038574219, Val Accuracy: 55.97500228881836
Iteration 400, Epoch 1, Loss: 0.17309176921844482, Accuracy: 94.70464324951172, Val Loss: 27.372440338134766, Val Accuracy: 63.540000915527344
Iteration 500, Epoch 1, Loss: 0.15379154682159424, Accuracy: 95.29690551757812, Val Loss: 22.862985610961914, Val Accuracy: 68.04999542236328
Iteration 600, Epoch 1, Loss: 0.14017567038536072, Accuracy: 95.73107147216797, Val Loss: 19.641305923461914, Val Accuracy: 71.42857360839844
Iteration 700, Epoch 1, Loss:

Опишите все эксперименты, результаты. Сделайте выводы.

  Была реализована следующая архитектура:
        1. `CONV->NORM->RELU->CONV->NORM->RELU` для предварительной обработки входного сигнала для цепочки блоков слоев
        2. `IDENTITY->[BOTTLENECK->IDENTITY] x N`, где каждый блок состоит из произвольного
           где каждый блок состоит из произвольного количества слоев, использующих "skip" соединения
        3. `POOL->DENSE->SOFTMAX`, где выполняется глобальное усреднение пула
           перед вычислением raw scores
В результате произведенной работы были получены необходимые результаты уже на 3 эпохе