# Лабораторная работа 4

Tensorflow 2.x

1) Подготовка данных

2) Использование Keras Model API

3) Использование Keras Sequential + Functional API

https://www.tensorflow.org/tutorials

Для выполнения лабораторной работы необходимо установить TensorFlow версии 2.0 или выше.

Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.



In [7]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt

%matplotlib inline

# Подготовка данных
Загрузите набор данных из предыдущей лабораторной работы. 

In [8]:
def load_mnist(num_training=49000, num_validation=1000, num_test=10000):
    """
    Download MNIST through tf.keras and return standardized data splits.

    The first `num_training` training images become the training split, the
    next `num_validation` training images become the validation split, and
    the first `num_test` test images become the test split. Every split is
    standardized with the scalar pixel mean and standard deviation computed
    on the training split only.

    Returns:
    - X_train, y_train, X_val, y_val, X_test, y_test: float32 image arrays
      and flattened int32 label arrays for the three splits.
    """
    # Fetch the raw arrays and cast them to the dtypes used downstream.
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.mnist.load_data()
    train_images = np.asarray(train_images, dtype=np.float32)
    train_labels = np.asarray(train_labels, dtype=np.int32).flatten()
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32).flatten()

    # Carve out the three splits by position.
    val_rows = range(num_training, num_training + num_validation)
    train_rows = range(num_training)
    test_rows = range(num_test)
    X_val, y_val = train_images[val_rows], train_labels[val_rows]
    X_train, y_train = train_images[train_rows], train_labels[train_rows]
    X_test, y_test = test_images[test_rows], test_labels[test_rows]

    # Standardize all splits with the training-split statistics so that no
    # information from validation/test data leaks into the normalization.
    pixel_axes = (0, 1, 2)
    mean_pixel = X_train.mean(axis=pixel_axes, keepdims=True)
    std_pixel = X_train.std(axis=pixel_axes, keepdims=True)
    X_train, X_val, X_test = (
        (split - mean_pixel) / std_pixel for split in (X_train, X_val, X_test)
    )

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_mnist()
# Give every image an explicit trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
X_train, X_val, X_test = (
    split.reshape(-1, 28, 28, 1) for split in (X_train, X_val, X_test)
)
# Report the shapes of all splits as a quick sanity check.
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (49000, 28, 28, 1)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 28, 28, 1)
Validation labels shape:  (1000,)
Test data shape:  (10000, 28, 28, 1)
Test labels shape:  (10000,)


In [9]:
class Dataset(object):
    """Iterable that yields (X, y) minibatches from two aligned NumPy arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Start a new pass over the data.

        Returns an iterator of (X_batch, y_batch) tuples. Each batch holds
        `batch_size` examples except possibly the last one, which holds the
        remainder. With shuffle=True a fresh random permutation of the
        examples is drawn on every call; data and labels stay aligned.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Contiguous slices: no copy is needed when order is preserved.
            return ((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))

        # Draw the permutation once per pass and index both arrays with the
        # same index chunk so every example keeps its own label.
        idxs = np.arange(N)
        np.random.shuffle(idxs)
        return ((self.X[idxs[i:i+B]], self.y[idxs[i:i+B]])
                for i in range(0, N, B))


# Minibatch iterators (64 examples per batch) over the three splits; only the
# training split requests shuffling.
train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)

In [10]:
# Peek at the first seven minibatches of the training iterator to check
# the batch shapes.
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t > 5:
        break

0 (64, 28, 28, 1) (64,)
1 (64, 28, 28, 1) (64,)
2 (64, 28, 28, 1) (64,)
3 (64, 28, 28, 1) (64,)
4 (64, 28, 28, 1) (64,)
5 (64, 28, 28, 1) (64,)
6 (64, 28, 28, 1) (64,)


#  Keras Model Subclassing API


Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:

1) Определить новый класс, который является наследником tf.keras.Model.

2) В методе __init__() определить все необходимые слои из модуля tf.keras.layers

3) Реализовать прямой проход в методе call() на основе слоев, объявленных в __init__()

Ниже приведен пример использования keras API для определения двухслойной полносвязной сети. 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras

In [11]:
# Device string passed to tf.device() by the test helpers and the training loop.
device = '/CPU:0'
# Seeded VarianceScaling initializer kept at module level.
# NOTE(review): the model definitions in this file construct their own
# initializers, so this object appears to be unused — confirm before removing.
initializer = tf.initializers.VarianceScaling(scale=2.0, seed=42)
print('Using device: ', device)

Using device:  /CPU:0


In [12]:
class TwoLayerFC(tf.keras.Model):
    """Two-layer fully connected classifier: Flatten -> Dense(ReLU) -> Dense(softmax)."""

    def __init__(self, hidden_size, num_classes):
        """
        Inputs:
        - hidden_size: number of units in the hidden ReLU layer
        - num_classes: number of units in the softmax output layer
        """
        super(TwoLayerFC, self).__init__()
        # Each Dense layer gets its own initializer object. Reusing one
        # unseeded initializer instance for several layers can make newer
        # Keras versions draw from the same random stream on every call
        # (and emit a warning), which correlates the layers' initial weights.
        self.fc1 = tf.keras.layers.Dense(
            hidden_size, activation='relu',
            kernel_initializer=tf.initializers.VarianceScaling(scale=2.0))
        self.fc2 = tf.keras.layers.Dense(
            num_classes, activation='softmax',
            kernel_initializer=tf.initializers.VarianceScaling(scale=2.0))
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x, training=False):
        """
        Forward pass: flatten the input and apply the two Dense layers.

        `training` is accepted for Keras API compatibility but is not used,
        because no layer here behaves differently in training mode.
        Returns the output of the softmax layer.
        """
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


def test_TwoLayerFC():
    """Smoke test: run a zero batch through TwoLayerFC and print the output shape."""
    batch_size, input_size = 64, 50
    hidden_size, num_classes = 42, 10
    dummy_batch = tf.zeros((batch_size, input_size))
    net = TwoLayerFC(hidden_size, num_classes)
    with tf.device(device):
        probs = net(dummy_batch)
        print(probs.shape)


test_TwoLayerFC()

(64, 10)


Реализуйте трехслойную CNN для вашей задачи классификации. 

Архитектура сети:
    
1. Сверточный слой (5 x 5 kernels, zero-padding = 'same')
2. Функция активации ReLU 
3. Сверточный слой (3 x 3 kernels, zero-padding = 'same')
4. Функция активации ReLU 
5. Полносвязный слой 
6. Функция активации Softmax 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense

In [13]:
class ThreeLayerConvNet(tf.keras.Model):
    """Conv 5x5 (ReLU) -> Conv 3x3 (ReLU) -> Flatten -> Dense (softmax) classifier."""

    def __init__(self, channel_1, channel_2, num_classes):
        """
        Inputs:
        - channel_1: number of filters in the first (5x5) convolution
        - channel_2: number of filters in the second (3x3) convolution
        - num_classes: number of units in the softmax output layer
        """
        super().__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Both convolutions keep the spatial size ('same' zero-padding).
        self.conv1 = tf.keras.layers.Conv2D(
            filters=channel_1, kernel_size=5, padding='same', activation='relu')
        self.conv2 = tf.keras.layers.Conv2D(
            filters=channel_2, kernel_size=3, padding='same', activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(units=num_classes, activation='softmax')

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################

    def call(self, x, training=False):
        """
        Forward pass through both convolutions and the softmax classifier.

        `training` is accepted for Keras API compatibility but is not used.
        Returns the output of the softmax layer.
        """
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        features = self.conv2(self.conv1(x))
        scores = self.fc(self.flatten(features))
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return scores

In [14]:
def test_ThreeLayerConvNet():
    """
    Smoke test: run a zero batch through ThreeLayerConvNet and print the
    output shape, which should be (64, 10).

    The dummy batch uses the channels-last layout of the data in this
    notebook, (N, 28, 28, 1), rather than a channels-first CIFAR shape.
    """
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        x = tf.zeros((64, 28, 28, 1))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)


Пример реализации процесса обучения:

In [15]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for `num_epochs` epochs on the training set and periodically
    checks loss and accuracy on the validation set.

    Besides its arguments, the loop reads these module-level names: `device`
    (where to run), `train_dset` and `val_dset` (minibatch iterables) and
    `print_every` (validate and print every that many iterations).

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - is_training: Value passed as `training=` to the model on training
      batches. Validation batches always use training=False.

    Returns: Nothing, but prints progress during training
    """
    with tf.device(device):
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'

        t = 0
        for epoch in range(num_epochs):

            # Training metrics are running averages over the current epoch.
            train_loss.reset_states()
            train_accuracy.reset_states()

            for x_np, y_np in train_dset:
                # Only the forward pass and the loss need to be recorded on
                # the tape; everything else runs after the context exits so
                # that the gradient step and the validation passes are not
                # traced.
                with tf.GradientTape() as tape:
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                # Update the metrics
                train_loss.update_state(loss)
                train_accuracy.update_state(y_np, scores)

                if t % print_every == 0:
                    # Validation metrics are recomputed from scratch over the
                    # whole validation set at every report.
                    val_loss.reset_states()
                    val_accuracy.reset_states()
                    for test_x, test_y in val_dset:
                        prediction = model(test_x, training=False)
                        t_loss = loss_fn(test_y, prediction)

                        val_loss.update_state(t_loss)
                        val_accuracy.update_state(test_y, prediction)

                    print (template.format(t, epoch+1,
                                         train_loss.result(),
                                         train_accuracy.result()*100,
                                         val_loss.result(),
                                         val_accuracy.result()*100))
                t += 1

In [16]:
# Hyperparameters for the subclassed two-layer fully connected network.
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2
print_every = 50


def model_init_fn():
    """Return a fresh TwoLayerFC built from the module-level hyperparameters."""
    return TwoLayerFC(hidden_size=hidden_size, num_classes=num_classes)


def optimizer_init_fn():
    """Return a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.064849376678467, Accuracy: 7.8125, Val Loss: 2.7793993949890137, Val Accuracy: 24.200000762939453
Iteration 50, Epoch 1, Loss: 0.8965718746185303, Accuracy: 72.48774719238281, Val Loss: 0.6515597105026245, Val Accuracy: 79.9000015258789
Iteration 100, Epoch 1, Loss: 0.6551655530929565, Accuracy: 80.22895812988281, Val Loss: 0.5606053471565247, Val Accuracy: 81.0
Iteration 150, Epoch 1, Loss: 0.5784794092178345, Accuracy: 82.77111053466797, Val Loss: 0.5058924555778503, Val Accuracy: 84.4000015258789
Iteration 200, Epoch 1, Loss: 0.517216145992279, Accuracy: 84.57711029052734, Val Loss: 0.47161221504211426, Val Accuracy: 85.19999694824219
Iteration 250, Epoch 1, Loss: 0.4870522916316986, Accuracy: 85.53286743164062, Val Loss: 0.3995288014411926, Val Accuracy: 88.70000457763672
Iteration 300, Epoch 1, Loss: 0.45469731092453003, Accuracy: 86.59676361083984, Val Loss: 0.4065869152545929, Val Accuracy: 87.19999694824219
Iteration 350, Epoch 1, Loss: 0.428629994

Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9 . 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD

Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50% .

In [17]:
# Hyperparameters for the subclassed three-layer ConvNet.
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16
num_classes = 10

def model_init_fn():
    """Return a fresh ThreeLayerConvNet built from the module-level hyperparameters."""
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = ThreeLayerConvNet(channel_1=channel_1,
                              channel_2=channel_2,
                              num_classes=num_classes)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    """Return SGD with Nesterov momentum 0.9 and the module-level learning rate."""
    ############################################################################
    # TODO: Complete the implementation of optimizer_init_fn.                  #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate,
        momentum=0.9,
        nesterov=True,
    )

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.3382437229156494, Accuracy: 3.125, Val Loss: 2.3042333126068115, Val Accuracy: 7.0
Iteration 50, Epoch 1, Loss: 1.0767600536346436, Accuracy: 67.06495666503906, Val Loss: 0.577302873134613, Val Accuracy: 83.60000610351562
Iteration 100, Epoch 1, Loss: 0.7256202101707458, Accuracy: 78.34158325195312, Val Loss: 0.5033233165740967, Val Accuracy: 83.20000457763672
Iteration 150, Epoch 1, Loss: 0.6147544980049133, Accuracy: 81.73633575439453, Val Loss: 0.4483001232147217, Val Accuracy: 87.0
Iteration 200, Epoch 1, Loss: 0.535682201385498, Accuracy: 84.1262435913086, Val Loss: 0.4032776355743408, Val Accuracy: 87.9000015258789
Iteration 250, Epoch 1, Loss: 0.4865170121192932, Accuracy: 85.62002563476562, Val Loss: 0.2721148729324341, Val Accuracy: 91.19999694824219
Iteration 300, Epoch 1, Loss: 0.43691545724868774, Accuracy: 87.11067199707031, Val Loss: 0.24511055648326874, Val Accuracy: 92.9000015258789
Iteration 350, Epoch 1, Loss: 0.39785224199295044, Accurac

# Использование Keras Sequential API для реализации последовательных моделей.

Пример для полносвязной сети:

In [18]:
learning_rate = 1e-2

def model_init_fn():
    """
    Build the two-layer fully connected network with tf.keras.Sequential:
    Flatten -> Dense(4000, ReLU) -> Dense(10, softmax) on 28x28x1 inputs.
    """
    input_shape = (28, 28, 1)
    hidden_layer_size, num_classes = 4000, 10
    # Each Dense layer gets its own initializer object. Reusing one unseeded
    # initializer instance for several layers can make newer Keras versions
    # draw from the same random stream on every call (and emit a warning).
    layers = [
        tf.keras.layers.Flatten(input_shape=input_shape),
        tf.keras.layers.Dense(
            hidden_layer_size, activation='relu',
            kernel_initializer=tf.initializers.VarianceScaling(scale=2.0)),
        tf.keras.layers.Dense(
            num_classes, activation='softmax',
            kernel_initializer=tf.initializers.VarianceScaling(scale=2.0)),
    ]
    model = tf.keras.Sequential(layers)
    return model

def optimizer_init_fn():
    """Return a plain SGD optimizer using the module-level learning rate."""
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.004228115081787, Accuracy: 17.1875, Val Loss: 2.7842531204223633, Val Accuracy: 16.799999237060547
Iteration 50, Epoch 1, Loss: 0.877299964427948, Accuracy: 72.67156982421875, Val Loss: 0.624040961265564, Val Accuracy: 81.69999694824219
Iteration 100, Epoch 1, Loss: 0.6377058029174805, Accuracy: 80.6621322631836, Val Loss: 0.5205370187759399, Val Accuracy: 83.0
Iteration 150, Epoch 1, Loss: 0.5626254081726074, Accuracy: 83.28849029541016, Val Loss: 0.5075733065605164, Val Accuracy: 84.89999389648438
Iteration 200, Epoch 1, Loss: 0.5019893646240234, Accuracy: 85.09795379638672, Val Loss: 0.4466663599014282, Val Accuracy: 85.29999542236328
Iteration 250, Epoch 1, Loss: 0.47608307003974915, Accuracy: 85.85034942626953, Val Loss: 0.3862399458885193, Val Accuracy: 89.0
Iteration 300, Epoch 1, Loss: 0.44580358266830444, Accuracy: 86.79402160644531, Val Loss: 0.4076204001903534, Val Accuracy: 86.29999542236328
Iteration 350, Epoch 1, Loss: 0.4208563268184662, Acc

Альтернативный менее гибкий способ обучения:

In [19]:
# Alternative training path: instead of the custom train_part34 loop, use the
# built-in Keras compile / fit / evaluate workflow on a fresh model.
model = model_init_fn()
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
# One epoch on the training split, validating on the validation split.
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
# Final check on the held-out test split.
model.evaluate(X_test, y_test)



[0.21395255625247955, 0.9370999932289124]

Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API . Обучите модель двумя способами.

In [20]:
def model_init_fn():
    """
    Build the three-layer ConvNet with tf.keras.Sequential:
    Conv 5x5 x32 (ReLU) -> Conv 3x3 x16 (ReLU) -> Flatten -> Dense(10, softmax)
    on 28x28x1 inputs.
    """
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################
    return model

learning_rate = 5e-4
def optimizer_init_fn():
    """Return a plain SGD optimizer using the module-level learning rate."""
    ############################################################################
    # TODO: Complete the implementation of optimizer_init_fn.                  #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    # Read the rate from `learning_rate` instead of repeating the literal, so
    # that changing the variable above actually changes the optimizer.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.302030563354492, Accuracy: 10.9375, Val Loss: 2.3161652088165283, Val Accuracy: 13.500000953674316
Iteration 50, Epoch 1, Loss: 2.278923273086548, Accuracy: 19.209558486938477, Val Loss: 2.2712481021881104, Val Accuracy: 20.100000381469727
Iteration 100, Epoch 1, Loss: 2.2509891986846924, Accuracy: 24.42759895324707, Val Loss: 2.2192165851593018, Val Accuracy: 30.599998474121094
Iteration 150, Epoch 1, Loss: 2.2235517501831055, Accuracy: 29.397764205932617, Val Loss: 2.1592841148376465, Val Accuracy: 40.5
Iteration 200, Epoch 1, Loss: 2.1891987323760986, Accuracy: 33.84639358520508, Val Loss: 2.0784430503845215, Val Accuracy: 46.39999771118164
Iteration 250, Epoch 1, Loss: 2.1528918743133545, Accuracy: 36.92729187011719, Val Loss: 1.9762403964996338, Val Accuracy: 51.599998474121094
Iteration 300, Epoch 1, Loss: 2.107665538787842, Accuracy: 40.23567199707031, Val Loss: 1.8450473546981812, Val Accuracy: 57.0
Iteration 350, Epoch 1, Loss: 2.048365831375122, 

In [21]:
# Second way of training the Sequential ConvNet: the built-in Keras
# compile / fit / evaluate workflow instead of train_part34.
model = model_init_fn()
# NOTE(review): the 'sgd' string alias creates an SGD optimizer with Keras'
# default settings, so the module-level `learning_rate` and optimizer_init_fn
# used by the custom loop are not applied here — confirm this is intended.
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
# One epoch on the training split, validating on the validation split.
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
# Final check on the held-out test split.
model.evaluate(X_test, y_test)



[0.16302064061164856, 0.9528999924659729]

# Использование Keras Functional API

Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры. 

Ниже представлен пример для полносвязной сети. 

In [22]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):
    """
    Build the two-layer fully connected network with the Keras Functional API.

    Inputs:
    - input_shape: shape of one input example (without the batch dimension)
    - hidden_size: number of units in the hidden ReLU layer
    - num_classes: number of units in the softmax output layer

    Returns: a tf.keras.Model mapping inputs to softmax outputs.
    """
    inputs = tf.keras.Input(shape=input_shape)
    flattened_inputs = tf.keras.layers.Flatten()(inputs)
    # Each Dense layer gets its own initializer object. Reusing one unseeded
    # initializer instance for several layers can make newer Keras versions
    # draw from the same random stream on every call (and emit a warning).
    fc1_output = tf.keras.layers.Dense(
        hidden_size, activation='relu',
        kernel_initializer=tf.initializers.VarianceScaling(scale=2.0),
    )(flattened_inputs)
    scores = tf.keras.layers.Dense(
        num_classes, activation='softmax',
        kernel_initializer=tf.initializers.VarianceScaling(scale=2.0),
    )(fc1_output)

    # Instantiate the model given inputs and outputs.
    model = tf.keras.Model(inputs=inputs, outputs=scores)
    return model

def test_two_layer_fc_functional():
    """Smoke test: run a zero batch through two_layer_fc_functional and print the output shape."""
    batch_size, input_size = 64, 50
    hidden_size, num_classes = 42, 10

    dummy_batch = tf.zeros((batch_size, input_size))
    net = two_layer_fc_functional((input_size,), hidden_size, num_classes)

    with tf.device(device):
        probs = net(dummy_batch)
        print(probs.shape)


test_two_layer_fc_functional()

(64, 10)


In [23]:
# Hyperparameters for the Functional-API two-layer network.
input_shape = (28, 28, 1)
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Return a fresh Functional-API two-layer network for the module-level settings."""
    return two_layer_fc_functional(
        input_shape=input_shape,
        hidden_size=hidden_size,
        num_classes=num_classes,
    )


def optimizer_init_fn():
    """Return a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.2287349700927734, Accuracy: 12.5, Val Loss: 2.869480609893799, Val Accuracy: 17.899999618530273
Iteration 50, Epoch 1, Loss: 0.9346903562545776, Accuracy: 71.99754333496094, Val Loss: 0.640773594379425, Val Accuracy: 82.20000457763672
Iteration 100, Epoch 1, Loss: 0.6718783378601074, Accuracy: 80.39913177490234, Val Loss: 0.5560400485992432, Val Accuracy: 81.9000015258789
Iteration 150, Epoch 1, Loss: 0.5895205140113831, Accuracy: 82.80215454101562, Val Loss: 0.527276873588562, Val Accuracy: 84.5
Iteration 200, Epoch 1, Loss: 0.524778425693512, Accuracy: 84.76367950439453, Val Loss: 0.4614454507827759, Val Accuracy: 86.69999694824219
Iteration 250, Epoch 1, Loss: 0.4956998825073242, Accuracy: 85.55776977539062, Val Loss: 0.3961638808250427, Val Accuracy: 89.0999984741211
Iteration 300, Epoch 1, Loss: 0.4621570408344269, Accuracy: 86.57599639892578, Val Loss: 0.4159914255142212, Val Accuracy: 86.4000015258789
Iteration 350, Epoch 1, Loss: 0.4352039098739624

Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут). 

Экспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods

In [24]:
class CustomConvNet(tf.keras.Model):
    """
    Three 3x3 'same' convolutions (32, 64, 128 filters, ReLU) followed by
    Flatten -> Dense(256, ReLU) -> Dense(10, softmax).
    """

    def __init__(self):
        super().__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Convolutional feature extractor; the filter count doubles per layer.
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32, kernel_size=3, padding='same', activation='relu')
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64, kernel_size=3, padding='same', activation='relu')
        self.conv3 = tf.keras.layers.Conv2D(
            filters=128, kernel_size=3, padding='same', activation='relu')
        # Classifier head.
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(units=256, activation='relu')
        self.fc2 = tf.keras.layers.Dense(units=10, activation='softmax')

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        """
        Forward pass through the three convolutions and the two Dense layers.

        `training` is accepted for Keras API compatibility but is not used.
        Returns the output of the softmax layer.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        features = self.conv3(self.conv2(self.conv1(input_tensor)))
        hidden = self.fc1(self.flatten(features))
        x = self.fc2(hidden)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x


# Validate and print every 700 iterations; train for 10 epochs.
print_every = 700
num_epochs = 10

# NOTE(review): this instance is not the one that gets trained — train_part34
# builds its own model by calling model_init_fn() — confirm it is still needed.
model = CustomConvNet()

def model_init_fn():
    """Return a fresh CustomConvNet for train_part34."""
    return CustomConvNet()

def optimizer_init_fn():
    """Return an Adam optimizer with learning rate 1e-3."""
    learning_rate = 1e-3
    return tf.keras.optimizers.Adam(learning_rate) 

# is_training=True makes the loop call the model with training=True on
# training batches.
train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epochs, is_training=True)

Iteration 0, Epoch 1, Loss: 2.3320109844207764, Accuracy: 12.5, Val Loss: 2.7032454013824463, Val Accuracy: 32.5
Iteration 700, Epoch 1, Loss: 0.12146905064582825, Accuracy: 96.26426696777344, Val Loss: 0.0669647604227066, Val Accuracy: 97.89999389648438
Iteration 1400, Epoch 2, Loss: 0.03514748811721802, Accuracy: 98.87056732177734, Val Loss: 0.09132395684719086, Val Accuracy: 97.29999542236328
Iteration 2100, Epoch 3, Loss: 0.02056107483804226, Accuracy: 99.30525207519531, Val Loss: 0.07919204980134964, Val Accuracy: 97.5
Iteration 2800, Epoch 4, Loss: 0.014073321595788002, Accuracy: 99.52783203125, Val Loss: 0.08352985978126526, Val Accuracy: 97.39999389648438
Iteration 3500, Epoch 5, Loss: 0.011354857124388218, Accuracy: 99.64960479736328, Val Loss: 0.10005294531583786, Val Accuracy: 97.39999389648438
Iteration 4200, Epoch 6, Loss: 0.008517461828887463, Accuracy: 99.70939636230469, Val Loss: 0.06404273211956024, Val Accuracy: 98.5999984741211
Iteration 4900, Epoch 7, Loss: 0.010796

Опишите все эксперименты, результаты. Сделайте выводы.

Я провел несколько экспериментов на трёхслойной сети с разными функциями активации (ReLU и sigmoid), также попробовал добавить Dropout и BatchNormalization и использовал разные оптимизаторы на разных датасетах.

Как и ожидалось, при сравнении результатов SGD показал себя хуже, чем Adam.

Ожидаемо получил, что при меньшей скорости обучения тех же результатов приходилось достигать дольше.

Использование Dropout и BatchNormalization совсем чуть-чуть улучшило показатели на валидационной выборке.

Sigmoid показала себя чуть хуже, чем ReLU.

В целом эксперимент довольно бессмысленный. Если делать полный перебор параметров, на это уйдет несколько дней. Стоит ли добавлять больше слоев в текущую реализацию для данных датасетов? Наверное, нет: результаты и так выше 95%.