# Лабораторная работа 4
# Tensorflow 2.x
# 6408 Дробин Роман
# Вариант 2 (mnist dataset)

1) Подготовка данных

2) Использование Keras Model API

3) Использование Keras Sequential + Functional API

https://www.tensorflow.org/tutorials

Для выполнения лабораторной работы необходимо установить tensorflow версии 2.0 или выше .

Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.



In [1]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
import itertools

%matplotlib inline

# 1. Подготовка данных
Загрузите набор данных из предыдущей лабораторной работы. 

In [2]:
def load_mnist(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch the MNIST dataset from the web and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw Mnist dataset and use appropriate data types and shapes
    mnist = tf.keras.datasets.mnist.load_data()
    (X_train, y_train), (X_test, y_test) = mnist
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean pixel and divide by std
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_mnist()
X_train = X_train.reshape(49000, 28, 28, 1)
X_val = X_val.reshape(1000, 28, 28, 1)
X_test = X_test.reshape(10000, 28, 28, 1)
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (49000, 28, 28, 1)
Train labels shape:  (49000,)
Validation data shape:  (1000, 28, 28, 1)
Validation labels shape:  (1000,)
Test data shape:  (10000, 28, 28, 1)
Test labels shape:  (10000,)


In [3]:
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y
        
        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))


train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)

In [4]:
# We can iterate through a dataset like this:
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t > 5: break

0 (64, 28, 28, 1) (64,)
1 (64, 28, 28, 1) (64,)
2 (64, 28, 28, 1) (64,)
3 (64, 28, 28, 1) (64,)
4 (64, 28, 28, 1) (64,)
5 (64, 28, 28, 1) (64,)
6 (64, 28, 28, 1) (64,)


# 2. Keras Model Subclassing API


Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:

1) Определить новый класс, который является наследником tf.keras.Model.

2) В методе \_\_init\_\_() определить все необходимые слои из модуля tf.keras.layer

3) Реализовать прямой проход в методе call() на основе слоев, объявленных в \_\_init\_\_()

Ниже приведен пример использования keras API для определения двухслойной полносвязной сети. 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras

In [5]:
#Определяем на чём обучать модель
def set_GPU_param(IS_USED_GPU = True):
    return '/device:GPU:0' if IS_USED_GPU else '/cpu:0'

In [6]:
class TwoLayerFC(tf.keras.Model):
    def __init__(self, hidden_size, num_classes):
        super(TwoLayerFC, self).__init__() #Вызываем метод __init__ у базового класса tf.keras.Model    
        initializer = tf.initializers.VarianceScaling(scale=2.0)
        self.fc1 = tf.keras.layers.Dense(hidden_size, activation='relu',
                                   kernel_initializer=initializer)
        self.fc2 = tf.keras.layers.Dense(num_classes, activation='softmax',
                                   kernel_initializer=initializer)
        self.flatten = tf.keras.layers.Flatten()
    
    def call(self, x, training=False):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

device = set_GPU_param()

def test_TwoLayerFC():
    """ A small unit test to exercise the TwoLayerFC model above. """
    input_size, hidden_size, num_classes = 50, 42, 10
    x = tf.zeros((64, input_size))
    model = TwoLayerFC(hidden_size, num_classes)
    with tf.device(device):
        scores = model(x)
        print(scores.shape)
        
test_TwoLayerFC()

(64, 10)


Реализуйте трехслойную CNN для вашей задачи классификации. 

Архитектура сети:
    
1. Сверточный слой (5 x 5 kernels, zero-padding = 'same')
2. Функция активации ReLU 
3. Сверточный слой (3 x 3 kernels, zero-padding = 'same')
4. Функция активации ReLU 
5. Полносвязный слой 
6. Функция активации Softmax 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense

In [7]:
class ThreeLayerConvNet(tf.keras.Model):
    def __init__(self, channel_1, channel_2, num_classes):
        super(ThreeLayerConvNet, self).__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        self.conv1 = tf.keras.layers.Conv2D(channel_1, 
                                            (5, 5), 
                                            padding='same', 
                                            activation='relu')
        self.conv2 = tf.keras.layers.Conv2D(channel_2, 
                                            (3, 3), 
                                            padding='same', 
                                            activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(num_classes, 
                                        activation='softmax')
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        
    def call(self, x, training=False):
        scores = None
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        scores = self.conv1(x)
        scores = self.conv2(scores)
        scores = self.flatten(scores)
        scores = self.fc(scores)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################        
        return scores

In [8]:
def test_ThreeLayerConvNet():    
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        x = tf.zeros((64, 3, 32, 32))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)


In [9]:
print(tf.config.list_physical_devices())
print(device)
print_every = 100

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
/device:GPU:0


Пример реализации процесса обучения:

In [10]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for one epoch on the MNIST training set and periodically checks
    accuracy on the MNIST validation set.
    
    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    
    Returns: Nothing, but prints progress during trainingn
    """    
    with tf.device(device):
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
        
        model = model_init_fn()
        optimizer = optimizer_init_fn()
        
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    
        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')
        
        i = 0
        for epoch in range(num_epochs):
            
            # Reset the metrics - https://www.tensorflow.org/alpha/guide/migration_guide#new-style_metrics
            train_loss.reset_state()
            train_accuracy.reset_state()
            
            for x_np, y_np in train_dset:
                with tf.GradientTape() as tape:
                    
                    # Use the model function to build the forward pass.
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)
      
                    gradients = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                    
                    # Update the metrics
                    train_loss.update_state(loss)
                    train_accuracy.update_state(y_np, scores)
                    
                    if i % print_every == 0:
                        val_loss.reset_state()
                        val_accuracy.reset_state()
                        for test_x, test_y in val_dset:
                            # During validation at end of epoch, training set to False
                            prediction = model(test_x, training=False)
                            t_loss = loss_fn(test_y, prediction)

                            val_loss.update_state(t_loss)
                            val_accuracy.update_state(test_y, prediction)
                        
                        template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                        print (template.format(i, epoch + 1,
                                             train_loss.result(),
                                             train_accuracy.result() * 100,
                                             val_loss.result(),
                                             val_accuracy.result() * 100))
                    i += 1

In [11]:
hidden_size, num_classes = 4000, 10
learning_rate = 1e-2

def model_init_fn():
    return TwoLayerFC(hidden_size, num_classes)

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.932678699493408, Accuracy: 18.75, Val Loss: 2.363511800765991, Val Accuracy: 24.69999885559082
Iteration 100, Epoch 1, Loss: 0.6379953026771545, Accuracy: 80.61571502685547, Val Loss: 0.5589851140975952, Val Accuracy: 81.5999984741211
Iteration 200, Epoch 1, Loss: 0.5103380084037781, Accuracy: 84.86473846435547, Val Loss: 0.47028595209121704, Val Accuracy: 85.29999542236328
Iteration 300, Epoch 1, Loss: 0.45379912853240967, Accuracy: 86.5604248046875, Val Loss: 0.4456632435321808, Val Accuracy: 86.4000015258789
Iteration 400, Epoch 1, Loss: 0.411295622587204, Accuracy: 87.82341003417969, Val Loss: 0.38464611768722534, Val Accuracy: 88.80000305175781
Iteration 500, Epoch 1, Loss: 0.38880079984664917, Accuracy: 88.5104751586914, Val Loss: 0.3675382435321808, Val Accuracy: 89.30000305175781
Iteration 600, Epoch 1, Loss: 0.3655530512332916, Accuracy: 89.16648864746094, Val Loss: 0.3497719466686249, Val Accuracy: 89.9000015258789
Iteration 700, Epoch 1, Loss: 0

Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9 . 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD

Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50% .

In [12]:
learning_rate = 3e-3
channel_1, channel_2, num_classes = 32, 16, 10

def model_init_fn():
    model = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    model = ThreeLayerConvNet(channel_1, 
                              channel_2, 
                              num_classes)
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, 
                                        momentum=0.9, 
                                        nesterov=True)
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.303586006164551, Accuracy: 9.375, Val Loss: 2.288219690322876, Val Accuracy: 14.30000114440918
Iteration 100, Epoch 1, Loss: 0.6931470632553101, Accuracy: 80.12067413330078, Val Loss: 0.5041230916976929, Val Accuracy: 82.80000305175781
Iteration 200, Epoch 1, Loss: 0.5150760412216187, Accuracy: 85.20677947998047, Val Loss: 0.36565157771110535, Val Accuracy: 88.80000305175781
Iteration 300, Epoch 1, Loss: 0.4171001613140106, Accuracy: 88.06063079833984, Val Loss: 0.2254384607076645, Val Accuracy: 92.79999542236328
Iteration 400, Epoch 1, Loss: 0.35133764147758484, Accuracy: 89.89635467529297, Val Loss: 0.2020379900932312, Val Accuracy: 94.0999984741211
Iteration 500, Epoch 1, Loss: 0.31177279353141785, Accuracy: 90.98365783691406, Val Loss: 0.1862219274044037, Val Accuracy: 94.4000015258789
Iteration 600, Epoch 1, Loss: 0.2806331217288971, Accuracy: 91.8625259399414, Val Loss: 0.20509473979473114, Val Accuracy: 93.5
Iteration 700, Epoch 1, Loss: 0.256892710

# 3.1. Использование Keras Sequential API для реализации последовательных моделей.

Пример для полносвязной сети:

In [22]:
learning_rate = 1e-2

def model_init_fn():
    input_shape = (28, 28, 1)
    hidden_layer_size, num_classes = 4000, 10
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    layers = [
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hidden_layer_size, 
                              activation='relu',
                              kernel_initializer=initializer),
        tf.keras.layers.Dense(num_classes, 
                              activation='softmax', 
                              kernel_initializer=initializer),
    ]
    model = tf.keras.Sequential(layers)
    return model

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.77708101272583, Accuracy: 14.0625, Val Loss: 2.403806209564209, Val Accuracy: 20.700000762939453
Iteration 100, Epoch 1, Loss: 0.6345235109329224, Accuracy: 80.86324310302734, Val Loss: 0.5187417268753052, Val Accuracy: 83.60000610351562
Iteration 200, Epoch 1, Loss: 0.503331184387207, Accuracy: 84.98912048339844, Val Loss: 0.43620529770851135, Val Accuracy: 87.0
Iteration 300, Epoch 1, Loss: 0.4450308084487915, Accuracy: 86.79402160644531, Val Loss: 0.4021882712841034, Val Accuracy: 86.69999694824219
Iteration 400, Epoch 1, Loss: 0.4013379216194153, Accuracy: 88.11954498291016, Val Loss: 0.35931792855262756, Val Accuracy: 89.0999984741211
Iteration 500, Epoch 1, Loss: 0.37838059663772583, Accuracy: 88.81300354003906, Val Loss: 0.3352443277835846, Val Accuracy: 90.5
Iteration 600, Epoch 1, Loss: 0.3558100461959839, Accuracy: 89.4914779663086, Val Loss: 0.3236437141895294, Val Accuracy: 90.30000305175781
Iteration 700, Epoch 1, Loss: 0.3388758897781372, Acc

Альтернативный менее гибкий способ обучения:

In [23]:
model = model_init_fn()
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, 
          y_train, 
          batch_size=64, 
          epochs=1, 
          validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)

[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - loss: 0.5441 - sparse_categorical_accuracy: 0.8397 - val_loss: 0.2979 - val_sparse_categorical_accuracy: 0.9160
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2431 - sparse_categorical_accuracy: 0.9308


[0.20824557542800903, 0.9409999847412109]

Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API . Обучите модель двумя способами.

In [26]:
def model_init_fn():
    model = None
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(28, 28, 1)),
        tf.keras.layers.Conv2D(filters=32, 
                               kernel_size=(5, 5), 
                               padding='same', 
                               activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
                                     strides=(2, 2)),
        tf.keras.layers.Conv2D(filters=64, 
                               kernel_size=(5, 5),
                               padding='same', 
                               activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
                                     strides=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=1024, 
                              activation='relu'),
        tf.keras.layers.Dense(units=10, 
                              activation='softmax')
        ])
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################
    return model

learning_rate = 5e-4
def optimizer_init_fn():
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.3240113258361816, Accuracy: 12.5, Val Loss: 2.2113516330718994, Val Accuracy: 26.19999885559082
Iteration 100, Epoch 1, Loss: 0.4356260299682617, Accuracy: 86.97401428222656, Val Loss: 0.28096169233322144, Val Accuracy: 91.19999694824219
Iteration 200, Epoch 1, Loss: 0.3026847541332245, Accuracy: 90.85820770263672, Val Loss: 0.2984236478805542, Val Accuracy: 90.20000457763672
Iteration 300, Epoch 1, Loss: 0.23671585321426392, Accuracy: 92.81561279296875, Val Loss: 0.11650169640779495, Val Accuracy: 96.0
Iteration 400, Epoch 1, Loss: 0.19696937501430511, Accuracy: 94.0032730102539, Val Loss: 0.11136055737733841, Val Accuracy: 96.4000015258789
Iteration 500, Epoch 1, Loss: 0.1731095165014267, Accuracy: 94.74800109863281, Val Loss: 0.08947378396987915, Val Accuracy: 97.19999694824219
Iteration 600, Epoch 1, Loss: 0.15513679385185242, Accuracy: 95.28390502929688, Val Loss: 0.13157851994037628, Val Accuracy: 96.0
Iteration 700, Epoch 1, Loss: 0.1426423192024231

In [27]:
model = model_init_fn()
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, 
          y_train, 
          batch_size=64, 
          epochs=1, 
          validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)

[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 29ms/step - loss: 0.7274 - sparse_categorical_accuracy: 0.7960 - val_loss: 0.1923 - val_sparse_categorical_accuracy: 0.9420
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.1440 - sparse_categorical_accuracy: 0.9530


[0.12412513792514801, 0.9610000252723694]

# 3.2. Использование Keras Functional API

Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры. 

Ниже представлен пример для полносвязной сети. 

In [28]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):  
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    inputs = tf.keras.Input(shape=input_shape)
    flattened_inputs = tf.keras.layers.Flatten()(inputs)
    fc1_output = tf.keras.layers.Dense(hidden_size, activation='relu',
                                 kernel_initializer=initializer)(flattened_inputs)
    scores = tf.keras.layers.Dense(num_classes, activation='softmax',
                             kernel_initializer=initializer)(fc1_output)

    # Instantiate the model given inputs and outputs.
    model = tf.keras.Model(inputs=inputs, outputs=scores)
    return model

def test_two_layer_fc_functional():
    """ A small unit test to exercise the TwoLayerFC model above. """
    input_size, hidden_size, num_classes = 50, 42, 10
    input_shape = (50,)
    
    x = tf.zeros((64, input_size))
    model = two_layer_fc_functional(input_shape, hidden_size, num_classes)
    
    with tf.device(device):
        scores = model(x)
        print(scores.shape)
        
test_two_layer_fc_functional()

(64, 10)


In [29]:
input_shape = (28, 28, 1)
hidden_size, num_classes = 4000, 10
learning_rate = 1e-2

def model_init_fn():
    return two_layer_fc_functional(input_shape, hidden_size, num_classes)

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.5689733028411865, Accuracy: 14.0625, Val Loss: 2.924168109893799, Val Accuracy: 20.0
Iteration 100, Epoch 1, Loss: 0.6908491253852844, Accuracy: 79.31620788574219, Val Loss: 0.5610119700431824, Val Accuracy: 81.5
Iteration 200, Epoch 1, Loss: 0.5398033261299133, Accuracy: 84.0096435546875, Val Loss: 0.46026119589805603, Val Accuracy: 85.39999389648438
Iteration 300, Epoch 1, Loss: 0.4762960970401764, Accuracy: 85.92711639404297, Val Loss: 0.4238292872905731, Val Accuracy: 86.0999984741211
Iteration 400, Epoch 1, Loss: 0.4288247525691986, Accuracy: 87.3636245727539, Val Loss: 0.37715601921081543, Val Accuracy: 88.80000305175781
Iteration 500, Epoch 1, Loss: 0.40308693051338196, Accuracy: 88.16429138183594, Val Loss: 0.3535812497138977, Val Accuracy: 89.80000305175781
Iteration 600, Epoch 1, Loss: 0.37811338901519775, Accuracy: 88.88831329345703, Val Loss: 0.33893123269081116, Val Accuracy: 90.20000457763672
Iteration 700, Epoch 1, Loss: 0.3594030439853668, 

Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут). 

Эспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.  

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods

Рассмотрим следующие модели:
### 1. CustomConvNet_B_D.
   В этой модели используется сверточный слой, за которым следуют слои BatchNormalization, ReLU и Dropout. BatchNormalization используется для нормализации активаций между слоями, что может способствовать обучению более стабильной и эффективной модели. Dropout применяется для случайного исключения некоторых нейронов в процессе обучения с целью предотвращения переобучения.

In [30]:
class CustomConvNet_B_D(tf.keras.Model):
    def __init__(self):
        super(CustomConvNet_B_D, self).__init__()
        channel_1, channel_2, num_classes = 28, 14, 10
        dp_rate = 0.2
        initializer = tf.initializers.VarianceScaling(scale=2.0, seed=42)
        self.conv1 = tf.keras.layers.Conv2D(filters=channel_1, 
                                            kernel_size=[3,3], 
                                            strides=[1,1], 
                                            padding='same',
                                            kernel_initializer=initializer)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.relu1 = tf.keras.layers.ReLU()
        self.dp1 = tf.keras.layers.Dropout(rate=dp_rate)
        self.conv2 = tf.keras.layers.Conv2D(filters=channel_2, 
                                            kernel_size=[3,3], 
                                            strides=[1,1], 
                                            padding='same',
                                            kernel_initializer=initializer)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.relu2 = tf.keras.layers.ReLU()
        self.dp2 = tf.keras.layers.Dropout(rate=dp_rate)
        self.fl = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(units=num_classes,
                                        activation='softmax',
                                        kernel_initializer=initializer)

    def call(self, input_tensor, training=False):
        x = input_tensor
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.dp1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.dp2(x)

        x = self.fl(x)
        x = self.fc(x)
        return x

### 2. CustomConvNet:
В этой модели отсутствуют слои BatchNormalization и Dropout, присутствующие в CustomConvNet_B_D.
Она содержит только сверточные слои и слои активации ReLU.

In [31]:
class CustomConvNet(tf.keras.Model):
    def __init__(self):
        super(CustomConvNet, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        channel_1, channel_2, num_classes = 28, 14, 10
        dp_rate = 0.2
        initializer = tf.initializers.VarianceScaling(scale=2.0, seed=42)
        self.conv1 = tf.keras.layers.Conv2D(filters=channel_1, 
                                            kernel_size=[3,3], 
                                            strides=[1,1],
                                            padding='same',
                                            kernel_initializer=initializer)
        self.relu1 = tf.keras.layers.ReLU()
        self.conv2 = tf.keras.layers.Conv2D(filters=channel_2, 
                                            kernel_size=[3,3], 
                                            strides=[1,1],
                                            padding='same',
                                            kernel_initializer=initializer)
        self.relu2 = tf.keras.layers.ReLU()
        self.fl = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(units=num_classes,
                                        activation='softmax',
                                        kernel_initializer=initializer)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        ############################################################################
        # TODO: Construct a model that performs well on MNIST                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        x = input_tensor
        x = self.conv1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.relu2(x)

        x = self.fl(x)
        x = self.fc(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x

### 3. CustomConvNet_Sig:
В этой модели используется активация сигмоид для всех сверточных слоев вместо ReLU.
Сигмоидные активации могут привести к более ограниченному выходу, что может быть полезным в некоторых случаях.

In [32]:
class CustomConvNet_Sig(tf.keras.Model):
    def __init__(self):
        super(CustomConvNet_Sig, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on MNIST                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        channel_1, channel_2, num_classes = 28, 14, 10
        dp_rate = 0.2
        initializer = tf.initializers.VarianceScaling(scale=2.0, seed=42)
        self.conv1 = tf.keras.layers.Conv2D(filters=channel_1, 
                                            kernel_size=[3,3], 
                                            strides=[1,1],
                                            padding='same',
                                            kernel_initializer=initializer)
        self.conv2 = tf.keras.layers.Conv2D(filters=channel_2, 
                                            kernel_size=[3,3], 
                                            strides=[1,1],
                                            padding='same',
                                            kernel_initializer=initializer)
        self.fl = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(units=num_classes,
                                        activation='softmax',
                                        kernel_initializer=initializer)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        ############################################################################
        # TODO: Construct a model that performs well on MNIST                      #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        x = input_tensor
        x = self.conv1(x)
        x = tf.keras.activations.sigmoid(x)

        x = self.conv2(x)
        x = tf.keras.activations.sigmoid(x)

        x = self.fl(x)
        x = self.fc(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x

In [33]:
#Эмпирическим путём выявлена верхняя граница числа эпох, после которой результаты практически не меняются
num_epochs = 5 
#Определяем параметры эксперимента
models = ['CustomConvNet_B_D', 'CustomConvNet', 'CustomConvNet_Sig']
optimizers = ['Adam', 'SGD']
#learning_rates = [1e-2, 1e-3, 1e-4]
learning_rates = [1e-2] #Для ускорения эксперимента используем одно значение

In [34]:
def model_init_fn(model_name):
    if model_name == 'CustomConvNet_B_D':
        return CustomConvNet_B_D()
    elif model_name == 'CustomConvNet':
        return CustomConvNet()
    elif model_name == 'CustomConvNet_Sig':
        return CustomConvNet_Sig()

def optimizer_init_fn(optimizer_name, learning_rate):
    if optimizer_name == 'Adam':
        return tf.keras.optimizers.Adam(learning_rate)
    elif optimizer_name == 'SGD':
        return tf.keras.optimizers.SGD(learning_rate)

# Функция для обучения модели
def train_model(model_init_fn, optimizer_init_fn, num_epochs):
    model = model_init_fn()
    optimizer = optimizer_init_fn()
    train_part34(model_init_fn, 
                           optimizer_init_fn, 
                           num_epochs=num_epochs, 
                           is_training=True)

In [35]:
def start_experiment():
    for model, optimizer, lr in itertools.product(models, optimizers, learning_rates):
        print(f'\nModel: {model}, oprimizer: {optimizer}, lr: {lr}')
        train_model(lambda: model_init_fn(model), 
                    lambda: optimizer_init_fn(optimizer, lr), 
                    num_epochs)

In [36]:
print_every = 500
start_experiment()


Model: CustomConvNet_B_D, oprimizer: Adam, lr: 0.01
Iteration 0, Epoch 1, Loss: 3.023545742034912, Accuracy: 4.6875, Val Loss: 24.35333824157715, Val Accuracy: 20.700000762939453
Iteration 500, Epoch 1, Loss: 0.5834444165229797, Accuracy: 92.26858520507812, Val Loss: 0.1650119572877884, Val Accuracy: 94.70000457763672
Iteration 1000, Epoch 2, Loss: 0.09028811007738113, Accuracy: 97.3271255493164, Val Loss: 0.12753242254257202, Val Accuracy: 96.0
Iteration 1500, Epoch 2, Loss: 0.08368509262800217, Accuracy: 97.45536041259766, Val Loss: 0.10774662345647812, Val Accuracy: 96.69999694824219
Iteration 2000, Epoch 3, Loss: 0.06987664103507996, Accuracy: 97.84114837646484, Val Loss: 0.13869178295135498, Val Accuracy: 95.70000457763672
Iteration 2500, Epoch 4, Loss: 0.06125343590974808, Accuracy: 97.96797943115234, Val Loss: 0.11766186356544495, Val Accuracy: 96.20000457763672
Iteration 3000, Epoch 4, Loss: 0.05974874645471573, Accuracy: 98.08187866210938, Val Loss: 0.11902928352355957, Val A

Опишите все эксперименты, результаты. Сделайте выводы.

1. Эксперимент проводился только для $5$ эпох в силу того, что после проведения $3$ эпох обучения заметно, что изменения результатов становятся незначительными. 

2. Целесообразно использовать оптимизатор Adam, так как в большинстве случаев он превосходит SGD в эффективности.

3. Уменьшение скорости обучения привело к медленной, но ожидаемой сходимости модели к тем же фиксированным результатам. Поэтому важно подбирать параметры для обучения оптимальным образом.

4. Использование методов регуляризации, таких как Dropout и BatchNormalization, незначительно, но положительно повлияло на качество модели на валидационной выборке.

5. Функция активации ReLU показала результаты немного лучше, чем sigmoid.

6. Подобные исследования требуют значительного времени, так как происходит перебор большого количества возможных комбинаций параметров, моделей и оптимизаторов.

7. Для некоторых наборов параметров был достигнут вполне приемлемый результат: accuracy $\thickapprox$ $96\%$.