# Лабораторная работа 4

Tensorflow 2.x

1) Подготовка данных

2) Использование Keras Model API

3) Использование Keras Sequential + Functional API

https://www.tensorflow.org/tutorials

Для выполнения лабораторной работы необходимо установить TensorFlow версии 2.0 или выше.

Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.



In [1]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt

%matplotlib inline

# Подготовка данных
Загрузите набор данных из предыдущей лабораторной работы. 

In [2]:
def load_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch the MNIST dataset through tf.keras and split it into normalized
    training, validation and test sets.

    The first `num_training` training images become the training split, the
    following `num_validation` images become the validation split, and the
    first `num_test` test images become the test split. Every split is
    normalized with the mean and standard deviation of the training split.

    Inputs:
    - num_training: Number of leading training images kept for training
    - num_validation: Number of subsequent training images kept for validation
    - num_test: Number of leading test images kept for testing

    Returns: A tuple (X_train, y_train, X_val, y_val, X_test, y_test) where the
    images are float32 arrays and the labels are flat int32 arrays.

    Raises: IndexError if a requested split does not fit into the loaded data.
    """
    # Load the raw MNIST dataset and use appropriate data types and shapes
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Slices silently truncate, so fail loudly when a requested split is too big.
    val_end = num_training + num_validation
    if val_end > X_train.shape[0]:
        raise IndexError(
            'num_training + num_validation = {} exceeds the {} available '
            'training samples'.format(val_end, X_train.shape[0]))
    if num_test > X_test.shape[0]:
        raise IndexError(
            'num_test = {} exceeds the {} available test samples'.format(
                num_test, X_test.shape[0]))

    # Subsample the data (validation is taken right after the training part)
    X_val, y_val = X_train[num_training:val_end], y_train[num_training:val_end]
    X_train, y_train = X_train[:num_training], y_train[:num_training]
    X_test, y_test = X_test[:num_test], y_test[:num_test]

    # Normalize the data: subtract the mean pixel and divide by std.
    # Statistics come from the training split only and are reused for the
    # validation and test splits.
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
# NOTE(review): NHW is presumably the (batch, height, width) axis tuple; it is
# not referenced by any of the statements in this cell.
NHW = (0, 1, 2)
# Load all six arrays and report their shapes (plus the label dtype for the
# training labels) as a quick sanity check.
X_train, y_train, X_val, y_val, X_test, y_test = load_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Train data shape:  (49000, 28, 28)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 28, 28)
Validation labels shape:  (1000,)
Test data shape:  (10000, 28, 28)
Test labels shape:  (10000,)


In [16]:
class Dataset(object):
    """Iterable that yields (data, labels) minibatches from two NumPy arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Return an iterator over (X_batch, y_batch) pairs.

        Every batch holds `batch_size` samples except possibly the last one.
        With shuffle enabled, a fresh random sample order is drawn each time
        iteration starts, and data and labels are permuted together.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Contiguous slices keep the original order and avoid copying.
            return ((self.X[i:i + B], self.y[i:i + B]) for i in range(0, N, B))

        # Shuffle an index array and select through it so that X and y stay
        # aligned while the sample order changes.
        idxs = np.arange(N)
        np.random.shuffle(idxs)
        return ((self.X[idxs[i:i + B]], self.y[idxs[i:i + B]])
                for i in range(0, N, B))


# Reload the splits and append a trailing channel axis of size 1, giving
# image arrays of shape (N, 28, 28, 1).
X_train, y_train, X_val, y_val, X_test, y_test = load_data()
X_train = X_train.reshape(-1, 28, 28, 1)
X_val = X_val.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)
# Wrap each split in a minibatch iterator; only the training one asks for shuffling.
train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)

In [4]:
# We can iterate through a dataset like this:
# (prints the batch index and batch shapes, stopping after batches 0..6)
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t > 5: break

0 (64, 28, 28) (64,)
1 (64, 28, 28) (64,)
2 (64, 28, 28) (64,)
3 (64, 28, 28) (64,)
4 (64, 28, 28) (64,)
5 (64, 28, 28) (64,)
6 (64, 28, 28) (64,)


#  Keras Model Subclassing API


Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:

1) Определить новый класс, который является наследником tf.keras.Model.

2) В методе __init__() определить все необходимые слои из модуля tf.keras.layers

3) Реализовать прямой проход в методе call() на основе слоев, объявленных в __init__()

Ниже приведен пример использования keras API для определения двухслойной полносвязной сети. 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras

In [8]:
# Device string passed to the tf.device(...) scopes used by the tests and the
# training loop in this notebook.
device = '/device:GPU:0'

class TwoLayerFC(tf.keras.Model):
    """Fully connected classifier: Flatten -> Dense(ReLU) -> Dense(softmax)."""

    def __init__(self, hidden_size, num_classes):
        """
        Inputs:
        - hidden_size: Number of units in the hidden Dense layer
        - num_classes: Number of units in the softmax output layer
        """
        super().__init__()
        weight_init = tf.initializers.VarianceScaling(scale=2.0)
        self.fc1 = tf.keras.layers.Dense(
            units=hidden_size,
            activation='relu',
            kernel_initializer=weight_init,
        )
        self.fc2 = tf.keras.layers.Dense(
            units=num_classes,
            activation='softmax',
            kernel_initializer=weight_init,
        )
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x, training=False):
        """Flatten the input and return the softmax output of the second layer."""
        flat = self.flatten(x)
        hidden = self.fc1(flat)
        return self.fc2(hidden)


def test_TwoLayerFC():
    """Smoke test: run TwoLayerFC on an all-zero batch and print the output shape."""
    batch_size, input_size = 64, 50
    hidden_size, num_classes = 42, 10
    model = TwoLayerFC(hidden_size, num_classes)
    zeros_batch = tf.zeros((batch_size, input_size))
    with tf.device(device):
        print(model(zeros_batch).shape)


test_TwoLayerFC()

(64, 10)


Реализуйте трехслойную CNN для вашей задачи классификации. 

Архитектура сети:
    
1. Сверточный слой (5 x 5 kernels, zero-padding = 'same')
2. Функция активации ReLU 
3. Сверточный слой (3 x 3 kernels, zero-padding = 'same')
4. Функция активации ReLU 
5. Полносвязный слой 
6. Функция активации Softmax 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense

In [9]:
class ThreeLayerConvNet(tf.keras.Model):
    """Conv(5x5, ReLU) -> Conv(3x3, ReLU) -> Flatten -> Dense(softmax)."""

    def __init__(self, channel_1, channel_2, num_classes):
        """
        Inputs:
        - channel_1: Number of filters in the first convolution
        - channel_2: Number of filters in the second convolution
        - num_classes: Number of units in the softmax output layer
        """
        super().__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        weight_init = tf.initializers.VarianceScaling(scale=2.0)
        self.flatten = tf.keras.layers.Flatten()

        # 5x5 convolution, stride 1, 'same' padding, fused ReLU
        self.conv_2d_1 = tf.keras.layers.Conv2D(
            filters=channel_1,
            kernel_size=[5, 5],
            strides=[1, 1],
            padding='same',
            kernel_initializer=weight_init,
            activation='relu',
        )
        # 3x3 convolution, stride 1, 'same' padding, fused ReLU
        self.conv_2d_2 = tf.keras.layers.Conv2D(
            filters=channel_2,
            kernel_size=[3, 3],
            strides=[1, 1],
            padding='same',
            kernel_initializer=weight_init,
            activation='relu',
        )
        # Softmax classifier on top of the flattened feature maps
        self.dense = tf.keras.layers.Dense(
            units=num_classes,
            activation='softmax',
            kernel_initializer=weight_init,
        )

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################

    def call(self, x, training=False):
        """Apply both convolutions, flatten, and return the softmax output."""
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        features = self.conv_2d_2(self.conv_2d_1(x))
        scores = self.dense(self.flatten(features))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return scores

In [10]:
def test_ThreeLayerConvNet():
    """
    Smoke test: run ThreeLayerConvNet on an all-zero batch and print the
    shape of the returned scores.
    """
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        # The Conv2D layers are created without a data_format argument, so the
        # batch is laid out as (N, H, W, C) with the same 28x28x1 image shape
        # that the dataset cells feed to the model.
        x = tf.zeros((64, 28, 28, 1))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)




Пример реализации процесса обучения:

In [11]:
# Validation is run and progress is printed every `print_every` iterations.
print_every = 200

def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model on the module-level `train_dset` and, every `print_every`
    iterations, evaluates it on the module-level `val_dset`.

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - is_training: Value forwarded as the `training` argument of the model call
      on training batches (validation always calls the model with
      training=False)

    Returns: Nothing, but prints progress during training
    """
    with tf.device(device):
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        t = 0
        for epoch in range(num_epochs):

            # Reset the metrics - https://www.tensorflow.org/alpha/guide/migration_guide#new-style_metrics
            train_loss.reset_states()
            train_accuracy.reset_states()

            for x_np, y_np in train_dset:
                # Only the forward pass and the loss need to be recorded.
                with tf.GradientTape() as tape:
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                # Differentiate and update outside the tape context so the
                # gradient computation, the optimizer step and the validation
                # pass below are not recorded by the tape.
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                # Update the running training metrics for this epoch
                train_loss.update_state(loss)
                train_accuracy.update_state(y_np, scores)

                if t % print_every == 0:
                    val_loss.reset_states()
                    val_accuracy.reset_states()
                    for val_x, val_y in val_dset:
                        # Validation always runs the model with training=False
                        prediction = model(val_x, training=False)
                        batch_loss = loss_fn(val_y, prediction)

                        val_loss.update_state(batch_loss)
                        val_accuracy.update_state(val_y, prediction)

                    template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                    print (template.format(t, epoch+1,
                                         train_loss.result(),
                                         train_accuracy.result()*100,
                                         val_loss.result(),
                                         val_accuracy.result()*100))
                t += 1

In [12]:
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Build a TwoLayerFC model from the module-level sizes."""
    return TwoLayerFC(hidden_size=hidden_size, num_classes=num_classes)


def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.3293864727020264, Accuracy: 1.5625, Val Loss: 2.467919111251831, Val Accuracy: 23.399999618530273
Iteration 200, Epoch 1, Loss: 0.5054786801338196, Accuracy: 84.91915130615234, Val Loss: 0.4547783434391022, Val Accuracy: 85.9000015258789
Iteration 400, Epoch 1, Loss: 0.40421444177627563, Accuracy: 88.03382110595703, Val Loss: 0.36935093998908997, Val Accuracy: 89.20000457763672
Iteration 600, Epoch 1, Loss: 0.3589380383491516, Accuracy: 89.40827941894531, Val Loss: 0.34514451026916504, Val Accuracy: 90.30000305175781


Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9 . 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD

Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50% .

In [17]:
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16
num_classes = 10

def model_init_fn():
    """Build the ThreeLayerConvNet from the module-level channel/class counts."""
    model = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = ThreeLayerConvNet(
        channel_1=channel_1,
        channel_2=channel_2,
        num_classes=num_classes,
    )

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    """Build an SGD optimizer with Nesterov momentum 0.9."""
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate,
        momentum=0.9,
        nesterov=True,
    )

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)



Iteration 0, Epoch 1, Loss: 2.64253306388855, Accuracy: 15.625, Val Loss: 2.33231258392334, Val Accuracy: 16.100000381469727
Iteration 200, Epoch 1, Loss: 0.30792173743247986, Accuracy: 90.63277435302734, Val Loss: 0.23065949976444244, Val Accuracy: 92.0999984741211
Iteration 400, Epoch 1, Loss: 0.21408917009830475, Accuracy: 93.52790069580078, Val Loss: 0.1509048491716385, Val Accuracy: 95.0
Iteration 600, Epoch 1, Loss: 0.1777547299861908, Accuracy: 94.68594360351562, Val Loss: 0.16283921897411346, Val Accuracy: 94.5999984741211


# Использование Keras Sequential API для реализации последовательных моделей.

Пример для полносвязной сети:

In [19]:
learning_rate = 1e-2


def model_init_fn():
    """Build the two-layer fully connected network with tf.keras.Sequential."""
    hidden_layer_size, num_classes = 4000, 10
    weight_init = tf.initializers.VarianceScaling(scale=2.0)
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(
            hidden_layer_size,
            activation='relu',
            kernel_initializer=weight_init,
        ),
        tf.keras.layers.Dense(
            num_classes,
            activation='softmax',
            kernel_initializer=weight_init,
        ),
    ])


def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.1466312408447266, Accuracy: 1.5625, Val Loss: 2.6549553871154785, Val Accuracy: 22.0
Iteration 200, Epoch 1, Loss: 0.5178578495979309, Accuracy: 84.68594360351562, Val Loss: 0.45274806022644043, Val Accuracy: 86.0999984741211
Iteration 400, Epoch 1, Loss: 0.41404861211776733, Accuracy: 87.81951904296875, Val Loss: 0.36187389492988586, Val Accuracy: 89.4000015258789
Iteration 600, Epoch 1, Loss: 0.36701303720474243, Accuracy: 89.22628784179688, Val Loss: 0.33457234501838684, Val Accuracy: 90.69999694824219


Альтернативный менее гибкий способ обучения:

In [20]:
# Train the same model through the built-in compile/fit workflow instead of
# the hand-written loop, then evaluate it on the test split.
model = model_init_fn()
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)



[0.23197638988494873, 0.9330000281333923]

Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API . Обучите модель двумя способами.

In [21]:
def model_init_fn():
    """Build the three-layer ConvNet with tf.keras.Sequential."""
    model = None
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    channel_1, channel_2, num_classes = 28, 14, 10
    weight_init = tf.initializers.VarianceScaling(scale=2.0)
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=channel_1,
            kernel_size=[5, 5],
            strides=[1, 1],
            padding='same',
            kernel_initializer=weight_init,
            activation='relu',
        ),
        tf.keras.layers.Conv2D(
            filters=channel_2,
            kernel_size=[3, 3],
            strides=[1, 1],
            padding='same',
            kernel_initializer=weight_init,
            activation='relu',
        ),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(
            units=num_classes,
            activation='softmax',
            kernel_initializer=weight_init,
        ),
    ])

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################
    return model

learning_rate = 5e-4
def optimizer_init_fn():
    """Build an SGD optimizer with Nesterov momentum 0.9."""
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate,
        momentum=0.9,
        nesterov=True,
    )

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.4916000366210938, Accuracy: 17.1875, Val Loss: 2.40303373336792, Val Accuracy: 15.399999618530273
Iteration 200, Epoch 1, Loss: 0.5852739214897156, Accuracy: 82.40827178955078, Val Loss: 0.4662199914455414, Val Accuracy: 85.29999542236328
Iteration 400, Epoch 1, Loss: 0.4268709123134613, Accuracy: 87.25841522216797, Val Loss: 0.29291850328445435, Val Accuracy: 90.69999694824219
Iteration 600, Epoch 1, Loss: 0.3553744852542877, Accuracy: 89.48627471923828, Val Loss: 0.2744094431400299, Val Accuracy: 91.5


In [22]:
# Second way of training the Sequential ConvNet: the built-in compile/fit
# workflow, followed by evaluation on the test split.
# NOTE(review): optimizer='sgd' selects the optimizer by name, so the
# learning_rate / Nesterov momentum configured in optimizer_init_fn are
# presumably not applied here — confirm this is intended.
model = model_init_fn()
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)



[0.10435730218887329, 0.9700999855995178]

# Использование Keras Functional API

Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры. 

Ниже представлен пример для полносвязной сети. 

In [23]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):
    """
    Build a two-layer fully connected network with the Keras Functional API.

    Inputs:
    - input_shape: Shape of one input sample (without the batch dimension)
    - hidden_size: Number of units in the hidden Dense layer
    - num_classes: Number of units in the softmax output layer

    Returns: A tf.keras.Model mapping the input tensor to softmax scores.
    """
    weight_init = tf.initializers.VarianceScaling(scale=2.0)

    # Layers are created first, then wired together from input to output.
    flatten = tf.keras.layers.Flatten()
    hidden = tf.keras.layers.Dense(
        hidden_size, activation='relu', kernel_initializer=weight_init)
    classifier = tf.keras.layers.Dense(
        num_classes, activation='softmax', kernel_initializer=weight_init)

    inputs = tf.keras.Input(shape=input_shape)
    scores = classifier(hidden(flatten(inputs)))

    # Instantiate the model given inputs and outputs.
    return tf.keras.Model(inputs=inputs, outputs=scores)

def test_two_layer_fc_functional():
    """Smoke test: run the functional two-layer model on zeros and print the output shape."""
    input_size, hidden_size, num_classes = 50, 42, 10
    model = two_layer_fc_functional((50,), hidden_size, num_classes)
    zeros_batch = tf.zeros((64, input_size))

    with tf.device(device):
        print(model(zeros_batch).shape)


test_two_layer_fc_functional()

(64, 10)


In [25]:
input_shape = (28, 28, 1)
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Build the functional two-layer network from the module-level settings."""
    return two_layer_fc_functional(
        input_shape=input_shape,
        hidden_size=hidden_size,
        num_classes=num_classes,
    )


def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.031647205352783, Accuracy: 7.8125, Val Loss: 2.748664379119873, Val Accuracy: 17.5
Iteration 200, Epoch 1, Loss: 0.5168659687042236, Accuracy: 84.48383331298828, Val Loss: 0.4636557102203369, Val Accuracy: 86.0999984741211
Iteration 400, Epoch 1, Loss: 0.4133908748626709, Accuracy: 87.597412109375, Val Loss: 0.38099777698516846, Val Accuracy: 89.0999984741211
Iteration 600, Epoch 1, Loss: 0.3642720878124237, Accuracy: 89.16129302978516, Val Loss: 0.3371207118034363, Val Accuracy: 90.80000305175781


Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут). 

Экспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods

In [30]:
class CustomConvNet(tf.keras.Model):
    """Conv(3x3) -> ReLU -> Conv(3x3) -> ReLU -> Flatten -> Dense(softmax)."""

    def __init__(self):
        super().__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        channel_1, channel_2, num_classes = 28, 14, 10
        weight_init = tf.initializers.VarianceScaling(scale=2.0)
        # First 3x3 convolution; its activation is a separate ReLU layer
        self.conv2d_1 = tf.keras.layers.Conv2D(
            filters=channel_1,
            kernel_size=[3, 3],
            strides=[1, 1],
            padding='same',
            kernel_initializer=weight_init,
        )
        self.relu_1 = tf.keras.layers.ReLU()
        # Second 3x3 convolution, again followed by a separate ReLU layer
        self.conv2d_2 = tf.keras.layers.Conv2D(
            filters=channel_2,
            kernel_size=[3, 3],
            strides=[1, 1],
            padding='same',
            kernel_initializer=weight_init,
        )
        self.relu_2 = tf.keras.layers.ReLU()
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(
            units=num_classes,
            activation='softmax',
            kernel_initializer=weight_init,
        )

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        """Run the layers in sequence and return the softmax output."""
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        x = input_tensor
        for layer in (self.conv2d_1, self.relu_1,
                      self.conv2d_2, self.relu_2,
                      self.flatten, self.dense):
            x = layer(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x


# Report progress every 700 iterations and train for 10 epochs.
print_every = 700
num_epochs = 10

# This instance is not passed to train_part34, which builds its own model
# through model_init_fn.
model = CustomConvNet()


def model_init_fn():
    """Build a fresh CustomConvNet."""
    return CustomConvNet()


def optimizer_init_fn():
    """Build an Adam optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.Adam(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)



Iteration 0, Epoch 1, Loss: 2.576293706893921, Accuracy: 7.8125, Val Loss: 2.8266239166259766, Val Accuracy: 19.5
Iteration 700, Epoch 1, Loss: 0.16973356902599335, Accuracy: 94.8332748413086, Val Loss: 0.13477586209774017, Val Accuracy: 95.70000457763672
Iteration 1400, Epoch 2, Loss: 0.05423677712678909, Accuracy: 98.30462646484375, Val Loss: 0.10108760744333267, Val Accuracy: 96.9000015258789
Iteration 2100, Epoch 3, Loss: 0.027833860367536545, Accuracy: 99.10204315185547, Val Loss: 0.14685797691345215, Val Accuracy: 95.70000457763672
Iteration 2800, Epoch 4, Loss: 0.01835336536169052, Accuracy: 99.3694076538086, Val Loss: 0.1152573823928833, Val Accuracy: 96.80000305175781
Iteration 3500, Epoch 5, Loss: 0.016301140189170837, Accuracy: 99.38143157958984, Val Loss: 0.16427810490131378, Val Accuracy: 95.80000305175781
Iteration 4200, Epoch 6, Loss: 0.010541919618844986, Accuracy: 99.62516784667969, Val Loss: 0.15349283814430237, Val Accuracy: 96.80000305175781
Iteration 4900, Epoch 7,

In [31]:
def model_init_fn():
    """Build a fresh CustomConvNet."""
    return CustomConvNet()


# Same architecture, this time optimized with plain SGD
def optimizer_init_fn():
    """Build an SGD optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.SGD(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)

Iteration 0, Epoch 1, Loss: 3.065743923187256, Accuracy: 12.5, Val Loss: 2.6301918029785156, Val Accuracy: 9.0
Iteration 700, Epoch 1, Loss: 0.5249208211898804, Accuracy: 84.43740844726562, Val Loss: 0.402607798576355, Val Accuracy: 88.70000457763672
Iteration 1400, Epoch 2, Loss: 0.28697553277015686, Accuracy: 91.75688934326172, Val Loss: 0.3366349935531616, Val Accuracy: 90.5
Iteration 2100, Epoch 3, Loss: 0.2380547821521759, Accuracy: 93.28042602539062, Val Loss: 0.3009321689605713, Val Accuracy: 91.9000015258789
Iteration 2800, Epoch 4, Loss: 0.20469968020915985, Accuracy: 94.20663452148438, Val Loss: 0.26400548219680786, Val Accuracy: 93.0999984741211
Iteration 3500, Epoch 5, Loss: 0.17488960921764374, Accuracy: 95.05148315429688, Val Loss: 0.24129781126976013, Val Accuracy: 92.4000015258789
Iteration 4200, Epoch 6, Loss: 0.15263627469539642, Accuracy: 95.65785217285156, Val Loss: 0.21150705218315125, Val Accuracy: 94.0
Iteration 4900, Epoch 7, Loss: 0.13690835237503052, Accuracy:

In [34]:
class CustomConvNet_drp(tf.keras.Model):
    """
    Conv(3x3) -> ReLU -> Dropout -> Conv(3x3) -> ReLU -> Dropout -> Flatten
    -> Dense(softmax).
    """

    def __init__(self):
        super(CustomConvNet_drp, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        channel_1, channel_2, num_classes = 28, 14, 10
        kernel_initializer = tf.initializers.VarianceScaling(scale=2.0)
        self.conv2d_1 = tf.keras.layers.Conv2D(channel_1, [3,3], [1,1], padding='same', kernel_initializer=kernel_initializer)
        self.relu_1 = tf.keras.layers.ReLU()
        # NOTE(review): Dropout's `rate` is the fraction of units dropped, so
        # 0.8 keeps only about 20% of the activations — confirm this was meant
        # as a drop rate and not as a keep probability.
        self.dropout_1 = tf.keras.layers.Dropout(rate=0.8)
        self.conv2d_2 = tf.keras.layers.Conv2D(channel_2, [3,3], [1,1], padding='same', kernel_initializer=kernel_initializer)
        self.relu_2 = tf.keras.layers.ReLU()
        self.dropout_2 = tf.keras.layers.Dropout(rate=0.8)
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(num_classes, activation='softmax', kernel_initializer=kernel_initializer)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        """
        Run the forward pass and return the softmax output.

        Inputs:
        - input_tensor: Batch of inputs fed to the first convolution
        - training: Passed to both Dropout layers as their `training` argument
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        x = self.relu_1(self.conv2d_1(input_tensor))
        # Hand the flag to Dropout explicitly instead of relying on Keras to
        # propagate it implicitly from the outer model call.
        x = self.dropout_1(x, training=training)
        x = self.relu_2(self.conv2d_2(x))
        x = self.dropout_2(x, training=training)
        x = self.dense(self.flatten(x))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x


In [35]:
def model_init_fn():
    """Build a fresh CustomConvNet_drp (the dropout variant)."""
    return CustomConvNet_drp()


# Adam optimizer combined with the dropout architecture
def optimizer_init_fn():
    """Build an Adam optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.Adam(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)



Iteration 0, Epoch 1, Loss: 9.975008964538574, Accuracy: 9.375, Val Loss: 2.8528974056243896, Val Accuracy: 11.200000762939453
Iteration 700, Epoch 1, Loss: 1.108396053314209, Accuracy: 72.38097381591797, Val Loss: 0.3192019462585449, Val Accuracy: 91.79999542236328
Iteration 1400, Epoch 2, Loss: 0.3703233599662781, Accuracy: 88.69832611083984, Val Loss: 0.201957106590271, Val Accuracy: 94.5999984741211
Iteration 2100, Epoch 3, Loss: 0.28532084822654724, Accuracy: 91.10281372070312, Val Loss: 0.15183916687965393, Val Accuracy: 95.20000457763672
Iteration 2800, Epoch 4, Loss: 0.23482541739940643, Accuracy: 92.74664306640625, Val Loss: 0.13948631286621094, Val Accuracy: 96.20000457763672
Iteration 3500, Epoch 5, Loss: 0.21522189676761627, Accuracy: 93.21366882324219, Val Loss: 0.1340184360742569, Val Accuracy: 96.30000305175781
Iteration 4200, Epoch 6, Loss: 0.19679488241672516, Accuracy: 93.82581329345703, Val Loss: 0.11862444132566452, Val Accuracy: 96.5
Iteration 4900, Epoch 7, Loss: 

In [36]:
def model_init_fn():
    """Build a fresh CustomConvNet_drp (the dropout variant)."""
    return CustomConvNet_drp()


# SGD optimizer combined with the dropout architecture
def optimizer_init_fn():
    """Build an SGD optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.SGD(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)

Iteration 0, Epoch 1, Loss: 12.6145658493042, Accuracy: 12.5, Val Loss: 2.848093271255493, Val Accuracy: 13.799999237060547
Iteration 700, Epoch 1, Loss: 2.684824228286743, Accuracy: 53.8984489440918, Val Loss: 0.6661323308944702, Val Accuracy: 82.30000305175781
Iteration 1400, Epoch 2, Loss: 0.978321373462677, Accuracy: 73.27263641357422, Val Loss: 0.5713334679603577, Val Accuracy: 86.5
Iteration 2100, Epoch 3, Loss: 0.76649409532547, Accuracy: 77.86138153076172, Val Loss: 0.5331937074661255, Val Accuracy: 87.69999694824219
Iteration 2800, Epoch 4, Loss: 0.6546801328659058, Accuracy: 80.54485321044922, Val Loss: 0.480038046836853, Val Accuracy: 89.0
Iteration 3500, Epoch 5, Loss: 0.5788421630859375, Accuracy: 82.76602172851562, Val Loss: 0.42741259932518005, Val Accuracy: 90.5999984741211
Iteration 4200, Epoch 6, Loss: 0.5230112671852112, Accuracy: 84.42132568359375, Val Loss: 0.3830554187297821, Val Accuracy: 91.39999389648438
Iteration 4900, Epoch 7, Loss: 0.4929662048816681, Accura

Опишите все эксперименты, результаты. Сделайте выводы.

Основные выводы по проделанной работе:
1. Все исследованные конфигурации экспериментов показали достойные результаты.
2. Что касается оптимальной архитектуры: проведённые эксперименты показывают, что на этих данных лучше работает модель, в которой нет Dropout-слоёв.
3. Сравнивая оптимизаторы, приходим к выводу, что наиболее эффективным является Adam.