# Лабораторная работа 4

Tensorflow 2.x

1) Подготовка данных

2) Использование Keras Model API

3) Использование Keras Sequential + Functional API

https://www.tensorflow.org/tutorials

Для выполнения лабораторной работы необходимо установить tensorflow версии 2.0 или выше .

Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.



In [103]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

%matplotlib inline

image_h = 8
image_w = 8
image_size = image_h * image_w
print_every = 100


device = '/CPU:0'

# Подготовка данных
Загрузите набор данных из предыдущей лабораторной работы. 

In [104]:
def load_dataset(test_proportion=0.2, val_proportion=0.2, flatten=True):
    digits = load_digits()
    digits_data = digits.data if flatten else digits.data.reshape(digits.data.shape[0], 8, 8, 1)
    X_train, X_val, y_train, y_val = train_test_split(digits_data, digits.target, test_size=val_proportion, random_state=1337)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=test_proportion / (1 - val_proportion), random_state=1337)

    mean_image = np.mean(X_train, axis=0)

    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test


# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(flatten=False)
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (1077, 8, 8, 1)
Train labels shape:  (1077,) int64
Validation data shape:  (360, 8, 8, 1)
Validation labels shape:  (360,)
Test data shape:  (360, 8, 8, 1)
Test labels shape:  (360,)


In [105]:
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y
        
        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))


train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64)
test_dset = Dataset(X_test, y_test, batch_size=64)

In [106]:
# We can iterate through a dataset like this:
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t > 5: break

0 (64, 8, 8, 1) (64,)
1 (64, 8, 8, 1) (64,)
2 (64, 8, 8, 1) (64,)
3 (64, 8, 8, 1) (64,)
4 (64, 8, 8, 1) (64,)
5 (64, 8, 8, 1) (64,)
6 (64, 8, 8, 1) (64,)


#  Keras Model Subclassing API


Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:

1) Определить новый класс, который является наследником tf.keras.Model.

2) В методе __init__() определить все необходимые слои из модуля tf.keras.layer

3) Реализовать прямой проход в методе call() на основе слоев, объявленных в __init__()

Ниже приведен пример использования keras API для определения двухслойной полносвязной сети. 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras

In [107]:
class TwoLayerFC(tf.keras.Model):
    def __init__(self, hidden_size, num_classes):
        super(TwoLayerFC, self).__init__()        
        initializer = tf.initializers.VarianceScaling(scale=2.0)
        self.fc1 = tf.keras.layers.Dense(hidden_size, activation='relu',
                                   kernel_initializer=initializer)
        self.fc2 = tf.keras.layers.Dense(num_classes, activation='softmax',
                                   kernel_initializer=initializer)
        self.flatten = tf.keras.layers.Flatten()
    
    def call(self, x, training=False):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


def test_TwoLayerFC():
    """ A small unit test to exercise the TwoLayerFC model above. """
    input_size, hidden_size, num_classes = 50, 42, 10
    x = tf.zeros((64, input_size))
    model = TwoLayerFC(hidden_size, num_classes)
    with tf.device(device):
        scores = model(x)
        print(scores.shape)
        
test_TwoLayerFC()

(64, 10)


Реализуйте трехслойную CNN для вашей задачи классификации. 

Архитектура сети:
    
1. Сверточный слой (5 x 5 kernels, zero-padding = 'same')
2. Функция активации ReLU 
3. Сверточный слой (3 x 3 kernels, zero-padding = 'same')
4. Функция активации ReLU 
5. Полносвязный слой 
6. Функция активации Softmax 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense

In [108]:
class ThreeLayerConvNet(tf.keras.Model):
    def __init__(self, channel_1, channel_2, num_classes):
        super(ThreeLayerConvNet, self).__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        initializer = tf.initializers.VarianceScaling(scale=2.0)
        self.conv1 = tf.keras.layers.Conv2D(channel_1, (5, 5), activation='relu', padding='same',
                                            kernel_initializer=initializer)
        self.conv2 = tf.keras.layers.Conv2D(channel_2, (3, 3), activation='relu', padding='same',
                                            kernel_initializer=initializer)
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(128, activation='relu',
                                         kernel_initializer=initializer)
        self.fc2 = tf.keras.layers.Dense(num_classes, activation='softmax',
                                         kernel_initializer=initializer)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        
    def call(self, x, training=False):
        scores = None
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        scores = self.fc2(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################        
        return scores

In [109]:
def test_ThreeLayerConvNet():    
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        x = tf.zeros((64, 1, 8, 8))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)


Пример реализации процесса обучения:

In [110]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False, print_every = 1, loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for one epoch on the CIFAR-10 training set and periodically checks
    accuracy on the CIFAR-10 validation set.
    
    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    
    Returns: Nothing, but prints progress during trainingn
    """    
    with tf.device(device):
        model = model_init_fn()
        optimizer = optimizer_init_fn()
        
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    
        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')
        
        t = 0
        for epoch in range(num_epochs):
            
            # Reset the metrics - https://www.tensorflow.org/alpha/guide/migration_guide#new-style_metrics
            # train_loss.reset_states()
            # train_accuracy.reset_states()
            
            for x_np, y_np in train_dset:
                with tf.GradientTape() as tape:
                    
                    # Use the model function to build the forward pass.
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)
      
                    gradients = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                    
                    # Update the metrics
                    train_loss.update_state(loss)
                    train_accuracy.update_state(y_np, scores)
                    
                    if t % print_every == 0:
                        # val_loss.reset_states()
                        # val_accuracy.reset_states()
                        for test_x, test_y in val_dset:
                            # During validation at end of epoch, training set to False
                            prediction = model(test_x, training=False)
                            t_loss = loss_fn(test_y, prediction)

                            val_loss.update_state(t_loss)
                            val_accuracy.update_state(test_y, prediction)
                        
                        template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                        print (template.format(t, epoch+1,
                                             train_loss.result(),
                                             train_accuracy.result()*100,
                                             val_loss.result(),
                                             val_accuracy.result()*100))
                    t += 1
        return train_accuracy.result()*100

In [111]:
hidden_size, num_classes = 4000, 10
learning_rate = 1e-2

def model_init_fn():
    return TwoLayerFC(hidden_size, num_classes)

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 8.162327766418457, Accuracy: 12.5, Val Loss: 13.33396053314209, Val Accuracy: 44.72222137451172
Iteration 1, Epoch 1, Loss: 10.469593048095703, Accuracy: 29.6875, Val Loss: 15.84652328491211, Val Accuracy: 41.80555725097656
Iteration 2, Epoch 1, Loss: 14.974430084228516, Accuracy: 28.64583396911621, Val Loss: 17.982723236083984, Val Accuracy: 43.98147964477539
Iteration 3, Epoch 1, Loss: 17.286230087280273, Accuracy: 34.765625, Val Loss: 15.2111234664917, Val Accuracy: 48.125
Iteration 4, Epoch 1, Loss: 15.572816848754883, Accuracy: 39.375, Val Loss: 13.973977088928223, Val Accuracy: 53.0555534362793
Iteration 5, Epoch 1, Loss: 13.992852210998535, Accuracy: 45.833335876464844, Val Loss: 12.330374717712402, Val Accuracy: 56.89814758300781
Iteration 6, Epoch 1, Loss: 12.350545883178711, Accuracy: 51.33928680419922, Val Loss: 10.861659049987793, Val Accuracy: 60.357139587402344
Iteration 7, Epoch 1, Loss: 11.193564414978027, Accuracy: 54.4921875, Val Loss: 9.66

Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9 . 

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD

Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50% .

In [112]:
learning_rate = 3e-3
channel_1, channel_2, num_classes = 32, 16, 10

def model_init_fn():
    model = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn, optimizer_init_fn, print_every=1)

Iteration 0, Epoch 1, Loss: 9.984516143798828, Accuracy: 10.9375, Val Loss: 11.171607971191406, Val Accuracy: 15.555556297302246
Iteration 1, Epoch 1, Loss: 9.524357795715332, Accuracy: 17.1875, Val Loss: 8.430203437805176, Val Accuracy: 27.5
Iteration 2, Epoch 1, Loss: 8.767992973327637, Accuracy: 19.270832061767578, Val Loss: 6.809556007385254, Val Accuracy: 34.53703689575195
Iteration 3, Epoch 1, Loss: 7.636340141296387, Accuracy: 25.78125, Val Loss: 5.730673313140869, Val Accuracy: 38.33333206176758
Iteration 4, Epoch 1, Loss: 6.5225629806518555, Accuracy: 31.562501907348633, Val Loss: 4.974728107452393, Val Accuracy: 41.0555534362793
Iteration 5, Epoch 1, Loss: 5.69639778137207, Accuracy: 35.9375, Val Loss: 4.413943290710449, Val Accuracy: 43.61111068725586
Iteration 6, Epoch 1, Loss: 5.062277793884277, Accuracy: 38.83928680419922, Val Loss: 3.9844436645507812, Val Accuracy: 46.5476188659668
Iteration 7, Epoch 1, Loss: 4.6079230308532715, Accuracy: 41.6015625, Val Loss: 3.64492678

# Использование Keras Sequential API для реализации последовательных моделей.

Пример для полносвязной сети:

In [113]:
learning_rate = 1e-2

def model_init_fn():
    input_shape = (8, 8, 1)
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    layers = [
        tf.keras.layers.Flatten(input_shape=input_shape),
        tf.keras.layers.Dense(hidden_size, activation='relu',
                              kernel_initializer=initializer),
        tf.keras.layers.Dense(num_classes, activation='softmax', 
                              kernel_initializer=initializer),
    ]
    model = tf.keras.Sequential(layers)
    return model

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate) 

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 8.52289867401123, Accuracy: 12.5, Val Loss: 12.101468086242676, Val Accuracy: 41.666664123535156
Iteration 1, Epoch 1, Loss: 9.541996002197266, Accuracy: 29.6875, Val Loss: 9.708847999572754, Val Accuracy: 48.33333206176758
Iteration 2, Epoch 1, Loss: 9.516554832458496, Accuracy: 36.458335876464844, Val Loss: 10.252432823181152, Val Accuracy: 47.22222137451172
Iteration 3, Epoch 1, Loss: 9.351118087768555, Accuracy: 39.453125, Val Loss: 9.531935691833496, Val Accuracy: 52.361114501953125
Iteration 4, Epoch 1, Loss: 9.292335510253906, Accuracy: 45.9375, Val Loss: 8.295585632324219, Val Accuracy: 56.722225189208984
Iteration 5, Epoch 1, Loss: 8.164918899536133, Accuracy: 52.08333206176758, Val Loss: 7.129280090332031, Val Accuracy: 61.6203727722168
Iteration 6, Epoch 1, Loss: 7.2055511474609375, Accuracy: 57.142860412597656, Val Loss: 6.501587867736816, Val Accuracy: 64.04761505126953
Iteration 7, Epoch 1, Loss: 6.7384772300720215, Accuracy: 58.984375, Val Los

Альтернативный менее гибкий способ обучения:

In [114]:
model = model_init_fn()
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 8.3488 - sparse_categorical_accuracy: 0.5798 - val_loss: 0.4033 - val_sparse_categorical_accuracy: 0.9444
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.7116 - sparse_categorical_accuracy: 0.9426 


[0.788570761680603, 0.9277777671813965]

Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API . Обучите модель двумя способами.

In [115]:
learning_rate = 5e-4

def model_init_fn():
    model = None
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    input_shape = (8, 8, 1)
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    layers = [
        tf.keras.layers.Conv2D(channel_1, (5, 5), activation='relu', padding='same',
                               kernel_initializer=initializer, input_shape=input_shape),
        tf.keras.layers.Conv2D(channel_2, (3, 3), activation='relu', padding='same',
                               kernel_initializer=initializer),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu', kernel_initializer=initializer),
        tf.keras.layers.Dense(num_classes, activation='softmax', kernel_initializer=initializer)
    ]
    model = tf.keras.Sequential(layers)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################

    return model

def optimizer_init_fn():
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################

    return optimizer

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 10.722034454345703, Accuracy: 3.125, Val Loss: 7.324424743652344, Val Accuracy: 5.277777671813965
Iteration 1, Epoch 1, Loss: 8.789379119873047, Accuracy: 6.25, Val Loss: 6.39747953414917, Val Accuracy: 6.94444465637207
Iteration 2, Epoch 1, Loss: 7.90103006362915, Accuracy: 5.2083330154418945, Val Loss: 5.743509292602539, Val Accuracy: 9.44444465637207
Iteration 3, Epoch 1, Loss: 7.133281230926514, Accuracy: 8.203125, Val Loss: 5.200233459472656, Val Accuracy: 12.222222328186035
Iteration 4, Epoch 1, Loss: 6.480158805847168, Accuracy: 9.0625, Val Loss: 4.7447285652160645, Val Accuracy: 15.5
Iteration 5, Epoch 1, Loss: 5.975378036499023, Accuracy: 11.71875, Val Loss: 4.357387065887451, Val Accuracy: 19.074073791503906
Iteration 6, Epoch 1, Loss: 5.472195625305176, Accuracy: 15.848215103149414, Val Loss: 4.0269856452941895, Val Accuracy: 22.619047164916992
Iteration 7, Epoch 1, Loss: 5.067516803741455, Accuracy: 20.703125, Val Loss: 3.733898401260376, Val Acc

In [116]:
model = model_init_fn()
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.sparse_categorical_accuracy])
model.fit(X_train, y_train, batch_size=64, epochs=1, validation_data=(X_val, y_val))
model.evaluate(X_test, y_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 3.8228 - sparse_categorical_accuracy: 0.4432 - val_loss: 0.3327 - val_sparse_categorical_accuracy: 0.9028
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.3949 - sparse_categorical_accuracy: 0.9081 


[0.4229964315891266, 0.894444465637207]

# Использование Keras Functional API

Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры. 

Ниже представлен пример для полносвязной сети. 

In [117]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):  
    initializer = tf.initializers.VarianceScaling(scale=2.0)
    inputs = tf.keras.Input(shape=input_shape)
    flattened_inputs = tf.keras.layers.Flatten()(inputs)
    fc1_output = tf.keras.layers.Dense(hidden_size, activation='relu',
                                 kernel_initializer=initializer)(flattened_inputs)
    scores = tf.keras.layers.Dense(num_classes, activation='softmax',
                             kernel_initializer=initializer)(fc1_output)

    # Instantiate the model given inputs and outputs.
    model = tf.keras.Model(inputs=inputs, outputs=scores)
    return model

def test_two_layer_fc_functional():
    """ A small unit test to exercise the TwoLayerFC model above. """
    input_size, hidden_size, num_classes = 50, 42, 10
    input_shape = (50,)
    
    x = tf.zeros((64, input_size))
    model = two_layer_fc_functional(input_shape, hidden_size, num_classes)
    
    with tf.device(device):
        scores = model(x)
        print(scores.shape)
        
test_two_layer_fc_functional()

(64, 10)


In [118]:
input_shape = (8, 8, 1)

def model_init_fn():
    return two_layer_fc_functional(input_shape, hidden_size, num_classes)

def optimizer_init_fn():
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 11.078691482543945, Accuracy: 3.125, Val Loss: 7.880964279174805, Val Accuracy: 11.666666984558105
Iteration 1, Epoch 1, Loss: 9.762127876281738, Accuracy: 8.59375, Val Loss: 6.948307037353516, Val Accuracy: 14.861111640930176
Iteration 2, Epoch 1, Loss: 8.673346519470215, Accuracy: 10.9375, Val Loss: 6.2771100997924805, Val Accuracy: 17.685184478759766
Iteration 3, Epoch 1, Loss: 7.578333377838135, Accuracy: 14.453125, Val Loss: 5.744487762451172, Val Accuracy: 20.55555534362793
Iteration 4, Epoch 1, Loss: 6.7844648361206055, Accuracy: 18.75, Val Loss: 5.301139831542969, Val Accuracy: 24.0
Iteration 5, Epoch 1, Loss: 6.20867919921875, Accuracy: 22.395832061767578, Val Loss: 4.924566745758057, Val Accuracy: 26.712963104248047
Iteration 6, Epoch 1, Loss: 5.701239109039307, Accuracy: 26.5625, Val Loss: 4.618236064910889, Val Accuracy: 29.4841251373291
Iteration 7, Epoch 1, Loss: 5.3088202476501465, Accuracy: 29.8828125, Val Loss: 4.354894161224365, Val Accurac

Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут). 

Эспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.  

https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods

In [119]:
num_epochs = 10

class CustomConvNet(tf.keras.Model):
    def __init__(self, num_filters, kernel_size, dense_units, dropout_rate, regularizer):
        super(CustomConvNet, self).__init__()
        initializer = tf.initializers.VarianceScaling(scale=2.0)

        self.conv1 = tf.keras.layers.Conv2D(num_filters, kernel_size, activation='relu', padding='same', kernel_initializer=initializer, kernel_regularizer=regularizer)
        self.conv2 = tf.keras.layers.Conv2D(num_filters, kernel_size, activation='relu', padding='same', kernel_initializer=initializer, kernel_regularizer=regularizer)
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(dense_units, activation='relu', kernel_initializer=initializer, kernel_regularizer=regularizer)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.fc2 = tf.keras.layers.Dense(10, activation='softmax', kernel_initializer=initializer)

    def call(self, input_tensor, training=False):
        x = self.conv1(input_tensor)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout(x, training=training)
        x = self.fc2(x)
        return x

# Перебор гиперпараметров
num_filters_list = [32, 64]
kernel_size_list = [(3, 3), (5, 5)]
dense_units_list = [128, 256]
dropout_rate_list = [0.3, 0.5]
regularizer_list = [None, tf.keras.regularizers.l2(0.001)]
loss_list = [
    tf.keras.losses.SparseCategoricalCrossentropy(),
    tf.keras.losses.CategoricalCrossentropy(),
]
optimizer_list = ['adam', 'sgd']
learning_rate_list = [1e-3, 1e-4]

best_accuracy = 0.0
best_params = None
results = []

for num_filters in num_filters_list:
    for kernel_size in kernel_size_list:
        for dense_units in dense_units_list:
            for dropout_rate in dropout_rate_list:
                for regularizer in regularizer_list:
                    for loss in loss_list:
                        for optimizer in optimizer_list:
                            for learning_rate in learning_rate_list:
                                def model_init_fn():
                                    return CustomConvNet(num_filters, kernel_size, dense_units, dropout_rate, regularizer)
    
                                def optimizer_init_fn():
                                    if optimizer == 'adam':
                                        return tf.keras.optimizers.Adam(learning_rate)
                                    elif optimizer == 'sgd':
                                        return tf.keras.optimizers.SGD(learning_rate)
                                    elif optimizer == 'rmsprop':
                                        return tf.keras.optimizers.RMSprop(learning_rate)
    
                                test_acc = train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epochs, is_training=True, print_every=16, loss_fn=loss)

                                current_params = {'num_filters': num_filters, 'kernel_size': kernel_size, 'dense_units': dense_units, 'dropout_rate': dropout_rate, 'regularizer': regularizer, 'loss_fn': loss.name, 'optimizer': optimizer, 'learning_rate': learning_rate, 'test_acc': test_acc}

                                results.append(current_params)
    
                                if test_acc > best_accuracy:
                                    best_accuracy = test_acc
                                    best_params = current_params


print(f"\nBest accuracy: {best_accuracy:.4f}")
print(f"Best parameters:", best_params)

ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(64,), output.shape=(64, 8, 8, 1)

Опишите все эксперименты, результаты. Сделайте выводы.

In [None]:
import pandas as pd