In [22]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

In [23]:
# Notebook-wide input-pipeline settings; both are handed to the data-loading
# and training helpers further down.
BATCH_SIZE = 64              # examples per batch
SHUFFLE_BUFFER_SIZE = 1_000  # number of examples held in the shuffle buffer

In [24]:
def preprocessing(img, label):
    """Convert one (image, label) example into model-ready tensors.

    The image is cast to float32 and rescaled with ``x / 128 - 1``; the label
    is expanded into a one-hot vector with 10 entries.

    Args:
        img: Image tensor. Presumably uint8 pixels in [0, 255] (which would
            land in roughly [-1, 0.992] after rescaling) -- confirm against
            the dataset being mapped.
        label: Integer class index.

    Returns:
        Tuple ``(scaled_image, one_hot_label)``.
    """
    pixels = tf.cast(img, tf.float32)
    # Divide first, then shift, so that an input of 0 maps to -1.
    scaled_image = pixels / 128 - 1
    one_hot_label = tf.one_hot(label, depth=10)
    return scaled_image, one_hot_label

def load_and_prep_cifar(batch_size, shuffle_buffer_size):
    """Build the CIFAR-10 training input pipeline.

    Loads the "train" split through TensorFlow Datasets as (image, label)
    pairs, applies ``preprocessing`` to every example, then shuffles, batches
    and prefetches.

    Args:
        batch_size: Number of examples per emitted batch.
        shuffle_buffer_size: Buffer size passed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, one_hot_labels)`` batches.
    """
    dataset = tfds.load("cifar10", split="train", as_supervised=True)
    # `preprocessing` already has the (img, label) signature, so it is passed
    # directly instead of through a lambda. Running the map in parallel speeds
    # up the pipeline; element order is still preserved by default.
    dataset = dataset.map(preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.shuffle(shuffle_buffer_size).batch(batch_size)
    # Let tf.data choose the prefetch depth rather than hard-coding it.
    return dataset.prefetch(tf.data.AUTOTUNE)
    

In [25]:
# Bound to `train_ds` rather than `train`: a later cell defines a function that
# is also called `train`, and sharing one name means whichever cell ran last
# silently wins (re-running this cell would make `train(...)` uncallable).
train_ds = load_and_prep_cifar(batch_size=BATCH_SIZE, shuffle_buffer_size=SHUFFLE_BUFFER_SIZE)


In [26]:
def create_cifar_cnn(name="my_cnn"):
    """Build a small convolutional classifier for 32x32 RGB images.

    Architecture: three 3x3, "same"-padded ReLU convolutions (5, 10 and 64
    filters), global average pooling, and a 10-unit softmax dense layer.

    Args:
        name: Name given to the returned Keras model.

    Returns:
        An uncompiled ``tf.keras.Model`` that maps float32 inputs of shape
        ``(batch, 32, 32, 3)`` to class probabilities of shape ``(batch, 10)``.
    """
    # The batch dimension is left unspecified on purpose: the model then does
    # not depend on a notebook-level constant and accepts any batch size,
    # including a smaller final batch at the end of an epoch.
    inputs = tf.keras.Input(shape=(32, 32, 3), dtype=tf.float32)

    # "same" padding with the default stride keeps the 32x32 spatial size, so
    # only the channel count changes from layer to layer.
    cnn_layer_1 = tf.keras.layers.Conv2D(activation="relu", filters=5, kernel_size=3, padding="same")
    x = cnn_layer_1(inputs)  # (batch, 32, 32, 5)
    cnn_layer_2 = tf.keras.layers.Conv2D(activation="relu", filters=10, kernel_size=3, padding="same")
    x = cnn_layer_2(x)  # (batch, 32, 32, 10)
    cnn_layer_3 = tf.keras.layers.Conv2D(activation="relu", filters=64, kernel_size=3, padding="same")
    x = cnn_layer_3(x)  # (batch, 32, 32, 64)

    # Average every feature map over its spatial positions into one vector.
    global_pooling = tf.keras.layers.GlobalAveragePooling2D()
    x = global_pooling(x)  # (batch, 64)

    output_layer = tf.keras.layers.Dense(10, activation='softmax')
    output = output_layer(x)  # (batch, 10) class probabilities
    model = tf.keras.Model(inputs, output, name=name)
    return model

In [29]:
def train(num_epochs, batchsize, shuffle_buffer_size,lr):
    """Train a freshly built CIFAR-10 CNN with a custom loop and return it.

    Builds the model with ``create_cifar_cnn`` and the data pipeline with
    ``load_and_prep_cifar``, then runs ``num_epochs`` passes of Adam on a
    categorical cross-entropy loss. After each epoch the mean of that epoch's
    per-batch losses is printed.

    Args:
        num_epochs: Number of full passes over the training data.
        batchsize: Batch size forwarded to ``load_and_prep_cifar``.
        shuffle_buffer_size: Shuffle buffer size forwarded to
            ``load_and_prep_cifar``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``tf.keras.Model``, so it can be evaluated or saved
        afterwards instead of being thrown away when the function exits.
    """
    model = create_cifar_cnn()
    data = load_and_prep_cifar(batch_size=batchsize, shuffle_buffer_size=shuffle_buffer_size)
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=lr)
    # Default from_logits=False: the loss is fed the model's softmax outputs
    # together with one-hot targets.
    loss_f = tf.keras.losses.CategoricalCrossentropy()

    for _ in range(num_epochs):
        batch_losses = []
        for x, t in data:
            with tf.GradientTape() as tape:
                # Explicit training=True so any layer that behaves differently
                # in training mode is run correctly inside the training step.
                pred = model(x, training=True)
                loss = loss_f(t, pred)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))
            batch_losses.append(loss.numpy())
        print(np.mean(batch_losses))

    return model
    

In [30]:
# Ten epochs of Adam at a learning rate of 1e-3, using the notebook-level
# batch and shuffle-buffer settings.
train(
    num_epochs=10,
    batchsize=BATCH_SIZE,
    shuffle_buffer_size=SHUFFLE_BUFFER_SIZE,
    lr=0.001,
)

1.996641
1.7562217
1.7047511
1.6720792
1.6487033
1.6261812
1.6022413
1.5810441
1.5618553
1.5418689
