In [100]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

np.random.seed(1)

In [101]:
def normalise(dataset):
    # Scale images to the [0, 1] range
    dataset = dataset.astype("float32") / 255
    # Make sure images have shape (28, 28, 1)
    return np.expand_dims(dataset, -1)

In [113]:
def _get_sample_mask(size, fraction):
    num_of_samples = np.round(size*fraction).astype("int")
    mask = np.ones(size).astype("bool")
    mask[:num_of_samples] = False
    np.random.default_rng().shuffle(mask)
    return mask

def sample(dataset, fraction):
    num_datapoints = dataset.shape[0]
    # Create a mask of 0s and 1s with the right distribution (as requested in fraction)
    sample_mask = _get_sample_mask(num_datapoints, fraction)
    # Sample from the dataset, using the mask
    sampled = dataset.compress(sample_mask, axis=0)
    # Flip the mask and do it again to get the not sampled
    not_sampled = dataset.compress(1 - sample_mask, axis=0)
    """ Example
    If original dataset is: [1, 2, 3, 4, 5] with a fraction of 0.2
    We might get a sample_mask like [0, 0, 1, 0, 0].

    sampled will thus be [3]
    and not_sampled [1, 2, 4, 5], by flipping the bits to [1, 1, 0, 1, 1]
    """
    return sampled, not_sampled

In [114]:
def print_dataset_summary(name, dataset):
    print(name + " shape:", dataset.shape)

In [117]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# normalise training data
x_train = normalise(x_train)
x_test = normalise(x_test)

# transform only a sample of the data
print_dataset_summary("before sample", x_train)
to_augment, x_train = sample(x_train, 0.1)

print_dataset_summary("x_train", x_train)
print_dataset_summary("x_test", x_test)
print_dataset_summary("to_augment", to_augment)

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

before sample shape: (60000, 28, 28, 1)
x_train shape: (6000, 28, 28, 1)
x_test shape: (10000, 28, 28, 1)
to_augment shape: (54000, 28, 28, 1)


In [3]:
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1600)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                1

In [4]:
batch_size = 128
epochs = 15

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fa08b377670>

In [5]:
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.024920795112848282
Test accuracy: 0.9922000169754028
