# Image Recognition
---

S.Yu. Papulin (papulin.study@yandex.ru)

### Contents

- [Loading Dataset](#Loading-Dataset)
- [Preparing Dataset](#Preparing-Dataset)
- [Building And Fitting Model](#Building-And-Fitting-Model)
- [Evaluating Model](#Evaluating-Model)
- [Saving And Loading Model](#Saving-And-Loading-Model)
- [Sources](#Sources)

In [None]:
import tensorflow as tf
from tensorflow.keras import (
    layers, 
    models, 
    Model, 
    utils, 
    losses, 
    optimizers, 
    metrics
)

In [None]:
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
RANDOM_STATE = 100

## Loading Dataset

In [None]:
# Load dataset and show shape of data
(X_trainval, y_trainval), (X_test, y_test) = cifar10.load_data()
X_trainval.shape, y_trainval.shape, X_test.shape, y_test.shape

In [None]:
# Image value range
X_trainval.max(), X_trainval.min()

In [None]:
# Unique targets and their counts
np.unique(y_trainval, return_counts=True)

In [None]:
# First n targets
y_trainval[:5]

In [None]:
# Class labels
labels = np.array([
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck"
])

In [None]:
num_classes = len(labels)
num_classes

In [None]:
# Show NUM_DISPLAY_IMAGES random images of each class
NUM_DISPLAY_IMAGES = 10
# A single, locally seeded generator keeps the selection reproducible
# without resetting NumPy's global random state on every iteration.
rng = np.random.default_rng(RANDOM_STATE)
# Flatten once so that the per-class comparison yields plain row indices
targets_flat = y_trainval.flatten()
for target in range(num_classes):
    indices = np.flatnonzero(targets_flat == target)
    indices_rnd = rng.choice(indices, NUM_DISPLAY_IMAGES, replace=False)
    print(f'Class label: {labels[target]}')
    plt.figure(figsize=[10, 10])
    for position, image_index in enumerate(indices_rnd, start=1):
        plt.subplot(1, NUM_DISPLAY_IMAGES, position)
        # The title is the index of the image inside X_trainval
        plt.title(image_index)
        plt.imshow(X_trainval[image_index])
        plt.axis("off")
    plt.show()

## Preparing Dataset

In [None]:
# Compose train and validation subsets
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, 
    y_trainval, 
    test_size=0.1, 
    random_state=RANDOM_STATE
)

In [None]:
X_train.shape, X_val.shape

In [None]:
def convert_to_tf_dataset(
    X, y, batch_size=64, use_one_hot=False,
    num_classes=None, shuffle=False, seed=None
):
    """Build a batched, prefetched ``tf.data.Dataset`` from in-memory arrays.

    Args:
        X: Array of images. It is cast to float32 and divided by 255
            (presumably 8-bit pixel values -- confirm against the data).
        y: Array of integer class targets.
        batch_size: Number of samples per batch.
        use_one_hot: If True, targets are one-hot encoded with
            ``utils.to_categorical``; otherwise they are flattened to 1-D.
        num_classes: Width of the one-hot encoding. ``None`` (default) lets
            ``to_categorical`` infer it from the targets, which gives too
            few columns when a subset does not contain the highest class.
        shuffle: If True, samples are shuffled before batching each time
            the dataset is iterated. Defaults to False (original order).
        seed: Optional random seed used by the shuffle.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, targets)`` batches.
    """
    X = X.astype('float32') / 255.0
    if use_one_hot:
        y = utils.to_categorical(y, num_classes=num_classes)
    else:
        y = y.flatten()
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        # Shuffle individual samples (not whole batches); a buffer as large
        # as the dataset mixes all samples.
        ds = ds.shuffle(buffer_size=max(len(X), 1), seed=seed)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)


def print_first_batch(ds):
    """Print the contents of the first batch of a dataset.

    ``ds`` must provide ``take(1)`` yielding two-element batches; the two
    elements are printed one after the other, each on its own line.
    """
    for batch in ds.take(1):
        features, targets = batch
        print(features, targets, sep="\n")

In [None]:
# Wrap each subset in a tf.data pipeline using the default batch size
# and integer (not one-hot) targets.
# NOTE: samples are batched in their existing order; no per-epoch
# shuffling of the training subset is requested here.
train_ds = convert_to_tf_dataset(X_train, y_train)
val_ds = convert_to_tf_dataset(X_val, y_val)
test_ds = convert_to_tf_dataset(X_test, y_test)

In [None]:
# print_first_batch(train_ds)

## Building And Fitting Model

In [None]:
from tensorflow.keras.saving import register_keras_serializable

In [None]:
@register_keras_serializable()
class TinyConvModel(Model):
    """Small convolutional image classifier.

    Data flows through conv -> max-pooling -> conv -> dropout -> flatten ->
    dense -> dense. The final dense layer has no activation, so the model
    returns raw class scores (logits), one per class.
    """

    def __init__(self, num_classes=10, input_shape=(32, 32, 3), dropout_rate=0.1, **kwargs):
        """Create the layers and initialize the model from a symbolic input.

        Args:
            num_classes: Number of units of the final dense layer.
            input_shape: Shape passed to ``layers.Input``.
            dropout_rate: Rate passed to the ``Dropout`` layer.
            **kwargs: Forwarded to the parent ``Model`` constructor.
        """
        # self.input_layer = layers.Input(shape=input_shape)
        # The layers are created before super().__init__() because call()
        # needs them to produce the `outputs` handed to the parent constructor.
        self.layer_1 = layers.Conv2D(
            filters=16, 
            kernel_size=(3, 3), 
            activation='relu', 
            padding='same',
            name='conv1'
        )
        self.transform_1 = layers.MaxPooling2D((2, 2))
        self.layer_2 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='conv2')
        self.dropout = layers.Dropout(dropout_rate)
        self.transform_2 = layers.Flatten()
        self.layer_3 = layers.Dense(128, activation='relu')
        # No activation: the classifier outputs logits
        self.classifier = layers.Dense(num_classes)

        # Note: We initialize with a given input shape so that
        # we can later get a computation graph for specific layers
        inputs = layers.Input(shape=input_shape)
        outputs = self.call(inputs)
        super().__init__(inputs=inputs, outputs=outputs, **kwargs)
    
    def call(self, inputs, training=None):
        """Run the forward pass and return the classifier logits.

        Args:
            inputs: Input passed to the first convolution.
            training: Forwarded to the dropout layer only.
        """
        x = self.layer_1(inputs)
        x = self.transform_1(x)
        x = self.layer_2(x)
        x = self.dropout(x, training=training)
        x = self.transform_2(x)
        x = self.layer_3(x)
        return self.classifier(x)


def build_tiny_conv_model(num_classes=10, input_shape=(32, 32, 3), dropout_rate=0.1):
    """Build the tiny conv net as a ``Sequential`` model.

    The defaults reproduce the previously hard-coded architecture; the
    parameters mirror those of ``TinyConvModel``.

    Args:
        num_classes: Number of units of the final dense layer, which has
            no activation and therefore outputs logits.
        input_shape: Shape passed to ``layers.Input``.
        dropout_rate: Rate passed to the ``Dropout`` layer.

    Returns:
        An uncompiled ``Sequential`` model named ``"ConvNet"``.
    """
    return models.Sequential(
        [
            layers.Input(shape=input_shape),
            layers.Conv2D(16, (3, 3), activation='relu', padding='same', name='conv1'),
            layers.MaxPooling2D((2, 2), name='transform_1'),
            layers.Conv2D(32, (3, 3), activation='relu', padding='same', name="conv2"),
            layers.Dropout(dropout_rate, name='dropout'),
            layers.Flatten(name='transform_2'),
            layers.Dense(128, activation='relu', name='layer_3'),
            layers.Dense(num_classes, name='layer_4'),
        ],
        name="ConvNet",
    )


In [None]:
# Note: Another way to initialize a model graph
# def init_model_inputs(model, input_shape):
#     input_layer = layers.Input(shape=input_shape)
#     return Model(inputs=input_layer, outputs=model(input_layer))

In [None]:
# Instantiate the subclassed model (the Sequential builder is kept as an alternative)
model = TinyConvModel()
# model = build_tiny_conv_model()
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3), 
    # from_logits=True: the last Dense layer of the model has no activation,
    # so its outputs are raw scores rather than probabilities.
    # The sparse loss and metric are used with integer (not one-hot) targets.
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[metrics.SparseCategoricalAccuracy(),]
)

In [None]:
# Note: When we use one-hot representation of target
# model.compile(
#     optimizer=optimizers.Adam(learning_rate=1e-4), 
#     loss=losses.CategoricalCrossentropy(from_logits=True),
#     metrics=[metrics.CategoricalAccuracy(),]
# )

In [None]:
model.summary()

In [None]:
NUM_EPOCHS = 10

train_history = model.fit(
    train_ds,
    # validation_split=0.1,
    validation_data=val_ds,
    epochs=NUM_EPOCHS,
    verbose=1
)


In [None]:
# Learning curves: loss and accuracy for the train and validation subsets
history = train_history.history
epochs = np.arange(1, len(history['loss'])+1)

# (title, train key, validation key, y-axis label) for each panel
panels = [
    ('Train vs val loss', 'loss', 'val_loss', 'loss'),
    (
        'Train vs val accuracy',
        'sparse_categorical_accuracy',
        'val_sparse_categorical_accuracy',
        'accuracy',
    ),
]

plt.figure(figsize=[14, 4])
for position, (title, train_key, val_key, ylabel) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(title)
    # The first epoch is left out of both curves
    plt.plot(epochs[1:], history[train_key][1:], '-og', label='train')
    plt.plot(epochs[1:], history[val_key][1:], '-o', color='orange', label='val')
    plt.xlabel('epochs')
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.legend()

plt.show()

## Evaluating Model

##### Accuracy on the test subset

In [None]:
# evaluate() returns the loss followed by the compiled metrics; the only
# compiled metric is SparseCategoricalAccuracy, so the second value is an
# accuracy (higher is better), not an error.
_, test_accuracy = model.evaluate(test_ds)
test_accuracy

##### Showing confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Drop the targets so that predict() receives the images only
y_test__logits = model.predict(test_ds.map(lambda image, target: image))
# Predicted class = position of the largest logit along the last axis
y_test__pred = np.argmax(y_test__logits, axis=-1)

In [None]:
y_test__true = y_test.flatten()

In [None]:
# Rows are true classes, columns are predicted classes.
# The class ids are derived from num_classes (the length of `labels`) so
# that they always line up with the display labels.
ConfusionMatrixDisplay.from_predictions(
    y_true=y_test__true,
    y_pred=y_test__pred,
    labels=np.arange(num_classes),
    display_labels=labels,
    xticks_rotation='vertical'
)
plt.title('Confusion Matrix')
plt.show()

##### Single image

In [None]:
test_image = X_test[6] / 255.0

In [None]:
plt.figure(figsize=(2,2))
plt.imshow(test_image)
plt.axis('off')
plt.show()

In [None]:
test_image_batch = test_image[np.newaxis, :]

In [None]:
logits_batch = model.predict(test_image_batch)
predictions_batch = np.argmax(logits_batch, axis=-1)
predictions_batch

In [None]:
def softmax(x):
    """Return the softmax of ``x`` computed along its last axis.

    The maximum along the last axis is subtracted before exponentiation,
    which keeps the exponentials from overflowing and leaves the result
    unchanged.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    weights = np.exp(shifted)
    normalizer = np.sum(weights, axis=-1, keepdims=True)
    return weights / normalizer

In [None]:
probabilities_batch = softmax(logits_batch)
probabilities_batch

In [None]:
labels[predictions_batch[0]]

##### Batch of images

In [None]:
test_images = X_test[:10] / 255.0
test_targets = y_test[:10].flatten()

In [None]:
logits_batch = model.predict(test_images)
test_pred = np.argmax(logits_batch, axis=-1)
test_pred_labels = labels[test_pred]
test_pred_labels

In [None]:
# Show each image of the batch with its true and predicted label.
# The grid gets one column per image in the batch, so it stays valid
# when the batch size changes.
num_test_images = len(test_images)
plt.figure(figsize=[14, 4])
for index, image in enumerate(test_images):
    plt.subplot(1, num_test_images, index+1)
    plt.title(
        f'true: {labels[test_targets[index]]}\npred: {test_pred_labels[index]}',
        fontsize=10,
        fontweight='normal'
    )
    plt.imshow(image)
    plt.axis("off")
plt.show()

**Filters**

In [None]:
def get_weigths_by_layer_index(model, index):
    """Return the first two weights of a layer, the first one transposed.

    Args:
        model: Object exposing a ``layers`` sequence.
        index: Position of the layer inside ``model.layers``.

    Returns:
        Tuple ``(W_T, b)``: ``W_T`` is the layer's first weight with its last
        axis moved to the front (the permutation requires a 4-D weight);
        ``b`` is the layer's second weight, returned unchanged.
    """
    layer_weights = model.layers[index].weights
    kernel = layer_weights[0]
    bias = layer_weights[1]
    # Move the last axis first so that iterating over the result yields one
    # array per entry of that axis (presumably one per output filter)
    return tf.transpose(kernel, perm=[3, 0, 1, 2]), bias

In [None]:
# Transposed weights.
# The conv layers are located by name: their position inside `model.layers`
# depends on whether the input layer is part of that list, which differs
# between the functional-style model and the Sequential alternative.
conv1_index = model.layers.index(model.get_layer('conv1'))
conv2_index = model.layers.index(model.get_layer('conv2'))
W_T_conv1, _ = get_weigths_by_layer_index(model, conv1_index)
W_T_conv2, _ = get_weigths_by_layer_index(model, conv2_index)

W_T_conv1.shape, W_T_conv2.shape

In [None]:
def display_filters(W_T, num_per_row=10):
    """Draw every slice along the first axis of ``W_T`` in a grid of subplots.

    Args:
        W_T: Array-like with a ``shape``; each item obtained by iterating
            over it is passed to ``plt.imshow``.
        num_per_row: Number of subplot columns.
    """
    filter_count = W_T.shape[0]
    # Ceiling division: enough rows to hold all slices
    row_count = -(-filter_count // num_per_row)
    plt.figure(figsize=[10, 1 * row_count])
    for position, kernel in enumerate(W_T, start=1):
        plt.subplot(row_count, num_per_row, position)
        plt.imshow(kernel)
        plt.axis('off')

    plt.show()


def display_all_filters(W_T):
    """Show one figure per index of the last axis of a 4-D ``W_T``.

    Each figure is a single row holding all ``W_T.shape[0]`` slices for
    that index (presumably one figure per input channel when ``W_T`` comes
    from ``get_weigths_by_layer_index`` -- confirm against the caller).
    """
    slices_per_figure = W_T.shape[0]
    figure_count = W_T.shape[3]
    for last_axis_index in range(figure_count):
        single_plane = W_T[:, :, :, last_axis_index]
        display_filters(single_plane, num_per_row=slices_per_figure)
    

In [None]:
# Conv 1 filters
display_all_filters(W_T_conv1)

In [None]:
# Conv 2 filters
display_all_filters(W_T_conv2)

**Convolutions**

In [None]:
def display_images(I, num_per_row=10):
    """Draw every slice along the first axis of ``I`` in a grid of subplots.

    Args:
        I: Array-like with a ``shape``; each item obtained by iterating
            over it is passed to ``plt.imshow``.
        num_per_row: Number of subplot columns. Defaults to 10, the value
            that used to be hard-coded.
    """
    num_images = I.shape[0]
    # Ceiling division: enough rows to hold all images
    num_rows = -(-num_images // num_per_row)
    plt.figure(figsize=[14, 1.5 * num_rows])
    for position, image in enumerate(I, start=1):
        plt.subplot(num_rows, num_per_row, position)
        plt.imshow(image)
        plt.axis("off")

    plt.show()

In [None]:
# Create the model with intermediate conv layers.
# The layers are looked up by name rather than by position in
# `model.layers`, so the cell also works when the list does not start with
# the input layer (as with the Sequential alternative).
intermediate_layer_model = Model(
    inputs=model.input,
    outputs=[
        model.get_layer('conv1').output,
        model.get_layer('conv2').output
    ]
)

In [None]:
# Run the model on a test image
I_conv1, I_conv2 = intermediate_layer_model(test_image_batch)

I_conv1.shape, I_conv2.shape

In [None]:
# Transpose to channels-first so that each feature map can be shown with imshow
I_conv1 = tf.transpose(I_conv1, perm=[0, 3, 1, 2])
I_conv2 = tf.transpose(I_conv2, perm=[0, 3, 1, 2])

I_conv1.shape, I_conv2.shape

In [None]:
# Conv 1
display_images(I_conv1[0, :, :, :])

In [None]:
# Conv 2 
display_images(I_conv2[0, :, :, :])

## Saving And Loading Model

In [None]:
import os

BASE_MODEL_PATH = '~/.keras/models'

In [None]:
# Save model as .keras
model_filename = 'tiny_conv_net_128@10.keras'
model_path = os.path.expanduser(os.path.join(BASE_MODEL_PATH, model_filename))
# Make sure the target directory exists before the archive is written
os.makedirs(os.path.dirname(model_path), exist_ok=True)
print(f'Model path: {model_path}')
model.save(model_path)

In [None]:
# Load model
model_filename = 'tiny_conv_net_128@10.keras'
model_path = os.path.expanduser(os.path.join(BASE_MODEL_PATH, model_filename))
reconstructed_model = models.load_model(model_path)
reconstructed_model

In [None]:
# The second value returned by evaluate() is the compiled metric
# (presumably the accuracy restored with the saved model), not an error.
_, test_accuracy = reconstructed_model.evaluate(test_ds)
test_accuracy

In [None]:
# model = reconstructed_model

## Sources