##### Introduction to Convnets

 a convnet example to classify MINST digits

In [1]:
# instantiating a small convnet
from keras import layers
from keras import models

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

2024-07-04 14:16:05.675511: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-04 14:16:06.061278: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-07-04 14:16:07.495302: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sy

- a convnet takes as input tensors of shape (image_height, image_width, image_channels)(not including the batch dimension)
    - in this cause, inputs of size (28, 28, 1) format of MNIST images

In [2]:
model.summary()

then to feed the last output tensor (of shape (3, 3,64)) into a densely connected classifier network, first flatten the 3D outputs to 1D

In [3]:
# adding a classifier on top of the convnet
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.summary()

In [4]:
# training the convnet on MNIST images
from keras.datasets import mnist
from keras.utils import to_categorical

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype('float32') / 255
#train_images = train_images[:6000]

test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype('float32') / 255
#test_images = test_images[:1000]

train_labels = to_categorical(train_labels)
#train_labels = train_labels[:6000]
test_labels = to_categorical(test_labels)
#test_labels = test_labels[:1000]

model.compile(optimizer='rmsprop', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])
model.fit(train_images, train_labels, epochs=5, batch_size=64)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Epoch 1/5


I0000 00:00:1720073770.830226   15407 service.cc:145] XLA service 0x74c5800041c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1720073770.830244   15407 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 4090 Laptop GPU, Compute Capability 8.9
2024-07-04 14:16:10.858337: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-07-04 14:16:10.948140: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m182/938[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 840us/step - accuracy: 0.6917 - loss: 0.9447

I0000 00:00:1720073772.005271   15407 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8741 - loss: 0.3910
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9850 - loss: 0.0464
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 939us/step - accuracy: 0.9897 - loss: 0.0314
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 978us/step - accuracy: 0.9928 - loss: 0.0235
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9950 - loss: 0.0155


<keras.src.callbacks.history.History at 0x74c6d71370a0>

In [5]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
test_acc

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9872 - loss: 0.0436


0.9901999831199646

##### The Convolution Operation

![Image Breakdown](./img_breakdown.png)

![Spatial Hierarchy of Visual Modules](./spatial_hierarchy_visual_module.png)

![Response Map](./response_map.png)

![Convolution](./convolution.png)

##### The Max-Pooling Operation

In [9]:
model_no_max_pool = models.Sequential()
model_no_max_pool.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model_no_max_pool.add(layers.Conv2D(64, (3, 3), activation='relu'))
model_no_max_pool.add(layers.Conv2D(64, (3, 3), activation='relu'))

model_no_max_pool.summary()