In [1]:
# %pip install tensorflow-datasets
# %pip install -q tfds-nightly tensorflow matplotlib


In [2]:
import numpy as np

from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Input
from tensorflow.keras.models import Model, Sequential

from tensorflow.keras.datasets import mnist
# # import tensorflow_datasets as tfds
# import tensorflow.keras.datasets as tfds
import tensorflow_datasets as tfds
import tensorflow as tf


## Task
Try to solve the MNIST task using an MLP. How many weights are needed, and how does the size of an image affect the number of weights?

In [3]:
# Load the MNIST train/test arrays shipped with Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast to float32, divide by 255.0 (assumes raw pixel values are in 0-255 -- the
# usual MNIST encoding) and append a trailing channel axis -> (N, 28, 28, 1).
x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

In [4]:
# MLP baseline for MNIST: flatten the image, two hidden dense layers, softmax output.
model = Sequential([
    # Declare the input explicitly and with the same shape as the arrays built in the
    # data-loading cell, (28, 28, 1). The previous `Flatten(input_shape=(28,28))`
    # declared a rank that did not match the reshaped data and used the layer-level
    # `input_shape` kwarg, which Keras discourages for Sequential models in favour of
    # an `Input` object as the first entry.
    Input(shape=(28, 28, 1)),
    Flatten(),                        # 28 * 28 * 1 = 784 features per image
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(10, activation="softmax"),  # one output per digit class
])

# Labels are passed to fit/evaluate as they come from the loader (not one-hot
# encoded), hence the sparse variant of categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 20% of the training data is held out for validation during training.
model.fit(x_train, y_train, epochs=10, batch_size=128, validation_split=0.2)

# Per-layer output shapes and parameter counts.
model.summary()

# Final score on the held-out test set.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

  super().__init__(**kwargs)


Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.8156 - loss: 0.6833 - val_accuracy: 0.9480 - val_loss: 0.1824
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9526 - loss: 0.1665 - val_accuracy: 0.9613 - val_loss: 0.1344
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9675 - loss: 0.1135 - val_accuracy: 0.9634 - val_loss: 0.1171
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9751 - loss: 0.0837 - val_accuracy: 0.9686 - val_loss: 0.1026
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9804 - loss: 0.0644 - val_accuracy: 0.9645 - val_loss: 0.1123
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9846 - loss: 0.0520 - val_accuracy: 0.9740 - val_loss: 0.0891
Epoch 7/10
[1m375/375[0m 

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9752 - loss: 0.0865
Test Accuracy: 0.9777


Input: each 28×28 image is flattened into 784 input features.
Two hidden layers (128 and 64 neurons), each densely connected.
The last layer has 10 neurons, one per digit class (0–9).
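first layer = 784 * 128 = 100,352 weights
2nd layer = 128 * 64 = 8,192 weights
output layer = 64 * 10 = 640 weights
total = 784×128 + 128×64 + 64×10 = 109,184 weights (plus 128 + 64 + 10 = 202 biases, i.e. 109,386 trainable parameters)
So the size of the image changes only the number of input neurons (the 784), and therefore only the first layer's weight count: for an H×W image the first layer has H×W×128 weights, so the total grows linearly with the number of pixels.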

In [None]:
# Load Oxford Flowers 102 through TFDS. The first 80% of the 'train' split is used
# for training, the remaining 20% for validation, and 'test' is kept for evaluation.
# with_info=True makes the call also return the dataset metadata (ds_info);
# as_supervised=True yields (image, label) pairs, which is what preprocess() expects.
flowers_splits, ds_info = tfds.load(
    name='oxford_flowers102',
    split=['train[:80%]', 'train[80%:]', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)
ds_train, ds_val, ds_test = flowers_splits

def preprocess(image, label):
  """Prepare one (image, label) sample for the CNN trained from scratch.

  The image is resized to 128x128, cast to float32 and divided by 255.0
  (assumes the incoming pixel values are in 0-255 -- TODO confirm). The label
  is one-hot encoded with depth 102 (presumably the number of flower classes,
  per the dataset name).

  Returns:
    The (image, label) pair after the transformations above.
  """
  resized = tf.image.resize(image, (128, 128))
  scaled = tf.cast(resized, tf.float32) / 255.0
  return scaled, tf.one_hot(label, depth=102)

# Preview a few raw samples before the datasets are replaced by their
# preprocessed, batched versions below.
tfds.show_examples(ds_train, ds_info)

# Input pipelines: preprocess -> cache -> (shuffle, training only) -> batch -> prefetch.
# The map step now runs in parallel (num_parallel_calls=AUTOTUNE) instead of one
# element at a time; map stays deterministic by default, so element order and
# therefore the cached contents are unchanged.
ds_train = (
    ds_train
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(1024)  # placed after cache() so each pass over the data is reshuffled
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
ds_val = (
    ds_val
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
ds_test = (
    ds_test
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\mary9\tensorflow_datasets\oxford_flowers102\2.1.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

## Task
Try to predict the type of a flower from an image using a CNN. You can pass ds_train and ds_val directly to the `fit` function.

As you probably noticed it's not so easy. We can enhance the process by using a CNN pretrained on a set of general images. Of course, it cannot be used directly, but the whole feature extraction part can be copied and then maybe slightly adjusted.

In [None]:
# Reload the raw Oxford Flowers 102 splits (80% / 20% of 'train' for training and
# validation, plus 'test'), so the VGG16-specific preprocessing below starts from
# unprocessed (image, label) pairs rather than from previously mapped datasets.
flowers_splits, ds_info = tfds.load(
    name='oxford_flowers102',
    split=['train[:80%]', 'train[80%:]', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)
ds_train, ds_val, ds_test = flowers_splits
# Preprocess the images
def preprocess(image, label):
  """Prepare one (image, label) sample for the VGG16-based model.

  The image is resized to 224x224 and passed through Keras' VGG16
  `preprocess_input`; it is not divided by 255 first. The label is returned
  unchanged.

  NOTE(review): this redefines the `preprocess` used for the from-scratch CNN,
  which also one-hot encoded the label. Here labels stay as produced by the
  dataset, so the loss chosen downstream must match -- confirm this is intended.
  """
  resized = tf.image.resize(image, (224, 224))
  return tf.keras.applications.vgg16.preprocess_input(resized), label

# Input pipelines: preprocess -> cache -> (shuffle, training only) -> batch -> prefetch.
# The map step now runs in parallel (num_parallel_calls=AUTOTUNE) instead of one
# element at a time; map stays deterministic by default, so element order and
# therefore the cached contents are unchanged.
ds_train = (
    ds_train
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(1024)  # placed after cache() so each pass over the data is reshuffled
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
ds_val = (
    ds_val
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
ds_test = (
    ds_test
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# VGG16 pre-trained on ImageNet, without its top (include_top=False), sized for the
# 224x224 RGB images produced by the preprocessing step.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Freeze every layer so the pre-trained weights are not changed during training.
for layer in base_model.layers:
  layer.trainable = False

base_model.summary()

## Task

The model is quite big, but it's already trained so we froze the weights to not change them during the training phase. After the training is finished the weights can be unfrozen and the training can be repeated to adjust them even better. Now we have only the convolutional part, add the missing part to perform a classification, and compare the results with a model built from scratch.

Image segmentation is an image-to-image task where the output is a binary image with the same shape as the input showing the location of a given object e.g. roads, humans, or signs.

A U-Net architecture is a popular model used in this task. It allows us to capture not only local patterns but also the wider context of the image, and to map both to the output. It's presented in the picture below. Of course, it's just an example, and the number of layers or neurons does not have to be copied one-to-one.

<img src="https://media.geeksforgeeks.org/wp-content/uploads/20220614121231/Group14.jpg">

https://www.geeksforgeeks.org/u-net-architecture-explained/

## Task
Download the retina blood vessel dataset (https://www.kaggle.com/datasets/abdallahwagih/retina-blood-vessel), then create and train a U-Net. You can use transfer learning, but it's not a must.