# Training Resnet50 for Head's Position Detection
After preparing the `margined_cropped_images`, and `data.csv`, now we are able to feed them to a model to predict Hexbugs' heads' position.
The approach that worked for us was ***transfer learning***. For this, we used a pre-trained `ResNet50` model (the code below loads its `imagenet` weights), which helps us extract features better. However, the top layers of the `resnet` model are meant for classification tasks. To adapt the model to our problem, we removed the last `Dense` layer and added two other `Dense` layers, followed by a regression output with only ***two*** values for `(x, y)`. Note that the training cell further down currently instantiates `DenseNet201` with the same regression head; the `ResNet50` variant is kept as commented-out code.
After modifying the model, we can train it over our dataset, and evaluate its performance on the validation set.
At last, we save the model for further use cases.

### Essential Imports

* `cv2` is required to read and write images.
* `json` is used to open `json` files.
* `random` is used to pick random samples for model evaluation.
* `logging` is used to ignore `warning` messages. (not mandatory)
* `numpy` is crucial to work with images.
* `pandas` is used to read the `data.csv` file.
* `tensorflow` is used to create the model; it serves as the backend for `Keras`.
* `sklearn` is used to split dataset to `train` and `test` sets.

In [1]:
# !pip install --user --proxy http://proxy:80 protobuf==3.19.6
# !pip install --user --proxy http://proxy:80 tensorboard==2.11
# !pip install --user --proxy http://proxy:80 pip -U scikit-learn

In [2]:
# Shows the GPUs visible to this session (model, driver/CUDA version, memory in use) before training.
!nvidia-smi

/bin/bash: /apps/python/3.10-anaconda/envs/tensorflow/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Mon May 29 11:05:53 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.41.03              Driver Version: 530.41.03    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB           On | 00000000:01:00.0 Off |                    0 |
| N/A   33C    P0               58W / 400W|      0MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+---------------------------

In [3]:
import datetime
import json
import logging
import os
import random
import shutil

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import ResNet101, DenseNet121, DenseNet169, DenseNet201
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import load_img
from tqdm import tqdm

# Only let TensorFlow log messages of severity ERROR or above, then wipe
# whatever the imports printed into the cell output.
tf_logger = tf.get_logger()
tf_logger.setLevel(logging.ERROR)
clear_output()

### Reading data from `data.csv`

In [4]:
# Read the annotation table, drop the index column that was written into the CSV,
# and order the rows by frame ID and then by original image path.
data = (
    pd.read_csv('data_cropped_bugs.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['ID', 'OriginalImagePath'], ascending=True)
)
# Optional filter (disabled): keep only rows whose cropped head coordinates and
# original box coordinates are strictly positive.
# data = data[(data['CroppedHexBugCoordinationX'] > 0) &
#             (data['CroppedHexBugCoordinationY'] > 0) &
#             (data['OriginalBoxCoordinationX1'] > 0) &
#             (data['OriginalBoxCoordinationY1'] > 0)]
data

Unnamed: 0,CroppedHexBugCoordinationX,CroppedHexBugCoordinationY,OriginalBoxCoordinationX1,OriginalBoxCoordinationY1,CroppedImagePath,OriginalImagePath,Xoffset,Yoffset,ID
4174,88.267358,26.049741,142,502,cropped_bugs/training01/frame0_0.jpg,samples/training01/frame0.jpg,143,114,0
838,53.343207,176.848746,5,411,cropped_bugs/training0100/frame0_0.jpg,samples/training0100/frame0.jpg,151,100,0
4275,79.964068,23.098983,587,447,cropped_bugs/training012/frame0_0.jpg,samples/training012/frame0.jpg,144,94,0
2213,36.618670,28.188413,230,26,cropped_bugs/training018/frame0_0.jpg,samples/training018/frame0.jpg,104,96,0
1437,183.235621,55.906871,558,935,cropped_bugs/training020/frame0_0.jpg,samples/training020/frame0.jpg,97,150,0
...,...,...,...,...,...,...,...,...,...
3503,82.155047,163.824014,426,1020,cropped_bugs/training087/frame100_0.jpg,samples/training087/frame100.jpg,68,98,100
2412,185.219286,35.021525,768,370,cropped_bugs/training090/frame100_0.jpg,samples/training090/frame100.jpg,87,154,100
3016,75.259948,33.841683,138,610,cropped_bugs/training091/frame100_0.jpg,samples/training091/frame100.jpg,148,110,100
2021,98.251275,28.137171,833,277,cropped_bugs/training094/frame100_0.jpg,samples/training094/frame100.jpg,139,106,100


In [5]:
# Load the images
# Every row of `data` points at one cropped image through `CroppedImagePath`.
image_paths = data['CroppedImagePath'].tolist()

# Iterating over a list (instead of the `iterrows()` generator) lets tqdm know the
# total, so it renders a real progress bar with an ETA.
images = [
    # `load_img` returns a PIL image; `np.array` converts it to a numpy array.
    np.array(load_img(img_path, color_mode='rgb'))
    for img_path in tqdm(image_paths, desc='Loading images')
]

# The regression target is the head position shifted by the per-row offsets,
# computed for all rows at once instead of row by row.
y = (
    data[['CroppedHexBugCoordinationX', 'CroppedHexBugCoordinationY']].to_numpy(dtype=float)
    + data[['Xoffset', 'Yoffset']].to_numpy(dtype=float)
)

# Stacks the images into one array; this requires every image to have the same shape.
X = np.array(images)

print(X.shape)
print(y.shape)

5112it [00:23, 213.06it/s]


(5112, 400, 400, 3)
(5112, 2)


In [6]:
# Splits the dataset to `train` and `test` sets with the given ratio.
# The rows are still shuffled, but with a fixed seed: without it every kernel restart
# produces a different split, so a checkpoint reloaded later would be "evaluated" on
# images it was trained on, and results could not be reproduced.
# NOTE(review): frames of the same video can still end up in both splits; consider a
# group-wise split if that matters for the reported validation error.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, shuffle=True, random_state=SPLIT_SEED)

In [7]:
# Report the shape of each split, one per line.
print(
    f'X train shape: {X_train.shape}',
    f'X test shape: {X_test.shape}',
    f'y train shape: {y_train.shape}',
    f'y test shape: {y_test.shape}',
    sep='\n',
)

X train shape: (3425, 400, 400, 3)
X test shape: (1687, 400, 400, 3)
y train shape: (3425, 2)
y test shape: (1687, 2)


In [8]:
# dense_model = DenseNet121(
#     include_top=False,
#     weights='densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5',
#     input_shape=X_train[0].shape)

# model = Sequential(dense_model, name='DenseNet169')

# x = model.output
# x = Flatten()(x)
# x = Dense(16, activation='relu')(x)
# x = Dense(8, activation='relu')(x)

# # The last layer should be the layer that computes the (x, y) coordinations.
# output_layer = Dense(2, activation='linear')(x)

# # Create the model based on the input and output layer.
# model = Model(inputs=model.input, outputs=output_layer)

# clear_output()

In [9]:
# vgg_model = tf.keras.applications.VGG19(
#     include_top=False,
#     weights="imagenet",
#     input_shape=X_train[0].shape,
# )

# for layer in vgg_model.layers:
#     layer.trainable = True

# # Adds two dense layers for regression problem.
# # It basically uses the output of the `resnet50` model to feed the dense layers.
# x = vgg_model.output
# x = Flatten()(x)
# x = Dense(128, activation='relu')(x)
# x = Dense(64, activation='relu')(x)

# # The last layer should be the layer that computes the (x, y) coordinations.
# output_layer = Dense(2, activation='linear')(x)

# # Create the model based on the input and output layer.
# model = Model(inputs=vgg_model.input, outputs=output_layer)

In [10]:
# model = Sequential()
# model.add(Conv2D(64, (3, 3), activation='relu', input_shape=X_train[0].shape))
# # model.add(Dropout(0.25))
# # model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(256, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(512, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# # model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(256, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(32, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Flatten())
# model.add(Dense(16, activation='relu'))
# model.add(Dense(2))

In [11]:
# # Creates a pre-trained model instance from class `ResNet50`.
# # This model doesn't include the last classification dense layer.
# # The given `input_shape` should be the same as our dataset images.
# resnet50 = ResNet50(weights='resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
#                     include_top=False, input_shape=(X_train[0].shape))

# # Iterates over each layer in the model and makes them to be trainable.
# for layer in resnet50.layers:
#     layer.trainable = True

# # Adds two dense layers for regression problem.
# # It basically uses the output of the `resnet50` model to feed the dense layers.
# x = resnet50.output
# x = Flatten()(x)
# x = Dense(32, activation='relu')(x)
# x = Dense(16, activation='relu')(x)

# # The last layer should be the layer that computes the (x, y) coordinations.
# output_layer = Dense(2, activation='linear')(x)

# # Create the model based on the input and output layer.
# model = Model(inputs=resnet50.input, outputs=output_layer)

# clear_output()

### Visualization
For better visualization, we use [tensorboard](https://www.tensorflow.org/tensorboard/get_started). It helps us to plot the figures real-time and analyze the model performance.

In [12]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

# We need datetime to name our checkpoints.
import datetime

# Clear any logs from previous runs
!rm -rf ./logs/

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Creates a callback, which then will be called by the model during training process.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

/bin/bash: /apps/python/3.10-anaconda/envs/tensorflow/lib/libtinfo.so.6: no version information available (required by /bin/bash)


### Saving the best model
During the training process, we need to track the model's performance and make sure that at the end of the training process, we have the best model.
To do so, we use the built-in `ModelCheckpoint` callback, which checks after every epoch whether the `val_loss` has improved on the best value seen so far. In that case, it simply saves the model to the provided directory, from which it can be loaded and evaluated later.
This callback is then called by the model during the training process.

In [13]:
# Directory that receives the saved model.
checkpoint_filepath = 'checkpoint_callback'

# Checkpoint policy: watch the validation loss, treat lower as better, keep only
# the best model, and store the full model rather than just its weights.
checkpoint_options = dict(
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
)

# Creates the ModelCheckpoint callback that is later handed to `model.fit`.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath, **checkpoint_options)

In [14]:
# Define the strategy for distributing the training across GPUs
strategy = tf.distribute.MirroredStrategy()

# The backbone, the regression head and the optimizer are all created inside the
# strategy scope so that their variables are set up for distributed training.
with strategy.scope():

    # Pre-trained DenseNet201 feature extractor without its classification top.
    # The given `input_shape` is the shape of one image of our dataset.
    dense_model = DenseNet201(
        include_top=False,
        weights='imagenet',
        input_shape=X_train[0].shape
    )

    # `Sequential` documents its first argument as a *list* of layers; passing the
    # bare model only works on Keras versions that wrap a single layer themselves.
    backbone = Sequential([dense_model], name='DenseNet201')

    # Adds two dense layers for the regression problem, fed by the backbone output.
    x = backbone.output
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(16, activation='relu')(x)

    # The last layer should be the layer that computes the (x, y) coordinations.
    output_layer = Dense(2, activation='linear')(x)

    # Create the model based on the input and output layer.
    model = Model(inputs=backbone.input, outputs=output_layer)

    # Since we are dealing with a regression problem we use `mse` as the loss,
    # with `adam` as the optimizer.
    # (The ResNet50 alternative that used to sit here as a dead string literal is
    # still available in the commented-out ResNet50 cell earlier in the notebook.)
    model.compile(loss='mse', optimizer='adam')

2023-05-29 11:06:29.328233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-29 11:06:32.794260: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38241 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:01:00.0, compute capability: 8.0
2023-05-29 11:06:32.796424: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38241 MB memory:  -> device: 1, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:41:00.0, compute capability: 8.0
2023-05-29 11:06:32.797886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/devi

In [15]:
# Since we are dealing with regression problem, it's recommended to use `mse`.
# Also we use `Adam` as our optimizer.
# model.compile(loss='mse', optimizer='adam')

In [16]:
# Prints a layer-by-layer summary (output shapes and parameter counts) of the compiled model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 densenet201_input (InputLay  [(None, 400, 400, 3)]    0         
 er)                                                             
                                                                 
 densenet201 (Functional)    (None, 12, 12, 1920)      18321984  
                                                                 
 flatten (Flatten)           (None, 276480)            0         
                                                                 
 dense (Dense)               (None, 32)                8847392   
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 dense_2 (Dense)             (None, 2)                 34        
                                                             

### Training the model
Now that we have everything ready, we can train the model. We pass the `validation_data` containing `X_test` and `y_test` to the model as well.
Also, for the above-mentioned checkpoints and TensorBoard logs, we need to pass those callbacks to `fit`.

In [None]:
# Train on the training split and validate on the test split; the callbacks write the
# TensorBoard logs and save the best model according to `val_loss`.
fit_callbacks = [tensorboard_callback, model_checkpoint_callback]
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=500,
    batch_size=256,
    callbacks=fit_callbacks,
)

Epoch 1/500


2023-05-29 11:10:52.278733: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-05-29 11:10:53.886445: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-05-29 11:10:54.628557: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-05-29 11:10:55.030188: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-05-29 11:10:57.080546: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-05-29 11:11:02.613491: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.66GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-05-29 11:11:02.613546: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocat

In [None]:
# Plotting the tensorboard data in jupyter notebook.
# `logs/fit` is the parent directory of the timestamped `log_dir` used by the TensorBoard callback.
%tensorboard --logdir logs/fit

In [None]:
def compute_error(y, y_pred):
    """
    Computes the Euclidean distance between ground-truth and predicted coordinates,
    which is the error measure used by the competition.

    Works for a single point as well as for a batch of points: the distance is taken
    over the last axis, so `(x, y)` pairs give one scalar and `(N, 2)` inputs give
    `N` distances.

    @param: array-like y. The original coordinates, `(x, y)` or shape `(N, 2)`.
    @param: array-like y_pred. The predicted coordinates, same shape as `y`.

    @return: float or np.ndarray error. The Euclidean distance(s) between the given points.
    """
    # Squaring already discards the sign, so no `abs` is needed before it.
    difference = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    error = np.linalg.norm(difference, axis=-1)
    return error

In [None]:
def compute_original_coordination(predicted_coordination, original_box_coordination):
    """
    Translates a point given relative to a bounding box into the coordinate system
    of the original image by adding the box coordinates to it.

    @param: sequence predicted_coordination. The predicted point in (x, y) format.
    @param: sequence original_box_coordination. The bounding box coordinates in (x, y) format.

    @return: tuple. The (x, y) coordinates of the predicted Hexbug's head in the original image.
    """
    x_in_original = predicted_coordination[0] + original_box_coordination[0]
    y_in_original = predicted_coordination[1] + original_box_coordination[1]
    return (x_in_original, y_in_original)

### Loading the best model

In [None]:
# Restores the best model saved by the checkpoint callback during training.
# Reuses `checkpoint_filepath` (defined next to the callback) instead of repeating the
# literal path, so the directory written and the directory loaded cannot drift apart.
reconstructed_model = tf.keras.models.load_model(checkpoint_filepath)
# reconstructed_model = tf.keras.models.load_model("resnet50_trained_model_data/v1.5/checkpoint_callback")
clear_output()

### Sample Evaluation
Now that we have successfully trained the model, let's see its performance on unseen data.

In [None]:
num_samples = 30

# Chooses random indices; never asks for more samples than the test set holds,
# which would make `random.sample` raise a ValueError.
random_samples_indices = random.sample(range(len(X_test)), min(num_samples, len(X_test)))

# Predicts all selected samples in one batched call instead of one `predict` call
# (and one progress bar) per image.
sample_predictions = reconstructed_model.predict(X_test[random_samples_indices])

# Iterates over each index and compares the prediction with the annotation.
for sample_index, prediction in zip(random_samples_indices, sample_predictions):
    center = y_test[sample_index]
    predicted = list(prediction)

    # Computes the error (Euclidean distance between truth and prediction).
    error = round(compute_error(center, predicted), 3)

    print(f'y_predicted: {predicted}, y: {center}, Error: {error}')

### Sample for coordination conversion

In [None]:
num_samples = 30

# Drawing configuration. Images loaded through `load_img` are RGB, so this is green.
radius = 10
color = (0, 255, 0)
thickness = -1

# Creates the output directory once instead of checking for it on every iteration.
output_dir = 'annotated_images'
os.makedirs(output_dir, exist_ok=True)

# Never asks for more samples than there are rows (`random.sample` would raise).
random_samples_indices = random.sample(range(len(data)), min(num_samples, len(data)))

for sample_id in random_samples_indices:
    row = data.iloc[sample_id]

    # Loads the image exactly as the training loop did; the table has no `Path`
    # column, the cropped image is referenced by `CroppedImagePath`.
    sample_img = np.array(load_img(row.CroppedImagePath, color_mode='rgb'))
    expanded_img = np.expand_dims(sample_img, axis=0)

    predicted = [float(value) for value in reconstructed_model.predict(expanded_img)[0]]

    # Ground truth in the same coordinate frame as the training target, i.e. the
    # cropped coordinates shifted by the per-row offsets.
    center = [float(row.CroppedHexBugCoordinationX + row.Xoffset),
              float(row.CroppedHexBugCoordinationY + row.Yoffset)]

    error = round(float(compute_error(center, predicted)), 3)

    # Computes the coordination of the Hexbug's head in the original image.
    # The prediction includes the offsets, so they are removed again before the
    # bounding-box coordinates are added.
    # NOTE(review): assumes `OriginalBoxCoordinationX1/Y1` is the origin of the frame the
    # `CroppedHexBugCoordination` values are expressed in - confirm against the cropping script.
    crop_origin = (row.OriginalBoxCoordinationX1 - row.Xoffset,
                   row.OriginalBoxCoordinationY1 - row.Yoffset)
    scaled_coordinations = tuple(
        # OpenCV expects plain integer pixel coordinates.
        int(round(float(value)))
        for value in compute_original_coordination(predicted, crop_origin)
    )

    # Reads and converts the original image into numpy array.
    # NOTE(review): presumably `OriginalImagePath` points at the full frame - verify.
    original_sample_img = np.array(load_img(row.OriginalImagePath, color_mode='rgb'))

    cv2.circle(original_sample_img, scaled_coordinations, radius, color, thickness)

    print(f'Sample ID: {sample_id}\nTruth: {center}, Predicted: {predicted}, Error: {error}')
    print('------------------------------------------------------------------------')

    # The log message above contains a newline and colons, so it is not reused as the
    # file name. `cv2.imwrite` expects BGR channel order, hence the conversion.
    output_path = os.path.join(output_dir, f'sample_{sample_id}_error_{error}.jpg')
    cv2.imwrite(output_path, cv2.cvtColor(original_sample_img, cv2.COLOR_RGB2BGR))

In [None]:
# reconstructed_model.summary()

In [None]:
# model = Sequential()
# model.add(Conv2D(64, (3, 3), activation='relu', input_shape=X_train[0].shape))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(512, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(1028, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(256, (3, 3), activation='relu', kernel_regularizer='L2'))
# # model.add(Dropout(0.25))
# model.add(MaxPooling2D((2, 2)))
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dense(2))

In [None]:
# model.summary()

In [None]:
# reconstructed_model.compile(loss='mse', optimizer='adam')

In [None]:
# history = reconstructed_model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), callbacks=[tensorboard_callback, model_checkpoint_callback])