## Train the mobilentv3 model
In this notebook, the mobilenetv3 model will be trained. The model will first be converted to get a single output value (count). This will be done by adding 2 dense layers at the end as done in [this research](https://www.nature.com/articles/s41598-021-02387-9). We can also compare this with an averagePooling or maxPooling at the ens to see if there are major differences.

the model will be trained on three different input types:
- 96, 96, 3
- 128, 128, 3
- 256, 256, 3

and two different dense layers at the end:
- 512 - 256
- 256 - 128

In [8]:
import os
import sys
import time
import matplotlib.pyplot as plt
import tensorflow as tf

root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if root_dir not in sys.path:
    sys.path.append(root_dir)

from DatasetLoader import DatasetLoader as dl
from save_info import save_model_info

#### 1. load the model

In [9]:
resoluton = [96, 128, 224]
base_models = []

for i in resoluton:
    base_models.append(tf.keras.applications.MobileNetV3Small(
        input_shape=(i, i, 3),
        alpha=1.0,
        minimalistic=True,  #even smaller model
        include_top=False,  
        weights="imagenet",
        name="MobileNetV3Small"
        ))

base_models[0].summary()

  return MobileNetV3(


alter the models to output a count -> add dense layers

In [None]:
layer_name = "expanded_conv_10_add"

models = []
for i in base_models:
    model_trunc = tf.keras.models.Model(inputs=i.input, outputs=i.get_layer(layer_name).output)

    x = tf.keras.layers.Flatten()(model_trunc.output)
    x = tf.keras.layers.Dense(512, activation='relu')(x)
    x = tf.keras.layers.Dense(256, activation='relu')(x)
    x = tf.keras.layers.Dense(1)(x)

    models.append(tf.keras.models.Model(inputs=model_trunc.input, outputs=x))
    
    #save_model_info(name=f"v3_d512_256_r{i.input_shape[1]}", model=models[-1], path="model_info.yaml")

for i in base_models:
    model_trunc = tf.keras.models.Model(inputs=i.input, outputs=i.get_layer(layer_name).output)

    x = tf.keras.layers.Flatten()(model_trunc.output)
    x = tf.keras.layers.Dense(256, activation='relu')(x)
    tf.keras.layers.Dropout(0.4)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    tf.keras.layers.Dropout(0.3)
    x = tf.keras.layers.Dense(1)(x)

    models.append(tf.keras.models.Model(inputs=model_trunc.input, outputs=x))
    
    #save_model_info(name=f"v3_d256_128_r{i.input_shape[1]}", model=models[-1], path="model_info.yaml")

#### 2. Load the datasets (train and validation)

In [None]:
datasets = []
train_ds = []

val_datasets = []
val_ds = []
counts = []
dl_loaders = []
for i in range(1, 3):
    for r in range(len(resoluton)):
        dl_loaders.append(dl("RGB", "count", resoluton[r]))

        ds, c = dl_loaders[-1].create_dataset_count("train")
        datasets.append(ds.shuffle(c, reshuffle_each_iteration=True))
        counts.append(c)

        train_ds.append((datasets[-1]
            .shuffle(c)            
            .map(lambda f, c: dl_loaders[-1].get_sample(f, c, train=True), 
                 num_parallel_calls=tf.data.AUTOTUNE)
            .batch(32)                          
            .prefetch(tf.data.AUTOTUNE)))

        ds, c = dl_loaders[-1].create_dataset_count("validate")
        val_datasets.append(ds)
        val_ds.append((val_datasets[-1]            
            .map(lambda f, c: dl_loaders[-1].get_sample(f, c, train=False), 
                 num_parallel_calls=tf.data.AUTOTUNE)
            .batch(32)                          
            .prefetch(tf.data.AUTOTUNE)))
        
model_to_ds = {
    "r96": [train_ds[0], val_ds[0]],
    "r128": [train_ds[1], val_ds[1]],
    "r224": [train_ds[2], val_ds[2]]
}

#### 3. Train all the models

In [12]:
initial_learning_rate = 0.001

for i in range(len(models)):
    models[i].compile(optimizer="Adam", loss="mae", metrics=['mae']) #tf.keras.losses.Huber(delta=1.0)

In [13]:
NUM_EPOCHS = 30

lr_scheduler_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_mae', factor=0.5, patience=5, verbose=1,
                                                 mode='auto', min_delta=0.0005, cooldown=0, min_lr=0)
checkpoint_filepath = []
for dens in ["d512_256", "d256_128"]:
    for r in ["96", "128", "224"]:
        checkpoint_filepath.append(f"out/v3_{dens}_r{r}.keras")

for i in range(len(models)):
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath[i],
                                                                save_weights_only=False,
                                                                monitor='val_mae',
                                                                mode='min',
                                                                save_best_only=True)

    start_time = time.time()
    models[i].fit(train_ds[i],
            epochs=NUM_EPOCHS,
            validation_data=val_ds[i],
            callbacks=[lr_scheduler_callback, model_checkpoint_callback])

    print(f"Total training time = {time.time() - start_time:.2f} seconds")

Epoch 1/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 613ms/step - loss: 11.2246 - mae: 11.2246 - val_loss: 2.8389 - val_mae: 2.8389 - learning_rate: 0.0010
Epoch 2/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 146ms/step - loss: 1.6515 - mae: 1.6515 - val_loss: 3.4932 - val_mae: 3.4932 - learning_rate: 0.0010
Epoch 3/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 149ms/step - loss: 1.1324 - mae: 1.1324 - val_loss: 3.1881 - val_mae: 3.1881 - learning_rate: 0.0010
Epoch 4/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 198ms/step - loss: 1.1784 - mae: 1.1784 - val_loss: 3.4799 - val_mae: 3.4799 - learning_rate: 0.0010
Epoch 5/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 163ms/step - loss: 1.0527 - mae: 1.0527 - val_loss: 3.7781 - val_mae: 3.7781 - learning_rate: 0.0010
Epoch 6/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - loss: 0.9470 - mae: 0.9470
Ep

In [14]:
import yaml
import re

RES_FILE = "results.yaml"

def write_res(file_path, name, new_metrics):

    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, 'r') as file:
            existing_data = yaml.safe_load(file) or {}
    else:
        existing_data = {}

    existing_data[name] = new_metrics

    with open(file_path, 'w') as file:
        yaml.dump(existing_data, file, sort_keys=False)

for i in checkpoint_filepath:
    res = int(re.search(r"r(\d+)", i).group(1))
    name = i
    saved_model = tf.keras.models.load_model(name, compile=True)

    loss = saved_model.evaluate(model_to_ds[f"r{str(res)}"][1])
    write_res(RES_FILE, name.replace(".keras", ""), {"loss": loss, "params": saved_model.count_params()})

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 83ms/step - loss: 1.8485 - mae: 1.8485 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 84ms/step - loss: 1.8208 - mae: 1.8208 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 124ms/step - loss: 4.4591 - mae: 4.4591
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 76ms/step - loss: 1.8523 - mae: 1.8523
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 83ms/step - loss: 2.5319 - mae: 2.5319 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 106ms/step - loss: 1.7685 - mae: 1.7685


In [17]:
for i in range(len(models)):
    save_model_info(checkpoint_filepath[i].replace("out/", "").replace(".keras", ""), models[i], "model_info.yaml")

Model info saved to model_info.yaml
Model info saved to model_info.yaml
Model info saved to model_info.yaml
Model info saved to model_info.yaml
Model info saved to model_info.yaml
Model info saved to model_info.yaml
