# Retraining MobileNet V2 for Person Detection and ESP32 Compatibility
## Retraining MobileNet V2 for Person Detection
+ Utilize a pretrained MobileNet V2 model designed to detect 1000 classes.
+ Retrain the model to detect the presence of a person in an image.
## Modifying the Model for ESP32 Compatibility
+ Convert the model input to accept flat array images.
+ Resize images to match the model's input size.
+ Quantize the model for efficient execution on the ESP32.

## Training

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

## Set up the dataset

In [None]:
# Number of examples grouped into each batch of the input pipeline
BATCH_SIZE = 32


In [None]:
# Number of colour channels per pixel
IMAGE_CHANNELS = 3

# Size of the image handed to the final model
# (presumably the frame size produced by the ESP32 camera - TODO confirm)
IMAGE_HEIGHT, IMAGE_WIDTH = 240, 240
IMAGE_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)

# Image size expected by the pretrained MobileNetV2 feature extractor
BASE_MODEL_IMAGE_HEIGHT, BASE_MODEL_IMAGE_WIDTH = 96, 96
BASE_MODEL_IMAGE_SHAPE = (
    BASE_MODEL_IMAGE_HEIGHT,
    BASE_MODEL_IMAGE_WIDTH,
    IMAGE_CHANNELS,
)

# Length of the flattened image fed to the final model.
# NOTE: this is a plain int (element count), not a tuple, despite the name.
MODEL_INPUT_SHAPE = IMAGE_HEIGHT * IMAGE_WIDTH * IMAGE_CHANNELS

In [None]:
# Data set file location.
# Raw string: the Windows backslashes must be taken literally rather than
# parsed as (invalid) escape sequences such as "\m" and "\c".
data_dir = r'G:\ml_datasets\coco'

# Load the train/validation splits from the specified data directory, together
# with the dataset metadata so class ids can be looked up by name.
(ds_train, ds_validation), ds_info = tfds.load(
    'coco/2017',
    split=['train', 'validation'],
    data_dir=data_dir,
    with_info=True,
)

# Integer id of the 'person' class, resolved from the dataset's own label
# vocabulary instead of being hard-coded (TFDS class ids are 0-based, so a
# guessed value can silently select a different class).
PERSON_LABEL = ds_info.features['objects']['label'].str2int('person')

# Function to check if a person is in the image
def has_person(example):
    """Add a 0/1 ``person_present`` entry to a dataset example.

    Args:
        example: Mapping that holds the object class ids of one image under
            ``example['objects']['label']``.

    Returns:
        The same mapping, with ``example['person_present']`` set to an int64
        tensor that is 1 when any object id equals ``PERSON_LABEL``, else 0.
    """
    object_ids = example['objects']['label']
    matches_person = tf.math.equal(object_ids, PERSON_LABEL)
    any_person = tf.math.reduce_any(matches_person)
    example['person_present'] = tf.cast(any_person, tf.int64)
    return example
# Function to turn a labelled example into a (flat image, label) pair
def format_image(example):
    """Convert a dataset example into the (input, target) pair used for training.

    Args:
        example: Mapping with the picture under ``'image'`` and the 0/1 person
            flag under ``'person_present'`` (the key written by ``has_person``).

    Returns:
        A tuple ``(flat_image, label)``: the image resized to
        ``IMAGE_HEIGHT`` x ``IMAGE_WIDTH`` and flattened to one dimension of
        length ``IMAGE_CHANNELS * IMAGE_HEIGHT * IMAGE_WIDTH``, and the
        ``'person_present'`` value.
    """
    # The image and label have to be read out of the example; pixel values are
    # not rescaled here.
    image = tf.image.resize(example['image'], [IMAGE_HEIGHT, IMAGE_WIDTH])
    label = example['person_present']
    flat_image = tf.reshape(image, [IMAGE_CHANNELS * IMAGE_HEIGHT * IMAGE_WIDTH])
    return flat_image, label
    

# Apply the mapping functions to both splits: has_person first (it adds the
# 'person_present' entry), then format_image, which returns an
# (image, label) tuple for each example.
ds_train = ds_train.map(has_person)
ds_validation = ds_validation.map(has_person)
ds_train = ds_train.map(format_image)
ds_validation = ds_validation.map(format_image)

# Build the input pipelines. Only the training data is shuffled; the
# validation data is batched as well so that it can be passed to Keras
# evaluate/fit in the same form as the training data.
ds_train = ds_train.shuffle(1024).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
ds_validation = ds_validation.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Take one batch from the dataset and print its labels as a sanity check.
# Elements are (image, label) tuples at this point, not dictionaries.
for _, person_present in ds_train.take(1):
    print(f"Person present: {person_present.numpy()}")

# Selecting a base model
It is often easier to take an existing model and retrain it for a more specific task.
This example uses a pretrained version of MobileNetV2 as a feature extractor.
The convolution filters are able to generate many features from the image; these could be things like red vertical lines, yellow dots, etc.
We will add new layers that take these features as input and predict our classes.

Note: not all parameter combinations of this Keras function have pretrained weights.
## Transfer learning: https://www.tensorflow.org/tutorials/images/transfer_learning


In [None]:
# Pretrained MobileNetV2 used purely as a feature extractor.
# include_top=False leaves out the final classification layer of the original
# network; our own classifier layer is trained in its place.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=BASE_MODEL_IMAGE_SHAPE,
    alpha=0.35,
    weights="imagenet",
    include_top=False,
    pooling=None,
    input_tensor=None,
    classes=1000,
    classifier_activation=None,
)

# Keep the pretrained weights fixed: only the new classifier is trained.
# The base model has already learned many features, and learning new features
# is harder than training a classifier and would need more data.
base_model.trainable = False

base_model.summary()

In [None]:
# Sanity-check the tensor shapes at every stage using one batch from ds_train.
# The batch is expected to hold flattened images, while the base model takes
# images of shape BASE_MODEL_IMAGE_SHAPE, so restore (height, width, channels)
# and resize before calling it. Pixel values are not preprocessed here because
# only the shapes are inspected.
image_batch, label_batch = next(iter(ds_train))
shaped_batch = tf.reshape(image_batch, (-1,) + IMAGE_SHAPE)
resized_batch = tf.image.resize(
    shaped_batch,
    [BASE_MODEL_IMAGE_HEIGHT, BASE_MODEL_IMAGE_WIDTH],
    method='nearest',
)
feature_batch = base_model(resized_batch)
print(feature_batch.shape)

# Average each feature map over its spatial dimensions
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

# New binary classifier layer: a single sigmoid unit
prediction_layer = tf.keras.layers.Dense(1, activation='sigmoid')
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

In [None]:
# Random image transformations: horizontal flips and rotations
# (factor 0.2), bundled into one reusable model.
_augmentation_layers = [
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.2),
]
data_augmentation = tf.keras.Sequential(_augmentation_layers)

# Input preprocessing function that ships with MobileNetV2
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

In [None]:
# Final model: flat image in, person probability out.
# The input shape must be a tuple; "(n)" without a trailing comma is just the
# integer n, which newer Keras versions refuse to interpret as a shape.
flat_input = tf.keras.layers.Input(
    shape=(IMAGE_CHANNELS * IMAGE_HEIGHT * IMAGE_WIDTH,)
)

# Restore the (height, width, channels) layout of the flat input
reshape = tf.keras.layers.Reshape(IMAGE_SHAPE)(flat_input)

# Scale the image down to the size the base model was built for
resize = tf.keras.layers.Resizing(
    BASE_MODEL_IMAGE_HEIGHT,
    BASE_MODEL_IMAGE_WIDTH,
    interpolation='nearest',  # "bilinear", "nearest" are compatible with tf micro
    crop_to_aspect_ratio=True,
)(reshape)

x = data_augmentation(resize)
x = preprocess_input(x)
# training=False runs the base model in inference mode
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)
model = tf.keras.Model(flat_input, outputs)

In [None]:
# Learning rate used for training the new classifier layers
base_learning_rate = 1e-4

# Binary classification setup: cross-entropy loss on the single output,
# accuracy measured with a 0.5 decision threshold.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.5),
    ],
)

In [None]:
# Number of epochs for the first training run
initial_epochs = 10

# Loss and accuracy on the validation data before any training
loss0, accuracy0 = model.evaluate(x=ds_validation)

In [None]:
# Train on ds_train and evaluate on ds_validation after every epoch;
# the returned history holds the per-epoch metrics.
history = model.fit(
    x=ds_train,
    validation_data=ds_validation,
    epochs=initial_epochs,
)

In [None]:
# Per-epoch metrics recorded during training
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

plt.figure(figsize=(8, 8))

# Upper panel: accuracy curves, with the y axis capped at 1
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.ylim([min(plt.ylim()), 1])

# Lower panel: loss curves on a fixed 0..1 y axis
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.ylabel('Cross Entropy')
plt.legend(loc='upper right')
plt.ylim([0, 1.0])

plt.show()