# Visual Wake Words (VWW) Project - Phase 1
**Goal:** Load dataset, preprocess data, and define baseline models for comparison.
**Environment:** Google Colab / TensorFlow 2.x

## 1. Setup & Imports

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import os

# Report the TensorFlow build in use and whether it can see at least one GPU.
print(
    f"TensorFlow Version: {tf.__version__}",
    f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}",
    sep="\n",
)

## 2. Data Loading
We use `tensorflow_datasets` to load the `visual_wake_words` dataset.
*   **Task:** Binary classification (Person vs Not-person).
*   **Label 1:** Person
*   **Label 0:** Not-person

In [None]:
# Fetch the Visual Wake Words splits through TFDS. The first call may spend a
# while downloading and preparing the data when nothing is cached locally.
# NOTE(review): confirm that this name is registered in the installed TFDS
# catalog and that it exposes 'train' and 'validation' splits.
dataset_name = 'visual_wake_words'

# with_info=True makes tfds.load hand back the datasets together with an info
# object; that object is used below for label names and split sizes.
ds, info = tfds.load(
    name=dataset_name,
    split=['train', 'validation'],
    shuffle_files=True,
    as_supervised=True,  # elements are (image, label) tuples
    with_info=True,
)

# One dataset per requested split, in the order they were requested.
train_ds, val_ds = ds[0], ds[1]

# Summarise the features, the label vocabulary and the size of each split.
for summary_line in (
    f"Dataset info: {info.features}",
    f"Class names: {info.features['label'].names}",
    f"Training samples: {info.splits['train'].num_examples}",
    f"Validation samples: {info.splits['validation'].num_examples}",
):
    print(summary_line)

## 3. Preprocessing
*   **Resize:** 96x96 (Standard for TinyML VWW).
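*   **Normalization:** Scale pixel values to [-1, 1]. This is the range Keras MobileNetV2 expects; Keras MobileNetV3 expects it only when its built-in preprocessing is disabled (`include_preprocessing=False`), otherwise it expects raw [0, 255] pixels.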
*   **Augmentation:** Random horizontal flip and random brightness shift (applied to the training set only).

In [None]:
# Side length in pixels of the square model input, and the number of examples
# per batch in the input pipelines.
IMG_SIZE = 96
BATCH_SIZE = 32


def preprocess(image, label):
    """Resize an image to IMG_SIZE x IMG_SIZE and rescale its pixel values.

    Args:
        image: Image tensor; the rescaling assumes pixel values in [0, 255].
        label: Label for the image, passed through untouched.

    Returns:
        A ``(image, label)`` tuple where the image has been resized and
        linearly mapped so that 0 becomes -1 and 255 becomes +1.
    """
    resized = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    # x / 127.5 - 1 sends [0, 255] onto [-1, 1].
    return resized / 127.5 - 1.0, label

def augment(image, label):
    """Apply random training-time augmentation to one normalized image.

    The image is randomly mirrored left/right and its brightness is shifted
    by a random amount. In this file the function is mapped over the training
    pipeline after ``preprocess``, so the input is expected to already be
    scaled to [-1, 1]; the output is clipped back into that same range.

    Args:
        image: Image tensor with values in [-1, 1].
        label: Label for the image, passed through untouched.

    Returns:
        A ``(image, label)`` tuple with the augmented image, still in [-1, 1].
    """
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    # The brightness shift is additive, so pixels near the ends of the range
    # can end up outside [-1, 1]. Clip so the model (and the [0, 1] rescale
    # used for plotting) never sees out-of-range values.
    image = tf.clip_by_value(image, -1.0, 1.0)
    return image, label

# Let tf.data pick the parallelism and prefetch depth at runtime.
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline. The cache sits after the deterministic preprocessing and
# before shuffle/augment, so the resized images are reused across epochs while
# the ordering and the augmentation are drawn anew each time.
# NOTE(review): cache() without a filename keeps the data in memory; confirm
# the resized training split fits in the available RAM.
train_ds = (
    train_ds
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(1000)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# Validation pipeline: same preprocessing, but no caching, shuffling or
# augmentation.
val_ds = (
    val_ds
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

## 4. Visualization
Let's verify the data by plotting a few images from the training batch.
Note: Images are normalized to [-1, 1], so we need to rescale them to [0, 1] for display.

In [None]:
# Pull a single batch from the training pipeline to sanity-check the images
# and their labels.
image_batch, label_batch = next(iter(train_ds))

# Show at most a 3x3 grid, but never index past the end of the batch (this
# matters if BATCH_SIZE is ever lowered below 9).
num_shown = min(9, int(image_batch.shape[0]))

plt.figure(figsize=(10, 10))
for i in range(num_shown):
    plt.subplot(3, 3, i + 1)
    # Map [-1, 1] back to [0, 1] for display. The brightness augmentation can
    # leave values slightly outside [-1, 1], so clip to the range imshow
    # accepts for float RGB data instead of relying on its implicit clipping.
    img = tf.clip_by_value((image_batch[i] + 1) / 2.0, 0.0, 1.0)
    plt.imshow(img)
    label = label_batch[i].numpy()
    # Translate the integer label into its class name for the title.
    plt.title(info.features['label'].int2str(label))
    plt.axis("off")
plt.show()

## 5. Model Definition

### Model A: MobileNetV2 (alpha=0.35)
This is a classic choice for VWW. `alpha=0.35` reduces the width of the network, significantly reducing parameters and FLOPs.

In [None]:
def create_mobilenet_v2_model():
    """Build a binary classifier on top of a frozen MobileNetV2 backbone.

    The backbone is created without its classification top, with ImageNet
    weights, a width multiplier of ``alpha=0.35`` and an
    ``IMG_SIZE x IMG_SIZE x 3`` input. It is frozen, and a small head
    (global average pooling, dropout, one sigmoid unit) is stacked on top.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model with a single sigmoid
        output.
    """
    keras = tf.keras

    backbone = keras.applications.MobileNetV2(
        alpha=0.35,
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    # Initial transfer-learning stage: keep the pretrained weights fixed so
    # that only the new head is trained.
    backbone.trainable = False

    head = [
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
    return keras.Sequential([backbone, *head])

# Instantiate the MobileNetV2 baseline and configure it for binary
# classification: the single sigmoid output is paired with binary
# cross-entropy, and accuracy is tracked as the metric.
model_v2 = create_mobilenet_v2_model()
model_v2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer table and parameter counts.
model_v2.summary()

### Model B: MobileNetV3-Small
MobileNetV3 was designed with the help of Neural Architecture Search (NAS) and uses hard-swish activation functions, often providing a better accuracy-latency trade-off than V2.

In [None]:
def create_mobilenet_v3_model():
    """Build a binary classifier on top of a frozen MobileNetV3-Small backbone.

    The backbone is created without its classification top, with ImageNet
    weights and an ``IMG_SIZE x IMG_SIZE x 3`` input. It is frozen, and a
    small head (global average pooling, dropout, one sigmoid unit) is stacked
    on top.

    The input pipeline in this file already scales images to [-1, 1], so the
    backbone's built-in preprocessing is switched off; see the comment on
    ``include_preprocessing`` below.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model with a single sigmoid
        output.
    """
    base_model = tf.keras.applications.MobileNetV3Small(
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        include_top=False,
        weights='imagenet',
        minimalistic=False,  # Use standard V3 Small
        # Keras MobileNetV3 defaults to include_preprocessing=True, which
        # adds its own rescaling and expects raw [0, 255] pixels. Feeding it
        # the [-1, 1] images produced by `preprocess` would rescale them a
        # second time and hand the pretrained weights badly scaled input.
        # With preprocessing disabled the model expects [-1, 1] directly.
        include_preprocessing=False,
    )

    # Initial transfer-learning stage: keep the pretrained weights fixed so
    # that only the new head is trained.
    base_model.trainable = False

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    return model

# Instantiate the MobileNetV3-Small baseline and configure it for binary
# classification: the single sigmoid output is paired with binary
# cross-entropy, and accuracy is tracked as the metric.
model_v3 = create_mobilenet_v3_model()
model_v3.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer table and parameter counts.
model_v3.summary()

## 6. Initial Comparison (Parameter Count)
Before training, let's compare the size of the models. This is a crucial metric for embedded deployment.

In [None]:
def get_model_params(model):
    """Return the parameter count reported by ``model.count_params()``.

    Args:
        model: Any object exposing a ``count_params()`` method.

    Returns:
        Whatever ``model.count_params()`` returns.
    """
    total_params = model.count_params()
    return total_params

# Count the parameters of each candidate (frozen backbone plus head).
params_v2 = get_model_params(model_v2)
params_v3 = get_model_params(model_v3)

print(f"MobileNetV2 (alpha=0.35) Parameters: {params_v2:,}")
print(f"MobileNetV3-Small Parameters:        {params_v3:,}")

# Report which model is smaller. A tie is handled explicitly so that equal
# counts are not reported as one model being smaller than the other.
if params_v2 < params_v3:
    print("MobileNetV2 is smaller.")
elif params_v3 < params_v2:
    print("MobileNetV3-Small is smaller.")
else:
    print("Both models have the same number of parameters.")