<a href="https://colab.research.google.com/github/A-NGJ/AudioExplorers2023/blob/main/AudioExplorers2023_MobileNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.utils import to_categorical
import tensorflow_addons as tfa
import cv2

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentation settings for the images served by this generator.
# NOTE(review): if the inputs are spectrogram-like images, horizontal_flip
# mirrors each image left-to-right — confirm that is meaningful for this data.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.1,
    rotation_range=10,
    height_shift_range=0.1,
    width_shift_range=0.1,
)


class MetricsAtEndOfEpoch(callbacks.Callback):
    """Print accuracy and weighted F1/precision/recall after every epoch.

    Args:
        validation_data: Indexable pair ``(inputs, targets)``. ``inputs`` is
            passed to ``self.model.predict``; ``targets`` holds one row of
            per-class scores per sample (e.g. one-hot labels) and is reduced
            to class indices with ``argmax`` over the last axis.
    """

    def __init__(self, validation_data):
        super().__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the stored validation inputs and print the metrics.

        ``logs`` is accepted to match the Keras callback signature; it is
        neither read nor modified here.
        """
        # verbose=0 stops predict() from drawing its own progress bar in the
        # middle of the training output on every epoch.
        class_scores = self.model.predict(self.validation_data[0], verbose=0)
        val_predict = np.argmax(class_scores, axis=-1)
        val_true = np.argmax(self.validation_data[1], axis=-1)

        # zero_division=0 produces the same scores as the default setting but
        # without an UndefinedMetricWarning when some class has no predicted
        # (or no true) samples.
        weighted = {'average': 'weighted', 'zero_division': 0}
        f1 = f1_score(val_true, val_predict, **weighted)
        precision = precision_score(val_true, val_predict, **weighted)
        recall = recall_score(val_true, val_predict, **weighted)
        accuracy = accuracy_score(val_true, val_predict)
        print(f"End of epoch {epoch}: Accuracy: {accuracy}, F1 Score: {f1}, Precision: {precision}, Recall: {recall}\n")



def resize_data(data):
    """Resize every image to 32x96 and copy it into three identical channels.

    Args:
        data: NumPy array whose first axis indexes the images. Each image is
            presumably a 2-D single-channel array — TODO confirm against the
            contents of the loaded .npy file.

    Returns:
        A float64 array of shape ``(data.shape[0], 32, 96, 3)``; the three
        channels of each entry hold the same resized image.
    """
    out_height, out_width, channels = 32, 96, 3
    batch = np.zeros((data.shape[0], out_height, out_width, channels))
    for index, image in enumerate(data):
        # cv2.resize takes the target size as (width, height).
        gray = cv2.resize(image, (out_width, out_height))
        batch[index] = np.repeat(gray[..., np.newaxis], channels, axis=-1)
    return batch

# Read the raw samples and their labels from disk
X = np.load('training.npy')
y = np.load('training_labels.npy')

# Bring every sample to the 32x96x3 layout the network is built for
X = resize_data(X)

# Hold out 20% of the samples for validation (fixed seed for repeatability)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# NOTE(review): datagen is configured without any feature-wise statistics
# options, so this fit() call is probably unnecessary — confirm against the
# ImageDataGenerator documentation before removing it.
datagen.fit(X_train)

# Convert the label vectors of both splits to one-hot rows
num_classes = 5
y_train, y_val = (
    to_categorical(labels, num_classes=num_classes)
    for labels in (y_train, y_val)
)

# ImageNet-pretrained MobileNet backbone without its classification head
base_model = MobileNet(
    weights='imagenet',
    include_top=False,
    input_shape=(32, 96, 3),
)

# New head: global pooling -> 128-unit ReLU layer -> softmax over the classes
x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.Dense(128, activation='relu')(x)
predictions = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=base_model.input, outputs=predictions)

# Fine-tuning: only the last 30 backbone layers are trainable; every layer
# before them is frozen.
first_trainable = len(base_model.layers) - 30
for position, layer in enumerate(base_model.layers):
    layer.trainable = position >= first_trainable

# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on augmented batches; validation runs on the un-augmented hold-out split
train_batches = datagen.flow(X_train, y_train, batch_size=32)
epoch_metrics = MetricsAtEndOfEpoch(validation_data=(X_val, y_val))
model.fit(
    train_batches,
    validation_data=(X_val, y_val),
    epochs=10,
    steps_per_epoch=len(X_train) // 32,
    callbacks=[epoch_metrics],
)

# Persist the trained network to an HDF5 file
model.save('music_detector_mobilenet_5_classes_augmented.h5')
