In [1]:
# disable warnings
import warnings
warnings.filterwarnings("ignore")

# basic imports
import tensorflow as tf 
import torch
import io
import numpy as np
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers

from keras.utils import dataset_utils
import matplotlib.pyplot as plt

import baseline_config

In [2]:
def paths_and_labels_to_dataset(image_paths, labels, num_classes):
    """Pair the raw contents of each file with its label in a `tf.data.Dataset`.

    Files are only read here, not decoded: the first component of every
    element is whatever `tf.io.read_file` returns for the path. Decoding is
    left to a later pipeline stage.

    Args:
        image_paths: Sequence of file paths, one per sample.
        labels: Sequence of labels aligned with `image_paths`.
        num_classes: Number of classes; forwarded to
            `dataset_utils.labels_to_dataset`.

    Returns:
        A zipped dataset of `(file_contents, label)` pairs.
    """
    path_ds = tf.data.Dataset.from_tensor_slices(image_paths)
    raw_bytes_ds = path_ds.map(tf.io.read_file, num_parallel_calls=tf.data.AUTOTUNE)
    # "int" is the label mode that keeps labels as plain integer class indices.
    # The previous boolean `True` only behaved that way because it matched
    # neither the "binary" nor the "categorical" mode string.
    label_ds = dataset_utils.labels_to_dataset(labels, "int", num_classes)
    return tf.data.Dataset.zip((raw_bytes_ds, label_ds))

def create_dataset(subset):
    """Index one split directory and wrap its `.pt` files in a dataset.

    Args:
        subset: Sub-directory name appended verbatim to
            `baseline_config.dataset_path` (callers pass it with a trailing
            slash, e.g. 'TRAIN/').

    Returns:
        A `(dataset, class_names)` tuple: the dataset produced by
        `paths_and_labels_to_dataset` and the class names reported by
        `dataset_utils.index_directory` for this split.
    """
    image_paths, labels, class_names = dataset_utils.index_directory(
        baseline_config.dataset_path + subset,
        labels="inferred",
        # The trailing comma matters: ('.pt') is just the string '.pt'.
        # A real one-element tuple keeps this safe if more formats are added.
        formats=('.pt',),
        class_names=None,
        # Files are indexed unshuffled; shuffling is done later in the
        # training pipeline, so `seed` has no effect on ordering here.
        shuffle=False,
        seed=42,
        follow_links=True)

    dataset = paths_and_labels_to_dataset(
        image_paths=image_paths,
        labels=labels,
        num_classes=len(class_names))

    return dataset, class_names

# Build one dataset per split. Each call infers its class names (and hence
# its integer label indices) from that split's own sub-directories, so check
# that all three splits agree before labels are compared across them.
train_dataset, class_names = create_dataset('TRAIN/')
test_dataset, test_class_names = create_dataset('TEST/')
validation_dataset, validation_class_names = create_dataset('VALIDATION/')

assert test_class_names == class_names, (
    f"TEST classes {test_class_names} differ from TRAIN classes {class_names}"
)
assert validation_class_names == class_names, (
    f"VALIDATION classes {validation_class_names} differ from TRAIN classes {class_names}"
)

Found 12384 files belonging to 5 classes.
Found 487 files belonging to 5 classes.
Found 384 files belonging to 5 classes.


In [3]:
def dataset_tranforms(image, label):
    """Decode one serialized `.pt` sample into a 3-channel float32 image.

    Intended to run eagerly (e.g. through `tf.py_function`), because it calls
    `.numpy()` on its input and uses torch to deserialize.

    Args:
        image: Eager tensor whose `.numpy()` value is the byte content of a
            file written with `torch.save`.
        label: Label for the sample; cast to int64.

    Returns:
        `(image, label)` where `image` has a new trailing axis repeated three
        times and dtype float32, and `label` is int64.
    """
    # NOTE(review): torch.load unpickles its input, so only use this on files
    # from a trusted source. `map_location="cpu"` keeps loading working when
    # a tensor was saved from a GPU device.
    loaded = torch.load(io.BytesIO(image.numpy()), map_location="cpu")

    # Hand TensorFlow a plain NumPy array instead of relying on implicit
    # torch -> TF conversion (which fails for tensors that require grad).
    if isinstance(loaded, torch.Tensor):
        loaded = loaded.detach().numpy()

    # The py_function wrapper declares float32 output, so enforce it here
    # rather than depending on how the file happened to be saved.
    image = tf.cast(tf.convert_to_tensor(loaded), tf.float32)

    # Add a channel axis and replicate it three times along axis 2.
    # Assumes the stored tensor is 2-D so axis 2 is the new channel axis
    # -- TODO confirm against the data.
    image = tf.expand_dims(image, axis=-1)
    image = tf.repeat(image, repeats=3, axis=2)

    label = tf.cast(label, tf.int64)
    return image, label

def py_func_wrapper(x, y):
    """Run `dataset_tranforms` on one `(x, y)` element via `tf.py_function`.

    The wrapped call is declared to return a float32 tensor and an int64
    tensor, in that order.
    """
    return tf.py_function(
        func=dataset_tranforms,
        inp=[x, y],
        Tout=[tf.float32, tf.int64],
    )

# Training stream: shuffle the raw samples, decode them, and repeat forever.
# Epoch length is bounded by `steps_per_epoch` in the later fit() call.
train_dataset = (
    train_dataset
    .shuffle(20000)
    .map(py_func_wrapper)
    .repeat()
    .batch(baseline_config.batch_size)
)

# Evaluation streams: visit every sample exactly once per pass.
# The previous `.repeat().take(batch_size).batch(batch_size)` kept only the
# first `batch_size` samples of the split. The directory index is built with
# shuffle=False, so that single batch was presumably drawn from the leading
# class folder(s) only and the reported metrics were not representative.
validation_dataset = (
    validation_dataset
    .map(py_func_wrapper)
    .batch(baseline_config.batch_size)
)

test_dataset = (
    test_dataset
    .map(py_func_wrapper)
    .batch(baseline_config.batch_size)
)

#for melspec, label in train_dataset:
    # print(melspec.shape, melspec, label)
#    assert melspec.shape==(32,313,128,3)

In [4]:
# Build a very simple classifier on top of a pre-trained EfficientNet V2.
model = keras.Sequential()

# The efficientnet_v2_imagenet1k_b3 hub model requires 260x260 input, so
# every image is resized before it reaches the backbone.
model.add(
    layers.Resizing(260, 260, interpolation="bilinear", crop_to_aspect_ratio=False)
)

# Backbone used purely as a feature generator: trainable=False keeps its
# weights out of the optimizer's update.
model.add(
    hub.KerasLayer(
        "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/feature_vector/2",
        trainable=False,
    )
)

# Classification head. The last Dense layer has no activation, so the model
# outputs raw logits, one per class.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dropout(0.65))
model.add(layers.Dense(len(class_names), activation=None))

# The model has no Input layer, so declare the input shape explicitly.
model.build((None, 313, 128, 3))

# Show the resulting architecture.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 260, 260, 3)       0         
                                                                 
 keras_layer (KerasLayer)    (None, 1536)              12930622  
                                                                 
 flatten (Flatten)           (None, 1536)              0         
                                                                 
 dense (Dense)               (None, 64)                98368     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 5)                 325       
                                                                 
Total params: 13,029,315
Trainable params: 98,693
Non-tr

In [5]:
# `from_logits=True` means the loss applies the softmax itself. This is
# numerically more stable than putting a softmax activation on the last
# layer of the classifier.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=["accuracy"],
    run_eagerly=True,
)

# Each epoch consumes a fixed number of training batches (`steps_per_epoch`),
# then evaluates on `validation_dataset`.
model.fit(
    train_dataset,
    epochs=baseline_config.max_epoch,
    steps_per_epoch=50,
    validation_data=validation_dataset,
)

Epoch 1/100000
Epoch 2/100000
Epoch 3/100000
Epoch 4/100000
Epoch 5/100000
Epoch 6/100000
Epoch 7/100000
Epoch 8/100000
Epoch 9/100000
Epoch 10/100000
Epoch 11/100000
Epoch 12/100000
Epoch 13/100000
Epoch 14/100000