## VGG16 36 SPECIES MODEL 2

In [1]:
import os
import pathlib

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import image_dataset_from_directory

In [2]:
# Root folder that holds the `train/` and `test/` image directories.
# Set the MUSHROOM_DATA_DIR environment variable to run this notebook on
# another machine; when it is unset the original local location is used.
DATA_ROOT = os.environ.get(
    "MUSHROOM_DATA_DIR",
    "/Users/laurameyer/code/DSP-Tan/mushroom_learning/raw_data/2_12_mushroom_species_train_test",
)

path = f"{DATA_ROOT}/train"
path_test = f"{DATA_ROOT}/test"

In [3]:
def get_images_directory(directory):
    """Wrap a directory location in a `pathlib.Path`.

    Args:
        directory: path of the image folder (anything `pathlib.Path` accepts).

    Returns:
        A `pathlib.Path` pointing at `directory`.
    """
    return pathlib.Path(directory)

# Path objects for the training and the test image folders.
data_dir, data_dir_test = get_images_directory(path), get_images_directory(path_test)

# Number of .jpg files found one directory level below the training folder.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
image_count

3170

In [4]:
import os

# Intended to reduce TensorFlow's native log output.
# NOTE(review): tensorflow is already imported in an earlier cell; this
# variable may need to be set before that import to take effect - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Size every image is resized to when the datasets are built, and the number
# of images per batch.
IMG_HEIGHT = IMG_WIDTH = 224
BATCH_SIZE = 32


def load_training_data(data_dir):
    """Build the training subset of the images stored under `data_dir`.

    Labels are inferred from the directory structure. 20% of the files are
    set aside for validation (`validation_split=0.2`) and this function
    returns the remaining "training" subset. The fixed seed must match the
    one used by `load_validation_data` so both functions see the same split.

    Args:
        data_dir: directory containing the images.

    Returns:
        The dataset produced by `tf.keras.utils.image_dataset_from_directory`,
        with images resized to (IMG_HEIGHT, IMG_WIDTH) in batches of
        BATCH_SIZE.
    """
    split_options = dict(
        labels='inferred',
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
    )
    return tf.keras.utils.image_dataset_from_directory(data_dir, **split_options)
    
def load_validation_data(data_dir):
    """Build the validation subset of the images stored under `data_dir`.

    Uses the same `validation_split=0.2` and seed as `load_training_data`
    but requests the "validation" subset, i.e. the 20% of files held out
    from training.

    Args:
        data_dir: directory containing the images.

    Returns:
        The dataset produced by `tf.keras.utils.image_dataset_from_directory`,
        with images resized to (IMG_HEIGHT, IMG_WIDTH) in batches of
        BATCH_SIZE.
    """
    split_options = dict(
        labels='inferred',
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
    )
    return tf.keras.utils.image_dataset_from_directory(data_dir, **split_options)

def load_testing_data(data_dir):
    """Build the held-out test dataset from the images under `data_dir`.

    Unlike the training/validation loaders, no split is applied: every file
    in the directory is used. Shuffling is disabled so that the dataset
    yields the same order on every pass; otherwise labels collected in one
    pass (e.g. with `get_labels_from_tfdataset`) would not line up with
    predictions computed in another pass.

    Args:
        data_dir: directory containing the test images.

    Returns:
        The dataset produced by `tf.keras.utils.image_dataset_from_directory`,
        with images resized to (IMG_HEIGHT, IMG_WIDTH) in batches of
        BATCH_SIZE.
    """
    return tf.keras.utils.image_dataset_from_directory(
      data_dir,
      labels='inferred',
      shuffle=False,  # deterministic order; a seed is therefore not needed
      image_size=(IMG_HEIGHT, IMG_WIDTH),
      batch_size=BATCH_SIZE
    )

In [5]:
def get_labels_from_tfdataset(tfdataset, batched=False):
    """Collect the second component of every element of `tfdataset`.

    Assumes each element is an (inputs, labels) pair - TODO confirm against
    the datasets actually passed in.

    Args:
        tfdataset: iterable whose elements can be indexed with `[1]`.
        batched: when True, return the per-element labels as a Python list;
            when False (default), concatenate them along axis 0 with
            `tf.concat`.

    Returns:
        A list of label batches if `batched` is True, otherwise a single
        concatenated tensor.
    """
    per_batch_labels = [element[1] for element in tfdataset]

    if batched:
        return per_batch_labels

    # Merge the individual batches into one tensor.
    return tf.concat(per_batch_labels, axis=0)

def get_inputs_from_tfdataset(tfdataset, batched=False):
    """Collect the first component of every element of `tfdataset`.

    Assumes each element is an (inputs, labels) pair - TODO confirm against
    the datasets actually passed in.

    Args:
        tfdataset: iterable whose elements can be indexed with `[0]`.
        batched: when True, return the per-element inputs as a Python list;
            when False (default), concatenate them along axis 0 with
            `tf.concat`.

    Returns:
        A list of input batches if `batched` is True, otherwise a single
        concatenated tensor.
    """
    per_batch_inputs = [element[0] for element in tfdataset]

    if batched:
        return per_batch_inputs

    # Merge the individual batches into one tensor.
    return tf.concat(per_batch_inputs, axis=0)

In [6]:
# Training and validation subsets are both drawn from the training folder.
train_ds, val_ds = load_training_data(data_dir), load_validation_data(data_dir)

# The test set comes from its own, separate folder.
test_ds = load_testing_data(data_dir_test)

Found 3170 files belonging to 13 classes.
Using 2536 files for training.
Found 3170 files belonging to 13 classes.
Using 634 files for validation.
Found 358 files belonging to 13 classes.


In [7]:
def data_augmentation():
    """Return a small Keras `Sequential` of random image transformations.

    The pipeline applies `RandomRotation(0.1)` followed by `RandomZoom(0.1)`.
    A new `Sequential` (with new layer instances) is created on every call.
    """
    random_transforms = [
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
    return keras.Sequential(random_transforms)

In [8]:
from tensorflow.keras.applications.vgg16 import VGG16

# Shape of the images fed to the network: derived from the constants used to
# build the datasets so both always agree; the trailing 3 is the channel count.
input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

# One output unit per class directory found by the training dataset loader.
num_classes = len(train_ds.class_names)

def load_model():
    """Load the VGG16 convolutional base with ImageNet weights.

    `include_top=False` drops VGG16's own classification head, so the
    `classes` and `classifier_activation` arguments (which only apply to
    that head) are intentionally not passed; the classifier for this task
    is added separately on top of the returned base.

    Returns:
        The VGG16 Keras model without its top layers, expecting inputs of
        shape `input_shape`.
    """
    return VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
    

In [9]:
def set_nontrainable_layers(model):
    """Mark `model` as non-trainable and hand it back.

    The object is modified in place (its `trainable` attribute is set to
    False); the return value is the same object, returned for convenience.
    """
    model.trainable = False
    return model

In [10]:
from tensorflow.keras import layers, models

def add_last_layers(model):
    '''Freeze a pre-trained model and stack a trainable classifier on top.

    The returned `Sequential` is, in order: the random augmentation pipeline
    from `data_augmentation()`, a `Rescaling(1./255)` layer, the frozen
    `model`, dropout (0.2), flatten, two ReLU dense layers (50 and 20 units)
    and a softmax dense layer with `num_classes` units.

    NOTE(review): the inputs are rescaled by 1/255 before reaching the
    pre-trained base; verify this matches the preprocessing the base was
    trained with.

    Args:
        model: pre-trained model; it is frozen in place via
            `set_nontrainable_layers`.

    Returns:
        The assembled (uncompiled) Keras `Sequential` model.
    '''
    frozen_backbone = set_nontrainable_layers(model)

    # One seeded initializer instance is shared by both hidden dense layers.
    glorot = tf.keras.initializers.GlorotUniform(seed=0)

    classifier_head = [
        layers.Dropout(0.2),
        layers.Flatten(),
        layers.Dense(50, activation='relu', kernel_initializer=glorot),
        layers.Dense(20, activation='relu', kernel_initializer=glorot),
        layers.Dense(num_classes, activation='softmax'),
    ]
    preprocessing_and_base = [
        data_augmentation(),
        layers.Rescaling(1./255),
        frozen_backbone,
    ]

    return models.Sequential(preprocessing_and_base + classifier_head)

In [11]:
from tensorflow.keras import optimizers

def build_model():
    """Create and compile the full classification model.

    Loads the pre-trained base with `load_model`, adds the trainable head
    with `add_last_layers`, and compiles the result with Adam
    (learning rate 5e-4), sparse categorical cross-entropy and accuracy.

    Returns:
        The compiled Keras model, ready for `fit`.
    """
    model = add_last_layers(load_model())

    opt = optimizers.Adam(learning_rate=5e-4)

    # The last layer added by `add_last_layers` already applies softmax, so
    # the model outputs probabilities, not logits: `from_logits` must be False.
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  optimizer=opt,
                  metrics=['accuracy'])
    return model

In [13]:
from tensorflow.keras.callbacks import EarlyStopping


# Upper bound on the number of epochs; early stopping may end training sooner.
epochs = 100

model = build_model()

# Stop once validation accuracy has not improved for 5 epochs and roll the
# model back to the best weights seen.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    verbose=1,
    restore_best_weights=True,
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    verbose=1,
    callbacks=[es],
)

Epoch 1/100
Epoch 2/100

KeyboardInterrupt: 

In [None]:
# Write the model from the training cell to disk, one directory above the
# notebook's working directory.
# NOTE(review): the recorded training run ended with a KeyboardInterrupt -
# confirm the model holds the intended weights before saving.
model.save("../model_2_species_vgg16")

In [None]:
## Let me know when your model is saved locally, then I'll help you upload it to GCP!