In [1]:
import pathlib
import splitfolders
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

## get data 

In [2]:
#path = "/Users/laurameyer/code/DSP-Tan/mushroom_learning/raw_data/mushroom_poison"

In [3]:
#splitfolders.ratio(path, output="../raw_data/mushrooms_poison_train_test",
#                   seed=1337, ratio=(.9, .1), group_prefix=None, move=False)

In [4]:
directory = "/Users/laurameyer/code/DSP-Tan/mushroom_learning/raw_data/mushrooms_poison_train_test/train"

In [5]:
def get_images_directory(directory):
    data_dir = pathlib.Path(directory)
    return data_dir

In [6]:
data_dir = get_images_directory(directory)
data_dir

PosixPath('/Users/laurameyer/code/DSP-Tan/mushroom_learning/raw_data/mushrooms_poison_train_test/train')

In [7]:
image_count = len(list(data_dir.glob('*/*.jpg')))
print(image_count)

8587


In [8]:
img_height = 224
img_width = 224
batch_size = 32

In [9]:
def load_training_data():

    return tf.keras.utils.image_dataset_from_directory(
      data_dir,
      labels='inferred',
      label_mode='binary', 
      validation_split=0.2,
      subset="training",
      seed=123,
      image_size=(img_height, img_width),
      batch_size=batch_size
    )

train_ds = load_training_data()

Found 10532 files belonging to 2 classes.
Using 8426 files for training.


2022-03-07 18:45:51.643633: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
def load_validation_data():
    return tf.keras.utils.image_dataset_from_directory(
      data_dir,
      labels='inferred', 
      label_mode='binary',
      validation_split=0.2,
      subset="validation",
      seed=123,
      image_size=(img_height, img_width),
      batch_size=batch_size
    )

val_ds = load_validation_data()

Found 10532 files belonging to 2 classes.
Using 2106 files for validation.


In [11]:
class_names = train_ds.class_names
class_names

['edable', 'poison']

In [17]:
num_classes = len(class_names)

In [13]:
def get_labels_from_tfdataset(tfdataset, batched=False):

    labels = list(map(lambda x: x[1], tfdataset)) # Get labels 

    if not batched:
        return tf.concat(labels, axis=0) # concat the list of batched labels

    return labels

In [14]:
def get_inputs_from_tfdataset(tfdataset, batched=False):

    labels = list(map(lambda x: x[0], tfdataset)) # Get labels 

    if not batched:
        return tf.concat(labels, axis=0) # concat the list of batched labels

    return labels

## modeling

In [12]:
from tensorflow.keras.applications.vgg19 import VGG19

In [15]:
val_ds_labels = get_labels_from_tfdataset(val_ds)

In [16]:
val_ds_inputs = get_inputs_from_tfdataset(val_ds)

In [18]:
input_shape = (224, 224, 3)

def load_model():
    
    model = tf.keras.applications.VGG19(weights="imagenet", include_top=False, input_shape=input_shape, classes=num_classes, classifier_activation="sigmoid")
    
    return model

In [19]:
def set_nontrainable_layers(model):
    
    model.trainable = False
    
    return model

In [26]:
from tensorflow.keras import layers, models

def add_last_layers(model):
    '''Take a pre-trained model, set its parameters as non-trainables, and add additional trainable layers on top'''
    base_model = set_nontrainable_layers(model)
    dropout_layer = layers.Dropout(0.2)
    flatten_layer = layers.Flatten()
    dense_layer_1 = layers.Dense(50, activation='relu')
    dense_layer_2 = layers.Dense(20, activation='relu')
    prediction_layer = layers.Dense(num_classes, activation='sigmoid')
    
    model = models.Sequential([
        layers.Rescaling(1./255),
        base_model,
        dropout_layer, 
        flatten_layer,
        dense_layer_1,
        dense_layer_2,
        prediction_layer
    ])
    
    return model

In [27]:
vgg_model = load_model()

In [28]:
vgg_model = add_last_layers(vgg_model)

In [29]:
val_prediction = vgg_model.predict(val_ds_inputs)

In [30]:
from sklearn import metrics
import numpy as np

print(val_prediction.shape)

labels = np.argmax(val_prediction, axis=1)

results = metrics.accuracy_score(val_ds_labels, labels)
results

(2106, 2)


NameError: name 'np' is not defined

## vgg19

In [None]:
from tensorflow.keras import optimizers

def build_model(): 
    model = load_model()
    model = add_last_layers(model)
    
    opt = optimizers.Adam(learning_rate=1e-4)
    
    model = model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  optimizer=opt,
                  metrics=['accuracy'])
    return model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

epochs = 100

model = build_model()

es = EarlyStopping(monitor = 'val_accuracy', 
                   mode = 'max', 
                   patience = 5, 
                   verbose = 1, 
                   restore_best_weights = True)

history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=epochs, 
                    verbose=1,
                    callbacks=[es])

In [None]:
model.save("../model_poison_vgg19")

## prediction

In [None]:
img = tf.keras.utils.load_img(
    "/Users/laurameyer/Desktop/poisonous-white-mushrooms.jpg", target_size=(img_height, img_width)
)

In [None]:
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)

In [None]:
prediction = model.predict(img_array)

classif = int(prediction > .5)

print(f"This image most likely belongs to {class_names[classif]} with a score of: {prediction[0][0]:.2f}")