In [37]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
# from keras.preprocessing import image
from keras.applications.efficientnet import EfficientNetB0
from keras.applications.vgg16 import VGG16
import tensorflow_addons as tfa


In [25]:
# import os 
# os.chdir ('./Project/Code')

In [38]:
%pwd

'/Users/shreyesh/Desktop/Spring 2022/Neural Networks/Project/Code'

In [39]:
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

# Retrieve training data
# remove subset param
def getDataset(dataFolder, subset, imageSize = (224, 224), batchSize = 32):
    train_ds = keras.utils.image_dataset_from_directory(
      dataFolder,
      seed=123,
      image_size=imageSize,
      batch_size=batchSize)
    return train_ds

# Tune buffer size and efficiency 
# When do we call this?
def configurePerformance(train_ds, val_ds): 
    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
    return train_ds, val_ds

# def buildModel(dropoutRate, numClasses, inpShape = (224, 224, 3)):
import pickle

# Create pkl file of the model after the training phase
def dumpModel(modelName, phase): 
    # Save the trained model as a pickle string.
    modelName = "model_" + modelName + "_ " + phase + ".pkl"
    pickle.dump(model, open(modelName, 'wb'))
    
def getDatasetsByCar(cars,  imageSize = (224, 224), batchSize = 32):
    train_ds = None 
    val_ds = None 
    for car in cars: 
        trainingFolder = 'data/'+ car +'/train/RGB/'
        testingFolder = 'data/'+ car + '/test_with_labels/RGB/'
        if not train_ds and not val_ds:
            train_ds = getDataset(trainingFolder, "training")
            val_ds =  getDataset(testingFolder, "validation")
        else: 
            new_train_ds =  getDataset(trainingFolder, "training")
            new_val_ds =  getDataset(testingFolder, "validation")
            train_ds.concatenate(new_train_ds)
            val_ds.concatenate(new_val_ds)
    return train_ds, val_ds


In [41]:
EPOCHS = 5
modelName = "vgg16"
# Initial layer input shape
inpShape =  (224, 224, 3)
cars = ['x5']
train_ds, val_ds = getDatasetsByCar(cars, batchSize=64)

# trainingFolder = 'data/x5/train/RGB/'
# testingFolder = 'data/x5/test_with_labels/RGB/'
# # don't need to pass subset string - datasets already split
# train_ds = getDataset(trainingFolder, "training")
# val_ds =  getDataset(testingFolder, "validation")

train_ds, val_ds = configurePerformance(train_ds, val_ds)

Found 6000 files belonging to 7 classes.
Found 1500 files belonging to 7 classes.


In [42]:
dropoutRate = 0.2
numClasses = 7 
inp = layers.Input(shape=inpShape)
baseModel = VGG16(weights="imagenet",
                   include_top = False) 
baseModel.trainable = False 
x = baseModel(inp, training=False)
x =  layers.GlobalAveragePooling2D(name="avg_pool")(x)
x = layers.Dropout(dropoutRate, noise_shape=None, seed=None)(x)
out = layers.Dense(numClasses,activation="softmax", name = "pred")(x)
model = keras.Model(inp, out, name="FeatureExtraction-B0")
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.01),
          loss=tf.keras.losses.SparseCategoricalCrossentropy(),
          # metrics=['accuracy']
          metrics=['accuracy',
              recall_m,
              precision_m,
              f1_m
              ]
          )

In [43]:
# Feature extraction without the top layers 
hist_results = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=EPOCHS
)


Epoch 1/5


2022-04-15 13:33:21.972255: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-04-15 13:35:17.610104: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [44]:
dumpModel(modelName, "phase1")

INFO:tensorflow:Assets written to: ram://04f3cfc1-92dd-4a97-970b-be54f7a744f6/assets


In [45]:
# Fine tuning the Feature Extraction Model 
baseModel.trainable = True
for layer in model.layers[1].layers:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False
        
model.compile(loss = tf.keras.losses.SparseCategoricalCrossentropy(),
              optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy',
              recall_m,
              precision_m,
              f1_m
              ]
              )

# Train it again 
hist_results_tuned = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=9,
  #steps_per_epoch=len(train_ds)?
  initial_epoch=hist_results.epoch[-1]
)

dumpModel(modelName, "phase2")

preds = model.predict(val_ds, verbose = 1)
model.evaluate(val_ds)

Epoch 5/9


2022-04-15 13:46:02.158223: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-04-15 13:53:09.289012: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
INFO:tensorflow:Assets written to: ram://b332995d-9115-48f3-920f-7ed5c088d93a/assets


2022-04-15 14:22:21.817584: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




[3.9796597957611084,
 0.6759999990463257,
 1.2051112651824951,
 0.7282177805900574,
 0.9036774635314941]

In [36]:
model.predict(val_ds)

array([[2.3608278e-01, 1.8160450e-04, 2.1566679e-05, ..., 7.5970030e-01,
        2.3007477e-03, 3.7106854e-04],
       [9.8948485e-01, 4.6903307e-07, 4.2847299e-09, ..., 1.0490552e-02,
        2.2224824e-05, 5.0511570e-08],
       [1.0000000e+00, 3.4304973e-30, 1.2041805e-37, ..., 1.9341984e-19,
        7.9030959e-27, 3.3785575e-38],
       ...,
       [1.3920881e-01, 3.1936724e-02, 9.1634942e-03, ..., 9.2083327e-02,
        4.1591633e-02, 2.1902801e-02],
       [2.0419422e-05, 8.5727197e-06, 8.5373646e-09, ..., 9.8762089e-01,
        6.0068138e-05, 5.4565931e-05],
       [2.5823074e-05, 1.8547093e-05, 4.9890145e-06, ..., 7.7274890e-08,
        1.0420250e-06, 1.7978804e-05]], dtype=float32)