In [1]:
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from keras.models import Sequential
from typing import Tuple, List
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
import numpy as np
import sklearn
import keras
import os

In [2]:
#Seeding random state to 13 always, for reproducibility
SEED = 13
np.random.seed(SEED)      #NumPy global RNG
tf.random.set_seed(SEED)  #TensorFlow global RNG (np.random.seed alone does not seed TensorFlow)

In [3]:
#Helper Function: Return the paths to all jpg files found within a directory
def getImageDirs(root: str = "data"):
    """Collect the path of every file below ``root`` whose name ends in ".jpg".

    The directory tree is walked recursively with ``os.walk``; each match is
    returned as the containing folder joined with the file name. The extension
    comparison is case-sensitive, so "x.JPG" is not included.

    Args:
        root: Directory to search. Defaults to "data".

    Returns:
        A list of path strings, in the order ``os.walk`` yields them
        (empty when ``root`` does not exist or holds no .jpg files).
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(root)
        for name in names
        if name.endswith(".jpg")
    ]

In [4]:
#Helper Function: Return the class weights given a list of classes
def getClassWeightsFromLabels(labels: List[int]) -> dict:
    """Compute "balanced" class weights for a list of labels.

    The distinct labels are taken from ``np.unique(labels)`` (sorted order) and
    passed to scikit-learn's ``compute_class_weight``. The result is keyed by
    the position of each label in that sorted order, so any number of classes
    is supported (not just two).

    NOTE(review): the integer keys presumably need to match the class indices
    the Keras generator assigns to the same labels - confirm against
    ``generator.class_indices`` before passing the result as ``class_weight``.

    Args:
        labels: One label per sample (ints or strings).

    Returns:
        Dict mapping class position (0, 1, ...) to its weight as a float.
    """
    uniqueLabels = np.unique(labels)
    weights = sklearn.utils.class_weight.compute_class_weight(class_weight="balanced", classes=uniqueLabels, y=labels)
    return {index: float(weight) for index, weight in enumerate(weights)}

In [5]:
#Helper Function: Return the img paths and classes in separate lists given a txt file from the LABELS folder
def getDirsAndClasses(root: str, file: str) -> Tuple[List[str], List[int]]:
    """Read a label file and return its image paths and integer classes.

    The file at ``root + file`` (plain string concatenation, so ``file`` must
    carry its own leading separator) is read line by line. Each non-blank line
    is expected to be ``<image path> <class>``; the class is the last
    whitespace-separated token, so image paths may contain spaces.
    Blank / whitespace-only lines are skipped.

    Args:
        root: Prefix of the label file path.
        file: Remainder of the label file path, appended to ``root`` as-is.

    Returns:
        ``(imageDirs, classes)``: two parallel lists, paths as written in the
        file and classes converted with ``int``.

    Raises:
        ValueError: if a non-blank line has fewer than two tokens or its last
            token is not an integer.
        OSError: if the file cannot be opened.
    """
    imageDirs = []
    classes = []
    with open(root + file, "r") as f:
        for line in tqdm(f):
            entry = line.strip()
            if not entry:
                #tolerate trailing/blank lines instead of failing on unpacking
                continue
            #split from the right so only the final token is treated as the class
            imageDir, clazz = entry.rsplit(maxsplit=1)
            imageDirs.append(imageDir)
            classes.append(int(clazz))
    return imageDirs, classes

In [6]:
#Helper Function: Create a Keras prebuilt model
def makeModel(inputShape: Tuple[int], modelName:str ='') -> keras.Model:
    """
    Build a binary classifier: the selected architecture followed by a single
    sigmoid output unit (the model therefore emits probabilities, not logits).

    Source: https://www.tensorflow.org/guide/keras/functional#a_toy_resnet_model
    
    Note that I tend to prefer the super-explicit (if somewhat verbose) style. 
    This style is technically unnecessary, but it helps with readability.

    Load model by inputting the name to modelName
    Options are "Simple_ResNet", "SimpleNet", "InceptionResNetV2", "MobileNetV2", "ResNet50V2", "DenseNet121", "DenseNet201", and "NASNetLarge"

    Args:
        inputShape: Shape of one input image, e.g. (150, 150, 3). It is used
            for the model input and, for the pretrained backbones other than
            NASNetLarge, as the backbone's input_shape as well.
        modelName: One of the option names listed above.

    Returns:
        An uncompiled keras.Model named after modelName.

    Raises:
        ValueError: if modelName is not one of the recognized options.
    """
    pretrainedNames = ["InceptionResNetV2","MobileNetV2","ResNet50V2","DenseNet121","DenseNet201","NASNetLarge"]
    inputs = keras.Input(shape=inputShape, name="Input")

    if modelName == "Simple_ResNet":

        x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu")(inputs)
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(x)
        block_1_output = layers.MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x)

        #Residual block: two same-padded convolutions added back onto their input
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(block_1_output)
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(x)
        block_2_output = layers.add([x, block_1_output])

        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(block_2_output)
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(x)
        block_3_output = layers.add([x, block_2_output])

        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(block_3_output)
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(units=256, activation="relu")(x)
        x = layers.Dense(units=256, activation="relu")(x)
        x = layers.Dense(units=256, activation="relu")(x)
        x = layers.Dropout(0.5)(x)

    elif modelName in pretrainedNames:

        if modelName == "NASNetLarge":
            #NASNetLarge is built for 331x331 inputs here, so images are resized first.
            #Newer TensorFlow exposes Resizing directly on layers; older releases only
            #have it under layers.experimental.preprocessing.
            resizingLayer = getattr(layers, "Resizing", None)
            if resizingLayer is None:
                resizingLayer = tf.keras.layers.experimental.preprocessing.Resizing
            baseInput = resizingLayer(height=331, width=331)(inputs)
            baseShape = (331, 331, 3)
        else:
            baseInput = inputs
            baseShape = tuple(inputShape)

        #The option names are exactly the constructor names in keras.applications
        baseModel = getattr(keras.applications, modelName)(include_top=False, weights="imagenet", input_shape=baseShape)
        #Freeze the backbone on the model object itself; setting `trainable` on the
        #tensor returned by calling the model would not freeze any weights.
        baseModel.trainable = False

        x = baseModel(baseInput)
        x = layers.Flatten()(x)
        x = layers.Dense(128, activation="relu")(x)
        x = layers.Dropout(0.3)(x)
        x = layers.Dense(128, activation="relu")(x)
        x = layers.Dropout(0.3)(x)

    elif modelName == "SimpleNet":
        x = layers.AveragePooling2D(pool_size=(50, 50))(inputs)
        x = layers.Flatten()(x)
        x = layers.Dense(64, activation="relu")(x)
        x = layers.Dropout(0.3)(x)
        x = layers.Dense(64, activation="relu")(x)
        x = layers.Dropout(0.3)(x)

    else:
        raise ValueError(f"Model Name Not Recognized: {modelName!r}")

    output = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inputs=inputs, outputs=output, name=modelName)

In [7]:
#Get the Train and Test Datasets using the splits from the LABELS folder
DATASET_ROOT = os.path.join(os.getcwd(), "Data", "CNR-EXT-150x150")

def loadSplit(labelFile: str) -> pd.DataFrame:
    """Build the (image, class) DataFrame for one split file in LABELS.

    "image" holds the path of the patch under DATASET_ROOT/PATCHES and "class"
    is "free" for label 0 and "busy" for any other label.
    """
    #getDirsAndClasses concatenates root + file, so the file part carries the leading separator
    imageDirs, classes = getDirsAndClasses(DATASET_ROOT, os.sep + os.path.join("LABELS", labelFile))
    return pd.DataFrame([
            {
                "image": os.path.join(DATASET_ROOT, "PATCHES", filename),
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])

train = loadSplit("train.txt")
#Now Get Test
test = loadSplit("test.txt")

94493it [00:00, 671957.06it/s]
94493it [00:00, 1553178.67it/s]
31825it [00:00, 998367.45it/s]
31825it [00:00, 1776017.84it/s]


In [8]:
#Declare data generators and preprocessing
train_datagen = ImageDataGenerator(
    #Augment data with random flips, normalize each sample's input
    vertical_flip = True,
    horizontal_flip = True,
    rescale = 1.0 / 255.0,
    samplewise_std_normalization = True
)
train_generator = train_datagen.flow_from_dataframe(
    directory = None, #none since the df has absolute paths
    dataframe = train,
    x_col = "image",
    y_col = "class",
    validate_filenames = False, #faster for huge datasets
    target_size = (150, 150),
    color_mode = "rgb",
    batch_size = 128,
    class_mode = "binary",
    shuffle = True
)

test_datagen = ImageDataGenerator(
    #Same rescale/normalization as training, but no random flips: test images must not be augmented
    rescale = 1.0 / 255.0,
    samplewise_std_normalization = True
)
#Built from test_datagen (not train_datagen) so evaluation sees un-augmented images
test_generator = test_datagen.flow_from_dataframe(
    directory = None,
    dataframe = test,
    x_col = "image",
    y_col = "class",
    validate_filenames = False,
    target_size = (150, 150),
    color_mode = "rgb",
    batch_size = 128,
    class_mode = "binary",
    shuffle = False #keep batches in the row order of `test` so predictions can be matched to files
)

Found 94493 non-validated image filenames belonging to 2 classes.
Found 31825 non-validated image filenames belonging to 2 classes.


In [9]:
#Declare Callbacks: stop training if accuracy doesn't rise 1% within 3 epochs
#("accuracy" is the training metric; fit is called without validation data)
stopOnPlateau = keras.callbacks.EarlyStopping(monitor = "accuracy", min_delta = 0.01, patience = 3, verbose = 1)
callbacks = [stopOnPlateau]

In [10]:
#Extract Class Weights from the "free"/"busy" labels of the training frame
classes = train["class"].tolist()
weights_dict = getClassWeightsFromLabels(labels = classes)
print(weights_dict)

{0: 0.992240003360215, 1: 1.0078823303539048}


In [11]:
#Build Model
Model = makeModel((150, 150, 3), "DenseNet201")
opt = tf.optimizers.Adam()
Model.compile(
    optimizer = opt,
    #makeModel ends in a sigmoid unit, so the outputs are probabilities, not logits
    loss = keras.losses.BinaryCrossentropy(from_logits = False),
    metrics = ["accuracy"]
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
#Fit data: up to 100 epochs, class-weighted loss, early stopping via `callbacks`
Model.fit(
    x = train_generator,
    epochs = 100,
    callbacks = callbacks,
    class_weight = weights_dict
)

Epoch 1/100
  1/739 [..............................] - ETA: 0s - loss: 0.7171 - accuracy: 0.4688

In [None]:
#Test accuracy: evaluate returns the loss followed by the compiled metrics
Model.evaluate(x = test_generator)

In [None]:
#Save the model under Models/DenseNET201 (relative to the working directory)
Model.save(filepath = "Models/DenseNET201")