In [1]:
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from keras.models import Sequential
from typing import Tuple, List
import tensorflow as tf
from tensorflow.random import set_seed
from tqdm import tqdm
import pandas as pd
import numpy as np
import sklearn
import keras
import os

In [2]:
#Seeding random state to 13 always, for reproducibility
np.random.seed(13)
set_seed(13)

In [3]:
#Helper Function: Return the paths to all jpg files found within a directory
def getImageDirs(root: str = "data"):
    imageDirs = []
    for subDirectory, directory, files in os.walk(root):
        for file in files:
            if file[-4:] == ".jpg":
                path = os.path.join(subDirectory, file)         
                imageDirs.append(path)
    return(imageDirs)

In [4]:
#Helper Function: Return the class weights given a list of classes
def getClassWeightsFromLabels(labels: List[int]):# -> Dict[int]:
    weights = sklearn.utils.class_weight.compute_class_weight(class_weight="balanced", classes=np.unique(labels), y=labels)
    return {0: weights[0], 1: weights[1]}

In [5]:
#Helper Function: Return the img paths and classes in seperate lists given a txt file from the LABELS folder
def getDirsAndClasses(root: str, file: str) -> Tuple[List[str], List[int]]:
    imageDirs = []
    classes = []
    line = ""
    with open(root + file, "r") as f:
        for line in tqdm(f):
            imageDir, clazz = line.split()
            imageDirs.append(imageDir)
            classes.append(int(clazz))
    return imageDirs, classes

In [6]:
#Helper Function: Create a Keras prebuilt model
def makeModel(inputShape: Tuple[int]) -> keras.Model:
    """
    Source: https://www.tensorflow.org/guide/keras/functional#a_toy_resnet_model
    
    Note that I tend to prefer the super-explicit (if somewhat verbose) style. 
    This style is technically unnecessary, but it helps with readability.
    """
    inputs = keras.Input(shape=inputShape, name="Input")
    x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu")(inputs)
    x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(x)
    block_1_output = layers.MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x)

    x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(block_1_output)
    x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(x)
    block_2_output = layers.add([x, block_1_output])

    x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(block_2_output)
    x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(x)
    block_3_output = layers.add([x, block_2_output])

    x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(block_3_output)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(units=256, activation="relu")(x)
    x = layers.Dense(units=256, activation="relu")(x)
    x = layers.Dense(units=256, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units=1, activation="sigmoid")(x)

    return keras.Model(inputs=inputs, outputs=outputs, name="Simple_ResNet")

In [7]:
#Get the Train Dataset using split from the LABELS folder
root = os.getcwd() + "\\Data\\CNR-EXT-150x150"
imageDirs, classes = getDirsAndClasses(root, "\\LABELS\\train.txt")
root = root + "\\PATCHES\\"
train = pd.DataFrame([
            {
                "image": root + filename,
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])
#Now Get Test
root = os.getcwd() + "\\Data\\CNR-EXT-150x150"
imageDirs, classes = getDirsAndClasses(root, "\\LABELS\\test.txt")
root = root + "\\PATCHES\\"
test = pd.DataFrame([
            {
                "image": root + filename,
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])

94493it [00:00, 842960.84it/s]
94493it [00:00, 637914.37it/s]
31825it [00:00, 836794.12it/s]
31825it [00:00, 1413273.95it/s]


In [8]:
#Declare data generators and preprocessing
train_datagen = ImageDataGenerator(
    #Augment data with random flips, normalize each sample's input
    vertical_flip = True,
    horizontal_flip = True,
    rescale = 1.0 / 255.0,
    samplewise_std_normalization = True
)
train_generator = train_datagen.flow_from_dataframe(
    directory = None, #none since the df has absolute paths
    dataframe = train,
    x_col = "image",
    y_col = "class",
    validate_filenames = False, #faster for huge datasets
    target_size = (150, 150),
    color_mode = "rgb",
    batch_size = 128,
    class_mode = "binary",
    shuffle = True
)

test_datagen = ImageDataGenerator(
    samplewise_std_normalization = True
)
test_generator = train_datagen.flow_from_dataframe(
    directory = None,
    dataframe = test,
    x_col = "image",
    y_col = "class",
    validate_filenames = False,
    target_size = (150, 150),
    color_mode = "rgb",
    batch_size = 128,
    class_mode = "binary",
    shuffle = True
)



Found 94493 non-validated image filenames belonging to 2 classes.
Found 31825 non-validated image filenames belonging to 2 classes.




In [9]:
#Declare Callbacks: stop training if accuracy doesn't rise 1% within 3 epochs
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor = "accuracy",
        min_delta = 0.01,
        patience = 3,
        verbose = 1
    )
]

In [10]:
#Extract Class Weights
classes = list(train["class"])
weights_dict = getClassWeightsFromLabels(classes)
print(weights_dict)

{0: 0.992240003360215, 1: 1.0078823303539048}


In [11]:
#Build Model
Model = makeModel((150, 150, 3))
opt = tf.optimizers.Adam()
Model.compile(
    optimizer = opt,
    loss = keras.losses.BinaryCrossentropy(from_logits = True),
    metrics = ["accuracy"]
)

In [12]:
Model.summary()

Model: "Simple_ResNet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input (InputLayer)              [(None, 150, 150, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 148, 148, 32) 896         Input[0][0]                      
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 146, 146, 64) 18496       conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 48, 48, 64)   0           conv2d_1[0][0]                   
______________________________________________________________________________________

In [13]:
#Fit data
Model.fit(
    train_generator,
    callbacks = callbacks,
    epochs = 100,
    class_weight = weights_dict,
    max_queue_size = 1000,
    workers = os.cpu_count()
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00009: early stopping


<tensorflow.python.keras.callbacks.History at 0x19f294339d0>

In [14]:
#Test accuracy
Model.evaluate(
    test_generator,
    max_queue_size = 1000,
    workers = os.cpu_count()
)



[0.5486113429069519, 0.9572977423667908]

In [15]:
#Save the model
Model.save("Models/simpleResnet")

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: Models/simpleResnet\assets
