In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import cv2
from typing import Tuple, List
import os
import sys
from tqdm import tqdm

In [2]:
# Record the library and interpreter versions this notebook was run with.
print(f'Tensorflow version: {tf.__version__}')
print(f'Pandas version: {pd.__version__}')
print(f'NumPy version: {np.__version__}')
print(f'OpenCV version: {cv2.__version__}')
# Report the kernel's own interpreter. Shelling out with `!python --version`
# shows whichever `python` is first on PATH, which may not be this kernel.
print(f'Python version: {sys.version}')

Tensorflow version: 2.3.1
Pandas version: 1.1.1
NumPy version: 1.18.5
OpenCV version: 4.4.0


Python 3.6.10 :: Anaconda, Inc.


In [3]:
"""
We turn this on to prevent tensorflow from throwing a fit about
things that take longer than the batch training in the model.fit
call (namely, the tensorboard callback can take more time to
execute than the batch training iteration itself).

NOTE: This can be disabled simply by commenting it out. If you
think there is a weird tensorflow issue happening, do that to see
the full tensorflow logs during runtime.
"""
# Set the TF1-compat logger threshold to ERROR (see the note above for why).
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [4]:
# Shared identifier: default Keras model name, TensorBoard log sub-directory,
# and the folder name used when the trained model is saved.
modelName = "SimpleDenseNet"

## Utility Functions

In [5]:
def getDirsAndClasses(root: str, file: str) -> Tuple[List[str], List[int]]:
    """
    Read a label file and return the image paths and their integer classes.

    Each non-blank line is expected to look like ``<image path> <class id>``.
    The class id is taken as the last whitespace-separated token, so image
    paths that contain spaces are kept intact. Blank lines are skipped.

    Args:
        root: Directory prefix; concatenated directly with ``file``.
        file: Label file path relative to ``root``. Because the two are joined
            by plain string concatenation, it must carry its own leading
            path separator.

    Returns:
        A tuple ``(imageDirs, classes)`` of equal-length lists, in file order.

    Raises:
        OSError: If the label file cannot be opened.
        ValueError: If a non-blank line has no separate class token, or the
            class token is not an integer.
    """
    imageDirs = []
    classes = []
    with open(root + file, "r") as f:
        for line in tqdm(f):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. an empty line at the end of the file).
                continue
            # Split from the right so only the final token is the class id.
            imageDir, clazz = line.rsplit(maxsplit=1)
            imageDirs.append(imageDir)
            classes.append(int(clazz))
    return imageDirs, classes

In [6]:
def preprocessImage(img: np.ndarray) -> np.ndarray:
    """
    Convert an image to float16 and divide every value by 255.

    For 8-bit images this maps pixel values into the range [0, 1].

    Args:
        img: Image array, e.g. the result of ``cv2.imread``.

    Returns:
        A new ``np.float16`` array with the same shape as ``img``.

    Raises:
        ValueError: If ``img`` is ``None``, which is what ``cv2.imread``
            returns when a file is missing or cannot be decoded.
    """
    if img is None:
        # Fail with a clear message instead of "'NoneType' has no attribute 'astype'".
        raise ValueError("preprocessImage received None; the image could not be read")
    return img.astype(np.float16) / 255.0

In [7]:
def makeOneHot(value: int, size: int) -> np.ndarray:
    """
    Build a one-hot vector of length ``size`` with a 1 at index ``value``.

    The result is a float64 array that is zero everywhere except at
    ``value``. Indexing follows NumPy's rules, so an index outside the
    vector raises ``IndexError``.
    """
    encoded = np.zeros(shape=size, dtype=np.float64)
    encoded[value] = 1.0
    return encoded

In [8]:
def balanceData(data: pd.DataFrame, randomState=None) -> pd.DataFrame:
    """
    Down-sample every class to the size of the smallest class.

    Args:
        data: Frame with a "class" column identifying each row's class.
        randomState: Optional seed forwarded to pandas' sampler so the
            selection is reproducible. ``None`` (the default) samples
            without a fixed seed.

    Returns:
        A new frame with the same columns as ``data``, an equal number of
        rows per class, and a fresh 0..n-1 index. An empty input yields an
        empty frame.
    """
    if data.empty:
        # No groups to sample from; min() over zero groups would be NaN.
        return data.reset_index(drop=True)
    grouped = data.groupby("class")
    # Compute the target size once rather than once per group.
    smallest = int(grouped.size().min())
    # GroupBy.sample keeps all original columns, including "class".
    balanced = grouped.sample(n=smallest, random_state=randomState)
    return balanced.reset_index(drop=True)

In [9]:
def makeDenseBlock(groupCount: int, inputs):
    """
    Assemble one densely connected convolutional block plus its transition.

    The block starts with one BatchNorm -> 1x1 conv -> 3x3 conv unit applied
    to ``inputs`` and then adds ``groupCount`` further units. From the second
    additional unit onwards, a unit is fed the concatenation of every earlier
    unit's output. The closing transition (BatchNorm -> 1x1 conv -> 2x2
    average pooling with stride 2) is applied to the last unit's output only.

    Args:
        groupCount: Number of units added after the initial one.
        inputs: Tensor the block is applied to.

    Returns:
        The output tensor of the transition's average-pooling layer.
    """
    def convUnit(tensor):
        # BatchNorm -> 1x1 conv -> 3x3 conv, each conv with 64 ReLU filters.
        normed = layers.BatchNormalization()(tensor)
        squeezed = layers.Conv2D(filters=64, kernel_size=(1, 1), activation="relu", padding="same")(normed)
        return layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(squeezed)

    current = convUnit(inputs)
    unitOutputs = [current]
    for _ in range(groupCount):
        # A single feature map needs no concatenation; it is used as-is.
        if len(unitOutputs) > 1:
            current = layers.Concatenate()(unitOutputs)
        current = convUnit(current)
        unitOutputs.append(current)

    # Transition: normalise, mix channels, then halve the spatial resolution.
    current = layers.BatchNormalization()(current)
    current = layers.Conv2D(filters=64, kernel_size=(1, 1), activation="relu", padding="same")(current)
    return layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(current)

In [10]:
def makeModel(inputShape: Tuple[int, ...], modelName=modelName, numClasses: int = 2) -> keras.Model:
    """
    Build the classifier with the Keras functional API.

    Architecture: 7x7 stride-2 convolution -> 3x3 stride-2 max pooling ->
    one dense block (``makeDenseBlock`` with ``groupCount=3``) -> global
    average pooling -> softmax classification layer.

    Source: https://www.tensorflow.org/guide/keras/functional#a_toy_resnet_model

    Note that I tend to prefer the super-explicit (if somewhat verbose) style.
    This style is technically unnecessary, but it helps with readability.

    Args:
        inputShape: Shape of one input sample, without the batch dimension.
        modelName: Name given to the Keras model.
        numClasses: Number of output units. Defaults to 2.

    Returns:
        An uncompiled ``keras.Model``. Its outputs are softmax probabilities,
        so a loss used with it should treat them as probabilities rather
        than logits.
    """
    inputs = keras.Input(shape=inputShape, name="Input")
    x = layers.Conv2D(filters=32, kernel_size=(7, 7), strides=(2, 2), activation="relu")(inputs)
    x = layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)

    x = makeDenseBlock(groupCount=3, inputs=x)

    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(units=numClasses, activation="softmax")(x)

    return keras.Model(inputs=inputs, outputs=outputs, name=modelName)

## Global Variables

In [11]:
callbacks = [
    # Stop when training accuracy has not improved by at least 0.01
    # for 3 consecutive epochs.
    keras.callbacks.EarlyStopping(monitor="accuracy", min_delta=0.01, patience=3, verbose=1),
    # Write TensorBoard logs (graph and weight images) under logs/<modelName>.
    keras.callbacks.TensorBoard(log_dir=f'logs/{modelName}', write_graph=True, write_images=True),
]

# Dataset root, resolved relative to the notebook's working directory.
# Built with os.path.join so the path is valid on any OS, not only Windows.
root = os.path.join(os.getcwd(), "Data", "CNR-EXT-150x150")
# The LABELS folder is also expected to hold train.txt / test.txt / val.txt
# splits; this notebook loads all.txt and creates its own split further down.
# getDirsAndClasses concatenates root + file, so the relative label path
# needs its own leading separator.
imageDirs, classes = getDirsAndClasses(root, os.sep + os.path.join("LABELS", "all.txt"))

# Human-readable name for each integer class id.
classDict = {
    0: "free",
    1: "busy"
}

# Number of samples per batch when the tf.data pipelines are built.
batchSize = 128

144965it [00:00, 894077.56it/s]


## Data Acquisition and Preprocessing

In [None]:
# Load every labelled patch into memory, scaled by preprocessImage.
# "weather" is the first character of the image's relative path.
data = pd.DataFrame([
    {
        # os.path.join keeps the path valid on any OS, not only Windows.
        "image": preprocessImage(cv2.imread(os.path.join(root, "PATCHES", filename))),
        "class": clazz,
        "weather": filename[0]
    }
    # zip() has no length, so give tqdm the total to show real progress.
    for filename, clazz in tqdm(zip(imageDirs, classes), total=len(imageDirs))
])

43880it [00:45, 965.77it/s]

In [None]:
# Row count per class, to check how balanced the labels are.
data.groupby("class")["class"].value_counts()

In [None]:
# Row count per weather code.
data.groupby("weather")["weather"].value_counts()

In [None]:
# Row count for each (class, weather) combination.
data.groupby(["class", "weather"])["class"].value_counts()

In [None]:
classes = list(data.groupby("class").groups.keys())

In [None]:
data["onehot"] = data["class"].apply(
    func=lambda x: makeOneHot(classes.index(x), len(classes))
)

### Split the data into train and test subsets

In [None]:
# Stratified split: draw 80% of each class for training.
# A fixed seed keeps the split, and therefore the reported metrics, reproducible.
train = data.groupby("class").sample(frac=0.8, random_state=42)
train.groupby("class")["class"].value_counts()

In [None]:
# Every row that was not drawn into `train` becomes the test set.
test = data[~data.index.isin(train.index)].reset_index(drop=True)
test.groupby("class")["class"].value_counts()

In [None]:
# Renumber the training rows 0..n-1. This is done only after `test` was
# derived, because that step uses train's original index labels.
train = train.reset_index(drop=True)

In [None]:
# Replace the `train` DataFrame with a batched tf.data pipeline of
# (image, one-hot label) pairs. len(train) is evaluated while `train` is
# still the DataFrame, so the shuffle buffer spans the whole training set.
# NOTE(review): because `train` is rebound here, this cell cannot be re-run
# without first re-running the split cells above.
train = tf.data.Dataset.from_tensor_slices(
    (
        np.array(train["image"].values.tolist()),
        np.array(train["onehot"].values.tolist())
    )
).shuffle(
    buffer_size=len(train),
    reshuffle_each_iteration=True
).batch(batchSize)

In [None]:
# Replace the `test` DataFrame with a batched tf.data pipeline of
# (image, one-hot label) pairs.
# The evaluation set is deliberately NOT shuffled: order does not affect
# model.evaluate, and a stable order keeps model.predict outputs aligned
# with the rows of the test set.
test = tf.data.Dataset.from_tensor_slices(
    (
        np.array(test["image"].values.tolist()),
        np.array(test["onehot"].values.tolist())
    )
).batch(batchSize)

## Model Definition

In [None]:
# Build the network for the shape of the first loaded image
# (assumes every image shares that shape — TODO confirm).
model = makeModel(inputShape=data.loc[0, "image"].shape)

In [None]:
model.compile(
    optimizer="adam",
    # The network's final Dense layer already applies softmax, so the loss
    # receives probabilities, not logits.
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"]
)

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

## Training and Evaluation

In [None]:
# Train for at most 100 epochs; the EarlyStopping callback may end training
# sooner. No validation data is passed, so only training metrics are reported.
model.fit(
    train,
    epochs=100,
    callbacks=callbacks
)

In [None]:
# Evaluate on the held-out test pipeline: returns the loss followed by the
# compiled metric (accuracy).
loss, accuracy = model.evaluate(test)

In [None]:
# Per-sample model outputs for the test pipeline, shown as the cell's output.
model.predict(test)

## Results

In [None]:
# Report the metrics computed by model.evaluate on the test set.
print(f'Test Loss: {loss}\nTest Accuracy: {accuracy}')

In [None]:
# Persist the trained model under Models/<modelName>.
model.save(f'Models/{modelName}')