In [1]:
# from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras import layers
# from tensorflow.keras.models import Sequential
from tensorflow.random import set_seed
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from typing import Tuple, List
from tqdm import tqdm
import pandas as pd
import numpy as np
import sklearn
# import keras
import os

In [2]:
#Seeding random state to 13 always, for reproducibility
np.random.seed(13)
set_seed(13)

In [3]:
### Used to select GPU 0=first device, 1=second device, etc...
os.environ["CUDA_VISIBLE_DEVICES"]="1"

gpus = tf.config.experimental.list_physical_devices('GPU')
print('gpus:',gpus)
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

gpus: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
1 Physical GPU, 1 Logical GPUs


In [4]:
#Helper Function: Return the paths to all jpg files found within a directory
def getImageDirs(root: str = "data"):
    imageDirs = []
    for subDirectory, directory, files in os.walk(root):
        for file in files:
            if file[-4:] == ".jpg":
                path = os.path.join(subDirectory, file)         
                imageDirs.append(path)
    return(imageDirs)

In [5]:
#Helper Function: Return the class weights given a list of classes
def getClassWeightsFromLabels(labels: List[int]):# -> Dict[int]:
    weights = sklearn.utils.class_weight.compute_class_weight(class_weight="balanced", classes=np.unique(labels), y=labels)
    return {0: weights[0], 1: weights[1]}

In [6]:
#Helper Function: Return the img paths and classes in seperate lists given a txt file from the LABELS folder
def getDirsAndClasses(root: str, file: str) -> Tuple[List[str], List[int]]:
    imageDirs = []
    classes = []
    line = ""
    with open(root + file, "r") as f:
        for line in tqdm(f):
            imageDir, clazz = line.split()
            imageDirs.append(imageDir)
            classes.append(int(clazz))
    return imageDirs, classes

In [7]:
#Helper Function: Create a Keras prebuilt model
def makeModel(inputShape: Tuple[int], modelName:str ='') -> keras.Model:
    """
    Source: https://www.tensorflow.org/guide/keras/functional#a_toy_resnet_model
    
    Note that I tend to prefer the super-explicit (if somewhat verbose) style. 
    This style is technically unnecessary, but it helps with readability.

    Load model by inputing the name to modelName
    Options are "Simple_ResNet", "SimpleNet", "InceptionResNetV2", "MobileNetV2", "ResNet50V2", "DenseNet121", "DenseNet201", and "NASNetLarge"
    """
    input = keras.Input(shape=inputShape, name="Input")
    x=None
    if modelName == "Simple_ResNet":

        x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu")(input)
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(x)
        block_1_output = layers.MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x)

        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(block_1_output)
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(x)
        block_2_output = layers.add([x, block_1_output])

        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(block_2_output)
        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", padding="same")(x)
        block_3_output = layers.add([x, block_2_output])

        x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(block_3_output)
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(units=256, activation="relu")(x)
        x = layers.Dense(units=256, activation="relu")(x)
        x = layers.Dense(units=256, activation="relu")(x)
        x = layers.Dropout(0.5)(x)

    elif modelName in ["InceptionResNetV2","MobileNetV2","ResNet50V2","DenseNet121","DenseNet201","NASNetLarge"]:
        
        if modelName=="InceptionResNetV2":
            baseModel = keras.applications.InceptionResNetV2(include_top=False, weights="imagenet", input_shape=(150,150,3))(input)
        elif modelName=="MobileNetV2":
            baseModel = keras.applications.MobileNetV2(include_top=False, weights="imagenet", input_shape=(150,150,3))(input)
        elif modelName=="ResNet50V2":
            baseModel = keras.applications.ResNet50V2(include_top=False, weights="imagenet", input_shape=(150,150,3))(input)
        elif modelName=="DenseNet121":
            baseModel = keras.applications.DenseNet121(include_top=False, weights="imagenet", input_shape=(150,150,3))(input)
        elif modelName=="DenseNet201":
            baseModel = keras.applications.DenseNet201(include_top=False, weights="imagenet", input_shape=(150,150,3))(input)
        elif modelName=="NASNetLarge":
            x = tf.keras.layers.experimental.preprocessing.Resizing(height=331, width=331)(input)
            baseModel = keras.applications.NASNetLarge(include_top=False, weights="imagenet", input_shape=(331,331,3))(x)
        else:
            raise("Model Name Not In Recognized Keras Models")
        
        baseModel.trainable = False
        x = layers.Flatten()(baseModel)
        x = layers.Dense(128, activation="relu")(x)
        x = layers.Dropout(0.3)(x)
        x = layers.Dense(128, activation="relu")(x)
        x = layers.Dropout(0.3)(x)

    elif modelName=="SimpleNet":
        x = layers.AveragePooling2D(pool_size=(50, 50))(input)
        x = layers.Flatten()(x)
        x = layers.Dense(64, activation="relu")(x)
        x = layers.Dropout(0.3)(x)
        x = layers.Dense(64, activation="relu")(x)
        x = layers.Dropout(0.3)(x)

    if x != None:
        output=layers.Dense(1, activation="sigmoid")(x)
        return keras.Model(inputs=input, outputs=output, name=modelName)
    else:
        raise("Model Name Not Recognized")

In [8]:
#Get the Train Dataset using split from the LABELS folder
root = os.getcwd() + "/CNR-EXT-Patches-150x150"
imageDirs, classes = getDirsAndClasses(root, "/LABELS/train.txt")
root = root + "/PATCHES/"
train = pd.DataFrame([
            {
                "image": root + filename,
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])
#Now Get Test
root = os.getcwd() + "/CNR-EXT-Patches-150x150"
imageDirs, classes = getDirsAndClasses(root, "/LABELS/test.txt")
root = root + "/PATCHES/"
test = pd.DataFrame([
            {
                "image": root + filename,
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])
dataset=train.append(test)


94493it [00:00, 1065564.27it/s]
94493it [00:00, 1044370.58it/s]
31825it [00:00, 1278689.97it/s]
31825it [00:00, 1993722.74it/s]


In [9]:
# Choose number of folds (1=normal experiment)
n_folds = 10

# Make the k folds
kFCV_sets=[]

busy_samples=dataset.loc[dataset["class"] == "busy"]
free_samples=dataset.loc[dataset["class"] == "free"]

from sklearn.model_selection import KFold

if n_folds != 1:
    busy_kf = sklearn.model_selection.KFold(n_splits = n_folds)
    free_kf = sklearn.model_selection.KFold(n_splits = n_folds)


for k in range(n_folds):

    if n_folds != 1:
        busy = next(busy_kf.split(busy_samples), None)
        free = next(busy_kf.split(free_samples), None)
    else:
        busy = [i for i in range(int(len(busy_train)*0.9))],[i for i in range(int(len(busy_train)*0.9), len(busy_train))]
        free = [i for i in range(int(len(free_train)*0.9))],[i for i in range(int(len(free_train)*0.9), len(free_train))]

    busy_train, free_train = busy_samples.iloc[busy[0]], free_samples.iloc[free[0]]
    busy_train, busy_val = busy_train[:int(len(busy_train)*0.9)], busy_train[int(len(busy_train)*0.9):]
    free_train, free_val = free_train[:int(len(free_train)*0.9)], free_train[int(len(free_train)*0.9):]

    train = busy_train.append(free_train)
    val = busy_val.append(free_val)
    test = busy_samples.iloc[busy[1]].append(free_samples.iloc[free[1]])

    


    #Declare data generators and preprocessing
    train_datagen = ImageDataGenerator(
        #Augment data with random flips, normalize each sample's input
        vertical_flip = True,
        horizontal_flip = True,
        rescale = 1.0 / 255.0,
        samplewise_std_normalization = True
    )
    train_generator = train_datagen.flow_from_dataframe(
        directory = None, #none since the df has absolute paths
        dataframe = train,
        x_col = "image",
        y_col = "class",
        validate_filenames = False, #faster for huge datasets
        target_size = (150, 150),
        color_mode = "rgb",
        batch_size = 128,
        class_mode = "binary",
        shuffle = True
    )

    test_datagen = ImageDataGenerator(
        samplewise_std_normalization = True
    )
    test_generator = test_datagen.flow_from_dataframe(
        directory = None,
        dataframe = test,
        x_col = "image",
        y_col = "class",
        validate_filenames = False,
        target_size = (150, 150),
        color_mode = "rgb",
        batch_size = 128,
        class_mode = "binary",
        shuffle = True
    )
    val_generator = test_datagen.flow_from_dataframe(
        directory = None,
        dataframe = val,
        x_col = "image",
        y_col = "class",
        validate_filenames = False,
        target_size = (150, 150),
        color_mode = "rgb",
        batch_size = 128,
        class_mode = "binary",
        shuffle = True
    )

    print()

    kFCV_sets.append([train_generator, test_generator, val_generator])

Found 56842 non-validated image filenames belonging to 2 classes.
Found 63159 non-validated image filenames belonging to 2 classes.
Found 6317 non-validated image filenames belonging to 2 classes.

Found 56842 non-validated image filenames belonging to 2 classes.
Found 63159 non-validated image filenames belonging to 2 classes.
Found 6317 non-validated image filenames belonging to 2 classes.



In [10]:
#Declare Callbacks: stop training if accuracy doesn't rise 1% within 3 epochs
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor = "val_loss",
        patience = 3,
        verbose = 1,
        restore_best_weights = True,
        min_delta = 0.01
    )
]

In [11]:
#Extract Class Weights (Weights will be the same for all folds)
classes = list(train["class"])
weights_dict = getClassWeightsFromLabels(classes)
print(weights_dict)

{0: 0.9585174193113217, 1: 1.045235555882461}


In [12]:
k_results=pd.DataFrame(columns = ['Fold', 'Loss', 'Accuracy'])

for i,k in enumerate(kFCV_sets):

    print("Fold",i+1,"of",len(kFCV_sets))

    #keras.backend.clear_session()
    train_generator, test_generator, val_generator = k

    #Build Model
    Model = makeModel((150, 150, 3), "DenseNET121")
    opt = tf.optimizers.Adam()
    Model.compile(
        optimizer = opt,
        loss = keras.losses.BinaryCrossentropy(from_logits = True),
        metrics = ["accuracy"]
    )

    if i == 0:
        Model.summary()

    #Fit data  
    Model.fit(
        train_generator,
        validation_data=val_generator,
        callbacks = callbacks,
        epochs = 100,
        class_weight = weights_dict,
        max_queue_size = 1000,
        workers = os.cpu_count(),
    )

    #Test accuracy
    results = Model.evaluate(
        test_generator,
        max_queue_size = 1000,
        workers = os.cpu_count(),
    )

    k_results = k_results.append({'Fold':i+1, 'Loss':results[0], 'Accuracy':results[1]}, ignore_index=True)

k_results = k_results.append({'Fold':"Avg", 'Loss':np.average(k_results['Loss']), 'Accuracy':np.average(k_results['Accuracy'])}, ignore_index=True)

Fold 1 of 2


TypeError: exceptions must derive from BaseException

In [13]:
if n_folds != 1:
    k_results.to_csv("Models/DenseNET121/k-fcv_DenseNET121.csv")

In [14]:
#Save the model
if n_folds == 1:
    Model.save("Models/DenseNET121")