In [1]:
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from tensorflow.random import set_seed
from typing import Tuple, List
from tqdm import tqdm
import pandas as pd
import numpy as np
import sklearn
import keras
import os

In [2]:
#Seeding random state to 13 always, for reproducibility
np.random.seed(13)
set_seed(13)

In [3]:
#Helper Function: Return the paths to all jpg files found within a directory
def getImageDirs(root: str = "data"):
    imageDirs = []
    for subDirectory, directory, files in os.walk(root):
        for file in files:
            if file[-4:] == ".jpg":
                path = os.path.join(subDirectory, file)         
                imageDirs.append(path)
    return(imageDirs)

In [4]:
#Helper Function: Return the class weights given a list of classes
def getClassWeightsFromLabels(labels: List[int]):# -> Dict[int]:
    weights = sklearn.utils.class_weight.compute_class_weight(class_weight="balanced", classes=np.unique(labels), y=labels)
    return {0: weights[0], 1: weights[1]}

In [5]:
#Helper Function: Return the img paths and classes in seperate lists given a txt file from the LABELS folder
def getDirsAndClasses(root: str, file: str) -> Tuple[List[str], List[int]]:
    imageDirs = []
    classes = []
    line = ""
    with open(root + file, "r") as f:
        for line in tqdm(f):
            imageDir, clazz = line.split()
            imageDirs.append(imageDir)
            classes.append(int(clazz))
    return imageDirs, classes

In [6]:
#Get the Train Dataset using split from the LABELS folder
root = os.getcwd() + "\\Data\\CNR-EXT-150x150"
imageDirs, classes = getDirsAndClasses(root, "\\LABELS\\train.txt")
root = root + "\\PATCHES\\"
train = pd.DataFrame([
            {
                "image": root + filename,
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])
#Now Get Test
root = os.getcwd() + "\\Data\\CNR-EXT-150x150"
imageDirs, classes = getDirsAndClasses(root, "\\LABELS\\test.txt")
root = root + "\\PATCHES\\"
test = pd.DataFrame([
            {
                "image": root + filename,
                "class": "free" if clazz == 0 else "busy"
            }
            for filename, clazz in tqdm(zip(imageDirs, classes))
    ])

94493it [00:00, 882360.18it/s]
94493it [00:00, 676790.14it/s]
31825it [00:00, 908509.89it/s]
31825it [00:00, 1514210.64it/s]


In [7]:
#Declare data generators and preprocessing
train_datagen = ImageDataGenerator(
    #Augment data with random flips, normalize each sample's input
    vertical_flip = True,
    horizontal_flip = True,
    rescale = 1.0 / 255.0,
    samplewise_std_normalization = True
)
train_generator = train_datagen.flow_from_dataframe(
    directory = None, #none since the df has absolute paths
    dataframe = train,
    x_col = "image",
    y_col = "class",
    validate_filenames = False, #faster for huge datasets
    target_size = (150, 150),
    color_mode = "rgb",
    batch_size = 128,
    class_mode = "binary",
    shuffle = True
)

test_datagen = ImageDataGenerator(
    samplewise_std_normalization = True
)
test_generator = train_datagen.flow_from_dataframe(
    directory = None,
    dataframe = test,
    x_col = "image",
    y_col = "class",
    validate_filenames = False,
    target_size = (150, 150),
    color_mode = "rgb",
    batch_size = 128,
    class_mode = "binary",
    shuffle = True
)




Found 94493 non-validated image filenames belonging to 2 classes.
Found 31825 non-validated image filenames belonging to 2 classes.




In [8]:
#Declare Callbacks: stop training if accuracy doesn't rise 1% within 3 epochs
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor = "accuracy",
        min_delta = 0.01,
        patience = 3,
        verbose = 1
    )
]

In [9]:
#Extract Class Weights
classes = list(train["class"])
weights_dict = getClassWeightsFromLabels(classes)
print(weights_dict)

{0: 0.992240003360215, 1: 1.0078823303539048}


In [10]:
#Build Model
AlexNet = Sequential()

#1st Convolutional Layer
AlexNet.add(Convolution2D(filters=96, input_shape=(150,150,3), kernel_size=(11,11), strides=(4,4), padding='same'))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))
AlexNet.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

#2nd Convolutional Layer
AlexNet.add(Convolution2D(filters=256, kernel_size=(5, 5), strides=(1,1), padding='same'))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))
AlexNet.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

#3rd Convolutional Layer
AlexNet.add(Convolution2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same'))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))

#4th Convolutional Layer
AlexNet.add(Convolution2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same'))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))

#5th Convolutional Layer
AlexNet.add(Convolution2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='same'))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))
AlexNet.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

#Passing it to a Fully Connected layer
AlexNet.add(Flatten())
# 1st Fully Connected Layer
AlexNet.add(Dense(4096, input_shape=(32,32,3,)))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))
# Add Dropout to prevent overfitting
AlexNet.add(Dropout(0.4))

#2nd Fully Connected Layer
AlexNet.add(Dense(4096))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))
#Add Dropout
AlexNet.add(Dropout(0.4))

#3rd Fully Connected Layer
AlexNet.add(Dense(1000))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('relu'))
#Add Dropout
AlexNet.add(Dropout(0.4))

#Output Layer
AlexNet.add(Dense(1))
AlexNet.add(BatchNormalization())
AlexNet.add(Activation('sigmoid'))

In [11]:
#Declare Optimizer
opt = keras.optimizers.Adam()
AlexNet.compile(
    optimizer = opt,
    loss = keras.losses.BinaryCrossentropy(from_logits = True),
    metrics = ["accuracy"]
)

In [12]:
AlexNet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 38, 38, 96)        34944     
_________________________________________________________________
batch_normalization (BatchNo (None, 38, 38, 96)        384       
_________________________________________________________________
activation (Activation)      (None, 38, 38, 96)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 19, 19, 96)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 19, 19, 256)       614656    
_________________________________________________________________
batch_normalization_1 (Batch (None, 19, 19, 256)       1024      
_________________________________________________________________
activation_1 (Activation)    (None, 19, 19, 256)       0

In [13]:
#Fit data
AlexNet.fit(
    train_generator,
    callbacks = callbacks,
    epochs = 100,
    class_weight = weights_dict,
    max_queue_size = 1000,
    workers = os.cpu_count()
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 00008: early stopping


<tensorflow.python.keras.callbacks.History at 0x1e9d0edb9d0>

In [14]:
#Test accuracy
AlexNet.evaluate(
    test_generator,
    max_queue_size = 1000,
    workers = os.cpu_count()
)



[0.5409442782402039, 0.988970935344696]

In [15]:
#Save the model
AlexNet.save("Models/AlexNet")

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: Models/AlexNet\assets
