# Keras example: AlexNet on Dogs vs. Cats

## Building the HDF5 dataset

In [None]:
from config import dogs_vs_cats_config as config

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from keras_example.preprocessing.aspectawareprocessor import AspectAwareProcessor
from keras_example import HDF5DatasetWriter

from imutils import paths

import numpy as np
import progressbar
import json
import cv2
import os

In [None]:
# Collect every image path found under the configured images directory.
trainPaths = list(paths.list_images(config.IMAGES_PATH))

# The class label is the part of the file name before its first ".".
# The path separator lives at os.path.sep (the `os` module has no `paths`
# attribute, so the previous spelling raised AttributeError).
trainLabels = [p.split(os.path.sep)[-1].split(".")[0]
    for p in trainPaths]

# Encode the string labels as integers.
le = LabelEncoder()
trainLabels = le.fit_transform(trainLabels)

In [None]:
# First carve a stratified test set of config.NUM_TEST_IMAGES images out of
# the full list of paths/labels; whatever is left stays in the train lists.
testSplit = train_test_split(
    trainPaths, trainLabels,
    test_size=config.NUM_TEST_IMAGES, stratify=trainLabels, random_state=42)
(trainPaths, testPaths, trainLabels, testLabels) = testSplit

# Then carve a stratified validation set of config.NUM_VAL_IMAGES images out
# of the remaining training data, using the same fixed seed.
valSplit = train_test_split(
    trainPaths, trainLabels,
    test_size=config.NUM_VAL_IMAGES, stratify=trainLabels, random_state=42)
(trainPaths, valPaths, trainLabels, valLabels) = valSplit



In [None]:
# One entry per split: (split name, image paths, encoded labels, output file).
datasets = [
    ('train', trainPaths, trainLabels, config.TRAIN_HDF5),
    ('val', valPaths, valLabels, config.VAL_HDF5),
    ('test', testPaths, testLabels, config.TEST_HDF5),
]

# Every image is passed through this processor before being stored. The
# writer below is sized for 256x256x3 entries, so the processor is presumably
# expected to return images of that shape -- TODO confirm.
aap = AspectAwareProcessor(256,256)

# Per-image channel means, collected from the training split only.
(R,G,B) = ([],[],[])

# The loop variable is called imagePaths (not `paths`) so that it does not
# rebind the imported `imutils.paths` module used to list the images.
for (dType, imagePaths, labels, outputPath) in datasets:
    print(f"[INFO] building {outputPath}")
    writer = HDF5DatasetWriter((len(imagePaths), 256,256,3), outputPath)

    widgets = [
        f"Building Dataset {dType}: ",
        progressbar.Percentage(),
        " ",
        progressbar.Bar(),
        " ",
        progressbar.ETA()
    ]

    pbar = progressbar.ProgressBar(
        maxval=len(imagePaths),
        widgets=widgets
        ).start()

    for (i, (path,label)) in enumerate(zip(imagePaths, labels)):
        # NOTE: cv2.imread returns None for a file it cannot decode; such a
        # file is not skipped here and is handed to the processor as-is.
        image = cv2.imread(path)
        image = aap.preprocess(image)

        if dType == "train":
            # OpenCV stores channels in B, G, R order; cv2.mean returns a
            # 4-tuple, of which only the first three entries are used.
            (b,g,r) = cv2.mean(image)[:3]
            R.append(r)
            G.append(g)
            B.append(b)

        writer.add([image], [label])
        # Pass the current index: update() without a value does not move
        # the bar forward.
        pbar.update(i)

    pbar.finish()
    writer.close()

In [None]:
# Average the per-image channel means gathered while building the training
# set and store them as a JSON object with keys "R", "G" and "B".
print("[INFO] serializing means...")
D = {
    channel: np.mean(values)
    for (channel, values) in zip(("R", "G", "B"), (R, G, B))
}

with open(config.DATASET_MEAN, "w") as f:
    json.dump(D, f)

## Training AlexNet

In [None]:
from config import dogs_vs_cats_config

from keras_example.preprocessing.imagetoarrayprocessor import ImageToArrayPreprocessor
from keras_example.preprocessing.simplepreprocessor import SimpleProcessor
from keras_example.preprocessing.patchpreprocessor import PatchPreprocessor
from keras_example.preprocessing.meanpreprocessor import MeanPreprocessor

from keras_example.callbacks.trainingmonitor import TrainingMonitor

from keras_example.io.hdf5datasetgenerator import HDF5DatasetGenerator

from keras_example.nn.conv.alexnet import AlexNet

from keras.preprocessing.image import ImageDataGenerator

from keras.optimizers import Adam

import json
import os

In [None]:
# Random augmentation settings handed to the training data generator:
# rotations, zooms, horizontal/vertical shifts, shears and horizontal flips,
# with pixels exposed by a transform filled from the nearest valid pixel.
augSettings = {
    "rotation_range": 20,
    "zoom_range": 0.15,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.15,
    "horizontal_flip": True,
    "fill_mode": 'nearest',
}
aug = ImageDataGenerator(**augSettings)

In [None]:
# Load the per-channel training means written by the dataset-building step.
# The context manager closes the file handle as soon as the JSON is parsed.
with open(config.DATASET_MEAN) as f:
    means = json.load(f)

# Preprocessors shared by the data generators below.
# NOTE(review): presumably `sp` resizes to 227x227 and `pp` extracts a
# 227x227 patch -- confirm against the keras_example preprocessing classes.
sp = SimpleProcessor(227,227)
pp = PatchPreprocessor(227,227)
# Built from the stored R, G and B training means.
mp = MeanPreprocessor(means["R"], means["G"], means["B"])
iap = ImageToArrayPreprocessor()

In [None]:
# Training batches: 128 images per batch read from the training HDF5 file,
# augmented with `aug` and run through the patch, mean and array
# preprocessors.
trainGen = HDF5DatasetGenerator(
    config.TRAIN_HDF5,
    128,
    aug=aug,
    preprocessors=[pp, mp, iap],
    classes=2
)

# Validation batches must come from the validation HDF5 file; reading
# config.TRAIN_HDF5 here made the validation metrics a second measurement
# of the training data.
# NOTE(review): validation batches are still passed through `aug`.
# Evaluation data is normally left unaugmented -- consider dropping the
# argument if HDF5DatasetGenerator allows omitting it.
valGen = HDF5DatasetGenerator(
    config.VAL_HDF5,
    128,
    aug=aug,
    preprocessors=[sp, mp, iap],
    classes=2
)

In [None]:
# Build AlexNet for 227x227x3 inputs and two classes, then compile it with
# the Adam optimizer.
print("[INFO] compiling model ...")
opt = Adam(lr=1e-3)
model = AlexNet.build(
    width=227,
    height=227,
    depth=3,
    classes=2,
    reg=0.0002
)

model.compile(
    loss = "binary_crossentropy",
    optimizer=opt,  # the comma after this argument was missing (SyntaxError)
    metrics=["accuracy"]
)


# Name the training-monitor output after the current process id so that
# separate runs write to different paths. os.getpid() takes no arguments;
# os.getpgid() requires a pid and raised TypeError when called without one.
path = os.path.sep.join(
    [config.OUTPUT_PATH,
    f"{os.getpid()}"]
)

callbacks = [TrainingMonitor(path)]

print("[INFO] training model ...")

# The step counts use the same batch size (128) the generators were
# created with, so one epoch covers each dataset once (minus any remainder).
H =  model.fit_generator(
    trainGen.generator(),
    steps_per_epoch=trainGen.numImages // 128,
    validation_data=valGen.generator(),
    validation_steps=valGen.numImages // 128,
    epochs=75,
    max_queue_size=10,
    callbacks=callbacks,
    verbose=1
)

In [None]:
# Write the trained model to config.MODEL_PATH (replacing any existing file),
# then close the training and validation generators, in that order.
print("[INFO] serializing model ...")
model.save(config.MODEL_PATH, overwrite=True)

for generator in (trainGen, valGen):
    generator.close()