In [1]:
import os, shutil

original_dataset_dir = "cats_vs_dogs/train"

dirs = []

base_dir = "cats_vs_dogs_small"
dirs.append(base_dir)

train_dir = os.path.join(base_dir, "train")
dirs.append(train_dir)
validation_dir = os.path.join(base_dir, "validation")
dirs.append(validation_dir)
test_dir = os.path.join(base_dir, "test")
dirs.append(test_dir)

train_cats_dir = os.path.join(train_dir, "cats")
dirs.append(train_cats_dir)
train_dogs_dir = os.path.join(train_dir, "dogs")
dirs.append(train_dogs_dir)

validation_cats_dir = os.path.join(validation_dir, "cats")
dirs.append(validation_cats_dir)
validation_dogs_dir = os.path.join(validation_dir, "dogs")
dirs.append(validation_dogs_dir)

test_cats_dir = os.path.join(test_dir, "cats")
dirs.append(test_cats_dir)
test_dogs_dir = os.path.join(test_dir, "dogs")
dirs.append(test_dogs_dir)

for directory in dirs:
    if not os.path.exists(directory):
       os.mkdir(directory)

fnames = ["cat.{}.jpg".format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dest = os.path.join(train_cats_dir, fname)
    shutil.copy(src, dest)

fnames = ["cat.{}.jpg".format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dest = os.path.join(validation_cats_dir, fname)
    shutil.copy(src, dest)

fnames = ["cat.{}.jpg".format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dest = os.path.join(test_cats_dir, fname)
    shutil.copy(src, dest)

fnames = ["dog.{}.jpg".format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dest = os.path.join(train_dogs_dir, fname)
    shutil.copy(src, dest)

fnames = ["dog.{}.jpg".format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dest = os.path.join(validation_dogs_dir, fname)
    shutil.copy(src, dest)

fnames = ["dog.{}.jpg".format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dest = os.path.join(test_dogs_dir, fname)
    shutil.copy(src, dest)

for directory in dirs:
    print(directory, ":", len(os.listdir(directory)))

print("Done.")

cats_vs_dogs_small : 3
cats_vs_dogs_small\train : 2
cats_vs_dogs_small\validation : 2
cats_vs_dogs_small\test : 2
cats_vs_dogs_small\train\cats : 1000
cats_vs_dogs_small\train\dogs : 1000
cats_vs_dogs_small\validation\cats : 500
cats_vs_dogs_small\validation\dogs : 500
cats_vs_dogs_small\test\cats : 500
cats_vs_dogs_small\test\dogs : 500
Done.


- FIND DATA

- PREPROCESSING
    - Resize
    - Labeling
    - Balance
    - Shuffle
    - Train/Validation/Test - Split
    
- ANN ARCHITECTURE
    - Layers
        - Input / Output
        - Activation Function
    - Compile
        - Optimizer
        - Loss
        - Metrics
    
- TRAINING
    - fitting (incl. validation)
    - plot
    
- EVALUATE

- DEPLOY

In [7]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Conv2D(
    32, (3, 3),
    activation="relu",
    input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation="relu"))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation="relu"))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation="relu"))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dropout(0.25))
model.add(layers.Dense(1, activation="sigmoid"))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 15, 15, 128)       147584    
__________

In [3]:
from keras import optimizers
model.compile(
    optimizer=optimizers.RMSprop(lr=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [4]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1.0 / 255.0) #normalization
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size = (150,150), #resizing / scaling all pictures to 150x150
    batch_size = 20,
    class_mode = "binary"
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size = (150,150), #resizing / scaling all pictures to 150x150
    batch_size = 20,
    class_mode = "binary"
)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [6]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch = 100, #since batch size is 20, setting this number lower than 100 means you're not
                            # processing all your data in each epoch
                            # => 2000/20
    epochs = 30,
    validation_data = validation_generator,
    validation_steps = 50 # => 1000/20
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [31]:
#from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rescale = 1.0/255.0,
    rotation_range = 20
)

generator = datagen.flow_from_directory(
    train_dir,
    target_size = (150, 150),
    batch_size = 10,
    class_mode = "binary"
)

import matplotlib.pyplot as plt
from matplotlib import gridspec

fig = plt.figure()


images, labels = next(generator)
print("ImagesShape:", images.shape)
print("Labels:", labels)

#fig = plt.figure()
#gs = gridspec.GridSpec(2, 5)


for i, image in enumerate(images):
    print(i//5 + 1, i%5 + 1)
    fig.add_subplot(i//5,i % 5)
    plt.imshow(image)
    
plt.show()
plt.close()

Found 2000 images belonging to 2 classes.
ImagesShape: (10, 150, 150, 3)
Labels: [1. 1. 0. 0. 0. 0. 0. 1. 0. 1.]
1 1


ValueError: Illegal argument(s) to subplot: (0, 0)

<Figure size 432x288 with 0 Axes>

In [13]:
def generate(batch_size):
    i = 0
    while True:
        arr = []
        for _ in range(batch_size):
            i += 1
            arr.append(i)
        yield arr
        
g = generate(2)
print(g)
print(next(g))
print(next(g))
print(next(g))

<generator object generate at 0x000001DC043260A0>
[1, 2]
[3, 4]
[5, 6]
