In [1]:
# Pin the Keras / TensorFlow versions installed for this notebook.
!pip install keras==2.2.5
!pip install tensorflow==1.13.1
# Optional extra dependency, left disabled; uncomment if Pillow is not already installed.
# !pip install pillow



In [2]:
import os

import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing import image

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
# File the fine-tuned model is saved to.
OUTPUT_MODEL_NAME = 'classification.h5'

# Dataset layout, rooted at ./data relative to the current working directory:
# all/ is the pool images are sampled from, train/ and val/ are the sampled splits.
DATA_DIR = os.path.abspath('data')
ALL_DIR, TRAIN_DIR, VAL_DIR = (
    os.path.join(DATA_DIR, subdir) for subdir in ('all', 'train', 'val')
)

# Target size passed to the image generators when loading images.
SIZE = (224, 224)

In [63]:
import random
import shutil

def regenerate_sampling(all_dir=None, train_dir=None, val_dir=None, seed=None):
    """Rebuild the train/ and val/ splits by randomly sampling images per class.

    Every immediate sub-directory of ``all_dir`` is treated as one class. For
    each class the matching train/val directories are (re)created empty, then
    each file directly inside the class directory is copied to at most one of
    them:

    * class ``'clean'``: validation with probability 0.01, otherwise training
      with probability 0.05 (most files are skipped);
    * any other class: validation with probability 0.2, otherwise training.

    Args:
        all_dir: Directory holding one sub-directory per class.
            Defaults to the module-level ``ALL_DIR``.
        train_dir: Destination root for the training split.
            Defaults to ``TRAIN_DIR``.
        val_dir: Destination root for the validation split.
            Defaults to ``VAL_DIR``.
        seed: Optional seed for a private random generator, making the split
            reproducible. With ``None`` the global ``random`` module state is
            used, as before.
    """
    all_dir = ALL_DIR if all_dir is None else all_dir
    train_dir = TRAIN_DIR if train_dir is None else train_dir
    val_dir = VAL_DIR if val_dir is None else val_dir
    rng = random if seed is None else random.Random(seed)

    # Only the first level of all_dir defines classes. Walking the whole tree
    # would turn nested folders into bogus classes and build paths to files
    # that do not exist directly under the class directory.
    _, class_names, _ = next(os.walk(all_dir), (all_dir, [], []))

    # Sorted iteration keeps the sequence of random draws stable for a seed.
    for class_name in sorted(class_names):
        for split_dir in (train_dir, val_dir):
            create_dir = os.path.join(split_dir, class_name)
            try:
                os.makedirs(create_dir)
            except FileExistsError:
                # Start from an empty directory so stale samples do not linger.
                print(f"recreating samples for {create_dir}")
                shutil.rmtree(create_dir)
                os.makedirs(create_dir)

        if class_name == 'clean':
            val_share, train_share = 0.01, 0.05
        else:
            val_share, train_share = 0.2, 1

        source_dir = os.path.join(all_dir, class_name)
        _, _, files = next(os.walk(source_dir), (source_dir, [], []))
        for file in sorted(files):
            source_path = os.path.join(source_dir, file)
            if rng.random() < val_share:
                shutil.copyfile(source_path, os.path.join(val_dir, class_name, file))
            elif rng.random() < train_share:
                shutil.copyfile(source_path, os.path.join(train_dir, class_name, file))

# regenerate_sampling()

recreating samples for /home/paulinm/hack/RaspberryTrain/data/train/clean
recreating samples for /home/paulinm/hack/RaspberryTrain/data/val/clean
recreating samples for /home/paulinm/hack/RaspberryTrain/data/train/man
recreating samples for /home/paulinm/hack/RaspberryTrain/data/val/man
recreating samples for /home/paulinm/hack/RaspberryTrain/data/train/escavate
recreating samples for /home/paulinm/hack/RaspberryTrain/data/val/escavate


In [None]:
BATCH_SIZE = 16
FROZEN_LAYERS_NUM = 100  # Magic number out of 175: leading layers kept frozen

# NOTE(review): neither generator applies input preprocessing or augmentation,
# so the network is trained on raw pixel values; inference must feed images the
# same way.
gen = keras.preprocessing.image.ImageDataGenerator()

val_gen = keras.preprocessing.image.ImageDataGenerator()

batches = gen.flow_from_directory(
    TRAIN_DIR,
    target_size=SIZE,
    class_mode='categorical',
    shuffle=True,
    batch_size=BATCH_SIZE,
)
# Validation data is not shuffled so every epoch scores the same images in the
# same order, which keeps val_loss comparable between epochs for EarlyStopping.
val_batches = val_gen.flow_from_directory(
    VAL_DIR,
    target_size=SIZE,
    class_mode='categorical',
    shuffle=False,
    batch_size=BATCH_SIZE,
)

# Take the sample counts from the iterators themselves: they only count images
# inside class sub-directories, unlike a raw walk over the split directories.
num_train_samples = batches.samples
num_valid_samples = val_batches.samples

# Ceiling division so the final partial batch is included and a split smaller
# than one batch still yields one step instead of zero.
num_train_steps = (num_train_samples + BATCH_SIZE - 1) // BATCH_SIZE
num_valid_steps = (num_valid_samples + BATCH_SIZE - 1) // BATCH_SIZE

model = keras.applications.resnet50.ResNet50()
# model = keras.applications.mobilenet.MobileNet() may be faster

# Class names ordered by the index the generator assigned to them, i.e. in the
# same order as the columns of the model output.
classes = sorted(batches.class_indices, key=batches.class_indices.get)

# Freeze the first FROZEN_LAYERS_NUM layers; only the later ones are fine-tuned.
for layer in model.layers[:FROZEN_LAYERS_NUM]:
    layer.trainable = False

# Attach the new classification head to the layer just before the original
# output layer. Indexing with -2 avoids mutating `model.layers` via pop(),
# whose effect depends on whether the property returns the live list or a copy.
last = model.layers[-2].output
x = Dense(len(classes), activation="softmax")(last)
finetuned_model = Model(model.input, x)
finetuned_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Ad-hoc attribute read by the analysis cells to map output columns to names.
finetuned_model.classes = classes

early_stopping = EarlyStopping()

try:
    finetuned_model.fit_generator(
        batches,
        steps_per_epoch=num_train_steps,
        epochs=5,
        callbacks=[early_stopping],
        validation_data=val_batches,
        validation_steps=num_valid_steps,
    )
except KeyboardInterrupt:
    # Deliberate: interrupting the kernel stops training early but keeps the
    # model in its current state for the cells below.
    print('Training interrupted; keeping the weights reached so far.')

Found 1130 images belonging to 3 classes.
Found 249 images belonging to 3 classes.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Epoch 1/5

In [14]:
# Write the fine-tuned model to OUTPUT_MODEL_NAME.
# NOTE(review): `finetuned_model.classes` is a plain Python attribute attached to the
# model object; presumably it is not stored in the saved file — confirm, and persist
# the class list separately if the model is loaded elsewhere.
finetuned_model.save(OUTPUT_MODEL_NAME)

### Error analysis on validation

In [9]:
import numpy as np

def make_prediction(img_path):
    """Run the fine-tuned model on a single image file.

    Args:
        img_path: Path of the image to classify.

    Returns:
        The result of ``finetuned_model.predict`` for a batch containing only
        this image; callers take element ``[0]`` to get the per-class scores,
        ordered like ``finetuned_model.classes``.
    """
    # Resize to the same target size the training generators used, so images
    # of any resolution match what the network was trained on.
    img = image.load_img(img_path, target_size=SIZE)
    x = image.img_to_array(img)
    # The model expects a batch dimension: add a leading axis of length 1.
    x = np.expand_dims(x, axis=0)
    return finetuned_model.predict(x)

# Header row: one column per class, in the order of the model's output scores.
class_header = "\t\t".join(finetuned_model.classes)
print('\t\t\t {}'.format(class_header))

# One hand-picked validation image per class, printed with its per-class scores.
sample_files = ('escavate/sc4_0661.jpg', 'man/sc1_0046.jpg', 'clean/sc2_0138.jpg')
for filename in sample_files:
    img_path = os.path.join(VAL_DIR, filename)
    scores = make_prediction(img_path)[0]
    print(filename, '\t', '\t'.join(str(score) for score in scores))

			 clean		escavate		man
escavate/sc4_0661.jpg 	 0.6940015	0.26663688	0.03936166
man/sc1_0046.jpg 	 0.2460796	0.017242521	0.7366778
clean/sc2_0138.jpg 	 0.99986184	8.6562846e-05	5.160353e-05


In [11]:
from collections import defaultdict
from tqdm import tqdm

# correct_to_actual[true_class][predicted_class] -> number of validation images.
correct_to_actual = defaultdict(lambda: defaultdict(int))

# Only the immediate sub-directories of VAL_DIR are classes. Taking just the
# first level avoids treating nested folders as classes and building paths to
# files that do not sit directly inside the class directory.
_, val_class_dirs, _ = next(os.walk(VAL_DIR), (VAL_DIR, [], []))

for correct_class in val_class_dirs:
    class_dir = os.path.join(VAL_DIR, correct_class)
    _, _, files = next(os.walk(class_dir), (class_dir, [], []))
    for file in tqdm(files):
        img_path = os.path.join(class_dir, file)
        pred = make_prediction(img_path)[0]
        pred_class = finetuned_model.classes[np.argmax(pred)]  # can adjust argmax here
        correct_to_actual[correct_class][pred_class] += 1
        if correct_class != pred_class and correct_class == 'clean':
            # zip() is lazy in Python 3; materialise it so the per-class scores
            # are printed instead of "<zip object at 0x...>".
            print('Should be clear, but', dict(zip(finetuned_model.classes, pred)))

100%|██████████| 124/124 [00:15<00:00,  7.81it/s]
100%|██████████| 101/101 [00:16<00:00,  5.99it/s]
100%|██████████| 24/24 [00:04<00:00,  5.83it/s]


In [12]:
# For each true class, show how many validation images were predicted as each class.
for actual_class, predicted_counts in correct_to_actual.items():
    # The trailing empty string yields the blank separator line after each class.
    print(f'Actually {actual_class}:', dict(predicted_counts), '', sep='\n')

Actually clean:
{'clean': 124}

Actually man:
{'man': 59, 'clean': 42}

Actually escavate:
{'clean': 21, 'escavate': 3}



In [10]:
# External predictions: images outside the class folders, stored directly in VAL_DIR.

# Header row: one column per class, in the order of the model's output scores.
class_header = "\t\t".join(finetuned_model.classes)
print('\t\t\t {}'.format(class_header))

external_files = ('man.jpg', 'noone.jpg')
for filename in external_files:
    img_path = os.path.join(VAL_DIR, filename)
    scores = make_prediction(img_path)[0]
    print(filename, '\t', '\t'.join(str(score) for score in scores))

			 clean		escavate		man
man.jpg 	 0.0084273135	0.0009924108	0.99058026
noone.jpg 	 0.9873367	0.00852231	0.0041410746
