In [1]:
# !pip install keras
# !pip install tensorflow-gpu
# !pip install pillow

In [2]:
import os

import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing import image

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# File the fine-tuned network is saved to.
OUTPUT_MODEL_NAME = 'classification.h5'

# (height, width) every image is resized to when read by the data generators.
SIZE = (224, 224)

# Dataset layout, resolved against the current working directory:
#   data/all   - full image collection, one sub-folder per class
#   data/train - training sample
#   data/val   - validation sample
DATA_DIR = os.path.abspath('data')
ALL_DIR, TRAIN_DIR, VAL_DIR = (
    os.path.join(DATA_DIR, subset) for subset in ('all', 'train', 'val')
)

In [63]:
import random
import shutil

def regenerate_sampling(seed=None):
    """Rebuild the train/validation samples from the images under ALL_DIR.

    Every immediate sub-folder of ALL_DIR is treated as one class. For each
    class the matching folders under TRAIN_DIR and VAL_DIR are wiped and
    recreated, then every file of the class (including files in nested
    sub-folders, whose relative layout is preserved) is randomly assigned:

    * 'clean' class: 1% go to validation; 5% of the remainder go to
      training; the rest are dropped (the class is heavily down-sampled).
    * any other class: 20% go to validation, everything else to training.

    Args:
        seed: optional seed for a private random generator so the split can
            be reproduced. With the default ``None`` the global ``random``
            module state is used, exactly as before.
    """
    rng = random if seed is None else random.Random(seed)

    # Only the first level below ALL_DIR names classes. Walking the whole tree
    # here would turn nested folders into bogus classes.
    class_names = sorted(next(os.walk(ALL_DIR), (ALL_DIR, [], []))[1])

    for d in class_names:
        for new_dir in (TRAIN_DIR, VAL_DIR):
            create_dir = os.path.join(new_dir, d)
            if os.path.isdir(create_dir):
                print(f"recreating samples for {create_dir}")
                shutil.rmtree(create_dir)
            os.makedirs(create_dir)

        val_fraction = 0.01 if d == 'clean' else 0.2
        train_fraction = 0.05 if d == 'clean' else 1

        source_root = os.path.join(ALL_DIR, d)
        for root, subdirs, files in os.walk(source_root):
            # Fixed traversal order makes a seeded split reproducible.
            subdirs.sort()
            relative = os.path.relpath(root, source_root)
            for file in sorted(files):
                if rng.random() < val_fraction:
                    target_root = VAL_DIR
                elif rng.random() < train_fraction:
                    target_root = TRAIN_DIR
                else:
                    continue
                # Build paths from the directory actually being walked so
                # files in nested folders are found and keep their layout.
                target = os.path.normpath(
                    os.path.join(target_root, d, relative, file)
                )
                os.makedirs(os.path.dirname(target), exist_ok=True)
                shutil.copyfile(os.path.join(root, file), target)

# regenerate_sampling()

recreating samples for /home/paulinm/hack/RaspberryTrain/data/train/clean
recreating samples for /home/paulinm/hack/RaspberryTrain/data/val/clean
recreating samples for /home/paulinm/hack/RaspberryTrain/data/train/man
recreating samples for /home/paulinm/hack/RaspberryTrain/data/val/man
recreating samples for /home/paulinm/hack/RaspberryTrain/data/train/escavate
recreating samples for /home/paulinm/hack/RaspberryTrain/data/val/escavate


In [23]:
BATCH_SIZE = 16

# Augmentation belongs on the training stream only. Validation images are fed
# unmodified so the validation metrics are comparable between epochs.
# NOTE(review): no `preprocessing_function` is applied, so the network sees raw
# 0-255 pixel values; `make_prediction` does the same, so the two are at least
# consistent - confirm this is intended for the ImageNet-pretrained weights.
gen = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True)
val_gen = keras.preprocessing.image.ImageDataGenerator()

batches = gen.flow_from_directory(
    TRAIN_DIR,
    target_size=SIZE,
    class_mode='categorical',
    shuffle=True,
    batch_size=BATCH_SIZE,
)
val_batches = val_gen.flow_from_directory(
    VAL_DIR,
    target_size=SIZE,
    class_mode='categorical',
    # Fixed order + ceil'd step count below => every validation image is
    # evaluated exactly once per epoch.
    shuffle=False,
    batch_size=BATCH_SIZE,
)

# Use the number of images the generators actually loaded rather than a raw
# file count, which would also include any non-image files in the folders.
num_train_samples = batches.samples
num_valid_samples = val_batches.samples

# Training: whole batches only, but at least one step for tiny datasets.
num_train_steps = max(1, num_train_samples // BATCH_SIZE)
# Validation: round up so the final partial batch is not skipped.
num_valid_steps = max(1, -(-num_valid_samples // BATCH_SIZE))

# Pretrained backbone (default arguments, i.e. including its original
# classification head, which is bypassed below).
model = keras.applications.resnet50.ResNet50()
# model = keras.applications.mobilenet.MobileNet()

# Keep the earliest layers fixed and fine-tune only the later ones.
NUM_FROZEN_LAYERS = 80
for layer in model.layers[:NUM_FROZEN_LAYERS]:
    layer.trainable = False

# Class names ordered by the index the generator assigned to them, so that
# classes[k] is the label for output unit k.
classes = sorted(batches.class_indices, key=batches.class_indices.get)

# Attach the new head to the output of the second-to-last layer, i.e. skip the
# original final layer. This replaces `model.layers.pop()`: depending on the
# Keras version `model.layers` may be a copy, in which case pop() silently does
# nothing and the new head would be stacked on top of the old softmax.
last = model.layers[-2].output
x = Dense(len(classes), activation="softmax")(last)
finetuned_model = Model(model.input, x)
finetuned_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Ad-hoc attribute read by the analysis cells further down.
finetuned_model.classes = classes

# Stop once validation loss has not improved for 2 consecutive epochs and roll
# the model back to the best epoch's weights; otherwise the model that gets
# saved afterwards would be the one from two epochs *after* the best.
# (The rollback happens when early stopping triggers, not on a manual
# interrupt.)
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

finetuned_model.fit_generator(
    batches,
    steps_per_epoch=num_train_steps,
    epochs=100,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
    validation_data=val_batches,
    validation_steps=num_valid_steps,
)

Found 1130 images belonging to 3 classes.
Found 249 images belonging to 3 classes.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
 3/35 [=>............................] - ETA: 3:25 - loss: 0.0014 - accuracy: 1.0000

KeyboardInterrupt: 

In [24]:
# Write the fine-tuned model to OUTPUT_MODEL_NAME.
# NOTE(review): `finetuned_model.classes` is a plain Python attribute set
# earlier in this notebook; presumably it is not stored in the saved file, so a
# reloaded model would need the class-name list from elsewhere - confirm.
finetuned_model.save(OUTPUT_MODEL_NAME)

### Error analysis on the validation set

In [25]:
import numpy as np

def make_prediction(img_path):
    """Run the fine-tuned model on a single image file.

    The image is resized to SIZE, the same resolution the training and
    validation generators use, so inference input matches training input
    regardless of the file's native dimensions.

    Args:
        img_path: path to the image file to classify.

    Returns:
        The output of ``finetuned_model.predict`` for a batch containing just
        this image; element ``[0]`` holds one score per class, in the order of
        ``finetuned_model.classes``.
    """
    img = image.load_img(img_path, target_size=SIZE)
    x = image.img_to_array(img)
    # The model expects a batch dimension, so wrap the single image.
    x = np.expand_dims(x, axis=0)
    return finetuned_model.predict(x)

# Header row: class names in model-output order.
column_titles = "\t\t".join(finetuned_model.classes)
print('\t\t\t {}'.format(column_titles))

# Spot-check three validation images, one from each class folder.
for filename in ('escavate/sc4_0661.jpg', 'man/sc1_0046.jpg', 'clean/sc2_0138.jpg'):
    img_path = os.path.join(VAL_DIR, filename)
    scores = make_prediction(img_path)[0]
    print(filename, '\t', '\t'.join(str(score) for score in scores))

			 clean		escavate		man
escavate/sc4_0661.jpg 	 0.13595015	0.32506287	0.538987
man/sc1_0046.jpg 	 0.19008252	0.01039284	0.79952466
clean/sc2_0138.jpg 	 0.9999145	3.0291032e-05	5.514074e-05


In [31]:
from collections import defaultdict
from tqdm import tqdm

# Confusion counts: correct_to_actual[true_class][predicted_class] -> number of
# validation images.
correct_to_actual = defaultdict(lambda: defaultdict(int))

# Only the immediate sub-folders of VAL_DIR are classes; walking the whole tree
# at this level would treat nested folders as extra classes.
val_class_names = next(os.walk(VAL_DIR), (VAL_DIR, [], []))[1]

for correct_class in val_class_names:
    class_root = os.path.join(VAL_DIR, correct_class)
    for root, _, files in os.walk(class_root):
        for file in tqdm(files):
            # Join with the directory being walked so images inside nested
            # folders resolve to their real location.
            img_path = os.path.join(root, file)
            pred = make_prediction(img_path)[0]
            pred_class = finetuned_model.classes[np.argmax(pred)]  # can adjust argmax here
            correct_to_actual[correct_class][pred_class] += 1

100%|██████████| 124/124 [00:11<00:00, 10.65it/s]
100%|██████████| 101/101 [00:10<00:00,  9.49it/s]
100%|██████████| 24/24 [00:02<00:00,  9.50it/s]


In [39]:
# For each true class, show how many validation images were predicted as each
# class, followed by a blank separator line.
for true_class in correct_to_actual:
    predicted_counts = dict(correct_to_actual[true_class])
    print(f'Actually {true_class}:', predicted_counts, '', sep='\n')

Actually clean:
{'clean': 120, 'man': 4}

Actually man:
{'man': 90, 'clean': 11}

Actually escavate:
{'escavate': 4, 'man': 16, 'clean': 4}

