In [None]:
import os
from glob import glob

from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Sanity check: print the local compute devices that TensorFlow reports.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
# Shared configuration used by the cells below.
image_size = [150,150]  # target_size for the directory iterators; a list so `+ [3]` can append the channel axis
epochs = 20  # number of epochs passed to fit_generator
batch_size = 32  # batch size for the directory iterators and the steps-per-epoch arithmetic
img_ext = '/*/*.tif'  # glob suffix appended to a data folder: <folder>/<sub-folder>/<file>.tif

In [None]:
# Root folders of the two data splits, relative to the working directory.
train_path = 'Training'
test_path = 'Validation'

# Image paths; later cells only use their counts (steps per epoch, number of
# samples to score for the confusion matrices).
train_image_files = glob(train_path + img_ext)
test_image_files = glob(test_path + img_ext)

# One sub-directory per class. Keep directories only, so that a stray file in the
# training folder (e.g. Thumbs.db) cannot inflate len(folders), which sizes the
# softmax layer. Sorted for a deterministic listing.
folders = sorted(path for path in glob(train_path + '/*') if os.path.isdir(path))

In [None]:
# Display the entries found directly under the training folder.
folders

In [None]:
# VGG16 base with ImageNet weights and without its top (classifier) layers;
# the input shape is image_size plus a 3-channel axis.
vgg = VGG16(input_shape=image_size + [3], weights='imagenet', include_top=False)

# Freeze every VGG16 layer so that only the Dense head added below is trained.
for layer in vgg.layers:
    layer.trainable=False
    
# Classification head: flatten the VGG16 output and map it to one softmax unit
# per entry in `folders`.
x = Flatten()(vgg.output)
prediction = Dense(len(folders), activation='softmax')(x)
model = Model(inputs=vgg.input, outputs=prediction)
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer, and
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)

In [None]:
# Image generator shared by the training, validation and confusion-matrix cells.
#
# Normalization is left entirely to VGG16's own preprocess_input:
#   * no `rescale=1./255` -- ImageDataGenerator applies `rescale` AFTER
#     `preprocessing_function`, so it would shrink the already mean-centred
#     values by 255 and feed the frozen ImageNet weights inputs on the wrong scale;
#   * no `featurewise_center=True` -- it requires `gen.fit(...)` on sample data,
#     which this notebook never calls, so it only produced a warning per image.
# The remaining arguments are random augmentations.
gen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input,
)

In [None]:
# Iterator over the validation folder; used below to recover the mapping from
# class name to class index and to preview one batch.
test_gen = gen.flow_from_directory(test_path, target_size=image_size)
print(test_gen.class_indices)

In [None]:
# Class names ordered by their integer class index, so labels[index] gives the
# name for a predicted/target index.
labels = sorted(test_gen.class_indices, key=test_gen.class_indices.get)

In [None]:
# Pull a single batch from the validation iterator and look at its second image:
# print the value range, then show it with its class name as the title.
x, y = next(iter(test_gen))
sample_image, sample_target = x[1], y[1]
print('min:', sample_image.min(), 'max:', sample_image.max())
plt.title(labels[np.argmax(sample_target)])
plt.imshow(sample_image)
plt.show()

In [None]:
# Sample training image to inspect. Built relative to train_path instead of a
# machine-specific absolute path so the notebook runs from any checkout of the
# project folder (the other cells already assume that working directory).
wtf_image_path = os.path.join(train_path, 'concrete_cement', '7a1c6d7c.tif')

In [None]:
# Load the sample image from disk and render it inline.
plt.imshow(image.load_img(wtf_image_path))

In [None]:
# Keep the loaded sample image in a variable for further inspection.
wtf_image = image.load_img(wtf_image_path)

In [None]:
# Show the loaded image object as the cell output.
wtf_image

In [None]:
# Shuffled batches of training images, resized to image_size and passed through
# the augmentation/normalization configured on `gen`.
train_generator = gen.flow_from_directory(
    train_path,
    target_size=image_size,
    shuffle=True,
    batch_size=batch_size
)

In [None]:
# Batches of validation images, drawn through the same `gen` as the training data.
# NOTE(review): `gen` includes random augmentation (rotation, shifts, shear, zoom,
# flips), so validation metrics are measured on augmented images -- consider a
# separate generator that only normalizes.
test_generator = gen.flow_from_directory(
    test_path,
    target_size=image_size,
    shuffle=True,
    batch_size=batch_size
)

In [None]:
# Train the Dense head. Each epoch runs one step per full batch of the globbed
# image files; the step counts are clamped to at least 1 so that a split holding
# fewer than batch_size images does not request zero steps.
# The returned History object is used by the plotting cells below.
fitted_model = model.fit_generator(
    train_generator,
    validation_data=test_generator,
    epochs=epochs,
    steps_per_epoch=max(1, len(train_image_files) // batch_size),
    validation_steps=max(1, len(test_image_files) // batch_size)
)

In [None]:
def get_confusion_matrix(data_path, N):
    """Build a confusion matrix for the images under ``data_path``.

    Batches are drawn in directory order (``shuffle=False``) from the
    module-level ``gen`` and scored with the module-level ``model`` until at
    least ``N`` samples have been seen.

    NOTE(review): ``gen`` is configured with random augmentation, so the matrix
    is computed on augmented images rather than on the files as stored.

    Args:
        data_path: Directory containing one sub-folder per class.
        N: Number of samples to score. Iteration stops after the first batch
            that brings the running total to ``N`` or more.

    Returns:
        The array returned by ``sklearn.metrics.confusion_matrix`` for
        (targets, predictions), with one row and one column per class index,
        including classes that never occur in the scored samples.
    """
    print('Generating confusion matrix', N)
    batch_predictions = []
    batch_targets = []
    num_seen = 0
    num_classes = 0
    batches = gen.flow_from_directory(data_path,
                                      target_size=image_size,
                                      shuffle=False,
                                      batch_size=batch_size)
    for i, (x, y) in enumerate(batches, start=1):
        if i % 50 == 0:
            print(i)
        # y is one-hot, so its width is the number of classes.
        num_classes = y.shape[1]
        batch_predictions.append(np.argmax(model.predict(x), axis=1))
        batch_targets.append(np.argmax(y, axis=1))

        # The iterator cycles forever; stop once N samples are covered.
        num_seen += len(y)
        if num_seen >= N:
            break

    # Concatenate once (instead of once per batch) and keep integer class indices.
    predictions = np.concatenate(batch_predictions)
    targets = np.concatenate(batch_targets)

    # Pin the label set so the matrix always has one row/column per class, even
    # when a class is missing from both targets and predictions; otherwise the
    # matrix shrinks and no longer lines up with the class-name list used for plotting.
    cm = confusion_matrix(targets, predictions, labels=np.arange(num_classes))
    return cm

In [None]:
# Confusion matrix over the training folder, scoring as many samples as there are globbed training files.
cm = get_confusion_matrix(train_path, len(train_image_files))
print(cm)

In [None]:
# Confusion matrix over the validation folder, scoring as many samples as there are globbed validation files.
test_cm = get_confusion_matrix(test_path, len(test_image_files))
print(test_cm)

In [None]:
# Per-epoch training and validation loss recorded by fit_generator.
plt.plot(fitted_model.history['loss'], label='training loss')
plt.plot(fitted_model.history['val_loss'], label='validation loss')
plt.legend()
plt.show()

In [None]:
# Per-epoch training and validation accuracy recorded by fit_generator.
# The history key for metrics=['accuracy'] depends on the Keras version:
# older releases log 'acc'/'val_acc', newer ones 'accuracy'/'val_accuracy'.
# Use whichever is present instead of failing with a KeyError.
history = fitted_model.history
acc_key = 'acc' if 'acc' in history else 'accuracy'
plt.plot(history[acc_key], label='training accuracy')
plt.plot(history['val_' + acc_key], label='validation accuracy')
plt.legend()
plt.show()

In [None]:
# plot_confusion_matrix is imported from a `util` module that is not part of this notebook.
from util import plot_confusion_matrix
# Plot both matrices with the class names as tick labels.
plot_confusion_matrix(cm, labels, title='Training confusion matrix')
plot_confusion_matrix(test_cm, labels, title='Validation confusion matrix')

In [None]:
# Persist the trained model in two files: architecture as YAML, weights as HDF5.
# NOTE(review): Model.to_yaml() is not available in recent TensorFlow/Keras
# releases -- confirm the installed version still provides it.
# serialize model to YAML
model_yaml = model.to_yaml()
with open("model.yaml", "w") as yaml_file:
    yaml_file.write(model_yaml)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

In [None]:
os.listdir(pred_path)

In [None]:
pred_img_ext = '/*.tif'

pred_path = 'competition_prediction'
pred_image_files = glob(pred_path + pred_img_ext)

predictions = []
targets = []
i = 0

In [None]:
# Peek at the first competition image path.
pred_image_files[0]

In [None]:
# Normalize competition images exactly like the training images:
# gen.standardize applies the normalization configured on `gen` to one image,
# without any of its random augmentation.
pred_datagen = ImageDataGenerator(preprocessing_function=gen.standardize)

# flow_from_directory only discovers images inside sub-folders of `directory`,
# even with class_mode=None, while the competition images sit directly inside
# pred_path. Point the iterator at the parent folder and select pred_path as the
# single "class" folder so the files are actually found.
pred_root = os.path.abspath(pred_path)
pred_generator = pred_datagen.flow_from_directory(
    directory=os.path.dirname(pred_root),
    classes=[os.path.basename(pred_root)],
    class_mode=None,         # unlabeled data: yield image batches only
    shuffle=False,           # keep batches in the same order as pred_generator.filenames
    target_size=image_size,  # the default is 256x256, which does not match the model input
    batch_size=batch_size
)

In [None]:
# Inspect the type of the iterator returned by flow_from_directory.
type(pred_generator)

In [None]:
# Number of competition images to inspect and score in this cell.
n_preview = 1

for file_name in pred_image_files[:n_preview]:
    print(file_name)

    # Show the image as stored on disk, with its basic properties.
    image_file = image.load_img(file_name)
    plt.imshow(image_file)
    plt.show()

    print(type(image_file))
    print(image_file.format)
    print(image_file.mode)
    print(image_file.size)

    # Reload at the model's input resolution; the stored size is not guaranteed
    # to match image_size.
    model_image = image.load_img(file_name, target_size=image_size)
    image_array = image.img_to_array(model_image)
    print(image_array.shape)

    # Apply the same normalization as training (gen.standardize, no random
    # augmentation), then add the leading batch axis that model.predict expects.
    model_input = np.expand_dims(gen.standardize(image_array), axis=0)
    y_pred = model.predict(model_input)

    print(y_pred)