In [1]:
import os
import gc
import warnings
warnings.filterwarnings('ignore')

import glob
from shutil import copyfile, copy2

import pandas as pd
import numpy as np
from skimage import io

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, KFold

In [2]:
# Lookup tables for translating between class names and integer class ids.

# Every class name, in id order (a class's id is its position in this list).
LABELS = ['Acrop', 'CCA', 'Macro', 'Monti', 'Off',
          'Pavon', 'Pocill', 'Porit', 'Sand', 'Turf']

# Class name -> integer id (0..9), following the order of LABELS.
labels = {name: idx for idx, name in enumerate(LABELS)}

# The classes used in the experiment: all of LABELS except 'Off'.
EXP_LABELS = [name for name in LABELS if name != 'Off']

# Value reserved for "no label"; not referenced elsewhere in the visible cells.
NO_LABEL = 255


# Mapping from the annotation values in the 'Old' column to their replacements
# in the 'New' column; applied to every annotation table before filtering.
replace_table = pd.read_csv("Replace_With.csv")
old_labels = {old: new for old, new in zip(replace_table['Old'], replace_table['New'])}

In [3]:
# glob returns paths in arbitrary order, and the images are paired with their
# annotation files purely by position below, so both lists are sorted first.
# NOTE(review): this assumes image and annotation files share a file-name stem
# so that both lists sort into the same order — confirm against the data.
imgs = sorted(glob.glob("Data\\2008\\images\\*.jpg"))
labs = sorted(glob.glob("Data\\2008\\cpce\\*.txt"))

# zip() silently truncates to the shorter list; a count mismatch means at least
# one image has no annotation (or vice versa) and the pairing may be shifted.
if len(imgs) != len(labs):
    print("WARNING: found", len(imgs), "images but", len(labs), "annotation files")

data = pd.DataFrame(data = list(zip(imgs, labs)), columns = ['images', 'cpce'])

# Root folder for the extracted patches; one sub-folder per class label
dest = "Data\\2008\\patches_224"

for l in LABELS:
    # makedirs also creates `dest` itself when it is missing, and exist_ok
    # makes the cell safe to re-run
    os.makedirs(dest + "\\" + l, exist_ok = True)

In [4]:
def crop_patch(image, y, x, size = 112):
    """
    Cut a square patch out of `image`, centred on row `y` / column `x`.

    Parameters
    ----------
    image : array indexed as image[row, col] or image[row, col, channel]
    y, x  : integer row and column of the patch centre
    size  : half of the patch side length; the default of 112 produces
            224 x 224 patches

    Returns
    -------
    The slice image[y - size : y + size, x - size : x + size] (a view, not a
    copy, when `image` is a numpy array). When the centre is closer than
    `size` to an image border the patch is truncated at that border, so it is
    smaller than 2 * size but still taken from the correct location.
    Note that check_dimensions() is hard-coded for 224 x 224 patches and so
    only validates the default `size`.
    """

    # Clamp at 0: a negative start index would wrap around to the far side of
    # the image instead of stopping at the border.
    top = max(y - size, 0)
    left = max(x - size, 0)

    patch = image[top : y + size, left : x + size]

    return patch


def check_dimensions(image, y, x):
    """
    Report whether a 224 x 224 patch centred on row `y` / column `x` lies
    completely inside `image`.

    Returns True when the centre is at least 112 pixels away from the top and
    left borders and at most (extent - 112) along each axis, False otherwise.
    """

    half = 224 // 2
    rows, cols = image.shape[0:2]

    fits_horizontally = (x - half >= 0) and (x + half <= cols)
    fits_vertically = (y - half >= 0) and (y + half <= rows)

    return bool(fits_horizontally and fits_vertically)
    
    
def extract_patches(data):
    """
    Cut one patch per annotated point out of every image and save it as a PNG.

    Parameters
    ----------
    data : DataFrame with an 'images' column (image file paths) and a 'cpce'
           column (path of the matching "; "-separated annotation file).
           Each annotation table is read positionally: first column = row (Y),
           second column = column (X), third column = label.

    Side effects
    ------------
    For every annotated point whose label (after applying the global
    `old_labels` replacements) is in LABELS and whose patch fits inside the
    image, writes <dest>\\<label>\\<annotation file name>_<row index>.png.
    Points with other labels or too close to the border are skipped.
    Prints "Complete" when all images have been processed.
    """

    # Iterate over the two columns directly so the function does not depend on
    # the dataframe having a default 0..n-1 index.
    for image_path, cpce_path in zip(data['images'], data['cpce']):
        each_image = io.imread(image_path)
        each_annotation = pd.read_csv(cpce_path, sep = "; ",
                                      engine = 'python').rename(columns={'# Row' : 'Y', 'Col': 'X'})

        # Map legacy label names onto the current ones
        each_annotation = each_annotation.replace(old_labels)

        # Annotation file name without folder and extension
        file_name = cpce_path.split("\\")[-1].split(".")[0]

        # Plain tuples: (index, first column, second column, third column, ...)
        for record in each_annotation.itertuples(index = True, name = None):

            index = record[0]
            Y = int(record[1])
            X = int(record[2])
            L = str(record[3])

            if(L not in LABELS):
                continue

            # Skip points whose patch would extend past the image border
            if(not check_dimensions(each_image, Y, X)):
                continue

            patch = crop_patch(each_image, Y, X)

            io.imsave(arr = patch, fname = dest + "\\" + L +  "\\" + file_name + "_" + str(index) + ".png")

    print("Complete")
    
#extract_patches(data) # <-------------------------

In [5]:
# Every extracted 2008 patch; the name of its parent folder is its class label.
# Sorted because glob returns paths in arbitrary order, which would otherwise
# make the split below differ between runs even with a fixed seed.
patches = sorted(glob.glob("Data\\2008\\patches_224\\**\\*.png", recursive = True))
labels = [patch.split("\\")[-2] for patch in patches]

data = pd.DataFrame(data = list(zip(patches, labels)), columns = ['images', 'labels'])
# 'Off' patches are excluded from the experiment
data = data[data['labels'] != 'Off']

# Hold out 12.5% for validation; the fixed seed makes the split reproducible
train, valid = train_test_split(data, test_size = .125, random_state = 42)

In [6]:
# Build the hold-out test set from the 2009 patches; the name of a patch's
# parent folder is its class label.

patches = glob.glob("Data\\2009\\patches_224\\**\\*.png", recursive = True)
labels = [path.split("\\")[-2] for path in patches]

test = pd.DataFrame({'images': patches, 'labels': labels}, columns = ['images', 'labels'])
# 'Off' patches are excluded, as in the training data
test = test[test['labels'] != 'Off']

print("Training:", len(train), "Validation:", len(valid), "Testing:", len(test))

Training: 112814 Validation: 16117 Testing: 125888


In [7]:
from panel_image import *
from keras.preprocessing.image import ImageDataGenerator

batch_size = 32

# Options shared by the training and validation generators: full file paths
# are read from the 'images' column (no base directory), class names from the
# 'labels' column, and batches are shuffled with a fixed seed.
_flow_kwargs = dict(directory = None,
                    x_col = 'images',
                    y_col = 'labels',
                    target_size = (224, 224),
                    color_mode = "rgb",
                    class_mode = 'categorical',
                    batch_size = batch_size,
                    shuffle = True,
                    seed = 42)

# Training images: random horizontal/vertical flips, pixel values scaled by 1/255
train_augmentor = ImageDataGenerator(rescale = 1.0/255.0,
                                     horizontal_flip = True,
                                     vertical_flip = True,
                                     preprocessing_function = None)

train_generator = train_augmentor.flow_from_dataframe(dataframe = train, **_flow_kwargs)

# Validation images: scaling only, no augmentation
validate_augmentor = ImageDataGenerator(rescale = 1.0/255.0,
                                        preprocessing_function = None)

validation_generator = validate_augmentor.flow_from_dataframe(dataframe = valid, **_flow_kwargs)

Using TensorFlow backend.


Found 112814 images belonging to 9 classes.
Found 16117 images belonging to 9 classes.


In [None]:
num_epochs = 25

# Number of batches needed to see every training image once per epoch.
# Keras expects an integer step count; rounding up includes the final,
# partially filled batch.
steps_per_epoch_train = int(np.ceil(len(train) / batch_size))

# Same for the validation set
steps_per_epoch_valid = int(np.ceil(len(valid) / batch_size))

In [None]:
# # Transfer-learning model
from keras.models import Model
from keras.applications.nasnet import NASNetMobile
from keras.layers import Input, Dense, BatchNormalization, Activation, Dropout

# 224 x 224 RGB patches in, one probability per class out
main_input = Input(shape = (224, 224, 3))

# ImageNet-pretrained NASNetMobile without its classification head;
# global max pooling turns the final feature map into one vector per image
base = NASNetMobile(include_top = False, weights = 'imagenet', pooling = 'max')(main_input)

# Dropout followed by a 9-unit dense layer, then softmax over those 9 outputs
x = Dense(9)(Dropout(.75)(base))
main_output = Activation('softmax')(x)

model = Model(inputs = [main_input], outputs = [main_output])

In [None]:
from keras import optimizers

# Initial step size handed to Adam
learning_rate = 0.00025

# Multi-class setup: categorical cross-entropy loss, accuracy tracked as 'acc'
model.compile(optimizer = optimizers.Adam(lr = learning_rate),
              loss = 'categorical_crossentropy',
              metrics = ['acc'])

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler

# Make sure the folder for the checkpoint exists before training starts;
# otherwise the first attempt to save the weights can fail only after a
# whole epoch has already been trained.
checkpoint_path = "Experiment_1\\Panel.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok = True)

# Callbacks used during training:
#  - LearningRateScheduler: calls reduceLR at the start of each epoch to get the
#    learning rate (reduceLR is not defined in this notebook — presumably it
#    comes from the star import of panel_image; TODO confirm)
#  - ModelCheckpoint: writes the weights whenever val_loss improves
holla = [
         LearningRateScheduler(reduceLR),
         ModelCheckpoint(filepath = checkpoint_path,
                         monitor = 'val_loss',
                         save_weights_only = True,
                         save_best_only = True, verbose = 1)
        ]

In [None]:
# Run training; the returned History object is kept so the loss and accuracy
# curves can be plotted afterwards
history = model.fit_generator(train_generator,
                              epochs = num_epochs,
                              steps_per_epoch = steps_per_epoch_train,
                              validation_data = validation_generator,
                              validation_steps = steps_per_epoch_valid,
                              callbacks = holla,
                              verbose = 1)

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize = (15, 10))

# One curve per recorded metric: (key in history.history, legend label)
for key, legend in (("loss", "train_loss"),
                    ("val_loss", "val_loss"),
                    ("acc", "train_acc"),
                    ("val_acc", "val_acc")):
    plt.plot(history.history[key], label = legend)

# Mark the epoch with the lowest validation loss
val_losses = history.history["val_loss"]
plt.plot(np.argmin(val_losses),
         np.min(val_losses),
         marker = "x", color = "b", label = "best model")

plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="upper left")
plt.show()

In [None]:
# Load the weights file written by the ModelCheckpoint callback during training
# (saved only when val_loss improved), rather than keeping the last-epoch weights.
model.load_weights("Experiment_1\\Panel.h5")

In [None]:
# Generator over the hold-out set: scaling only, and no shuffling so that the
# order of the predictions matches test_generator.classes
test_generator = validate_augmentor.flow_from_dataframe(dataframe = test,
                                                 x_col = 'images', y_col = 'labels', target_size = (224, 224),
                                                 color_mode = "rgb",  class_mode = 'categorical',
                                                 batch_size = batch_size, shuffle = False, seed = 42)

# Number of batches needed to cover the whole test set exactly once. It must be
# an integer, and rounding up includes the final, partially filled batch so
# there is one prediction per test image.
steps_per_epoch_test = int(np.ceil(len(test) / batch_size))

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.model_selection import cross_val_score

# Class probabilities for every test batch, then the most probable class per row
predictions = model.predict_generator(test_generator, steps = steps_per_epoch_test)
predict_classes = predictions.argmax(axis = 1)

# Ground-truth class indices, in the generator's (unshuffled) order
test_y = test_generator.classes

print("# of images:", len(predict_classes))
for metric in (accuracy_score, confusion_matrix):
    print(metric(y_true = test_y, y_pred = predict_classes))

In [None]:
def get_sparse_points(image, percent, method):
    """
    Sample sparse points on `image`, classify the 224 x 224 patch around each
    point with the global `model`, and return the results as a table.

    Parameters
    ----------
    image   : array indexed as image[row, col, channel]; patches are multiplied
              by 1/255 before prediction (assumes 0-255 pixel values — TODO confirm)
    percent : fraction of the image's pixel count to use as the number of points
    method  : 'grid' for a regular grid of about that many points, or
              'random' for uniformly random points; both stay at least
              200 pixels away from every border

    Returns
    -------
    DataFrame with columns 'Y', 'X', 'Label', 'Confidence', one row per point
    whose patch fits inside the image. 'Label' is the EXP_LABELS entry with the
    highest predicted score and 'Confidence' is the gap between the highest and
    second-highest score. Returns None (after printing a message) when `method`
    is not recognised.
    """

    num_points = int((image.shape[0] * image.shape[1]) * percent)

    offset = 200 # about 7.5%

    if(method == 'grid'):

        # density x density grid, so roughly num_points points in total
        density = int(np.sqrt(num_points))

        x_, y_ = np.meshgrid(np.linspace(offset, image.shape[1] - offset, density),
                             np.linspace(offset, image.shape[0] - offset, density))

        x = x_.ravel().astype(int)
        y = y_.ravel().astype(int)

    elif(method == 'random'):

        x = np.random.randint(offset, image.shape[1] - offset, num_points)
        y = np.random.randint(offset, image.shape[0] - offset, num_points)
    else:
        print("Choose a method for sampling sparse points.")
        return

    columns = ['Y', 'X', 'Label', 'Confidence']

    # Keep only the points whose patch fits, and remember their coordinates so
    # that every prediction stays aligned with the point it belongs to.
    kept = [(int(row), int(col)) for row, col in zip(y, x)
            if check_dimensions(image, row, col)]

    if(len(kept) == 0):
        return pd.DataFrame(columns = columns)

    # Patches are already 224 x 224, the input size of the model, so they are
    # only rescaled to [0, 1] like the training data (no resizing).
    patches = np.array([crop_patch(image, row, col) * (1.0/255.0) for row, col in kept])

    # One row of class scores per patch; reshape keeps it 2-D even for a single patch
    sparse_predictions = np.asarray(model.predict(patches)).reshape(len(kept), -1)

    labels_ = [EXP_LABELS[best] for best in np.argmax(sparse_predictions, axis = 1)]

    # Margin between the best and the second-best class score
    ordered = np.sort(sparse_predictions, axis = 1)
    confidence_ = ordered[:, -1] - ordered[:, -2]

    df_sparse = pd.DataFrame(data = {'Y': [row for row, _col in kept],
                                     'X': [col for _row, col in kept],
                                     'Label': labels_,
                                     'Confidence': confidence_},
                             columns = columns)

    return df_sparse