# Dataset Utils

In [1]:
import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
%matplotlib inline

Using plaidml.keras.backend backend.


In [2]:
def _label_from_path(path):
    """Return the label encoded in an image path.

    The label is the part of the file name before its first dot, with
    surrounding whitespace stripped (e.g. ``'training_data/cat.0.jpg'``
    gives ``'cat'``).
    """
    # glob returns '\\'-separated paths on Windows and '/'-separated paths
    # elsewhere; normalise so the file name is found on every platform.
    file_name = path.replace('\\', '/').rsplit('/', 1)[-1]
    return file_name.split('.')[0].strip()


def _load_split(pattern, img_dim):
    """Load every file matching ``pattern`` and derive its label.

    Returns a tuple ``(imgs, labels)`` where ``imgs`` is a NumPy array built
    from the loaded images (each resized to ``img_dim``) and ``labels`` is a
    list of strings in the same order.
    """
    # glob order depends on the file system; sorting keeps images and labels
    # in the same order across separate calls.
    files = sorted(glob.glob(pattern))
    imgs = np.array([img_to_array(load_img(f, target_size=img_dim)) for f in files])
    labels = [_label_from_path(f) for f in files]
    return imgs, labels


def get_imgs_labels():
    """Load the training and validation images together with their labels.

    Images are read from ``training_data/*`` and ``validation_data/*``
    (relative to the current working directory) and resized to 150x150.
    Labels are taken from the file names (text before the first dot).

    Returns:
        tuple: ``(train_imgs, train_labels, validation_imgs,
        validation_labels)`` - the image entries are NumPy arrays, the label
        entries are lists of strings aligned with the images.
    """
    IMG_DIM = (150, 150)

    train_imgs, train_labels = _load_split('training_data/*', IMG_DIM)
    validation_imgs, validation_labels = _load_split('validation_data/*', IMG_DIM)

    return train_imgs, train_labels, validation_imgs, validation_labels

### Preprocessing images

In [3]:
def get_imgs_scaled():
    """Return the training and validation images as scaled float32 arrays.

    The images are obtained from ``get_imgs_labels()``, cast to ``float32``
    and divided by 255 (presumably to map 8-bit pixel values into [0, 1] -
    confirm against the image source).

    Returns:
        tuple: ``(train_imgs_scaled, validation_imgs_scaled)``.
    """
    raw_train, _, raw_validation, _ = get_imgs_labels()

    def _rescale(pixels):
        # Cast first so the division is carried out in float32.
        return pixels.astype('float32') / 255

    return _rescale(raw_train), _rescale(raw_validation)

### Preprocessing labels - Label encoder (integer class indices)

In [4]:
def get_labels_enc():
    """Encode the training and validation labels with a ``LabelEncoder``.

    The encoder is fitted on the training labels only and then applied to
    both label lists returned by ``get_imgs_labels()``.

    Returns:
        tuple: ``(train_labels_enc, validation_labels_enc)``.
    """
    _, labels_train, _, labels_validation = get_imgs_labels()

    # Fit on the training labels only; the same mapping is reused for
    # validation.
    encoder = LabelEncoder()
    encoder.fit(labels_train)

    encoded_train = encoder.transform(labels_train)
    encoded_validation = encoder.transform(labels_validation)
    return encoded_train, encoded_validation