In [1]:
from imutils import paths
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelBinarizer
from keras.preprocessing.image import img_to_array
import cv2
import os
import numpy as np


Using TensorFlow backend.


# Loading Images

In [2]:
# Global
Base_dir = 'C:/Users/Tajr/Desktop/Data/RadonPlus/RadonTechnology/Dev/Deep Learning/Datasets/CatDog/'
lb = LabelBinarizer()

### Helpers

In [3]:
# Label Extractor
def extract_labels(img_dir, img_path_list):
    img_labels = []
    img_dir_len = len(img_dir)
    
    for img_path in img_path_list:
        img_label = img_path[img_dir_len:]
        img_labels.append(img_label[:3])
    
    return img_labels

# Group Two Lists
def group_data(list_one, list_two):
    joint_list = list_one
    
    for item in list_two:
        joint_list.append(item)
    
    return joint_list

# Resizer (ignore image aspect ratio)
def img_resizer(img, width, height, interpolation):
    inter = interpolation
    return cv2.resize(img, (width, height), inter)

# Image to array preprocessor
def img2array(img, dataFormat=None):
    return img_to_array(img, data_format=dataFormat)

# Image Loader
def load_img(img_paths, verbose=1):
    data = []
    
    for (i, img_path) in enumerate(img_paths):
        img = cv2.imread(img_path)
        
        # Resize Preprocessor
        interpolation = cv2.INTER_AREA
        img = img_resizer(img, 32, 32, interpolation)
        
        # Image To Array Preprocessor
        img = img2array(img)
        
        data.append(img)
        
        if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
            print('[INFO] processed {} / {}'.format((i + 1), len(img_paths)))
        
        return np.array(data)

### Train

In [4]:
# Cats
train_cats_dir = os.path.join(Base_dir, 'train/cats/')
train_cats_images = list(paths.list_images(train_cats_dir))
train_cats_labels = extract_labels(train_cats_dir, train_cats_images)


# Dogs
train_dogs_dir = os.path.join(Base_dir, 'train/dogs/')
train_dogs_images = list(paths.list_images(train_dogs_dir))
train_dogs_labels = extract_labels(train_dogs_dir, train_dogs_images)


# Group cats and dogs (data, labels)
train_data_paths = group_data(train_cats_images, train_dogs_images)
train_labels = group_data(train_cats_labels, train_dogs_labels)

# Shuffle
(train_data_paths, train_labels) = shuffle(train_data_paths, train_labels)

# Train data
train_data = load_img(train_data_paths, verbose=500)
train_data = train_data.astype('float') / 255.0

# Convert labels to numpy array
train_labels = np.array(train_labels)

# Binarize labels
labels = train_labels
train_labels = lb.fit_transform(train_labels)

for i in np.arange(0, 10):
    print(labels[i], "  |  ", train_labels[i])
    
# Check Ups
print()
print("Train data type: ", type(train_data))
print("Train labels type: ", type(train_labels))
print("Train data shape: ", train_data.shape)

dog   |   [1]
dog   |   [1]
dog   |   [1]
dog   |   [1]
dog   |   [1]
cat   |   [0]
cat   |   [0]
dog   |   [1]
cat   |   [0]
cat   |   [0]

Train data type:  <class 'numpy.ndarray'>
Train labels type:  <class 'numpy.ndarray'>
Train data shape:  (1, 32, 32, 3)


### Validation

In [5]:
# Cats
validation_cats_dir = os.path.join(Base_dir, 'validation/cats/')
validation_cats_images = list(paths.list_images(validation_cats_dir))
validation_cats_labels = extract_labels(validation_cats_dir, validation_cats_images)

# Dogs
validation_dogs_dir = os.path.join(Base_dir, 'validation/dogs/')
validation_dogs_images = list(paths.list_images(validation_dogs_dir))
validation_dogs_labels = extract_labels(validation_dogs_dir, validation_dogs_images)

# Group Validation
validation_data_paths = group_data(validation_cats_images, validation_dogs_images)
validation_labels = group_data(validation_cats_labels, validation_dogs_labels)

# Shuffle
(validation_data_paths, validation_labels) = shuffle(validation_data_paths, validation_labels)

# Validation data
validation_data = load_img(validation_data_paths, verbose=500)
validation_data = validation_data.astype('float') / 255.0

# Convert labels to numpy array
validation_labels = np.array(validation_labels)

# Binerize labels
labels = validation_labels
validation_labels = lb.fit_transform(validation_labels)

for i in np.arange(0, 10):
    print(labels[i], " |  ", validation_labels[i])

# Check Ups
print()
print("Validation data type: ", type(validation_data))
print("Validation labels type: ", type(validation_labels))
print("Validation data shape: ", validation_data.shape)

cat  |   [0]
cat  |   [0]
dog  |   [1]
dog  |   [1]
dog  |   [1]
cat  |   [0]
cat  |   [0]
dog  |   [1]
cat  |   [0]
dog  |   [1]

Validation data type:  <class 'numpy.ndarray'>
Validation labels type:  <class 'numpy.ndarray'>
Validation data shape:  (1, 32, 32, 3)


### Test

In [6]:
# Cats
test_cats_dir = os.path.join(Base_dir, 'test/cats/')
test_cats_images = list(paths.list_images(test_cats_dir))
test_cats_labels = extract_labels(test_cats_dir, test_cats_images)

# Dogs
test_dogs_dir = os.path.join(Base_dir, 'test/dogs/')
test_dogs_images = list(paths.list_images(test_dogs_dir))
test_dogs_labels = extract_labels(test_dogs_dir, test_dogs_images)

# Group Test
test_data_paths = group_data(test_cats_images, test_dogs_images)
test_labels = group_data(test_cats_labels, test_dogs_labels)

# Shuffle
(test_data_paths, test_labels) = shuffle(test_data_paths, test_labels, random_state=0)

# Test data
test_data = load_img(test_data_paths, verbose=500)
test_data = test_data.astype('float') / 255.0

# Convert labels to numpy array
test_labels = np.array(test_labels)

# Binarize labels
labels = test_labels
test_labels = lb.fit_transform(test_labels)

for i in np.arange(0, 10):
    print(labels[i], "  |  ", test_labels[i])
    
# Check Ups
print()
print("Test data type: ", type(test_data))
print("Test labels type: ", type(test_labels))
print("Test data shape: ", test_data.shape)

dog   |   [1]
dog   |   [1]
cat   |   [0]
dog   |   [1]
dog   |   [1]
dog   |   [1]
cat   |   [0]
cat   |   [0]
cat   |   [0]
dog   |   [1]

Test data type:  <class 'numpy.ndarray'>
Test labels type:  <class 'numpy.ndarray'>
Test data shape:  (1, 32, 32, 3)
