This is a survey of various techniques for two-class image classification. In this case we are looking at "Bunny" versus "Not Bunny". "Not Bunny" can be a very wide category, and the resulting unbalanced dataset makes it harder to identify best practices when implementing solutions.

Variables taken into consideration:

<ol>
    <li>Amount of Data</li>
    <li>Data Partitioning</li>
    <ol>
        <li>50% Bunny and 50% Not Bunny</li>
        <li>10% Bunny, and 10% for each of 9 other classes.</li>
    </ol>
    <li>Testing Layer</li>
    <ol>
        <li>out_relu</li>
        <li>Conv_1_bn</li>
        <li>block_16_project_BN</li>
        <li>block_16_project</li>
        <li>block_16_depthwise_relu</li>
        <li>block_16_expand</li>
        <li>block_15_add</li>
    </ol>
    <li>Data Augmentation</li>
    <ol>
        <li>None (Reshape if needed)</li>
        <li>Averaging</li>
        <li>UMAP</li>
        <li>Eigenvectors</li>
    </ol>
    <li>Prediction Technique</li>
    <ol>
        <li>SVM (https://ieeexplore.ieee.org/document/8623118)</li>
        <li>NN Layer</li>
        <li>KNN</li>
    </ol>
    <li>Technique Hyperparameters</li>
    <ol>
        <li>Kernel Type (SVM)</li>
        <li>Dropout Rate (NN)</li>
    </ol>
</ol>

<b>The following are all imports required to run the code below.</b>

In [51]:
import os
import random
import shutil
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from keras.models import Model

<b>The following are all functions used for the purpose of creating datasets.</b>

The "Images-Using" directory is the root from which the tf.keras datasets are built, with one class per subfolder. The "Other-Directory" directory holds one subdirectory for each "Not Bunny" subclass.

In [68]:
# Sub-directory suffixes (each with a leading "/") naming the "Not Bunny" classes
class_dirs = [
    "/" + class_name
    for class_name in (
        "Human",
        "Nature-Background",
        "Text",
        "Dogs",
        "Cats",
        "Hamsters",
        "Suggestively-Sexual",
        "Suggestively-Violent",
        "Abstract-Background",
        "Empty-Cages",
        "Bunny-Drawings",
    )
]

# Deleting and recreating directories depending on partition
def refresh_dirs(path, partition):
    """Reset the class folders under ``<path>/Images-Using`` for a new run.

    Every directory named in ``class_dirs`` plus ``/Other-Usable`` is removed
    from ``<path>/Images-Using``; a directory that does not exist is skipped.
    The folders required by ``partition`` are then created empty.

    Args:
        path: Project root that contains the ``Images-Using`` directory.
        partition: ``"50-50"`` creates a single ``Other-Usable`` folder;
            ``"Evenly Split"`` creates one folder per entry of ``class_dirs``.
            Any other value only performs the clean-up step.

    Raises:
        OSError: If a directory exists but cannot be removed or created
            (for example because of missing permissions).
    """
    images_root = str(path) + "/Images-Using"

    # Removing extra directories
    for name in class_dirs + ["/Other-Usable"]:
        try:
            shutil.rmtree(images_root + str(name))
        except FileNotFoundError:
            # Nothing left over from a previous experiment; other errors
            # (permissions, ...) are real problems and must stay visible.
            pass

    # Making directories depending on what partitions are desired
    if partition == "50-50":
        os.makedirs(images_root + "/Other-Usable", exist_ok=True)
    elif partition == "Evenly Split":
        for name in class_dirs:
            os.makedirs(images_root + str(name), exist_ok=True)

def _list_class_files(class_source):
    """Return the full paths of the regular files directly inside ``class_source``."""
    return [
        class_source + "/" + file_name
        for file_name in sorted(os.listdir(class_source))
        if os.path.isfile(class_source + "/" + file_name)
    ]


def _copy_with_unique_name(source, target_dir, class_idx, idx):
    """Copy ``source`` to ``target_dir`` as ``<class_idx>-<idx><basename>``.

    ``idx`` is advanced until the target name is unused, so an existing file
    is never overwritten. Returns the next index to try.
    """
    base_name = os.path.basename(source)
    while True:
        target = target_dir + "/" + str(class_idx) + "-" + str(idx) + base_name
        idx += 1
        if not os.path.exists(target):
            break
    shutil.copy(source, target)
    return idx


# Creating image directory, images and over-sampling if need be
def do_splits(path, partition):
    """Populate ``<path>/Images-Using`` from ``<path>/Other-Directory``.

    Args:
        path: Project root containing ``Other-Directory`` (one sub-directory
            per entry of ``class_dirs``) and ``Images-Using``.
        partition: ``"50-50"`` copies every class's files into the single
            ``Images-Using/Other-Usable`` folder under the name
            ``<class index>-<counter><original name>``.
            ``"Evenly Split"`` copies each class into its own folder and then
            over-samples it (random copies with replacement, named the same
            way) until it holds as many entries as
            ``Images-Using/Bunnies-Base``.
            Any other value does nothing.

    Sub-directories inside a class folder are skipped. A class with no files
    is left empty rather than over-sampled, because there is nothing to
    sample from. Copy errors propagate to the caller instead of being retried
    forever.
    """
    source_root = path + "/Other-Directory"
    images_root = path + "/Images-Using"

    if partition == "50-50":
        target_dir = images_root + "/Other-Usable"
        for class_idx, class_dir in enumerate(class_dirs):
            idx = 0
            for source in _list_class_files(source_root + class_dir):
                idx = _copy_with_unique_name(source, target_dir, class_idx, idx)
    elif partition == "Evenly Split":
        # The bunny folder is not modified here, so count it only once
        target_count = len(os.listdir(images_root + "/Bunnies-Base"))

        for class_idx, class_dir in enumerate(class_dirs):
            sources = _list_class_files(source_root + class_dir)
            target_dir = images_root + class_dir

            # Copy over files
            for source in sources:
                shutil.copy(source, target_dir)

            if not sources:
                continue

            # Random replacement: every unique-named copy adds exactly one
            # entry, so the count can be tracked without re-listing the folder
            current_count = len(os.listdir(target_dir))
            idx = 0
            while current_count < target_count:
                random_img = random.choice(sources)
                idx = _copy_with_unique_name(random_img, target_dir, class_idx, idx)
                current_count += 1
                        
def grab_datasets(path):
    """Build the training, validation and test datasets from ``<path>/Images-Using``.

    The directory is loaded twice with the same seed and a 0.2 validation
    split, once per subset. The first quarter of the validation batches is
    then taken as the test set and the rest stays as validation data.

    Args:
        path: Project root that contains the ``Images-Using`` directory.

    Returns:
        The tuple ``(train_ds, validation_ds, test_ds)``, each with
        prefetching enabled.
    """
    image_root = path + "/Images-Using"

    # Size set for mobilenetv2; options shared by both subsets
    loader_options = dict(
        validation_split=0.2,
        seed=321,
        image_size=(160, 160),
        batch_size=1,
    )

    # Training and validation sets created
    train_ds = tf.keras.utils.image_dataset_from_directory(
        image_root, subset="training", **loader_options
    )
    validation_ds = tf.keras.utils.image_dataset_from_directory(
        image_root, subset="validation", **loader_options
    )

    # Test set carved out of the validation batches
    test_batches = tf.data.experimental.cardinality(validation_ds) // 4
    test_ds = validation_ds.take(test_batches)
    validation_ds = validation_ds.skip(test_batches)

    # Setting up speedy image fetching to avoid bottlenecking
    train_ds, validation_ds, test_ds = (
        dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
        for dataset in (train_ds, validation_ds, test_ds)
    )

    return train_ds, validation_ds, test_ds

<b>All code related to model creation.</b>

In [3]:
# Frozen MobileNetV2 base model used as the feature extractor
def create_model(img_size=(160, 160)):
    """Create a non-trainable MobileNetV2 without its classification head.

    Args:
        img_size: ``(height, width)`` of the input images. Defaults to
            ``(160, 160)``, the size ``grab_datasets`` loads images at.

    Returns:
        The ``tf.keras.applications.MobileNetV2`` model built with
        ``include_top=False`` and ``weights='imagenet'``, with
        ``trainable`` set to ``False``.
    """
    # Colour images: append the channel dimension to (height, width)
    img_shape = tuple(img_size) + (3,)
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet')

    # Weights stay fixed; the model is only used to produce activations
    base_model.trainable = False

    return base_model

# Activations of one layer of the base model for every batch of a dataset
def get_activations(base_model, layer, train_ds):
    """Collect the activations of ``layer`` for all samples in ``train_ds``.

    Args:
        base_model: Keras model that contains a layer named ``layer``.
        layer: Name of the layer whose output is collected. The layer output
            must have four dimensions (batch plus three feature dimensions).
        train_ds: Iterable yielding ``(image, label)`` batches.

    Returns:
        ``(x, y)``: ``x`` is a float array holding the layer output of every
        sample, stacked along axis 0; ``y`` is an int array with the matching
        labels. Both are empty (length 0) when ``train_ds`` yields nothing.
    """
    layer_output = base_model.get_layer(str(layer)).output
    # Read the shape from the output tensor; the leading entry is the batch dimension
    _, nx, ny, nz = layer_output.shape

    intermediate_layer_model = Model(inputs=base_model.input, outputs=layer_output)

    # Gather per-batch results and concatenate once at the end, instead of
    # re-copying the whole accumulated array for every batch
    feature_batches = []
    label_batches = []
    for image, label in train_ds:
        feature_batches.append(intermediate_layer_model.predict(image))
        label_batches.append(np.asarray(label))

    if not feature_batches:
        return np.empty((0, int(nx), int(ny), int(nz)), float), np.empty((0), int)

    x_train = np.concatenate(feature_batches, axis=0).astype(float)
    y_train = np.concatenate(label_batches, axis=0).astype(int)

    return x_train, y_train

In [4]:
def make_prediction(augmentation, technique, x_train, y_train, x_test, y_test):
    """Fit the chosen technique on the training activations and score it.

    Only ``technique == "SVM"`` with ``augmentation == "none"`` is
    implemented: each sample is flattened to one feature row, NaNs are
    replaced by 0, and a ``StandardScaler`` + ``SVC(gamma="auto")`` pipeline
    is fitted on the training data.

    Args:
        augmentation: Name of the feature transformation to apply.
        technique: Name of the prediction technique.
        x_train: Training activations, one sample per entry along axis 0.
        y_train: Training labels.
        x_test: Test activations, laid out like ``x_train``.
        y_test: Test labels.

    Returns:
        The fraction of test samples classified correctly, as a float, or
        ``None`` when the requested combination is not implemented.

    Raises:
        ValueError: If the training or test set is empty.
    """
    if technique != "SVM" or augmentation != "none":
        return None

    if len(x_train) == 0 or len(x_test) == 0:
        raise ValueError("make_prediction needs non-empty training and test sets")

    # Augmentation "none": only reshape each sample into a flat feature row
    x_train = np.asarray(x_train)
    x_test = np.asarray(x_test)
    x_train = np.nan_to_num(x_train.reshape((x_train.shape[0], -1)), nan=0)
    x_test = np.nan_to_num(x_test.reshape((x_test.shape[0], -1)), nan=0)

    # Technique
    clf = make_pipeline(StandardScaler(), SVC(gamma="auto"))
    clf.fit(x_train, y_train)

    # Mean accuracy over the whole test set in one vectorised call
    return float(clf.score(x_test, y_test))

In [5]:
def print_results(partition, percentage, layer, augmentation, technique, accuracy):
    """Print one experiment's settings and its accuracy, one labelled value per line."""
    labelled_values = (
        ("Partition", partition),
        ("Percentage", percentage),
        ("Layer", layer),
        ("Augmentation", augmentation),
        ("Technique", technique),
        ("Accuracy", accuracy),
    )
    report_lines = [label + ": " + str(value) for label, value in labelled_values]
    print("\n".join(report_lines))

In [57]:
# Experiment grid. Only data_partitions is used by the live loop in this cell;
# the other lists are referenced only by the commented-out pipeline below.
data_partitions = ['Evenly Split'] # ["50-50", "Evenly Split"]
data_percentages = [100]
testing_layers = ["out_relu", "Conv_1_bn"]
data_augmentations = ["none", "averaging", "UMAP"]
prediction_techniques = ["SVM", "NN", "KNN"]

for partition in data_partitions:
    # Use the current working directory as the project root
    images_path = os.getcwd()
    # Delete the class folders left over from the previous experiment
    refresh_dirs(images_path, partition)
    # Copy (and, for "Evenly Split", over-sample) the images into the fresh class folders
    do_splits(images_path, partition)
    
# Draft of the remaining survey loop, kept commented out.
# NOTE(review): cull_data is not defined in the cells shown here - TODO confirm where it lives.
#     for percentage in data_percentages:
#         # Cull to data percentage
#         cull_data(percentage)
#         # Grab finished keras datasets
#         train_ds, validation_ds, test_ds = grab_datasets(images_path)
#         # Create mobilenetv2 base model
#         base_model = create_model()
        
#         for layer in testing_layers:
#             # Getting model activations to react to datasets train, validate and test
#             x_train, y_train = get_activations(base_model, layer, train_ds)
#             x_validate, y_validate = get_activations(base_model, layer, validation_ds)
#             x_test, y_test = get_activations(base_model, layer, test_ds)
            
#             for augmentation in data_augmentations:
#                 # Augmentations change depending on the input they need to service
#                 for technique in prediction_techniques:
#                     # Take everything in for a prediction
#                     accuracy = make_prediction(augmentation, technique, x_train, y_train, x_test, y_test)
#                     # Print results
#                     print_results(partition, percentage, layer, augmentation, technique, accuracy)

1
100 611
/Human


FileNotFoundError: [Errno 2] No such file or directory: 'images (91).jpg'

In [69]:
# Use the current working directory as the project root and show it
images_path = os.getcwd()
print(images_path)

# Run only the "Evenly Split" copy/over-sampling step; refresh_dirs is not called in this cell
do_splits(images_path, "Evenly Split")

/Users/Alex/Desktop/Image-Classification
1
