In [1]:
import os

import hardy.recognition.cnn as cnn
import hardy.recognition.tuner as tuner
from hardy.handling import pre_processing as preprocessing
from hardy.handling import handling as handling
from hardy.handling import to_catalogue as to_catalogue

Using TensorFlow backend.


In [2]:
import pickle

In [3]:
import random

In [4]:
# Load the previously saved image data (``load=True``) from the '.npy'
# file at the given location.
# NOTE(review): presumably a sequence of (filename, image, label) entries,
# as described in hold_out_test_set's docstring - confirm against
# to_catalogue.save_load_data.
image_data = to_catalogue.save_load_data('../eisy/examples/rgb_Rex_Imy', file_extension='.npy', load=True)

In [5]:
# Relative folder for log output. It is not referenced by any other cell
# visible in this notebook.
log_dir = './log/'

In [6]:
# Folder of the hardy.recognition package.
# NOTE(review): presumably meant to be passed as the ``config_path``
# argument of classifier_wrapper / tuner_from_param - no visible cell
# actually passes it.
config_path = './hardy/recognition/'

In [7]:
# test_set_filenames = preprocessing.hold_out_test_set(path, number_of_files_per_class=500, classes=[])


def classifier_wrapper(input_path, test_set_filenames, run_name, config_path, image_data=None, classifier='tuner',
                       iterator_mode='arrays', split=0.1, target_size=(80, 80),
                       batch_size=32, image_path=None, classes=['class_1', 'class_2'],
                       project_name='tuner_run', **kwarg):
    '''
    Build the data sets, train a classifier on them and write a report.

    Parameters
    ----------
    input_path : str
                 location handed to ``preprocessing.save_to_folder`` to
                 create the output folder for this run.
    test_set_filenames : list
                         names of the entries held out as test set. Only
                         used when ``iterator_mode`` is 'arrays'.
    run_name : str
               name of this run; used for the output folder and, unless
               ``transformation_name`` is given, as the suffix of the
               tuner project name.
    config_path : str
                  location of the configuration used by the tuner or by
                  ``cnn.build_model``.
    image_data : sequence, optional
                 image entries to split into test and learning sets.
                 Required when ``iterator_mode`` is 'arrays'.
    classifier : str
                 'tuner' runs the hyperparameter search; any other value
                 trains the model defined by ``cnn.build_model``.
    iterator_mode : str
                    'arrays' builds the sets from ``image_data``; any
                    other value builds them from the ``image_path``
                    folder.
    split : float
            fraction forwarded to ``to_catalogue.learning_set``.
    target_size : tuple
                  image size forwarded to the ``to_catalogue`` helpers.
    batch_size : int
                 batch size forwarded to the ``to_catalogue`` helpers.
    image_path : str, optional
                 folder containing the images. Required when
                 ``iterator_mode`` is not 'arrays'.
    classes : list
              class names forwarded to the ``to_catalogue`` helpers in
              from_directory mode. The default list is never modified.
    project_name : str
                   base name of the tuner project and of the output
                   folder.
    **kwarg
        ``transformation_name`` (str), if present, replaces ``run_name``
        as the suffix appended to ``project_name`` for the tuner search.
        Other keys are ignored.

    Returns
    -------
    None
        The results are written through ``tuner.report_generation``.

    Raises
    ------
    AssertionError
        If the input required by the selected ``iterator_mode`` is missing.
    '''
    if iterator_mode == 'arrays':
        # Explicit None/length test: the plain truth value of a
        # multi-element numpy array raises ValueError instead of asserting.
        assert image_data is not None and len(image_data) > 0, \
            'No image_data list provided'

        test_set_list, learning_set_list = to_catalogue.data_set_split(
            image_data, test_set_filenames)

        training_set, validation_set = to_catalogue.learning_set(
            image_list=learning_set_list, split=split,
            target_size=target_size, iterator_mode='arrays',
            batch_size=batch_size)

        test_set = to_catalogue.test_set(
            image_list=test_set_list, target_size=target_size,
            iterator_mode='arrays', batch_size=batch_size)
    else:
        assert image_path, 'no path to the image folders was provided'

        # The keyword was misspelled ``plit`` before, so the requested
        # split never reached learning_set under its real name.
        training_set, validation_set = to_catalogue.learning_set(
            image_path, split=split, target_size=target_size,
            iterator_mode='from_directory', batch_size=batch_size,
            classes=classes)

        # ``path`` was an undefined name here; the folder validated above
        # is used instead.
        # NOTE(review): confirm the test images live in ``image_path``.
        test_set = to_catalogue.test_set(
            image_path, target_size=target_size, classes=classes,
            iterator_mode='from_directory', batch_size=batch_size)

    if classifier == 'tuner':
        tuner.build_param(config_path)
        # ``transformation_name`` used to be an undefined name; it is now
        # an optional keyword with ``run_name`` as fallback.
        search_name = project_name + kwarg.get('transformation_name',
                                               run_name)
        # The search result must not be bound to the name ``tuner``: doing
        # so makes ``tuner`` local to this function and every use of the
        # imported module raises UnboundLocalError.
        best_tuner = tuner.run_tuner(training_set, validation_set,
                                     project_name=search_name)
        model, history, metrics = tuner.best_model(
            best_tuner, training_set, validation_set, test_set)
    else:
        model, history = cnn.build_model(training_set, validation_set,
                                         config_path=config_path)
        metrics = cnn.evaluate_model(model, test_set)

    output_path = preprocessing.save_to_folder(input_path, project_name,
                                               run_name)
    # The returned confusion matrix and report are not used here; the call
    # is kept for whatever output it produces.
    cnn.report_on_metrics(model, test_set)
    # ``tuner=None`` is kept exactly as in the original call.
    tuner.report_generation(model, history, metrics, output_path,
                            tuner=None, save_model=True,
                            config_path=config_path)

    return

In [24]:
# Hold out 150 files per class from the simulation data folder; 'noise' is
# the label of the first class, '' stands for the remaining files.
test_set_filenames = preprocessing.hold_out_test_set('../eisy/examples/simulation_data', number_of_files_per_class=150, classes=['noise',''])

In [25]:
import numpy as np
# Sanity check: the number of distinct held-out names should equal
# 2 classes x 150 files, i.e. no file was drawn twice.
len(np.unique(test_set_filenames))

300

In [26]:
# Separate the loaded image data into the held-out test entries and the
# remaining learning entries, based on the held-out file names.
test_set_list, learning_set_list = to_catalogue.data_set_split(image_data, test_set_filenames)

In [27]:
# Total number of loaded image entries.
len(image_data)

9000

In [28]:
# Number of entries in the held-out test set.
len(test_set_list)

300

In [29]:
# Number of entries left for training and validation.
len(learning_set_list)

8700

In [30]:
def hold_out_test_set(path=None, number_of_files_per_class=100, classes=['noise', ''],
                      file_extension='.csv', image_list=None, iterator_mode=None):
    '''
    Function that returns a list of filenames
    of the randomly selected files to compose the test set

    Only two classes are supported: names ending with ``classes[0]`` form
    the first class and every other name forms the second one. Entries of
    ``classes`` after the first are not used.

    Parameters
    ----------
    path : str
           string containing the path to the files to select
           the test set from. Used when ``iterator_mode`` is not
           "arrays"; ``None`` is handed to ``os.listdir`` unchanged,
           which then lists the current working directory.

    number_of_files_per_class: int
                               The number of files to select from each class.
                               Values below one select nothing.

    classes: list
             a list containing strings of the classes the data is divided in.
             The classes are contained in the filename as labels.

    file_extension: str
                    the extension of the file to read. The default value is
                    .csv. Files with another extension are ignored and the
                    extension is removed from the returned names.
    image_list: np.array
                numpy array representing file names, image data and labels.
                The first item of each entry is used as the file name.
                Required when ``iterator_mode`` is "arrays".
    iterator_mode: str
                   string representing if the data provided is in arrays

    Returns
    -------
    test_set_filenames : list
                         A list containing the strings of filenames
                         (without extension) randomly selected to be part
                         of the test set. Names of the first and of the
                         second class alternate.

    Raises
    ------
    ValueError
        If ``image_list`` is missing in "arrays" mode, or if a class holds
        fewer names than ``number_of_files_per_class``.
    '''
    if iterator_mode == "arrays":
        # Read the names from the argument, not from a notebook global.
        if image_list is None:
            raise ValueError(
                'image_list must be provided when iterator_mode is "arrays"')
        candidates = [str(entry[0]) for entry in image_list]
    else:
        # Keep only files with the requested extension and cut exactly that
        # extension off. str.rstrip removes a *set of characters*, so it
        # would also eat trailing name characters such as the 'svc' in
        # 'run_svc.csv'.
        extension_length = len(file_extension)
        candidates = [name[:len(name) - extension_length]
                      for name in os.listdir(path)
                      if name.endswith(file_extension)]

    first_label = classes[0]
    first_class = [name for name in candidates
                   if name.endswith(first_label)]
    second_class = [name for name in candidates
                    if not name.endswith(first_label)]

    count = max(number_of_files_per_class, 0)
    for description, pool in (('ending with %r' % first_label, first_class),
                              ('not ending with %r' % first_label,
                               second_class)):
        if count > len(pool):
            raise ValueError(
                'cannot select %d files %s: only %d available'
                % (count, description, len(pool)))

    # Draw without replacement from each class, then interleave the picks
    # in the order first class, second class, first class, ...
    first_picks = random.sample(first_class, count)
    second_picks = random.sample(second_class, count)

    test_set_filenames = []
    for first_name, second_name in zip(first_picks, second_picks):
        test_set_filenames.append(first_name)
        test_set_filenames.append(second_name)

    return test_set_filenames

In [11]:
def tuner_from_param(self, config_path):
    '''
    Load the tuner configuration and define the model-building function.

    The configuration is read once with ``cnn.import_config(config_path)``
    and is captured by the nested ``build_tuner_model`` function.

    NOTE(review): ``self`` is not used in the visible body, and no
    ``return`` of ``build_tuner_model`` is visible in this cell - confirm
    how the nested function is meant to reach the tuner.
    NOTE(review): ``tf`` is not imported in any of the visible cells.

    Parameters
    ----------
    config_path: str
                 location handed to ``cnn.import_config``
    '''

    # Search-space definition. Judging by how it is indexed below, the
    # entries 'filters', 'kernel_size', 'activation', 'pooling' and
    # 'optimizer' are read as [hp_method_name, {'values': [...]}], and
    # 'layers' as a collection of numbers.
    param = cnn.import_config(config_path)

    def build_tuner_model(hp):
        '''
        Function that builds a convolutional keras model with
        tunable hyperparameters


        Parameters
        ----------
        hp: keras tuner class
            A class that is used to define the parameter search space.
            Its methods are looked up by the names stored in the
            configuration (via ``getattr``) and ``hp.Int`` is called
            directly.

        Returns
        -------
        model: Keras model
               The compiled convolutional neural network. It is not
               fitted inside this function.
        '''
        ###################################
        # The configuration is not loaded here; ``param`` comes from the
        # enclosing function.

        ####################################
        # Defining input size
        # The input shape is fixed to 50 x 50 pixels with 3 channels.

        inputs = tf.keras.Input(shape=(50, 50, 3))
        x = inputs

        ####################################
        # extracting parameters from the parameters file
        # and feeding in the tuner
        #
        # The number of convolutional layers is itself tunable, between 1
        # and the largest value in param['layers']. Every layer registers
        # its own hyperparameters, suffixed with the layer index; filters
        # and kernel size range between the smallest and largest
        # configured value.

        for i in range(hp.Int('conv_layers', 1, max(param['layers']),
                              default=3)):
            x = tf.keras.layers.Conv2D(
                filters=getattr(hp, param['filters'][0])
                ('filters_' + str(i), min(param['filters'][1]['values']),
                 max(param['filters'][1]['values']), step=4, default=8),
                kernel_size=getattr(hp, param['kernel_size'][0])
                ('kernel_size_' + str(i), min(param['kernel_size'
                                                    ][1]['values']),
                 max(param['kernel_size'][1]['values'])),
                activation=getattr(hp, param['activation'][0])
                ('activation_' + str(i), values=param['activation'
                                                      ][1]['values']),
                padding='same')(x)

        # Global pooling: max pooling when the tuner picks 'max', average
        # pooling for any other value.
        if getattr(hp,
                   param['pooling'][0])('pooling',
                                        values=param['pooling'][1]['values'])\
                == 'max':
            x = tf.keras.layers.GlobalMaxPooling2D()(x)
        else:
            x = tf.keras.layers.GlobalAveragePooling2D()(x)
        # Output layer with two units and softmax activation.
        outputs = tf.keras.layers.Dense(2, activation='softmax')(x)

        model = tf.keras.Model(inputs, outputs)

        # adding in the optimizer
        optimizer = getattr(hp, param['optimizer'][0]
                            )('optimizer', values=param['optimizer'
                                                        ][1]['values'])

        # compiling neural network model
        model.compile(optimizer, loss='categorical_crossentropy',
                      metrics=['accuracy'])

        return model