In [0]:
# Standard library
import itertools
import json
import os
import sys
from operator import itemgetter

# Commonly used modules
import numpy as np
import pandas as pd

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

# Images, plots, display, and visualization
import cv2
import IPython
import matplotlib.pyplot as plt


np.random.seed(43) # seeds NumPy's global RNG (used for the train/valid/test draw) so results are reproducible
tf.set_random_seed(42) # seeds TensorFlow's random ops so results are reproducible

In [0]:
# Mount Google Drive under /gdrive; the dataset and log directories used by the
# following cells live below that mount point. Only works inside Google Colab.
from google.colab import drive
drive.mount('/gdrive')

# 1 Importing images and setting up the train, validation and test data

## 1.1 Importing images and creating training, validation and test datasets

In [0]:
class Flags:
  """Settings shared by the data-loading and model-building cells."""

  def __init__(self):
    # A quarter of the images is set aside for testing; the remaining three
    # quarters are divided again in the same 75/25 proportion.
    kept, held_out = 0.75, 0.25

    # Probabilities in the order [train, valid, test].
    self.train_split = [kept * kept, held_out * kept, held_out]
    # Side length, in pixels, of the square images fed to the network.
    self.image_size = 28
    # Upper bound on the number of images taken from each category.
    self.fixed_img_number = 700


flags = Flags()

In [0]:
data_dir = '/gdrive/My Drive/DL project_2019/Raw_Dataset'

# Collect the file path and integer label of every usable image.
# The label of an image is the index of its category in `species`.
image_filenames = []
image_labels = []
species = ['Bees','Mosquitoes', 'Flies', 'Butterflies']
for label, category in enumerate(species):
    category_dir = os.path.join(data_dir, category)
    # Sorted so that the selection below is reproducible across runs.
    image_names = sorted(os.listdir(category_dir))
    # Keep regular, non-empty files only. Sub-directories report a non-zero
    # size and would otherwise be handed to the image decoder, and broken
    # links would make a plain os.stat() call raise.
    image_names = [
        name for name in image_names
        if os.path.isfile(os.path.join(category_dir, name))
        and os.path.getsize(os.path.join(category_dir, name)) != 0
    ]

    # Cap the number of images per category.
    image_names = image_names[:flags.fixed_img_number]
    image_filenames += [os.path.join(category_dir, name) for name in image_names]
    image_labels += [label] * len(image_names)

# Split data in three for training, validation and test
train_image_filenames, train_image_labels = [], []
valid_image_filenames, valid_image_labels = [], []
test_image_filenames, test_image_labels  = [], []

for image_filename, image_label in zip(image_filenames, image_labels):

    # One independent draw per image: the subset sizes follow
    # flags.train_split on average, not exactly.
    x = np.random.choice(['train', 'valid', 'test'], p=flags.train_split)

    # The three outcomes are mutually exclusive.
    if x == 'train':
        train_image_filenames.append(image_filename)
        train_image_labels.append(image_label)
    elif x == 'valid':
        valid_image_filenames.append(image_filename)
        valid_image_labels.append(image_label)
    elif x == 'test':
        test_image_filenames.append(image_filename)
        test_image_labels.append(image_label)

## 1.2 Defining Iterators for batch training

In [0]:
#Iterator builder
def make_iterator(filenames, labels, batch_size, shuffle_and_repeat=False):
    """Create a `tf.data.Iterator` over batches of decoded images and labels.

    Args:
        filenames: image file paths; each file is decoded as a 3-channel JPEG.
        labels: labels aligned one-to-one with `filenames`.
        batch_size: number of examples per batch.
        shuffle_and_repeat: if True, shuffle the examples and repeat the
            dataset indefinitely.

    Returns:
        A one-shot iterator when `shuffle_and_repeat` is True, otherwise an
        initializable iterator. Every element is a dict with the keys
        'image' (resized to flags.image_size x flags.image_size and divided
        by 256) and 'label'.
    """
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    if shuffle_and_repeat:
        # The file lists built in this notebook are ordered by class. A
        # shuffle buffer smaller than the dataset keeps the last classes out
        # of the first batches, so the buffer covers the whole list. It never
        # goes below the previous fixed size of 1000.
        try:
            buffer_size = max(len(filenames), 1000)
        except TypeError:
            # Unsized input (e.g. a tensor): fall back to the previous size.
            buffer_size = 1000
        dataset = dataset.apply(
            tf.data.experimental.shuffle_and_repeat(buffer_size=buffer_size))

    def parse(filename, label):
        """Read one image file, resize it and scale its pixel values."""
        # No try/except here: this function only runs while the graph is being
        # built, so it cannot intercept decoding failures, and returning None
        # from an error path would break the map step with an unrelated error.
        image = tf.read_file(filename)
        image = tf.image.decode_jpeg(image, channels = 3)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize(image, (flags.image_size,flags.image_size))
        image = image / 256
        return {'image': image, 'label': label}

    dataset = dataset.apply(tf.data.experimental.map_and_batch(
        map_func=parse, batch_size=batch_size, num_parallel_batches=8))

    if shuffle_and_repeat:
        return dataset.make_one_shot_iterator()
    else:
        return dataset.make_initializable_iterator()


# 2 Constructing CNN 

In [0]:

def model_builder(cfg):
  """
  Build, compile and train a CNN for one hyperparameter configuration.

  cfg :list: [kernel_size, strides, pool_size, dropout_1, dropout_2,
              optimizer, loss, batch_size, epochs]
              (`strides` is applied to the two pooling layers)
  returns the pair (history, model): the object returned by `model.fit` and
  the trained keras model
  """
  (kernel_size, strides, pool_size,
   dropout_1, dropout_2,
   optimizer, loss, batch_size, epochs) = cfg

  conv_window = (kernel_size, kernel_size)
  pool_window = (pool_size, pool_size)
  input_shape = (flags.image_size, flags.image_size, 3)

  # Two convolution/pooling stages followed by a dense classifier with one
  # softmax output per species.
  layer_stack = [
      Conv2D(32, kernel_size=conv_window, activation='relu',
             input_shape=input_shape),
      MaxPooling2D(pool_size=pool_window, strides=strides, padding='valid'),
      Conv2D(64, conv_window, activation='relu'),
      MaxPooling2D(pool_size=pool_window, strides=strides, padding='valid'),
      Dropout(dropout_1),
      Flatten(),
      Dense(128, activation='relu'),
      Dropout(dropout_2),
      Dense(len(species), activation='softmax'),
  ]
  model = keras.Sequential()
  for layer in layer_stack:
    model.add(layer)

  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Both input pipelines are shuffled and repeated; the step counts passed to
  # fit() below decide how many batches make up one epoch.
  train_iterator = make_iterator(
      train_image_filenames, train_image_labels,
      batch_size=batch_size, shuffle_and_repeat=True)
  val_iterator = make_iterator(
      valid_image_filenames, valid_image_labels,
      batch_size=batch_size, shuffle_and_repeat=True)

  train_batch = train_iterator.get_next()
  images, labels = train_batch['image'], train_batch['label']

  val_batch = val_iterator.get_next()
  val_images, val_labels = val_batch['image'], val_batch['label']

  train_steps = len(train_image_labels) // batch_size
  val_steps = len(valid_image_labels) // batch_size

  history = model.fit(
      images,
      labels,
      epochs=epochs,
      validation_data=(val_images, val_labels),
      steps_per_epoch=train_steps,
      validation_steps=val_steps,
  )

  return history, model


def config_builder():
  """
  returns a list of lists containing configuration
  """
  
  configs = list()
  kernel_size = [3,5]
  strides = [2,4]
  pool_size = [2,4]
  dropout_1= [0.25,0.5]
  dropout_2 = [0.25, 0.5]
  optimizer = ['Adam', 'Adamax', 'rmsprop', 'Nadam']
  loss = ['sparse_categorical_crossentropy']
  batch_size = [10]
  epochs = [15,20]
  for a in kernel_size:
    for b in strides:
      for c in pool_size:
        for d in dropout_1:
          for e in dropout_2:
            for f in optimizer:
              for g in loss:
                for h in batch_size:
                  for i in epochs:
                    cfg = [a,b,c,d,e,f,g,h,i]
                    configs.append(cfg)
  return configs


def grid_search(configs):
  """
  Train one model per configuration and save the logs after every run.

  configs :list: list of hyperparameter lists, each in the order expected by
  `model_builder`

  A configuration for which `model_builder` raises a ValueError is reported
  and skipped, so that one unusable combination does not abort the whole
  search; it then appears in none of the returned objects.

  returns the tuple (results, models, histories)
    results   :DataFrame: one row per trained configuration with the metrics
               of its last epoch; the 'test_*' columns hold the validation
               metrics ('val_acc', 'val_loss')
    models    :dict: configuration string -> trained model
    histories :dict: configuration string -> per-epoch metrics, every value
               stored as a string
  """
  saving_dir = '/gdrive/My Drive/DL project_2019/logs_CNN_from_scratch/Grid_search'
  histories_path = os.path.join(saving_dir, "20190404_2_histories.json")
  summary_path = os.path.join(saving_dir, "20190404_2_summary.csv")

  results = pd.DataFrame(columns =['config', 'train_acc', 'train_loss', 'test_acc', 'test_loss'])
  histories = {}
  models = {}
  for cfg in configs:
    str_cfg = '_'.join([str(x) for x in cfg])
    print('config ', str_cfg)

    try:
      history, model = model_builder(cfg)
    except ValueError as error:
      # Typically a combination whose layer shapes cannot be built, e.g. a
      # pooling window larger than the feature map it is applied to.
      print('skipping config ', str_cfg, ': ', error)
      continue

    row = {'config': [str_cfg]
           ,'train_acc' : [history.history['acc'][-1]]
          , 'train_loss' : [history.history['loss'][-1]]
          , 'test_acc' : [history.history['val_acc'][-1]]
          , 'test_loss' : [history.history['val_loss'][-1]]
          }

    models[str_cfg] = model
    row = pd.DataFrame.from_dict(row, orient = 'columns')
    results = pd.concat([results, row], ignore_index = True)

    # The metric values are stored as strings so that json can serialise
    # them. A new dict is built so history.history keeps its numeric values.
    histories[str_cfg] = {
        key: [str(x) for x in values]
        for key, values in history.history.items()
    }

    # Both log files are rewritten after every run, so an interrupted search
    # still leaves the results obtained so far on disk.
    os.makedirs(saving_dir, exist_ok=True)
    with open(histories_path, "w") as f:
      json.dump(histories, f)

    #saving summary of each session
    results.to_csv(summary_path)

  return results, models, histories
    

# 3 Training models

In [0]:
# Build every hyperparameter combination, then train one model per combination.
# Long-running: each configuration is a complete training run, and the logs are
# written to Google Drive after every one of them.
configs = config_builder()
results, models, histories = grid_search(configs)