In [2]:
import pandas as pd
import numpy as np
import os
import glob
import numpy.random as rng
import random
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from scipy.ndimage import rotate

In [49]:
class CreateLabelsTargets:
    def __init__(self,path):
        self.df = pd.read_csv(path)
        columns = list(self.df.columns.values)
        
        # Treating the columns in the csv for the two possible options
        if(columns[0] == 'Image'):
            self.columnImage = columns[0]
            self.columnId = columns[1]
        else:
            self.columnImage = columns[1]
            self.columnId = columns[0]
        
        self.numberOfClasses = self.df[self.columnId].nunique()
    def load_csv_and_treat(self):
        self.dict_classes = {}
        for index, row in self.df.iterrows():
            #print(row['Image'], row['Id'])
            if(row[self.columnId] == 'new_whale' or row[self.columnImage] == 'new_whale'):
                continue
            if (not self.dict_classes.get(row[self.columnId])):
                self.dict_classes[row[self.columnId]] = []
            auxArray = self.dict_classes.get(row[self.columnId])
            auxArray.append(row[self.columnImage])
            self.dict_classes[row[self.columnId]] = auxArray
    def create_labels_and_targets(self):
        X=[]
        y=[]
        for key, value in self.dict_classes.items():
            X.append(value)
            y.append(key)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
        return (X_train, X_test, y_train, y_test)

    
    def create_dataset_with_image(self,imgsPath):
        X=[]
        y=[]
        IMG_DIM = (200,200)
        for key, values in self.dict_classes.items():
            for value in values:
                fullPath = imgsPath + value
                img = load_img(fullPath, color_mode = "grayscale",target_size=IMG_DIM)
                X.append(img_to_array(img))
                y.append(key)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
        return (np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test))
             

In [30]:
class DataLoader:
    """For loading batches and testing tasks to a siamese net"""
    def __init__(self, path, X,y):
        self.X = X
        self.y = y
        self.path = path
        self.w = 200
        self.h = 200
        self.datagen = ImageDataGenerator( 
            rotation_range = 40, 
            shear_range = 0.2, 
            zoom_range = 0.2, 
            horizontal_flip = True, 
            brightness_range = (0.5, 1.5))
        
    def loadImageAsArray(self,name):
        fullPath = self.path + name
        img = load_img(fullPath, color_mode = "grayscale")
        return img_to_array(img)
    
    def getBatch(self,batch_size):
        """Create batch of n pairs, half same class, half different class"""
        n_classes = len(self.y)
        IMG_DIM = (self.w,self.h)
        #randomly sample several classes to use in the batch
        categories = rng.choice(n_classes,size=(batch_size,),replace=False)
        #initialize 2 empty arrays for the input image batch
        pairs=[np.zeros((batch_size, self.w, self.h,1)) for i in range(2)]
        #initialize vector for the targets, and make one half of it '1's, so 2nd half of batch has same class
        targets=np.zeros((batch_size,))
        targets[batch_size//2:] = 1
        for i in range(batch_size):
            category = categories[i]
            #If category doesnt have two examples, it wont be good to equal test
            if (i >= batch_size // 2) and (len(self.X[category]) == 1):  
                while(len(self.X[category]) == 1):
                    categories = rng.choice(n_classes,size=(batch_size,),replace=False)
                    category = categories[i]
            
            #Select first image
            n_examples = len(self.X[category])
            idx_1 = rng.randint(0, n_examples)
            fullPath = self.path + self.X[category][idx_1]
            img = load_img(fullPath, color_mode = "grayscale",target_size=IMG_DIM)
            pairs[0][i,:,:,:] = img_to_array(img)

            #pick category and index of same class for 1st half, different for 2nd
            if i >= batch_size // 2:
                category_2 = category
                idx_2 = (idx_1 + rng.randint(1,n_examples)) % n_examples
            else: 
                #add old category number to the category modulo n classes to ensure 2nd image has different category
                category_2 = (category + rng.randint(1,n_classes)) % n_classes
                n_examples = len(self.X[category_2])
                idx_2 = rng.randint(0, n_examples)
            
            fullPath = self.path + self.X[category_2][idx_2]
            img = load_img(fullPath, color_mode = "grayscale",target_size=IMG_DIM)
            pairs[1][i,:,:,:] = img_to_array(img)
            
            # augmentation on data
            pair = rng.randint(0,1)
            augs = self.datagen.flow(pairs[pair], batch_size=1,shuffle=False)
            for i in range(len(augs)):
                pairs[pair][i,:,:,:] = augs[i][0]

        return pairs, targets

    def generate(self, batch_size):
        """a generator for batches, so model.fit_generator can be used. """
        while True:
            pairs, targets = self.getBatch(batch_size)
            yield (pairs, targets)
            
    def make_oneshot_task(self,N,X,y):
        """Create pairs of test image, support set for testing N way one-shot learning. """
        n_classes = len(y)
        categories = rng.choice(range(n_classes),size=(N,),replace=False)            
        true_category = categories[0]
        if(len(X[true_category]) == 1):
            while(len(X[true_category]) == 1):
                categories = rng.choice(range(n_classes),size=(N,),replace=False)            
                true_category = categories[0]

        n_examples = len(X[true_category])
        ex1, ex2 = rng.choice(n_examples,replace=False,size=(2,))
        
        # Load Image
        testImage = self.loadImageAsArray(X[true_category][ex1])
        testImage = np.asarray([testImage]*N)
#         print("Category", str(true_category))
#         print("Index:", str(ex1))
        # Get first image equal to original
        supportSet = []
        supportSet.append(img_to_array(self.loadImageAsArray(X[true_category][ex2])))
#         print("Category", str(true_category))
#         print("Index:", str(ex2))
        #Append the rest of the test images
        for category in categories:
            if(category == true_category):
                continue
            n_examples = len(X[category])
            index = rng.randint(0,n_examples)
            setImage = img_to_array(self.loadImageAsArray(X[category][index]))
            supportSet.append(setImage)
#             print("Category", str(category))
#             print("Index:", str(index))
        supportSet = np.array(supportSet)
        #initialize targets equal zero
        targets = np.zeros((N,))
        targets[0] = 1
        targets, testImage, supportSet = shuffle(targets, testImage, supportSet)
#         print("Targets after shuffle")
#         print(targets)
        pairs = [testImage,supportSet]
#         for index in range(len(pairs[0])):
#             imgplot = plt.imshow(array_to_img(pairs[0][index]), cmap='gray')
#             plt.title(str(index) + "_ " + str(targets[index]) + "_ "  + y[true_category])
#             plt.show()
#             imgplot2 = plt.imshow(array_to_img(pairs[1][index]), cmap='gray')
#             plt.title(str(index) + "_ " + str(targets[index]) + "_ "  + y[categories[index]])
#             plt.show()
        return pairs, targets
    
    def test_oneshot(self,model,N,k,X,y,verbose=0):
        """Test average N way oneshot learning accuracy of a siamese neural net over k one-shot tasks"""
        n_correct = 0
        if verbose:
            print("Evaluating model on {} random {} way one-shot learning tasks ...".format(k,N))
        for i in range(k):
            inputs, targets = self.make_oneshot_task(N,X,y)
            probs = model.predict(inputs)
            if np.argmax(probs) == np.argmax(targets):
                n_correct+=1
        percent_correct = (100.0*n_correct / k)
        if verbose:
            print("Got an average of {}% {} way one-shot learning accuracy".format(percent_correct,N))
        return percent_correct

createLabelsTargets = CreateLabelsTargets("train_HB.csv")
createLabelsTargets.load_csv_and_treat()
(X_train, X_test, y_train, y_test) = createLabelsTargets.create_labels_and_targets()

dataLoad = DataLoader(os.getcwd()+'/modelHB_imgs/train/',X_train, y_train)


(pairs,targets) = dataLoad.make_oneshot_task(5,X_train,y_train)


(pairs,targets) = dataLoad.getBatch(4)

for index in range(len(pairs[0])):
    imgplot = plt.imshow(array_to_img(pairs[0][index]), cmap='gray')
    plt.title(str(index) + "_ " + str(targets[index]))
    plt.show()
    imgplot2 = plt.imshow(array_to_img(pairs[1][index]), cmap='gray')
    plt.title(str(index) + "_ " + str(targets[index]))
    plt.show()

In [50]:
def buildDataSetTriplet(imgsPath, csvPath):
    """Build dataset for train and test using triplet loss
    """
    createLabelsTargets = CreateLabelsTargets(csvPath)
    createLabelsTargets.load_csv_and_treat()
    (X_train, X_test, y_train, y_test) = createLabelsTargets.create_dataset_with_image(imgsPath)
    width = 200
    height = 200
    
    X_train = X_train.reshape(X_train.shape[0], width, height, 1)
    X_test = X_test.reshape(X_test.shape[0], width, height, 1)
    print(X_train)
    print(X_test)
    dataset_train = []
    dataset_test = []
    
    #Sorting images by classes and normalize values 0=>1
    for n in range(createLabelsTargets.numberOfClasses):
        images_class_n = np.asarray([row for idx,row in enumerate(X_train_origin) if y_train_origin[idx]==n])
        dataset_train.append(images_class_n/255)
        
        images_class_n = np.asarray([row for idx,row in enumerate(X_test_origin) if y_test_origin[idx]==n])
        dataset_test.append(images_class_n/255)
        
    return dataset_train,dataset_test,X_train,y_train,X_test,y_test

In [51]:
buildDataSetTriplet(os.getcwd()+'/modelHB_imgs/train/','train_HB.csv')

[[[[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  ...

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]]


 [[[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  ...

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]]


 [[[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [255.]
   [255.]
   ...
   [255.]
   [255.]
   [255.]]

  [[255.]
   [25

NameError: name 'X_train_origin' is not defined