In [291]:
import pandas as pd
import numpy as np
import os
import glob
import numpy.random as rng
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

In [293]:
class CreateLabelsTargets:
    """Group the image file names listed in a CSV file by whale identifier.

    The CSV must contain an ``Image`` column (image file name) and a
    ``whaleID`` column (class label).

    Attributes:
        df: DataFrame read from ``path``.
        dict_classes: maps each whale id to the list of its image file names,
            in the order the rows appear in the CSV.
        X: list of image-name lists, one entry per class.
        y: list of whale ids, aligned index-by-index with ``X``.
    """
    def __init__(self,path):
        """Read the CSV at ``path`` and start with empty groupings."""
        self.df = pd.read_csv(path)
        # Created here (not only in the loaders) so the methods below can be
        # called in any order without raising AttributeError.
        self.dict_classes = {}
        self.X = []
        self.y = []

    def load_csv_and_treat(self):
        """Rebuild ``dict_classes`` from the DataFrame.

        Rows whose ``whaleID`` or ``Image`` value equals the literal
        ``'new_whale'`` are skipped.
        """
        self.dict_classes = {}
        # Walking the two columns directly avoids the per-row Series that
        # DataFrame.iterrows() builds, while keeping the CSV row order.
        for whale_id, image in zip(self.df['whaleID'], self.df['Image']):
            if whale_id == 'new_whale' or image == 'new_whale':
                continue
            self.dict_classes.setdefault(whale_id, []).append(image)

    def create_labels_and_targets(self):
        """Fill ``X`` (image lists) and ``y`` (whale ids) from ``dict_classes``.

        If the grouping has not been built yet, it is built first.
        """
        if not self.dict_classes:
            self.load_csv_and_treat()
        # Both lists follow the dict's insertion order, so X[i] belongs to y[i].
        self.X = list(self.dict_classes.values())
        self.y = list(self.dict_classes.keys())

In [294]:
class DataLoader:
    """Builds batches of image pairs and same/different targets for a siamese net.

    Args:
        path: directory that contains the image files.
        X: list with one entry per class; each entry is the list of image
            file names belonging to that class.
        y: list of class labels aligned with ``X`` (only its length is used).
        img_dim: ``(height, width)`` every image is resized to on load.
    """
    def __init__(self, path, X,y, img_dim=(300,300)):
        self.X = X
        self.y = y
        self.path = path
        self.img_dim = tuple(img_dim)

    def _load_image(self, category, index):
        """Load image ``index`` of class ``category`` as a grayscale array."""
        # os.path.join works whether or not ``path`` ends with a separator.
        full_path = os.path.join(self.path, self.X[category][index])
        img = load_img(full_path, color_mode = "grayscale", target_size=self.img_dim)
        return img_to_array(img)

    def getBatch(self,batch_size):
        """Create a batch of ``batch_size`` image pairs.

        The first ``batch_size // 2`` pairs combine images of two different
        classes (target 0); the remaining pairs combine two distinct images of
        the same class (target 1).

        Returns:
            ``(pairs, targets)`` where ``pairs`` is a list of two arrays of
            shape ``(batch_size, height, width, 1)`` and ``targets`` is an
            array of shape ``(batch_size,)``.

        Raises:
            ValueError: if different-class pairs are requested with fewer than
                two classes, or same-class pairs are requested while no class
                holds at least two images.
        """
        n_classes = len(self.y)
        n_different = batch_size // 2
        n_same = batch_size - n_different

        if n_different > 0 and n_classes < 2:
            raise ValueError("different-class pairs need at least two classes")
        # Only classes with two or more images can yield a same-class pair.
        eligible = [c for c in range(n_classes) if len(self.X[c]) > 1]
        if n_same > 0 and not eligible:
            raise ValueError("same-class pairs need a class with at least two images")

        # Sample distinct classes when possible; fall back to sampling with
        # replacement when the batch is larger than the number of classes.
        categories = rng.choice(n_classes, size=(batch_size,), replace=batch_size > n_classes)

        height, width = self.img_dim
        # Two empty arrays for the input image batch (one per branch of the net).
        pairs = [np.zeros((batch_size, height, width, 1)) for _ in range(2)]
        # Targets: zeros for the first half, ones for the same-class second half.
        targets = np.zeros((batch_size,))
        targets[n_different:] = 1

        for i in range(batch_size):
            category = int(categories[i])
            same_class = i >= n_different
            # A single-image class cannot provide two distinct images, so redraw
            # this one slot from the eligible classes (bounded, unlike retrying
            # the whole category vector until it happens to fit).
            if same_class and len(self.X[category]) < 2:
                category = int(rng.choice(eligible))

            # Select first image
            n_examples = len(self.X[category])
            idx_1 = rng.randint(0, n_examples)
            pairs[0][i] = self._load_image(category, idx_1)

            if same_class:
                # Same class: a non-zero offset modulo n_examples guarantees a
                # different image of that class.
                category_2 = category
                idx_2 = (idx_1 + rng.randint(1, n_examples)) % n_examples
            else:
                # Different class: a non-zero offset modulo n_classes guarantees
                # the second image comes from another class.
                category_2 = (category + rng.randint(1, n_classes)) % n_classes
                idx_2 = rng.randint(0, len(self.X[category_2]))

            pairs[1][i] = self._load_image(category_2, idx_2)
        return pairs, targets

    def generate(self, batch_size):
        """Endless generator of batches, so model.fit_generator can be used."""
        while True:
            pairs, targets = self.getBatch(batch_size)
            yield (pairs, targets)

# Smoke test: build the class index, then draw many batches so that any image
# that cannot be loaded shows up as a printed error.
DATA_DIR = os.path.join(os.getcwd(), 'PF_Data')
# NOTE(review): CreateLabelsTargets requires the CSV path, but the original call
# passed none (TypeError). The file name below is an assumption — TODO confirm.
TRAIN_CSV_PATH = os.path.join(DATA_DIR, 'train.csv')
# The trailing '' keeps a trailing separator on the directory path.
TRAIN_IMAGE_DIR = os.path.join(DATA_DIR, 'Train', 'Right', '')
N_SMOKE_BATCHES = 2000
SMOKE_BATCH_SIZE = 8

createLabelsTargets = CreateLabelsTargets(TRAIN_CSV_PATH)
createLabelsTargets.load_csv_and_treat()
createLabelsTargets.create_labels_and_targets()

dataLoad = DataLoader(TRAIN_IMAGE_DIR, createLabelsTargets.X, createLabelsTargets.y)

# Report each distinct failure once instead of repeating it on every batch.
seen_errors = set()
for i in range(N_SMOKE_BATCHES):
    try:
        (pairs,targets) = dataLoad.getBatch(SMOKE_BATCH_SIZE)
    except Exception as e:
        message = str(e)
        if message not in seen_errors:
            seen_errors.add(message)
            print(message)

Imagens não encontradas: w_7489.jpg