In [None]:

import os
from collections import OrderedDict
import itertools as it
import datetime as dt

import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import fbeta_score
import cv2

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import preprocess_input, ResNet50
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.normalization import BatchNormalization
from keras import optimizers, Input
from keras import backend as K

import time
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers import Activation
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras import backend as K
# When Keras runs on the TensorFlow backend, switch it to the "th"
# (Theano-style) image dimension ordering.
# NOTE(review): set_image_dim_ordering looks like a legacy Keras API -- confirm
# it is still available in the installed Keras version.
if K.backend()=='tensorflow':
    K.set_image_dim_ordering("th")
 
# Import Tensorflow with multiprocessing
import tensorflow as tf
import multiprocessing as mp
 
# Loading the CIFAR-10 datasets
from keras.datasets import cifar10

(x_train, y_train), (x_test, y_test) = cifar10.load_data() 
# x_train / x_test hold the images; y_train / y_test hold the matching class
# labels (CIFAR-10 object categories, not digits).
# Print the image array shapes as a quick sanity check.
print(x_train.shape, x_test.shape)


In [None]:
from random import shuffle
import numpy as np
from keras.preprocessing.image import ImageDataGenerator


class DataLoader(object):
    ''' Class-balanced batch loader for training a siamese network with triplet loss.

    Batches are built by iterating over classes (ids) rather than over images:
    each batch holds ``ids_per_batch`` classes and ``ims_per_id`` images of each
    of them, so the batch size is ``ims_per_id * ids_per_batch``.

    Constructor args:
        DATA:               Nested tuple ``((x_train, y_train), (x_test, y_test))``
                            of NumPy arrays. Images are rescaled by 1/255 and the
                            label arrays are flattened to 1-D.
        ims_per_id:         Number of images per id (class) in a batch.
        ids_per_batch:      Number of ids (classes) in each batch.
        shuffle, seed:      Stored on the instance; not consulted by the loader
                            itself (ids and images are always shuffled with the
                            global ``random`` state).
        target_image_size:  Stored as ``im_size``; no resizing is performed here.
        data_gen_args:      Keyword arguments forwarded to ``ImageDataGenerator``.
                            ``None`` (the default) means no arguments.
        num_clases:         Maximum number of distinct labels to collect.

    Attributes built by the constructor:
        labels_list:    Distinct training labels, in order of first appearance.
        train_dict:     Maps each label to the list of indices of the training
                        images that carry it.
        test_dict:      Same mapping for the test labels.
    '''

    def __init__(self, DATA, ims_per_id = 4, ids_per_batch = 3, shuffle = True,
                seed=2017, target_image_size=(32, 32), data_gen_args=None, num_clases=10):
        self.ims_per_id = ims_per_id
        self.ids_per_batch = ids_per_batch
        self.batch_size = ims_per_id * ids_per_batch
        self.im_size = target_image_size
        self.shuffle = shuffle
        self.seed = seed
        self.num_classes = num_clases
        # Copy so that neither a shared default nor the caller's dict is aliased.
        self.data_gen_args = dict(data_gen_args) if data_gen_args else {}
        self.train_dict = {}
        self.test_dict = {}
        self.labels_list = []

        (self.x_train, self.y_train), (self.x_test, self.y_test) = DATA
        self.preprocess()
        self.set_labels_list()
        self.train_dict = self.set_dict(self.y_train)
        self.test_dict = self.set_dict(self.y_test)

    def preprocess(self):
        ''' Rescale the images by 1/255 as float32 and flatten the label arrays. '''
        self.x_train = self.x_train.astype('float32') / 255.
        self.x_test = self.x_test.astype('float32') / 255.
        self.y_train = self.y_train.reshape(-1)
        self.y_test = self.y_test.reshape(-1)

    def set_labels_list(self):
        '''
        Collect the distinct training labels in order of first appearance,
        assuming the test set uses the same labels. The scan stops as soon as
        ``num_classes`` labels have been found.
        :return: None (fills ``self.labels_list``).
        '''
        self.labels_list = []
        for y in self.y_train:
            if y not in self.labels_list:
                self.labels_list.append(y)
                if len(self.labels_list) == self.num_classes:
                    break

    def set_dict(self, y):
        ''' Map every label of ``labels_list`` to the indices where it occurs in ``y``.

        Input:
            y:  1-D array of labels.
        Output:
            A dict ``{label: [index, ...]}`` with indices in ascending order.
        '''
        y = np.asarray(y)
        # Index with the boolean mask directly: wrapping the mask in a list
        # (``indices[[mask]]``) is rejected by current NumPy releases.
        return {label: np.flatnonzero(y == label).tolist()
                for label in self.labels_list}

    def get_total_steps(self):
        ''' Number of batches needed to cover the training set once, rounded up
        to an integer so it can be used directly as a step count. '''
        return int(np.ceil(len(self.y_train) / self.batch_size))

    @staticmethod
    def copy_dict(original_dict):
        ''' Copy a dict of lists so that mutating the copy's lists leaves the
        original untouched (plain assignment would share the lists).

        Input:
            original_dict:  The dictionary to copy.
        Output:
            new_dict:       A new dictionary with a fresh list per key.
        '''
        return {key: list(items) for key, items in original_dict.items()}

    def get_generator(self):
        ''' Yield ``(x_batch, y_batch)`` pairs forever.

        Each batch takes ``ids_per_batch`` ids from a shuffled pool and
        ``ims_per_id`` images of each id. The id pool is refilled and reshuffled
        when it runs short; a class's image pool is refilled and reshuffled when
        it holds fewer than ``ims_per_id`` images. Images go through an
        ``ImageDataGenerator`` built from ``data_gen_args``; labels are int32.

        Raises:
            ValueError: if ``ids_per_batch`` exceeds the number of known labels.
        '''
        if self.ids_per_batch > len(self.labels_list):
            raise ValueError(
                'ids_per_batch ({}) is larger than the number of available '
                'labels ({})'.format(self.ids_per_batch, len(self.labels_list)))

        ids_to_train = list(self.labels_list)
        shuffle(ids_to_train)
        dict_to_train = self.copy_dict(self.train_dict)
        # The generator configuration never changes, so build it once; fit()
        # is still called per batch below.
        datagen = ImageDataGenerator(**self.data_gen_args)
        while True:
            x_batch = []
            y_batch = []
            if len(ids_to_train) < self.ids_per_batch:
                ids_to_train = list(self.labels_list)
                shuffle(ids_to_train)
            for _ in range(self.ids_per_batch):
                id_ = ids_to_train.pop()
                if len(dict_to_train[id_]) < self.ims_per_id:
                    dict_to_train[id_] = list(self.train_dict[id_])
                    shuffle(dict_to_train[id_])
                for _im in range(self.ims_per_id):
                    im_id = dict_to_train[id_].pop()
                    x_batch.append(self.x_train[im_id])
                    y_batch.append(id_)

            x_batch = np.stack(x_batch, axis=0)
            datagen.fit(x_batch)
            # flow() defaults to batch_size=32; request the whole batch so that
            # x_batch and y_batch keep the same length for larger batches.
            x_batch = next(datagen.flow(x_batch, batch_size=len(x_batch),
                                        shuffle=False))
            yield x_batch, np.array(y_batch).astype(np.int32)


In [None]:


def batch_generator(img_dir_path, df, label_map, batch_size=32, shuffle=True,
                    seed=2017, target_image_size=(224, 224),
                    process_target=True, number_of_batches=None,
                    add_seed_shuffle=True, data_gen_args=None, cv2_read=True,
                    preprocess_unit=False):
    """Batch generator for keras model.

    Endlessly yields image batches read from ``img_dir_path``; after
    ``number_of_batches`` batches it starts over (reshuffling when requested).

    Args:
        img_dir_path: Directory containing ``<name>.jpg`` files.
        df: DataFrame whose rows unpack as ``(file name, tags)``; ``tags`` is a
            space-separated string of label names.
        label_map: Maps each label name to its index in the 17-slot target
            vector.
        batch_size: Number of rows consumed per batch.
        shuffle: Shuffle ``df`` before the first pass and after every pass.
        seed: Seed applied to ``np.random`` before the first shuffle.
        target_image_size: ``(height, width)`` of the produced images.
        process_target: When true, also yield the multi-hot target matrix.
        number_of_batches: Batches per pass; defaults to
            ``ceil(len(df) / batch_size)``.
        add_seed_shuffle: Re-seed ``np.random`` with ``seed + 1`` before each
            end-of-pass reshuffle.
        data_gen_args: Keyword arguments for ``ImageDataGenerator``; ``None``
            (the default) means no arguments.
        cv2_read: Read with OpenCV instead of ``keras.preprocessing.image``.
        preprocess_unit: Apply ``preprocess_input`` to every image.

    Yields:
        ``(x_batch, y_batch)`` when ``process_target`` is true, else ``x_batch``.

    Raises:
        IOError: if OpenCV cannot read an image file.
    """
    if number_of_batches is None:
        number_of_batches = int(np.ceil(df.shape[0] / batch_size))

    if data_gen_args is None:
        data_gen_args = {}

    # cv2.resize expects (width, height) whereas load_img expects
    # (height, width); flip once so both read paths give the same shape.
    cv2_size = tuple(target_image_size[::-1])

    # The generator configuration never changes, so build it once; fit() is
    # still called per batch below.
    datagen = ImageDataGenerator(**data_gen_args)

    counter = 0

    if shuffle:
        np.random.seed(seed)
        df = df.sample(frac=1)

    while True:
        idx_start = batch_size * counter
        idx_end = batch_size * (counter + 1)
        x_batch = []
        y_batch = []

        for f, tags in df.iloc[idx_start:idx_end].values:
            img_path = os.path.join(img_dir_path, '{}.jpg'.format(f))
            if cv2_read:
                img = cv2.imread(img_path)
                if img is None:
                    # imread signals failure by returning None, not by raising.
                    raise IOError('Could not read image: {}'.format(img_path))
                x = cv2.resize(img, cv2_size)
            else:
                img = image.load_img(img_path, target_size=target_image_size)
                x = image.img_to_array(img)

            x = np.expand_dims(x, axis=0)
            if preprocess_unit:
                x = preprocess_input(x)

            x_batch.append(x)

            if process_target:
                # Multi-hot encode the tags of this image.
                targets = np.zeros(17)
                for t in tags.split(' '):
                    targets[label_map[t]] = 1
                y_batch.append(targets)

        x_batch = np.concatenate(x_batch)

        datagen.fit(x_batch)
        # flow() defaults to batch_size=32; request the whole batch so nothing
        # is dropped (and x/y stay aligned) when batch_size is larger.
        x_batch = next(datagen.flow(x_batch, batch_size=len(x_batch),
                                    shuffle=False))

        counter += 1
        if process_target:
            yield x_batch, np.array(y_batch)
        else:
            yield x_batch

        if counter == number_of_batches:
            if shuffle:
                if add_seed_shuffle:
                    np.random.seed(seed + 1)
                df = df.sample(frac=1)
            counter = 0