In [29]:
#import load_data
#from load_data import DataGenerator
from load_data import get_images, image_file_to_array

In [73]:
import numpy as np
import os
import random
import tensorflow as tf
from scipy import misc

class DataGenerator(object):
    """
    Data Generator capable of generating batches of Omniglot data.
    A "class" is considered a class of omniglot digits.
    """

    def __init__(self, num_classes, num_samples_per_class, config={}):
        """
        Args:
            num_classes: Number of classes for classification (K-way)
            num_samples_per_class: num samples to generate per class in one batch
            batch_size: size of meta batch size (e.g. number of functions)
        """
        self.num_samples_per_class = num_samples_per_class
        self.num_classes = num_classes

        config = {'sad':''}

        data_folder = config.get('data_folder', './omniglot_resized')
        self.img_size = config.get('img_size', (28, 28))

        self.dim_input = np.prod(self.img_size)
        self.dim_output = self.num_classes
        
       

        character_folders = [os.path.join(data_folder, family, character)
                             for family in os.listdir(data_folder)
                             if os.path.isdir(os.path.join(data_folder, family))
                             for character in os.listdir(os.path.join(data_folder, family))
                             if os.path.isdir(os.path.join(data_folder, family, character))]

        random.seed(1)
        random.shuffle(character_folders)
        num_val = 100
        num_train = 1100
        self.metatrain_character_folders = character_folders[: num_train]
        self.metaval_character_folders = character_folders[
            num_train:num_train + num_val]
        self.metatest_character_folders = character_folders[
            num_train + num_val:]

    def sample_batch(self, batch_type, batch_size, K=1, N=5):
        """
        Samples a batch for training, validation, or testing
        Args:
            batch_type: train/val/test
        Returns:
            A a tuple of (1) Image batch and (2) Label batch where
            image batch has shape [B, K, N, 784] and label batch has shape [B, K, N, N]
            where B is batch size, K is number of samples per class, N is number of classes
        """
        if batch_type == "train":
            folders = self.metatrain_character_folders
        elif batch_type == "val":
            folders = self.metaval_character_folders
        else:
            folders = self.metatest_character_folders

        #############################
        #### YOUR CODE GOES HERE ####
        all_image_batches = np.array(batch_size, K, N, 784)
        all_label_batches = np.array(batch_size, K, N, N)
        for b in range(batch_size):
            # Take N samples from all alphabet folders
            sample_paths  = random.sample(folders, N)
            sample_labels = [os.path.basename(os.path.split(family)[0]) for family in sample_paths]
            images_labels = get_images(sample_paths, sample_labels, K)
            
            # TODO: COrrect use of dimension
            all_image_batches = [i for i,l in images_labels]
            all_label_batches = [l for i,l in images_labels]
        #############################

        return all_image_batches, all_label_batches


In [83]:
np.ndarray((2,3,4,5)).shape

(2, 3, 4, 5)

In [74]:
c = {'data_folder':'./'}
data = DataGenerator(5, 1, 16)

In [75]:
c={}
c.get('data_folder', 'asd')

'asd'

In [76]:
print("# samples per class:", data.num_samples_per_class)
print("# classes:",  data.num_classes)
print("Image size:", data.img_size)
print("Input dimension:", data.dim_input)
print("Output dimension:",data.dim_output)


# samples per class: 1
# classes: 5
Image size: (28, 28)
Input dimension: 784
Output dimension: 5


In [77]:
print("# of meta train folders:", len(data.metatrain_character_folders))
print("# of meta val folders:  ", len(data.metaval_character_folders))
print("# of meta test folders: ", len(data.metatest_character_folders))

# of meta train folders: 1100
# of meta val folders:   100
# of meta test folders:  423


In [78]:
data.metatrain_character_folders[0:10]

['./omniglot_resized/Early_Aramaic/character05',
 './omniglot_resized/Malayalam/character40',
 './omniglot_resized/Ojibwe_(Canadian_Aboriginal_Syllabics)/character12',
 './omniglot_resized/Tifinagh/character09',
 './omniglot_resized/Kannada/character02',
 './omniglot_resized/Armenian/character34',
 './omniglot_resized/Gurmukhi/character18',
 './omniglot_resized/Kannada/character25',
 './omniglot_resized/Gurmukhi/character17',
 './omniglot_resized/Japanese_(katakana)/character31']

In [79]:
data.metatest_character_folders[0:3]

['./omniglot_resized/Braille/character12',
 './omniglot_resized/N_Ko/character12',
 './omniglot_resized/Glagolitic/character21']

In [80]:
data.sample_batch('train', batch_size=10)

(['Balinese',
  'Mkhedruli_(Georgian)',
  'Grantha',
  'Atemayar_Qelisayer',
  'Armenian'],
 ['./omniglot_resized/Balinese/character13/0120_10.png',
  './omniglot_resized/Mkhedruli_(Georgian)/character41/0769_13.png',
  './omniglot_resized/Grantha/character32/0382_03.png',
  './omniglot_resized/Atemayar_Qelisayer/character16/1000_16.png',
  './omniglot_resized/Armenian/character11/0037_04.png'])

In [46]:
get_images(data.metatest_character_folders[0:3],['asd','N_Ko','Glagolitic'],1)

[('N_Ko', './omniglot_resized/N_Ko/character12/0815_11.png'),
 ('Glagolitic', './omniglot_resized/Glagolitic/character21/1135_16.png'),
 ('asd', './omniglot_resized/Braille/character12/0203_15.png')]

In [56]:
[os.path.basename(os.path.split(family)[0]) for family in data.metatest_character_folders[0:3]]

['Braille', 'N_Ko', 'Glagolitic']