In [31]:
%load_ext autoreload
%autoreload 2

import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.layers import Dense, Flatten, Input, Reshape
from keras.models import Sequential, Model
from PIL import Image
from pprint import pprint

from keras_preprocessing.image import DataFrameIterator, ImageDataGenerator


# Seed both random number generators used below so the generated pixel
# arrays and label columns are identical on every Restart & Run All.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Number of dummy samples; each one gets a .npy file and a .jpg file.
n_samples = 5

# Scalar multiplier applied to every generated pixel array.
pixel_val = 1

np_filenames, img_filenames = [], []
for i in range(n_samples):
    np_filename = '/tmp/{}.npy'.format(i)
    img_filename = '/tmp/{}.jpg'.format(i)
    # 2x2 array of random integers (upper bound 255 is exclusive), as float32.
    array = pixel_val * np.random.randint(0, 255, size=(2, 2)).astype('float32')
    # The .npy copy gets an extra trailing axis, i.e. shape (2, 2, 1).
    np.save(np_filename, array[..., None])
    # The image copy is written from the plain 2-D array.
    plt.imsave(img_filename, array)
    np_filenames.append(np_filename)
    img_filenames.append(img_filename)

# One row per sample: the two file paths plus several kinds of dummy targets.
df = pd.DataFrame({'img_path': img_filenames, 'img_np_path': np_filenames})
# Integer targets drawn from 1..9 (upper bound is exclusive).
df['regression'] = np.random.randint(1, 10, size=len(df))
# Class names are drawn without replacement from a repeated pool so the pool
# is always larger than the number of rows.
df['binary'] = random.sample(['dog', 'cat'] * 30, len(df))
df['multi_class'] = random.sample(['dog', 'cat', 'horse'] * 30, len(df))
# The multi-label pool deliberately mixes bare strings and lists of strings.
df['multi_label'] = random.sample(['dog', 'cat', ['dog'], ['cat'], ['cat', 'dog'], ['dog', 'cat']] * 10, len(df))
# Second image column pointing at the same files, and a second numeric column.
df['img_path_extra'] = img_filenames
df['regression_extra'] = np.random.randint(10, 100, size=len(df))

df.head()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Unnamed: 0,img_path,img_np_path,regression,binary,multi_class,multi_label,img_path_extra,regression_extra
0,/tmp/0.jpg,/tmp/0.npy,1,dog,cat,[dog],/tmp/0.jpg,93
1,/tmp/1.jpg,/tmp/1.npy,1,cat,dog,"[dog, cat]",/tmp/1.jpg,83
2,/tmp/2.jpg,/tmp/2.npy,8,cat,horse,[cat],/tmp/2.jpg,90
3,/tmp/3.jpg,/tmp/3.npy,6,cat,cat,[dog],/tmp/3.jpg,17
4,/tmp/4.jpg,/tmp/4.npy,3,cat,cat,cat,/tmp/4.jpg,99


### Using column name

In [21]:
# Build the iterator from a single image-path column passed as a plain string.
df_iter = DataFrameIterator(df, input_columns='img_path', batch_size=2, image_size=(2, 2))

# Show what the iterator registered as its inputs and as its outputs.
print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

# Draw one batch and report the shape of every array it contains.
x = next(df_iter)
print('\nNumber of outputs:', len(x))
for position, batch_array in enumerate(x, start=1):
    print('\nShape of output {}:'.format(position), batch_array.shape)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg',
             '/tmp/8.jpg',
             '/tmp/9.jpg',
             '/tmp/10.jpg',
             '/tmp/11.jpg',
             '/tmp/12.jpg',
             '/tmp/13.jpg',
             '/tmp/14.jpg',
             '/tmp/15.jpg',
             '/tmp/16.jpg',
             '/tmp/17.jpg',
             '/tmp/18.jpg',
             '/tmp/19.jpg']}]

Outputs:
[]

Number of outputs: 1

Shape of output 1: (2, 2, 2, 3)


### Using column name in list

In [22]:
# Build the iterator from a single image-path column passed as a
# one-element list.
df_iter = DataFrameIterator(
    df,
    input_columns=['img_path'],
    image_size=(2, 2),
    batch_size=2
)

# `pprint` comes from the import cell at the top of the notebook; it is not
# re-imported here.
print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

# Draw one batch and report the shape of every array it contains.
x = next(df_iter)
print('\nNumber of outputs:', len(x))
for i, x_i in enumerate(x, 1):
    print('\nShape of output {}:'.format(i), x_i.shape)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg',
             '/tmp/8.jpg',
             '/tmp/9.jpg',
             '/tmp/10.jpg',
             '/tmp/11.jpg',
             '/tmp/12.jpg',
             '/tmp/13.jpg',
             '/tmp/14.jpg',
             '/tmp/15.jpg',
             '/tmp/16.jpg',
             '/tmp/17.jpg',
             '/tmp/18.jpg',
             '/tmp/19.jpg']}]

Outputs:
[]

Number of outputs: 1

Shape of output 1: (2, 2, 2, 3)


### Two input columns at the same time

In [23]:
# Build the iterator from two image-path columns at once.
df_iter = DataFrameIterator(
    df,
    input_columns=['img_path', 'img_path_extra'],
    image_size=(2, 2),
    batch_size=2
)

# `pprint` comes from the import cell at the top of the notebook; it is not
# re-imported here.
print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

# Draw one batch and report the shape of every array it contains.
x = next(df_iter)
print('\nNumber of outputs:', len(x))
for i, x_i in enumerate(x, 1):
    print('\nShape of output {}:'.format(i), x_i.shape)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg',
             '/tmp/8.jpg',
             '/tmp/9.jpg',
             '/tmp/10.jpg',
             '/tmp/11.jpg',
             '/tmp/12.jpg',
             '/tmp/13.jpg',
             '/tmp/14.jpg',
             '/tmp/15.jpg',
             '/tmp/16.jpg',
             '/tmp/17.jpg',
             '/tmp/18.jpg',
             '/tmp/19.jpg']},
 {'column': 'img_path_extra',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg',
             '/tmp/8.jpg',
             '/tmp/9.jpg',
             '/tmp/10.jpg',
             '/tmp/11.jpg',
             '/tmp/12.jpg',
             '/tmp/13.jpg',
   

### Two input columns at the same time with augmentation

In [27]:
# Build the iterator from two image-path columns and attach a generator that
# has random horizontal and vertical flips enabled.
df_iter = DataFrameIterator(
    df,
    input_columns=['img_path', 'img_path_extra'],
    image_size=(2, 2),
    batch_size=2,
    image_data_generator=ImageDataGenerator(horizontal_flip=True,
                                            vertical_flip=True)
)

# `pprint` comes from the import cell at the top of the notebook; it is not
# re-imported here.
print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

x = next(df_iter)

# Both columns point at the same files, so the two batches must be equal if
# the augmentation was applied identically to both of them.
assert np.allclose(x[0], x[1]), \
    'augmentation differs between the two image batches'

print('\nNumber of outputs:', len(x))
for i, x_i in enumerate(x, 1):
    print('\nShape of output {}:'.format(i), x_i.shape)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg',
             '/tmp/8.jpg',
             '/tmp/9.jpg',
             '/tmp/10.jpg',
             '/tmp/11.jpg',
             '/tmp/12.jpg',
             '/tmp/13.jpg',
             '/tmp/14.jpg',
             '/tmp/15.jpg',
             '/tmp/16.jpg',
             '/tmp/17.jpg',
             '/tmp/18.jpg',
             '/tmp/19.jpg']},
 {'column': 'img_path_extra',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg',
             '/tmp/8.jpg',
             '/tmp/9.jpg',
             '/tmp/10.jpg',
             '/tmp/11.jpg',
             '/tmp/12.jpg',
             '/tmp/13.jpg',
   