In [1]:
%load_ext autoreload
%autoreload 2

import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.layers import Dense, Flatten, Input, Reshape
from keras.models import Sequential, Model
from PIL import Image
from pprint import pprint

from keras_preprocessing.image import DataFrameIterator, ImageDataGenerator


# Seed both RNGs used below so the generated images and label columns are
# identical on every Restart & Run All.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Scale factor applied to the randomly drawn pixel values.
pixel_val = 1
# NOTE: np_arrays is never filled in this cell; it is kept only so the
# notebook-level name stays defined.
np_filenames, img_filenames, np_arrays = [], [], []
for i in range(8):
    np_filename = '/tmp/{}.npy'.format(i)
    img_filename = '/tmp/{}.jpg'.format(i)
    # Random 2x2 float32 image with integer values drawn from [0, 255).
    array = pixel_val * np.random.randint(0, 255, size=(2, 2)).astype('float32')
    # The .npy copy gets a trailing channel axis -> shape (2, 2, 1).
    np.save(np_filename, array[..., None])
    plt.imsave(img_filename, array)

    np_filenames.append(np_filename)
    img_filenames.append(img_filename)

# One row per generated image, plus random target columns of several kinds.
df = pd.DataFrame({'img_path': img_filenames, 'img_np_path': np_filenames})
df['regression'] = np.random.randint(1, 10, size=len(df))
df['binary'] = random.sample(['dog', 'cat'] * 30, len(df))
df['multi_class'] = random.sample(['dog', 'cat', 'horse'] * 30, len(df))
# Multi-label targets deliberately mix bare strings and lists of strings.
df['multi_label'] = random.sample(['dog', 'cat', ['dog'], ['cat'], ['cat', 'dog'], ['dog', 'cat']] * 10, len(df))
# Second column pointing at the same image files as 'img_path'.
df['img_path_extra'] = img_filenames
df['regression_extra'] = np.random.randint(10, 100, size=len(df))

df.head()

Using TensorFlow backend.


Unnamed: 0,img_path,img_np_path,regression,binary,multi_class,multi_label,img_path_extra,regression_extra
0,/tmp/0.jpg,/tmp/0.npy,3,cat,dog,cat,/tmp/0.jpg,51
1,/tmp/1.jpg,/tmp/1.npy,3,cat,cat,[dog],/tmp/1.jpg,53
2,/tmp/2.jpg,/tmp/2.npy,1,dog,cat,"[cat, dog]",/tmp/2.jpg,80
3,/tmp/3.jpg,/tmp/3.npy,2,dog,horse,cat,/tmp/3.jpg,60
4,/tmp/4.jpg,/tmp/4.npy,9,dog,cat,"[cat, dog]",/tmp/4.jpg,65


### Autoencoder (output same as input)

Images are not read again when computing the output; instead, the input batch is copied.

In [2]:
# Autoencoder setup: the same column ('img_path') is used as the input and as
# an 'image' output.
df_iter = DataFrameIterator(
    df,
    input_columns='img_path',
    output_columns=['img_path'],
    output_modes={'img_path': 'image'},
    image_size=(2, 2),
    batch_size=2,
    color_mode='grayscale'
)
print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

x, y = next(df_iter)
# check that the input is the same as the output
assert np.allclose(x[0], y[0])
# check that the output is a real copy of the input: the two must not share
# memory. Comparing id() only shows that the Python objects differ, which a
# view onto the same buffer would also satisfy.
assert not np.shares_memory(x[0], y[0])

# Tiny dense autoencoder: flatten the 2x2x1 image, squeeze it through 2 units,
# then expand back to 4 values and restore the image shape.
model = Sequential()
model.add(Flatten(input_shape=(2, 2, 1)))
model.add(Dense(2, activation='sigmoid'))
model.add(Dense(2 * 2 * 1, activation='sigmoid'))
model.add(Reshape((2, 2, 1)))
model.compile('adam', 'mse')
model.fit_generator(df_iter)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]

Outputs:
[{'column': 'img_path',
  'mode': 'image',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]


W1009 08:47:46.676331 140406748268352 deprecation_wrapper.py:119] From /home/rodrigo/anaconda3/envs/keras-dev/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.



Epoch 1/1


<keras.callbacks.callbacks.History at 0x7fb2a3a90128>

with data augmentation

In [3]:
# Autoencoder setup with random horizontal/vertical flips: the same column
# ('img_path') is used as the input and as an 'image' output.
df_iter = DataFrameIterator(
    df,
    input_columns='img_path',
    output_columns=['img_path'],
    output_modes={'img_path': 'image'},
    image_size=(2, 2),
    batch_size=2,
    color_mode='grayscale',
    image_data_generator=ImageDataGenerator(horizontal_flip=True,
                                            vertical_flip=True)
)
print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

x, y = next(df_iter)
# check that the input is the same as the output
assert np.allclose(x[0], y[0])
# check that the output is a real copy of the input: the two must not share
# memory. Comparing id() only shows that the Python objects differ, which a
# view onto the same buffer would also satisfy.
assert not np.shares_memory(x[0], y[0])

# Tiny dense autoencoder: flatten the 2x2x1 image, squeeze it through 2 units,
# then expand back to 4 values and restore the image shape.
model = Sequential()
model.add(Flatten(input_shape=(2, 2, 1)))
model.add(Dense(2, activation='sigmoid'))
model.add(Dense(2 * 2 * 1, activation='sigmoid'))
model.add(Reshape((2, 2, 1)))
model.compile('adam', 'mse')
model.fit_generator(df_iter)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]

Outputs:
[{'column': 'img_path',
  'mode': 'image',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]
Epoch 1/1


<keras.callbacks.callbacks.History at 0x7fb2a06d5b70>

### Segmentation

without data augmentation

In [4]:
# Segmentation-style setup: the image input comes from 'img_path' and the
# image target from a different column, 'img_path_extra'.
df_iter = DataFrameIterator(
    df,
    input_columns='img_path',
    output_columns='img_path_extra',
    output_modes={'img_path_extra': 'image'},
    image_size=(2, 2),
    batch_size=2,
    color_mode='grayscale',
)

print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

x, y = next(df_iter)
# Unlike the autoencoder case, input and output use different columns here.
# Both columns were filled with the same file paths on purpose, so the
# loaded input and output must still match.
assert np.allclose(x[0], y[0])

# Small dense image-to-image model: 2x2x1 -> 4 -> 2 -> 4 -> 2x2x1.
model = Sequential([
    Flatten(input_shape=(2, 2, 1)),
    Dense(2, activation='sigmoid'),
    Dense(2 * 2 * 1, activation='sigmoid'),
    Reshape((2, 2, 1)),
])
model.compile(optimizer='adam', loss='mse')
model.fit_generator(df_iter)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]

Outputs:
[{'column': 'img_path_extra',
  'mode': 'image',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]
Epoch 1/1


<keras.callbacks.callbacks.History at 0x7fb2905c2eb8>

with data augmentation

In [5]:
# Segmentation-style setup with random flips: the image input comes from
# 'img_path' and the image target from a different column, 'img_path_extra'.
flip_augmenter = ImageDataGenerator(horizontal_flip=True,
                                    vertical_flip=True)
df_iter = DataFrameIterator(
    df,
    input_columns='img_path',
    output_columns='img_path_extra',
    output_modes={'img_path_extra': 'image'},
    image_size=(2, 2),
    batch_size=2,
    color_mode='grayscale',
    image_data_generator=flip_augmenter,
)

print('\nInputs:')
pprint(df_iter.inputs)
print('\nOutputs:')
pprint(df_iter.outputs)

x, y = next(df_iter)
# Unlike the autoencoder case, input and output use different columns here,
# so each one is collected and augmented separately. Both columns were
# filled with the same file paths on purpose: if input and output match,
# they were augmented equally.
assert np.allclose(x[0], y[0])

# Small dense image-to-image model: 2x2x1 -> 4 -> 2 -> 4 -> 2x2x1.
model = Sequential([
    Flatten(input_shape=(2, 2, 1)),
    Dense(2, activation='sigmoid'),
    Dense(2 * 2 * 1, activation='sigmoid'),
    Reshape((2, 2, 1)),
])
model.compile(optimizer='adam', loss='mse')
model.fit_generator(df_iter)


Inputs:
[{'column': 'img_path',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]

Outputs:
[{'column': 'img_path_extra',
  'mode': 'image',
  'values': ['/tmp/0.jpg',
             '/tmp/1.jpg',
             '/tmp/2.jpg',
             '/tmp/3.jpg',
             '/tmp/4.jpg',
             '/tmp/5.jpg',
             '/tmp/6.jpg',
             '/tmp/7.jpg']}]
Epoch 1/1


<keras.callbacks.callbacks.History at 0x7fb29022acc0>