# How to Scale Image Pixel Data with Keras

In [2]:
from keras.datasets import mnist

# Fetch MNIST as (images, labels) pairs for the train and test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Pair each split's display name with its image and label arrays so both
# summaries below can iterate over the same structure.
splits = (
    ('Train', train_images, train_labels),
    ('Test', test_images, test_labels),
)

# Shapes of the image and label arrays in each split.
for split_name, split_images, split_labels in splits:
    print(split_name, split_images.shape, split_labels.shape)

# Raw pixel statistics of each split: min, max, mean, standard deviation.
for split_name, split_images, _split_labels in splits:
    print(split_name, split_images.min(), split_images.max(),
          split_images.mean(), split_images.std())

Train (60000, 28, 28) (60000,)
Test (10000, 28, 28) (10000,)
Train 0 255 33.318421449829934 78.56748998339798
Test 0 255 33.791224489795916 79.17246322228644


# How to Normalize Images With ImageDataGenerator
## - Pixel values range from 0 to 255; neural network models generally prefer inputs scaled to the range 0 to 1.
## - Scaling data to the range of 0 to 1 is traditionally referred to as normalization.

In [11]:
# example of normalizing an image dataset
from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
# load dataset
(trainX, trainY), (testX, testY) = mnist.load_data()
# layout: count x width x height
# trainX : 60000 x 28 x 28
# trainY : 60000,
# testX : 10000 x 28 x 28
# testY : 10000,
# reshape dataset to have a single channel
# (the images are square, 28 x 28, so the width/height order is immaterial here)
width, height, channels = trainX.shape[1], trainX.shape[2], 1
trainX = trainX.reshape((trainX.shape[0], width, height, channels))
testX = testX.reshape((testX.shape[0], width, height, channels))
# trainX : 60000 x 28 x 28 x 1
# testX : 10000 x 28 x 28 x 1

# confirm scale of pixels before any scaling is applied
print('Train min=%.3f, max=%.3f' % (trainX.min(), trainX.max()))
print('Test min=%.3f, max=%.3f' % (testX.min(), testX.max()))

# create generator (1.0/255.0 = 0.003921568627451)
datagen = ImageDataGenerator(rescale=1.0/255.0)

# Note: there is no need to fit the generator in this case, because the
# rescale factor is a fixed constant rather than a statistic of the dataset
# prepare iterators to scale images
# batch: groups the training images and labels 64 at a time and normalizes them
train_iterator = datagen.flow(trainX, trainY, batch_size=64)
# batch: groups the test images and labels 64 at a time and normalizes them
test_iterator = datagen.flow(testX, testY, batch_size=64)

# len() of an iterator is its number of batches per pass over the data
print('Batches train=%d, test=%d' % (len(train_iterator), len(test_iterator)))
# confirm the scaling works
# use the builtin next() rather than the iterator's .next() method, which is
# not available on every Keras version
batchX, batchy = next(train_iterator)
print('Batch shape=%s, min=%.3f, max=%.3f' % (batchX.shape, batchX.min(), batchX.max()))

Train min=0.000, max=255.000
Test min=0.000, max=255.000
Batches train=938, test=157
Batch shape=(64, 28, 28, 1), min=0.000, max=1.000


# How to Center Images With ImageDataGenerator
## - Another popular pixel scaling method is to calculate the mean pixel value across the entire training dataset and then subtract it from each image (= centering).
### - Result: the distribution of pixel values is centered on zero (i.e., the mean pixel value of the centered images is zero).
### - It requires that the statistic be calculated on the training dataset before scaling is applied.

In [3]:
# example of centering an image dataset
from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
# load dataset
(trainX, trainy), (testX, testy) = mnist.load_data()
# reshape dataset to have a single channel
width, height, channels = trainX.shape[1], trainX.shape[2], 1
trainX = trainX.reshape((trainX.shape[0], width, height, channels))
testX = testX.reshape((testX.shape[0], width, height, channels))
# report the mean pixel value over each whole dataset (not per image)
print('Means train=%.3f, test=%.3f' % (trainX.mean(), testX.mean()))

# create generator that centers pixel values
datagen = ImageDataGenerator(featurewise_center=True)
# calculate the mean on the training dataset
datagen.fit(trainX)
# the fitted mean is held as a size-1 array for this single-channel data;
# .item() extracts the scalar explicitly instead of relying on implicit
# array-to-float conversion inside the format operator
print('Data Generator Mean: %.3f' % datagen.mean.item())

# demonstrate effect on a single batch of samples
iterator = datagen.flow(trainX, trainy, batch_size=64)
# get a batch (builtin next() rather than .next(), which is not available on
# every Keras version)
batchX, batchy = next(iterator)
# mean pixel value in the batch
print(batchX.shape, batchX.mean())
# demonstrate effect on entire training dataset: one batch holding every
# training image, unshuffled
iterator = datagen.flow(trainX, trainy, batch_size=len(trainX), shuffle=False)
# get a batch
batchX, batchy = next(iterator)
# mean pixel value in the batch
print(batchX.shape, batchX.mean())
# the mean over the full centered dataset comes out close to 0

Means train=33.318, test=33.791
Data Generator Mean: 33.318
(64, 28, 28, 1) 1.4372919
(60000, 28, 28, 1) -1.9512918e-05


# How to Standardize Images With ImageDataGenerator
## - Standardization assumes that the distribution of the data is Gaussian and shifts it to have a mean of zero and a standard deviation of one.
## - Data with this distribution is referred to as a standard Gaussian.
## - Standardization of images is achieved by subtracting the mean pixel value and dividing the result by the standard deviation of the pixel values.

In [1]:
# example of standardizing an image dataset
from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
# load dataset
(trainX, trainy), (testX, testy) = mnist.load_data()
# reshape dataset to have a single channel
width, height, channels = trainX.shape[1], trainX.shape[2], 1
trainX = trainX.reshape((trainX.shape[0], width, height, channels))
testX = testX.reshape((testX.shape[0], width, height, channels))
# report pixel means and standard deviations over each whole dataset
print('Statistics train=%.3f (%.3f), test=%.3f (%.3f)' % (trainX.mean(), trainX.std(), testX.mean(), testX.std()))
# create generator that centers and standardizes pixel values
datagen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
# calculate the mean and standard deviation on the training dataset
datagen.fit(trainX)
# the fitted statistics are held as size-1 arrays for this single-channel
# data; .item() extracts the scalars explicitly instead of relying on
# implicit array-to-float conversion inside the format operator
print('Data Generator mean=%.3f, std=%.3f' % (datagen.mean.item(), datagen.std.item()))
# demonstrate effect on a single batch of samples
iterator = datagen.flow(trainX, trainy, batch_size=64)
# get a batch (builtin next() rather than .next(), which is not available on
# every Keras version)
batchX, batchy = next(iterator)
# pixel stats in the batch
print(batchX.shape, batchX.mean(), batchX.std())
# demonstrate effect on entire training dataset: one batch holding every
# training image, unshuffled
iterator = datagen.flow(trainX, trainy, batch_size=len(trainX), shuffle=False)
# get a batch
batchX, batchy = next(iterator)
# pixel stats in the batch
print(batchX.shape, batchX.mean(), batchX.std())

Using TensorFlow backend.


Statistics train=33.318 (78.567), test=33.791 (79.172)
Data Generator mean=33.318, std=78.567
(64, 28, 28, 1) 0.009005004 1.012424
(60000, 28, 28, 1) -3.4560264e-07 0.9999998
