In [1]:
import numpy as np
import progressbar 
import random 
import os
import cv2

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer

from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.datasets import cifar10


import matplotlib.pyplot as plt
%matplotlib inline

from helpers import HDF5DatasetWriter
from helpers import Utils

Using TensorFlow backend.


In [14]:
# Destination HDF5 files for the exported train and test splits.
output_path_train, output_path_test = (
    "../input/datasets/cifar_rgbmean_train.hdf5",
    "../input/datasets/cifar_rgbmean_test.hdf5",
)

In [7]:
# load_data() hands back a (train, test) pair, each an (images, labels) tuple;
# unpack it in two steps so each split is named before it is taken apart.
cifar_train, cifar_test = cifar10.load_data()
x_train, y_train = cifar_train
x_test, y_test = cifar_test

In [12]:
def export_rgbmean_data(x, y, output_path, batch_size, buffer_size, rgb_mean=None):
    """Mean-centre an image set per channel, preprocess it and write it to HDF5.

    Steps, in order:
      1. One-hot encode ``y`` with ``LabelBinarizer``.
      2. Obtain one mean value per channel (computed over every pixel of
         ``x`` unless ``rgb_mean`` is supplied).
      3. Batch by batch: cast to float32, subtract the channel means, run
         ``img_to_array`` and ``imagenet_utils.preprocess_input`` and hand the
         result, together with the matching labels, to ``HDF5DatasetWriter``.

    Parameters
    ----------
    x : array of images, indexed as (sample, height, width, channel).
        assumes channels-last layout -- TODO confirm for non-CIFAR inputs.
    y : class labels, one per image (anything ``LabelBinarizer`` accepts).
    output_path : str
        Path passed to ``HDF5DatasetWriter``.
    batch_size : int
        Number of images processed and handed to the writer per step.
    buffer_size : int
        Forwarded to ``HDF5DatasetWriter`` as ``bufSize``.
    rgb_mean : sequence of float, optional
        Per-channel means to subtract, in the same channel order as ``x``.
        When omitted the means are computed from ``x`` itself (the original
        behaviour). Pass the training-set means (for example
        ``x_train.mean(axis=(0, 1, 2))``) when exporting a validation or test
        split so every split is centred with the same statistics.

    Raises
    ------
    ValueError
        If ``x`` is not 4-D, if ``x`` and ``y`` disagree on the number of
        samples, or if ``rgb_mean`` does not hold one value per channel.
    """
    x = np.asarray(x)
    if x.ndim != 4:
        raise ValueError(
            "x must be a 4-D (samples, height, width, channels) array, "
            "got shape %r" % (x.shape,)
        )
    n_samples, n_channels = x.shape[0], x.shape[-1]

    # Binarise first so the label dimensions come from the data instead of a
    # hard-coded class count.
    labels = LabelBinarizer().fit_transform(y)
    if labels.shape[0] != n_samples:
        raise ValueError(
            "x holds %d samples but y holds %d labels" % (n_samples, labels.shape[0])
        )

    if rgb_mean is None:
        # Every image has the same size, so the mean over all pixels equals the
        # mean of the per-image means the earlier loop-based version produced.
        channel_means = x.mean(axis=(0, 1, 2), dtype="float64")
    else:
        channel_means = np.asarray(rgb_mean, dtype="float64")
        if channel_means.shape != (n_channels,):
            raise ValueError(
                "rgb_mean must hold %d values (one per channel), got shape %r"
                % (n_channels, channel_means.shape)
            )
    # Match the dtype of the batches so the subtraction stays in float32.
    channel_means = channel_means.astype("float32")

    dataset = HDF5DatasetWriter(x.shape, labels.shape, output_path, bufSize=buffer_size)
    try:
        widgets = ["Exporting Data: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(maxval=n_samples, widgets=widgets).start()

        for start in range(0, n_samples, batch_size):
            stop = min(start + batch_size, n_samples)

            # astype() copies, so the caller's array is never modified.
            batch = x[start:stop].astype("float32")
            batch -= channel_means  # broadcasts over the trailing channel axis

            images = np.stack([img_to_array(image) for image in batch])
            # NOTE(review): with its default arguments preprocess_input is
            # expected to reorder the channels and subtract the ImageNet
            # channel means, i.e. a second centring on top of the dataset mean
            # removed above -- confirm this is intended.
            images = imagenet_utils.preprocess_input(images)

            # Hand the writer a plain C-contiguous array rather than whatever
            # view preprocess_input returned.
            dataset.add(np.ascontiguousarray(images), labels[start:stop])
            pbar.update(stop)  # report samples completed, not the batch start
    finally:
        # Always release the writer, even when a batch fails part-way through.
        dataset.close()
    pbar.finish()

In [13]:
# Write the training split: 32 images per step, writer buffer of 1000.
export_rgbmean_data(
    x_train,
    y_train,
    output_path_train,
    batch_size=32,
    buffer_size=1000,
)

Exporting Data: 100% |##########################################| Time: 0:00:08


In [15]:
# Write the test split with the same batch and buffer settings.
# NOTE(review): the channel means subtracted here are computed from x_test
# itself, not from the training split -- confirm that is the intended
# normalisation.
export_rgbmean_data(
    x_test,
    y_test,
    output_path_test,
    batch_size=32,
    buffer_size=1000,
)

Exporting Data: 100% |##########################################| Time: 0:00:01
