In [1]:
from keras.preprocessing.image import array_to_img, img_to_array, load_img
import numpy as np
import os
from zipfile import ZipFile

Using TensorFlow backend.


In [2]:
def load(zipfilename, ratio=4, task='classification'):
    """Load labelled images from a zip archive into a normalized array.

    Every file entry of the archive is opened as an image, shrunk with
    ``thumbnail`` to fit ``(640 / ratio, 480 / ratio)`` and rescaled with
    ``(pixel - 128) / 128``. The label of an entry is the integer that
    precedes the first ``_`` of its base name (``3_foo.jpg`` -> ``3``).

    Args:
        zipfilename: Path of the zip archive, passed to ``ZipFile``.
        ratio: Divisor applied to the 640x480 reference size.
        task: When ``'classification'`` the labels are one-hot encoded with
            ``vectorize_labels``; any other value keeps the integer labels.

    Returns:
        Tuple ``(X, y)``. ``X`` is a float32 array of shape
        ``(n_images, image_height, image_width, 3)``; ``y`` is ``np.array``
        of the (optionally one-hot encoded) labels, in archive order.

    Raises:
        ValueError: If an entry name has no ``<int>_`` prefix, or if an image
            does not have the expected shape after ``thumbnail``.
    """
    image_width = 640
    image_height = 480
    channels = 3

    image_width = int(image_width / ratio)
    image_height = int(image_height / ratio)

    with ZipFile(zipfilename) as archive:
        # Directory entries (names ending in "/") hold no image data. Skipping
        # them keeps the row count of X equal to the number of images and
        # avoids trying to parse a label out of a folder name.
        entries = [entry for entry in archive.infolist()
                   if not entry.filename.endswith('/')]

        X = np.empty(shape=(len(entries), image_height,
                            image_width, channels), dtype=np.float32)
        y = []
        for i, entry in enumerate(entries, start=1):
            # Zip member names always use "/" as separator; parse the label
            # from the base name so archives with sub-folders work too.
            basename = entry.filename.rsplit('/', 1)[-1]
            prefix, separator, _ = basename.partition('_')
            if not separator:
                raise ValueError(
                    "Cannot parse label: no '_' in entry name {!r}".format(
                        entry.filename))
            y.append(int(prefix))

            with archive.open(entry) as file:
                img = load_img(file)
                # thumbnail() keeps the aspect ratio and never enlarges, so
                # only images matching the reference geometry end up with
                # exactly (image_width, image_height).
                img.thumbnail((image_width, image_height))
                x = img_to_array(img)

            if x.shape != X.shape[1:]:
                raise ValueError(
                    "Image {!r} has shape {} after resizing, expected {}".format(
                        entry.filename, x.shape, X.shape[1:]))

            X[i - 1] = (x - 128.0) / 128.0
            if i % 250 == 0:
                print("{} images to array".format(i))

        if task == 'classification':
            print('vectorization')
            y = vectorize_labels(y)

        print('Loaded!')
        return X, np.array(y)

In [6]:
def vectorize_labels(labels):
    """One-hot encode a sequence of labels.

    The distinct labels are sorted, and each label is mapped to a vector with
    a single 1 at the position of its class in that sorted order.

    Args:
        labels: Iterable of hashable, mutually comparable labels.

    Returns:
        A list with one 1-D NumPy array (length = number of distinct labels)
        per input label, in input order.
    """
    # Column index of every class, following the sorted order of the classes.
    position_of = {cls: pos for pos, cls in enumerate(sorted(set(labels)))}
    width = len(position_of)

    encoded = []
    for label in labels:
        one_hot = np.zeros(width)
        one_hot[position_of[label]] = 1
        encoded.append(one_hot)
    return encoded

In [7]:
# Load the archive with load()'s defaults: ratio=4 and task='classification',
# so y holds one-hot encoded labels.
X, y = load('br-cl.zip')

vectorization
1
Loaded!


In [5]:
# Show the label array returned by load() as a quick sanity check.
print(y)

[[ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]]


In [15]:
# Directory holding the images to convert.
folder = "./Classification/all"

# Keep regular files only; sub-directories are ignored.
onlyfiles = list(filter(lambda name: os.path.isfile(os.path.join(folder, name)),
                        os.listdir(folder)))

# train_files and file_names both list every file in os.listdir order;
# y_train holds the integer parsed from the text before the first "_" of
# each file name.
train_files = list(onlyfiles)
file_names = list(onlyfiles)
y_train = [int(name[0:name.find("_")]) for name in onlyfiles]

print("Files in train_files: %d" % len(train_files))

# Target size: the original 640x480 dimensions divided by `ratio`.
ratio = 4
image_width = int(640 / ratio)
image_height = int(480 / ratio)

channels = 3
nb_classes = 1

# One row per file, laid out as (height, width, channels).
dataset = np.ndarray(shape=(len(train_files), image_height, image_width, channels),
                     dtype=np.float32)

i = 0
for i, _file in enumerate(train_files, start=1):
    img = load_img(folder + "/" + _file)  # PIL image
    img.thumbnail((image_width, image_height))
    # Convert to a NumPy array, then shift and scale the values by 128.
    x = (img_to_array(img) - 128.0) / 128.0
    dataset[i - 1] = x
    if i % 250 == 0:
        print("%d images to array" % i)
print("All images to array!")

Files in train_files: 3059
250 images to array
500 images to array
750 images to array
1000 images to array
1250 images to array
1500 images to array
1750 images to array
2000 images to array
2250 images to array
2500 images to array
2750 images to array
3000 images to array
All images to array!


In [16]:
# Persist the image array; the file name embeds the scaled height and width
# (X_120_160.npy for 640x480 with ratio 4).
np.save('X_{}_{}.npy'.format(image_height,image_width), dataset)

In [17]:
# Persist the integer labels parsed from the file names; they are in the same
# order as the rows of `dataset`.
np.save('y_{}_{}.npy'.format(image_height,image_width), y_train)

In [14]:
# Persist the file names, in the same order as the rows of `dataset` and the
# entries of `y_train`.
np.save('file_names.npy', file_names)