In [1]:
import os
from typing import Iterable, Tuple

import numpy as np
from PIL import Image, ImageOps

In [2]:
def listdir(path):
    """
    Lazily yield the entries of a directory in sorted order, skipping
    every entry whose name begins with a dot.

    :param path: directory to list
    :return: generator of entry names (str)
    """
    entries = sorted(os.listdir(path))
    yield from (name for name in entries if not name.startswith('.'))

In [3]:
def load_dataset(root='dataset', classes=('Cats', 'Dogs')):
    """
    Load every readable image found under ``root/<class>/`` as a grayscale array.

    Files that cannot be opened or decoded are reported on stdout
    (class index, file name, error) and skipped.

    :param root: directory that contains one sub-directory per class
    :param classes: sub-directory names; the position of a name in this
        sequence is the integer label given to its images
    :return: tuple ``(data, labels)`` - ``data`` is a list with one array per
        loaded image (image sizes may differ), ``labels`` is a 1-D array of
        dtype ``byte`` holding the class index of each image, in the same order
    """
    data = []
    labels = []
    for i, c in enumerate(classes):
        path = os.path.join(root, c)
        for f in listdir(path):
            try:
                # The context manager closes the underlying file handle even
                # when decoding fails part-way through.
                with Image.open(os.path.join(path, f)) as img:
                    img_data = np.asarray(ImageOps.grayscale(img))
            except Exception as exc:
                # Best-effort loading: report the unreadable file and move on.
                # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
                # still propagate, so the load can be interrupted.
                print(i, f, repr(exc))
                continue
            data.append(img_data)
            labels.append(i)
    # 'byte' is a signed 8-bit integer, which is plenty for a handful of classes.
    labels = np.asarray(labels, dtype='byte')
    return data, labels

In [4]:
# Read the images of every class from disk; files that fail to load are
# printed by load_dataset() and left out of both results.
data, labels = load_dataset()



In [5]:
def find_max_dim(data: Iterable[np.ndarray]) -> Tuple[int, int]:
    """
    Returns the max value of each of the first two dimensions from the images.

    The input is traversed exactly once, so any iterable (including a
    generator) is accepted, not only a list.

    :param data: iterable of arrays that have at least two dimensions
    :return: tuple of int: (height, width)
    :raises ValueError: if ``data`` contains no images
    """
    shapes = [(image.shape[0], image.shape[1]) for image in data]
    if not shapes:
        # max() on an empty sequence would raise a far less helpful message.
        raise ValueError("find_max_dim() needs at least one image")

    heights, widths = zip(*shapes)
    return max(heights), max(widths)

In [7]:
# Zero-pad each image at the bottom and on the right so that all of them
# end up with the largest height and the largest width found in the set.
max_height, max_width = find_max_dim(data)
mapped_data = (
    np.pad(
        image,
        ((0, max_height - image.shape[0]), (0, max_width - image.shape[1])),
        'constant',
        constant_values=0,
    )
    for image in data
)
data = list(mapped_data)

In [8]:
# np.dstack joins the arrays along the third axis, so for the equally sized
# 2-D images produced by the padding step the image index is the LAST axis:
# the result has shape (max_height, max_width, number_of_images).
# NOTE: `data` is rebound from a list to a single ndarray here, so this cell
# is meant to run once per load.
data = np.dstack(data)
data.shape

In [14]:
# Cast to half precision and divide by 255 (presumably the inputs are 8-bit
# pixel values, which this maps into the range 0..1).
# NOTE: the cell reassigns `data` from itself, so running it a second time
# divides the already scaled values by 255 again.
data = data.astype('float16')/255

In [15]:
# Show the array contents followed by its shape, one per line.
print(data, data.shape, sep='\n')

[[[0.655  0.153  0.1137 ... 0.1647 0.9883 0.9453]
  [0.655  0.1569 0.1059 ... 0.2    0.9883 0.957 ]
  [0.6587 0.1608 0.0902 ... 0.2196 0.9883 0.965 ]
  ...
  [0.8    0.     0.     ... 0.306  0.     0.    ]
  [0.796  0.     0.     ... 0.2627 0.     0.    ]
  [0.792  0.     0.     ... 0.2432 0.     0.    ]]

 [[0.655  0.1608 0.1294 ... 0.1686 0.992  0.949 ]
  [0.655  0.1647 0.1255 ... 0.1765 0.992  0.957 ]
  [0.6587 0.1647 0.1137 ... 0.1765 0.9883 0.961 ]
  ...
  [0.8    0.     0.     ... 0.2783 0.     0.    ]
  [0.796  0.     0.     ... 0.2471 0.     0.    ]
  [0.796  0.     0.     ... 0.2354 0.     0.    ]]

 [[0.655  0.1726 0.1372 ... 0.149  0.996  0.9883]
  [0.655  0.1726 0.149  ... 0.1412 0.996  0.992 ]
  [0.6587 0.1726 0.153  ... 0.1294 0.992  0.992 ]
  ...
  [0.8037 0.     0.     ... 0.2783 0.     0.    ]
  [0.8    0.     0.     ... 0.2471 0.     0.    ]
  [0.796  0.     0.     ... 0.2354 0.     0.    ]]

 ...

 [[0.     0.     0.5605 ... 0.     0.3647 0.    ]
  [0.     0.     0.5

In [16]:
# Write the image array and the label array to the current working directory
# in NumPy's binary format (np.save appends ".npy" when the name lacks it).
# allow_pickle=False makes np.save refuse to pickle object arrays instead of
# silently falling back to pickle.
np.save("data_16", data, allow_pickle = False)
np.save("labels", labels, allow_pickle = False)