In [1]:
%%shell
gdown -q 1VEG0qsiD5CZqyKSv5k6TceJbLOCt2yp7
echo 'download: done => file: animals.zip'
rm -rf ./animals/
unzip -q animals.zip
echo 'Data files in: /content/animals/'

download: done => file: animals.zip
Data files in: /content/animals/




In [3]:
import os
import glob
import functools as F
import random

class DataLoader(object):
  def __init__(self, path:str, batch_size=5):
    self.path_to_data = path
    self.batch_size = batch_size
    self.prepare()

  def prepare(self):
    classes = os.listdir(self.path_to_data)
    classes = list(filter(lambda pt: os.path.isdir(os.path.join(self.path_to_data, pt)), classes))

    files = []
    labels = []
    for folder in classes:
      file_filter = os.path.join(self.path_to_data, folder, '*')
      c_files = glob.glob(file_filter)
      c_labels = [folder]*len(c_files)
      files.append(c_files)
      labels.append(c_labels)

    self.files = F.reduce(lambda accL, item: accL + item, files, [])
    self.labels = F.reduce(lambda accL, item: accL + item, labels, [])
    self.file2label = dict(zip(self.files, self.labels))

  def __iter__(self):
    self.start_idx = 0
    self.files = random.sample(self.files, len(self.files))
    return self

  def __next__(self):
    if self.start_idx < len(self.files):
      end_idx = min(self.start_idx + self.batch_size, len(self.files))
      sel_files = self.files[self.start_idx:end_idx]
      self.start_idx += self.batch_size
      labels = [self.file2label[file] for file  in sel_files]

      return sel_files, labels
    else:
      raise StopIteration



In [5]:
loader = DataLoader("./animals/")
for idx, (files, labels) in enumerate(loader):
  print(f"batch: {idx}")
  print("files: ", files)
  print("labels: ", labels)

batch: 0
files:  ['./animals/horse/horse-4.jpg', './animals/cat/cat-6.jpg', './animals/dog/dog-3.jpg', './animals/dog/dog-2.jpg', './animals/chicken/c1.jpg']
labels:  ['horse', 'cat', 'dog', 'dog', 'chicken']
batch: 1
files:  ['./animals/cat/cat-1.jpg', './animals/dog/dog-1.jpg', './animals/horse/horse-1.jpg', './animals/horse/horse-2.jpg', './animals/dog/dog-7.jpg']
labels:  ['cat', 'dog', 'horse', 'horse', 'dog']
batch: 2
files:  ['./animals/chicken/c2.jpg', './animals/cat/cat-3.jpeg', './animals/cat/cat-2.jpg', './animals/chicken/c4.jpeg', './animals/horse/horse-3.jpeg']
labels:  ['chicken', 'cat', 'cat', 'chicken', 'horse']
batch: 3
files:  ['./animals/dog/dog-4.jpeg', './animals/dog/dog-6.jpg', './animals/cat/cat-5.jpeg', './animals/horse/horse-5.jpg', './animals/cat/cat-4.jpg']
labels:  ['dog', 'dog', 'cat', 'horse', 'cat']
batch: 4
files:  ['./animals/goose/g3.jpg', './animals/chicken/c5.jpg', './animals/dog/dog-5.jpeg', './animals/goose/g2.jpg', './animals/chicken/c3.jpg']
labe

In [15]:
def load_data(path_to_data:str, batch_size=5):
    classes = os.listdir(path_to_data)
    classes = list(filter(lambda pt: os.path.isdir(os.path.join(path_to_data, pt)), classes))

    files = []
    labels = []
    for folder in classes:
      file_filter = os.path.join(path_to_data, folder, '*')
      c_files = glob.glob(file_filter)
      c_labels = [folder]*len(c_files)
      files.append(c_files)
      labels.append(c_labels)

    files = F.reduce(lambda accL, item: accL + item, files, [])
    labels = F.reduce(lambda accL, item: accL + item, labels, [])
    file2label = dict(zip(files, labels))

    start_idx = 0
    files = random.sample(files, len(files))

    nbatches = len(files)//batch_size
    start_idx = 0;
    for idx in range(nbatches):
      end_idx = min(start_idx + batch_size, len(files))
      #additional code
      if start_idx == end_idx:
        raise StopIteration
      #...
      sel_files = files[start_idx:end_idx]
      labels = [file2label[file] for file in sel_files]
      start_idx += batch_size

      yield sel_files, labels





In [16]:
loader = load_data("./animals/")
for files, labels in loader:
  print(files)
  print(labels)

['./animals/horse/horse-5.jpg', './animals/chicken/c3.jpg', './animals/horse/horse-3.jpeg', './animals/cat/cat-4.jpg', './animals/chicken/c2.jpg']
['horse', 'chicken', 'horse', 'cat', 'chicken']
['./animals/dog/dog-5.jpeg', './animals/chicken/c4.jpeg', './animals/cat/cat-2.jpg', './animals/goose/g2.jpg', './animals/dog/dog-7.jpg']
['dog', 'chicken', 'cat', 'goose', 'dog']
['./animals/dog/dog-6.jpg', './animals/chicken/c5.jpg', './animals/cat/cat-5.jpeg', './animals/dog/dog-2.jpg', './animals/cat/cat-6.jpg']
['dog', 'chicken', 'cat', 'dog', 'cat']
['./animals/cat/cat-3.jpeg', './animals/horse/horse-1.jpg', './animals/horse/horse-2.jpg', './animals/goose/g3.jpg', './animals/horse/horse-4.jpg']
['cat', 'horse', 'horse', 'goose', 'horse']
['./animals/cat/cat-1.jpg', './animals/dog/dog-4.jpeg', './animals/goose/g1.jpg', './animals/dog/dog-3.jpg', './animals/chicken/c1.jpg']
['cat', 'dog', 'goose', 'dog', 'chicken']
