In [1]:
import h5py
import numpy as np

In [6]:
class dataIter(object):
    """Single-pass iterator over the ``X_<dtype>`` / ``y_<dtype>`` datasets of an HDF5 file.

    For ``dtype == 'train'`` the row indices are shuffled once at construction
    and served as consecutive mini-batches of ``batch_size`` rows (the last
    batch may be shorter). For any other ``dtype`` the whole split is returned
    in a single step.

    The iterator is exhausted after one pass; build a new instance to iterate
    again (which also reshuffles).

    NOTE(review): min-max scaling is computed from the min/max of whatever
    array was just loaded, so for training data the scaling depends on the
    contents of each individual mini-batch.

    Args:
        batch_size: Rows per mini-batch. Only used for ``dtype == 'train'``.
        dtype: Split name; selects the datasets ``X_<dtype>`` and ``y_<dtype>``.
        path: Path to the HDF5 file.

    Raises:
        ValueError: If ``dtype == 'train'`` and ``batch_size`` is smaller than 1.
    """

    def __init__(self, batch_size=1, dtype='train', path='data/input_data.hdf5'):
        # Validate before touching the file: a zero batch size would divide by
        # zero below, and a negative one would yield a negative ``iter_num``
        # and an iterator that never terminates.
        if dtype == 'train' and batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

        self.current = 0  # number of batches already served
        self.dtype = dtype
        self.path = path
        self.batch_size = batch_size
        self.size = self.hdf_len(self.dtype, path)

        # Random visiting order of the rows (uses numpy's global RNG state).
        self.idx = np.arange(self.size)
        np.random.shuffle(self.idx)

        if self.dtype == 'train':
            self.iter_num = np.ceil(self.size / self.batch_size).astype(np.int32)
        else:
            # Non-training splits are delivered whole, in one step.
            self.iter_num = 1

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def __next__(self):
        """Return the next ``(X, y)`` pair of min-max scaled float32 arrays.

        Raises:
            StopIteration: Once ``iter_num`` batches have been served.
        """
        # ``>=`` rather than ``==`` so the iterator still stops if ``current``
        # ever overshoots ``iter_num``.
        if self.current >= self.iter_num:
            raise StopIteration()

        if self.dtype == 'train':
            start = self.batch_size * self.current
            stop = start + self.batch_size
            # h5py fancy indexing requires the index list in increasing order.
            mask = np.sort(self.idx[start:stop])
            X_train, y_train = self.load_hdf(self.dtype, self.path, mask)
        else:
            X_train, y_train = self.load_hdf(self.dtype, self.path)

        self.current += 1
        return X_train, y_train

    def hdf_len(self, dtype, path):
        """Return the length of the first axis of dataset ``X_<dtype>`` in ``path``."""
        with h5py.File(path, 'r') as f:
            n_rows = f[f'X_{dtype}'].shape[0]
        return n_rows

    def load_hdf(self, dtype, path, mask=None):
        """Read ``X_<dtype>`` and ``y_<dtype>`` from ``path`` and min-max scale them.

        Args:
            dtype: Split name used to build the dataset keys.
            path: Path to the HDF5 file.
            mask: Row selection applied when ``dtype == 'train'``. It is ignored
                for other splits; when ``None`` the full datasets are read.

        Returns:
            Tuple ``(scaled_X, scaled_y)`` of float32 arrays.
        """
        with h5py.File(path, 'r') as f:
            if dtype == 'train' and mask is not None:
                X = f[f'X_{dtype}'][mask]
                y = f[f'y_{dtype}'][mask]
            else:
                X = f[f'X_{dtype}'][:]
                y = f[f'y_{dtype}'][:]
        # Both reads produce in-memory arrays, so scaling can happen after the
        # file has been closed.
        scaled_X = self.MinMaxScaler(X)
        scaled_y = self.MinMaxScaler(y)
        return scaled_X, scaled_y

    def MinMaxScaler(self, data):
        """Scale ``data`` to ``[0, 1]`` using its global min and max; return float32.

        Constant input (max == min) maps to all zeros instead of dividing by
        zero, and empty input is returned as an empty float32 array.
        """
        data = np.asarray(data)
        if data.size == 0:
            # min()/max() are undefined on empty arrays.
            return data.astype('float32')

        lowest = data.min()
        span = data.max() - lowest
        if span == 0:
            # Avoid 0/0 -> NaN for constant data; (data - lowest) is already 0.
            span = 1
        scaled_data = (data - lowest) / span
        return scaled_data.astype('float32')
        

In [8]:
# A non-'train' iterator yields the entire split in a single step.
test = dataIter(
    dtype='test',
    path='data/auged_input_data.hdf5',
)
X_test, y_test = next(test)
# Display the shapes of the loaded arrays as the cell output.
X_test.shape, y_test.shape

((10000, 1, 28, 28), (10000, 10))

In [None]:
epochs = 3

# Load the whole test split once, up front.
test = dataIter(dtype='test', path='data/input_data.hdf5')
X_test, y_test = next(test)

for epoch in range(epochs):
    print("epoch=%s" % epoch)
    # A new iterator per epoch: it is single-pass and reshuffles on construction.
    trainer = dataIter(
        batch_size=32,
        dtype='train',
        path='data/auged_input_data.hdf5',
    )

    # Draw the shuffled mini-batches one after another.
    for X_train, y_train in trainer:
        print(y_train.shape)

In [12]:
# Number of mini-batches one pass of `trainer` yields (ceil(size / batch_size) for 'train').
trainer.iter_num

37500