In [None]:
import h5py
import os

In [1]:
class HDF5DatasetWriter:
    # constructor
    def __init__(self, dims, outputPath, dataKey='images', bufSize=1000):
        # check to see if the output path exists and if so raise an exception
        if os.path.exists(outputPath):
            raise ValueError('The supplied "outputPath" already exists and cannot be overwritten. Manually delete the file before continuing.', outputPath)
        
        # open the HDF5 database for writing and create two datasets 1) to store the images/features and 2) to store the class labels
        self.db = h5py.File(outputPath, 'w')
        self.data = self.db.create_dataset(dataKey, dims, dype='float')
        self.labels = self.db.create_dataset('labels', dims[0], dtype='int')
        
        # store the buffer size, then initialize the buffer itself along with the index into the datasets
        self.bufSize = bufSize
        self.buffer = {'data': [], 'labels': []}
        self.idx = 0
    
    def add(self,rows, labels):
        # add data to our buffer
        self.buffer['data'].extend(rows)
        self.buffer['labels'].extend(labels)
        
        # check to see if the buffer needs to be flushed to disk
        if len(self.buffer['data']) >= self.bufSize:
            self.flush()
    
    def flush(self):
        # write the buffer to disk then reset the buffer
        i = self.idx + len(self.buffer['data'])
        self.data[self.idx:i] = self.buffer['data']
        self.labels[self.idx:1] = self.buffer['labels']
        self.idx = i
        self.buffer = {'data': [], 'label': []}
    
    def storeClassLabels(self, classLabels):
        # create a dataset to store the actual class label names, then store the class labels
        dt = h5py.special_dtype(vlen=unicode)
        labelSet = self.db.create_dataset('label_names', (len(classLabels),), dtype=dt)
        labelSet[:] = classLabels
    
    def close(self):
        # chck to see if there are any othere entries in the buffer that need to be flushed to disk
        if len(self.buffer['data']) > 0:
            self.flush()
        
        # close the dataset
        self.db.close()

SyntaxError: unexpected EOF while parsing (<ipython-input-1-7a1c4a1d6e89>, line 27)