diff --git a/theano/tensor/signal/downsample.py b/theano/tensor/signal/downsample.py index c04c73c181f..2081d4e7c64 100644 --- a/theano/tensor/signal/downsample.py +++ b/theano/tensor/signal/downsample.py @@ -68,7 +68,7 @@ class DownsampleFactorMax(Op): """ @staticmethod - def out_shape(imgshape, ds, ignore_border=False): + def out_shape(imgshape, ds, st, ignore_border=False): """Return the shape of the output from this op, for input of given shape and flags. @@ -78,8 +78,12 @@ def out_shape(imgshape, ds, ignore_border=False): scalar Theano variable. :param ds: downsample factor over rows and columns + this parameter indicates the pooling region :type ds: list or tuple of two ints + :param st: the stride size + :type st: list or tuple of two ints + :param ignore_border: if ds doesn't divide imgshape, do we include an extra row/col of partial downsampling (False) or ignore it (True). :type ignore_border: bool @@ -93,24 +97,30 @@ def out_shape(imgshape, ds, ignore_border=False): raise TypeError('imgshape must have at least two elements ' '(rows, cols)') r, c = imgshape[-2:] - rval = list(imgshape[:-2]) + [r // ds[0], c // ds[1]] + rval = list(imgshape[:-2]) + [(r - ds[0]) // st[0] + 1, (c - ds[1]) // st[1] + 1] if not ignore_border: if isinstance(r, theano.Variable): - rval[-2] = tensor.switch(r % ds[0], rval[-2] + 1, rval[-2]) - elif r % ds[0]: + rval[-2] = tensor.switch((r - ds[0]) % st[0], rval[-2] + 1, rval[-2]) + elif (r - ds[0]) % st[0]: rval[-2] += 1 if isinstance(c, theano.Variable): - rval[-1] = tensor.switch(c % ds[1], rval[-1] + 1, rval[-1]) - elif c % ds[1]: + rval[-1] = tensor.switch((c - ds[1]) % st[1], rval[-1] + 1, rval[-1]) + elif (c - ds[1]) % st[1]: rval[-1] += 1 return rval - def __init__(self, ds, ignore_border=False): + def __init__(self, ds, ignore_border=False, st=None): """ - :param ds: downsample factor over rows and columns + :param ds: downsample factor over rows and column. ds indicates the pool region size :type ds: list or tuple of two ints + : param st: stride size, which is the number of shifts + over rows/cols to get the the next pool region. + if st is None, it is considered equal to ds + (no overlap on pooling regions) + : type st: list or tuple of two ints + :param ignore_border: if ds doesn't divide imgshape, do we include an extra row/col of partial downsampling (False) or ignore it (True). @@ -123,19 +133,23 @@ def __init__(self, ds, ignore_border=False): raise ValueError( "DownsampleFactorMax downsample parameters must be ints." " Got %s" % str(ds)) + if st == None: + st = ds + self.st = tuple(st) self.ignore_border = ignore_border def __eq__(self, other): return (type(self) == type(other) and self.ds == other.ds and + self.st == other.st and self.ignore_border == other.ignore_border) def __hash__(self): - return hash(type(self)) ^ hash(self.ds) ^ hash(self.ignore_border) + return hash(type(self)) ^ hash(self.ds) ^ hash(self.st) ^ hash(self.ignore_border) def __str__(self): return '%s{%s,%s}' % (self.__class__.__name__, - self.ds, self.ignore_border) + self.ds, self.st, self.ignore_border) def make_node(self, x): if x.type.ndim != 4: @@ -151,35 +165,49 @@ def perform(self, node, inp, out): if len(x.shape) != 4: raise NotImplementedError( 'DownsampleFactorMax requires 4D input for now') - z_shape = self.out_shape(x.shape, self.ds, self.ignore_border) + z_shape = self.out_shape(x.shape, self.ds, self.st, self.ignore_border) if (z[0] is None) or (z[0].shape != z_shape): - z[0] = numpy.zeros(self.out_shape(x.shape, self.ds, + z[0] = numpy.zeros(self.out_shape(x.shape, self.ds, self.st, self.ignore_border)) z[0] = theano._asarray(z[0], dtype=x.dtype) zz = z[0] ## zz needs to be initialized with -inf for the following to work zz -= numpy.inf + pr = zz.shape[-2] # number of pooling output rows + pc = zz.shape[-1] # number of pooling output cols ds0, ds1 = self.ds + st0, st1 = self.st + img_rows = x.shape[-2] + img_cols = x.shape[-1] + if self.ignore_border: - x_usable2 = (x.shape[2] // ds0 * ds0) + x_usable2 = (x.shape[2] - ds0) // st0 * st0 + ds0 else: x_usable2 = x.shape[2] if self.ignore_border: - x_usable3 = (x.shape[3] // ds1 * ds1) + x_usable3 = (x.shape[3] - ds1) // st1 * st1 + ds1 else: x_usable3 = x.shape[3] for n in xrange(x.shape[0]): for k in xrange(x.shape[1]): - for i in xrange(x_usable2): - zi = i / ds0 - for j in xrange(x_usable3): - zj = j / ds1 - zz[n, k, zi, zj] = __builtin__.max(zz[n, k, zi, zj], - x[n, k, i, j]) + for r in xrange(pr): + row_st = r * st0 + for c in xrange(pc): + col_st = c * st1 + for i in xrange(ds0): + row_ind = row_st + i + if row_ind >= img_rows: + continue + for j in xrange(ds1): + col_ind = col_st + j + if col_ind >= img_cols: + continue + zz[n, k, r, c] = __builtin__.max(zz[n, k, r, c], + x[n, k, row_ind, col_ind]) def infer_shape(self, node, in_shapes): - shp = self.out_shape(in_shapes[0], self.ds, self.ignore_border) + shp = self.out_shape(in_shapes[0], self.ds, self.st, self.ignore_border) return [shp] def grad(self, inp, grads): @@ -190,7 +218,7 @@ def grad(self, inp, grads): ignore_border=self.ignore_border)( x, maxout, gz)] - def c_code(self, node, name, inp, out, sub): + def c_code_tmp(self, node, name, inp, out, sub): x, = inp z, = out fail = sub['fail'] @@ -262,7 +290,7 @@ def c_code(self, node, name, inp, out, sub): } """ % locals() - def c_code_cache_version(self): + def c_code_cache_version_tmp(self): return (0, 1)