In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 29 days
Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 29 days


In [2]:
from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')

[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>), HostFromGpu(GpuElemwise{exp,no_inplace}.0)]
Looping 1000 times took 0.257601 seconds
Result is [ 1.23178029  1.61879349  1.52278066 ...,  2.20771813  2.29967761
  1.62323296]
Used the gpu


Using gpu device 0: GeForce GTX 780 (CNMeM is enabled)


In [3]:
#
# These are taken from https://github.com/mila-udem/blocks
# 

class Constant():
    """Initialize parameters to a constant.
    The constant may be a scalar or a :class:`~numpy.ndarray` of any shape
    that is broadcastable with the requested parameter arrays.
    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The initialization value to use. Must be a scalar or an ndarray (or
        compatible object, such as a nested list) that has a shape that is
        broadcastable with any shape requested by `initialize`.
    """
    def __init__(self, constant):
        self._constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        dest = numpy.empty(shape, dtype=np.float32)
        dest[...] = self._constant
        return dest


class IsotropicGaussian():
    """Initialize parameters from an isotropic Gaussian distribution.
    Parameters
    ----------
    std : float, optional
        The standard deviation of the Gaussian distribution. Defaults to 1.
    mean : float, optional
        The mean of the Gaussian distribution. Defaults to 0
    Notes
    -----
    Be careful: the standard deviation goes first and the mean goes
    second!
    """
    def __init__(self, std=1, mean=0):
        self._mean = mean
        self._std = std

    def generate(self, rng, shape):
        m = rng.normal(self._mean, self._std, size=shape)
        return m.astype(np.float32)


class Uniform():
    """Initialize parameters from a uniform distribution.
    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function); Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.
    """
    def __init__(self, mean=0., width=None, std=None):
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2
            self._width = numpy.sqrt(12) * std
        else:
            self._width = width
        self._mean = mean

    def generate(self, rng, shape):
        w = self._width / 2
        #print 'u', shape
        m = rng.uniform(self._mean - w, self._mean + w, size=shape)
        return m.astype(np.float32)


In [4]:
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

MNIST.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

mnist_train = MNIST(("train",), subset=slice(None,50000))
#this stream will shuffle the MNIST set and return us batches of 100 examples
mnist_train_stream = DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(mnist_train.num_examples, 100))

                         
mnist_validation = MNIST(("train",), subset=slice(50000, None))

# We will use larger portions for testing and validation
# as these dont do a backward pass and reauire less RAM.
mnist_validation_stream = DataStream.default_stream(
    mnist_validation, iteration_scheme=SequentialScheme(mnist_validation.num_examples, 250))
mnist_test = MNIST(("test",))
mnist_test_stream = DataStream.default_stream(
    mnist_test, iteration_scheme=SequentialScheme(mnist_test.num_examples, 250))

In [5]:
from fuel.datasets.cifar10 import CIFAR10
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

CIFAR10.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

cifar10_train = CIFAR10(("train",), subset=slice(None,40000))
#this stream will shuffle the MNIST set and return us batches of 100 examples
cifar10_train_stream = DataStream.default_stream(
    cifar10_train,
    iteration_scheme=ShuffledScheme(cifar10_train.num_examples, 100))
                                               
cifar10_validation = CIFAR10(("train",), subset=slice(40000, None))

# We will use larger portions for testing and validation
# as these dont do a backward pass and reauire less RAM.
cifar10_validation_stream = DataStream.default_stream(
    cifar10_validation, iteration_scheme=SequentialScheme(cifar10_validation.num_examples, 250))
cifar10_test = CIFAR10(("test",))
cifar10_test_stream = DataStream.default_stream(
    cifar10_test, iteration_scheme=SequentialScheme(cifar10_test.num_examples, 250))

In [6]:
print "The streams return batches containing %s" % (mnist_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(mnist_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(mnist_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "CIFAR: "  
print "The streams return batches containing %s" % (cifar10_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(cifar10_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(cifar10_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)

The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (100, 1, 28, 28) containing float32
 - an array of size (100, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (250, 1, 28, 28) containing float32
 - an array of size (250, 1) containing uint8
CIFAR: 
The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (100, 3, 32, 32) containing float32
 - an array of size (100, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (250, 3, 32, 32) containing float32
 - an array of size (250, 1) containing uint8


In [7]:
from sklearn import datasets
iris = datasets.load_iris()

#print iris

feats = 4
alpha = 0.01
pop_num = 150
rng = np.random
iris_f = iris['data'][:pop_num,:feats]
iris_t = iris['target'][:pop_num]
iris = hstack(([[x] for x in iris_t], iris_f))

rng.shuffle(iris

#print iris

iris_train_f = iris[:2*pop_num/3,1:]
iris_train_t = np.array(iris[:2*pop_num/3, 0], dtype='uint8')
iris_test_f = iris[2*pop_num/3:,1:]
iris_test_t = np.array(iris[2*pop_num/3:, 0], dtype='uint8')

SyntaxError: invalid syntax (<ipython-input-7-6604dab46423>, line 18)

In [8]:
import theano
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))
import time

In [21]:
import theano.tensor.signal.downsample as down

class Layer(object):
    def __init__(self, lamb = 0.1,rng=None, name=""):
        self.name = name
        self.lamb = lamb
        if rng is None:
            rng = numpy.random
        self.rng = rng
    
    @property
    def parameters(self):
        return []
    
    @property
    def parameter_names(self):
        return []
    
    def get_gradients(self, dLdY, fprop_context):
        return []
    
    def update(self, foo, alpha):
        return []
    def cost(self):
        return 0;
    def setInputDim(self, inputDim):
        self.num_out = inputDim
    def getOutputDim(self):
        return self.num_out
    def setMoments(self, moments):
        self.moments = moments
    def setLambda(self, lamb):
        self.lamb = lamb
    

class AffineLayer(Layer):
    def __init__(self, num_out, initW = 10., gamma  = 0.1, n = "", weight_init=None, bias_init=None, **kwargs):
        super(AffineLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(initW / (num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        self.gamma= theano.shared(gamma)
        self.b = theano.shared(bias_init.generate(self.rng, (num_out)), name=self.name +" bias")
    
    @property
    def parameters(self):
        return [self.W, self.b]
    @property
    def parametersValues(self):
        return [self.W.get_value(), self.b.get_value()]
    @property
    def parameter_names(self):
        return ['W','b']
    
    def build(self, X):
        #print self.name+ " ",X.shape 
        return X.dot(self.W) + self.b
    def cost(self):
        return  (self.W ** 2).sum() * self.gamma
    def update(self, foo, alpha):
        gw, gb = T.grad(foo, self.parameters)
        moments = self.moments
        self.setMoments((gw, gb))
        return  [(self.W, self.W - (alpha * gw + self.lamb * moments[0])), 
                 (self.b, self.b - (alpha * gb+ self.lamb * moments[1]))]
    def setInputDim(self, inputDim):
        shape = (inputDim, self.num_out)
        print "AffineLayer: ", shape
        self.W = theano.shared(self.weight_init.generate(self.rng, shape),name=self.name +" weight")
        self.setMoments(zeros(shape, dtype='float32'))
    
class LogRegLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(LogRegLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.nnet.sigmoid(X)


class TanhLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(TanhLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        print "tanh layer", X
        return T.tanh(X)

    
class ReLULayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(ReLULayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.maximum(0.0, X)

class Conv(Layer):
    def __init__(self, f_out, f_size, initW = 10., gamma = 0.1, n = "", weight_init = None, **kwargs):
        super(Conv, self).__init__(name = n, **kwargs)
        if weight_init is None:
            b = numpy.sqrt(initW / (f_out+ f_size + f_size))
            self.weight_init = Uniform(width=b)
        self.gamma= theano.shared(gamma)
        self.f_out = f_out
        self.f_size = f_size
    
    
    def setInputDim(self, inputDim):
        F_size = (self.f_out, ) + (inputDim[0], self.f_size, self.f_size)                                   
        self.num_out = (self.f_out, inputDim[1] - self.f_size + 1, inputDim[2] - self.f_size + 1)
        print 'Conv filter', F_size
        self.F = theano.shared(self.weight_init.generate(self.rng, F_size),name=self.name +" filter")
        
    def update(self, foo, alpha):
        gf = T.grad(foo, self.F)
        return  [(self.F, self.F - alpha * gf)]    
    
    #def cost(self):
    #    return  (self.F ** 2).sum() * self.gamma
    
    def build(self, X):
        return T.maximum(0.0, T.nnet.conv2d(X, self.F))
        
        
        
class Flatten(Layer):
    def __init__(self, n = "", **kwargs):
        super(Flatten, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.flatten(X, 2)
    def setInputDim(self, inputDim):
        out_dim = 1
        for i in inputDim:
            out_dim = out_dim * i
        self.num_out = out_dim
    

class BNLayer(Layer):
    def __init__(self,num_out, n = "BNLayer", gamma = 0.1, alpha=1.0,**kwargs):
        super(BNLayer, self).__init__(name = n, **kwargs)
        self.num_out, self.alpha = num_out, alpha
        self.gamma= theano.shared(gamma)
    def build(self, X):
        self.Gamma = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Gamma " + self.name))
        print 'Gamma shape:', np.zeros((1, self.num_out)).shape
        self.Beta  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Beta " + self.name))
        print 'Beta shape:', np.zeros((1, self.num_out)).shape
        self.Gamma.tag.initializer = Constant(1.0)
        self.Beta.tag.initializer = Constant(0.0)
    
        #self.stored_means = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Means" + self.name))
        #self.stored_stds  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Stds" + self.name))
        #self.stored_means.tag.initializer = Constant(0.0)
        #self.stored_stds.tag.initializer = Constant(1.0)
    
        self.means = self.alpha * theano.tensor.mean(X, 0, keepdims=True)
        self.stds = self.alpha * theano.tensor.std(X, 0, keepdims=True)
        self.means.tag.initializer = Constant(0.0)
        self.stds.tag.initializer = Constant(1.0)
        #self.means = self.alpha *self.means + (1.0 - self.alpha) * self.stored_means.dimshuffle(0,'x')
        #self.stds = self.alpha * self.stds + (1.0 - self.alpha) * self.stored_stds.dimshuffle(0,'x')
        
        normalized = theano.tensor.nnet.bn.batch_normalization(
            X,
            self.Gamma,
            self.Beta,
            self.means,
            self.stds,
            'high_mem'
        )
        return normalized
    @property
    def parameters(self):
        return [self.Gamma, self.Beta]
    @property
    def check(self):
        return [self.gg, self.gb, self.Gamma, self.Beta,self.means, self.stds ]
    #def cost(self):
    #    return  ((self.Gamma ** 2).sum() + (self.Gamma ** 2).sum())* self.gamma
    def update(self, foo, alpha):
        self.gg, self.gb = T.grad(foo, self.parameters)
        return  [(self.Gamma, self.Gamma- alpha *self.gg),
            (self.Beta, self.Beta - alpha * self.gb)] 
    
class SoftMaxLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(SoftMaxLayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.nnet.softmax(X)

class MaxPoolLayer(Layer):
    def __init__(self, p_size, n = "MP", **kwargs):
        super(MaxPoolLayer, self).__init__(name = n, **kwargs)
        self.p_size = p_size
    def build(self, input):
        return down.max_pool_2d(input, (self.p_size,self.p_size), ignore_border=True)
    def getOutputDim(self):
        shape = (self.num_out[0], ) + (self.num_out[1]/self.p_size, self.num_out[2]/self.p_size) 
        print "maxPool", shape
        return shape
    
class DropOutLayer(Layer):
    def __init__(self, dropOut = 0.1, n = "MP", **kwargs):
        super(DropOutLayer, self).__init__(name = n, **kwargs)
        self.dropOut = dropOut
        self.u = Uniform(0.5, 1.)
    def build(self, input):
        self.D = theano.shared((self.u.generate(self.rng, (self.num_out,))>= self.dropOut) + 0,name=self.name +" Dropout") 
        print self.D.get_value()
        return input * self.D
    def getOutputDim(self):
        shape = self.num_out 
        print "maxPool", shape
        return shape
    def update(self, foo, alpha):
        return  [(self.D,
            (self.u.generate(self.rng, (self.num_out,)) >= self.dropOut)+0)]
    
class FeedForwardNet(object):
    def __init__(self, layers=None, alpha=0.1, lamb = 0.1):
        if layers is None:
            layers = []
        self.layers = layers
        print type(alpha)
        self.alpha = theano.shared(float32(alpha), name='alpha')
    
    def add(self, layer):
        self.layers.append(layer)
    
    @property
    def parameters(self):
        params = []
        for layer in self.layers:
            params += layer.parameters
        return params
    
    @parameters.setter
    def parameters(self, values):
        for ownP, newP in zip(self.parameters, values):
            ownP[...] = newP
    
    @property
    def parameter_names(self):
        param_names = []
        for layer in self.layers:
            param_names += layer.parameter_names
        return param_names
    
    def build(self, inputDim):
        x = T.tensor4("x")
        y = T.vector("y", dtype='int64')
        cost = 0
        paramUpdates = []
        
        X = x
        for layer, i in zip(self.layers, range(len(self.layers))):
            #print inputDim
            layer.setInputDim(inputDim)
            layer.setLambda(lamb)
            inputDim = layer.getOutputDim()
            X = layer.build(X)
            #if(i == 3):
            #    D = X
            cost += layer.cost()
        
        pred = np.argmax(X, 1)
        self.costFoo = T.nnet.categorical_crossentropy(X, y).mean() + cost
        
        #svgdotprint(self.costFoo)
        
        for layer in self.layers:
            paramUpdates += layer.update(self.costFoo, self.alpha)
        
        paramUpdates += [(self.alpha, self.alpha * 0.99993)]
        self.train = theano.function(inputs=[x,y], 
                                    outputs=[pred, self.costFoo, self.alpha],
                                    updates=paramUpdates)
        self.predict  = theano.function(inputs=[x], 
                                    outputs=pred)
        
    
    @property
    def trainFunction(self):
        return self.train
    
    @property
    def predictFunction(self):
        return self.predict
    @property
    def costFunction(self):
        return self.costFoo


In [22]:
def compute_er(net, stream):
    num_errs = 0.0
    num_examples = 0
    for X, Y in stream.get_epoch_iterator():
        predictions = net.predictFunction(X)
        #print predictions != Y.ravel()
        num_errs += (predictions != Y.ravel()).sum()
        #print Y.shape[0], num_errs
        num_examples += Y.shape[0]
    return num_errs/num_examples


In [None]:
img_size = (32, 32)
c1_i = 3
c1_o = 20
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 500
hidden2 = 600
hidden3 = 900
hidden4 = 1000
hidden5 = 500
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 300

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c1_f, initC, "Conv2"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

 <type 'float'>
Conv filter (20, 3, 5, 5)
maxPool (20, 9, 9)
Conv filter (40, 20, 5, 5)
AffineLayer:  (1000, 500)
AffineLayer:  (500, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.01
alpha:  0.005
22.387253952 0.78
21.8155486107 0.63
21.2939832258 0.55
20.8205234814 0.56
After epoch:  0 0.5988 time:  11.5790638924
20.5381700087 0.57
20.1330384636 0.5
19.9208042812 0.58
19.3910835934 0.53
After epoch:  1 0.5526 time:  11.5683820248
18.862948103 0.46
18.6502162266 0.52
18.4663848734 0.58
18.1054138851 0.54
After epoch:  2 0.5183 time:  11.567499876
17.6029868269 0.45
17.4562418699 0.59
17.0683274746 0.47
16.8309161282 0.51
After epoch:  3 0.5051 time:  11.5688889027
16.5115652418 0.49
16.087076683 0.4
15.9467263174 0.49
15.468911438 0.35
After epoch:  4 0.4868 time:  11.5675930977
15.2993978691 0.4
14.8421323848 0.26
14.9537456799 0.46
14.7467283726 0.48
After epoch:  5 

In [15]:
c1_o = 50
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1500
hidden2 = 1200
hidden3 = 2500
hidden4 = 2000
hidden5 = 1000
outs = 10
gamma = 0.003
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 50

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA1"), 
                      TanhLayer("ReLu"),
                      AffineLayer(hidden2, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden3, initW, gamma, "tA"),
                      #BNLayer(hidden3, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (50, 3, 5, 5)
maxPool (50, 9, 9)
Conv filter (100, 50, 5, 5)
AffineLayer:  (2500, 1500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (1500, 1200)
AffineLayer:  (1200, 2500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (2500, 2000)
Gamma shape: (1, 2000)
Beta shape: (1, 2000)
AffineLayer:  (2000, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.003
alpha:  0.005
28.8414355469 0.73
28.5769722729 0.71
28.2626245413 0.7
28.055165803 0.68
After epoch:  0 0.6709 time:  27.6519670486
27.7894599752 0.58
27.5488911686 0.62


KeyboardInterrupt: 

In [None]:
c1_o = 50
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1500
hidden2 = 1200
hidden3 = 2500
hidden4 = 2000
hidden5 = 1000
outs = 10
gamma = 0.003
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 50

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA1"), 
                      TanhLayer("ReLu"),
                      AffineLayer(hidden2, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden3, initW, gamma, "tA"),
                      #BNLayer(hidden3, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, = net.trainFunction(X + u.generate(numpy.random, X.shape), Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (50, 3, 5, 5)
maxPool (50, 9, 9)
Conv filter (100, 50, 5, 5)
AffineLayer:  (2500, 1500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (1500, 1200)
AffineLayer:  (1200, 2500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (2500, 2000)
AffineLayer:  (2000, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.003
alpha:  0.005
28.8212689743 0.75
28.5534723682 0.71
28.3255749226 0.76
28.0081979818 0.68
After epoch:  0 0.6558 time:  29.4251060486
27.7539963131 0.66
27.5408900337 0.59
27.2896728888 0.56
27.1731458454 0.61
After epoch:  1 0.5595 time:  29.3862380981
27.0200676079 0.6
26.7889199762 0.56
26.5146210728 0.52
26.3646943731 0.46
After epoch:  2 0.5029 time:  29.3927900791
26.1267358379 0.49
26.1867808313 0.56
25.762874012 0.47
25.5814525661 0.4
After epoch:  3 0.4778 time:  29.3873929977
25.5643673515 0.49
25.5977406425 0.48
25.1781211843 0.35
25.0971089964 0.37
After epoch:  4 0.4558 time:  29.3930521011
24.8887736349 0.41
24.9025677319 0.39
24.8407

In [None]:
c1_o = 50
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1500
hidden2 = 1200
hidden3 = 2500
hidden4 = 2000
hidden5 = 1000
outs = 10
gamma = 0.003
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 50

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA1"), 
                      TanhLayer("ReLu"),
                      AffineLayer(hidden2, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden3, initW, gamma, "tA"),
                      #BNLayer(hidden3, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, = net.trainFunction(X , Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

In [None]:
c1_o = 50
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1500
hidden2 = 1200
hidden3 = 2500
hidden4 = 2000
hidden5 = 1000
outs = 10
gamma = 0.003
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 50

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA1"), 
                      TanhLayer("ReLu"),
                      AffineLayer(hidden2, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden3, initW, gamma, "tA"),
                      #BNLayer(hidden3, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.1)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, = net.trainFunction(X + u.generate(numpy.random, X.shape), Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

 0.27
22.2184374504 0.25
After epoch:  10 0.3783 time:  29.3749330044
22.0886293435 0.25
22.001179276 0.28
22.0768219624 0.34
21.906710783 0.36
After epoch:  11 0.3696 time:  29.3447511196
21.7319138603 0.3
21.4842985559 0.21
21.3746518869 0.22
21.3993080239 0.26
After epoch:  12 0.3646 time:  29.3457448483
21.2365318155 0.23
21.056635602 0.16
21.046574193 0.19
21.0572039719 0.29
After epoch:  13 0.3621 time:  29.343585968
20.8926315637 0.22
20.7590068207 0.2
20.864456377 0.24
20.666518827 0.28
After epoch:  14 0.3597 time:  29.3464579582
20.464023128 0.2
20.508782434 0.21
20.4226004872 0.17
20.3481724958 0.17
After epoch:  15 0.3617 time:  29.3413469791
20.0332682495 0.14
20.2825598607 0.2
19.9670508451 0.18
19.9788673515 0.23
After epoch:  16 0.3636 time:  29.3448309898
19.7908584874 0.11
19.8568601727 0.18
19.7922747183 0.2
19.7898434858 0.21
After epoch:  17 0.3648 time:  29.3410298824
19.5526448977 0.16
19.5229635911 0.18
19.4494173989 0.17
19.2958652601 0.14
After epoch:  18 0.36

KeyboardInterrupt: 

In [12]:
img_size = (32, 32)
c1_i = 3
c1_o = 50
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1500
hidden2 = 1200
hidden3 = 2500
hidden4 = 2000
hidden5 = 1000
outs = 10
gamma = 0.003
alpha = 0.003
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 50

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA1"), 
                      TanhLayer("ReLu"),
                      AffineLayer(hidden2, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden3, initW, gamma, "tA"),
                      #BNLayer(hidden3, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, = net.trainFunction(X + u.generate(numpy.random, X.shape), Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (50, 3, 5, 5)
maxPool (50, 9, 9)
Conv filter (100, 50, 5, 5)
AffineLayer:  (2500, 1500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (1500, 1200)
AffineLayer:  (1200, 2500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (2500, 2000)
AffineLayer:  (2000, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.003
alpha:  0.003
28.8906262512 0.69
28.7555505428 0.76
28.6465790691 0.74
28.4763495998 0.76
After epoch:  0 0.7196 time:  29.3690638542
28.3564878483 0.79
28.1685270958 0.71
28.0013622055 0.63
27.8244158039 0.59
After epoch:  1 0.6488 time:  29.3416910172
27.7385859079 0.63
27.5573060837 0.58
27.4350211325 0.55
27.2524747276 0.53
After epoch:  2 0.585 time:  29.3402769566
27.2392432909 0.56
27.0430137129 0.52
26.9165180416 0.59
26.9818340521 0.59
After epoch:  3 0.5454 time:  29.3390109539
26.8729083033 0.58
26.5941492052 0.48
26.5839362793 0.55
26.6605544662 0.64
After epoch:  4 0.5149 time:  29.3379859924
26.4003236809 0.54
26.2317354975 0.48
26.11

In [13]:
c1_o = 50
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1500
hidden2 = 1200
hidden3 = 2500
hidden4 = 1000
hidden5 = 1000
outs = 10
gamma = 0.003
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 50

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden2, initW, gamma, "tA"),
                      #BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden3, initW, gamma, "tA"),
                      #BNLayer(hidden3, 'BN2'),
                      TanhLayer("ReLu"),
                      BNLayer(hidden4, 'BN2'),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),

                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, = net.trainFunction(X , Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (50, 3, 5, 5)
maxPool (50, 9, 9)
Conv filter (100, 50, 5, 5)
maxPool (100, 1, 1)
AffineLayer:  (100, 1200)
AffineLayer:  (1200, 2500)
tanh layer Elemwise{add,no_inplace}.0
AffineLayer:  (2500, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.003
alpha:  0.005
14.0355478382 0.75
13.8781158695 0.68
13.6647407007 0.61
13.5452297859 0.61
After epoch:  0 0.6498 time:  20.2297329903
13.4000119324 0.64
13.2335639524 0.62
13.1682131453 0.62
13.1399305668 0.62
After epoch:  1 0.5877 time:  20.2084889412
12.9640302515 0.57
12.8924028683 0.61
12.7934504585 0.63
12.7042204962 0.58
After epoch:  2 0.5553 time:  20.2117159367
12.6900714188 0.58
12.5555318394 0.59
12.5725090628 0.49
12.4071586103 0.56
After epoch:  3 0.5348 time:  20.2089660168
12.5106524553 0.55
12.4343873367 0.58
12.2590950508 0.5
12.2280057507 0.51
After epoch:  4 0.5245 time:  20.213411808
12.0968634329 0.5
11.9242234154 0.41
11.9860299311 0.47
11.8396515522 0.44
After epoch:  5 0.5144 time:  20.209270000

In [14]:
    def xent(x , y):
        return  -y * T.log(x) - (1-y) * T.log(1-x)
    
    def costF(x, w):
        return x.mean() + 0.01 * (w ** 2).sum()

In [69]:
conv = Conv(3, 32, 3)
flatten = Flatten()
x = T.tensor4("x")
y = T.vector("y", dtype='int64')

C = conv.build(x)
F = flatten.build(C)

foo = theano.function(inputs=[x], 
                    outputs=[C, F])

for X, Y in cifar10_train_stream.get_epoch_iterator():
    print X.shape
    c,f = foo(X)
    print c.shape
    print f.shape
    break

conv = Conv(1, 1)
x = T.matrix("x")
y = T.vector("y", dtype='int64')

x = conv.build(x)
for X, Y in cifar10_train_stream.get_epoch_iterator():
    X = X.T
    Y = X.copy()
    print X.shape
    zeros(X.shape)
    conv.resize(X)
    print X.shape
    print Y.shape
    conv.reresize(X)
    print X.shape
    print X == Y
    break

AttributeError: 'Conv' object has no attribute 'F'

In [351]:
feats = 784
hidden1 = 500
hidden2 = 200
outs = 10
gamma = 0.001
alpha = 0.1
num_epochs  = 100

net = FeedForwardNet([AffineLayer(feats, hidden1, gamma, "fA"), 
                      TanhLayer("fTanh"),
                      AffineLayer(hidden1, hidden2, gamma, "sA"), 
                      TanhLayer("fTanh"),
                      AffineLayer(hidden2, outs, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha)
net.build()
print "Start"
i = 0
e = 0
while e < num_epochs:
    for X, Y in mnist_train_stream.get_epoch_iterator():
        pr ,c = net.trainFunction(X.T, Y.ravel())
        i+=1
        #if i % 100 == 0:
            #print c, (pr  == Y).mean()
    
    print "After epoch: ", e, compute_er(net, mnist_validation_stream)
    e+=1

    
for X, Y in mnist_validation_stream.get_epoch_iterator():
    predictions = net.predictFunction(X.T)
    num_errs += (predictions != Y).sum()
    num_examples += X.shape[1]
    k+=1
print num_errs, num_examples, k, num_errs/num_examples
#print (iris_test_t  == net.predictFunction(iris_test_f)).mean()

fA  Shape.0
sA  Shape.0
tA  Shape.0


ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float32, 4D)

In [None]:
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))



feats = 4
hidden = 500
outs = 3
gamma = 0.001
alpha = 0.1

net = FeedForwardNet([AffineLayer(feats, hidden, gamma, "fA"), 
          TanhLayer("fTanh"),
          AffineLayer(hidden, outs, gamma, "sA"), 
          SoftMaxLayer("fSoftMax")], alpha)




#theano.printing.pydotprint(costFoo, outfile="symbolic_graph_unopt.png", var_with_name_simple=True)  


net.build()

print iris_train_t
for i in range(10000):
    pr ,c = net.trainFunction(iris_train_f, iris_train_t)
    if i % 100 == 0:
        print c, (pr  == iris_train_t).mean()

print (pr  == iris_train_t).mean()
print pr

figure()
subplot(2,1,1)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=iris_test_t.ravel(), cmap='prism')
subplot(2,1,2)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=net.predicFunction(iris_test_f), cmap='prism')
print (iris_test_t  == net.predictFunction(iris_test_f)).mean()

In [None]:
#print iris_test_t

w = theano.shared(rng.randn(feats)*0.01, name="w")
#print w.get_value()
b = theano.shared(0., name="b")
#print b.get_value()
x = T.matrix("x")
y = T.vector("y")

p_1 = T.nnet.sigmoid(T.dot(x, w) + b)   # Probability that target = 1
prediction = p_1 > 0.5                    # The prediction thresholded
c = costF(xent(p_1, y), w)# The cost to minimize
gw, gb = T.grad(c, [w, b])             # Compute the gradient of the cost
                                          # (we shall return to this in a
                                          # following section of this tutorial)

train = theano.function(
          inputs=[x,y],
          outputs=[prediction, c],
          updates=((w, w - alpha * gw), (b, b - alpha * gb)))
predict = theano.function(inputs=[x], outputs=prediction)



for i in range(1000):
    p, c =train(iris_train_f, iris_train_t)
    #print p, c, x.mean()
    
print (predict(iris_test_f) == iris_test_t).mean()


#foo = theano.function(inputs=[iris_train_f], outputs=[f])

#print iris_test_t
figure()
subplot(2,1,1)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=iris_test_t.ravel(), cmap='spring')
subplot(2,1,2)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=predict(iris_test_f).ravel(), cmap='spring')


In [None]:
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))



x = T.matrix("x")
y = T.vector("y")
feats = 4
hidden = 500
outs = 1
gamma = 0.1
alpha = 0.1

fAL = AffineLayer(feats, hidden, gamma, "fA")
tL = TanhLayer()
sAL = AffineLayer(hidden, outs, gamma, "sA")
lL = LogRegLayer()

fa = fAL.build(x)
t = tL.build(fa)
sa = sAL.build(t)
out = lL.build(sa)
pred = out > 0.5
c = xent(out.ravel(), y).mean() + fAL.cost() + sAL.cost()

theano.printing.pydotprint(out, outfile="symbolic_graph_unopt.png", var_with_name_simple=True)  
fgw, fgb = T.grad(c, fAL.parameters)
sgw, sgb = T.grad(c, sAL.parameters)

train = theano.function(inputs=[x,y], 
                        outputs=[pred, c], 
                        updates=(fAL.update(c, alpha) + sAL.update(c, alpha)))
predict  = theano.function(inputs=[x], 
                        outputs=[pred])

for i in range(100):
    pr, cost = train(iris_train_f, iris_train_t)

print (pr.ravel() == iris_train_t).mean()


figure()
subplot(2,1,1)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=iris_test_t.ravel(), cmap='spring')
subplot(2,1,2)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=predict(iris_test_f), cmap='spring')





In [None]:
x1 = np.array([[12, 13], [1, 3]])
y1 = [1, 2]
print x.shape
y = T.vector()
x = T.matrix()
f = theano.function(inputs=[x, y], outputs=x+y)
f(x1, y1)


x2 = [1,2]
y2 = [2,3]
x2 + y2

In [None]:
img_size = (32, 32)
c1_i = 3
c1_o = 50
c1_f = 5
p1   = 2
c2_o = 10
c2_f = 3
hidden1 = 300
hidden2 = 500
hidden3 = 50
hidden4 = 50
hidden5 = 300
outs = 10
gamma = 0.001
alpha = 0.01
lamb = 0.1
num_epochs  = 300

net = FeedForwardNet([
                      Flatten("Flatten"),
                      AffineLayer(hidden3, gamma, "tA1"), 
                      BNLayer(hidden3, 'BN1'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(outs, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, gg1, gb1, gamma1, beta1, mean1, std1, gg2, gb2, gamma2, beta2, mean2, std2 = net.trainFunction(X , Y.ravel()) # + u.generate(numpy.random, X.shape)
        i+=1
        if i % 1 == 0:
            print 'L1gg', gg1.shape, np.array(gg1)
            print 'L1gb', gb1.shape, np.array(gb1)
            print 'L1gamma', gamma1.shape, np.array(gamma1)
            print 'L1beta', beta1.shape, np.array(beta1)
            print 'L1mean', mean1.shape, np.array(mean1)
            print 'L1std', std1.shape, np.array(std1)
            print 'L2gg', gg2.shape, np.array(gg2)
            print 'L2gb', gb2.shape, np.array(gb2)
            print 'L2gamma', gamma2.shape, np.array(gamma2)
            print 'L2beta', beta2.shape, np.array(beta2)
            print 'L2mean', mean2.shape, np.array(mean2)
            print 'L2std', std2.shape, np.array(std2)
            print c, (pr  != Y.ravel()).mean()
        if i % 3 == 0:
            break
    break
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

In [None]:
import theano.tensor.signal.downsample as down

class Layer(object):
    def __init__(self, lamb = 0.1,rng=None, name=""):
        self.name = name
        self.lamb = lamb
        if rng is None:
            rng = numpy.random
        self.rng = rng
    
    @property
    def parameters(self):
        return []
    
    @property
    def parameter_names(self):
        return []
    
    def get_gradients(self, dLdY, fprop_context):
        return []
    
    def update(self, foo, alpha):
        return []
    def cost(self):
        return 0;
    def setInputDim(self, inputDim):
        self.num_out = inputDim
    def getOutputDim(self):
        return self.num_out
    def setMoments(self, moments):
        self.moments = moments
    def setLambda(self, lamb):
        self.lamb = lamb
    

class AffineLayer(Layer):
    def __init__(self, num_out, gamma  = 0.1, n = "", weight_init=None, bias_init=None, **kwargs):
        super(AffineLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(20. / (2* num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        self.gamma= theano.shared(gamma)
        self.b = theano.shared(bias_init.generate(self.rng, (num_out)), name=self.name +" bias")
    
    @property
    def parameters(self):
        return [self.W, self.b]
    @property
    def parametersValues(self):
        return [self.W.get_value(), self.b.get_value()]
    @property
    def parameter_names(self):
        return ['W','b']
    
    def build(self, X):
        #print self.name+ " ",X.shape 
        return X.dot(self.W) + self.b
    def cost(self):
        return  (self.W ** 2).sum() * self.gamma
    def update(self, foo, alpha):
        gw, gb = T.grad(foo, self.parameters)
        moments = self.moments
        self.setMoments((gw, gb))
        return  [(self.W, self.W - (alpha * gw + self.lamb * moments[0])), 
                 (self.b, self.b - (alpha * gb+ self.lamb * moments[1]))]
    def setInputDim(self, inputDim):
        shape = (inputDim, self.num_out)
        print "AffineLayer: ", shape
        self.W = theano.shared(self.weight_init.generate(self.rng, shape),name=self.name +" weight")
        self.setMoments(zeros(shape, dtype='float32'))

class Affine2DLayer(Layer):
    def __init__(self, num_out, gamma = None, n = "", weight_init=None, bias_init=None, **kwargs):
        super(Affine2DLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(6. / 2* (num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        if gamma is None:
            self.gamma = theano.shared(0.1)
        else:
            self.gamma = theano.shared(gamma, name = self.name + " gamma")
    
    @property
    def parameters(self):
        return [self.W]
    @property
    def parametersValues(self):
        return [self.W.get_value()]
    @property
    def parameter_names(self):
        return ['W']
    
    def build(self, X):
        return X.dot(self.W)
    def cost(self):
        return  (self.W ** 2).sum() * self.gamma
    def update(self, foo, alpha):
        gw = T.grad(foo, self.parameters)
        return  [(self.W, self.W -alpha * gw)] 
    def setInputDim(self, inputDim):
        self.num_out = (self.num_out, inputDim[1], inputDim[2])
        shape = inputDim +  self.num_out
        print "Affine2D", shape
        self.W = theano.shared(self.weight_init.generate(self.rng, shape),name=self.name +" weight")
        self.setMoments(zeros(self.W.shape, dtype='float32'))
    
class LogRegLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(LogRegLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.nnet.sigmoid(X)


class TanhLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(TanhLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        print "tanh layer", X
        return T.tanh(X)

    
class ReLULayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(ReLULayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.maximum(0.0, X)

class Conv(Layer):
    def __init__(self, f_out, f_size, gamma = 0.1, n = "", weight_init = None, **kwargs):
        super(Conv, self).__init__(name = n, **kwargs)
        if weight_init is None:
            b = numpy.sqrt(50. / (2*f_out+ f_size + f_size))
            self.weight_init = Uniform(width=b)
        self.gamma= theano.shared(gamma)
        self.f_out = f_out
        self.f_size = f_size
    
    
    def setInputDim(self, inputDim):
        F_size = (self.f_out, ) + (inputDim[0], self.f_size, self.f_size)                                   
        self.num_out = (self.f_out, inputDim[1] - self.f_size + 1, inputDim[2] - self.f_size + 1)
        print 'Conv filter', F_size
        self.F = theano.shared(self.weight_init.generate(self.rng, F_size),name=self.name +" filter")
        
    def update(self, foo, alpha):
        gf = T.grad(foo, self.F)
        return  [(self.F, self.F - alpha * gf)]    
    
    def cost(self):
        return  (self.F ** 2).sum() * self.gamma
    
    def build(self, X):
        return T.maximum(0.0, T.nnet.conv2d(X, self.F))
        
        
        
class Flatten(Layer):
    def __init__(self, n = "", **kwargs):
        super(Flatten, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.flatten(X, 2)
    def setInputDim(self, inputDim):
        out_dim = 1
        for i in inputDim:
            out_dim = out_dim * i
        self.num_out = out_dim
    

class BNLayer(Layer):
    def __init__(self,num_out, n = "BNLayer", gamma = 0.1, alpha=1.0,**kwargs):
        super(BNLayer, self).__init__(name = n, **kwargs)
        self.num_out, self.alpha = num_out, alpha
        self.gamma= theano.shared(gamma)
    def build(self, X):
        self.Gamma = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Gamma " + self.name))
        print 'Gamma shape:', np.zeros((1, self.num_out)).shape
        self.Beta  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Beta " + self.name))
        print 'Beta shape:', np.zeros((1, self.num_out)).shape
        self.Gamma.tag.initializer = Constant(1.0)
        self.Beta.tag.initializer = Constant(0.0)
    
        #self.stored_means = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Means" + self.name))
        #self.stored_stds  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Stds" + self.name))
        #self.stored_means.tag.initializer = Constant(0.0)
        #self.stored_stds.tag.initializer = Constant(1.0)
    
        self.means = self.alpha * theano.tensor.mean(X, 0, keepdims=True)
        self.stds = self.alpha * theano.tensor.std(X, 0, keepdims=True)
        self.means.tag.initializer = Constant(0.0)
        self.stds.tag.initializer = Constant(1.0)
        #self.means = self.alpha *self.means + (1.0 - self.alpha) * self.stored_means.dimshuffle(0,'x')
        #self.stds = self.alpha * self.stds + (1.0 - self.alpha) * self.stored_stds.dimshuffle(0,'x')
        
        normalized = theano.tensor.nnet.bn.batch_normalization(
            X,
            self.Gamma,
            self.Beta,
            self.means,
            self.stds,
            'high_mem'
        )
        return normalized
    @property
    def parameters(self):
        return [self.Gamma, self.Beta]
    @property
    def check(self):
        return [self.gg, self.gb, self.Gamma, self.Beta,self.means, self.stds ]
    #def cost(self):
    #    return  ((self.Gamma ** 2).sum() + (self.Gamma ** 2).sum())* self.gamma
    def update(self, foo, alpha):
        self.gg, self.gb = T.grad(foo, self.parameters)
        return  [(self.Gamma, self.Gamma- alpha *self.gg),
            (self.Beta, self.Beta - alpha * self.gb)] 
    
class SoftMaxLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(SoftMaxLayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.nnet.softmax(X)

class MaxPoolLayer(Layer):
    def __init__(self, p_size):
        self.p_size = p_size
    def build(self, input):
        return down.max_pool_2d(input, (self.p_size,self.p_size), ignore_border=True)
    def getOutputDim(self):
        shape = (self.num_out[0], ) + (self.num_out[1]/self.p_size, self.num_out[2]/self.p_size) 
        print "maxPool", shape
        return shape
    
class FeedForwardNet(object):
    def __init__(self, layers=None, alpha=0.1, lamb = 0.1):
        if layers is None:
            layers = []
        self.layers = layers
        print type(alpha)
        self.alpha = theano.shared(float32(alpha), name='alpha')
    
    def add(self, layer):
        self.layers.append(layer)
    
    @property
    def parameters(self):
        params = []
        for layer in self.layers:
            params += layer.parameters
        return params
    
    @parameters.setter
    def parameters(self, values):
        for ownP, newP in zip(self.parameters, values):
            ownP[...] = newP
    
    @property
    def parameter_names(self):
        param_names = []
        for layer in self.layers:
            param_names += layer.parameter_names
        return param_names
    
    def build(self, inputDim):
        x = T.tensor4("x")
        y = T.vector("y", dtype='int64')
        cost = 0
        paramUpdates = []
        
        X = x
        o1 = []
        o2 = []
        for layer, i in zip(self.layers, range(len(self.layers))):
            #print inputDim
            layer.setInputDim(inputDim)
            layer.setLambda(lamb)
            inputDim = layer.getOutputDim()
            X = layer.build(X)
            print 'name', layer.name
            if i == 2:
                print 'BNname', layer.name
                o1 = layer
            if i == 5:
                print 'BNname', layer.name
                o2 = layer

            cost += layer.cost()
        
        pred = np.argmax(X, 1)
        self.costFoo = T.nnet.categorical_crossentropy(X, y).mean() + cost
        
        for layer in self.layers:
            print layer.name
            paramUpdates += layer.update(self.costFoo, self.alpha)
            
        o1 = o1.check
        o2 = o2.check
        
        paramUpdates += [(self.alpha, self.alpha * 0.99993)]
        self.train = theano.function(inputs=[x,y], 
                                    outputs=[pred, self.costFoo, self.alpha]+o1+o2,
                                    updates=paramUpdates)
        self.predict  = theano.function(inputs=[x], 
                                    outputs=pred)
        
    
    @property
    def trainFunction(self):
        return self.train
    
    @property
    def predictFunction(self):
        return self.predict
    @property
    def costFunction(self):
        return self.costFoo
