In [15]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy


In [16]:
from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')

[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>), HostFromGpu(GpuElemwise{exp,no_inplace}.0)]
Looping 1000 times took 0.242374 seconds
Result is [ 1.23178029  1.61879349  1.52278066 ...,  2.20771813  2.29967761
  1.62323296]
Used the gpu


In [17]:
#
# These are taken from https://github.com/mila-udem/blocks
# 

class Constant():
    """Initialize parameters to a constant.
    The constant may be a scalar or a :class:`~numpy.ndarray` of any shape
    that is broadcastable with the requested parameter arrays.
    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The initialization value to use. Must be a scalar or an ndarray (or
        compatible object, such as a nested list) that has a shape that is
        broadcastable with any shape requested by `initialize`.
    """
    def __init__(self, constant):
        self._constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        dest = numpy.empty(shape, dtype=np.float32)
        dest[...] = self._constant
        return dest


class IsotropicGaussian():
    """Initialize parameters from an isotropic Gaussian distribution.
    Parameters
    ----------
    std : float, optional
        The standard deviation of the Gaussian distribution. Defaults to 1.
    mean : float, optional
        The mean of the Gaussian distribution. Defaults to 0
    Notes
    -----
    Be careful: the standard deviation goes first and the mean goes
    second!
    """
    def __init__(self, std=1, mean=0):
        self._mean = mean
        self._std = std

    def generate(self, rng, shape):
        m = rng.normal(self._mean, self._std, size=shape)
        return m.astype(np.float32)


class Uniform():
    """Initialize parameters from a uniform distribution.
    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function); Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.
    """
    def __init__(self, mean=0., width=None, std=None):
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2
            self._width = numpy.sqrt(12) * std
        else:
            self._width = width
        self._mean = mean

    def generate(self, rng, shape):
        w = self._width / 2
        #print 'u', shape
        m = rng.uniform(self._mean - w, self._mean + w, size=shape)
        return m.astype(np.float32)


In [18]:
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

MNIST.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

mnist_train = MNIST(("train",), subset=slice(None,50000))
#this stream will shuffle the MNIST set and return us batches of 100 examples
mnist_train_stream = DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(mnist_train.num_examples, 100))

                         
mnist_validation = MNIST(("train",), subset=slice(50000, None))

# We will use larger portions for testing and validation
# as these dont do a backward pass and reauire less RAM.
mnist_validation_stream = DataStream.default_stream(
    mnist_validation, iteration_scheme=SequentialScheme(mnist_validation.num_examples, 250))
mnist_test = MNIST(("test",))
mnist_test_stream = DataStream.default_stream(
    mnist_test, iteration_scheme=SequentialScheme(mnist_test.num_examples, 250))

In [19]:
from fuel.datasets.cifar10 import CIFAR10
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

CIFAR10.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

cifar10_train = CIFAR10(("train",), subset=slice(None,40000))
#this stream will shuffle the MNIST set and return us batches of 100 examples
cifar10_train_stream = DataStream.default_stream(
    cifar10_train,
    iteration_scheme=ShuffledScheme(cifar10_train.num_examples, 100))
                                               
cifar10_validation = CIFAR10(("train",), subset=slice(40000, None))

# We will use larger portions for testing and validation
# as these dont do a backward pass and reauire less RAM.
cifar10_validation_stream = DataStream.default_stream(
    cifar10_validation, iteration_scheme=SequentialScheme(cifar10_validation.num_examples, 250))
cifar10_test = CIFAR10(("test",))
cifar10_test_stream = DataStream.default_stream(
    cifar10_test, iteration_scheme=SequentialScheme(cifar10_test.num_examples, 250))

In [20]:
print "The streams return batches containing %s" % (mnist_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(mnist_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(mnist_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "CIFAR: "  
print "The streams return batches containing %s" % (cifar10_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(cifar10_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(cifar10_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)

The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (100, 1, 28, 28) containing float32
 - an array of size (100, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (250, 1, 28, 28) containing float32
 - an array of size (250, 1) containing uint8
CIFAR: 
The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (100, 3, 32, 32) containing float32
 - an array of size (100, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (250, 3, 32, 32) containing float32
 - an array of size (250, 1) containing uint8


In [21]:
from sklearn import datasets
iris = datasets.load_iris()

#print iris

feats = 4
alpha = 0.01
pop_num = 150
rng = np.random
iris_f = iris['data'][:pop_num,:feats]
iris_t = iris['target'][:pop_num]
iris = hstack(([[x] for x in iris_t], iris_f))

rng.shuffle(iris

#print iris

iris_train_f = iris[:2*pop_num/3,1:]
iris_train_t = np.array(iris[:2*pop_num/3, 0], dtype='uint8')
iris_test_f = iris[2*pop_num/3:,1:]
iris_test_t = np.array(iris[2*pop_num/3:, 0], dtype='uint8')

SyntaxError: invalid syntax (<ipython-input-21-6604dab46423>, line 18)

In [22]:
import theano
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))
import time

In [23]:
import theano.tensor.signal.downsample as down

class Layer(object):
    def __init__(self, lamb = 0.1,rng=None, name=""):
        self.name = name
        self.lamb = lamb
        if rng is None:
            rng = numpy.random
        self.rng = rng
    
    @property
    def parameters(self):
        return []
    
    @property
    def parameter_names(self):
        return []
    
    def get_gradients(self, dLdY, fprop_context):
        return []
    
    def update(self, foo, alpha):
        return []
    def cost(self):
        return 0;
    def setInputDim(self, inputDim):
        self.num_out = inputDim
    def getOutputDim(self):
        return self.num_out
    def setMoments(self, moments):
        self.moments = moments
    def setLambda(self, lamb):
        self.lamb = lamb
    

class AffineLayer(Layer):
    def __init__(self, num_out, initW = 10., gamma  = 0.1, n = "", weight_init=None, bias_init=None, **kwargs):
        super(AffineLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(initW / (num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        self.gamma= theano.shared(gamma)
        self.b = theano.shared(bias_init.generate(self.rng, (num_out)), name=self.name +" bias")
    
    @property
    def parameters(self):
        return [self.W, self.b]
    @property
    def parametersValues(self):
        return [self.W.get_value(), self.b.get_value()]
    @property
    def parameter_names(self):
        return ['W','b']
    
    def build(self, X):
        #print self.name+ " ",X.shape 
        return X.dot(self.W) + self.b
    def cost(self):
        return  (self.W ** 2).sum() * self.gamma
    def update(self, foo, alpha):
        gw, gb = T.grad(foo, self.parameters)
        moments = self.moments
        self.setMoments((gw, gb))
        return  [(self.W, self.W - (alpha * gw + self.lamb * moments[0])), 
                 (self.b, self.b - (alpha * gb+ self.lamb * moments[1]))]
    def setInputDim(self, inputDim):
        shape = (inputDim, self.num_out)
        print "AffineLayer: ", shape
        self.W = theano.shared(self.weight_init.generate(self.rng, shape),name=self.name +" weight")
        self.setMoments(zeros(shape, dtype='float32'))
    
class LogRegLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(LogRegLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.nnet.sigmoid(X)


class TanhLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(TanhLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        print "tanh layer", X
        return T.tanh(X)

    
class ReLULayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(ReLULayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.maximum(0.0, X)

class Conv(Layer):
    def __init__(self, f_out, f_size, initW = 10., gamma = 0.1, n = "", weight_init = None, **kwargs):
        super(Conv, self).__init__(name = n, **kwargs)
        if weight_init is None:
            b = numpy.sqrt(initW / (f_out+ f_size + f_size))
            self.weight_init = Uniform(width=b)
        self.gamma= theano.shared(gamma)
        self.f_out = f_out
        self.f_size = f_size
    
    
    def setInputDim(self, inputDim):
        F_size = (self.f_out, ) + (inputDim[0], self.f_size, self.f_size)                                   
        self.num_out = (self.f_out, inputDim[1] - self.f_size + 1, inputDim[2] - self.f_size + 1)
        print 'Conv filter', F_size
        self.F = theano.shared(self.weight_init.generate(self.rng, F_size),name=self.name +" filter")
        
    def update(self, foo, alpha):
        gf = T.grad(foo, self.F)
        return  [(self.F, self.F - alpha * gf)]    
    
    #def cost(self):
    #    return  (self.F ** 2).sum() * self.gamma
    
    def build(self, X):
        return T.maximum(0.0, T.nnet.conv2d(X, self.F))
        
        
        
class Flatten(Layer):
    def __init__(self, n = "", **kwargs):
        super(Flatten, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.flatten(X, 2)
    def setInputDim(self, inputDim):
        out_dim = 1
        for i in inputDim:
            out_dim = out_dim * i
        self.num_out = out_dim
    

class BNLayer(Layer):
    def __init__(self,num_out, n = "BNLayer", gamma = 0.1, alpha=1.0,**kwargs):
        super(BNLayer, self).__init__(name = n, **kwargs)
        self.num_out, self.alpha = num_out, alpha
        self.gamma= theano.shared(gamma)
    def build(self, X):
        self.Gamma = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Gamma " + self.name))
        print 'Gamma shape:', np.zeros((1, self.num_out)).shape
        self.Beta  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Beta " + self.name))
        print 'Beta shape:', np.zeros((1, self.num_out)).shape
        self.Gamma.tag.initializer = Constant(1.0)
        self.Beta.tag.initializer = Constant(0.0)
    
        #self.stored_means = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Means" + self.name))
        #self.stored_stds  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Stds" + self.name))
        #self.stored_means.tag.initializer = Constant(0.0)
        #self.stored_stds.tag.initializer = Constant(1.0)
    
        self.means = self.alpha * theano.tensor.mean(X, 0, keepdims=True)
        self.stds = self.alpha * theano.tensor.std(X, 0, keepdims=True)
        self.means.tag.initializer = Constant(0.0)
        self.stds.tag.initializer = Constant(1.0)
        #self.means = self.alpha *self.means + (1.0 - self.alpha) * self.stored_means.dimshuffle(0,'x')
        #self.stds = self.alpha * self.stds + (1.0 - self.alpha) * self.stored_stds.dimshuffle(0,'x')
        
        normalized = theano.tensor.nnet.bn.batch_normalization(
            X,
            self.Gamma,
            self.Beta,
            self.means,
            self.stds,
            'high_mem'
        )
        return normalized
    @property
    def parameters(self):
        return [self.Gamma, self.Beta]
    @property
    def check(self):
        return [self.gg, self.gb, self.Gamma, self.Beta,self.means, self.stds ]
    #def cost(self):
    #    return  ((self.Gamma ** 2).sum() + (self.Gamma ** 2).sum())* self.gamma
    def update(self, foo, alpha):
        self.gg, self.gb = T.grad(foo, self.parameters)
        return  [(self.Gamma, self.Gamma- alpha *self.gg),
            (self.Beta, self.Beta - alpha * self.gb)] 
    
class SoftMaxLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(SoftMaxLayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.nnet.softmax(X)

class MaxPoolLayer(Layer):
    def __init__(self, p_size, n = "MP", **kwargs):
        super(MaxPoolLayer, self).__init__(name = n, **kwargs)
        self.p_size = p_size
    def build(self, input):
        return down.max_pool_2d(input, (self.p_size,self.p_size), ignore_border=True)
    def getOutputDim(self):
        shape = (self.num_out[0], ) + (self.num_out[1]/self.p_size, self.num_out[2]/self.p_size) 
        print "maxPool", shape
        return shape
    
class DropOutLayer(Layer):
    def __init__(self, dropOut = 0.1, n = "MP", **kwargs):
        super(DropOutLayer, self).__init__(name = n, **kwargs)
        self.dropOut = dropOut
        self.u = Uniform(0.5, 1.)
    def build(self, input):
        self.D = theano.shared((self.u.generate(self.rng, (self.num_out,))>= self.dropOut) + 0,name=self.name +" Dropout") 
        print self.D.get_value()
        return input * self.D
    def getOutputDim(self):
        shape = self.num_out 
        print "maxPool", shape
        return shape
    def update(self, foo, alpha):
        return  [(self.D,
            (self.u.generate(self.rng, (self.num_out,)) >= self.dropOut)+0)]
    
class FeedForwardNet(object):
    def __init__(self, layers=None, alpha=0.1, lamb = 0.1):
        if layers is None:
            layers = []
        self.layers = layers
        print type(alpha)
        self.alpha = theano.shared(float32(alpha), name='alpha')
    
    def add(self, layer):
        self.layers.append(layer)
    
    @property
    def parameters(self):
        params = []
        for layer in self.layers:
            params += layer.parameters
        return params
    
    @parameters.setter
    def parameters(self, values):
        for ownP, newP in zip(self.parameters, values):
            ownP[...] = newP
    
    @property
    def parameter_names(self):
        param_names = []
        for layer in self.layers:
            param_names += layer.parameter_names
        return param_names
    
    def build(self, inputDim):
        x = T.tensor4("x")
        y = T.vector("y", dtype='int64')
        cost = 0
        paramUpdates = []
        
        X = x
        for layer, i in zip(self.layers, range(len(self.layers))):
            #print inputDim
            layer.setInputDim(inputDim)
            layer.setLambda(lamb)
            inputDim = layer.getOutputDim()
            X = layer.build(X)
            #if(i == 3):
            #    D = X
            cost += layer.cost()
        
        pred = np.argmax(X, 1)
        self.costFoo = T.nnet.categorical_crossentropy(X, y).mean() + cost
        
        #svgdotprint(self.costFoo)
        
        for layer in self.layers:
            paramUpdates += layer.update(self.costFoo, self.alpha)
        
        paramUpdates += [(self.alpha, self.alpha * 0.99993)]
        self.train = theano.function(inputs=[x,y], 
                                    outputs=[pred, self.costFoo, self.alpha],
                                    updates=paramUpdates)
        self.predict  = theano.function(inputs=[x], 
                                    outputs=pred)
        
    
    @property
    def trainFunction(self):
        return self.train
    
    @property
    def predictFunction(self):
        return self.predict
    @property
    def costFunction(self):
        return self.costFoo


In [24]:
def compute_er(net, stream):
    num_errs = 0.0
    num_examples = 0
    for X, Y in stream.get_epoch_iterator():
        predictions = net.predictFunction(X)
        #print predictions != Y.ravel()
        num_errs += (predictions != Y.ravel()).sum()
        #print Y.shape[0], num_errs
        num_examples += Y.shape[0]
    return num_errs/num_examples


In [30]:
img_size = (32, 32)
c1_i = 3
c1_o = 40
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1000
hidden2 = 600
hidden3 = 900
hidden4 = 1500
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (40, 3, 5, 5)
maxPool (40, 9, 9)
AffineLayer:  (3240, 1000)
AffineLayer:  (1000, 1500)
AffineLayer:  (1500, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.01
alpha:  0.005
57.0667901421 0.71
55.6934429216 0.56
54.3762359428 0.51
53.3481961107 0.51
After epoch:  0 0.5154 time:  11.2588739395
52.3642998552 0.53
51.0976513529 0.34
50.3923095131 0.51
49.4492250872 0.43
After epoch:  1 0.4784 time:  11.227989912
48.3454208183 0.38
47.4335389566 0.37
46.7476853275 0.39
45.9872167206 0.42
After epoch:  2 0.4405 time:  11.2285368443
44.9683929539 0.38
44.2580107355 0.38
43.3583118677 0.28
42.6145517874 0.34
After epoch:  3 0.4346 time:  11.2292501926
41.8657502317 0.33
40.9719060946 0.27
40.4854404879 0.37
39.6791185474 0.31
After epoch:  4 0.4226 time:  11.2287328243
39.2342171812 0.33
38.4017769194 0.32
37.6770122981 0.31
37.2771888018 0.37
After epoch:  5 0.4027 time:  11.2280030251
36.5642729497 0.29
36.0807698298 0.34
35.4947420311 0.35
34.886615746 0.31
After e

In [31]:
# conv improved

img_size = (32, 32)
c1_i = 3
c1_o = 70
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1000
hidden2 = 600
hidden3 = 900
hidden4 = 1500
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (70, 3, 5, 5)
maxPool (70, 9, 9)
AffineLayer:  (5670, 1000)
AffineLayer:  (1000, 1500)
AffineLayer:  (1500, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.01
alpha:  0.005
76.5141811752 0.58
75.0916991472 0.58
73.4868948364 0.49
72.1148699427 0.5
After epoch:  0 0.509 time:  14.4691700935
70.7826563311 0.51
69.1302146769 0.4
67.8547703123 0.42
66.5984792519 0.41
After epoch:  1 0.4728 time:  14.4609730244
65.276296382 0.38
64.2076591015 0.44
63.1852592611 0.5
61.8972413158 0.38
After epoch:  2 0.4405 time:  14.461083889
60.9679688883 0.46
59.5152011442 0.27
58.635369277 0.38
57.6455867815 0.45
After epoch:  3 0.4256 time:  14.4646980762
56.7879782152 0.44
55.5405999374 0.35
54.5855901885 0.36
53.5783030725 0.27
After epoch:  4 0.399 time:  14.4621059895
52.7219259644 0.3
51.8848256254 0.32
51.1191351509 0.35
50.1619957328 0.3
After epoch:  5 0.3985 time:  14.4612538815
49.4187628889 0.33
48.5868545175 0.31
47.8374587345 0.32
46.943489368 0.31
After epoch:  6 

In [32]:
# drop out + tanh

img_size = (32, 32)
c1_i = 3
c1_o = 40
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1000
hidden2 = 600
hidden3 = 900
hidden4 = 1500
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      DropOutLayer(0.3),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      TanhLayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (40, 3, 5, 5)
maxPool (40, 9, 9)
AffineLayer:  (3240, 1000)
maxPool 1000
[1 1 1 0 0 1 1 1 1 1 0 0 1 0 1 0 1 1 0 1 0 0 1 1 1 0 1 0 0 1 1 1 1 1 0 1 1
 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 1
 1 0 1 0 1 1 0 1 1 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0
 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0
 1 1 1 1 0 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 0 1 1 0 1 1 0 1
 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 0 1 1 0 0 0 1 1 0
 0 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0
 1 0 1 1 0 1 0 1 1 1 1 0 1 1 0 1 1 1 1 1 0 0 0 1 0 0 0 1 1 1 0 1 1 1 0 1 1
 1 0 0 0 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 0 0 1 1 1 1 0
 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 0


In [33]:
# double drop out + tanh + more neurons

img_size = (32, 32)
c1_i = 3
c1_o = 40
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1500
hidden2 = 600
hidden3 = 900
hidden4 = 2000
hidden5 = 1500
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      DropOutLayer(0.3),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      DropOutLayer(0.3),
                      TanhLayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (40, 3, 5, 5)
maxPool (40, 9, 9)
AffineLayer:  (3240, 1500)
maxPool 1500
[1 1 1 ..., 1 1 0]
AffineLayer:  (1500, 2000)
AffineLayer:  (2000, 1500)
maxPool 1500
[1 0 0 ..., 0 1 1]
tanh layer Elemwise{mul,no_inplace}.0
AffineLayer:  (1500, 10)
Start
gamma:  0.01
alpha:  0.005
69.0172825295 0.56
67.2557420538 0.51
66.0673458478 0.48
64.7968794919 0.5
After epoch:  0 0.4908 time:  44.5873589516
63.6033252626 0.51
62.5095396898 0.51
61.0372832378 0.4
59.9644198064 0.45
After epoch:  1 0.4406 time:  44.6066348553
58.7408954558 0.36
57.8960114827 0.46
56.7116449277 0.44
55.5822044693 0.39
After epoch:  2 0.429 time:  44.6188797951
54.7956082452 0.42
53.7528127197 0.43
52.639026151 0.36
51.7261502536 0.35
After epoch:  3 0.4043 time:  44.613355875
50.864513801 0.36
49.8224320525 0.28
49.1909439529 0.39
48.5430456783 0.44
After epoch:  4 0.3978 time:  44.6108000278
47.5268163014 0.39
46.5498307569 0.32
45.8406187621 0.29
45.025637523 0.27
After epoch:  5 0.3866 time:  

In [34]:
# BN

img_size = (32, 32)
c1_i = 3
c1_o = 40
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1000
hidden2 = 600
hidden3 = 900
hidden4 = 1500
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (40, 3, 5, 5)
maxPool (40, 9, 9)
AffineLayer:  (3240, 1000)
AffineLayer:  (1000, 1500)
Gamma shape: (1, 1500)
Beta shape: (1, 1500)
AffineLayer:  (1500, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.01
alpha:  0.005
57.3323056126 0.69
56.1859529114 0.68
54.9988648891 0.58
53.9329610968 0.62
After epoch:  0 0.6686 time:  11.508767128
52.8235335302 0.62
51.7835012674 0.61
50.8437072325 0.6
49.8444881964 0.58
After epoch:  1 0.6104 time:  11.4829850197
48.9583642435 0.57
48.091694355 0.66
47.1744084215 0.53
46.3351177216 0.53
After epoch:  2 0.5586 time:  11.4824221134
45.3866720486 0.44
44.6058132601 0.56
43.9576383877 0.57
43.0978316879 0.54
After epoch:  3 0.5155 time:  11.4844830036
42.3801307344 0.52
41.6845852137 0.56
40.7879029846 0.47
40.0049655199 0.47
After epoch:  4 0.4838 time:  11.4825909138
39.4758899212 0.48
38.9038632298 0.47
38.0142376661 0.42
37.4901660252 0.5
After epoch:  5 0.4612 time:  11.482970953
36.862213459 0.44
36.2565147161 0.41
35.8

In [None]:
# conv improved + second conv - one affine

img_size = (32, 32)
c1_i = 3
c1_o = 70
c1_f = 5
p1   = 3
c2_o = 100
c2_f = 5
hidden1 = 1000
hidden2 = 600
hidden3 = 900
hidden4 = 1500
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Conv(c2_o, c2_f, initC, "Conv1"),
                      Flatten("Flatten"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden5, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (70, 3, 5, 5)
maxPool (70, 9, 9)
Conv filter (100, 70, 5, 5)
AffineLayer:  (2500, 1500)
AffineLayer:  (1500, 1000)
AffineLayer:  (1000, 10)
Start
gamma:  0.01
alpha:  0.005
42.7093963385 0.65
41.8681861401 0.64
40.9443804646 0.66
39.9672256708 0.55
After epoch:  0 0.5623 time:  21.5187909603
39.1055736685 0.45
38.2819350386 0.42
37.6842639017 0.47
36.8423904085 0.44
After epoch:  1 0.5004 time:  21.6505651474
36.1072417307 0.37
35.4611143494 0.38
34.8009821796 0.4
34.3248109579 0.52
After epoch:  2 0.4728 time:  21.6272439957
33.4408372211 0.29
33.2032279682 0.48
32.3753739595 0.41
31.8188680506 0.32
After epoch:  3 0.4501 time:  21.634953022
31.3639669466 0.39
30.7137915897 0.35
30.2074648809 0.28
29.7925431204 0.32
After epoch:  4 0.4394 time:  21.6252038479
29.0517795897 0.34
28.5935023379

In [11]:
# conv improved smaler filter - one affine + bigger othera

img_size = (32, 32)
c1_i = 3
c1_o = 70
c1_f = 4
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1500
hidden2 = 600
hidden3 = 900
hidden4 = 2000
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X, Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (70, 3, 4, 4)
maxPool (70, 9, 9)
AffineLayer:  (5670, 1500)
AffineLayer:  (1500, 2000)
AffineLayer:  (2000, 10)
Start
gamma:  0.01
alpha:  0.005
76.3999556541 0.57
74.8158580732 0.51
73.5076325226 0.57
71.9370175076 0.55
After epoch:  0 0.5096 time:  16.4881169796
70.572705164 0.47
69.0423923826 0.41
67.9038034773 0.52
66.3765749454 0.37
After epoch:  1 0.4613 time:  16.4796540737
65.280384469 0.4
63.9740920973 0.38
62.7846190166 0.3
61.5477346301 0.27
After epoch:  2 0.419 time:  16.4553368092
60.7711463308 0.45
59.5147601271 0.37
58.4205138874 0.37
57.6285100555 0.4
After epoch:  3 0.4096 time:  16.4735560417
56.470032959 0.3
55.478413806 0.27
54.5091505361 0.37
53.8628119326 0.39
After epoch:  4 0.399 time:  16.4540200233
52.5932572103 0.26
51.6949662352 0.3
50.8454707718 0.29
50.0987755847 0.37
After epoch:  5 0.381 time:  16.4711191654
49.2206654239 0.28
48.4164842558 0.31
47.5306910491 0.19
46.8070055795 0.26
After epoch:  6 0.3804 time:  16.4538910389


In [27]:
# noise

img_size = (32, 32)
c1_i = 3
c1_o = 40
c1_f = 5
p1   = 3
c2_o = 40
c2_f = 5
hidden1 = 1000
hidden2 = 600
hidden3 = 900
hidden4 = 1500
hidden5 = 1000
outs = 10
gamma = 0.01
alpha = 0.005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([Conv(c1_o, c1_f, initC, "Conv1"),
                      MaxPoolLayer(p1, "P"),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, initW, gamma, "tA"),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.1)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a = net.trainFunction(X+ u.generate(numpy.random, X.shape), Y.ravel()) #
        i+=1
        if i % 100 == 0:
            print c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

<type 'float'>
Conv filter (40, 3, 5, 5)
maxPool (40, 9, 9)
AffineLayer:  (3240, 1000)
AffineLayer:  (1000, 1500)
AffineLayer:  (1500, 10)
Start
gamma:  0.01
alpha:  0.005
48.7995629263 0.69
47.6393838739 0.57
46.6148750305 0.46
45.6382958364 0.53
After epoch:  0 0.5051 time:  11.6033420563
44.9440546608 0.52
43.8671420527 0.42
43.0279544878 0.48
42.4255361032 0.45
After epoch:  1 0.4648 time:  11.5716280937
41.5676314926 0.47
40.7466702366 0.36
39.9619822884 0.46
39.4632064867 0.48
After epoch:  2 0.4481 time:  11.5704879761
38.4886363316 0.39
37.734236846 0.37
37.1774774885 0.38
36.5852271032 0.38
After epoch:  3 0.4284 time:  11.5708990097
35.8151766944 0.33
35.1907035613 0.32
34.5989921641 0.34
34.0279805589 0.33
After epoch:  4 0.4083 time:  11.5706729889
33.5948352909 0.38
32.9223415923 0.35
32.3143537855 0.28
31.7502085423 0.28
After epoch:  5 0.3952 time:  11.5710990429
31.4369001818 0.39
30.8559224606 0.35
30.2323788381 0.28
30.0459413004 0.47
After epoch:  6 0.3861 time:  11.

In [14]:
    def xent(x , y):
        return  -y * T.log(x) - (1-y) * T.log(1-x)
    
    def costF(x, w):
        return x.mean() + 0.01 * (w ** 2).sum()

In [69]:
conv = Conv(3, 32, 3)
flatten = Flatten()
x = T.tensor4("x")
y = T.vector("y", dtype='int64')

C = conv.build(x)
F = flatten.build(C)

foo = theano.function(inputs=[x], 
                    outputs=[C, F])

for X, Y in cifar10_train_stream.get_epoch_iterator():
    print X.shape
    c,f = foo(X)
    print c.shape
    print f.shape
    break

conv = Conv(1, 1)
x = T.matrix("x")
y = T.vector("y", dtype='int64')

x = conv.build(x)
for X, Y in cifar10_train_stream.get_epoch_iterator():
    X = X.T
    Y = X.copy()
    print X.shape
    zeros(X.shape)
    conv.resize(X)
    print X.shape
    print Y.shape
    conv.reresize(X)
    print X.shape
    print X == Y
    break

AttributeError: 'Conv' object has no attribute 'F'

In [351]:
feats = 784
hidden1 = 500
hidden2 = 200
outs = 10
gamma = 0.001
alpha = 0.1
num_epochs  = 100

net = FeedForwardNet([AffineLayer(feats, hidden1, gamma, "fA"), 
                      TanhLayer("fTanh"),
                      AffineLayer(hidden1, hidden2, gamma, "sA"), 
                      TanhLayer("fTanh"),
                      AffineLayer(hidden2, outs, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha)
net.build()
print "Start"
i = 0
e = 0
while e < num_epochs:
    for X, Y in mnist_train_stream.get_epoch_iterator():
        pr ,c = net.trainFunction(X.T, Y.ravel())
        i+=1
        #if i % 100 == 0:
            #print c, (pr  == Y).mean()
    
    print "After epoch: ", e, compute_er(net, mnist_validation_stream)
    e+=1

    
for X, Y in mnist_validation_stream.get_epoch_iterator():
    predictions = net.predictFunction(X.T)
    num_errs += (predictions != Y).sum()
    num_examples += X.shape[1]
    k+=1
print num_errs, num_examples, k, num_errs/num_examples
#print (iris_test_t  == net.predictFunction(iris_test_f)).mean()

fA  Shape.0
sA  Shape.0
tA  Shape.0


ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float32, 4D)

In [None]:
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))



feats = 4
hidden = 500
outs = 3
gamma = 0.001
alpha = 0.1

net = FeedForwardNet([AffineLayer(feats, hidden, gamma, "fA"), 
          TanhLayer("fTanh"),
          AffineLayer(hidden, outs, gamma, "sA"), 
          SoftMaxLayer("fSoftMax")], alpha)




#theano.printing.pydotprint(costFoo, outfile="symbolic_graph_unopt.png", var_with_name_simple=True)  


net.build()

print iris_train_t
for i in range(10000):
    pr ,c = net.trainFunction(iris_train_f, iris_train_t)
    if i % 100 == 0:
        print c, (pr  == iris_train_t).mean()

print (pr  == iris_train_t).mean()
print pr

figure()
subplot(2,1,1)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=iris_test_t.ravel(), cmap='prism')
subplot(2,1,2)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=net.predicFunction(iris_test_f), cmap='prism')
print (iris_test_t  == net.predictFunction(iris_test_f)).mean()

In [None]:
#print iris_test_t

w = theano.shared(rng.randn(feats)*0.01, name="w")
#print w.get_value()
b = theano.shared(0., name="b")
#print b.get_value()
x = T.matrix("x")
y = T.vector("y")

p_1 = T.nnet.sigmoid(T.dot(x, w) + b)   # Probability that target = 1
prediction = p_1 > 0.5                    # The prediction thresholded
c = costF(xent(p_1, y), w)# The cost to minimize
gw, gb = T.grad(c, [w, b])             # Compute the gradient of the cost
                                          # (we shall return to this in a
                                          # following section of this tutorial)

train = theano.function(
          inputs=[x,y],
          outputs=[prediction, c],
          updates=((w, w - alpha * gw), (b, b - alpha * gb)))
predict = theano.function(inputs=[x], outputs=prediction)



for i in range(1000):
    p, c =train(iris_train_f, iris_train_t)
    #print p, c, x.mean()
    
print (predict(iris_test_f) == iris_test_t).mean()


#foo = theano.function(inputs=[iris_train_f], outputs=[f])

#print iris_test_t
figure()
subplot(2,1,1)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=iris_test_t.ravel(), cmap='spring')
subplot(2,1,2)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=predict(iris_test_f).ravel(), cmap='spring')


In [None]:
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))



x = T.matrix("x")
y = T.vector("y")
feats = 4
hidden = 500
outs = 1
gamma = 0.1
alpha = 0.1

fAL = AffineLayer(feats, hidden, gamma, "fA")
tL = TanhLayer()
sAL = AffineLayer(hidden, outs, gamma, "sA")
lL = LogRegLayer()

fa = fAL.build(x)
t = tL.build(fa)
sa = sAL.build(t)
out = lL.build(sa)
pred = out > 0.5
c = xent(out.ravel(), y).mean() + fAL.cost() + sAL.cost()

theano.printing.pydotprint(out, outfile="symbolic_graph_unopt.png", var_with_name_simple=True)  
fgw, fgb = T.grad(c, fAL.parameters)
sgw, sgb = T.grad(c, sAL.parameters)

train = theano.function(inputs=[x,y], 
                        outputs=[pred, c], 
                        updates=(fAL.update(c, alpha) + sAL.update(c, alpha)))
predict  = theano.function(inputs=[x], 
                        outputs=[pred])

for i in range(100):
    pr, cost = train(iris_train_f, iris_train_t)

print (pr.ravel() == iris_train_t).mean()


figure()
subplot(2,1,1)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=iris_test_t.ravel(), cmap='spring')
subplot(2,1,2)
scatter(iris_test_f[:,0], iris_test_f[:,1], c=predict(iris_test_f), cmap='spring')





In [None]:
x1 = np.array([[12, 13], [1, 3]])
y1 = [1, 2]
print x.shape
y = T.vector()
x = T.matrix()
f = theano.function(inputs=[x, y], outputs=x+y)
f(x1, y1)


x2 = [1,2]
y2 = [2,3]
x2 + y2

In [None]:
img_size = (32, 32)
c1_i = 3
c1_o = 50
c1_f = 5
p1   = 2
c2_o = 10
c2_f = 3
hidden1 = 300
hidden2 = 500
hidden3 = 50
hidden4 = 50
hidden5 = 300
outs = 10
gamma = 0.001
alpha = 0.01
lamb = 0.1
num_epochs  = 300

net = FeedForwardNet([
                      Flatten("Flatten"),
                      AffineLayer(hidden3, gamma, "tA1"), 
                      BNLayer(hidden3, 'BN1'),
                      ReLULayer("ReLu"),
                      AffineLayer(hidden4, gamma, "tA"),
                      BNLayer(hidden4, 'BN2'),
                      TanhLayer("ReLu"),
                      AffineLayer(outs, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")], alpha, lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
print "alpha: ", alpha
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        pr ,c,a, gg1, gb1, gamma1, beta1, mean1, std1, gg2, gb2, gamma2, beta2, mean2, std2 = net.trainFunction(X , Y.ravel()) # + u.generate(numpy.random, X.shape)
        i+=1
        if i % 1 == 0:
            print 'L1gg', gg1.shape, np.array(gg1)
            print 'L1gb', gb1.shape, np.array(gb1)
            print 'L1gamma', gamma1.shape, np.array(gamma1)
            print 'L1beta', beta1.shape, np.array(beta1)
            print 'L1mean', mean1.shape, np.array(mean1)
            print 'L1std', std1.shape, np.array(std1)
            print 'L2gg', gg2.shape, np.array(gg2)
            print 'L2gb', gb2.shape, np.array(gb2)
            print 'L2gamma', gamma2.shape, np.array(gamma2)
            print 'L2beta', beta2.shape, np.array(beta2)
            print 'L2mean', mean2.shape, np.array(mean2)
            print 'L2std', std2.shape, np.array(std2)
            print c, (pr  != Y.ravel()).mean()
        if i % 3 == 0:
            break
    break
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

In [None]:
import theano.tensor.signal.downsample as down

class Layer(object):
    def __init__(self, lamb = 0.1,rng=None, name=""):
        self.name = name
        self.lamb = lamb
        if rng is None:
            rng = numpy.random
        self.rng = rng
    
    @property
    def parameters(self):
        return []
    
    @property
    def parameter_names(self):
        return []
    
    def get_gradients(self, dLdY, fprop_context):
        return []
    
    def update(self, foo, alpha):
        return []
    def cost(self):
        return 0;
    def setInputDim(self, inputDim):
        self.num_out = inputDim
    def getOutputDim(self):
        return self.num_out
    def setMoments(self, moments):
        self.moments = moments
    def setLambda(self, lamb):
        self.lamb = lamb
    

class AffineLayer(Layer):
    def __init__(self, num_out, gamma  = 0.1, n = "", weight_init=None, bias_init=None, **kwargs):
        super(AffineLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(20. / (2* num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        self.gamma= theano.shared(gamma)
        self.b = theano.shared(bias_init.generate(self.rng, (num_out)), name=self.name +" bias")
    
    @property
    def parameters(self):
        return [self.W, self.b]
    @property
    def parametersValues(self):
        return [self.W.get_value(), self.b.get_value()]
    @property
    def parameter_names(self):
        return ['W','b']
    
    def build(self, X):
        #print self.name+ " ",X.shape 
        return X.dot(self.W) + self.b
    def cost(self):
        return  (self.W ** 2).sum() * self.gamma
    def update(self, foo, alpha):
        gw, gb = T.grad(foo, self.parameters)
        moments = self.moments
        self.setMoments((gw, gb))
        return  [(self.W, self.W - (alpha * gw + self.lamb * moments[0])), 
                 (self.b, self.b - (alpha * gb+ self.lamb * moments[1]))]
    def setInputDim(self, inputDim):
        shape = (inputDim, self.num_out)
        print "AffineLayer: ", shape
        self.W = theano.shared(self.weight_init.generate(self.rng, shape),name=self.name +" weight")
        self.setMoments(zeros(shape, dtype='float32'))

class Affine2DLayer(Layer):
    def __init__(self, num_out, gamma = None, n = "", weight_init=None, bias_init=None, **kwargs):
        super(Affine2DLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(6. / 2* (num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        if gamma is None:
            self.gamma = theano.shared(0.1)
        else:
            self.gamma = theano.shared(gamma, name = self.name + " gamma")
    
    @property
    def parameters(self):
        return [self.W]
    @property
    def parametersValues(self):
        return [self.W.get_value()]
    @property
    def parameter_names(self):
        return ['W']
    
    def build(self, X):
        return X.dot(self.W)
    def cost(self):
        return  (self.W ** 2).sum() * self.gamma
    def update(self, foo, alpha):
        gw = T.grad(foo, self.parameters)
        return  [(self.W, self.W -alpha * gw)] 
    def setInputDim(self, inputDim):
        self.num_out = (self.num_out, inputDim[1], inputDim[2])
        shape = inputDim +  self.num_out
        print "Affine2D", shape
        self.W = theano.shared(self.weight_init.generate(self.rng, shape),name=self.name +" weight")
        self.setMoments(zeros(self.W.shape, dtype='float32'))
    
class LogRegLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(LogRegLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.nnet.sigmoid(X)


class TanhLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(TanhLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        print "tanh layer", X
        return T.tanh(X)

    
class ReLULayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(ReLULayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.maximum(0.0, X)

class Conv(Layer):
    def __init__(self, f_out, f_size, gamma = 0.1, n = "", weight_init = None, **kwargs):
        super(Conv, self).__init__(name = n, **kwargs)
        if weight_init is None:
            b = numpy.sqrt(50. / (2*f_out+ f_size + f_size))
            self.weight_init = Uniform(width=b)
        self.gamma= theano.shared(gamma)
        self.f_out = f_out
        self.f_size = f_size
    
    
    def setInputDim(self, inputDim):
        F_size = (self.f_out, ) + (inputDim[0], self.f_size, self.f_size)                                   
        self.num_out = (self.f_out, inputDim[1] - self.f_size + 1, inputDim[2] - self.f_size + 1)
        print 'Conv filter', F_size
        self.F = theano.shared(self.weight_init.generate(self.rng, F_size),name=self.name +" filter")
        
    def update(self, foo, alpha):
        gf = T.grad(foo, self.F)
        return  [(self.F, self.F - alpha * gf)]    
    
    def cost(self):
        return  (self.F ** 2).sum() * self.gamma
    
    def build(self, X):
        return T.maximum(0.0, T.nnet.conv2d(X, self.F))
        
        
        
class Flatten(Layer):
    def __init__(self, n = "", **kwargs):
        super(Flatten, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.flatten(X, 2)
    def setInputDim(self, inputDim):
        out_dim = 1
        for i in inputDim:
            out_dim = out_dim * i
        self.num_out = out_dim
    

class BNLayer(Layer):
    def __init__(self,num_out, n = "BNLayer", gamma = 0.1, alpha=1.0,**kwargs):
        super(BNLayer, self).__init__(name = n, **kwargs)
        self.num_out, self.alpha = num_out, alpha
        self.gamma= theano.shared(gamma)
    def build(self, X):
        self.Gamma = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Gamma " + self.name))
        print 'Gamma shape:', np.zeros((1, self.num_out)).shape
        self.Beta  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Beta " + self.name))
        print 'Beta shape:', np.zeros((1, self.num_out)).shape
        self.Gamma.tag.initializer = Constant(1.0)
        self.Beta.tag.initializer = Constant(0.0)
    
        #self.stored_means = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Means" + self.name))
        #self.stored_stds  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Stds" + self.name))
        #self.stored_means.tag.initializer = Constant(0.0)
        #self.stored_stds.tag.initializer = Constant(1.0)
    
        self.means = self.alpha * theano.tensor.mean(X, 0, keepdims=True)
        self.stds = self.alpha * theano.tensor.std(X, 0, keepdims=True)
        self.means.tag.initializer = Constant(0.0)
        self.stds.tag.initializer = Constant(1.0)
        #self.means = self.alpha *self.means + (1.0 - self.alpha) * self.stored_means.dimshuffle(0,'x')
        #self.stds = self.alpha * self.stds + (1.0 - self.alpha) * self.stored_stds.dimshuffle(0,'x')
        
        normalized = theano.tensor.nnet.bn.batch_normalization(
            X,
            self.Gamma,
            self.Beta,
            self.means,
            self.stds,
            'high_mem'
        )
        return normalized
    @property
    def parameters(self):
        return [self.Gamma, self.Beta]
    @property
    def check(self):
        return [self.gg, self.gb, self.Gamma, self.Beta,self.means, self.stds ]
    #def cost(self):
    #    return  ((self.Gamma ** 2).sum() + (self.Gamma ** 2).sum())* self.gamma
    def update(self, foo, alpha):
        self.gg, self.gb = T.grad(foo, self.parameters)
        return  [(self.Gamma, self.Gamma- alpha *self.gg),
            (self.Beta, self.Beta - alpha * self.gb)] 
    
class SoftMaxLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(SoftMaxLayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.nnet.softmax(X)

class MaxPoolLayer(Layer):
    def __init__(self, p_size):
        self.p_size = p_size
    def build(self, input):
        return down.max_pool_2d(input, (self.p_size,self.p_size), ignore_border=True)
    def getOutputDim(self):
        shape = (self.num_out[0], ) + (self.num_out[1]/self.p_size, self.num_out[2]/self.p_size) 
        print "maxPool", shape
        return shape
    
class FeedForwardNet(object):
    def __init__(self, layers=None, alpha=0.1, lamb = 0.1):
        if layers is None:
            layers = []
        self.layers = layers
        print type(alpha)
        self.alpha = theano.shared(float32(alpha), name='alpha')
    
    def add(self, layer):
        self.layers.append(layer)
    
    @property
    def parameters(self):
        params = []
        for layer in self.layers:
            params += layer.parameters
        return params
    
    @parameters.setter
    def parameters(self, values):
        for ownP, newP in zip(self.parameters, values):
            ownP[...] = newP
    
    @property
    def parameter_names(self):
        param_names = []
        for layer in self.layers:
            param_names += layer.parameter_names
        return param_names
    
    def build(self, inputDim):
        x = T.tensor4("x")
        y = T.vector("y", dtype='int64')
        cost = 0
        paramUpdates = []
        
        X = x
        o1 = []
        o2 = []
        for layer, i in zip(self.layers, range(len(self.layers))):
            #print inputDim
            layer.setInputDim(inputDim)
            layer.setLambda(lamb)
            inputDim = layer.getOutputDim()
            X = layer.build(X)
            print 'name', layer.name
            if i == 2:
                print 'BNname', layer.name
                o1 = layer
            if i == 5:
                print 'BNname', layer.name
                o2 = layer

            cost += layer.cost()
        
        pred = np.argmax(X, 1)
        self.costFoo = T.nnet.categorical_crossentropy(X, y).mean() + cost
        
        for layer in self.layers:
            print layer.name
            paramUpdates += layer.update(self.costFoo, self.alpha)
            
        o1 = o1.check
        o2 = o2.check
        
        paramUpdates += [(self.alpha, self.alpha * 0.99993)]
        self.train = theano.function(inputs=[x,y], 
                                    outputs=[pred, self.costFoo, self.alpha]+o1+o2,
                                    updates=paramUpdates)
        self.predict  = theano.function(inputs=[x], 
                                    outputs=pred)
        
    
    @property
    def trainFunction(self):
        return self.train
    
    @property
    def predictFunction(self):
        return self.predict
    @property
    def costFunction(self):
        return self.costFoo
