"""
Functions to create initializers for parameter variables.
Examples
--------
>>> from lasagne.layers import DenseLayer
>>> from lasagne.init import Constant, GlorotUniform
>>> l1 = DenseLayer((100,20), num_units=50,
... W=GlorotUniform('relu'), b=Constant(0.0))
"""
import numpy as np
from .utils import floatX
from .random import get_rng
class Initializer(object):
"""Base class for parameter tensor initializers.
The :class:`Initializer` class represents a weight initializer used
to initialize weight parameters in a neural network layer. It should be
subclassed when implementing new types of weight initializers.
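Examples
--------
A minimal sketch of a custom subclass (the ``Ones`` class below is
hypothetical and shown only to illustrate the interface):
>>> import numpy as np
>>> from lasagne.init import Initializer
>>> from lasagne.utils import floatX
>>> class Ones(Initializer):
...     def sample(self, shape):
...         return floatX(np.ones(shape))
>>> Ones()((2, 3)).shape
(2, 3)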
"""
def __call__(self, shape):
"""
Makes :class:`Initializer` instances callable like a function, invoking
their :meth:`sample()` method.
"""
return self.sample(shape)
def sample(self, shape):
"""
Sample should return a numpy array of the given shape with data type
``theano.config.floatX`` (as produced by :func:`lasagne.utils.floatX`).
Parameters
----------
shape : tuple or int
Integer or tuple specifying the size of the returned
matrix.
Returns
-------
numpy.ndarray
Matrix of size `shape` and dtype ``theano.config.floatX``.
"""
raise NotImplementedError()
class Normal(Initializer):
"""Sample initial weights from the Gaussian distribution.
Initial weight parameters are sampled from N(mean, std).
Parameters
----------
std : float
Std of initial parameters.
mean : float
Mean of initial parameters.
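Examples
--------
A minimal usage sketch (the shape is chosen arbitrarily for illustration):
>>> from lasagne.init import Normal
>>> w = Normal(std=0.02, mean=0.0).sample((100, 50))
>>> w.shape
(100, 50)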
"""
def __init__(self, std=0.01, mean=0.0):
self.std = std
self.mean = mean
def sample(self, shape):
return floatX(get_rng().normal(self.mean, self.std, size=shape))
class Uniform(Initializer):
"""Sample initial weights from the uniform distribution.
Parameters are sampled from U(a, b).
Parameters
----------
range : float or tuple
When std is None, range determines the interval (a, b). If range is a
float, the weights are sampled from U(-range, range). If range is a
tuple, the weights are sampled from U(range[0], range[1]).
std : float or None
If std is a float, the weights are sampled from
U(mean - np.sqrt(3) * std, mean + np.sqrt(3) * std), which has mean
``mean`` and standard deviation ``std``.
mean : float
See std for description.
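Examples
--------
A minimal usage sketch showing the ways of specifying the interval
(shapes and bounds are chosen arbitrarily for illustration):
>>> from lasagne.init import Uniform
>>> w1 = Uniform(range=0.05).sample((10, 20))        # U(-0.05, 0.05)
>>> w2 = Uniform(range=(0.0, 1.0)).sample((10, 20))  # U(0, 1)
>>> w3 = Uniform(std=0.05, mean=0.0).sample((10, 20))
>>> w1.shape == w2.shape == w3.shape == (10, 20)
True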
"""
def __init__(self, range=0.01, std=None, mean=0.0):
if std is not None:
a = mean - np.sqrt(3) * std
b = mean + np.sqrt(3) * std
else:
try:
a, b = range # range is a tuple
except TypeError:
a, b = -range, range # range is a number
self.range = (a, b)
def sample(self, shape):
return floatX(get_rng().uniform(
low=self.range[0], high=self.range[1], size=shape))
class Glorot(Initializer):
"""Glorot weight initialization.
This is also known as Xavier initialization [1]_.
Parameters
----------
initializer : lasagne.init.Initializer
Initializer used to sample the weights, must accept `std` in its
constructor to sample from a distribution with a given standard
deviation.
gain : float or 'relu'
Scaling factor for the weights. Set this to ``1.0`` for linear and
sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and
to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with
leakiness ``alpha``. Other transfer functions may need different
factors.
c01b : bool
For a :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` constructed
with ``dimshuffle=False``, `c01b` must be set to ``True`` to compute
the correct fan-in and fan-out.
References
----------
.. [1] Xavier Glorot and Yoshua Bengio (2010):
Understanding the difficulty of training deep feedforward neural
networks. International conference on artificial intelligence and
statistics.
Notes
-----
For a :class:`DenseLayer <lasagne.layers.DenseLayer>`, if ``gain='relu'``
and ``initializer=Uniform``, the weights are initialized as
.. math::
a &= \\sqrt{\\frac{12}{fan_{in}+fan_{out}}}\\\\
W &\sim U[-a, a]
If ``gain=1`` and ``initializer=Normal``, the weights are initialized as
.. math::
\\sigma &= \\sqrt{\\frac{2}{fan_{in}+fan_{out}}}\\\\
W &\sim N(0, \\sigma)
See Also
--------
GlorotNormal : Shortcut with Gaussian initializer.
GlorotUniform : Shortcut with uniform initializer.
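Examples
--------
A minimal usage sketch (the dense and convolutional shapes are chosen
arbitrarily for illustration):
>>> from lasagne.init import Glorot, GlorotUniform, Uniform
>>> w_dense = GlorotUniform(gain='relu').sample((100, 50))
>>> w_dense.shape
(100, 50)
>>> w_conv = Glorot(Uniform, gain=1.0).sample((32, 16, 3, 3))
>>> w_conv.shape
(32, 16, 3, 3)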
"""
def __init__(self, initializer, gain=1.0, c01b=False):
if gain == 'relu':
gain = np.sqrt(2)
self.initializer = initializer
self.gain = gain
self.c01b = c01b
def sample(self, shape):
if self.c01b:
if len(shape) != 4:
raise RuntimeError(
"If c01b is True, only shapes of length 4 are accepted")
n1, n2 = shape[0], shape[3]
receptive_field_size = shape[1] * shape[2]
else:
if len(shape) < 2:
raise RuntimeError(
"This initializer only works with shapes of length >= 2")
n1, n2 = shape[:2]
receptive_field_size = np.prod(shape[2:])
std = self.gain * np.sqrt(2.0 / ((n1 + n2) * receptive_field_size))
return self.initializer(std=std).sample(shape)
class GlorotNormal(Glorot):
"""Glorot with weights sampled from the Normal distribution.
See :class:`Glorot` for a description of the parameters.
"""
def __init__(self, gain=1.0, c01b=False):
super(GlorotNormal, self).__init__(Normal, gain, c01b)
class GlorotUniform(Glorot):
"""Glorot with weights sampled from the Uniform distribution.
See :class:`Glorot` for a description of the parameters.
"""
def __init__(self, gain=1.0, c01b=False):
super(GlorotUniform, self).__init__(Uniform, gain, c01b)
class He(Initializer):
"""He weight initialization.
Weights are initialized with a standard deviation of
:math:`\\sigma = gain \\sqrt{\\frac{1}{fan_{in}}}` [1]_.
Parameters
----------
initializer : lasagne.init.Initializer
Initializer used to sample the weights, must accept `std` in its
constructor to sample from a distribution with a given standard
deviation.
gain : float or 'relu'
Scaling factor for the weights. Set this to ``1.0`` for linear and
sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and
to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with
leakiness ``alpha``. Other transfer functions may need different
factors.
c01b : bool
For a :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` constructed
with ``dimshuffle=False``, `c01b` must be set to ``True`` to compute
the correct fan-in and fan-out.
References
----------
.. [1] Kaiming He et al. (2015):
Delving deep into rectifiers: Surpassing human-level performance on
imagenet classification. arXiv preprint arXiv:1502.01852.
See Also
--------
HeNormal : Shortcut with Gaussian initializer.
HeUniform : Shortcut with uniform initializer.
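Examples
--------
A minimal usage sketch (the dense and convolutional shapes are chosen
arbitrarily for illustration):
>>> from lasagne.init import He, HeNormal, Uniform
>>> w_dense = HeNormal(gain='relu').sample((100, 50))
>>> w_dense.shape
(100, 50)
>>> w_conv = He(Uniform, gain='relu').sample((32, 16, 3, 3))
>>> w_conv.shape
(32, 16, 3, 3)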
"""
def __init__(self, initializer, gain=1.0, c01b=False):
if gain == 'relu':
gain = np.sqrt(2)
self.initializer = initializer
self.gain = gain
self.c01b = c01b
def sample(self, shape):
if self.c01b:
if len(shape) != 4:
raise RuntimeError(
"If c01b is True, only shapes of length 4 are accepted")
fan_in = np.prod(shape[:3])
else:
if len(shape) == 2:
fan_in = shape[0]
elif len(shape) > 2:
fan_in = np.prod(shape[1:])
else:
raise RuntimeError(
"This initializer only works with shapes of length >= 2")
std = self.gain * np.sqrt(1.0 / fan_in)
return self.initializer(std=std).sample(shape)
class HeNormal(He):
"""He initializer with weights sampled from the Normal distribution.
See :class:`He` for a description of the parameters.
"""
def __init__(self, gain=1.0, c01b=False):
super(HeNormal, self).__init__(Normal, gain, c01b)
class HeUniform(He):
"""He initializer with weights sampled from the Uniform distribution.
See :class:`He` for a description of the parameters.
"""
def __init__(self, gain=1.0, c01b=False):
super(HeUniform, self).__init__(Uniform, gain, c01b)
class Constant(Initializer):
"""Initialize weights with constant value.
Parameters
----------
val : float
Constant value for weights.
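Examples
--------
A minimal usage sketch (the shape is chosen arbitrarily for illustration):
>>> from lasagne.init import Constant
>>> b = Constant(0.0).sample((50,))
>>> b.shape
(50,)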
"""
def __init__(self, val=0.0):
self.val = val
def sample(self, shape):
return floatX(np.ones(shape) * self.val)
class Sparse(Initializer):
"""Initialize weights as sparse matrix.
Parameters
----------
sparsity : float
Exact fraction of non-zero values per column, so larger values give a
denser (less sparse) weight matrix.
std : float
Non-zero weights are sampled from N(0, std).
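Examples
--------
A minimal usage sketch (the shape is chosen arbitrarily for illustration);
with ``sparsity=0.1`` and 100 inputs, each column gets exactly 10 non-zero
entries:
>>> from lasagne.init import Sparse
>>> w = Sparse(sparsity=0.1, std=0.01).sample((100, 50))
>>> int((w[:, 0] != 0).sum())
10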
"""
def __init__(self, sparsity=0.1, std=0.01):
self.sparsity = sparsity
self.std = std
def sample(self, shape):
if len(shape) != 2:
raise RuntimeError(
"sparse initializer only works with shapes of length 2")
w = floatX(np.zeros(shape))
n_inputs, n_outputs = shape
size = int(self.sparsity * n_inputs) # fraction of number of inputs
for k in range(n_outputs):
indices = np.arange(n_inputs)
get_rng().shuffle(indices)
indices = indices[:size]
values = floatX(get_rng().normal(0.0, self.std, size=size))
w[indices, k] = values
return w
class Orthogonal(Initializer):
"""Intialize weights as Orthogonal matrix.
Orthogonal matrix initialization [1]_. For n-dimensional shapes where
n > 2, the n-1 trailing axes are flattened. For convolutional layers, this
corresponds to the fan-in, so this makes the initialization usable for
both dense and convolutional layers.
Parameters
----------
gain : float or 'relu'
Scaling factor for the weights. Set this to ``1.0`` for linear and
sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and
to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with
leakiness ``alpha``. Other transfer functions may need different
factors.
References
----------
.. [1] Saxe, Andrew M., James L. McClelland, and Surya Ganguli.
"Exact solutions to the nonlinear dynamics of learning in deep
linear neural networks." arXiv preprint arXiv:1312.6120 (2013).
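Examples
--------
A minimal usage sketch (the shape is chosen arbitrarily for illustration);
for a square matrix the rows are orthonormal up to floating point error:
>>> import numpy as np
>>> from lasagne.init import Orthogonal
>>> w = Orthogonal(gain=1.0).sample((100, 100))
>>> bool(np.allclose(np.dot(w, w.T), np.eye(100), atol=1e-4))
True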
"""
def __init__(self, gain=1.0):
if gain == 'relu':
gain = np.sqrt(2)
self.gain = gain
def sample(self, shape):
if len(shape) < 2:
raise RuntimeError("Only shapes of length 2 or more are "
"supported.")
flat_shape = (shape[0], np.prod(shape[1:]))
a = get_rng().normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
# pick the one with the correct shape
q = u if u.shape == flat_shape else v
q = q.reshape(shape)
return floatX(self.gain * q)