forked from tflearn/tflearn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
initializations.py
164 lines (124 loc) · 6.08 KB
/
initializations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from __future__ import division, print_function, absolute_import
import math
import tensorflow as tf
from .utils import get_from_module
def get(identifier):
    """ Resolve an initialization by name or pass a callable through.

    Arguments:
        identifier: `str` or callable. The name of an initialization
            defined in this module, or an already-usable callable.

    Returns:
        The callable itself if `identifier` is callable, otherwise the
        initialization function looked up by name in this module.

    Raises:
        Whatever `get_from_module` raises for an unknown identifier.
    """
    # `callable()` is the idiomatic form of `hasattr(x, '__call__')`.
    if callable(identifier):
        return identifier
    return get_from_module(identifier, globals(), 'initialization')
def zeros(shape=None, dtype=tf.float32, seed=None):
    """ Zeros.

    Initialize a tensor with every element set to zero.

    Arguments:
        shape: List of `int`. A shape to initialize a Tensor (optional).
        dtype: The tensor data type.
        seed: Unused; accepted for signature consistency with the other
            initializers in this module.

    Returns:
        The Initializer, or an initialized `Tensor` if a shape is specified.
    """
    # Without a shape, hand back a deferred initializer for TF to apply.
    if not shape:
        return tf.constant_initializer(0.)
    # With a shape, materialize the zero-filled tensor right away.
    return tf.zeros(shape, dtype=dtype)
def uniform(shape=None, minval=0, maxval=None, dtype=tf.float32, seed=None):
    """ Uniform.

    Initialization with random values from a uniform distribution.

    The generated values follow a uniform distribution in the range
    `[minval, maxval)`: the lower bound is included, the upper bound
    excluded. For floats the default range is `[0, 1)`; for ints `maxval`
    must be given explicitly.

    In the integer case the random integers are slightly biased unless
    `maxval - minval` is an exact power of two; the bias is small when
    `maxval - minval` is much smaller than the output range (`2**32`
    or `2**64`).

    Arguments:
        shape: List of `int`. A shape to initialize a Tensor (optional).
        dtype: The tensor data type. Only float are supported.
        seed: `int`. Used to create a random seed for the distribution.

    Returns:
        The Initializer, or an initialized `Tensor` if shape is specified.
    """
    # No shape: return a deferred initializer instead of a tensor.
    if not shape:
        return tf.random_uniform_initializer(minval=minval, maxval=maxval,
                                             seed=seed, dtype=dtype)
    # Shape provided: sample the tensor immediately.
    return tf.random_uniform(shape, minval=minval, maxval=maxval,
                             seed=seed, dtype=dtype)
def uniform_scaling(shape=None, factor=1.0, dtype=tf.float32, seed=None):
    """ Uniform Scaling.

    Initialization with random values from uniform distribution without
    scaling variance.

    When initializing a deep network, it is in principle advantageous to keep
    the scale of the input variance constant, so it does not explode or
    diminish by reaching the final layer. If the input is `x` and the
    operation `x * W`, and we want to initialize `W` uniformly at random,
    we need to pick `W` from

        [-sqrt(3) / sqrt(dim), sqrt(3) / sqrt(dim)]

    to keep the scale intact, where `dim = W.shape[0]` (the size of the
    input). A similar calculation for convolutional networks gives an
    analogous result with `dim` equal to the product of the first 3
    dimensions. When nonlinearities are present, we need to multiply this
    by a constant `factor`. See
    [Sussillo et al., 2014](https://arxiv.org/abs/1412.6558)
    ([pdf](http://arxiv.org/pdf/1412.6558.pdf)) for deeper motivation,
    experiments and the calculation of constants. In section 2.3 there, the
    constants were numerically computed: for a linear layer it's 1.0,
    relu: ~1.43, tanh: ~1.15.

    Arguments:
        shape: List of `int`. A shape to initialize a Tensor (optional).
        factor: `float`. A multiplicative factor by which the values will be
            scaled.
        dtype: The tensor data type. Only float are supported.
        seed: `int`. Used to create a random seed for the distribution.

    Returns:
        The Initializer, or an initialized `Tensor` if shape is specified.
    """
    if shape:
        # Fan-in: product of all dimensions except the last.
        input_size = 1.0
        for dim in shape[:-1]:
            input_size *= float(dim)
        # True division is guaranteed by the __future__ import at file top.
        max_val = math.sqrt(3 / input_size) * factor
        # BUG FIX: the original called `tf.random_ops.random_uniform`, but
        # `random_ops` is an internal module (tensorflow.python.ops), not an
        # attribute of the public `tf` namespace -> AttributeError at runtime.
        # Use the public `tf.random_uniform`, as the other initializers in
        # this file already do.
        return tf.random_uniform(shape, -max_val, max_val,
                                 dtype=dtype, seed=seed)
    else:
        return tf.uniform_unit_scaling_initializer(seed=seed, dtype=dtype)
def normal(shape=None, mean=0.0, stddev=0.02, dtype=tf.float32, seed=None):
    """ Normal.

    Initialization with random values from a normal distribution.

    Arguments:
        shape: List of `int`. A shape to initialize a Tensor (optional).
        mean: Same as `dtype`. The mean of the normal distribution.
        stddev: Same as `dtype`. The standard deviation of the normal
            distribution.
        dtype: The tensor data type.
        seed: `int`. Used to create a random seed for the distribution.

    Returns:
        The Initializer, or an initialized `Tensor` if shape is specified.
    """
    # DOC FIX: the previous docstring described `mean`/`stddev` as parameters
    # of a *truncated* normal distribution (copy-paste from truncated_normal);
    # this function samples a plain normal distribution. Code is unchanged.
    if shape:
        return tf.random_normal(shape, mean=mean, stddev=stddev, seed=seed,
                                dtype=dtype)
    else:
        return tf.random_normal_initializer(mean=mean, stddev=stddev,
                                            seed=seed, dtype=dtype)
def truncated_normal(shape=None, mean=0.0, stddev=0.02, dtype=tf.float32,
                     seed=None):
    """ Truncated Normal.

    Initialization with random values from a truncated normal distribution.

    The generated values follow a normal distribution with the specified
    mean and standard deviation, except that values whose magnitude is more
    than 2 standard deviations from the mean are dropped and re-picked.

    Arguments:
        shape: List of `int`. A shape to initialize a Tensor (optional).
        mean: Same as `dtype`. The mean of the truncated normal distribution.
        stddev: Same as `dtype`. The standard deviation of the truncated
            normal distribution.
        dtype: The tensor data type.
        seed: `int`. Used to create a random seed for the distribution.

    Returns:
        The Initializer, or an initialized `Tensor` if shape is specified.
    """
    # No shape: defer sampling by returning an initializer object.
    if not shape:
        return tf.truncated_normal_initializer(mean=mean, stddev=stddev,
                                               seed=seed, dtype=dtype)
    # Shape provided: sample the truncated-normal tensor now.
    return tf.truncated_normal(shape=shape, mean=mean, stddev=stddev,
                               seed=seed, dtype=dtype)