In [4]:
import numpy as np
import tensorflow as tf
from src.utils import get_wb, foo
from src.utils import print_params_dict, print_params_number

from tensorlayer.layers import DenseLayer, RNNLayer, InputLayer, FlattenLayer

from src.layers import dense, multi_dense, linear_dense, Dense
from src.layers import highway_dense, multi_highway_dense
from src.layers import conv2d
from src.layers import highway_conv2d, multi_highway_conv2d
from src.layers import attention_for_rnn, attention_for_dense
from src.layers import attention_flow_self, attention_flow

foo()

foo


## get_wb

In [5]:
# get_wb
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 16]
w, b = get_wb([2, 16], b_initializer=tf.initializers.constant(-1.0))

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = tf.matmul(x, w) + b
    b_val = tf.identity(b).eval()
    
print(o_val.shape)
print(b_val)
print_params_dict()

(8, 16)
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
W:0 	{'number': 32, 'shape': [2, 16]}
b:0 	{'number': 16, 'shape': [16]}


## Dense

In [6]:
# Dense
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 16]
o = dense(x, 16, name="D")

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()

(8, 16)
D/W:0 	{'number': 32, 'shape': [2, 16]}
D/b:0 	{'number': 16, 'shape': [16]}


## Dense reuse

In [7]:
# Dense reuse
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 16]
o = dense(x, 16, name="D")
o2 = dense(x, 16, name="D", reuse=True)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    o2_val = o2.eval()
    
print((o_val == o2_val).all())
print_params_dict()

True
D/W:0 	{'number': 32, 'shape': [2, 16]}
D/b:0 	{'number': 16, 'shape': [16]}


## Dense reuse with class

In [8]:
# Dense class
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 16]
dense_reuse = Dense(16, name="DD")
o = dense_reuse(x)
o2 = dense_reuse(x)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    o2_val = o2.eval()
    
print((o_val == o2_val).all())
print_params_dict()

True
DD/W:0 	{'number': 32, 'shape': [2, 16]}
DD/b:0 	{'number': 16, 'shape': [16]}


## Multi Dense

In [9]:
# Multi Dense
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 5]
o = multi_dense(x, [3, 4, 5], name="D")

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()

(8, 5)
D-0/W:0 	{'number': 6, 'shape': [2, 3]}
D-0/b:0 	{'number': 3, 'shape': [3]}
D-1/W:0 	{'number': 12, 'shape': [3, 4]}
D-1/b:0 	{'number': 4, 'shape': [4]}
D-2/W:0 	{'number': 20, 'shape': [4, 5]}
D-2/b:0 	{'number': 5, 'shape': [5]}


## Highway Dense

In [10]:
# Highway Dense
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 2]
o = highway_dense(x, name="H")

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()

(8, 2)
H/W:0 	{'number': 4, 'shape': [2, 2]}
H/b:0 	{'number': 2, 'shape': [2]}
H/transform/W:0 	{'number': 4, 'shape': [2, 2]}
H/transform/b:0 	{'number': 2, 'shape': [2]}


## Multi Highway Dense

In [11]:
# Multi Highway Dense
tf.reset_default_graph()

# Input shape: [8, 2]
x = tf.constant(np.arange(16, dtype=np.float32).reshape([8,2]))
# Output shape: [8, 2]
o = multi_highway_dense(x, 3, name="H1")

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()

(8, 2)
H1-0/W:0 	{'number': 4, 'shape': [2, 2]}
H1-0/b:0 	{'number': 2, 'shape': [2]}
H1-0/transform/W:0 	{'number': 4, 'shape': [2, 2]}
H1-0/transform/b:0 	{'number': 2, 'shape': [2]}
H1-1/W:0 	{'number': 4, 'shape': [2, 2]}
H1-1/b:0 	{'number': 2, 'shape': [2]}
H1-1/transform/W:0 	{'number': 4, 'shape': [2, 2]}
H1-1/transform/b:0 	{'number': 2, 'shape': [2]}
H1-2/W:0 	{'number': 4, 'shape': [2, 2]}
H1-2/b:0 	{'number': 2, 'shape': [2]}
H1-2/transform/W:0 	{'number': 4, 'shape': [2, 2]}
H1-2/transform/b:0 	{'number': 2, 'shape': [2]}


## Conv2d

In [12]:
# Conv2d
tf.reset_default_graph()

# Input shape: [4, 20, 20, 1]
x = tf.constant(np.arange(1600, dtype=np.float32).reshape([4, 20, 20, 1]))
# Output shape: [4, 20, 20, 16]
o = conv2d(x, 3, 16)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()

(4, 20, 20, 16)
conv2d/W:0 	{'number': 144, 'shape': [3, 3, 1, 16]}
conv2d/b:0 	{'number': 16, 'shape': [16]}


## Highway Conv2d

In [13]:
# Highway Conv2d
tf.reset_default_graph()

# Input shape: [4, 20, 20, 1]
x = tf.constant(np.arange(1600, dtype=np.float32).reshape([4, 20, 20, 1]))
# Output shape: [4, 20, 20, 16]
o = highway_conv2d(x, 3)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()

(4, 20, 20, 1)
highway_conv2d/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
highway_conv2d/b:0 	{'number': 1, 'shape': [1]}
highway_conv2d/transform/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
highway_conv2d/transform/b:0 	{'number': 1, 'shape': [1]}


## Multi Highway Conv2d

In [14]:
# Multi Highway Conv2d
tf.reset_default_graph()

# Input shape: [4, 20, 20, 1]
x = tf.constant(np.arange(1600, dtype=np.float32).reshape([-1, 20, 20, 1]))
# Output shape: [4, 20, 20, 1]
x = highway_conv2d(x, 3, name="HCnn1")
x = highway_conv2d(x, 4, name="HCnn2")
o = highway_conv2d(x, 5, name="HCnn3")

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()
print_params_number()

(4, 20, 20, 1)
HCnn1/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
HCnn1/b:0 	{'number': 1, 'shape': [1]}
HCnn1/transform/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
HCnn1/transform/b:0 	{'number': 1, 'shape': [1]}
HCnn2/W:0 	{'number': 16, 'shape': [4, 4, 1, 1]}
HCnn2/b:0 	{'number': 1, 'shape': [1]}
HCnn2/transform/W:0 	{'number': 16, 'shape': [4, 4, 1, 1]}
HCnn2/transform/b:0 	{'number': 1, 'shape': [1]}
HCnn3/W:0 	{'number': 25, 'shape': [5, 5, 1, 1]}
HCnn3/b:0 	{'number': 1, 'shape': [1]}
HCnn3/transform/W:0 	{'number': 25, 'shape': [5, 5, 1, 1]}
HCnn3/transform/b:0 	{'number': 1, 'shape': [1]}
106


## Multi Highway Conv2d with function

In [15]:
# Multi Highway Conv2d
tf.reset_default_graph()

# Input shape: [4, 20, 20, 1]
x = tf.constant(np.arange(1600, dtype=np.float32).reshape([-1, 20, 20, 1]))
# Output shape: [4, 20, 20, 1]
o = multi_highway_conv2d(x, 3, 2)
o2 = multi_highway_conv2d(x, [3,4,5], 3, name="HC")


with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    o2_val = o2.eval()
    
print(o_val.shape)
print(o2_val.shape)
print_params_dict()
print_params_number()

(4, 20, 20, 1)
(4, 20, 20, 1)
highway_conv2d-0/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
highway_conv2d-0/b:0 	{'number': 1, 'shape': [1]}
highway_conv2d-0/transform/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
highway_conv2d-0/transform/b:0 	{'number': 1, 'shape': [1]}
highway_conv2d-1/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
highway_conv2d-1/b:0 	{'number': 1, 'shape': [1]}
highway_conv2d-1/transform/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
highway_conv2d-1/transform/b:0 	{'number': 1, 'shape': [1]}
HC-0/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
HC-0/b:0 	{'number': 1, 'shape': [1]}
HC-0/transform/W:0 	{'number': 9, 'shape': [3, 3, 1, 1]}
HC-0/transform/b:0 	{'number': 1, 'shape': [1]}
HC-1/W:0 	{'number': 16, 'shape': [4, 4, 1, 1]}
HC-1/b:0 	{'number': 1, 'shape': [1]}
HC-1/transform/W:0 	{'number': 16, 'shape': [4, 4, 1, 1]}
HC-1/transform/b:0 	{'number': 1, 'shape': [1]}
HC-2/W:0 	{'number': 25, 'shape': [5, 5, 1, 1]}
HC-2/b:0 	{'number': 1, 'shape': [1]}
HC-2/transform/W:0 	{'numb

## Attention for Dense

In [16]:
# Attention for Dense
tf.reset_default_graph()

# Input shape: [4, 8]
x = tf.constant(np.arange(32, dtype=np.float32).reshape([-1, 8]))
# Output shape: [4, 8]
o = attention_for_dense(x)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()
print_params_number()

(4, 8)
attention_for_dense/W:0 	{'number': 64, 'shape': [8, 8]}
attention_for_dense/b:0 	{'number': 8, 'shape': [8]}
72


## Attention for RNN (use_mean_attention=False)

In [17]:
# Attention for RNN
tf.reset_default_graph()

# Input shape: [128, 5, 32]
x = tf.constant(np.arange(20480, dtype=np.float32).reshape([128, 5, 32]))
# Output shape: [128, 5, 32]
o = attention_for_rnn(x)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()
print_params_number()

(128, 5, 32)
attention_for_rnn/W:0 	{'number': 25, 'shape': [5, 5]}
attention_for_rnn/b:0 	{'number': 5, 'shape': [5]}
30


## Attention for RNN (use_mean_attention=True)

In [18]:
import numpy as np
import tensorflow as tf
from src.utils import get_wb, foo
from src.utils import print_params_dict, print_params_number

from src.layers import attention_for_rnn

# Attention for RNN
tf.reset_default_graph()

# Input shape: [128, 5, 32]
x = tf.constant(np.arange(20480, dtype=np.float32).reshape([128, 5, 32]))
# Output shape: [128, 5, 32]
o = attention_for_rnn(x, n_step=5, use_mean_attention=True)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    
print(o_val.shape)
print_params_dict()
print_params_number()

(128, 5, 32)
attention_for_rnn/W:0 	{'number': 25, 'shape': [5, 5]}
attention_for_rnn/b:0 	{'number': 5, 'shape': [5]}
30


## Use attention after lstm (use tensorlayer)

In [19]:
# Attention for RNN
tf.reset_default_graph()

# Input shape: [128, 16, 5]
x = tf.constant(np.arange(10240, dtype=np.float32).reshape([128, 16, 5]))
x = InputLayer(x)

# Use attention after lstm
x = RNNLayer(x, tf.nn.rnn_cell.LSTMCell, n_hidden=32)
x = attention_for_rnn(x.outputs)

x = InputLayer(x)
x = FlattenLayer(x)
x = DenseLayer(x, n_units=1, act=tf.nn.sigmoid)
o = x.outputs

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()

print(o_val.shape)  # [128, 1]
print_params_dict()
print_params_number()

[TL] InputLayer  input: (128, 16, 5)
[TL] RNNLayer rnn: n_hidden: 32 n_steps: 5 in_dim: 3 in_shape: (128, 16, 5) cell_fn: LSTMCell 
[TL]        RNN batch_size (concurrent processes): 128
[TL]      n_params : 2
[TL] InputLayer  input: (128, 5, 32)
[TL] FlattenLayer flatten: 160
[TL] DenseLayer  dense: 1 sigmoid
(128, 1)
rnn/lstm_cell/kernel:0 	{'number': 4736, 'shape': [37, 128]}
rnn/lstm_cell/bias:0 	{'number': 128, 'shape': [128]}
attention_for_rnn/W:0 	{'number': 25, 'shape': [5, 5]}
attention_for_rnn/b:0 	{'number': 5, 'shape': [5]}
dense/W:0 	{'number': 160, 'shape': [160, 1]}
dense/b:0 	{'number': 1, 'shape': [1]}
5055


## Use attention before lstm (use tensorlayer)

In [20]:
# Attention for RNN
tf.reset_default_graph()

# Input shape: [128, 16, 5]
x = tf.constant(np.arange(10240, dtype=np.float32).reshape([128, 16, 5]))

# Use attention before lstm
x = attention_for_rnn(x)
x = InputLayer(x)
x = RNNLayer(x, tf.nn.rnn_cell.LSTMCell, n_hidden=32, return_last=True)

# x = FlattenLayer(x)
x = DenseLayer(x, n_units=1, act=tf.nn.sigmoid)
o = x.outputs

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()

print(o_val.shape)  # [128, 1]
print_params_dict()
print_params_number()

[TL] InputLayer  input: (128, 16, 5)
[TL] RNNLayer rnn: n_hidden: 32 n_steps: 5 in_dim: 3 in_shape: (128, 16, 5) cell_fn: LSTMCell 
[TL]        RNN batch_size (concurrent processes): 128
[TL]      n_params : 2
[TL] DenseLayer  dense: 1 sigmoid
(128, 1)
attention_for_rnn/W:0 	{'number': 256, 'shape': [16, 16]}
attention_for_rnn/b:0 	{'number': 16, 'shape': [16]}
rnn/lstm_cell/kernel:0 	{'number': 4736, 'shape': [37, 128]}
rnn/lstm_cell/bias:0 	{'number': 128, 'shape': [128]}
dense/W:0 	{'number': 32, 'shape': [32, 1]}
dense/b:0 	{'number': 1, 'shape': [1]}
5169


## Attention Flow Self Match Layer

In [21]:
#from src.__ref import AttentionFlowMatchLayer

# Attention Flow Self Match Layer
tf.reset_default_graph()

# Input shape
c = tf.constant(np.arange(40960, dtype=np.float32).reshape([128, 10, 32]))
q = tf.constant(np.arange(20480, dtype=np.float32).reshape([128, 5, 32]))

o = attention_flow_self(c, q)
# o2 = AttentionFlowMatchLayer(0).match(c, q, _, _)[0]

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    #o2_val = o2.eval()

print(o_val.shape)  # [128, 1]
#print((o_val==o2_val).all())
print_params_dict()
print_params_number()

(128, 10, 128)
0


## Usage of `tf.einsum`

In [22]:
tf.reset_default_graph()
T, J, d = 3, 4, 10
h = tf.constant(np.arange(3600, dtype=np.float32).reshape([10, T, J, 3*d]))

w, b = get_wb([3*d, 1])

# tf.matmul(w, h)
o = tf.einsum("ntjd,do->ntjo", h, w)
o2 = tf.matmul(tf.reshape(h, [-1, 3*d]), w)
o2 = tf.reshape(o2,[-1, 3, 4, 1])

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    o2_val = o2.eval()

print(o_val.shape)  # [128, 1]
print((o_val==o2_val).all())
print_params_dict()
print_params_number()

(10, 3, 4, 1)
True
W:0 	{'number': 30, 'shape': [30, 1]}
b:0 	{'number': 1, 'shape': [1]}
31


## Attention Flow Match Layer

In [34]:
#from src.layers import attention_flow_2

# Attention Flow Match Layer
tf.reset_default_graph()

# Input shape
c = tf.constant(np.arange(40960, dtype=np.float32).reshape([128, 10, 32]))
q = tf.constant(np.arange(20480, dtype=np.float32).reshape([128, 5, 32]))

o = attention_flow(c, q, name="af")
#o2 = attention_flow_2(c, q, name="af", reuse=True)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    o_val = o.eval()
    #o2_val = o2.eval()

print(o_val.shape)  # [128, 1]
#print((o_val==o2_val).all())
print_params_dict()
print_params_number()

(128, 10, 128)
-----------
params_dict
  af/W:0 	{'number': 96, 'shape': [96, 1]}
-----------
params_number: 96
