Skip to content

Commit

Permalink
New bidirectional architecture
Browse files Browse the repository at this point in the history
  • Loading branch information
Sentimentron committed Jan 1, 2016
1 parent 8b5174a commit 30e161d
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 32 deletions.
23 changes: 8 additions & 15 deletions lstm.py
Expand Up @@ -15,7 +15,7 @@
from modelio import load_pos_tagged_data, prepare_data, get_max_word_count, get_max_length

from nn_layers import *
from nn_lstm import lstm_layer, lstm_unmasked_layer
from nn_lstm import lstm_layer, lstm_unmasked_layer, bidirectional_lstm_layer
from nn_params import *
from nn_optimizers import *
from nn_support import pred_error
Expand Down Expand Up @@ -43,26 +43,19 @@ def build_model(tparams, options, maxw, training=True):
n_samples = xc.shape[1]

emb = embeddings_layer(xc, tparams['Cemb'], n_timesteps, n_samples, options['dim_proj'])
# emb2 = embeddings_layer(xw, tparams['Wemb'], n_timesteps, n_samples, options['dim_proj_words'])

#emb = tensor.concatenate([emb1, emb2], axis=2)

#emb = theano.printing.Print("emb", attrs=["shape"])(emb)

proj_chars_1 = lstm_layer(tparams, emb, options, "lstm_chars_forwards", mask=mask)
proj_chars_2 = lstm_layer(tparams, emb, options, "lstm_chars_backwards", mask=mask, go_backwards=True)

proj = proj_chars_1 + proj_chars_2
proj = bidirectional_lstm_layer(tparams, emb, options, "lstm_chars_1", mask=mask)
proj = bidirectional_lstm_layer(tparams, proj, options, "lstm_chars_2", mask=mask)

avg_per_word = per_word_averaging_layer(proj, wmask, maxw)
avg_per_word = avg_per_word.dimshuffle(1, 0, 2)

proj2 = lstm_unmasked_layer(tparams, avg_per_word, options, prefix="lstm_words", mult=3)
proj3 = lstm_unmasked_layer(tparams, avg_per_word, options, prefix="lstm_words_2", mult=3, go_backwards=True)
#avg_per_word = theano.printing.Print("avg", attrs=["shape"])(avg_per_word)

proj4 = proj2 + proj3
proj2 = bidirectional_lstm_layer(tparams, avg_per_word, options, "lstm_words_1", mult=3)
proj2 = bidirectional_lstm_layer(tparams, proj2, options, "lstm_words_2", mult=3)

pred = softmax_layer(proj4, tparams['U'], tparams['b'], y_mask, maxw, training)
pred = softmax_layer(proj2, tparams['U'], tparams['b'], y_mask, maxw, training)

f_pred_prob = theano.function([xc, mask, wmask, y_mask], pred, name='f_pred_prob', on_unused_input='ignore')
f_pred = theano.function([xc, mask, wmask, y_mask], pred.argmax(axis=2), name='f_pred', on_unused_input='ignore')
Expand Down Expand Up @@ -157,7 +150,7 @@ def train_lstm(
test = load_pos_tagged_data("Data/TweeboDaily547.conll", char_dict, word_dict, pos_dict, 16)
test, valid = split_at(test, 0.10)
max_word_count = max(max_word_count, get_max_word_count("Data/TweeboDaily547.conll"))
batch_size = 50
batch_size = 25
else:
# Pre-populate
test = load_pos_tagged_data("Data/Brown.conll", char_dict, word_dict, pos_dict)
Expand Down
1 change: 1 addition & 0 deletions nn_layers.py
Expand Up @@ -36,6 +36,7 @@ def per_word_averaging_layer_distrib(proj, wmask, maxw):
"""
"""
print maxw, "MAXW"
dup = [tensor.shape_padaxis(proj, 0) for _ in range(maxw)]
dup = tensor.concatenate(dup, 0)
#dup = tensor.shape_padaxis(proj, 0)
Expand Down
21 changes: 21 additions & 0 deletions nn_lstm.py
Expand Up @@ -124,3 +124,24 @@ def _step(m_, x_, h_, c_):
name=_p(prefix, '_layers'),
n_steps=nsteps, go_backwards=go_backwards)
return rval[0]

def bidirectional_lstm_layer(tparams, state_below, options, prefix='lstm', mask=None, mult=1):
    """Run forwards and backwards LSTMs over *state_below* and sum the outputs.

    The two directions look up their weights under the names
    '<prefix>_forwards' and '<prefix>_backwards', matching the parameters
    created by param_init_bidirection_lstm.

    :param tparams: dict of shared Theano parameters.
    :param state_below: input sequence tensor (presumably time-major:
        (n_timesteps, n_samples, dim) -- TODO confirm against lstm_layer).
    :param options: model options dict, forwarded to the LSTM layers.
    :param prefix: base parameter-name prefix for both directions.
    :param mask: optional sequence mask; when given the masked lstm_layer
        is used, otherwise lstm_unmasked_layer.
    :param mult: width multiplier forwarded to the underlying layers.
    :return: elementwise sum of the forward and backward projections.
    """
    prefix_forwards = '%s_forwards' % (prefix,)
    prefix_backwards = '%s_backwards' % (prefix,)

    if mask is not None:
        forwards = lstm_layer(tparams, state_below, options, prefix=prefix_forwards, mask=mask, go_backwards=False, mult=mult)
        backwards = lstm_layer(tparams, state_below, options, prefix=prefix_backwards, mask=mask, go_backwards=True, mult=mult)
    else:
        forwards = lstm_unmasked_layer(tparams, state_below, options, prefix=prefix_forwards, mult=mult, go_backwards=False)
        backwards = lstm_unmasked_layer(tparams, state_below, options, prefix=prefix_backwards, mult=mult, go_backwards=True)

    # Combine directions by summation (keeps the output dimensionality equal
    # to a single direction's, unlike concatenation).
    return forwards + backwards

39 changes: 24 additions & 15 deletions nn_params.py
Expand Up @@ -41,21 +41,21 @@ def generate_init_params(options, params):
options['dim_proj_chars'])*2 - 1
params['Cemb'] = (0.01 * randn).astype(config.floatX)

params = param_init_lstm(options,
params,
prefix="lstm_chars_forwards")
params = param_init_lstm(options,
params,
prefix="lstm_chars_backwards")

params = param_init_lstm(options,
params,
prefix="lstm_words", mult=3)

params = param_init_lstm(options,
params,
prefix="lstm_words_2", mult=3)
params = param_init_bidirection_lstm(options,
params,
prefix="lstm_chars_1")

params = param_init_bidirection_lstm(options,
params,
prefix="lstm_chars_2")

params = param_init_bidirection_lstm(options,
params,
prefix="lstm_words_1", mult=3)

params = param_init_bidirection_lstm(options,
params,
prefix="lstm_words_2", mult=3)

# classifier
params['U'] = 0.01 * numpy.random.randn(options['dim_proj']*3,
Expand Down Expand Up @@ -97,3 +97,12 @@ def param_init_lstm(options, params, prefix='lstm', mult=1):
params[_p(prefix, 'b')] = b.astype(config.floatX)

return params

def param_init_bidirection_lstm(options, params, prefix='lstm', mult=1):
    """Initialise the weights for both directions of a bidirectional LSTM.

    Delegates to param_init_lstm twice -- forwards first, then backwards --
    using the '<prefix>_forwards' / '<prefix>_backwards' naming convention
    expected by bidirectional_lstm_layer.
    """
    for direction in ('forwards', 'backwards'):
        params = param_init_lstm(options, params,
                                 '%s_%s' % (prefix, direction), mult)
    return params
2 changes: 1 addition & 1 deletion nn_serialization.py
Expand Up @@ -36,6 +36,6 @@ def load_params(path, params):
logging.info("Loading model from file '%s'...", path)
with open(path, 'rb') as fin:
data = pickle.load(fin)
for k in ['dim_proj_chars', 'dim_proj_words', 'char_dict', 'pos_dict', 'word_dict']:
for k in ['dim_proj_chars', 'char_dict', 'pos_dict', 'word_dict']:
params[k] = data[k]
return params
2 changes: 1 addition & 1 deletion server.py
Expand Up @@ -102,7 +102,7 @@ def hello():

print chars, words
# TODO: 32 is the n_proj
xc, xw, mask, wmask, y, y_mask = prepare_data(chars, words, labels, 140, 38, 32)
xc, xw, mask, wmask, y, y_mask = prepare_data(chars, words, labels, 140, 38, 16)

pred = model[-3](xc, mask, wmask, y_mask)
print pred
Expand Down

0 comments on commit 30e161d

Please sign in to comment.