In [1]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, modified_precision
from nltk.translate.chrf_score import sentence_chrf, corpus_chrf
from nltk.metrics import scores
import scipy.io.wavfile
from IPython.display import Audio
from IPython.display import display
from nltk.stem import *
# from nltk.stem.snowball import SnowballStemmer
from stemming.porter2 import stem
import stemming
from nltk.metrics.scores import recall

from nltk.corpus import stopwords

%matplotlib inline

In [2]:
 #coding: utf-8

from basics import *
import prep_buckets

In [3]:
class SpeechEncoderDecoder(Chain):
    def __init__(self, m_cfg, gpuid):
        """Keep the configuration, read vocabulary metadata and build all links.

        Args:
            m_cfg: model configuration, indexed with string keys by the
                other methods of this class.
            gpuid: device id; the device is only selected here when the
                value is non-negative.
        """
        self.m_cfg, self.gpuid = m_cfg, gpuid

        # sizes derived from the dictionaries are needed before links exist
        self.init_params()

        on_gpu = self.gpuid >= 0
        if on_gpu:
            cuda.get_device(self.gpuid).use()

        super().__init__()
        self.init_model()

    def init_params(self):
        """Derive the RNN link type and the vocabulary sizes from ``self.m_cfg``.

        Attributes set:
            RNN: ``L.GRU`` when ``m_cfg['rnn_unit'] == RNN_GRU``, else ``L.LSTM``.
            v_size_es: size of the encoder ``w2i`` table, or 0 when
                ``m_cfg['enc_key'] == 'sp'``.
            v_size_en: size of the decoder ``w2i`` table.
            sim_dict: loaded only when ``m_cfg['sample_out']`` is True and the
                file exists; otherwise the attribute is left unset.
            bow_dict, bag_size_en: loaded only when ``train_top_K_enw.dict``
                exists under ``m_cfg['data_path']``; otherwise left unset.

        Every dictionary is read through ``_load_pickle`` so that the file
        handle is closed as soon as the content has been loaded.
        """
        #----------------------------------------------------------------------
        # determine rnn type
        #----------------------------------------------------------------------
        if self.m_cfg['rnn_unit'] == RNN_GRU:
            self.RNN = L.GRU
        else:
            self.RNN = L.LSTM
        #----------------------------------------------------------------------
        # get vocab size
        #----------------------------------------------------------------------
        if 'fisher' in self.m_cfg['train_set']:
            if self.m_cfg['stemmify'] == False:
                vocab_fname = 'train_vocab.dict'
            else:
                vocab_fname = 'train_stemmed_vocab.dict'
        else:
            vocab_fname = 'ch_train_vocab.dict'
        v_path = os.path.join(self.m_cfg['data_path'], vocab_fname)
        vocab_dict = self._load_pickle(v_path)
        if self.m_cfg['enc_key'] != 'sp':
            self.v_size_es = len(vocab_dict[self.m_cfg['enc_key']]['w2i'])
        else:
            # 'sp' encoder input has no token vocabulary
            self.v_size_es = 0
        self.v_size_en = len(vocab_dict[self.m_cfg['dec_key']]['w2i'])
        #----------------------------------------------------------------------
        # sim dict
        #----------------------------------------------------------------------
        if "sample_out" in self.m_cfg and self.m_cfg["sample_out"] == True:
            sim_dict_path = os.path.join(self.m_cfg['data_path'], self.m_cfg['sim_dict'])
            if os.path.exists(sim_dict_path):
                self.sim_dict = self._load_pickle(sim_dict_path)
            else:
                print("{0:s} -- sim dict not found!".format(sim_dict_path))
        #----------------------------------------------------------------------
        # bag-of-words dict
        #----------------------------------------------------------------------
        bow_dict_path = os.path.join(self.m_cfg['data_path'],
                                     'train_top_K_enw.dict')
        if os.path.exists(bow_dict_path):
            self.bow_dict = self._load_pickle(bow_dict_path)
            self.bag_size_en = len(self.bow_dict['w2i'])
        else:
            print("bag-of-words data not found")
        #----------------------------------------------------------------------

    @staticmethod
    def _load_pickle(path):
        """Unpickle the file at ``path`` and close the handle afterwards."""
        # NOTE: pickle can execute arbitrary code while loading -- only point
        # the configuration at dictionary files from a trusted source.
        with open(path, "rb") as f:
            return pickle.load(f)

    def add_rnn_layers(self, layer_names, in_units, out_units):
        """Register one ``self.RNN`` link for every name in ``layer_names``.

        Args:
            layer_names: link names, in stacking order.
            in_units: input size of the first layer.
            out_units: output size of every layer, and input size of every
                layer after the first.

        When ``m_cfg['ln']`` is truthy, a ``L.LayerNormalization(out_units)``
        link named ``"<layer>_ln"`` is registered next to each RNN layer.
        """
        for i, rnn_name in enumerate(layer_names):
            #------------------------------------------------------------------
            # for first layer, use in_units
            #------------------------------------------------------------------
            curr_in = in_units if i == 0 else out_units
            #------------------------------------------------------------------
            # add rnn layer
            #------------------------------------------------------------------
            self.add_link(rnn_name, self.RNN(curr_in, out_units))
            #------------------------------------------------------------------
            # add layer normalization
            #------------------------------------------------------------------
            if self.m_cfg['ln']:
                self.add_link("{0:s}_ln".format(rnn_name), L.LayerNormalization(out_units))
            #------------------------------------------------------------------

    def init_rnn_model(self, in_dim):
        """Create the encoder RNN stack and, unless the model is configured
        as bag-of-words, the attention and decoder links.

        Args:
            in_dim: input size of the first encoder layer.
        """
        hidden = self.m_cfg['hidden_units']

        # ---- encoder, forward direction ----
        self.rnn_enc = []
        for idx in range(self.m_cfg['enc_layers']):
            self.rnn_enc.append("L{0:d}_enc".format(idx))
        self.add_rnn_layers(self.rnn_enc, in_dim, hidden)

        # ---- encoder, reverse direction (bidirectional models only) ----
        if self.m_cfg['bi_rnn']:
            self.rnn_rev_enc = []
            for idx in range(self.m_cfg['enc_layers']):
                self.rnn_rev_enc.append("L{0:d}_rev_enc".format(idx))
            self.add_rnn_layers(self.rnn_rev_enc, in_dim, hidden)

        # ---- bag-of-words models need neither attention nor a decoder ----
        if "bagofwords" in self.m_cfg and self.m_cfg['bagofwords'] != False:
            return

        # ---- attention ----
        attn = self.m_cfg['attn_units']
        # a bidirectional encoder yields states twice as wide
        enc_width = 2 * hidden if self.m_cfg['bi_rnn'] else hidden
        self.add_link("attn_Wa", L.Linear(enc_width, enc_width))
        # the context layer sees the encoder summary concatenated with the
        # decoder state, both of width enc_width
        self.add_link("context", L.Linear(2 * enc_width, attn))

        # ---- decoder ----
        emb = self.m_cfg['embedding_units']
        self.rnn_dec = []
        for idx in range(self.m_cfg['dec_layers']):
            self.rnn_dec.append("L{0:d}_dec".format(idx))
        # decoder rnn input = embedding + previous attentional vector
        self.add_rnn_layers(self.rnn_dec, emb + attn, enc_width)

    def init_deep_cnn_model(self):
        """Register the convolutional front-end described by ``m_cfg['cnn_layers']``.

        Attributes set:
            cnns: names of the registered convolution links, in order.
            reduce_dim_len: product of ``stride[0]`` over all CNN layers;
                stays 1 when no CNN layers are configured.
            cnn_out_dim: feature size handed to the RNN encoder. With CNN
                layers this is the last layer's ``out_channels`` times the
                input dimension reduced by every ``stride[1]``; without CNN
                layers it is the raw input dimension.

        Each entry of ``m_cfg['cnn_layers']`` is passed as keyword arguments
        to ``L.Convolution2D`` and must provide ``out_channels`` and a
        two-element ``stride``.
        """
        CNN_IN_DIM = (self.m_cfg['sp_dim'] if self.m_cfg['enc_key'] == 'sp'
                             else self.m_cfg['embedding_units'])
        # ---------------------------------------------------------------------
        # initialize list of cnn layers
        # ---------------------------------------------------------------------
        self.cnns = []
        # always defined, so code that rescales sequence lengths by this
        # factor also works for models without a CNN front-end
        self.reduce_dim_len = 1
        if len(self.m_cfg['cnn_layers']) > 0:
            # -----------------------------------------------------------------
            # using He initializer
            # -----------------------------------------------------------------
            w = chainer.initializers.HeNormal()
            # add CNN layers
            reduce_dim = CNN_IN_DIM
            for i, l in enumerate(self.m_cfg['cnn_layers']):
                lname = "CNN_{0:d}".format(i)
                self.cnns.append(lname)
                self.add_link(lname, L.Convolution2D(**l, initialW=w))
                # assumes each layer's output size along this axis is
                # ceil(input / stride) -- TODO confirm against the padding
                # used in the layer configuration
                reduce_dim = math.ceil(reduce_dim / l["stride"][1])
                self.reduce_dim_len *= l["stride"][0]
                if self.m_cfg['bn']:
                    # ---------------------------------------------------------
                    # add batch normalization
                    # ---------------------------------------------------------
                    self.add_link('{0:s}_bn'.format(lname), L.BatchNormalization((l["out_channels"])))
                    # ---------------------------------------------------------
            # -----------------------------------------------------------------
            # cnn output has reduced dimensions based on strides
            # -----------------------------------------------------------------
            self.cnn_out_dim = self.m_cfg['cnn_layers'][-1]["out_channels"] * reduce_dim
            # -----------------------------------------------------------------
        else:
            # -----------------------------------------------------------------
            # no cnns added
            # -----------------------------------------------------------------
            self.cnn_out_dim = CNN_IN_DIM
            # -----------------------------------------------------------------
    # end init_deep_cnn_model()

    def init_model(self):
        """Register every link of the network.

        Order: optional encoder embedding (text input only), CNN front-end,
        encoder/attention/decoder RNNs, then either the sequence decoder
        head (decoder embedding, ``out`` layer, pad mask) or the bag-of-words
        head (highway layers and ``out`` layer).

        The pad mask is created on the configured GPU when ``gpuid >= 0``
        and with numpy otherwise; the CUDA device context is only entered
        in the GPU case so that CPU-only runs do not touch CUDA.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np
        # ---------------------------------------------------------------------
        # add enc embedding layer if text model
        # ---------------------------------------------------------------------
        if self.m_cfg['enc_key'] != 'sp':
            self.add_link("embed_enc", L.EmbedID(self.v_size_es,
                                                self.m_cfg['embedding_units']))
        # ---------------------------------------------------------------------
        # add cnn layer
        # ---------------------------------------------------------------------
        self.init_deep_cnn_model()
        rnn_in_units = self.cnn_out_dim
        # ---------------------------------------------------------------------
        # add rnn layers
        # ---------------------------------------------------------------------
        print("cnn_out_dim = rnn_in_units = ", rnn_in_units)
        self.init_rnn_model(rnn_in_units)
        # ---------------------------------------------------------------------
        # add decoder/bag-of-words
        # ---------------------------------------------------------------------
        if "bagofwords" not in self.m_cfg or self.m_cfg['bagofwords'] == False:
            # -----------------------------------------------------------------
            # add dec embedding layer
            # -----------------------------------------------------------------
            self.add_link("embed_dec", L.EmbedID(self.v_size_en,
                                                 self.m_cfg['embedding_units']))
            # -----------------------------------------------------------------
            # add output layers
            # -----------------------------------------------------------------
            self.add_link("out", L.Linear(self.m_cfg['attn_units'],
                                          self.v_size_en))
            # -----------------------------------------------------------------
            # create masking array for pad id
            # -----------------------------------------------------------------
            if self.gpuid >= 0:
                with cupy.cuda.Device(self.gpuid):
                    self.mask_pad_id = xp.ones(self.v_size_en, dtype=xp.float32)
            else:
                # CPU run: xp is numpy, no CUDA device context is needed
                self.mask_pad_id = xp.ones(self.v_size_en, dtype=xp.float32)
            # class weight 0 for index 0, so that class contributes no loss
            # NOTE(review): assumes the pad id is 0 -- confirm against PAD_ID
            self.mask_pad_id[0] = 0
            # -----------------------------------------------------------------
        else:
            # -----------------------------------------------------------------
            # add bag-of-words output layer
            # -----------------------------------------------------------------
            if self.m_cfg['bi_rnn']:
                h_units = self.m_cfg['hidden_units'] * 2
            else:
                h_units = self.m_cfg['hidden_units']

            # Add highway layers for classification
            self.highway = []
            for i in range(self.m_cfg["highway_layers"]):
                lname = "HIGHWAY_{0:d}".format(i)
                self.highway.append(lname)
                self.add_link(lname, L.Highway(h_units))
            # Add final prediction layer
            self.add_link("out", L.Linear(h_units, self.bag_size_en))
            # -----------------------------------------------------------------


    def reset_state(self):
        """Clear the recurrent state of every RNN link and zero ``self.loss``."""
        # encoder layers; the reverse stack exists only for bidirectional models
        if self.m_cfg['bi_rnn']:
            encoder_names = self.rnn_enc + self.rnn_rev_enc
        else:
            encoder_names = self.rnn_enc
        for layer_name in encoder_names:
            self[layer_name].reset_state()

        # decoder layers exist only when the model is not bag-of-words
        has_decoder = ("bagofwords" not in self.m_cfg
                       or self.m_cfg['bagofwords'] == False)
        if has_decoder:
            for layer_name in self.rnn_dec:
                self[layer_name].reset_state()

        self.loss = 0

    def set_decoder_state(self):
        """Copy the encoder's recurrent state into the decoder layers.

        Layers are paired positionally with ``zip``, so pairing stops at the
        shortest of the layer-name lists. For bidirectional encoders the
        forward and reverse states are concatenated before being handed to
        the decoder layer. LSTM layers receive ``(c, h)``, other units only
        ``h``.
        """
        if not self.m_cfg['bi_rnn']:
            for enc_name, dec_name in zip(self.rnn_enc, self.rnn_dec):
                if self.m_cfg['rnn_unit'] == RNN_LSTM:
                    self[dec_name].set_state(self[enc_name].c, self[enc_name].h)
                else:
                    self[dec_name].set_state(self[enc_name].h)
            return

        layer_triples = zip(self.rnn_enc, self.rnn_rev_enc, self.rnn_dec)
        for fwd_name, rev_name, dec_name in layer_triples:
            joint_h = F.concat((self[fwd_name].h, self[rev_name].h))
            if self.m_cfg['rnn_unit'] == RNN_LSTM:
                joint_c = F.concat((self[fwd_name].c, self[rev_name].c))
                self[dec_name].set_state(joint_c, joint_h)
            else:
                self[dec_name].set_state(joint_h)

    def compute_context_vector(self, dec_h):
        """Attend over ``self.enc_states`` using the decoder state ``dec_h``.

        Args:
            dec_h: two-dimensional decoder hidden state.

        Returns:
            ``(cv, alphas)``: the context vector and the attention weights
            it was computed from.

        No padding mask is applied to the scores before the softmax.
        """
        # unpacking also enforces that dec_h is two-dimensional
        batch_size, n_units = dec_h.shape
        # score every encoder state against the projected decoder state
        projected = self.attn_Wa(dec_h)
        scores = F.batch_matmul(self.enc_states, projected)
        # normalise the scores into attention weights
        alphas = F.softmax(scores)
        # weighted sum of encoder states; drop the trailing singleton axis
        enc_transposed = F.swapaxes(self.enc_states, 2, 1)
        weighted = F.batch_matmul(enc_transposed, alphas)
        cv = F.squeeze(weighted, axis=2)
        return cv, alphas

    def feed_rnn(self, rnn_in, rnn_layers, highway_layers=None):
        """Push ``rnn_in`` through the named RNN layers, one time step.

        Args:
            rnn_in: input to the first layer.
            rnn_layers: link names, applied in order.
            highway_layers: accepted but not used by this method.

        Returns:
            Output of the last layer after the optional dropout, layer
            normalisation and ReLU steps selected in ``m_cfg``.
        """
        out = rnn_in
        for layer_name in rnn_layers:
            # recurrent step, followed by dropout when a positive ratio is set
            if self.m_cfg['rnn_dropout'] > 0:
                step_out = self[layer_name](out)
                out = F.dropout(step_out, ratio=self.m_cfg['rnn_dropout'])
            else:
                out = self[layer_name](out)

            # optional layer normalisation, registered as "<layer>_ln"
            if self.m_cfg['ln']:
                out = self["{0:s}_ln".format(layer_name)](out)

            # optional ReLU, only when explicitly switched on
            if 'rnn_relu' in self.m_cfg and self.m_cfg['rnn_relu'] == True:
                out = F.relu(out)
        return out

    def encode(self, data_in, rnn_layers):
        """Run one encoder step: feed ``data_in`` through ``rnn_layers``."""
        return self.feed_rnn(data_in, rnn_layers)

    def decode(self, word, ht):
        """Run one decoder step with input feeding and attention.

        Args:
            word: ids of the current input tokens, one per batch element.
            ht: attentional hidden state returned by the previous step (the
                callers in this class start from zeros).

        Returns:
            ``(predicted_out, ht)``: the output of the ``out`` layer for this
            step and the new attentional hidden state.

        Dropout on the embedding uses ``m_cfg['embed_dropout']`` as its
        ratio. Earlier code gated on that key but applied the
        ``rnn_dropout`` ratio instead.
        """
        # ---------------------------------------------------------------------
        # get embedding
        # ---------------------------------------------------------------------
        if 'embed_dropout' in self.m_cfg:
            embed_id = F.dropout(self.embed_dec(word),
                                 ratio=self.m_cfg['embed_dropout'])
        else:
            embed_id = self.embed_dec(word)
        # ---------------------------------------------------------------------
        # apply rnn - input feeding, use previous ht
        # ---------------------------------------------------------------------
        rnn_in = F.concat((embed_id, ht), axis=1)
        h = self.feed_rnn(rnn_in, self.rnn_dec)
        # ---------------------------------------------------------------------
        # compute context vector
        # ---------------------------------------------------------------------
        cv, _ = self.compute_context_vector(h)
        cv_hdec = F.concat((cv, h), axis=1)
        # ---------------------------------------------------------------------
        # compute attentional hidden state
        # ---------------------------------------------------------------------
        ht = F.tanh(self.context(cv_hdec))
        # ---------------------------------------------------------------------
        # make prediction
        # ---------------------------------------------------------------------
        if self.m_cfg['out_dropout'] > 0:
            # dropout is applied to the output of the final linear layer
            predicted_out = F.dropout(self.out(ht),
                                      ratio=self.m_cfg['out_dropout'])
        else:
            predicted_out = self.out(ht)
        # ---------------------------------------------------------------------
        return predicted_out, ht

    def decode_batch(self, decoder_batch, teacher_ratio):
        """Decode a batch of target sequences and accumulate the training loss.

        Args:
            decoder_batch: target token ids; iterated along axis 0 (one entry
                per decoding step) with the batch on axis 1.
            teacher_ratio: probability, per step, of feeding the gold token
                instead of the model's previous argmax prediction.

        Returns:
            Sum over steps of the per-step loss.

        Loss variants, selected from ``m_cfg``:
            * ``smooth_out``: multi-label sigmoid cross entropy against the
              indices listed in ``self.sim_dict['i'][w]``.
            * ``sample_out``: softmax cross entropy against a target that may
              be swapped for a random entry of ``self.sim_dict['i'][w]``.
            * otherwise: softmax cross entropy against the gold next token.

        NOTE(review): ``init_params`` only loads ``self.sim_dict`` when
        ``sample_out`` is enabled, so the ``smooth_out`` branch appears to
        need ``sample_out`` set as well -- confirm.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np
        batch_size = decoder_batch.shape[1]
        loss = 0
        # ---------------------------------------------------------------------
        # initialize hidden states as a zero vector
        # ---------------------------------------------------------------------
        a_units = self.m_cfg['attn_units']
        ht = Variable(xp.zeros((batch_size, a_units), dtype=xp.float32))
        # ---------------------------------------------------------------------
        # the first step always starts from the first row of the batch
        decoder_input = decoder_batch[0]
        # pair every step's token with the token that follows it
        for curr_word, next_word in zip(decoder_batch, decoder_batch[1:]):
            # -----------------------------------------------------------------
            # teacher forcing logic
            # -----------------------------------------------------------------
            # one random draw per step; when it is below teacher_ratio the
            # gold token replaces the previous prediction as input
            use_label = True if random.random() < teacher_ratio else False
            if use_label:
                decoder_input = curr_word
            # -----------------------------------------------------------------
            # run one decoder step
            # -----------------------------------------------------------------
            predicted_out, ht = self.decode(decoder_input, ht)
            # the greedy prediction is the default input of the next step
            decoder_input = F.argmax(predicted_out, axis=1)
            # -----------------------------------------------------------------
            # compute loss
            # -----------------------------------------------------------------
            if "smooth_out" in self.m_cfg and self.m_cfg["smooth_out"] == True:
                # multi-hot target with the same shape as the predictions
                t = xp.zeros(shape=predicted_out.shape, dtype='i')
                for i, w in enumerate(next_word.data.tolist()):
                    if w == PAD_ID:
                        # whole row set to -1 for padded positions
                        # (presumably the ignore label of the loss -- confirm)
                        t[i,:] = -1
                    else:
                        t[i,self.sim_dict['i'][w]] = 1
                loss_arr = F.sigmoid_cross_entropy(predicted_out, t, normalize=True)
            elif "sample_out" in self.m_cfg and self.m_cfg["sample_out"] == True:
                # work on a copy so the gold labels stay untouched
                t_alt = xp.copy(next_word.data)
                # sample and replace each element in the batch
                for i in range(len(t_alt)):
                    # use_sample = True if random.random() > self.m_cfg["sample_out_prob"] else False
                    # the target is replaced when the draw EXCEEDS sample_out_prob
                    if random.random() > self.m_cfg["sample_out_prob"]:
                        t_alt[i] = xp.random.choice(self.sim_dict['i'][int(t_alt[i])],1)

                loss_arr = F.softmax_cross_entropy(predicted_out, t_alt,
                                               class_weight=self.mask_pad_id)
            else:
                loss_arr = F.softmax_cross_entropy(predicted_out, next_word,
                                               class_weight=self.mask_pad_id)
            loss += loss_arr
            # -----------------------------------------------------------------
        return loss

    def predict_batch(self, batch_size, pred_limit, y=None, display=False):
        """Greedily decode a batch, optionally scoring against gold labels.

        Args:
            batch_size: number of sequences in the batch.
            pred_limit: maximum number of decoding steps; ignored when ``y``
                is given (``len(y) - 1`` is used instead).
            y: optional gold token ids indexed by step; ``y[0]`` seeds the
                decoder and ``y[step + 1]`` is the target of each step.
            display: accepted but not used by this method.

        Returns:
            ``(pred_sents.T, loss)``: the predicted token ids with one row
            appended per decoding step before transposing, and the summed
            loss (0 when ``y`` is None).

        Decoding stops once the step limit is reached or every sequence has
        produced ``EOS_ID`` at some step. The EOS flag array is created
        inside a CUDA device context only when ``gpuid >= 0``, so CPU-only
        runs do not touch CUDA.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np
        # max number of predictions to make
        # if labels are provided, this variable is not used
        stop_limit = pred_limit
        # to track number of predictions made
        npred = 0
        # to store loss
        loss = 0
        # if labels are provided, use them for computing loss
        compute_loss = y is not None
        # ---------------------------------------------------------------------
        if compute_loss:
            stop_limit = len(y)-1
            # get starting word to initialize decoder
            curr_word = y[0]
        else:
            # initialize starting word to GO_ID symbol
            curr_word = Variable(xp.full((batch_size,), GO_ID, dtype=xp.int32))
        # ---------------------------------------------------------------------
        # flag to track if all sentences in batch have predicted EOS
        # ---------------------------------------------------------------------
        if self.gpuid >= 0:
            with cupy.cuda.Device(self.gpuid):
                check_if_all_eos = xp.full((batch_size,), False, dtype=xp.bool_)
        else:
            # CPU run: xp is numpy, no CUDA device context is needed
            check_if_all_eos = xp.full((batch_size,), False, dtype=xp.bool_)
        # ---------------------------------------------------------------------
        a_units = self.m_cfg['attn_units']
        ht = Variable(xp.zeros((batch_size, a_units), dtype=xp.float32))
        # ---------------------------------------------------------------------
        while npred < (stop_limit):
            # -----------------------------------------------------------------
            # decode and predict
            pred_out, ht = self.decode(curr_word, ht)
            pred_word = F.argmax(pred_out, axis=1)
            # -----------------------------------------------------------------
            # save prediction at this time step
            # -----------------------------------------------------------------
            if npred == 0:
                pred_sents = pred_word.data
            else:
                pred_sents = xp.vstack((pred_sents, pred_word.data))
            # -----------------------------------------------------------------
            if compute_loss:
                # compute loss
                loss += F.softmax_cross_entropy(pred_out, y[npred+1],
                                                   class_weight=self.mask_pad_id)
            # -----------------------------------------------------------------
            # the prediction becomes the input of the next step
            curr_word = pred_word
            # check if EOS is predicted for all sentences
            # -----------------------------------------------------------------
            check_if_all_eos[pred_word.data == EOS_ID] = True
            if xp.all(check_if_all_eos):
                break
            # -----------------------------------------------------------------
            # increment number of predictions made
            npred += 1
            # -----------------------------------------------------------------
        return pred_sents.T, loss

    def decode_bow_batch(self, y):
        """Score the bag-of-words head against labels ``y`` and list predictions.

        Args:
            y: label variable; entries below 0 get loss weight 0, entries
                equal to 0 get ``m_cfg['negative_weight']`` and entries above
                0 get ``m_cfg['positive_weight']``.

        Returns:
            ``(pred_words, loss_avg, pred_probs)`` where ``pred_words`` holds,
            per batch row, the selected output indices greater than 3,
            ``loss_avg`` is the mean of the weighted element-wise loss and
            ``pred_probs`` holds the raw output row of the ``out`` layer.

        The final RNN state is used directly when no highway layers are
        configured.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np
        # use final rnn state for both fwd and rev (if configured) rnns;
        # highway layers are optional, so start from the rnn state itself
        highway_h = self.h_final_rnn
        if self.m_cfg['highway_layers'] > 0:
            highway_h = self.forward_highway(self.h_final_rnn)

        predicted_out = self.out(highway_h)

        # element-wise loss, so each label can be weighted individually
        loss = F.sigmoid_cross_entropy(predicted_out, y, reduce="no")

        loss_weights = xp.ones(shape=y.data.shape, dtype="f")
        loss_weights[y.data < 0] = 0
        loss_weights[y.data == 0] = self.m_cfg["negative_weight"]
        loss_weights[y.data > 0] = self.m_cfg["positive_weight"]
        loss_avg = F.mean(loss_weights * loss)
        # ---------------------------------------------------------------------
        pred_words = []
        pred_probs = []
        pred_limit = self.m_cfg['max_en_pred']
        for row in predicted_out.data:
            # every index whose score reaches the threshold ...
            pred_inds = xp.where(row >= self.m_cfg["pred_thresh"])[0]
            # ... capped at the pred_limit highest scores, best first
            if len(pred_inds) > pred_limit:
                pred_inds = xp.argsort(row)[-pred_limit:][::-1]
            # indices 0-3 are dropped (presumably special tokens -- confirm)
            pred_words.append([i for i in pred_inds.tolist() if i > 3])
            pred_probs.append(row)

        return pred_words, loss_avg, pred_probs

    def predict_bow_batch(self, batch_size, pred_limit, y=None, display=False):
        """Predict bag-of-words indices and, when labels are given, the loss.

        Args:
            batch_size: accepted but not used by this method.
            pred_limit: maximum number of indices kept per batch row.
            y: optional label variable; entries below 0 get loss weight 0,
                entries equal to 0 get ``m_cfg['negative_weight']`` and
                entries above 0 get ``m_cfg['positive_weight']``.
            display: accepted but not used by this method.

        Returns:
            ``(pred_words, loss_avg, pred_probs)`` where ``pred_words`` holds,
            per batch row, the selected output indices greater than 3,
            ``loss_avg`` is the mean weighted loss (0 when ``y`` is None) and
            ``pred_probs`` holds the raw output row of the ``out`` layer.

        The final RNN state is used directly when no highway layers are
        configured.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np
        # to store loss
        loss_avg = 0
        # if labels are provided, use them for computing loss
        compute_loss = y is not None
        pred_words = []
        pred_probs = []
        # ---------------------------------------------------------------------
        # decode and predict; highway layers are optional, so start from the
        # final rnn state itself
        highway_h = self.h_final_rnn
        if self.m_cfg['highway_layers'] > 0:
            highway_h = self.forward_highway(self.h_final_rnn)

        predicted_out = self.out(highway_h)

        for row in predicted_out.data:
            # every index whose score reaches the threshold ...
            pred_inds = xp.where(row >= self.m_cfg["pred_thresh"])[0]
            # ... capped at the pred_limit highest scores, best first
            if len(pred_inds) > pred_limit:
                pred_inds = xp.argsort(row)[-pred_limit:][::-1]
            # indices 0-3 are dropped (presumably special tokens -- confirm)
            pred_words.append([i for i in pred_inds.tolist() if i > 3])
            pred_probs.append(row)

        # -----------------------------------------------------------------
        if compute_loss:
            # element-wise loss, so each label can be weighted individually
            loss = F.sigmoid_cross_entropy(predicted_out, y, reduce="no")

            loss_weights = xp.ones(shape=y.data.shape, dtype="f")
            loss_weights[y.data < 0] = 0
            loss_weights[y.data == 0] = self.m_cfg["negative_weight"]
            loss_weights[y.data > 0] = self.m_cfg["positive_weight"]
            loss_avg = F.mean(loss_weights * loss)
        # -----------------------------------------------------------------
        return pred_words, loss_avg, pred_probs

    def forward_deep_cnn(self, h):
        """Run ``h`` through every registered CNN layer.

        Each layer is followed by optional batch normalisation (when
        ``m_cfg['bn']`` is truthy) and a ReLU.

        Args:
            h: input variable; presumably (batch, time, features) -- TODO
                confirm against the caller.

        Returns:
            The CNN output with axes 1 and 2 swapped back, all trailing axes
            flattened into one, and axis 1 rolled to the front.
        """
        # insert a singleton axis and move it to position 1 so the input has
        # the four axes a 2d convolution expects
        h = F.swapaxes(F.expand_dims(h, 2), 1, 2)

        for layer_name in self.cnns:
            h = self[layer_name](h)
            # batch normalization goes before the non-linearity
            if self.m_cfg['bn']:
                h = self['{0:s}_bn'.format(layer_name)](h)
            h = F.relu(h)

        # undo the axis swap, flatten everything after the first two axes,
        # then bring axis 1 to the front
        h = F.swapaxes(h, 1, 2)
        flat_shape = h.shape[:2] + (-1,)
        h = F.reshape(h, flat_shape)
        return F.rollaxis(h, 1)

    def forward_highway(self, X):
        """Apply the registered highway layers to ``X`` as a stack.

        Each layer consumes the output of the layer before it; dropout with
        ratio ``m_cfg['highway_dropout']`` follows every layer when that
        ratio is positive.

        Args:
            X: input to the first highway layer.

        Returns:
            Output of the last highway layer, or ``X`` unchanged when no
            highway layers are registered.
        """
        h = X
        for lname in self.highway:
            # feed the running output, not the original input, so that every
            # layer of the stack contributes to the result
            h = self[lname](h)
            if self.m_cfg['highway_dropout'] > 0:
                h = F.dropout(h, ratio=self.m_cfg['highway_dropout'])
        return h

    def forward_rnn(self, X):
        """Encode the sequence ``X`` and store the states in ``self.enc_states``.

        Args:
            X: three-dimensional input indexed by step on axis 0 (its shape
                is unpacked as ``in_size, batch_size, in_dim``).

        Side effects:
            Resets all RNN state, then sets ``self.enc_states`` to the
            per-step encoder outputs with axes 0 and 1 swapped (batch
            first). For bidirectional models the reverse-direction outputs
            are flipped back into forward order and concatenated to the
            forward outputs on the last axis.

        The reverse encoder reads ``X[-(i + 1)]`` at step ``i``, i.e. the
        last frame first. Indexing with ``X[-i]`` would read ``X[0]`` at
        step 0 and leave the flipped states one frame out of alignment
        with the forward states.
        """
        # ---------------------------------------------------------------------
        # reset rnn state
        # ---------------------------------------------------------------------
        self.reset_state()
        # ---------------------------------------------------------------------
        in_size, batch_size, in_dim = X.shape
        bi_rnn = self.m_cfg['bi_rnn']
        # collect per-step outputs and concatenate once after the loop
        fwd_steps = []
        rev_steps = []
        for i in range(in_size):
            fwd_steps.append(F.expand_dims(self.encode(X[i], self.rnn_enc), 0))
            if bi_rnn:
                # walk the sequence from its last frame to its first
                rev_steps.append(
                    F.expand_dims(self.encode(X[-(i + 1)], self.rnn_rev_enc), 0))
        h_fwd = F.concat(tuple(fwd_steps), axis=0)
        # ---------------------------------------------------------------------
        if bi_rnn:
            # flip so that position t holds the reverse state for frame t
            h_rev = F.flipud(F.concat(tuple(rev_steps), axis=0))
            self.enc_states = F.concat((h_fwd, h_rev), axis=2)
        else:
            self.enc_states = h_fwd
        # ---------------------------------------------------------------------
        # step-major -> batch-major
        self.enc_states = F.swapaxes(self.enc_states, 0, 1)
        # ---------------------------------------------------------------------

    def forward_bow_rnn(self, X, l):
        """Run the encoder RNN and keep one final state per utterance.

        ``X`` is unpacked as ``(in_size, batch_size, in_dim)`` and indexed
        along its first axis.  ``l`` holds one input length per utterance; it
        is divided by ``self.reduce_dim_len`` and floored to get the number of
        valid steps (``len_check``).  A step's output only replaces the kept
        state of an utterance when the frame it read lies inside that
        utterance's valid range, so padded frames do not overwrite the state.

        The result is stored in ``self.h_final_rnn``; for a bidirectional
        encoder the forward and reverse final states are concatenated on
        axis 1.

        Args:
            X: encoder input with three axes, time step first.
            l: sequence of per-utterance input lengths (one per batch item).

        Raises:
            ValueError: if ``l`` is ``None``.
        """
        if l is None:
            raise ValueError("forward_bow_rnn requires the input lengths `l`")
        # pick the array module locally, as the weight-noise methods do,
        # instead of depending on a notebook-level global
        xp = cuda.cupy if self.gpuid >= 0 else np
        # ---------------------------------------------------------------------
        # reset rnn state
        # ---------------------------------------------------------------------
        self.reset_state()
        # ---------------------------------------------------------------------
        in_size, _batch_size, _in_dim = X.shape
        len_check = xp.floor(xp.array(l, dtype='i') / self.reduce_dim_len)
        use_rev = self.m_cfg['bi_rnn']
        for i in range(in_size):
            curr_fwd_h = self.encode(X[i], self.rnn_enc)
            if i == 0:
                h_fwd = curr_fwd_h
            else:
                # keep the old state where frame i is past the utterance end
                h_fwd = ((h_fwd * (i >= len_check)[:, xp.newaxis]) +
                         (curr_fwd_h * (i < len_check)[:, xp.newaxis]))

            if use_rev:
                # the reverse RNN walks back to front: step i reads frame
                # `pos`.  (X[-i] would read frame 0 first, then n-1 ... 1.)
                pos = in_size - 1 - i
                curr_rev_h = self.encode(X[pos], self.rnn_rev_enc)
                if i == 0:
                    h_rev = curr_rev_h
                else:
                    # validity is decided by the frame position being read,
                    # not by how many steps have been taken
                    h_rev = ((h_rev * (pos >= len_check)[:, xp.newaxis]) +
                             (curr_rev_h * (pos < len_check)[:, xp.newaxis]))
        # ---------------------------------------------------------------------
        self.h_final_rnn = h_fwd
        if use_rev:
            self.h_final_rnn = F.concat((self.h_final_rnn, h_rev), axis=1)

    def forward_enc(self, X, l=None):
        """Encode ``X``: optional embedding, then the CNN stack, then an RNN.

        When ``m_cfg['enc_key']`` is not ``'sp'`` the input is first passed
        through ``self.embed_enc``; otherwise it is used as given.  The result
        goes through ``self.forward_deep_cnn`` and then into either
        ``self.forward_rnn`` or, when ``m_cfg['bagofwords']`` is present and
        not equal to ``False``, ``self.forward_bow_rnn`` together with ``l``.

        Args:
            X: encoder input.
            l: input lengths, forwarded to ``forward_bow_rnn`` only.
        """
        # text input needs an embedding lookup, 'sp' input is used directly
        if self.m_cfg['enc_key'] == 'sp':
            h = X
        else:
            h = self.embed_enc(X)

        # cnn logic
        h = self.forward_deep_cnn(h)

        # rnn logic: bag-of-words variant only when explicitly switched on
        bow_enabled = ("bagofwords" in self.m_cfg and
                       not (self.m_cfg['bagofwords'] == False))
        if bow_enabled:
            self.forward_bow_rnn(h, l)
        else:
            self.forward_rnn(h)

    def forward(self, X, add_noise=0, teacher_ratio=0, y=None):
        """Encode ``X`` and either compute the decoder loss or predict.

        In training mode (``chainer.config.train``) the input is optionally
        multiplied by noise drawn from a normal distribution with mean 1.0 and
        standard deviation ``add_noise``, the batch is decoded with
        ``self.decode_batch`` and ``([], loss)`` is returned (the loss is also
        stored in ``self.loss``).  Otherwise the result of
        ``self.predict_batch`` is returned.

        Args:
            X: encoder input; ``X.shape[0]`` is taken as the batch size.
            add_noise: noise standard deviation; no noise when not positive.
            teacher_ratio: forwarded to ``decode_batch``.
            y: optional decoder targets; axes 0 and 1 are swapped before use.
        """
        batch_size = X.shape[0]

        # multiplicative input noise, training only
        if add_noise > 0 and chainer.config.train:
            # due to CUDA issues with the random number generator the noise
            # is sampled with numpy and then moved to the GPU
            noise_values = np.random.normal(1.0, add_noise, size=X.shape)
            noise = Variable(noise_values.astype(np.float32))
            if self.gpuid >= 0:
                noise.to_gpu(self.gpuid)
            X = X * noise

        # encode input, then initialise the decoder from the encoder
        self.forward_enc(X)
        self.set_decoder_state()

        # swap axes of the decoder batch
        if y is not None:
            y = F.swapaxes(y, 0, 1)

        if not chainer.config.train:
            # test mode: predict
            return self.predict_batch(batch_size=batch_size,
                                      pred_limit=self.m_cfg['max_en_pred'],
                                      y=y)

        # train mode: decode and report the loss
        self.loss = self.decode_batch(y, teacher_ratio)
        return [], self.loss


    def forward_bow(self, X, add_noise=0, y=None, l=None):
        """Encode ``X`` for the bag-of-words model, then decode or predict.

        In training mode (``chainer.config.train``) the input is optionally
        multiplied by noise drawn from a normal distribution with mean 1.0 and
        standard deviation ``add_noise`` and the value of
        ``self.decode_bow_batch(y)`` is returned.  Otherwise the value of
        ``self.predict_bow_batch`` is returned.

        Args:
            X: encoder input; ``X.shape[0]`` is taken as the batch size.
            add_noise: noise standard deviation; no noise when not positive.
            y: optional labels handed to the decode/predict routine.
            l: input lengths, forwarded to ``forward_enc``.
        """
        batch_size = X.shape[0]

        # multiplicative input noise, training only
        if add_noise > 0 and chainer.config.train:
            # due to CUDA issues with the random number generator the noise
            # is sampled with numpy and then moved to the GPU
            noise_values = np.random.normal(1.0, add_noise, size=X.shape)
            noise = Variable(noise_values.astype(np.float32))
            if self.gpuid >= 0:
                noise.to_gpu(self.gpuid)
            X = X * noise

        # encode input
        self.forward_enc(X, l)

        if not chainer.config.train:
            # test mode: predict
            return self.predict_bow_batch(batch_size=batch_size,
                                          pred_limit=self.m_cfg['max_en_pred'],
                                          y=y)

        # train mode: decode
        return self.decode_bow_batch(y)

    def add_gru_weight_noise(self, rnn_layer, mu, sigma):
        """Add Gaussian noise to every weight and bias of one GRU layer.

        For each of the sub-links ``W``, ``W_r``, ``W_z``, ``U``, ``U_r`` and
        ``U_z`` of ``self[rnn_layer]``, noise drawn from a normal distribution
        with mean ``mu`` and standard deviation ``sigma`` is added to the
        sub-link's ``W`` and ``b`` data.

        Args:
            rnn_layer: name of the GRU layer inside this chain.
            mu: mean of the noise.
            sigma: standard deviation of the noise.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np

        for p in ("W", "W_r", "W_z", "U", "U_r", "U_z"):
            gate = self[rnn_layer][p]
            # numpy's random.normal has no `dtype` keyword (cupy's does), so
            # sample first and cast afterwards to work on CPU and GPU alike
            s_w = xp.random.normal(mu, sigma, gate.W.shape).astype(xp.float32)
            s_b = xp.random.normal(mu, sigma, gate.b.shape).astype(xp.float32)
            gate.W.data = gate.W.data + s_w
            gate.b.data = gate.b.data + s_b


    def add_lstm_weight_noise(self, rnn_layer, mu, sigma):
        """Add Gaussian noise to the weights of one LSTM layer.

        Noise drawn from a normal distribution with mean ``mu`` and standard
        deviation ``sigma`` is added to the ``W`` data of the ``upward`` and
        ``lateral`` sub-links of ``self[rnn_layer]``.  Only the ``upward``
        sub-link's bias ``b`` is perturbed; ``lateral.b`` is never touched.

        Args:
            rnn_layer: name of the LSTM layer inside this chain.
            mu: mean of the noise.
            sigma: standard deviation of the noise.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np

        for p in ("upward", "lateral"):
            link = self[rnn_layer][p]
            # numpy's random.normal has no `dtype` keyword (cupy's does), so
            # sample first and cast afterwards to work on CPU and GPU alike
            s_w = xp.random.normal(mu, sigma, link.W.shape).astype(xp.float32)
            link.W.data = link.W.data + s_w

            if p == "upward":
                s_b = xp.random.normal(mu, sigma,
                                       link.b.shape).astype(xp.float32)
                link.b.data = link.b.data + s_b

    def add_weight_noise(self, mu, sigma):
        """Add Gaussian noise to all RNN layers and the decoder embeddings.

        Every layer in ``self.rnn_enc``, ``self.rnn_rev_enc`` (bidirectional
        models only) and ``self.rnn_dec`` is perturbed through
        ``add_gru_weight_noise`` or ``add_lstm_weight_noise`` depending on
        ``m_cfg['rnn_unit']``.  The same kind of noise is then added to
        ``self.embed_dec.W``.

        Args:
            mu: mean of the noise.
            sigma: standard deviation of the noise.
        """
        xp = cuda.cupy if self.gpuid >= 0 else np

        # add noise to rnn weights
        if self.m_cfg['bi_rnn']:
            rnn_layers = self.rnn_enc + self.rnn_rev_enc + self.rnn_dec
        else:
            rnn_layers = self.rnn_enc + self.rnn_dec

        if self.m_cfg['rnn_unit'] == RNN_GRU:
            add_layer_noise = self.add_gru_weight_noise
        else:
            add_layer_noise = self.add_lstm_weight_noise
        for rnn_layer in rnn_layers:
            add_layer_noise(rnn_layer, mu, sigma)

        # add noise to decoder embeddings
        # numpy's random.normal has no `dtype` keyword (cupy's does), so
        # sample first and cast afterwards to work on CPU and GPU alike
        embed_noise = xp.random.normal(mu, sigma, self.embed_dec.W.shape)
        self.embed_dec.W.data = (self.embed_dec.W.data +
                                 embed_noise.astype(xp.float32))

# In[ ]:



In [4]:
def get_bow_batch(m_dict, x_key, y_key, utt_list, vocab_dict, bow_dict,
                  max_enc, max_dec, input_path=''):
    """Build one bag-of-words batch for the utterances in ``utt_list``.

    For every utterance the encoder input is either loaded from
    ``<input_path>/<utt>.npy`` (falling back to
    ``<input_path>/<prefix>/<utt>.npy`` where ``prefix`` is the part of the
    utterance id before the first ``_``) when ``x_key == 'sp'``, or looked up
    word by word in ``vocab_dict[x_key]['w2i']``.  Inputs are truncated to
    ``max_enc``.  Labels are the unique ``bow_dict['w2i']`` ids of the target
    words with ids 0-3 removed, truncated to ``max_dec``.

    Utterances whose input is empty are left out of the batch.

    Args:
        m_dict: per-utterance data, indexed as ``m_dict[utt][key]``.
        x_key: encoder key; ``'sp'`` selects speech features from disk.
        y_key: decoder key.
        utt_list: utterance ids to include.
        vocab_dict: vocabulary used for text encoder input.
        bow_dict: bag-of-words vocabulary (``'w2i'`` mapping).
        max_enc: maximum encoder input length.
        max_dec: maximum number of label ids kept.
        input_path: directory holding the ``.npy`` speech features.

    Returns:
        dict with keys ``'X'`` (inputs), ``'t'`` (label ids wrapped in a
        list), ``'y'`` (multi-hot targets: 1 for present ids, -1 for ids 0-3,
        0 otherwise), ``'r'`` (reference id lists) and ``'l'`` (input
        lengths).  ``'X'`` and ``'y'`` are passed through ``F.pad_sequence``
        when the batch is not empty.

    Raises:
        FileNotFoundError: if a speech feature file cannot be found.
    """
    def bag_ids(words):
        # unique bag-of-words ids for `words`, without the ids 0-3
        return list(set([bow_dict['w2i'].get(w, UNK_ID) for w in words])
                    - set(range(4)))

    batch_data = {'X':[], 't':[], 'y':[], 'r':[], 'l': []}

    for u in utt_list:
        # ---------------------------------------------------------------------
        #  encoder input
        # ---------------------------------------------------------------------
        if x_key != 'sp':
            # text data: map words to ids
            word_ids = [vocab_dict[x_key]['w2i'].get(w, UNK_ID)
                        for w in m_dict[u][x_key]]
            x_data = xp.asarray(word_ids, dtype=xp.int32)[:max_enc]
        else:
            # speech data: look next to input_path first ...
            utt_sp_path = os.path.join(input_path, "{0:s}.npy".format(u))
            if not os.path.exists(utt_sp_path):
                # ... for training data, there are sub-folders
                utt_sp_path = os.path.join(input_path,
                                           u.split('_',1)[0],
                                           "{0:s}.npy".format(u))
            if not os.path.exists(utt_sp_path):
                raise FileNotFoundError("ERROR!! file not found: {0:s}".format(utt_sp_path))
            x_data = xp.load(utt_sp_path)[:max_enc]

        # ---------------------------------------------------------------------
        #  labels and references
        # ---------------------------------------------------------------------
        targets = m_dict[u][y_key]
        if type(targets) == list:
            en_ids = bag_ids(targets)
            r_data = [en_ids[:max_dec]]
        else:
            # dev and test data have multiple translations;
            # the first one provides the labels
            en_ids = bag_ids(targets[0])
            r_data = [bag_ids(r)[:max_dec] for r in targets]

        y_ids = en_ids[:max_dec]

        # ---------------------------------------------------------------------
        #  skip utterances without any input
        # ---------------------------------------------------------------------
        if len(x_data) == 0:
            continue

        y_data = xp.zeros(len(bow_dict['w2i']), dtype=xp.int32)
        y_data[y_ids] = 1
        y_data[list(range(4))] = -1

        batch_data['X'].append(x_data)
        batch_data['t'].append([y_ids])
        batch_data['y'].append(y_data)
        batch_data['r'].append(r_data)
        batch_data['l'].append(len(x_data))

    # -------------------------------------------------------------------------
    # pad only when there is something in the batch
    # -------------------------------------------------------------------------
    if len(batch_data['X']) == 0 or len(batch_data['y']) == 0:
        return batch_data

    batch_data['X'] = F.pad_sequence(batch_data['X'], padding=PAD_ID)
    batch_data['y'] = F.pad_sequence(batch_data['y'], padding=PAD_ID)
    return batch_data

In [5]:
def check_model(cfg_path):
    """Build the model and optimizer for ``cfg_path`` and resume if possible.

    Reads ``model_cfg.json`` and ``train_cfg.json`` from ``cfg_path``, creates
    a ``SpeechEncoderDecoder`` and an optimizer (Adam or SGD) with the
    configured hooks, and loads the checkpoint with the highest epoch number
    found in ``cfg_path`` together with the saved optimizer state, if any.
    Checkpoints are files whose name contains the model file stem
    (``seq2seq``) and ends in ``_<epoch>.<ext>``.

    Args:
        cfg_path: directory holding the two config files and the checkpoints.

    Returns:
        Tuple ``(max_epoch, model, optimizer, m_cfg, t_cfg)`` where
        ``max_epoch`` is the epoch of the loaded checkpoint (0 if none) and
        ``m_cfg`` has been extended with the ``model_dir``, ``train_log``,
        ``dev_log``, ``model_fname`` and ``opt_fname`` entries.

    Raises:
        FileNotFoundError: if ``m_cfg['data_path']`` does not exist.
    """
    def _epoch_of(fname):
        # epoch number encoded as "..._<epoch>.<ext>"; None if absent
        try:
            return int(fname.split('_')[-1].split('.')[0])
        except ValueError:
            return None

    # -------------------------------------------------------------------------
    # read config files model
    # -------------------------------------------------------------------------
    with open(os.path.join(cfg_path, "model_cfg.json"), "r") as model_f:
        m_cfg = json.load(model_f)
    # -------------------------------------------------------------------------
    with open(os.path.join(cfg_path, "train_cfg.json"), "r") as train_f:
        t_cfg = json.load(train_f)
    # -------------------------------------------------------------------------
    # check model path
    # -------------------------------------------------------------------------
    if not os.path.exists(m_cfg['data_path']):
        raise FileNotFoundError("ERROR!! file not found: {0:s}".format(m_cfg['data_path']))
    # end if
    # -------------------------------------------------------------------------
    # initialize new model
    # -------------------------------------------------------------------------
    model = SpeechEncoderDecoder(m_cfg, t_cfg['gpuid'])
    # a negative gpuid selects the CPU everywhere else in this file, so only
    # move the model when a GPU was actually requested
    if t_cfg['gpuid'] >= 0:
        model.to_gpu(t_cfg['gpuid'])
    # -------------------------------------------------------------------------
    # set up optimizer
    # -------------------------------------------------------------------------
    if t_cfg['optimizer'] == OPT_ADAM:
        print("using ADAM optimizer")
        optimizer = optimizers.Adam(alpha=t_cfg['lr'],
                                    beta1=0.9,
                                    beta2=0.999,
                                    eps=1e-08)
    else:
        print("using SGD optimizer")
        optimizer = optimizers.SGD(lr=t_cfg['lr'])

    # attach optimizer
    optimizer.setup(model)
    # -------------------------------------------------------------------------
    # optimizer settings
    # -------------------------------------------------------------------------
    if m_cfg['l2'] > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(m_cfg['l2']))

    # gradient clipping
    optimizer.add_hook(chainer.optimizer.GradientClipping(threshold=m_cfg['grad_clip']))

    # gradient noise
    if t_cfg['grad_noise_eta'] > 0:
        print("------ Adding gradient noise")
        optimizer.add_hook(chainer.optimizer.GradientNoise(eta=t_cfg['grad_noise_eta']))
        print("Finished adding gradient noise")
    # -------------------------------------------------------------------------
    # check last saved model
    # -------------------------------------------------------------------------
    max_epoch = 0
    # -------------------------------------------------------------------------
    # add debug info
    # -------------------------------------------------------------------------
    m_cfg['model_dir'] = cfg_path
    m_cfg['train_log'] = os.path.join(m_cfg['model_dir'], "train.log")
    m_cfg['dev_log'] = os.path.join(m_cfg['model_dir'], "dev.log")
    m_cfg['model_fname'] = os.path.join(m_cfg['model_dir'], "seq2seq.model")
    m_cfg['opt_fname'] = os.path.join(m_cfg['model_dir'], "train.opt")
    # -------------------------------------------------------------------------
    model_fil = m_cfg['model_fname']
    model_dir = os.path.dirname(model_fil)
    model_stem = os.path.basename(model_fil).replace('.model','')
    # ignore matching files without a numeric epoch suffix instead of
    # crashing on them while looking for the latest checkpoint
    model_files = [f for f in os.listdir(model_dir)
                   if model_stem in f and _epoch_of(f) is not None]
    if len(model_files) > 0:
        print("-"*80)
        latest = max(model_files, key=_epoch_of)
        max_model_fil = os.path.join(model_dir, latest)
        print('model found = \n{0:s}'.format(max_model_fil))
        serializers.load_npz(max_model_fil, model)
        print("finished loading ..")
        max_epoch = _epoch_of(latest)
        # load optimizer
        if os.path.exists(m_cfg['opt_fname']):
            print("optimizer found = {0:s}".format(m_cfg['opt_fname']))
            serializers.load_npz(m_cfg['opt_fname'], optimizer)
            print("finished loading optimizer ...")
        else:
            print("optimizer not found")
    else:
        print("-"*80)
        print('model not found')
    # end if model found
    # -------------------------------------------------------------------------
    return max_epoch, model, optimizer, m_cfg, t_cfg
# end check_model

In [6]:
def get_data_dicts(m_cfg):
    """Load the map, vocabulary, bucket and bag-of-words dictionaries.

    All dictionaries are pickles stored under ``m_cfg['data_path']``.  The
    bucket dictionary is (re)generated through ``prep_buckets.buckets_main``
    before it is loaded.  Progress and a short summary (utterances per
    category, vocabulary sizes) are printed.

    Args:
        m_cfg: model configuration; the keys ``data_path``, ``train_set``,
            ``stemmify`` (for Fisher training sets), ``buckets_num``,
            ``buckets_width``, ``enc_key``, ``dec_key``, ``train_scale`` and
            ``seed`` are read.

    Returns:
        Tuple ``(map_dict, vocab_dict, bucket_dict, bow_dict)``.
    """
    def _load_dict(path):
        print("loading dict: {0:s}".format(path))
        # NOTE(review): pickle can execute arbitrary code while loading;
        # only point data_path at trusted files.
        # The context manager closes the handle even if loading fails.
        with open(path, "rb") as dict_f:
            return pickle.load(dict_f)

    data_path = m_cfg['data_path']
    print("-"*50)
    # -------------------------------------------------------------------------
    # MAP dict
    # -------------------------------------------------------------------------
    map_dict = _load_dict(os.path.join(data_path,'map.dict'))
    # -------------------------------------------------------------------------
    # VOCAB
    # -------------------------------------------------------------------------
    if 'fisher' in m_cfg['train_set']:
        if m_cfg['stemmify'] == False:
            vocab_path = os.path.join(data_path, 'train_vocab.dict')
        else:
            vocab_path = os.path.join(data_path, 'train_stemmed_vocab.dict')
    else:
        vocab_path = os.path.join(data_path, 'ch_train_vocab.dict')
    vocab_dict = _load_dict(vocab_path)
    print("-"*50)
    # -------------------------------------------------------------------------
    # BUCKETS
    # -------------------------------------------------------------------------
    prep_buckets.buckets_main(data_path,
                              m_cfg['buckets_num'],
                              m_cfg['buckets_width'],
                              m_cfg['enc_key'],
                              scale=m_cfg['train_scale'],
                              seed=m_cfg['seed'])

    bucket_dict = _load_dict(
        os.path.join(data_path,
                     'buckets_{0:s}.dict'.format(m_cfg['enc_key'])))
    print("-"*50)
    # -------------------------------------------------------------------------
    # bag-of-words
    # -------------------------------------------------------------------------
    bow_dict = _load_dict(os.path.join(data_path, 'train_top_K_enw.dict'))
    print("-"*50)
    # -------------------------------------------------------------------------
    # INFORMATION
    # -------------------------------------------------------------------------
    for cat in map_dict:
        print('utterances in {0:s} = {1:d}'.format(cat, len(map_dict[cat])))

    if m_cfg['enc_key'] != 'sp':
        vocab_size_es = len(vocab_dict[m_cfg['enc_key']]['w2i'])
    else:
        # speech input has no encoder vocabulary
        vocab_size_es = 0
    vocab_size_en = len(vocab_dict[m_cfg['dec_key']]['w2i'])
    print('vocab size for {0:s} = {1:d}'.format(m_cfg['enc_key'],
                                                vocab_size_es))
    print('vocab size for {0:s} = {1:d}'.format(m_cfg['dec_key'],
                                                vocab_size_en))
    # -------------------------------------------------------------------------
    return map_dict, vocab_dict, bucket_dict, bow_dict

In [7]:
def feed_model(model, optimizer, m_dict, b_dict,
               batch_size, vocab_dict, bow_dict, x_key, y_key,
               train, input_path, max_dec, t_cfg, use_y=True):
    """Run one pass over all buckets, optionally updating the model.

    Batches come from ``create_batches(b_dict, batch_size)``; each one is
    turned into model input with ``get_bow_batch`` and passed to
    ``model.forward_bow``.  With ``use_y`` the labels are used and a loss is
    computed (under ``chainer.config.train = train``); without it the model
    only predicts and the loss is reported as 0.0.  Parameters are updated
    only when ``train`` is set and a loss was computed.

    Args:
        model: model exposing ``forward_bow`` and ``cleargrads``.
        optimizer: optimizer whose ``update`` is called after ``backward``.
        m_dict: per-utterance data for the set being processed.
        b_dict: bucket dictionary (``width_b`` is read here).
        batch_size: passed through to ``create_batches``.
        vocab_dict: vocabulary for text encoder input.
        bow_dict: bag-of-words vocabulary.
        x_key: encoder key.
        y_key: decoder key.
        train: whether to run in training mode and update parameters.
        input_path: directory with the speech features.
        max_dec: maximum number of label ids per utterance.
        t_cfg: training configuration (``gpuid`` and ``speech_noise``).
        use_y: whether to feed labels and compute the loss.

    Returns:
        Tuple ``(utts, loss_per_epoch)``.  ``utts`` has the keys ``ids``,
        ``preds``, ``probs`` and ``refs``; ``probs`` is the padded array of
        all per-utterance probabilities, or an empty list when no batch was
        processed.  ``loss_per_epoch`` is the mean loss over batches.
    """
    width_b = b_dict['width_b']
    utts = {"ids": [], "preds": [], "probs": [], "refs": []}

    total_loss = 0
    loss_per_epoch = 0
    total_loss_updates= 0

    sys.stderr.flush()
    # -------------------------------------------------------------------------
    # create batches of utterances - shuffled
    # -------------------------------------------------------------------------
    utt_list_batches, total_utts = create_batches(b_dict, batch_size)
    # -------------------------------------------------------------------------
    with tqdm(total=total_utts, ncols=80) as pbar:
        for i, (utt_list, b) in enumerate(utt_list_batches):
            # -----------------------------------------------------------------
            # get batch_data
            # -----------------------------------------------------------------
            batch_data = get_bow_batch(m_dict,
                                   x_key, y_key,
                                   utt_list,
                                   vocab_dict,
                                   bow_dict,
                                   ((b+1) * width_b),
                                   max_dec,
                                   input_path=input_path)
            # -----------------------------------------------------------------
            if (len(batch_data['X']) > 0 and len(batch_data['y']) > 0):
                # only select a CUDA device when a GPU was requested
                if t_cfg['gpuid'] >= 0:
                    cuda.get_device(t_cfg['gpuid']).use()
                loss = None
                if use_y:
                    # ---------------------------------------------------------
                    # using labels, computing loss
                    # also used for dev set
                    # ---------------------------------------------------------
                    with chainer.using_config('train', train):
                        # lengths must be those of THIS batch, not whatever
                        # `l` happens to be defined at notebook level
                        p_words, loss, p_probs = model.forward_bow(X=batch_data['X'],
                                                                   y=batch_data['y'],
                                                                   add_noise=t_cfg['speech_noise'],
                                                                   l=batch_data['l'])
                        loss_val = float(loss.data)
                else:
                    # ---------------------------------------------------------
                    # prediction only
                    # ---------------------------------------------------------
                    with chainer.using_config('train', False):
                        p_words, _, p_probs = model.forward_bow(X=batch_data['X'],
                                                                l=batch_data['l'])
                        loss_val = 0.0
                # -------------------------------------------------------------
                # add list of utterances used
                # -------------------------------------------------------------
                for u, pred, prob, ref in zip(utt_list, p_words, p_probs, batch_data['r']):
                    utts['ids'].append(u)
                    utts["preds"].append(pred)
                    utts["probs"].append(prob)
                    utts["refs"].append(ref)

                total_loss += loss_val
                total_loss_updates += 1
                loss_per_epoch = (total_loss / total_loss_updates)

                out_str = "b={0:d},l={1:.2f},avg={2:.2f}".format((b+1),loss_val,loss_per_epoch)
                # -------------------------------------------------------------
                # train mode logic
                # -------------------------------------------------------------
                # no loss exists in prediction-only mode, so there is
                # nothing to back-propagate
                if train and loss is not None:
                    # ---------------------------------------------------------
                    model.cleargrads()
                    loss.backward()
                    optimizer.update()
                    # ---------------------------------------------------------
                pbar.set_description('{0:s}'.format(out_str))
            else:
                print("no data in batch")
                print(utt_list)
            # update progress bar
            pbar.update(len(utt_list))
        # end for batches
    # end tqdm
    # padding an empty list would fail; leave it empty if nothing was run
    if len(utts["probs"]) > 0:
        utts["probs"] = F.pad_sequence(utts["probs"]).data
    return utts, loss_per_epoch
# end feed_model

In [8]:
# directory holding model_cfg.json / train_cfg.json and any checkpoints
# for the experiment inspected in this notebook
cfg_path = "./sp2bagwords/sp_1.0_h-256_e-128_rnn-2_hwy-2_cnn-32-2-5"

In [9]:
# build the model and optimizer from the configs and resume from the latest
# checkpoint if one exists (last_epoch is 0 otherwise)
last_epoch, model, optimizer, m_cfg, t_cfg = check_model(cfg_path)

cnn_out_dim = rnn_in_units =  640




using ADAM optimizer
--------------------------------------------------------------------------------
model not found


In [10]:
# array module used by the notebook-level helpers: cupy on GPU, numpy on CPU.
# get_bow_batch reads this global, so it must be set before that is called.
xp = cuda.cupy if t_cfg['gpuid'] >= 0 else np

In [11]:
%%capture

# shorthand for frequently used config entries
train_key = m_cfg['train_set']
dev_key = m_cfg['dev_set']
# NOTE(review): this value is overwritten by the dict at the end of this cell
batch_size=t_cfg['batch_size']
enc_key=m_cfg['enc_key']
dec_key=m_cfg['dec_key']
# default feature directory: the dev set
input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
# -------------------------------------------------------------------------
# get data dictionaries
# -------------------------------------------------------------------------
map_dict, vocab_dict, bucket_dict, bow_dict = get_data_dicts(m_cfg)
# NOTE(review): replaces the scalar batch size read from t_cfg above;
# presumably per-bucket-size batch sizes for create_batches -- confirm
batch_size = {'max': 96, 'med': 128, 'min': 256, 'scale': 1}

In [12]:
%%capture
print("-"*50)
prep_buckets.display_buckets(bucket_dict, "fisher_train")

In [13]:
len(bucket_dict['fisher_train']['buckets'])

10

In [26]:
# pick up to num_utts utterances from one bucket of the training set
curr_bucket = 8
num_utts = 50
utt_list = bucket_dict['fisher_train']['buckets'][curr_bucket][:num_utts]
# name of the split the utterances came from; later cells use it to choose
# the matching map_dict entry and feature directory
curr_set='fisher_train'
# the bucket may hold fewer than num_utts utterances
print(len(utt_list))

3


In [27]:
6*10 / 10

6.0

In [28]:
# Audio playback is only enabled for non-training splits; the feature
# directory follows the split named by `curr_set`.
play_audio = "train" not in curr_set
local_input_path = os.path.join(
    m_cfg['data_path'],
    m_cfg['dev_set'] if play_audio else m_cfg['train_set'])
# Bucket geometry is read the same way for either split: the width from the
# training buckets and the bucket count from the dev buckets.
width_b = bucket_dict[train_key]["width_b"]
num_b = bucket_dict[dev_key]["num_b"]

In [29]:
width_b, num_b

(200, 10)

In [30]:
# build one batch from the selected utterances; the encoder length limit is
# the upper edge of the chosen bucket and at most 200 label ids are kept
batch_data = get_bow_batch(map_dict[curr_set], 
                                enc_key,
                                dec_key,
                                utt_list,
                                vocab_dict,
                                bow_dict,
                                (curr_bucket+1) * width_b,
                                200,
                                input_path=local_input_path)

# X: padded inputs, y: multi-hot targets, t: label ids, l: input lengths
X, y, t, l = batch_data['X'], batch_data['y'], batch_data['t'], batch_data['l']

# NOTE(review): rebinds batch_size (previously a dict) to the number of
# utterances in this batch
batch_size = X.shape[0]

In [31]:
X.shape, l[:5]

((3, 1634, 80), [1623, 1634, 1616])

In [19]:
l[0] = 44

In [None]:
model.reduce_dim_len, np.floor(np.array(l, dtype='i') / model.reduce_dim_len)

In [None]:
len_check = np.floor(np.array(l, dtype='i') / model.reduce_dim_len)

In [None]:
i = 4

In [None]:
(np.ones(10, dtype='f')) * (i < np.array(l, dtype='i')), (np.array(l, dtype='i') >= i)

In [None]:
(np.ones(10, dtype='f')) * (i < np.floor(np.array(l, dtype='i') / model.reduce_dim_len))

In [None]:
i >= len_check

In [None]:
(np.ones(10, dtype='f')) * (i < np.floor(np.array(l, dtype='i') / model.reduce_dim_len))

In [None]:
(np.ones((10,2), dtype='f') * (i < len_check)[:, np.newaxis])

In [None]:
model.forward_enc(X, l)

In [None]:
model.h_final_rnn.shape

In [None]:
model.h_final_rnn[:2, :5]

In [None]:
# Manual training loop used to debug the weighted bag-of-words loss.
pred_words = []
# The batch is the same on every iteration, so load it once up front instead
# of re-reading the feature files from disk inside the loop.
batch_data = get_bow_batch(map_dict[curr_set],
                           enc_key,
                           dec_key,
                           utt_list,
                           vocab_dict,
                           bow_dict,
                           (curr_bucket+1) * m_cfg["buckets_width"],
                           200,
                           input_path=local_input_path)

X, y, t, l = batch_data['X'], batch_data['y'], batch_data['t'], batch_data['l']

batch_size = X.shape[0]

for i in range(5):
    # encode input
    model.forward_enc(X, l)

    if m_cfg['highway_layers'] > 0:
        highway_h = model.forward_highway(model.h_final_rnn)
    else:
        # without highway layers the output layer is fed the encoder state
        # directly (previously highway_h was left unbound or stale here)
        highway_h = model.h_final_rnn

    predicted_out = model.out(highway_h)

    # reference values: built-in mean loss and the unreduced per-label loss
    simple_loss = F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce="mean")
    loss = F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce="no")

    # per-label weights: ignored labels (y < 0) contribute nothing, absent
    # words weigh 1 and present words weigh 10
    loss_weights = xp.ones(shape=y.data.shape, dtype="f")
    loss_weights[y.data < 0] = 0
    loss_weights[y.data == 0] = 1
    loss_weights[y.data > 0] = 10
    loss_avg = F.mean(loss_weights * loss)
    # weighted loss --- plain mean of the per-label loss --- built-in mean
    print(i, "---".join(["{0:.3f}".format(float(val)) for val in (loss_avg.data, xp.mean(loss.data), simple_loss.data)]))
    model.cleargrads()
    loss_avg.backward()
    optimizer.update()

In [None]:
batch_data['l']

In [33]:
# five optimisation steps on the same batch through the model's own
# bag-of-words forward pass; the printed loss is the one returned by it
for i in range(5):
    # NOTE(review): chainer.config.train is not set explicitly here, so the
    # three-value return relies on the kernel being in training mode
    p_words, loss, p_probs = model.forward_bow(X=batch_data['X'],
                                                y=batch_data['y'],
                                                add_noise=t_cfg['speech_noise'],
                                              l=batch_data['l'])
    model.cleargrads()
    loss.backward()
    optimizer.update()
    loss_val = float(loss.data)
    print(loss_val)

0.10755518078804016
0.09179939329624176
0.10357362776994705
0.08566057682037354
0.07195165008306503


In [None]:
loss_val