In [1]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, modified_precision
from nltk.translate.chrf_score import sentence_chrf, corpus_chrf
from nltk.metrics import scores
import scipy.io.wavfile
from IPython.display import Audio
from IPython.display import display
from nltk.stem import *
# from nltk.stem.snowball import SnowballStemmer
from stemming.porter2 import stem
import stemming
from nltk.metrics.scores import recall

from nltk.corpus import stopwords

%matplotlib inline

In [2]:
tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),    
             (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),    
             (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),    
             (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),    
             (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]    
# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.    
for i in range(len(tableau20)):    
    r, g, b = tableau20[i]    
    tableau20[i] = (r / 255., g / 255., b / 255.)

In [3]:
smooth_fun = nltk.translate.bleu_score.SmoothingFunction()

In [4]:
from bow_run import *

In [5]:
def play_utt(utt, m_dict):
    sr, y = scipy.io.wavfile.read(os.path.join(wavs_path, utt.rsplit("-",1)[0]+'.wav'))
    start_t = min(seg['start'] for seg in m_dict[utt]['seg'])
    end_t = max(seg['end'] for seg in m_dict[utt]['seg'])
    print(start_t, end_t)
    start_t_samples, end_t_samples = int(start_t*sr), int(end_t*sr)
    display(Audio(y[start_t_samples:end_t_samples], rate=sr))

In [6]:
def display_words(m_dict, v_dict, preds, utts, dec_key, key, play_audio=False, displayN=-1):
    if displayN == -1:
        displayN = len(utts)
    es_ref = []
    en_ref = []
    google_ref = []
    google_pred = []
    for u in utts:
        es_ref.append(" ".join([w.decode() for w in m_dict[u]['es_w']]))
        if type(m_dict[u][dec_key]) == list:
            en_ref.append(" ".join([w.decode() for w in m_dict[u]['en_w']]))
        else:
            en_ref.append(" ".join([w.decode() for w in m_dict[u]['en_w'][0]]))
        google_pred.append(" ".join(google_hyp_r0[u]))
        google_ref.append(" ".join(google_dev_ref_0[u]))

    en_pred = []
    join_str = ' ' if dec_key.endswith('_w') else ''

    for p in preds:
        if type(p) == list:
            t_str = join_str.join([v_dict['i2w'][i].decode() for i in p])
            t_str = t_str[:t_str.find('_EOS')]
            en_pred.append(t_str)
        else:
            en_pred.append("")
        

    for u, es, en, p, g, gr in sorted(list(zip(utts, es_ref, en_ref, en_pred, google_pred, google_ref)))[:displayN]:
        # for reference, 1st word is GO_ID, no need to display
        print("Utterance: {0:s}".format(u))
        display_pp = PrettyTable(["cat","sent"], hrules=True)
        display_pp.align = "l"
        display_pp.header = False
        display_pp.add_row(["es ref", textwrap.fill(es,50)])
        display_pp.add_row(["en ref", textwrap.fill(en,50)])
        display_pp.add_row(["model pred", textwrap.fill(p,50)])
        display_pp.add_row(["model bleu", "{0:.2f}".format(sentence_bleu([en], p, smoothing_function=smooth_fun.method2))])
        display_pp.add_row(["google pred", textwrap.fill(g,50)])
        display_pp.add_row(["google bleu", "{0:.2f}".format(sentence_bleu([gr], g, smoothing_function=smooth_fun.method2))])
    

        print(display_pp)
        if play_audio:
            play_utt(u, m_dict)
    

In [7]:
def make_pred(utt, X, y=None, display_limit=10):
    # get shape
    batch_size = X.shape[0]
    # encode input
    model.forward_enc(X)
    # ---------------------------------------------------------------------
    # initialize decoder LSTM to final encoder state
    # ---------------------------------------------------------------------
    model.set_decoder_state()
    # ---------------------------------------------------------------------
    # swap axes of the decoder batch
    if y is not None:
        y = F.swapaxes(y, 0, 1)
    # -----------------------------------------------------------------
    # predict
    # -----------------------------------------------------------------
    # make return statements consistent
    return(decode_display(utt, batch_size=batch_size,
                          pred_limit=model.m_cfg['max_en_pred'],
#                           pred_limit=20,
                          y=y, display_limit=display_limit))

In [8]:
def decode_display(utt, batch_size, pred_limit, y=None, display_limit=10):
    xp = cuda.cupy if model.gpuid >= 0 else np
    # max number of predictions to make
    # if labels are provided, this variable is not used
    stop_limit = pred_limit
    # to track number of predictions made
    npred = 0
    # to store loss
    loss = 0
    # if labels are provided, use them for computing loss
    compute_loss = True if y is not None else False
    # ---------------------------------------------------------------------
    if compute_loss:
        stop_limit = len(y)-1
        # get starting word to initialize decoder
        curr_word = y[0]
    else:
        # intialize starting word to GO_ID symbol
        curr_word = Variable(xp.full((batch_size,), GO_ID, dtype=xp.int32))
    # ---------------------------------------------------------------------
    # flag to track if all sentences in batch have predicted EOS
    # ---------------------------------------------------------------------
    with cupy.cuda.Device(model.gpuid):
        check_if_all_eos = xp.full((batch_size,), False, dtype=xp.bool_)
    # ---------------------------------------------------------------------
    a_units = m_cfg['attn_units']
    ht = Variable(xp.zeros((batch_size, a_units), dtype=xp.float32))
    # ---------------------------------------------------------------------
    prob_out = {}
    prob_print_str = []
    while npred < (stop_limit):
        # -----------------------------------------------------------------
        # decode and predict
        #print("decoding with word: {0:s}".format(vocab_dict['en_w']['i2w'][curr_word.data[0].tolist()].decode()))
        pred_out, ht = model.decode(curr_word, ht)
        pred_word = F.argmax(pred_out, axis=1)
        # -----------------------------------------------------------------
        # printing conditional probabilities
        # -----------------------------------------------------------------
        pred_probs = xp.asnumpy(F.softmax(pred_out).data[0])
        top_n_probs = np.argsort(pred_probs)[-display_limit:]
        #print("-"*60)
        #print("predicting word : {0:d}".format(npred))
        prob_print_str.append("-" * 60)
        prob_print_str.append("predicting word : {0:d}".format(npred))
        
        # -----------------------------------------------------------------
#         if npred == 0:
#             sample_word = np.random.choice(range(len(pred_probs)), p=pred_probs)
#             sample_word = np.argsort(pred_probs)[-2]
#             print(np.argsort(pred_probs)[-2], np.argsort(pred_probs)[-1])
#             pred_word = Variable(xp.asarray([sample_word], dtype=xp.int32))
        # -----------------------------------------------------------------
        
        prob_out[npred] = {}
        for pi in top_n_probs[::-1]:
            prob_out[npred][v_dict['i2w'][pi].decode()] = "{0:.3f}".format(pred_probs[pi])
            #print("{0:10s} = {1:5.3f}".format(v_dict['i2w'][pi].decode(), pred_probs[pi]))
            prob_print_str.append("{0:10s} = {1:5.3f}".format(v_dict['i2w'][pi].decode(), pred_probs[pi]))
            
        # -----------------------------------------------------------------
        # save prediction at this time step
        # -----------------------------------------------------------------
        if npred == 0:
            pred_sents = pred_word.data
        else:
            pred_sents = xp.vstack((pred_sents, pred_word.data))
        # -----------------------------------------------------------------
        if compute_loss:
            # compute loss
            loss += F.softmax_cross_entropy(pred_out, y[npred+1],
                                               class_weight=model.mask_pad_id)
        # -----------------------------------------------------------------
        curr_word = pred_word
        # -----------------------------------------------------------------
        # check if EOS is predicted for all sentences
        # -----------------------------------------------------------------
        check_if_all_eos[pred_word.data == EOS_ID] = True
        if xp.all(check_if_all_eos):
            break
        # -----------------------------------------------------------------
        # increment number of predictions made
        npred += 1
        # -----------------------------------------------------------------
    
    out_fname = os.path.join(m_cfg['model_dir'], "probs", "{0:s}_probs.json".format(utt))
    with open(out_fname, "w") as out_f:
        json.dump(prob_out, out_f, indent=4)
    print("saved probs in : {0:s}".format(out_fname))
    return pred_sents.T, loss, "\n".join(prob_print_str)

In [9]:
def check_loss(eg_utt, curr_set='fisher_dev', teacher_ratio=1.0):
    # get shape
    if "train" in curr_set:
        local_input_path = os.path.join(m_cfg['data_path'], m_cfg['train_set'])
        play_audio = False
    else:
        local_input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
        play_audio = True
        
    eg_utt_bucket = -1
    for i, bucket in enumerate(bucket_dict[curr_set]["buckets"]):
        if eg_utt in bucket:
            eg_utt_bucket = i
            #print("found")
        # end if
    # end for
    #print("found in bucket : {0:d}".format(eg_utt_bucket))
    width_b = bucket_dict[dev_key]["width_b"]
    utt_list = [eg_utt]
    
    batch_data = get_batch(map_dict[curr_set], 
                           enc_key,
                           dec_key,
                           utt_list,
                           vocab_dict,
                           (eg_utt_bucket+1) * width_b,
                           200,
                           input_path=local_input_path)
    
    X, y = batch_data['X'], batch_data['y']
    
    batch_size = X.shape[0]
    # encode input
    model.forward_enc(X)
    # ---------------------------------------------------------------------
    # initialize decoder LSTM to final encoder state
    # ---------------------------------------------------------------------
    model.set_decoder_state()
    # ---------------------------------------------------------------------
    y = F.swapaxes(y, 0, 1)
        
    xp = cuda.cupy if model.gpuid >= 0 else np
    
    decoder_batch = y 
    batch_size = decoder_batch.shape[1]
    loss = 0
    # ---------------------------------------------------------------------
    # initialize hidden states as a zero vector
    # ---------------------------------------------------------------------
    a_units = model.m_cfg['attn_units']
    ht = Variable(xp.zeros((batch_size, a_units), dtype=xp.float32))
    # ---------------------------------------------------------------------
    decoder_input = decoder_batch[0]
    # for all sequences in the batch, feed the characters one by one
    for curr_word, next_word in zip(decoder_batch, decoder_batch[1:]):
        #print(curr_word, next_word)
        # -----------------------------------------------------------------
        # teacher forcing logic
        # -----------------------------------------------------------------
        use_label = True if random.random() < teacher_ratio else False
        if use_label:
            decoder_input = curr_word
        # -----------------------------------------------------------------
        # encode tokens
        # -----------------------------------------------------------------
        predicted_out, ht = model.decode(decoder_input, ht)
        decoder_input = F.argmax(predicted_out, axis=1)
        #print(decoder_input)
        # -----------------------------------------------------------------
        # compute loss
        # -----------------------------------------------------------------
        loss_arr = F.softmax_cross_entropy(predicted_out, next_word,
                                           class_weight=model.mask_pad_id)
        #print(loss_arr.data.tolist())
        loss += loss_arr
        # -----------------------------------------------------------------
    #print(loss, loss / (y.shape[0]-2), y.shape)
    return loss.data.tolist(), (loss / (y.shape[0]-1)).data.tolist()

In [10]:
def get_utt_data(eg_utt, curr_set='fisher_dev'):
    # get shape
    if "train" in curr_set:
        local_input_path = os.path.join(m_cfg['data_path'], m_cfg['train_set'])
        play_audio = False
    else:
        local_input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
        play_audio = True
        
    eg_utt_bucket = -1
    for i, bucket in enumerate(bucket_dict[curr_set]["buckets"]):
        if eg_utt in bucket:
            eg_utt_bucket = i
            #print("found")
        # end if
    # end for
    #print("found in bucket : {0:d}".format(eg_utt_bucket))
    width_b = bucket_dict[dev_key]["width_b"]
    utt_list = [eg_utt]
    
    
    batch_data = get_batch(map_dict[curr_set], 
                           enc_key,
                           dec_key,
                           utt_list,
                           vocab_dict,
                           (eg_utt_bucket+1) * width_b,
                           200,
                           input_path=local_input_path)
    
    return batch_data

In [59]:
def get_unpadded_batch(m_dict, x_key, y_key, utt_list, vocab_dict, bow_dict, max_enc, max_dec, input_path='', set_zero_num=500):
    batch_data = {'X':[], 't':[], 'y':[]}
    # -------------------------------------------------------------------------
    # loop through each utterance in utt list
    # -------------------------------------------------------------------------
    for i, u in enumerate(utt_list):
        # ---------------------------------------------------------------------
        #  add X data
        # ---------------------------------------------------------------------
        if x_key == 'sp':
            # -----------------------------------------------------------------
            # for speech data
            # -----------------------------------------------------------------
            # get path to speech file
            utt_sp_path = os.path.join(input_path, "{0:s}.npy".format(u))
            if not os.path.exists(utt_sp_path):
                # for training data, there are sub-folders
                utt_sp_path = os.path.join(input_path,
                                           u.split('_',1)[0],
                                           "{0:s}.npy".format(u))
            if os.path.exists(utt_sp_path):
                x_data = Variable(xp.load(utt_sp_path)[:max_enc])
            else:
                # -------------------------------------------------------------
                # exception if file not found
                # -------------------------------------------------------------
                raise FileNotFoundError("ERROR!! file not found: {0:s}".format(utt_sp_path))
                # -------------------------------------------------------------
        else:
            # -----------------------------------------------------------------
            # for text data
            # -----------------------------------------------------------------
            x_ids = [vocab_dict[x_key]['w2i'].get(w, UNK_ID) for w in m_dict[u][x_key]]
            x_ids = xp.asarray(x_ids, dtype=xp.int32)
            batch_data['X'].append(x_ids[:max_enc])
            # -----------------------------------------------------------------
        # ---------------------------------------------------------------------
        #  add labels
        # ---------------------------------------------------------------------
        if type(m_dict[u][y_key]) == list:
            en_ids = list(set([bow_dict['w2i'].get(w, UNK_ID) for w in m_dict[u][y_key]])-set(range(4)))
        else:
            # dev and test data have multiple translations
            # choose the first one for computing perplexity
            en_ids = list(set([bow_dict['w2i'].get(w, UNK_ID) for w in m_dict[u][y_key][0]])-set(range(4)))
        y_ids = en_ids[:max_dec]
        # ---------------------------------------------------------------------
        if len(x_data) > 0 and len(y_ids) > 0:
            batch_data['X'].append(x_data)
            batch_data['t'].append([y_ids])
            y_data = xp.zeros(len(bow_dict['w2i']), dtype=xp.int32)
            #y_data = -1 * xp.ones(len(bow_dict['w2i']), dtype=xp.int32)
            #set_some_to_0 = np.random.choice(np.array(range(len(bow_dict['w2i'])), dtype="i"), 
            #                                 size=set_zero_num, 
            #                                 replace=False)
            #y_data[set_some_to_0] = 0
            y_data[y_ids] = 1
            y_data[list(range(4))] = -1
            batch_data['y'].append(y_data)
            
    # -------------------------------------------------------------------------
    # end for all utterances in batch
    # -------------------------------------------------------------------------
#     if len(batch_data['X']) > 0 and len(batch_data['y']) > 0:
#         batch_data['X'] = F.pad_sequence(batch_data['X'], padding=PAD_ID)
#         batch_data['y'] = F.pad_sequence(batch_data['y'], padding=PAD_ID)
    return batch_data

In [11]:
def hmm_get_curr_bow_batch(m_dict, x_key, y_key, utt_list, vocab_dict, bow_dict, max_enc, max_dec, input_path='', set_zero_num=500):
    batch_data = {'X':[], 't':[], 'y':[]}
    # -------------------------------------------------------------------------
    # loop through each utterance in utt list
    # -------------------------------------------------------------------------
    for i, u in enumerate(utt_list):
        # ---------------------------------------------------------------------
        #  add X data
        # ---------------------------------------------------------------------
        if x_key == 'sp':
            # -----------------------------------------------------------------
            # for speech data
            # -----------------------------------------------------------------
            # get path to speech file
            utt_sp_path = os.path.join(input_path, "{0:s}.npy".format(u))
            if not os.path.exists(utt_sp_path):
                # for training data, there are sub-folders
                utt_sp_path = os.path.join(input_path,
                                           u.split('_',1)[0],
                                           "{0:s}.npy".format(u))
            if os.path.exists(utt_sp_path):
                x_data = xp.load(utt_sp_path)[:max_enc]
            else:
                # -------------------------------------------------------------
                # exception if file not found
                # -------------------------------------------------------------
                raise FileNotFoundError("ERROR!! file not found: {0:s}".format(utt_sp_path))
                # -------------------------------------------------------------
        else:
            # -----------------------------------------------------------------
            # for text data
            # -----------------------------------------------------------------
            x_ids = [vocab_dict[x_key]['w2i'].get(w, UNK_ID) for w in m_dict[u][x_key]]
            x_ids = xp.asarray(x_ids, dtype=xp.int32)
            batch_data['X'].append(x_ids[:max_enc])
            # -----------------------------------------------------------------
        # ---------------------------------------------------------------------
        #  add labels
        # ---------------------------------------------------------------------
        if type(m_dict[u][y_key]) == list:
            en_ids = list(set([bow_dict['w2i'].get(w, UNK_ID) for w in m_dict[u][y_key]])-set(range(4)))
        else:
            # dev and test data have multiple translations
            # choose the first one for computing perplexity
            en_ids = list(set([bow_dict['w2i'].get(w, UNK_ID) for w in m_dict[u][y_key][0]])-set(range(4)))
        y_ids = en_ids[:max_dec]
        # ---------------------------------------------------------------------
        if len(x_data) > 0 and len(y_ids) > 0:
            batch_data['X'].append(x_data)
            batch_data['t'].append([y_ids])
            y_data = xp.zeros(len(bow_dict['w2i']), dtype=xp.int32)
            #y_data = -1 * xp.ones(len(bow_dict['w2i']), dtype=xp.int32)
            #set_some_to_0 = np.random.choice(np.array(range(len(bow_dict['w2i'])), dtype="i"), 
            #                                 size=set_zero_num, 
            #                                 replace=False)
            #y_data[set_some_to_0] = 0
            y_data[y_ids] = 1
            y_data[list(range(4))] = -1
            batch_data['y'].append(y_data)
            
    # -------------------------------------------------------------------------
    # end for all utterances in batch
    # -------------------------------------------------------------------------
    if len(batch_data['X']) > 0 and len(batch_data['y']) > 0:
        batch_data['X'] = F.pad_sequence(batch_data['X'], padding=PAD_ID)
        batch_data['y'] = F.pad_sequence(batch_data['y'], padding=PAD_ID)
    return batch_data

### Fisher dev

In [12]:
cfg_path = "./sp2bagwords/sp_1.0_h-128_e-128_rnn-2_hwy-1_cnn-128-5-5"

In [13]:
!cd .

In [14]:
last_epoch, model, optimizer, m_cfg, t_cfg = check_model(cfg_path);

cnn_out_dim = rnn_in_units =  640




using SGD optimizer
--------------------------------------------------------------------------------
model not found


In [15]:
bow_dict_path = os.path.join(m_cfg['data_path'],
                                     'train_top_K_enw.dict')
if os.path.exists(bow_dict_path):
    bow_dict = pickle.load(open(bow_dict_path, "rb"))

In [16]:
%%capture

train_key = m_cfg['train_set']
dev_key = m_cfg['dev_set']
batch_size=t_cfg['batch_size']
enc_key=m_cfg['enc_key']
dec_key=m_cfg['dec_key']
input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
# -------------------------------------------------------------------------
# get data dictionaries
# -------------------------------------------------------------------------
map_dict, vocab_dict, bucket_dict, bow_dict = get_data_dicts(m_cfg)
batch_size = {'max': 96, 'med': 128, 'min': 256, 'scale': 1}

In [17]:
sum(bow_dict['freq'].values()), sum(vocab_dict['en_w']['freq'].values()), sum(bow_dict['freq'].values()) / sum(vocab_dict['en_w']['freq'].values())

(308663, 1497356, 0.2061386871258405)

In [18]:
list(bow_dict['i2w'].keys())[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [19]:
random.seed("meh")
# random.seed("haha")

In [20]:
# Eval parameters
ref_index = -1
min_len, max_len= 0, m_cfg['max_en_pred']
# min_len, max_len = 0, 10
displayN = 50
m_dict=map_dict[dev_key]
# wavs_path = os.path.join(m_cfg['data_path'], "wavs")
wavs_path = os.path.join("../chainer2/speech2text/both_fbank_out/", "wavs")
v_dict = vocab_dict['en_w']
key = m_cfg['dev_set']

### BOW trials

In [21]:
bucket_dict['fisher_dev']['buckets'][0][:1]

['20051009_182032_217_fsp-B-1']

In [22]:
curr_bucket = 0
num_utts = 10
utt_list = bucket_dict['fisher_train']['buckets'][curr_bucket][:num_utts]
curr_set='fisher_train'

In [40]:
if "train" in curr_set:
    local_input_path = os.path.join(m_cfg['data_path'], m_cfg['train_set'])
    play_audio = False
else:
    local_input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
    play_audio = True

width_b = bucket_dict[dev_key]["width_b"]

batch_data = hmm_get_curr_bow_batch(map_dict[curr_set], 
                                enc_key,
                                dec_key,
                                utt_list,
                                vocab_dict,
                                bow_dict,
                                (curr_bucket+1) * width_b,
                                200,
                                input_path=local_input_path)

X, y, t = batch_data['X'], batch_data['y'], batch_data['t']

batch_size = X.shape[0]

In [24]:
t

[[[4]], [[8, 9, 85]], [[8]], [[8]], [[23]], [[37]]]

In [28]:
xp.min(batch_data['X'].data)

array(-11.340629577636719, dtype=float32)

In [60]:
if "train" in curr_set:
    local_input_path = os.path.join(m_cfg['data_path'], m_cfg['train_set'])
    play_audio = False
else:
    local_input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
    play_audio = True

width_b = bucket_dict[dev_key]["width_b"]

batch_data = get_unpadded_batch(map_dict[curr_set], 
                                enc_key,
                                dec_key,
                                utt_list,
                                vocab_dict,
                                bow_dict,
                                (curr_bucket+1) * width_b,
                                200,
                                input_path=local_input_path)

X, y, t = batch_data['X'], batch_data['y'], batch_data['t']

# batch_size = X.shape[0]

In [61]:
t_nlstm = L.NStepBiLSTM(2, 80, 256, 0.3)

In [62]:
h.shape

(35, 6, 640)

In [63]:
type(X)

list

In [64]:
t_rnn = t_nlstm(hx=None, cx=None, xs=X)

ValueError: numpy and cupy must not be used together
type(W): <class 'numpy.ndarray'>, type(x): <class 'cupy.core.core.ndarray'>

In [25]:
print(X.shape, y.shape)

(6, 210, 80) (6, 104)


In [None]:
# optimizer.hyperparam.alpha = 0.001
optimizer.hyperparam.lr = 1.0

In [None]:
predicted_out[0,:15]

In [None]:
PRED_THRESH = 0.6

In [None]:
for i in range(1):
    pred_words = []
    batch_data = hmm_get_curr_bow_batch(map_dict[curr_set], 
                                enc_key,
                                dec_key,
                                utt_list,
                                vocab_dict,
                                bow_dict,
                                (curr_bucket+1) * width_b,
                                200,
                                input_path=local_input_path,
                                set_zero_num=50)

    X, y, t = batch_data['X'], batch_data['y'], batch_data['t']

    batch_size = X.shape[0]
    # encode input
    model.forward_enc(X)
    
    if m_cfg['highway_layers'] > 0:
        highway_h = model.forward_highway(model.h_final_rnn)

    predicted_out = model.out(highway_h)
    
    simple_loss = F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce="mean")
    loss = F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce="no")
    
    loss_weights = xp.ones(shape=y.data.shape, dtype="f")
    loss_weights[y.data < 0] = 0
    loss_weights[y.data == 0] = 1
    loss_weights[y.data > 0] = 10
    #loss_avg = F.average(F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce='no'), weights=loss_weights)
    loss_avg = F.mean(loss_weights * loss)
    print(i, "---".join(["{0:.3f}".format(float(val)) for val in (loss_avg.data, xp.mean(loss.data), simple_loss.data)]))
    model.cleargrads()
    loss_avg.backward()
    optimizer.update()
#     for row in predicted_out.data[:5]:
#         print(xp.where(row > 0.6)[0][:20])

In [None]:
probs = []
for i, row in enumerate(predicted_out.data):
    print(row[10:12])
    pred_inds = xp.where(row >= PRED_THRESH)[0]
    if len(pred_inds) > 20:
        pred_inds = xp.argsort(row)[-20:][::-1]
    #pred_words.append([bow_dict['i2w'][i] for i in pred_inds.tolist()])
    curr_row = [i for i in pred_inds.tolist() if i > 3]
#     if i < 5:
#         print(curr_row)
    pred_words.append(curr_row)
    probs.append(row)
# print(pred_words)

In [None]:
probs = F.pad_sequence(probs).data

In [None]:
probs[:,10:12]

In [None]:
N = 5
for row, pred, ttt in zip(y.data[:N], predicted_out.data[:N], batch_data['t']):
    #print(xp.where(row == 1))
    print(ttt[0])
    print(xp.where(pred >= PRED_THRESH)[0].tolist())
    print(set(xp.where(pred >= PRED_THRESH)[0].tolist()) & set(ttt[0]))
    print("-"*50)

In [None]:
basic_precision_recall(batch_data['t'], pred_words)

In [None]:
F.binary_accuracy(predicted_out.data, y)

In [None]:
batch_data['y'][:,:20]

In [None]:
loss_weight = xp.ones(shape=y.data.shape, dtype="f")
loss_weight[y.data < 0] = 0
loss_weight[y.data == 0] = 10
loss_weight[y.data > 0] = 1

In [None]:
y.data[0][:20], loss_weight[0,:20]

In [None]:
loss_full = F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce='no')

In [None]:
loss_full.shape, loss_weight.shape, loss_full[0,:12]

In [None]:
F.average(loss_full, weights=loss_weight), F.mean(loss_full), F.mean((loss_full*loss_weight))

In [None]:
F.average(F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce="no").data), loss

In [None]:
F.sigmoid_cross_entropy(predicted_out, y, normalize=True, reduce="mean").data

In [None]:
out_h = model.out_1(model.h_final_rnn)
predicted_out = model.out_2(F.sigmoid(out_h))
loss = F.sigmoid_cross_entropy(predicted_out, y, normalize=True)

In [None]:
print(loss.data)

In [None]:
model.cleargrads()
loss.backward()
optimizer.update()

In [None]:
predicted_out.data[0,9:20]

In [None]:
print(xp.where(predicted_out.data > 0))

In [None]:
X.shape, y.shape

In [None]:
y[0,:50]

In [None]:
xp.where(y[0] == 1)

In [None]:
set(t[0])

In [None]:
[vocab_dict['en_w']['i2w'][i] for i in t[0]]

In [None]:
model.h_final_rnn.shape

In [None]:
model.h_final_rnn[-1,:10], model.h_final_rnn[-1,-10:]

In [None]:
model.L1_enc.h[-1,:10], model.L1_rev_enc.h[-1][-10:]

In [None]:
h_fwd = model.L1_enc.h.data
h_rev = model.L1_rev_enc.h.data

In [None]:
h_fwd[-1,:10], h_rev[-1,-10:]

In [None]:
F.concat((h_fwd, h_rev), axis=1)[-1][:10], F.concat((h_fwd, h_rev), axis=1)[-1][-10:]

In [None]:
pata = model[model.rnn_enc[-1]].h.data
if model.m_cfg['bi_rnn']:
    h_rev = model[model.rnn_rev_enc[-1]].h.data
    pata = F.concat((pata, h_rev), axis=1)

In [None]:
pata[-1][-10:]

In [None]:
model.out_1

In [None]:
haha = F.sigmoid(model.out_1(model.h_final_rnn))

In [None]:
from scipy.special import expit

In [None]:
y.shape

In [None]:
dummy_y = xp.ones(y.shape, dtype='i')

In [None]:
hu = model.out_2(haha)

In [None]:
F.sigmoid_cross_entropy(hu, dummy_y)

In [None]:
loss = F.sigmoid_cross_entropy(hu, y)
loss

In [None]:
model.cleargrads()
loss.backward()
optimizer.update()

In [None]:
ha = F.sigmoid(model.out_2(haha)).data

In [None]:
[expit(i) for i in hu[0][:10].tolist()]

In [None]:
ha[0].data[:10]

In [None]:
xp.where(ha[0].data >= 0)

In [None]:
loss = F.sigmoid_cross_entropy(model.out_2(haha), y, normalize=True)

In [None]:
y[0][:13]

In [None]:
loss

In [None]:
ha.shape

In [None]:
ha[0,:10]

In [None]:
F.sigmoid

In [None]:
ha.data.shape

In [None]:
batch_data['y'][0,:5]

In [None]:
y = np.array([[0.1, 0.7, 0.2], # prediction label is 1
              [8.0, 1.0, 2.0], # prediction label is 0
              [-8.0, 1.0, 2.0], # prediction label is 2
              [-8.0, -1.0, -2.0]]) # prediction label is 1
t = np.array([1, 0, 2, 1], 'i')

In [None]:
F.accuracy(y, t).data # 100% accuracy because all samples are correct

In [None]:
t = np.array([1, 0, 0, 0], 'i')
F.accuracy(y, t).data # 100% accuracy because all samples are correct

In [None]:
ha.shape, batch_data['y'].shape

In [None]:
F.binary_accuracy(ha.data, batch_data['y'])