In [1]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, modified_precision
from nltk.translate.chrf_score import sentence_chrf, corpus_chrf
from nltk.metrics import scores
import scipy.io.wavfile
from IPython.display import Audio
from IPython.display import display
from nltk.stem import *
# from nltk.stem.snowball import SnowballStemmer
from stemming.porter2 import stem
import stemming
from nltk.metrics.scores import recall

from nltk.corpus import stopwords

%matplotlib inline

In [2]:
# "Tableau 20" colour palette, stored as (r, g, b) tuples already scaled from
# 0-255 integers to the [0, 1] floats that matplotlib accepts.
tableau20 = [
    (red / 255., green / 255., blue / 255.)
    for red, green, blue in [
        (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
        (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
        (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
        (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
        (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
    ]
]

In [3]:
smooth_fun = nltk.translate.bleu_score.SmoothingFunction()

In [4]:
from nmt_run import *

In [5]:
def play_utt(utt, m_dict):
    """Embed an audio player for the stretch of the recording covering `utt`.

    The wav file name is the utterance id with its final "-<suffix>" removed,
    looked up under the module-level `wavs_path`. The clip runs from the
    earliest segment start to the latest segment end listed in
    m_dict[utt]['seg']; both times are printed before the player is shown.
    """
    wav_file = os.path.join(wavs_path, utt.rsplit("-", 1)[0] + '.wav')
    sr, y = scipy.io.wavfile.read(wav_file)

    segments = m_dict[utt]['seg']
    start_t = min(piece['start'] for piece in segments)
    end_t = max(piece['end'] for piece in segments)
    print(start_t, end_t)

    # segment times are multiplied by the sample rate to obtain sample offsets
    first_sample = int(start_t * sr)
    last_sample = int(end_t * sr)
    display(Audio(y[first_sample:last_sample], rate=sr))

In [6]:
def display_words(m_dict, v_dict, preds, utts, dec_key, key, play_audio=False, displayN=-1):
    """Print source reference, target reference and prediction for each utterance.

    Args:
        m_dict: per-utterance metadata; each entry is read for 'es_w', 'en_w'
            and `dec_key` (and 'seg' when audio playback is requested).
        v_dict: vocabulary dict; v_dict['i2w'][i] is the bytes token for id i.
        preds: one sequence of predicted token ids per utterance, aligned
            with `utts`.
        utts: utterance ids.
        dec_key: decoder key; predictions are joined with spaces when it ends
            in '_w', otherwise concatenated without a separator.
        key: accepted for interface compatibility; not used here.
        play_audio: when True, call play_utt for every displayed utterance.
        displayN: maximum number of utterances to print (-1 means all).

    Returns:
        None. Everything is written to stdout.
    """
    if displayN == -1:
        displayN = len(utts)
    es_ref = []
    en_ref = []
    for u in utts:
        es_ref.append(" ".join([w.decode() for w in m_dict[u]['es_w']]))
        # NOTE(review): the list check looks at `dec_key`, but the reference
        # shown is always taken from 'en_w' - confirm this is intended.
        if type(m_dict[u][dec_key]) == list:
            en_ref.append(" ".join([w.decode() for w in m_dict[u]['en_w']]))
        else:
            # non-list entries: only the first element is shown
            en_ref.append(" ".join([w.decode() for w in m_dict[u]['en_w'][0]]))

    en_pred = []
    join_str = ' ' if dec_key.endswith('_w') else ''

    for p in preds:
        t_str = join_str.join([v_dict['i2w'][i].decode() for i in p])
        # Cut at the first _EOS marker only when one exists: str.find returns
        # -1 when it is absent, and slicing with -1 would silently drop the
        # last character of the prediction.
        eos_pos = t_str.find('_EOS')
        if eos_pos != -1:
            t_str = t_str[:eos_pos]
        en_pred.append(t_str)

    for u, es, en, p in sorted(list(zip(utts, es_ref, en_ref, en_pred)))[:displayN]:
        # for reference, 1st word is GO_ID, no need to display
        print("Utterance: {0:s}".format(u))
        display_pp = PrettyTable(["cat","sent"], hrules=True)
        display_pp.align = "l"
        display_pp.header = False
        display_pp.add_row(["es ref", textwrap.fill(es,50)])
        display_pp.add_row(["en ref", textwrap.fill(en,50)])
        display_pp.add_row(["en pred", textwrap.fill(p,50)])

        print(display_pp)
        if play_audio:
            play_utt(u, m_dict)
    

In [34]:
def make_pred(utt, X, y=None, display_limit=10):
    """Encode the batch `X` with the global model and decode it greedily.

    The first axis of `X` is taken as the batch size. When labels `y` are
    given, their first two axes are swapped before being handed to
    decode_display. The prediction length is capped by
    model.m_cfg['max_en_pred']. Returns whatever decode_display returns.
    """
    n_in_batch = X.shape[0]

    # run the encoder, then seed the decoder state from it
    model.forward_enc(X)
    model.set_decoder_state()

    # swap axes of the decoder batch
    labels = None if y is None else F.swapaxes(y, 0, 1)

    max_pred_len = model.m_cfg['max_en_pred']
    return decode_display(utt,
                          batch_size=n_in_batch,
                          pred_limit=max_pred_len,
                          y=labels,
                          display_limit=display_limit)

In [286]:
def decode_display(utt, batch_size, pred_limit, y=None, display_limit=10):
    """Greedily decode a batch and log the top candidate words at every step.

    At each step the arg-max word of every batch element is fed back in as
    the next decoder input. The softmax distribution of the FIRST batch
    element only is ranked, and its `display_limit` most probable words are
    recorded.

    Args:
        utt: utterance id, used to name the JSON file of probabilities.
        batch_size: number of sequences decoded in parallel.
        pred_limit: maximum number of steps when no labels are given.
        y: optional labels indexed by time step; when given, decoding runs
            for len(y)-1 steps, starts from y[0], and the cross-entropy
            against y[step+1] is accumulated.
        display_limit: number of top candidates recorded per step.

    Returns:
        (pred_sents, loss, prob_str): `pred_sents` is always 2-D with shape
        (batch_size, steps_decoded); `loss` is the accumulated loss (0 when
        no labels are given); `prob_str` is a printable report of the
        per-step candidates.

    Side effects:
        Writes the per-step candidates to
        <m_cfg['model_dir']>/<utt>_probs.json and prints the file name.
    """
    on_gpu = model.gpuid >= 0
    xp = cuda.cupy if on_gpu else np
    # max number of predictions to make
    # if labels are provided, this variable is not used
    stop_limit = pred_limit
    # to track number of predictions made
    npred = 0
    # to store loss
    loss = 0
    # if labels are provided, use them for computing loss
    compute_loss = y is not None
    # ---------------------------------------------------------------------
    if compute_loss:
        stop_limit = len(y)-1
        # get starting word to initialize decoder
        curr_word = y[0]
    else:
        # initialize starting word to GO_ID symbol
        curr_word = Variable(xp.full((batch_size,), GO_ID, dtype=xp.int32))
    # ---------------------------------------------------------------------
    # flag to track if all sentences in batch have predicted EOS
    # ---------------------------------------------------------------------
    if on_gpu:
        # allocate the flags on the model's device; the device context only
        # exists (and is only needed) when running on a GPU
        with cuda.cupy.cuda.Device(model.gpuid):
            check_if_all_eos = xp.full((batch_size,), False, dtype=xp.bool_)
    else:
        check_if_all_eos = xp.full((batch_size,), False, dtype=xp.bool_)
    # ---------------------------------------------------------------------
    a_units = m_cfg['attn_units']
    ht = Variable(xp.zeros((batch_size, a_units), dtype=xp.float32))
    # ---------------------------------------------------------------------
    prob_out = {}
    prob_print_str = []
    # one entry per decoding step, each holding the predicted ids of the batch
    step_preds = []
    while npred < stop_limit:
        # -----------------------------------------------------------------
        # decode and predict
        pred_out, ht = model.decode(curr_word, ht)
        pred_word = F.argmax(pred_out, axis=1)
        # -----------------------------------------------------------------
        # conditional probabilities of the first sequence in the batch;
        # cuda.to_cpu accepts both numpy and cupy arrays, so this also works
        # when running on the CPU
        # -----------------------------------------------------------------
        pred_probs = cuda.to_cpu(F.softmax(pred_out).data[0])
        top_n_probs = np.argsort(pred_probs)[-display_limit:]
        prob_print_str.append("-" * 60)
        prob_print_str.append("predicting word : {0:d}".format(npred))

        prob_out[npred] = {}
        # argsort is ascending, so walk it backwards: most probable first
        for pi in top_n_probs[::-1]:
            word = v_dict['i2w'][pi].decode()
            prob_out[npred][word] = "{0:.3f}".format(pred_probs[pi])
            prob_print_str.append("{0:10s} = {1:5.3f}".format(word, pred_probs[pi]))

        # -----------------------------------------------------------------
        # save prediction at this time step
        # -----------------------------------------------------------------
        step_preds.append(pred_word.data)
        # -----------------------------------------------------------------
        if compute_loss:
            # compute loss
            loss += F.softmax_cross_entropy(pred_out, y[npred+1],
                                               class_weight=model.mask_pad_id)
        # -----------------------------------------------------------------
        curr_word = pred_word
        # -----------------------------------------------------------------
        # check if EOS is predicted for all sentences
        # -----------------------------------------------------------------
        check_if_all_eos[pred_word.data == EOS_ID] = True
        if xp.all(check_if_all_eos):
            break
        # -----------------------------------------------------------------
        # increment number of predictions made
        npred += 1
        # -----------------------------------------------------------------

    # Stack once at the end so the result is (steps, batch) even when only a
    # single step was decoded; with no steps at all return an empty matrix
    # instead of failing on an unbound name.
    if step_preds:
        pred_sents = xp.vstack(step_preds)
    else:
        pred_sents = xp.zeros((0, batch_size), dtype=xp.int32)

    out_fname = os.path.join(m_cfg['model_dir'], "{0:s}_probs.json".format(utt))
    with open(out_fname, "w") as out_f:
        json.dump(prob_out, out_f, indent=4)
    print("saved probs in : {0:s}".format(out_fname))
    return pred_sents.T, loss, "\n".join(prob_print_str)

### Fisher dev

In [9]:
cfg_path = "./sp2enw/sp_1.0_l2e-3/"

In [10]:
last_epoch, model, optimizer, m_cfg, t_cfg = check_model(cfg_path)

cnn_out_dim = rnn_in_units =  320




using ADAM optimizer
--------------------------------------------------------------------------------
model found = 
./sp2enw/sp_1.0_l2e-3/seq2seq_75.model
finished loading ..
optimizer not found


In [11]:
# Pull the dataset keys and encoder/decoder keys out of the loaded model and
# training configs (m_cfg / t_cfg are returned by check_model).
train_key = m_cfg['train_set']
dev_key = m_cfg['dev_set']
# NOTE(review): this value is replaced by the hard-coded dict at the end of
# this cell, so the t_cfg batch size is never the one left in `batch_size`.
batch_size=t_cfg['batch_size']
enc_key=m_cfg['enc_key']
dec_key=m_cfg['dec_key']
# Location of the dev-set inputs: <data_path>/<dev_set>.
input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
# -------------------------------------------------------------------------
# get data dictionaries
# -------------------------------------------------------------------------
map_dict, vocab_dict, bucket_dict = get_data_dicts(m_cfg)
# Overrides the batch_size read from t_cfg above with fixed values.
batch_size = {'max': 96, 'med': 128, 'min': 256, 'scale': 1}

--------------------------------------------------
loading dict: ../chainer2/speech2text/both_fbank_out/map.dict
loading dict: ../chainer2/speech2text/both_fbank_out/train_vocab.dict
--------------------------------------------------
--------------------------------------------------
loading info_dict from=../chainer2/speech2text/both_fbank_out/info.dict


100%|██████████| 3977/3977 [00:00<00:00, 514901.44it/s]
100%|██████████| 3959/3959 [00:00<00:00, 514843.57it/s]
100%|██████████| 3641/3641 [00:00<00:00, 519031.40it/s]
 39%|███▊      | 53640/138708 [00:00<00:00, 536261.15it/s]

--------------------------------------------------
creating buckets for: fisher_dev
creating buckets for key: sp
creating buckets for: fisher_dev2
creating buckets for key: sp
creating buckets for: fisher_test
creating buckets for key: sp
creating buckets for: fisher_train
creating buckets for key: sp


100%|██████████| 138708/138708 [00:00<00:00, 627039.47it/s]
100%|██████████| 3801/3801 [00:00<00:00, 633545.92it/s]
100%|██████████| 1824/1824 [00:00<00:00, 655113.08it/s]
100%|██████████| 14284/14284 [00:00<00:00, 640791.46it/s]


creating buckets for: callhome_devtest
creating buckets for key: sp
creating buckets for: callhome_evltest
creating buckets for key: sp
creating buckets for: callhome_train
creating buckets for key: sp
--------------------------------------------------
saving info dict in: ../chainer2/speech2text/both_fbank_out/buckets_sp.dict
all done ...
loading dict: ../chainer2/speech2text/both_fbank_out/buckets_sp.dict
--------------------------------------------------
utterances in fisher_dev = 3979
utterances in fisher_dev2 = 3961
utterances in fisher_test = 3641
utterances in fisher_train = 138819
utterances in callhome_devtest = 3966
utterances in callhome_evltest = 1829
utterances in callhome_train = 15080
vocab size for sp = 0
vocab size for en_w = 17378


In [12]:
random.seed("meh")
# random.seed("haha")

In [13]:
# Eval parameters
# NOTE(review): ref_index, min_len and max_len are not read by any of the
# visible cells - confirm they are still needed.
ref_index = -1
min_len, max_len= 0, m_cfg['max_en_pred']
# min_len, max_len = 0, 5
# Upper bound on the utterances printed per display_words call
# (generate_translate_probs passes it as displayN).
displayN = 50
# Utterance metadata for the dev set (dev_key is m_cfg['dev_set']).
m_dict=map_dict[dev_key]
# wavs_path = os.path.join(m_cfg['data_path'], "wavs")
# Directory play_utt reads .wav files from; hard-coded here instead of being
# derived from m_cfg['data_path'] as in the commented-out line above.
wavs_path = os.path.join("../chainer2/speech2text/both_fbank_out/", "wavs")
# English word vocabulary; its 'i2w' table maps predicted ids back to words.
v_dict = vocab_dict['en_w']
# Same value as dev_key; forwarded to display_words as its `key` argument.
key = m_cfg['dev_set']

In [14]:
# NOTE(review): the absolute chdir below decides the final working directory,
# so the relative ".." step before it has no lasting effect. Relative paths
# defined in earlier cells (e.g. cfg_path, wavs_path) are resolved against
# this new working directory from here on. The absolute path is
# machine-specific.
os.chdir("..")
os.chdir("/afs/inf.ed.ac.uk/group/project/lowres/work/speech2text")

### View model

In [287]:
def generate_translate_probs(eg_utt, curr_set="fisher_dev"):
    """Translate a single utterance and print its per-step word candidates.

    Looks up which bucket of `curr_set` contains `eg_utt`, builds a batch
    holding just that utterance, decodes it with make_pred (chainer 'train'
    config switched off), prints the references next to the prediction via
    display_words, and finally prints the per-step probability report.

    Args:
        eg_utt: utterance id to translate.
        curr_set: name of the data set the utterance belongs to. Sets whose
            name contains "train" read inputs from m_cfg['train_set'] and
            skip audio playback; all others read from m_cfg['dev_set'] and
            play the audio.

    Raises:
        ValueError: if `eg_utt` is not in any bucket of `curr_set`.
    """
    if "train" in curr_set:
        local_input_path = os.path.join(m_cfg['data_path'], m_cfg['train_set'])
        play_audio = False
    else:
        local_input_path = os.path.join(m_cfg['data_path'], m_cfg['dev_set'])
        play_audio = True

    eg_utt_bucket = -1
    for i, bucket in enumerate(bucket_dict[curr_set]["buckets"]):
        if eg_utt in bucket:
            eg_utt_bucket = i
            print("found")
        # end if
    # end for
    if eg_utt_bucket < 0:
        # Without a bucket the width passed to get_batch below would be 0;
        # fail loudly instead of decoding an empty input.
        raise ValueError(
            "utterance {0!r} not found in any bucket of {1!r}".format(eg_utt, curr_set))
    print("found in bucket : {0:d}".format(eg_utt_bucket))
    # Take the bucket width from the same set the buckets were read from.
    width_b = bucket_dict[curr_set]["width_b"]
    utt_list = [eg_utt]

    # NOTE(review): the 6th argument is the upper edge of the utterance's
    # bucket; the literal 200 is presumably a length limit on the decoder
    # side - confirm against get_batch.
    batch_data = get_batch(map_dict[curr_set],
                           enc_key,
                           dec_key,
                           utt_list,
                           vocab_dict,
                           (eg_utt_bucket+1) * width_b,
                           200,
                           input_path=local_input_path)

    with chainer.using_config('train', False):
        cuda.get_device(t_cfg['gpuid']).use()
        preds, _, probs_str = make_pred(eg_utt, X=batch_data['X'], display_limit=10)

    display_words(map_dict[curr_set], v_dict,
                  preds.tolist(),
                  utt_list, dec_key,
                  key,
                  play_audio=play_audio,
                  displayN=displayN)

    print(probs_str)

In [288]:
def find_utts_with_word(word, set_key="fisher_dev", show_max_found=10):
    """List utterances of `set_key` whose English reference contains `word`.

    The match is a plain substring test against the space-joined English
    words. For sets whose name contains "train" the words are read from
    entry['en_w'] directly, otherwise from entry['en_w'][0]. Prints the total
    number of matches followed by at most `show_max_found` lines of the form
    "<utt> | <english> | <spanish>".
    """
    entries = map_dict[set_key]
    is_train = "train" in set_key

    matches = []
    for utt_id, info in entries.items():
        en_tokens = info['en_w'] if is_train else info['en_w'][0]
        en_text = " ".join(tok.decode() for tok in en_tokens)
        es_text = " ".join(tok.decode() for tok in info['es_w'])
        if word not in en_text:
            continue
        matches.append("{0:s} | {1:s} | {2:s}".format(utt_id, en_text, es_text))

    rule = "-" * 80
    print(rule)
    print("total instances found = {0:d}".format(len(matches)))
    print(rule)
    print("\n".join(matches[:show_max_found]))

In [289]:
find_utts_with_word("puerto", set_key="fisher_dev")

--------------------------------------------------------------------------------
total instances found = 72
--------------------------------------------------------------------------------
20051009_182032_217_fsp-B-7 | i ' m from puerto rico | soy de puerto rico
20051009_182032_217_fsp-B-50 | but he is he is in puerto rico | pero esta esta en puerto rico
20051009_182032_217_fsp-B-51 | but often when i go to puerto rico he wants me to change religion to their orthodoxy because they say that orthodoxy | pero cada rato que yo voy a puerto rico él quiere que uno cambie de religión de la católica a ellos porque dice que la católica
20051009_182032_217_fsp-A-90 | i was going to say were you sir born in puerto rico | yo le iva a decir ¿usted eh nació en puerto rico
20051009_182032_217_fsp-B-92 | yes i was born in puerto rico and i grew up there | sí nací en puerto rico y me crié en puerto rico
20051009_182032_217_fsp-B-109 | but but family since my family stayed there in puerto rico i have my

In [290]:
eg_utt = "20051023_232057_325_fsp-A-3"

In [291]:
generate_translate_probs(eg_utt)

found
found in bucket : 3
saved probs in : ./sp2enw/sp_1.0_l2e-3/20051023_232057_325_fsp-A-3_probs.json
Utterance: 20051023_232057_325_fsp-A-3
+---------+--------------------------------------------------+
| es ref  | yo soy de puerto rico pero vivo aquí en denver   |
|         | colorado                                         |
+---------+--------------------------------------------------+
| en ref  | i ' m from puerto rico but i live here in denver |
|         | colorado                                         |
+---------+--------------------------------------------------+
| en pred | i ' m from puerto rico but i live here in canada |
+---------+--------------------------------------------------+
8.0 11.98


------------------------------------------------------------
predicting word : 0
i          = 0.508
and        = 0.446
¿and       = 0.011
i´m        = 0.004
me         = 0.003
so         = 0.002
they       = 0.002
houston    = 0.002
yes        = 0.002
eh         = 0.002
------------------------------------------------------------
predicting word : 1
'          = 0.785
am         = 0.208
went       = 0.001
was        = 0.001
i          = 0.001
live       = 0.000
know       = 0.000
study      = 0.000
studied    = 0.000
and        = 0.000
------------------------------------------------------------
predicting word : 2
m          = 0.998
ve         = 0.001
re         = 0.000
ll         = 0.000
am         = 0.000
s          = 0.000
d          = 0.000
from       = 0.000
'          = 0.000
i          = 0.000
------------------------------------------------------------
predicting word : 3
from       = 0.958
in         = 0.013
a          = 0.002
puerto     = 0.002
an         = 0.001
on         

In [292]:
prob_fname = os.path.join(m_cfg['model_dir'], "{0:s}_probs.json".format(eg_utt))

In [293]:
eg_utt = "20051017_234550_276_fsp-A-13"
generate_translate_probs(eg_utt)

found
found in bucket : 1
saved probs in : ./sp2enw/sp_1.0_l2e-3/20051017_234550_276_fsp-A-13_probs.json
Utterance: 20051017_234550_276_fsp-A-13
+---------+-------------+
| es ref  | puerto rico |
+---------+-------------+
| en ref  | puerto rico |
+---------+-------------+
| en pred | puerto rico |
+---------+-------------+
42.11 43.42


------------------------------------------------------------
predicting word : 0
puerto     = 0.193
how        = 0.125
four       = 0.054
forty      = 0.053
fourth     = 0.038
very       = 0.021
fifty      = 0.018
perfect    = 0.017
a          = 0.014
strong     = 0.010
------------------------------------------------------------
predicting word : 1
rico       = 0.961
rican      = 0.015
ricans     = 0.004
rica       = 0.002
puerto     = 0.001
aires      = 0.001
del        = 0.000
francisco  = 0.000
de         = 0.000
com        = 0.000
------------------------------------------------------------
predicting word : 2
_EOS       = 0.985
yes        = 0.002
right      = 0.001
and        = 0.001
too        = 0.001
'          = 0.001
are        = 0.001
"          = 0.000
or         = 0.000
but        = 0.000


In [294]:
eg_utt = "20051018_210220_279_fsp-A-26"
generate_translate_probs(eg_utt)

found
found in bucket : 5
saved probs in : ./sp2enw/sp_1.0_l2e-3/20051018_210220_279_fsp-A-26_probs.json
Utterance: 20051018_210220_279_fsp-A-26
+---------+--------------------------------------------------+
| es ref  | cuatro hijos espérate cuatro personas en la casa |
|         | quieren que tu hagas como veinte y cuatro mil    |
|         | dólares                                          |
+---------+--------------------------------------------------+
| en ref  | four childern wait four persons in the home they |
|         | want you to make about twenty four thousand      |
|         | dollars                                          |
+---------+--------------------------------------------------+
| en pred | four kids and four people from home you have to  |
|         | take care of twenty four thousand dollars        |
+---------+--------------------------------------------------+
218.7 225.24


------------------------------------------------------------
predicting word : 0
four       = 0.828
how        = 0.023
another    = 0.023
three      = 0.021
like       = 0.013
fourth     = 0.008
the        = 0.004
eight      = 0.004
what       = 0.003
oh         = 0.003
------------------------------------------------------------
predicting word : 1
kids       = 0.441
children   = 0.389
son        = 0.025
or         = 0.020
and        = 0.020
sons       = 0.012
years      = 0.007
three      = 0.005
boys       = 0.004
hours      = 0.004
------------------------------------------------------------
predicting word : 2
and        = 0.115
you        = 0.099
before     = 0.068
do         = 0.064
three      = 0.061
already    = 0.035
from       = 0.035
have       = 0.032
even       = 0.031
four       = 0.018
------------------------------------------------------------
predicting word : 3
four       = 0.647
three      = 0.077
after      = 0.021
you        = 0.019
your       = 0.014
how        

In [297]:
find_utts_with_word("mhm", set_key="fisher_dev")

--------------------------------------------------------------------------------
total instances found = 114
--------------------------------------------------------------------------------
20051009_182032_217_fsp-A-91 | mhm mhm | mhm mhm
20051009_182032_217_fsp-A-125 | yes mhm | sí ajá
20051009_182032_217_fsp-A-126 | yes mhm | sí mhm
20051009_182032_217_fsp-A-128 | mhm | mhm
20051009_182032_217_fsp-A-137 | mhm | mhm
20051009_182032_217_fsp-A-138 | ah mh mhm | ah mm mhm
20051009_182032_217_fsp-A-153 | mhm | mhm
20051010_212418_225_fsp-A-7 | mhm | mhm
20051010_212418_225_fsp-A-13 | mhm | mhm
20051010_212418_225_fsp-A-14 | mhm | mhm


In [298]:
eg_utt = "20051019_210146_289_fsp-A-54"
generate_translate_probs(eg_utt, curr_set='fisher_dev')

found
found in bucket : 4
saved probs in : ./sp2enw/sp_1.0_l2e-3/20051019_210146_289_fsp-A-54_probs.json
Utterance: 20051019_210146_289_fsp-A-54
+---------+----------------------------------------------------+
| es ref  | hay no hay tanta diferencia entre pobre y rico en  |
|         | canadá como hay aquí en estados unidos             |
+---------+----------------------------------------------------+
| en ref  | there is there is not that much difference between |
|         | a rich and a poor person in canada like there is   |
|         | here in the united states                          |
+---------+----------------------------------------------------+
| en pred | there are not so many difference and tell me that  |
|         | the truth is like here in the united states        |
+---------+----------------------------------------------------+
350.36 355.69


------------------------------------------------------------
predicting word : 0
there      = 0.397
ah         = 0.123
right      = 0.072
oh         = 0.071
i          = 0.069
okay       = 0.031
that       = 0.023
it         = 0.021
ay         = 0.021
aye        = 0.019
------------------------------------------------------------
predicting word : 1
are        = 0.308
aren       = 0.272
'          = 0.143
is         = 0.083
isn        = 0.082
there      = 0.057
it         = 0.009
no         = 0.005
doesn      = 0.004
not        = 0.004
------------------------------------------------------------
predicting word : 2
not        = 0.610
no         = 0.156
so         = 0.100
there      = 0.042
many       = 0.027
a          = 0.016
aren       = 0.011
lots       = 0.006
very       = 0.002
are        = 0.002
------------------------------------------------------------
predicting word : 3
so         = 0.715
there      = 0.089
many       = 0.071
a          = 0.039
much       = 0.013
not        