In [1]:
from asr import ASRModel
from models.correctors import beam_search_forward
import colorama
import torch
from collections import Counter
from typing import List

### Model Loading ###
First we load the pretrained model using our modified ASR class, although we probably could have used the standard class instead. Next we modify the decoder's internal attributes (`topk` and `return_log_probs`) so that the beam search provides more candidate transcriptions.

In [2]:
# Load the pretrained LibriSpeech transformer ASR model onto the first GPU,
# caching the downloaded files under ``pretrained_models/``.
asr_model = ASRModel.from_hparams(
    source="speechbrain/asr-transformer-transformerlm-librispeech",
    savedir="pretrained_models/asr-transformer-transformerlm-librispeech",
    run_opts={"device": "cuda:0"},
)

# Decoder overrides, applied in order: ask for 50 results instead of the
# default, and turn on ``return_log_probs``.
decoder_overrides = {"topk": 50, "return_log_probs": True}
for option_name, option_value in decoder_overrides.items():
    setattr(asr_model.hparams.decoder, option_name, option_value)

### Obtaining the predictions ###
Since the transcribe methods only keep the words, we copy most of the method and keep just the parts we need. The cell below loads an example audio file, encodes the audio, and then calls our modified version of the decoder's method, `beam_search_forward`. The modification makes it return the alternative transcription predictions themselves, rather than only their probabilities as the current base method does.

In [8]:
# Inference only, so gradient tracking is switched off for the whole pass.
with torch.no_grad():
    # Load the example clip and add a leading dimension so it forms a
    # batch containing a single item.
    waveform = asr_model.load_audio("tmp/SGU884-training-Bob_0.wav")
    batch = waveform.unsqueeze(0)

    # One length entry for the one clip, moved to the same GPU as the model.
    # NOTE(review): presumably 1.0 is a relative length meaning "no padding";
    # confirm against encode_batch.
    wav_lens = torch.tensor([1.0]).to("cuda:0")

    encoder_out = asr_model.encode_batch(batch, wav_lens)
    predicted_tokens, scores, log_probs = beam_search_forward(
        asr_model.mods.decoder, encoder_out, wav_lens
    )

    # Turn every returned token sequence back into text.
    predicted_words = list(map(asr_model.tokenizer.decode_ids, predicted_tokens))

# Show only the first few candidates; the full list is long.
predicted_words[:5]

[['THIS TIME TWO SETS OF RESEARCHES MAY HAVE FOUND THE FIRST FREE FLOATING STELLAR MASS BLACK HOLES OR BLACK HOLE EVER AH ONLY ABOUT TWO DOZEN SUCH BLACK HOLES HAVE EVER BEEN DETECTED ALL IN BINARY SYSTEMS NOT NONE EVER ALONE ON THIS RESEARCH COMES FROM SPACE TELESCOPE SCIENCE INSTITUTE AND BALTIMORE AND FROM THE UNIVERSITY OF CALIFORNIA BERKELEY'],
 ['THIS TIME TWO SETS OF RESEARCHES MAY HAVE FOUND THE FIRST FREE FLOATING STELLAR MASS BLACK HOLES OR BLACK HOLE EVER ARE ONLY ABOUT TWO DOZEN SUCH BLACK HOLES HAVE EVER BEEN DETECTED ALL IN BINARY SYSTEMS NOT NONE EVER ALONE ON THIS RESEARCH COMES FROM SPACE TELESCOPE SCIENCE INSTITUTE AND BALTIMORE AND FROM THE UNIVERSITY OF CALIFORNIA BERKELEY'],
 ['THIS TIME TWO SETS OF RESEARCHES MAY HAVE FOUND THE FIRST FREE FLOATING STELLAR MASS BLACK HOLES OR BLACK HOLES EVER AH ONLY ABOUT TWO DOZEN SUCH BLACK HOLES HAVE EVER BEEN DETECTED ALL IN BINARY SYSTEMS NOT NONE EVER ALONE ON THIS RESEARCH COMES FROM SPACE TELESCOPE SCIENCE INSTITUTE AND BA

### Soft List Comparison Function ###
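This function uses a simple heuristic to find the places where two predictions match, returning a list of index pairs `(i, j)` where element `i` of the first list is taken to be the same as element `j` of the second. It only looks one element ahead when the lists disagree, so if one list has two or more consecutive extra words the alignment is lost and later matches will generally be missed.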

In [4]:
def pairwise_compare_lists(list1: list, list2: list, max_skip: int = 1) -> List[tuple]:
    """Greedily align two lists and return the index pairs that match.

    Both lists are walked in lock-step. When the current elements are equal
    the pair of positions is recorded. When they differ, the function looks
    up to ``max_skip`` elements ahead, first in ``list2`` and then in
    ``list1`` (nearest offset first), for an element equal to the other
    list's current one. On the first hit, the pointer of the list holding
    the extra elements jumps forward and the realigned pair is recorded. If
    nothing is found, the position is treated as a substitution and both
    pointers simply advance.

    Args:
        list1: First sequence; its positions are the first item of each pair.
        list2: Second sequence; its positions are the second item of each pair.
        max_skip: Largest run of extra elements the look-ahead may jump over
            in either list. The default of 1 tolerates a single inserted or
            deleted element at a time. Values below 1 disable the look-ahead
            and reduce the comparison to a purely positional one.

    Returns:
        A list of ``(list1_index, list2_index)`` tuples, in increasing order,
        for the positions judged to hold the same element.
    """
    comp_list = []
    len1, len2 = len(list1), len(list2)
    ptr1, ptr2 = 0, 0

    # Each pass advances both pointers by at least one, so the walk ends as
    # soon as either list is exhausted.
    while ptr1 < len1 and ptr2 < len2:
        if list1[ptr1] == list2[ptr2]:
            comp_list.append((ptr1, ptr2))
        else:
            for skip in range(1, max_skip + 1):
                # Extra element(s) in list2: jump list2 forward to realign.
                if ptr2 + skip < len2 and list1[ptr1] == list2[ptr2 + skip]:
                    ptr2 += skip
                    comp_list.append((ptr1, ptr2))
                    break
                # Extra element(s) in list1: jump list1 forward to realign.
                if ptr1 + skip < len1 and list2[ptr2] == list1[ptr1 + skip]:
                    ptr1 += skip
                    comp_list.append((ptr1, ptr2))
                    break
        ptr1 += 1
        ptr2 += 1
    return comp_list

### Highlighting ###
Finally, we use this function to compare each alternative prediction to the top prediction and count, for every word position in the top prediction, how many alternatives fail to match it. Words that mismatch at least once but no more than 4 times are highlighted in yellow, and words that mismatch more than 4 times are highlighted in red. The highlighted words are the ones the model is less sure about.

In [7]:
# The best-scoring prediction is the reference every alternative is aligned to.
top_pred = predicted_words[0][0].split(" ")
possible_indices = set(range(len(top_pred)))

# For each alternative, collect the positions of the top prediction that
# found no partner in the alignment.
danger_words_list = []
for prediction in predicted_words[1:]:
    comp_list = pairwise_compare_lists(top_pred, prediction[0].split(" "))
    matched_indices = {top_idx for top_idx, _ in comp_list}
    danger_words_list.append(possible_indices - matched_indices)

# Tally, per word position, how many alternatives disagreed with it.
all_danger_words = Counter()
for unmatched_indices in danger_words_list:
    all_danger_words.update(unmatched_indices)

# Print the top prediction: red above 4 disagreements, yellow for 1 to 4,
# and plain text when every alternative agreed.
for position, word in enumerate(top_pred):
    disagreements = all_danger_words[position]
    if disagreements > 4:
        highlight = colorama.Fore.RED
    elif disagreements >= 1:
        highlight = colorama.Fore.YELLOW
    else:
        highlight = None

    if highlight is not None:
        word = highlight + word + colorama.Style.RESET_ALL
    print(word, end=" ")

THIS TIME TWO SETS OF [33mRESEARCHES[0m MAY HAVE FOUND THE FIRST FREE FLOATING STELLAR MASS BLACK HOLES OR BLACK [31mHOLE[0m EVER [31mAH[0m ONLY ABOUT TWO [33mDOZEN[0m SUCH BLACK HOLES HAVE EVER BEEN DETECTED ALL [33mIN[0m BINARY SYSTEMS NOT NONE EVER ALONE ON THIS RESEARCH COMES FROM SPACE [31mTELESCOPE[0m [33mSCIENCE[0m INSTITUTE [31mAND[0m BALTIMORE AND FROM THE UNIVERSITY [33mOF[0m [31mCALIFORNIA[0m [33mBERKELEY[0m 