### Generate Phonemes for IWSLT

In [None]:
from transformers import T5ForConditionalGeneration, AutoTokenizer
import pickle
from tqdm import tqdm
import numpy as np
# Load the Charsiu multilingual G2P checkpoint and place it on the GPU
# (Module.cuda() returns the module itself, so the call can be chained).
model = T5ForConditionalGeneration.from_pretrained(
    'charsiu/g2p_multilingual_byT5_small_100'
).cuda()
# The G2P checkpoint is paired with the stock ByT5-small tokenizer.
tokenizer = AutoTokenizer.from_pretrained('google/byt5-small')

def to_phoneme(sentence):
    """Convert an English sentence into a space-separated phoneme string.

    Every whitespace-separated word is prefixed with the ``<eng-us>: `` tag
    and transcribed as its own sequence by the module-level ``model`` and
    ``tokenizer``. The per-word outputs are joined with single spaces and a
    handful of IPA symbols are rewritten (see the table at the end).

    Args:
        sentence: Text to transcribe; it is split on whitespace.

    Returns:
        The phoneme string, or ``""`` when ``sentence`` contains no words.
    """
    words = sentence.split()
    if not words:
        # Nothing to transcribe; do not hand the tokenizer an empty batch.
        return ""

    # Each word is decoded as a separate sequence, so the generation cap must
    # follow the length of the longest word, not the number of words.
    # ByT5 operates on UTF-8 bytes and IPA symbols take several bytes each,
    # hence the factor of 3; 50 is kept as a floor for short words.
    longest_word_bytes = max(len(word.encode("utf-8")) for word in words)
    max_length = max(50, 3 * longest_word_bytes)

    prompts = ['<eng-us>: ' + word for word in words]
    out = tokenizer(prompts, padding=True, add_special_tokens=False, return_tensors='pt')
    # Put the inputs on whatever device the model lives on.
    out = {k: v.to(model.device) for k, v in out.items()}
    # We do not find beam search helpful. Greedy decoding is enough.
    preds = model.generate(**out, num_beams=1, max_length=max_length)
    phones = tokenizer.batch_decode(preds.tolist(), skip_special_tokens=True)

    ph_hat = " ".join(phones)
    # Symbol rewrites, applied in this order.
    for old, new in (
        ("ɫ", "l"),
        ("ɝ", "ɜr"),
        ("tʃ", "ʧ"),
        ("dʒ", "ʤ"),
        ("ɹ", "r"),
        ("ɡ", "g"),
    ):
        ph_hat = ph_hat.replace(old, new)
    return ph_hat

In [None]:
# IWSLT14 English training text, as produced by fairseq's prepare script:
# https://github.com/facebookresearch/fairseq/blob/main/examples/translation/prepare-iwslt14.sh
with open('iwslt14.tokenized.de-en/train.en', 'r', encoding='utf-8') as f:
    text = f.read()

# Punctuation that is deleted before phonemization.
stop_symbols = ['?', ',', '.', '!', '"', '”', '“', "-", ":"]
# Every stop symbol is a single character, so one translate pass removes them all.
text = text.translate(str.maketrans('', '', ''.join(stop_symbols)))

# Drop empty / whitespace-only lines (e.g. the empty string left after the
# final newline, or lines that held only punctuation) so that to_phoneme is
# never called with a batch that contains no words.
text = [line for line in text.split("\n") if line.strip()]

batch_size = 5
CHUNKS = int(np.ceil(len(text) / batch_size))
phoneme_chunks = []
for i in tqdm(range(CHUNKS)):
    batch = " ".join(text[i * batch_size: (i + 1) * batch_size])
    phoneme_chunks.append(to_phoneme(batch))

# Each batch's phonemes are followed by a single space, including the last one.
res_phonemes = "".join(chunk + " " for chunk in phoneme_chunks)

In [None]:
# Serialize the accumulated IWSLT phoneme string to disk with pickle.
with open("iwslt_phonemes.pkl", mode='wb') as pkl_file:
    pickle.dump(res_phonemes, pkl_file)