In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Input, LSTM, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt 
import seaborn as sns 

# Data Reading

In [None]:
!unzip '/content/cmudict.dict.zip'

Archive:  /content/cmudict.dict.zip
  inflating: cmudict.dict            


# DataFrame creation from Dictionary


In [None]:
'''
'bout B AW1 T
'cause K AH0 Z
'course K AO1 R S
'cuse K Y UW1 Z
'em AH0 M
'''





words = []
phonics_list = []

with open('/content/cmudict.dict', 'r') as f:
  phonics = [line.rstrip('\n') for line in f]
  for p in phonics:
    x = p.split(' ')
    #print(x)
    words.append(x[0])
    phonics_list.append(' '.join(x[1:]))

print(words[:10])
print(phonics_list[:10])

["'bout", "'cause", "'course", "'cuse", "'em", "'frisco", "'gain", "'kay", "'m", "'n"]
['B AW1 T', 'K AH0 Z', 'K AO1 R S', 'K Y UW1 Z', 'AH0 M', 'F R IH1 S K OW0', 'G EH1 N', 'K EY1', 'AH0 M', 'AH0 N']


In [None]:
print(len(words))
print(len(phonics_list))

135010
135010


In [None]:
df = pd.DataFrame({'Word': words, 'Phonics': phonics_list})
df.head()

Unnamed: 0,Word,Phonics
0,'bout,B AW1 T
1,'cause,K AH0 Z
2,'course,K AO1 R S
3,'cuse,K Y UW1 Z
4,'em,AH0 M


In [None]:
df = df.sample(100000).reset_index(drop=True)
df

Unnamed: 0,Word,Phonics
0,patrolman,P AH0 T R OW1 L M AE2 N
1,samples,S AE1 M P AH0 L Z
2,tippy's,T IH1 P IY0 Z
3,mccready,M AH0 K R IY1 D IY0
4,sugden,S AH1 G D AH0 N
...,...,...
99995,defray,D IH0 F R EY1
99996,zohn,Z OW1 N
99997,superbowl's,S UW1 P ER0 B OW2 L Z
99998,stumpo,S T AH1 M P OW2


# Data Prepocessing

### 1. Adding start and end sequence

In [None]:
df['Phonics'] = 'startseq '+df['Phonics']+' endseq'
df

Unnamed: 0,Word,Phonics
0,patrolman,startseq P AH0 T R OW1 L M AE2 N endseq
1,samples,startseq S AE1 M P AH0 L Z endseq
2,tippy's,startseq T IH1 P IY0 Z endseq
3,mccready,startseq M AH0 K R IY1 D IY0 endseq
4,sugden,startseq S AH1 G D AH0 N endseq
...,...,...
99995,defray,startseq D IH0 F R EY1 endseq
99996,zohn,startseq Z OW1 N endseq
99997,superbowl's,startseq S UW1 P ER0 B OW2 L Z endseq
99998,stumpo,startseq S T AH1 M P OW2 endseq


In [None]:
max_len_ip = df['Word'].apply(lambda x: len(x)).max()
max_len_ip


28

In [None]:
max_len_op = df['Phonics'].apply(lambda x: len(x.split())).max()
max_len_op

30

### 2. Tokenization

i) characterwise tokenization in input sequence

ii) wordwise tokenization in output sequence

In [None]:
words = df['Word'].tolist()
phonemes = df['Phonics'].tolist()

In [None]:
char_tokenizer = Tokenizer(char_level=True,oov_token='OOV')
char_tokenizer.fit_on_texts(words)
char_vocab_size = len(char_tokenizer.word_index) + 1
print("Character Vocab Size: ",char_vocab_size)

Character Vocab Size:  37


In [None]:
phone_tokenizer = Tokenizer(oov_token='OOV')
phone_tokenizer.fit_on_texts(phonemes)
phone_vocab_size = len(phone_tokenizer.word_index) + 1
print("Phoneme Vocab Size: ",phone_vocab_size)

Phoneme Vocab Size:  77


In [None]:
ch_sequences = char_tokenizer.texts_to_sequences(words)
ph_sequences = phone_tokenizer.texts_to_sequences(phonemes)

In [None]:
ch_sequences

[[17, 3, 9, 4, 8, 10, 13, 3, 7],
 [6, 3, 13, 17, 10, 2, 6],
 [9, 5, 17, 17, 20, 27, 6],
 [13, 11, 11, 4, 2, 3, 12, 20],
 [6, 14, 16, 12, 2, 7],
 [6, 22, 5, 7, 16, 10, 2, 20],
 [7, 16, 8, 4],
 [11, 15, 3, 10, 21, 5, 7],
 [14, 17, 15, 5, 10, 10],
 [17, 5, 7, 6, 9, 4, 5, 17, 2, 6],
 [18, 4, 2, 13],
 [11, 15, 3, 17, 17, 2, 10],
 [4, 2, 2, 21],
 [12, 14, 3, 10],
 [6, 9, 2, 21, 3],
 [17, 3, 4, 9, 5, 11, 14, 10, 3, 4, 6],
 [17, 5, 9, 11, 15, 2, 4, 27, 6],
 [30, 3, 23, 5, 2, 4, 3],
 [3, 10, 11, 8, 4, 9, 3],
 [4, 5, 2, 6],
 [6, 14, 4, 23, 2, 5, 10, 10, 3, 7, 11, 2],
 [12, 3, 7, 16, 8],
 [11, 14, 4, 3, 9, 2, 12],
 [6, 17, 3, 11, 2, 17, 8, 4, 9],
 [15, 2, 4, 4, 8, 7, 5, 13, 8, 27, 6],
 [29, 14, 16, 16, 10, 2, 12],
 [6, 15, 3, 4, 10, 2, 7, 2],
 [11, 5, 23, 5, 10, 5, 3, 7],
 [13, 5, 6, 4, 2, 3, 12],
 [9, 5, 7, 19, 2, 4, 5, 7, 16],
 [6, 2, 5, 12, 10, 2, 4],
 [3, 13, 7, 5, 8, 11, 2, 7, 9, 2, 6, 5, 6],
 [17, 10, 3, 11, 28, 2, 19],
 [12, 2, 13, 3, 4, 9, 5, 7],
 [19, 5, 7, 20, 8, 7],
 [17, 8, 6, 9, 14, 

In [None]:
ph_sequences

[[2, 19, 4, 8, 9, 32, 7, 13, 45, 5, 3],
 [2, 6, 21, 13, 19, 4, 7, 14, 3],
 [2, 8, 22, 19, 16, 14, 3],
 [2, 13, 4, 10, 9, 26, 11, 16, 3],
 [2, 6, 36, 24, 11, 4, 5, 3],
 [2, 6, 30, 22, 27, 24, 7, 16, 3],
 [2, 50, 27, 24, 34, 9, 3],
 [2, 41, 21, 7, 23, 12, 5, 3],
 [2, 36, 19, 29, 22, 7, 3],
 [2, 19, 22, 5, 6, 8, 9, 46, 19, 6, 3],
 [2, 17, 9, 18, 13, 3],
 [2, 41, 21, 19, 4, 7, 3],
 [2, 9, 26, 23, 3],
 [2, 11, 37, 4, 7, 3],
 [2, 6, 8, 18, 23, 4, 3],
 [2, 19, 15, 8, 22, 10, 39, 4, 7, 15, 14, 3],
 [2, 19, 22, 41, 15, 14, 3],
 [2, 14, 63, 25, 39, 18, 9, 4, 3],
 [2, 40, 7, 10, 34, 9, 8, 4, 3],
 [2, 9, 35, 14, 3],
 [2, 6, 15, 25, 28, 7, 4, 5, 6, 3],
 [2, 11, 21, 27, 24, 33, 3],
 [2, 10, 39, 70, 9, 28, 8, 12, 11, 3],
 [2, 6, 19, 28, 6, 19, 55, 9, 8, 3],
 [2, 29, 15, 20, 5, 4, 13, 33, 14, 3],
 [2, 38, 36, 24, 4, 7, 11, 3],
 [2, 31, 20, 9, 7, 51, 5, 3],
 [2, 6, 4, 25, 22, 7, 39, 4, 5, 3],
 [2, 13, 12, 6, 9, 26, 11, 3],
 [2, 8, 22, 27, 10, 15, 12, 27, 3],
 [2, 6, 35, 11, 7, 15, 3],
 [2, 45, 13, 5, 1

In [None]:
print("word: ", words[10000])
print("word Sequence: ",ch_sequences[10000])

word:  reelection
word Sequence:  [4, 2, 2, 10, 2, 11, 9, 5, 8, 7]


In [None]:
print("Phoneme: ", phonemes[10000])
print("Phonic Sequence: ",ph_sequences[10000])

Phoneme:  startseq R IY0 IH0 L EH1 K SH AH0 N endseq
Phonic Sequence:  [2, 9, 16, 12, 7, 18, 10, 31, 4, 5, 3]


### 3. Decoder input-output generation for teacher forcing

In [None]:
decoder_inputs = []
decoder_outputs = []

for ph in ph_sequences:
    decoder_inputs.append(ph[:-1])
    decoder_outputs.append(ph[1:])

print(decoder_inputs)
print(decoder_outputs)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



### 4. Padding all sequences

In [None]:
encoder_inputs = pad_sequences(ch_sequences, maxlen=max_len_ip, padding='post')

decoder_inputs = pad_sequences(decoder_inputs, maxlen=max_len_op, padding='post')
decoder_outputs = pad_sequences(decoder_outputs, maxlen=max_len_op, padding='post')

In [None]:
len(decoder_inputs)

100000

### Training-Validation splitting


In [None]:
n = len(words)
split_index = int(0.85 * n)
print('length of training dataset:', split_index)
print('length of validation dataset:', n-split_index)


length of training dataset: 85000
length of validation dataset: 15000


In [None]:
X_train = [encoder_inputs[:split_index], decoder_inputs[:split_index]]
y_train = decoder_outputs[:split_index]

X_test = words[split_index:]
y_test = phonemes[split_index:]

In [None]:
len(X_train[0])

85000

In [None]:
X_train[0].shape

(85000, 28)

In [None]:
X_train[1].shape

(85000, 30)

In [None]:
y_train.shape

(85000, 30)

# Model Building

Model 1

In [None]:


# Encoder
char_input = Input(shape=(None,))
x = Embedding(char_vocab_size, 128, mask_zero=True)(char_input)
output_y, state_h, state_c = LSTM(128, return_state=True)(x)

# Decoder
ph_input = Input(shape=(None,))
embedding_layer = Embedding(phone_vocab_size, 128, mask_zero=True)
x = embedding_layer(ph_input)
decoder_lstm = LSTM(128, return_sequences=True, return_state=True)
output_y, _ , _ = decoder_lstm(x, initial_state=[state_h, state_c])
softmax_dense = Dense(phone_vocab_size, activation='softmax')
output = softmax_dense(output_y)

model = Model(inputs=[char_input, ph_input],outputs=output)
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_18 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 input_19 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_17 (Embedding)       (None, None, 128)    4608        ['input_18[0][0]']               
                                                                                                  
 embedding_18 (Embedding)       (None, None, 128)    9984        ['input_19[0][0]']               
                                                                                            

In [None]:
history = model.fit(X_train,y_train,
                    batch_size=32,
                    epochs=20,
                    validation_split=0.4,
                    validation_batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
history = model.fit(X_train,y_train,
                    batch_size=32,
                    epochs=20,
                    validation_split=0.4,
                    validation_batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20

KeyboardInterrupt: ignored

In [None]:
model.save_weights('model1.h5')
with open('model1.json', 'w') as f:
  f.write(model.to_json())

Model 2

In [None]:
# Encoder
char_input = Input(shape=(None,))
x = Embedding(char_vocab_size, 128, mask_zero=True)(char_input)
x=LSTM(128, return_sequences=True)(x)
output_y, state_h, state_c = LSTM(256, return_state=True)(x)

# Decoder
ph_input = Input(shape=(None,))
embedding_layer = Embedding(phone_vocab_size, 128, mask_zero=True)
x = embedding_layer(ph_input)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
output_y, _ , _ = decoder_lstm(x, initial_state=[state_h, state_c])
softmax_dense = Dense(phone_vocab_size, activation='softmax')
output = softmax_dense(output_y)

model = Model(inputs=[char_input, ph_input],outputs=output)
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "model_17"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_38 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_35 (Embedding)       (None, None, 128)    4736        ['input_38[0][0]']               
                                                                                                  
 input_39 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 lstm_47 (LSTM)                 (None, None, 128)    131584      ['embedding_35[0][0]']           
                                                                                           

In [None]:
history = model.fit(X_train,y_train,
                    batch_size=32,
                    epochs=20,
                    validation_split=0.4,
                    validation_batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
model.save_weights('model2.h5')
with open('model2.json', 'w') as f:
  f.write(model.to_json())

Model 3

In [None]:
from tensorflow.keras.optimizers import Adam
adam=Adam(learning_rate=0.01)

In [None]:
char_vocab_size
phone_vocab_size

# Encoder
char_input = Input(shape=(None,))
x = Embedding(char_vocab_size, 256, mask_zero=True)(char_input)
x=LSTM(128, return_sequences=True)(x)
output_y, state_h, state_c = LSTM(256, return_state=True)(x)

# Decoder
ph_input = Input(shape=(None,))
embedding_layer = Embedding(phone_vocab_size, 256, mask_zero=True)
x = embedding_layer(ph_input)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
output_y, _ , _ = decoder_lstm(x, initial_state=[state_h, state_c])
softmax_dense = Dense(phone_vocab_size, activation='softmax')
output = softmax_dense(output_y)

model = Model(inputs=[char_input, ph_input],outputs=output)
model.compile(loss='sparse_categorical_crossentropy',optimizer=adam, metrics=['accuracy'])
model.summary()

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_24 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_23 (Embedding)       (None, None, 256)    9216        ['input_24[0][0]']               
                                                                                                  
 input_25 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 lstm_27 (LSTM)                 (None, None, 128)    197120      ['embedding_23[0][0]']           
                                                                                           

In [None]:
history = model.fit(X_train,y_train,
                    batch_size=32,
                    epochs=20,
                    validation_split=0.4,
                    validation_batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
model.save_weights('model3.h5')
with open('model3.json', 'w') as f:
  f.write(model.to_json())

In [None]:
char_vocab_size
phone_vocab_size

# Encoder
char_input = Input(shape=(None,))
x = Embedding(char_vocab_size, 256, mask_zero=True)(char_input)
x=LSTM(128, return_sequences=True)(x)
output_y, state_h, state_c = LSTM(256, return_state=True)(x)

# Decoder
ph_input = Input(shape=(None,))
embedding_layer = Embedding(phone_vocab_size, 256, mask_zero=True)
x = embedding_layer(ph_input)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
output_y, _ , _ = decoder_lstm(x, initial_state=[state_h, state_c])
softmax_dense = Dense(phone_vocab_size, activation='softmax')
output = softmax_dense(output_y)

model = Model(inputs=[char_input, ph_input],outputs=output)
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 256)    9472        ['input_3[0][0]']                
                                                                                                  
 input_4 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 lstm (LSTM)                    (None, None, 128)    197120      ['embedding[0][0]']              
                                                                                              

In [None]:
history = model.fit(X_train,y_train,
                    batch_size=16,
                    epochs=20,
                    validation_split=0.4,
                    validation_batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
history = model.fit(X_train,y_train,
                    batch_size=16,
                    epochs=20,
                    validation_split=0.4,
                    validation_batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20

KeyboardInterrupt: ignored

# Restoring the model

In [None]:
from tensorflow.keras.models import model_from_json
with open('model3.json', 'r') as f:
  model=model_from_json(f.read())


In [None]:
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_28 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_27 (Embedding)       (None, None, 256)    9216        ['input_28[0][0]']               
                                                                                                  
 input_29 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 lstm_33 (LSTM)                 (None, None, 128)    197120      ['embedding_27[0][0]']           
                                                                                           

In [None]:
model.load_weights('/content/model3.h5')

In [None]:
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_28 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_27 (Embedding)       (None, None, 256)    9216        ['input_28[0][0]']               
                                                                                                  
 input_29 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 lstm_33 (LSTM)                 (None, None, 128)    197120      ['embedding_27[0][0]']           
                                                                                           

# Model Inference

### Creating the encoder and decoder

In [None]:
encoder = Model(char_input, [state_h, state_c])
encoder.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 256)         9472      
                                                                 
 lstm (LSTM)                 (None, None, 128)         197120    
                                                                 
 lstm_1 (LSTM)               [(None, 256),             394240    
                              (None, 256),                       
                              (None, 256)]                       
                                                                 
Total params: 600,832
Trainable params: 600,832
Non-trainable params: 0
_________________________________________________________________


In [None]:
decoder_input_h = Input(shape=(256,))
decoder_input_c = Input(shape=(256,))
x = embedding_layer(ph_input)
x, decoder_output_h, decoder_output_c = decoder_lstm(x, initial_state=[decoder_input_h, decoder_input_c])
x = softmax_dense(x)
decoder = Model([ph_input] + [decoder_input_h, decoder_input_c], 
                                [x] + [decoder_output_h, decoder_output_c])

In [None]:
decoder.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding_1 (Embedding)        (None, None, 256)    19712       ['input_4[0][0]']                
                                                                                                  
 input_5 (InputLayer)           [(None, 256)]        0           []                               
                                                                                                  
 input_6 (InputLayer)           [(None, 256)]        0           []                               
                                                                                            

### Phenome generation for inference


1.    After passing the complete cell state
and hidden state information of the encoder, the starseq token is first passed to the decoder along with the state information which produces the next phoneme token.

2.    Each of these phoneme tokens are then added to the sequential input to generate the subsequent phoneme tokens.

In [None]:
char_tokenizer.texts_to_sequences(['star'])

[[6, 9, 3, 4]]

In [None]:
def predict_pronunciation(ch_input):
    input_seq = char_tokenizer.texts_to_sequences([ch_input])

    next_h, next_c = encoder.predict(input_seq)
    #print('encoder done')

    curr_token = np.zeros((1,1))
    curr_token[0][0] = phone_tokenizer.word_index['startseq']

    pred_sentence = ''

    for i in range(max_len_op):
        print('entering decoder')
        output, next_h, next_c = decoder.predict([curr_token] + [next_h, next_c])
        #print(output)
        next_token = np.argmax(output[0, 0, :])
        next_word = phone_tokenizer.index_word[next_token]
        if next_word == 'endseq':
            break
        else:
            pred_sentence += ' ' + next_word
            curr_token[0] = next_token

    return pred_sentence

In [None]:
predict_pronunciation("sweta")

entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder
entering decoder


' ay0 ay0 eh2 ow1 ae1 ah0 g ah0 g eh1 old old jh aa1 aa1 ao1 ao1 aa1 ao1 ao1 d ih1 ih1 f f f f ay1 ih0 ih0'