In [1]:
import model as m

import argparse
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torch.nn.utils import clip_grad_norm
import torchtext.data as data
import random
import matplotlib.pyplot as plt
from tqdm import notebook


In [2]:
# modified train function
def train(config, train_iter, model, criterion, optimizer, epoch, dic, save_path, prob=0):
    """Run one pass over ``train_iter`` with teacher forcing, logging as it goes.

    Progress is kept in notebook-level globals (``iteration``, ``n_total``,
    ``train_loss``, ``n_bad_loss``, ``best_train_loss``, ``init``, ``stop``),
    which must be initialised before the first call.

    Every ``config.log_every`` iterations the accumulated loss is averaged and
    printed. If it beats ``best_train_loss`` the model's ``state_dict`` is
    written to ``save_path``; otherwise a bad-check counter is incremented.
    After ``config.n_bad_loss`` bad checks the learning rate is adjusted via
    ``m.adjust_learning_rate``; once it drops below ``config.lr_min`` the
    global ``stop`` flag is set and the epoch is abandoned.

    Args:
        config: namespace providing ``clip``, ``log_every``, ``n_bad_loss``,
            ``lr_decay`` and ``lr_min``.
        train_iter: batch iterator exposing ``init_epoch()``, ``iterations``
            and batches with ``grapheme``, ``phoneme`` and ``batch_size``.
        model: called as ``model(grapheme, phoneme[:-1])``; the first of its
            three return values is fed to ``criterion``.
        criterion: loss applied to the flattened output and target.
        optimizer: optimizer over ``model.parameters()``.
        epoch: epoch number, used only for the progress message.
        dic: grapheme vocabulary; currently unused (kept for the noise TODO).
        save_path: destination file for the best checkpoint.
        prob: currently unused (kept for the noise TODO).
    """
    global iteration, n_total, train_loss, n_bad_loss
    global init, best_train_loss, stop

    print("=> EPOCH {}".format(epoch))
    train_iter.init_epoch()
    # Nothing inside the loop leaves training mode, so switch once per epoch.
    model.train()
    for batch in notebook.tqdm(train_iter, total=len(train_iter)):
        # TODO
        # add noise to batch
        g = batch.grapheme
        # if random.random() > prob:
        #     g = grapheme_add_noise(0.1, g, dic)

        iteration += 1
        # Teacher forcing: the decoder sees phoneme[:-1] and must predict phoneme[1:].
        output, _, __ = model(g, batch.phoneme[:-1].detach())
        target = batch.phoneme[1:]
        loss = criterion(output.view(output.size(0) * output.size(1), -1),
                         target.view(target.size(0) * target.size(1)))

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip, 'inf')
        optimizer.step()

        n_total += batch.batch_size
        # .item() yields a plain Python float, so the running sum and
        # best_train_loss never hold on to (possibly CUDA) tensors.
        train_loss += loss.item() * batch.batch_size

        if iteration % config.log_every == 0:
            train_loss /= n_total
            print("   % Time: {:5.0f} | Iteration: {:5} | Batch: {:4}/{}"
                  " | Train loss: {:.4f} "
                  .format(time.time()-init, iteration, train_iter.iterations,
                          len(train_iter), train_loss))

            if train_loss < best_train_loss:
                best_train_loss = train_loss
                n_bad_loss = 0
                torch.save(model.state_dict(), save_path)
            else:
                n_bad_loss += 1
            if n_bad_loss == config.n_bad_loss:
                # Restart the comparison from the current loss before decaying.
                best_train_loss = train_loss
                n_bad_loss = 0
                m.adjust_learning_rate(optimizer, config.lr_decay)
                new_lr = optimizer.param_groups[0]['lr']
                print("=> Adjust learning rate to: {}".format(new_lr))
                if new_lr < config.lr_min:
                    stop = True
                    break

            # Start a fresh accumulation window for the next log line.
            n_total = train_loss = 0

In [3]:
def test(test_iter, model, g_dic, p_dic, print_error=False):
    """Decode every batch of ``test_iter`` and report the phoneme error rate.

    The summed per-example error is divided by ``len(test_iter.dataset)``,
    which is only a per-example mean when the iterator yields one example
    per batch.

    Args:
        test_iter: iterator exposing ``init_epoch()``, ``dataset`` and batches
            with ``grapheme`` and ``phoneme``.
        model: called as ``model(batch.grapheme)``; its result is converted to
            a list of phoneme indices.
        g_dic: index -> grapheme symbol lookup.
        p_dic: index -> phoneme symbol lookup.
        print_error: when true, print every mis-predicted example.

    Returns:
        ``(test_per, wrong)``: the error rate as a percentage, and the list of
        mis-predicted inputs, each a list of grapheme symbols in the reverse
        of the order stored in ``batch.grapheme``.
    """
    model.eval()
    test_iter.init_epoch()
    test_per = 0
    wrong = []
    with torch.no_grad():
        for batch in test_iter:
            output = model(batch.grapheme).data.tolist()
            target = batch.phoneme[1:].squeeze(1).data.tolist()

            # Score the prediction only up to and including the first token
            # with index 3 (presumably end-of-sequence in the phoneme vocab --
            # TODO confirm against m.prepare_data). Only the lookup is guarded,
            # so a ValueError raised inside the metric itself is not swallowed.
            try:
                end = output.index(3) + 1
            except ValueError:
                end = len(output)
            per = m.phoneme_error_rate(output[:end], target)

            if per > 0:
                graphemes = [g_dic[g] for g in batch.grapheme]
                graphemes.reverse()
                wrong.append(graphemes)

                if print_error:
                    print("Grapheme: {}\nTarget: {}\nPrediction: {}\n".format(
                        ''.join(graphemes),
                        ' '.join([p_dic[p] for p in target]),
                        ' '.join([p_dic[p] for p in output])))

            test_per += per  # batch_size = 1

        test_per = test_per / len(test_iter.dataset) * 100
        print("Phoneme error rate (PER): {:.2f}\n"
              .format(test_per))

    return test_per, wrong

In [4]:
def print_result(model, lines, grapheme, g_dic, p_dic):
    """Print the reference and predicted pronunciation for one word.

    Args:
        model: module called with a ``(len, 1)`` LongTensor of grapheme
            indices; its result is converted to a flat list of phoneme indices.
        lines: iterable of whitespace-separated strings whose first field is
            the word and whose remaining fields are its phonemes.
        grapheme: the word followed by a three-character suffix; the suffix
            is dropped (``grapheme[:-3]``) before lookup and encoding.
        g_dic: list of grapheme symbols; a symbol's position is its index.
        p_dic: index -> phoneme symbol lookup.

    Raises:
        ValueError: if a letter of the word is missing from ``g_dic``.
    """
    g = grapheme[:-3]

    # The last matching line wins; blank lines are skipped instead of raising.
    target = None
    for line in lines:
        fields = line.split()
        if fields and fields[0] == g:
            target = fields[1:]

    # Index 2 comes first, followed by the letters in reverse order.
    indices = [2]
    for letter in g:
        indices.insert(1, g_dic.index(letter))

    # Build the input on whatever device the model lives on, so the function
    # also works when CUDA is unavailable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    batch = torch.tensor(indices, dtype=torch.long, device=device).unsqueeze(1)

    with torch.no_grad():
        output = model(batch).data.tolist()

    # A word without a reference line still gets its prediction printed.
    target_text = ' '.join(target) if target is not None else '<not found>'
    print("Grapheme: {}\nTarget: {}\nPrediction: {}\n".format(
        g, target_text, ' '.join([p_dic[p] for p in output[:-1]])))

In [5]:
# Hyper-parameters and run options, available both as a plain dict (`parser`)
# and as an attribute-style namespace (`args`).
parser = dict(
    num_row=-1,       # -1 selects every data point
    epochs=15,
    batch_size=128,
    max_len=20,       # longest grapheme/phoneme sequence
    beam_size=5,      # beam width used by beam search
    d_embed=30,       # embedding size
    d_hidden=32,      # hidden-state size
    attention=True,   # switch attention on or off
    log_every=100,    # iterations between log lines / loss checks
    lr=0.007,         # starting learning rate
    lr_decay=0.5,     # factor applied to lr once the loss stops improving
    lr_min=5e-4,      # training stops when lr falls below this
    n_bad_loss=5,     # non-improving checks tolerated before decaying lr
    clip=2.3,         # gradient-clipping threshold (guards against exploding gradients)
    cuda=True,        # request the GPU
    seed=1234,        # RNG seed
)
args = argparse.Namespace(**parser)

In [6]:
# Use the GPU only when it was requested and is actually available.
args.cuda = args.cuda and torch.cuda.is_available()

# if not os.path.isdir(args.intermediate_path):
#     os.makedirs(args.intermediate_path)
# if not os.path.isdir(args.data_path):
#     os.makedirs(args.data_path)
#     URL = "https://github.com/cmusphinx/cmudict/archive/master.zip"
#     !wget $URL -O ./data/cmudict.zip
#     !unzip ./data/cmudict.zip -d ./data/
#     !mv ./data/cmudict-master $args.data_path

# Seed every RNG the notebook imports, not just torch, so that anything
# drawing from `random` or numpy (e.g. batch shuffling -- TODO confirm what
# m.prepare_data uses) is repeatable across kernel restarts as well.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

In [7]:
import pandas as pd
import pickle

In [8]:
# Load the word table; the first CSV column becomes the index.
df = pd.read_csv("filtered_no_stress_freq.csv", index_col=0)

In [9]:
# Shuffle all rows with a fixed seed.
# NOTE(review): `new_df` is not referenced by any later cell visible here; the
# cells below read from the unshuffled `df` -- confirm which one is intended.
new_df = df.sample(frac=1, random_state=777)

In [11]:
# NOTE(review): the train and test lines are both built from the full `word`
# column, so anything measured on the test split is measured on training data.
train_lines = df["word"].tolist()
test_lines = df["word"].tolist()
# Single line standing in for a validation set (presumably a placeholder,
# since a validation iterator is never consumed in the visible cells).
val_lines = ["this is a sudo line"]

In [12]:
# Build the three iterators and the grapheme/phoneme fields with the project helper.
train_iter, val_iter, test_iter, g_field, p_field = m.prepare_data(train_lines, val_lines, test_lines, args)
# Grapheme vocabulary as an index -> symbol list; passed to train() as `dic`.
dic = g_field.vocab.itos



In [14]:
# Number of batches the evaluation iterator yields.
len(test_iter)

34014

In [18]:
# Training
# set up configuration
# `config` is the same object as `args`, so the assignments below modify `args` too.
config = args
config.g_size = len(g_field.vocab)
config.p_size = len(p_field.vocab)
config.d_hidden = 512
config.epochs = 200
save_path = "model_para/train_on_training_512.pt"
# train() checkpoints with torch.save(..., save_path), which does not create
# missing directories, so make sure the target folder exists up front.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

model = m.G2P(config)
criterion = nn.NLLLoss()
if config.cuda:
    model.cuda()
    criterion.cuda()
optimizer = optim.Adam(model.parameters(), lr=config.lr)

# training state, read and updated by train() through `global`
iteration = n_total = train_loss = n_bad_loss = 0
stop = False
# A logged average loss has to fall below this before the first checkpoint is written.
best_train_loss = 10
init = time.time()

# If you want to change epoch, uncomment the following line
# config.epochs = 10

for epoch in range(1, config.epochs+1):
    train(config, train_iter, model, criterion, optimizer, epoch, dic, save_path)
    if stop:
        break


=> EPOCH 1


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:     3 | Iteration:   100 | Batch:  100/266 | Train loss: 1.1513 
   % Time:     6 | Iteration:   200 | Batch:  200/266 | Train loss: 0.2446 

=> EPOCH 2


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:     9 | Iteration:   300 | Batch:   34/266 | Train loss: 0.1446 
   % Time:    11 | Iteration:   400 | Batch:  134/266 | Train loss: 0.1193 
   % Time:    14 | Iteration:   500 | Batch:  234/266 | Train loss: 0.1110 

=> EPOCH 3


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    17 | Iteration:   600 | Batch:   68/266 | Train loss: 0.0932 
   % Time:    20 | Iteration:   700 | Batch:  168/266 | Train loss: 0.0887 

=> EPOCH 4


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    23 | Iteration:   800 | Batch:    2/266 | Train loss: 0.0881 
   % Time:    26 | Iteration:   900 | Batch:  102/266 | Train loss: 0.0689 
   % Time:    28 | Iteration:  1000 | Batch:  202/266 | Train loss: 0.0764 

=> EPOCH 5


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    31 | Iteration:  1100 | Batch:   36/266 | Train loss: 0.0725 
   % Time:    34 | Iteration:  1200 | Batch:  136/266 | Train loss: 0.0628 
   % Time:    37 | Iteration:  1300 | Batch:  236/266 | Train loss: 0.0705 

=> EPOCH 6


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    39 | Iteration:  1400 | Batch:   70/266 | Train loss: 0.0595 
   % Time:    42 | Iteration:  1500 | Batch:  170/266 | Train loss: 0.0618 

=> EPOCH 7


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    45 | Iteration:  1600 | Batch:    4/266 | Train loss: 0.0676 
   % Time:    48 | Iteration:  1700 | Batch:  104/266 | Train loss: 0.0534 
   % Time:    51 | Iteration:  1800 | Batch:  204/266 | Train loss: 0.0605 

=> EPOCH 8


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    53 | Iteration:  1900 | Batch:   38/266 | Train loss: 0.0589 
   % Time:    56 | Iteration:  2000 | Batch:  138/266 | Train loss: 0.0538 
   % Time:    59 | Iteration:  2100 | Batch:  238/266 | Train loss: 0.0651 

=> EPOCH 9


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    62 | Iteration:  2200 | Batch:   72/266 | Train loss: 0.0521 
   % Time:    65 | Iteration:  2300 | Batch:  172/266 | Train loss: 0.0552 

=> EPOCH 10


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    68 | Iteration:  2400 | Batch:    6/266 | Train loss: 0.0583 
   % Time:    70 | Iteration:  2500 | Batch:  106/266 | Train loss: 0.0474 
   % Time:    73 | Iteration:  2600 | Batch:  206/266 | Train loss: 0.0517 

=> EPOCH 11


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    76 | Iteration:  2700 | Batch:   40/266 | Train loss: 0.0544 
   % Time:    79 | Iteration:  2800 | Batch:  140/266 | Train loss: 0.0501 
   % Time:    81 | Iteration:  2900 | Batch:  240/266 | Train loss: 0.0633 

=> EPOCH 12


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    84 | Iteration:  3000 | Batch:   74/266 | Train loss: 0.0548 
=> Adjust learning rate to: 0.0035
   % Time:    87 | Iteration:  3100 | Batch:  174/266 | Train loss: 0.0411 

=> EPOCH 13


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    90 | Iteration:  3200 | Batch:    8/266 | Train loss: 0.0308 
   % Time:    93 | Iteration:  3300 | Batch:  108/266 | Train loss: 0.0153 
   % Time:    96 | Iteration:  3400 | Batch:  208/266 | Train loss: 0.0154 

=> EPOCH 14


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:    98 | Iteration:  3500 | Batch:   42/266 | Train loss: 0.0134 
   % Time:   101 | Iteration:  3600 | Batch:  142/266 | Train loss: 0.0094 
   % Time:   104 | Iteration:  3700 | Batch:  242/266 | Train loss: 0.0099 

=> EPOCH 15


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   107 | Iteration:  3800 | Batch:   76/266 | Train loss: 0.0069 
   % Time:   110 | Iteration:  3900 | Batch:  176/266 | Train loss: 0.0069 

=> EPOCH 16


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   113 | Iteration:  4000 | Batch:   10/266 | Train loss: 0.0074 
   % Time:   116 | Iteration:  4100 | Batch:  110/266 | Train loss: 0.0044 
   % Time:   118 | Iteration:  4200 | Batch:  210/266 | Train loss: 0.0049 

=> EPOCH 17


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   121 | Iteration:  4300 | Batch:   44/266 | Train loss: 0.0052 
   % Time:   124 | Iteration:  4400 | Batch:  144/266 | Train loss: 0.0045 
   % Time:   127 | Iteration:  4500 | Batch:  244/266 | Train loss: 0.0056 

=> EPOCH 18


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   130 | Iteration:  4600 | Batch:   78/266 | Train loss: 0.0054 
=> Adjust learning rate to: 0.00175
   % Time:   132 | Iteration:  4700 | Batch:  178/266 | Train loss: 0.0049 

=> EPOCH 19


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   135 | Iteration:  4800 | Batch:   12/266 | Train loss: 0.0040 
   % Time:   138 | Iteration:  4900 | Batch:  112/266 | Train loss: 0.0015 
   % Time:   141 | Iteration:  5000 | Batch:  212/266 | Train loss: 0.0017 

=> EPOCH 20


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   144 | Iteration:  5100 | Batch:   46/266 | Train loss: 0.0015 
   % Time:   147 | Iteration:  5200 | Batch:  146/266 | Train loss: 0.0011 
   % Time:   150 | Iteration:  5300 | Batch:  246/266 | Train loss: 0.0011 

=> EPOCH 21


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   153 | Iteration:  5400 | Batch:   80/266 | Train loss: 0.0008 
   % Time:   155 | Iteration:  5500 | Batch:  180/266 | Train loss: 0.0007 

=> EPOCH 22


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   158 | Iteration:  5600 | Batch:   14/266 | Train loss: 0.0005 
   % Time:   161 | Iteration:  5700 | Batch:  114/266 | Train loss: 0.0004 
   % Time:   164 | Iteration:  5800 | Batch:  214/266 | Train loss: 0.0004 

=> EPOCH 23


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   167 | Iteration:  5900 | Batch:   48/266 | Train loss: 0.0005 
   % Time:   170 | Iteration:  6000 | Batch:  148/266 | Train loss: 0.0004 
   % Time:   173 | Iteration:  6100 | Batch:  248/266 | Train loss: 0.0004 

=> EPOCH 24


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   175 | Iteration:  6200 | Batch:   82/266 | Train loss: 0.0003 
   % Time:   178 | Iteration:  6300 | Batch:  182/266 | Train loss: 0.0004 

=> EPOCH 25


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   181 | Iteration:  6400 | Batch:   16/266 | Train loss: 0.0004 
   % Time:   184 | Iteration:  6500 | Batch:  116/266 | Train loss: 0.0003 
   % Time:   187 | Iteration:  6600 | Batch:  216/266 | Train loss: 0.0003 

=> EPOCH 26


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   190 | Iteration:  6700 | Batch:   50/266 | Train loss: 0.0002 
   % Time:   193 | Iteration:  6800 | Batch:  150/266 | Train loss: 0.0003 
   % Time:   195 | Iteration:  6900 | Batch:  250/266 | Train loss: 0.0005 

=> EPOCH 27


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   198 | Iteration:  7000 | Batch:   84/266 | Train loss: 0.0005 
   % Time:   201 | Iteration:  7100 | Batch:  184/266 | Train loss: 0.0006 

=> EPOCH 28


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   204 | Iteration:  7200 | Batch:   18/266 | Train loss: 0.0013 
=> Adjust learning rate to: 0.000875
   % Time:   207 | Iteration:  7300 | Batch:  118/266 | Train loss: 0.0015 
   % Time:   210 | Iteration:  7400 | Batch:  218/266 | Train loss: 0.0017 

=> EPOCH 29


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   212 | Iteration:  7500 | Batch:   52/266 | Train loss: 0.0011 
   % Time:   215 | Iteration:  7600 | Batch:  152/266 | Train loss: 0.0009 
   % Time:   218 | Iteration:  7700 | Batch:  252/266 | Train loss: 0.0010 

=> EPOCH 30


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   221 | Iteration:  7800 | Batch:   86/266 | Train loss: 0.0007 
   % Time:   224 | Iteration:  7900 | Batch:  186/266 | Train loss: 0.0005 

=> EPOCH 31


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   227 | Iteration:  8000 | Batch:   20/266 | Train loss: 0.0003 
   % Time:   230 | Iteration:  8100 | Batch:  120/266 | Train loss: 0.0003 
   % Time:   232 | Iteration:  8200 | Batch:  220/266 | Train loss: 0.0004 

=> EPOCH 32


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   235 | Iteration:  8300 | Batch:   54/266 | Train loss: 0.0003 
   % Time:   238 | Iteration:  8400 | Batch:  154/266 | Train loss: 0.0003 
   % Time:   241 | Iteration:  8500 | Batch:  254/266 | Train loss: 0.0002 

=> EPOCH 33


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   244 | Iteration:  8600 | Batch:   88/266 | Train loss: 0.0001 
   % Time:   247 | Iteration:  8700 | Batch:  188/266 | Train loss: 0.0002 

=> EPOCH 34


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   250 | Iteration:  8800 | Batch:   22/266 | Train loss: 0.0001 
   % Time:   253 | Iteration:  8900 | Batch:  122/266 | Train loss: 0.0002 
   % Time:   255 | Iteration:  9000 | Batch:  222/266 | Train loss: 0.0002 

=> EPOCH 35


HBox(children=(FloatProgress(value=0.0, max=266.0), HTML(value='')))

   % Time:   258 | Iteration:  9100 | Batch:   56/266 | Train loss: 0.0001 
   % Time:   261 | Iteration:  9200 | Batch:  156/266 | Train loss: 0.0002 
   % Time:   264 | Iteration:  9300 | Batch:  256/266 | Train loss: 0.0003 
=> Adjust learning rate to: 0.0004375



In [24]:
# The hidden size must match the one the checkpoint was trained with.
config.d_hidden = 512
save_path = "model_para/train_on_training_512.pt"
# Follow the GPU availability already resolved in `config.cuda` instead of assuming CUDA.
device = torch.device("cuda" if config.cuda else "cpu")
model = m.G2P(config)
# map_location lets a checkpoint written on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(save_path, map_location=device))
model.to(device)

G2P(
  (encoder): Encoder(
    (embedding): Embedding(32, 30)
    (lstm): LSTMCell(30, 512)
  )
  (decoder): Decoder(
    (embedding): Embedding(47, 30)
    (lstm): LSTMCell(30, 512)
    (attn): Attention(
      (linear): Linear(in_features=1024, out_features=512, bias=False)
    )
    (linear): Linear(in_features=512, out_features=47, bias=True)
  )
)

In [25]:
# Evaluate the loaded model; returns the PER (percent) and the mis-predicted words.
# NOTE(review): `test_iter` was built from the same words as the training set
# (see train_lines / test_lines), so this is a training-set error rate.
train_per, mis_classified = test(test_iter, model, g_dic=g_field.vocab.itos, p_dic=p_field.vocab.itos)

Phoneme error rate (PER): 2.61



In [22]:
# The hidden size must match the checkpoint being loaded.
config.d_hidden = 64
save_path = "model_para/train_on_training.pt"
# Follow the GPU availability already resolved in `config.cuda` instead of assuming CUDA.
device = torch.device("cuda" if config.cuda else "cpu")
model = m.G2P(config)
# map_location lets a checkpoint written on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(save_path, map_location=device))
model.to(device)

G2P(
  (encoder): Encoder(
    (embedding): Embedding(32, 30)
    (lstm): LSTMCell(30, 64)
  )
  (decoder): Decoder(
    (embedding): Embedding(47, 30)
    (lstm): LSTMCell(30, 64)
    (attn): Attention(
      (linear): Linear(in_features=128, out_features=64, bias=False)
    )
    (linear): Linear(in_features=64, out_features=47, bias=True)
  )
)

In [23]:
# Evaluate the loaded model; returns the PER (percent) and the mis-predicted words.
# NOTE(review): `test_iter` was built from the same words as the training set
# (see train_lines / test_lines), so this is a training-set error rate.
train_per, mis_classified = test(test_iter, model, g_dic=g_field.vocab.itos, p_dic=p_field.vocab.itos)

Phoneme error rate (PER): 5.42

