In [31]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel
import torch.optim as optim
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import os
from jiwer import wer
import yaml
import json 

In [32]:
####### Config #######
config_path = "conf_3"
config_file = os.path.join(config_path, "config.yml")
with open(config_file,'r') as conf:
    config = yaml.load(conf, Loader=yaml.SafeLoader)


In [33]:
class AddNorm(nn.Module):
    def __init__(self, norm_shape: int, dropout=0.2):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(norm_shape)

    def forward(self, X, Y):
        return self.ln(self.dropout(Y) + X)

In [34]:
class FeedForwardNetwork(nn.Module):
    def __init__(self, input_dim: int, hidden_ff_dim: int, dropout=0.2):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_ff_dim)
        self.dropout = nn.Dropout(dropout)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(hidden_ff_dim, input_dim)

    def forward(self, x):
        return self.linear2(self.dropout(self.relu1(self.linear1(x))))

In [35]:
class ShrinkNorm(nn.Module):
    def __init__(self, input_dim: int, shrink_norm_hidden: int, output_dim: int, dropout=0.2):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(input_dim, shrink_norm_hidden)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(shrink_norm_hidden, output_dim)
        self.ln = nn.LayerNorm(output_dim)

    def forward(self, x):
        return self.ln(self.linear2(self.dropout(self.relu1(self.linear1(x)))))

In [36]:
class SinoVietnameseTranslator(nn.Module):
    def __init__(self, tokenizer, base_model, vocab, hidden_ff_dim=512, model_hidden_dim=512, 
                 large_hidden_classification_head_dim=256, small_hidden_classification_head_dim=128,
                 shrink_norm_hidden=512, max_num_spellings=7, num_spelling_threshold=3, train_bert_param=True, dropout=0.2):
        super(SinoVietnameseTranslator, self).__init__()
        self.tokenizer = tokenizer
        self.bert = base_model
        self.vocab = vocab
        self.max_num_spellings = max_num_spellings
        
        for param in self.bert.parameters():
            param.requires_grad = train_bert_param
        
        self.shrink_norm = ShrinkNorm(self.bert.config.hidden_size,shrink_norm_hidden, model_hidden_dim, dropout)
        self.feed_forward = FeedForwardNetwork(model_hidden_dim, hidden_ff_dim, dropout)
        self.add_norm = AddNorm(model_hidden_dim, dropout)
        
        self.classification_heads = nn.ModuleDict()
        for sino_word, viet_spellings in self.vocab.items():
            if len(viet_spellings) > 1 and len(viet_spellings) <= num_spelling_threshold:
                num_spellings = len(viet_spellings)
                self.classification_heads[sino_word] = nn.Sequential(
                    nn.Linear(model_hidden_dim, small_hidden_classification_head_dim),
                    nn.ReLU(),
                    nn.Dropout(dropout),
                    nn.Linear(small_hidden_classification_head_dim, num_spellings),
                    nn.Softmax(dim=-1)
                )
            elif len(viet_spellings) > num_spelling_threshold:
                num_spellings = len(viet_spellings)
                self.classification_heads[sino_word] = nn.Sequential(
                    nn.Linear(model_hidden_dim, large_hidden_classification_head_dim),
                    nn.ReLU(),
                    nn.Dropout(dropout),
                    nn.Linear(large_hidden_classification_head_dim, num_spellings),
                    nn.Softmax(dim=-1)
                )

    def forward(self, input_ids, attention_mask=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        shrink_output = self.shrink_norm(sequence_output)
        projected_output = self.add_norm(shrink_output, self.feed_forward(shrink_output))
        
        batch_size, max_len = input_ids.size()
        predictions = torch.full((batch_size, max_len, self.max_num_spellings), -1.0, device=input_ids.device)
        
        for i in range(batch_size):
            for j in range(max_len):
                token_id = input_ids[i, j].item()
                if token_id == self.tokenizer.pad_token_id:
                    continue
                    
                sino_word = self.tokenizer.convert_ids_to_tokens(token_id)
                
                if sino_word in self.classification_heads:
                    logits = self.classification_heads[sino_word](projected_output[i, j])
                    predictions[i, j, :len(logits)] = logits
                else:
                    predictions[i, j, 0] = 1.0

        return predictions

In [37]:
class SinoVietnameseDataset(Dataset):
    def __init__(self, tokenizer, data, vocab, max_len=512):
        self.data = data
        self.tokenizer = tokenizer
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sino_sent, viet_sent = self.data[idx]
        sino_tokens = self.tokenizer.encode(sino_sent, add_special_tokens=False, max_length=self.max_len, truncation=True)
        viet_spellings = viet_sent.split()

        input_ids = sino_tokens + [self.tokenizer.pad_token_id] * (self.max_len - len(sino_tokens))

        labels = []
        for i, sino_word_id in enumerate(sino_tokens):
            sino_word = self.tokenizer.convert_ids_to_tokens(sino_word_id)
            if sino_word in self.vocab:
                viet_spellings_for_word = self.vocab[sino_word]
                if len(viet_spellings_for_word) > 1:
                    label = viet_spellings_for_word.index(viet_spellings[i])
                else:
                    label = -1
            else:
                label = -1
            labels.append(label)

        labels += [-1] * (self.max_len - len(labels))  # Padding
        attention_mask = [1] * len(sino_tokens) + [0] * (self.max_len - len(sino_tokens))

        return {
            "input_ids": torch.tensor(input_ids),
            "labels": torch.tensor(labels),
            "attention_mask": torch.tensor(attention_mask),
        }


In [38]:

def load_data(data_file):
    data = []
    with open(data_file, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            if ',' not in line:
                continue
            sino_sent, viet_sent = line.strip().split(',')
            data.append((sino_sent, viet_sent))
    return data

train_data_path = "data/train.txt"
test_data_path = "data/test.txt"
train_data = load_data(train_data_path)
test_data = load_data(test_data_path)

In [39]:
with open('vocab/vocab.json', 'r') as vocab_file, open('vocab/sino_viet_words.json', 'r') as words_file:
    base_vocab = json.load(vocab_file)
    sino_viet_words = json.load(words_file)

print(type(base_vocab))
print(len(base_vocab))
print(type(sino_viet_words))
print(len(sino_viet_words))

<class 'dict'>
7688
<class 'list'>
7688


In [40]:
# Model Config
bert_model = config['model_config']['bert_model'] 

base_tokenizer = BertTokenizer.from_pretrained(bert_model)
base_tokenizer.add_tokens(sino_viet_words)

base_model = BertModel.from_pretrained(bert_model)
base_model.resize_token_embeddings(len(base_tokenizer))

Embedding(23683, 768)

In [41]:
# Data Config
batch_size = config['data_config']['batch_size']
max_len = config['data_config']['max_len']

train_dataset = SinoVietnameseDataset(base_tokenizer, train_data, base_vocab, max_len)
test_dataset = SinoVietnameseDataset(base_tokenizer, test_data, base_vocab, max_len)

print(f"Train set: {len(train_dataset)}")
print(f"Test set: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train batch num: {len(train_loader)}")
print(f"Test batch num: {len(test_loader)}")

Train set: 153372
Test set: 17042
Train batch num: 9586
Test batch num: 1066


In [42]:
def decode_predictions(predictions, input_ids, tokenizer, vocab):
    decoded_sentences = []
    for i, predicted_indices in enumerate(predictions):
        decoded_sentence = []
        for j, spelling_index in enumerate(predicted_indices):
            token = input_ids[i, j].item()
            if token == tokenizer.pad_token_id:
                continue
                
            sino_word = tokenizer.convert_ids_to_tokens(token)
            if spelling_index == -1:
                viet_spelling = vocab[sino_word][0]
            else:
                viet_spelling = vocab[sino_word][spelling_index]
            decoded_sentence.append(viet_spelling)

        decoded_sentences.append(" ".join(decoded_sentence))
    return decoded_sentences

In [43]:
def train(model, train_dataloader, test_dataloader, epochs=3, lr=1e-5, 
          max_grad_norm=1.0, model_load_path=None, config_folder_dir="config/"):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    tokenizer = model.tokenizer

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=-1)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)
    
    log_dir = os.path.join(config_folder_dir, f"running/")
    os.makedirs(log_dir, exist_ok=True)
    train_losses_dir = os.path.join(log_dir, f"train_losses.txt")
    test_losses_dir = os.path.join(log_dir, f"test_losses.txt")
    test_accuracies_dir = os.path.join(log_dir, f"test_accuracies.txt")
    test_wers_dir = os.path.join(log_dir, f"test_wers.txt")
    
    if model_load_path:
        model.load_state_dict(torch.load(model_load_path))
    # Determine the starting epoch
    start_epoch = 0
    if model_load_path:
        start_epoch = int(model_load_path.split("_")[-1].split(".")[0]) 
    
    for epoch in range(start_epoch, start_epoch + epochs):
        model.train()
        total_loss = 0

        # Training loop with progress bar
        train_iterator = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{start_epoch + epochs}", unit="batch")
        for batch in train_iterator:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            optimizer.zero_grad()

            outputs = model(input_ids, attention_mask=attention_mask)

            # Flatten 
            preds = outputs.view(-1, outputs.size(-1))
            targets = labels.view(-1)

            loss = criterion(preds, targets) # batch loss
            total_loss += loss.item()

            loss.backward()
            clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            
            train_iterator.set_postfix(loss=loss.item())

        avg_train_loss = total_loss / len(train_dataloader)

        with open(train_losses_dir, 'a') as tl:
            tl.write(f"{avg_train_loss};")

        print(f"Epoch {epoch+1}/{start_epoch + epochs}, Training Loss: {avg_train_loss}")

        ################################## Run test ##################################
        model.eval()
        total_test_loss = 0
        correct_predictions = 0 # calculate accuracies over sino words that have multiple viet spellings only
        total_predictions = 0
        all_ground_truths = []
        all_predictions = []
        
        with torch.no_grad():
            test_iterator = tqdm(test_dataloader, desc="Validating", unit="batch")
            for batch in test_iterator:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)

                preds = outputs.view(-1, outputs.size(-1)) # Flatten
                targets = labels.view(-1)

                test_loss = criterion(preds, targets)
                total_test_loss += test_loss.item()

                predictions = torch.argmax(outputs, dim=-1)
                mask = labels != -1
                correct_predictions += (predictions[mask] == labels[mask]).sum().item()
                total_predictions += mask.sum().item()
                
                batch_predictions = decode_predictions(predictions, input_ids, tokenizer, model.vocab)
                batch_ground_truths = decode_predictions(labels, input_ids, tokenizer, model.vocab)
                all_predictions.extend(batch_predictions)
                all_ground_truths.extend(batch_ground_truths)
            
            avg_test_loss = total_test_loss / len(test_dataloader)
            with open(test_losses_dir, 'a') as tl2:
                tl2.write(f"{avg_test_loss};")
            
            test_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
            with open(test_accuracies_dir, 'a') as ta:
                ta.write(f"{test_accuracy * 100};")
            
            test_wer = wer(all_ground_truths, all_predictions)
            with open(test_wers_dir, 'a') as tw:
                tw.write(f"{test_wer * 100};")
            
            print(f"Epoch {epoch+1}/{start_epoch + epochs}, Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.4f}, Test WER: {test_wer * 100:.4f}")

        scheduler.step(avg_test_loss)
        print(f"Learning rate: {scheduler.get_last_lr()}")
        
        # Save the model after each epoch
        save_dir = os.path.join(log_dir, f"saved_model/")
        os.makedirs(save_dir, exist_ok=True)
        model_save_path = os.path.join(save_dir, f"sivi_model_epoch_{epoch+1}.pt")
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")

In [44]:
# Model config
hidden_ff_dim = config['model_config']['hidden_ff_dim']
model_hidden_dim = config['model_config']['model_hidden_dim']
shrink_norm_hidden = config['model_config']['shrink_norm_hidden']
large_hidden_classification_head_dim = config['model_config']['large_hidden_classification_head_dim']
small_hidden_classification_head_dim = config['model_config']['small_hidden_classification_head_dim']
max_num_spellings = config['model_config']['max_num_spellings']
num_spelling_threshold = config['model_config']['num_spelling_threshold']
train_bert_param = config['model_config']['train_bert_param']
dropout = config['model_config']['dropout']

model = SinoVietnameseTranslator(base_tokenizer, base_model, base_vocab, hidden_ff_dim=hidden_ff_dim, 
                                model_hidden_dim=model_hidden_dim, shrink_norm_hidden=shrink_norm_hidden,
                                large_hidden_classification_head_dim=large_hidden_classification_head_dim,
                                small_hidden_classification_head_dim=small_hidden_classification_head_dim,
                                max_num_spellings=max_num_spellings, train_bert_param=train_bert_param,
                                num_spelling_threshold=num_spelling_threshold, dropout=dropout)

num_param = sum([param.nelement() for param in model.parameters()]) / 1000000
print(f"{num_param:.1f}M params.")

175.6M params.


In [30]:
# Trainning config
num_epochs = config['training_config']['num_epochs']
learning_rate = config['training_config']['learning_rate']
model_load_path = None if config['training_config']['model_load_path'] == 'None' else config['training_config']['model_load_path']
config_folder_dir = config_path

train(model, train_loader, test_loader, epochs=num_epochs, lr=learning_rate,
    model_load_path=model_load_path, config_folder_dir=config_folder_dir)

Epoch 1/60: 100%|██████████| 9586/9586 [1:10:43<00:00,  2.26batch/s, loss=0.859]


Epoch 1/60, Training Loss: 0.8995738918699702


Validating: 100%|██████████| 1066/1066 [07:27<00:00,  2.38batch/s]


Epoch 1/60, Test Loss: 0.8336, Test Accuracy: 91.8918, Test WER: 2.3552
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_1.pt


Epoch 2/60: 100%|██████████| 9586/9586 [1:10:33<00:00,  2.26batch/s, loss=0.818]


Epoch 2/60, Training Loss: 0.8273595834969728


Validating: 100%|██████████| 1066/1066 [07:21<00:00,  2.41batch/s]


Epoch 2/60, Test Loss: 0.8177, Test Accuracy: 92.7644, Test WER: 2.1017
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_2.pt


Epoch 3/60: 100%|██████████| 9586/9586 [1:10:28<00:00,  2.27batch/s, loss=0.787]


Epoch 3/60, Training Loss: 0.8150773339335714


Validating: 100%|██████████| 1066/1066 [07:26<00:00,  2.39batch/s]


Epoch 3/60, Test Loss: 0.8114, Test Accuracy: 93.2099, Test WER: 1.9723
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_3.pt


Epoch 4/60: 100%|██████████| 9586/9586 [1:10:39<00:00,  2.26batch/s, loss=0.868]


Epoch 4/60, Training Loss: 0.8077745053978571


Validating: 100%|██████████| 1066/1066 [07:27<00:00,  2.38batch/s]


Epoch 4/60, Test Loss: 0.8072, Test Accuracy: 93.5270, Test WER: 1.8802
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_4.pt


Epoch 5/60: 100%|██████████| 9586/9586 [1:10:52<00:00,  2.25batch/s, loss=0.783]


Epoch 5/60, Training Loss: 0.8034444764526956


Validating: 100%|██████████| 1066/1066 [07:22<00:00,  2.41batch/s]


Epoch 5/60, Test Loss: 0.8061, Test Accuracy: 93.6334, Test WER: 1.8493
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_5.pt


Epoch 6/60: 100%|██████████| 9586/9586 [1:10:45<00:00,  2.26batch/s, loss=0.76] 


Epoch 6/60, Training Loss: 0.7994044905300307


Validating: 100%|██████████| 1066/1066 [07:26<00:00,  2.39batch/s]


Epoch 6/60, Test Loss: 0.8044, Test Accuracy: 93.7305, Test WER: 1.8211
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_6.pt


Epoch 7/60: 100%|██████████| 9586/9586 [1:11:31<00:00,  2.23batch/s, loss=0.837]


Epoch 7/60, Training Loss: 0.7961974684129596


Validating: 100%|██████████| 1066/1066 [08:08<00:00,  2.18batch/s]


Epoch 7/60, Test Loss: 0.8028, Test Accuracy: 93.8643, Test WER: 1.7822
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_7.pt


Epoch 8/60: 100%|██████████| 9586/9586 [1:13:20<00:00,  2.18batch/s, loss=0.735]


Epoch 8/60, Training Loss: 0.7931063297071961


Validating: 100%|██████████| 1066/1066 [08:05<00:00,  2.20batch/s]


Epoch 8/60, Test Loss: 0.8015, Test Accuracy: 93.9468, Test WER: 1.7583
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_8.pt


Epoch 9/60: 100%|██████████| 9586/9586 [1:13:20<00:00,  2.18batch/s, loss=0.788]


Epoch 9/60, Training Loss: 0.7905960760819447


Validating: 100%|██████████| 1066/1066 [08:00<00:00,  2.22batch/s]


Epoch 9/60, Test Loss: 0.8000, Test Accuracy: 94.1045, Test WER: 1.7125
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_9.pt


Epoch 10/60: 100%|██████████| 9586/9586 [1:13:15<00:00,  2.18batch/s, loss=0.817]


Epoch 10/60, Training Loss: 0.7882982905376636


Validating: 100%|██████████| 1066/1066 [08:05<00:00,  2.20batch/s]


Epoch 10/60, Test Loss: 0.7998, Test Accuracy: 94.1595, Test WER: 1.6965
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_10.pt


Epoch 11/60: 100%|██████████| 9586/9586 [1:13:23<00:00,  2.18batch/s, loss=0.812]


Epoch 11/60, Training Loss: 0.7862719512361339


Validating: 100%|██████████| 1066/1066 [08:21<00:00,  2.13batch/s]


Epoch 11/60, Test Loss: 0.7994, Test Accuracy: 94.1778, Test WER: 1.6912
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_11.pt


Epoch 12/60: 100%|██████████| 9586/9586 [1:13:19<00:00,  2.18batch/s, loss=0.829]


Epoch 12/60, Training Loss: 0.7846030890568547


Validating: 100%|██████████| 1066/1066 [08:02<00:00,  2.21batch/s]


Epoch 12/60, Test Loss: 0.7990, Test Accuracy: 94.1962, Test WER: 1.6859
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_12.pt


Epoch 13/60: 100%|██████████| 9586/9586 [1:11:23<00:00,  2.24batch/s, loss=0.752]


Epoch 13/60, Training Loss: 0.7827939939864513


Validating: 100%|██████████| 1066/1066 [07:15<00:00,  2.45batch/s]


Epoch 13/60, Test Loss: 0.7980, Test Accuracy: 94.2878, Test WER: 1.6592
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_13.pt


Epoch 14/60: 100%|██████████| 9586/9586 [1:10:29<00:00,  2.27batch/s, loss=0.887]


Epoch 14/60, Training Loss: 0.7813843189488615


Validating: 100%|██████████| 1066/1066 [07:25<00:00,  2.39batch/s]


Epoch 14/60, Test Loss: 0.7982, Test Accuracy: 94.2621, Test WER: 1.6667
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_14.pt


Epoch 15/60: 100%|██████████| 9586/9586 [1:10:33<00:00,  2.26batch/s, loss=0.832]


Epoch 15/60, Training Loss: 0.7800157277526374


Validating: 100%|██████████| 1066/1066 [07:21<00:00,  2.42batch/s]


Epoch 15/60, Test Loss: 0.7975, Test Accuracy: 94.3098, Test WER: 1.6528
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_15.pt


Epoch 16/60: 100%|██████████| 9586/9586 [1:12:02<00:00,  2.22batch/s, loss=0.793]


Epoch 16/60, Training Loss: 0.7789544949582791


Validating: 100%|██████████| 1066/1066 [08:37<00:00,  2.06batch/s]


Epoch 16/60, Test Loss: 0.7963, Test Accuracy: 94.4601, Test WER: 1.6092
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_16.pt


Epoch 17/60: 100%|██████████| 9586/9586 [1:14:47<00:00,  2.14batch/s, loss=0.742]


Epoch 17/60, Training Loss: 0.7779217638726675


Validating: 100%|██████████| 1066/1066 [08:47<00:00,  2.02batch/s]


Epoch 17/60, Test Loss: 0.7960, Test Accuracy: 94.4345, Test WER: 1.6166
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_17.pt


Epoch 18/60: 100%|██████████| 9586/9586 [1:14:44<00:00,  2.14batch/s, loss=0.744]


Epoch 18/60, Training Loss: 0.7767775221965817


Validating: 100%|██████████| 1066/1066 [08:46<00:00,  2.03batch/s]


Epoch 18/60, Test Loss: 0.7948, Test Accuracy: 94.5646, Test WER: 1.5788
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_18.pt


Epoch 19/60: 100%|██████████| 9586/9586 [1:14:50<00:00,  2.13batch/s, loss=0.738]


Epoch 19/60, Training Loss: 0.7762631360770217


Validating: 100%|██████████| 1066/1066 [08:50<00:00,  2.01batch/s]


Epoch 19/60, Test Loss: 0.7959, Test Accuracy: 94.4436, Test WER: 1.6140
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_19.pt


Epoch 20/60: 100%|██████████| 9586/9586 [1:15:23<00:00,  2.12batch/s, loss=0.78] 


Epoch 20/60, Training Loss: 0.7754699324929787


Validating: 100%|██████████| 1066/1066 [08:52<00:00,  2.00batch/s]


Epoch 20/60, Test Loss: 0.7945, Test Accuracy: 94.6178, Test WER: 1.5634
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_20.pt


Epoch 21/60: 100%|██████████| 9586/9586 [1:15:06<00:00,  2.13batch/s, loss=0.762]


Epoch 21/60, Training Loss: 0.7746074558013222


Validating: 100%|██████████| 1066/1066 [08:45<00:00,  2.03batch/s]


Epoch 21/60, Test Loss: 0.7947, Test Accuracy: 94.5793, Test WER: 1.5746
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_21.pt


Epoch 22/60: 100%|██████████| 9586/9586 [1:14:41<00:00,  2.14batch/s, loss=0.784]


Epoch 22/60, Training Loss: 0.7741521141013248


Validating: 100%|██████████| 1066/1066 [08:44<00:00,  2.03batch/s]


Epoch 22/60, Test Loss: 0.7948, Test Accuracy: 94.6031, Test WER: 1.5677
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_22.pt


Epoch 23/60: 100%|██████████| 9586/9586 [1:14:58<00:00,  2.13batch/s, loss=0.744]


Epoch 23/60, Training Loss: 0.7736161400325209


Validating: 100%|██████████| 1066/1066 [08:49<00:00,  2.01batch/s]


Epoch 23/60, Test Loss: 0.7939, Test Accuracy: 94.6893, Test WER: 1.5426
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_23.pt


Epoch 24/60: 100%|██████████| 9586/9586 [1:15:00<00:00,  2.13batch/s, loss=0.764]


Epoch 24/60, Training Loss: 0.7731286829382946


Validating: 100%|██████████| 1066/1066 [08:52<00:00,  2.00batch/s]


Epoch 24/60, Test Loss: 0.7942, Test Accuracy: 94.6361, Test WER: 1.5581
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_24.pt


Epoch 25/60: 100%|██████████| 9586/9586 [1:14:49<00:00,  2.14batch/s, loss=0.778]


Epoch 25/60, Training Loss: 0.7726239793339925


Validating: 100%|██████████| 1066/1066 [08:48<00:00,  2.02batch/s]


Epoch 25/60, Test Loss: 0.7943, Test Accuracy: 94.6361, Test WER: 1.5581
Learning rate: [1e-05]
Model saved to conf_2\running/saved_model/sivi_model_epoch_25.pt


Epoch 26/60: 100%|██████████| 9586/9586 [1:14:52<00:00,  2.13batch/s, loss=0.775]


Epoch 26/60, Training Loss: 0.7721331426887404


Validating: 100%|██████████| 1066/1066 [08:52<00:00,  2.00batch/s]


Epoch 26/60, Test Loss: 0.7945, Test Accuracy: 94.5976, Test WER: 1.5692
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_26.pt


Epoch 27/60: 100%|██████████| 9586/9586 [1:14:43<00:00,  2.14batch/s, loss=0.829]


Epoch 27/60, Training Loss: 0.7711083670262378


Validating: 100%|██████████| 1066/1066 [08:38<00:00,  2.06batch/s]


Epoch 27/60, Test Loss: 0.7926, Test Accuracy: 94.8176, Test WER: 1.5053
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_27.pt


Epoch 28/60: 100%|██████████| 9586/9586 [1:14:48<00:00,  2.14batch/s, loss=0.776]


Epoch 28/60, Training Loss: 0.7701767231362014


Validating: 100%|██████████| 1066/1066 [08:51<00:00,  2.01batch/s]


Epoch 28/60, Test Loss: 0.7929, Test Accuracy: 94.7644, Test WER: 1.5208
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_28.pt


Epoch 29/60: 100%|██████████| 9586/9586 [1:15:01<00:00,  2.13batch/s, loss=0.868]


Epoch 29/60, Training Loss: 0.7696713276989346


Validating: 100%|██████████| 1066/1066 [08:52<00:00,  2.00batch/s]


Epoch 29/60, Test Loss: 0.7922, Test Accuracy: 94.8488, Test WER: 1.4963
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_29.pt


Epoch 30/60: 100%|██████████| 9586/9586 [1:14:54<00:00,  2.13batch/s, loss=0.738]


Epoch 30/60, Training Loss: 0.7694201147459306


Validating: 100%|██████████| 1066/1066 [08:57<00:00,  1.98batch/s]


Epoch 30/60, Test Loss: 0.7921, Test Accuracy: 94.8378, Test WER: 1.4995
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_30.pt


Epoch 31/60: 100%|██████████| 9586/9586 [1:15:46<00:00,  2.11batch/s, loss=0.805]


Epoch 31/60, Training Loss: 0.7692694492574378


Validating: 100%|██████████| 1066/1066 [09:12<00:00,  1.93batch/s]


Epoch 31/60, Test Loss: 0.7919, Test Accuracy: 94.8213, Test WER: 1.5043
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_31.pt


Epoch 32/60: 100%|██████████| 9586/9586 [1:16:00<00:00,  2.10batch/s, loss=0.748]


Epoch 32/60, Training Loss: 0.7688934817994136


Validating: 100%|██████████| 1066/1066 [09:02<00:00,  1.97batch/s]


Epoch 32/60, Test Loss: 0.7918, Test Accuracy: 94.8689, Test WER: 1.4904
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_32.pt


Epoch 33/60: 100%|██████████| 9586/9586 [1:15:21<00:00,  2.12batch/s, loss=0.747]


Epoch 33/60, Training Loss: 0.768721918130304


Validating: 100%|██████████| 1066/1066 [09:06<00:00,  1.95batch/s]


Epoch 33/60, Test Loss: 0.7912, Test Accuracy: 94.9203, Test WER: 1.4755
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_33.pt


Epoch 34/60: 100%|██████████| 9586/9586 [1:15:37<00:00,  2.11batch/s, loss=0.781]


Epoch 34/60, Training Loss: 0.7681147836703686


Validating: 100%|██████████| 1066/1066 [09:06<00:00,  1.95batch/s]


Epoch 34/60, Test Loss: 0.7916, Test Accuracy: 94.8854, Test WER: 1.4856
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_34.pt


Epoch 35/60: 100%|██████████| 9586/9586 [1:15:30<00:00,  2.12batch/s, loss=0.768]


Epoch 35/60, Training Loss: 0.7679637912206154


Validating: 100%|██████████| 1066/1066 [08:56<00:00,  1.99batch/s]


Epoch 35/60, Test Loss: 0.7919, Test Accuracy: 94.8854, Test WER: 1.4856
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_35.pt


Epoch 36/60: 100%|██████████| 9586/9586 [1:15:45<00:00,  2.11batch/s, loss=0.752]


Epoch 36/60, Training Loss: 0.7678866310741915


Validating: 100%|██████████| 1066/1066 [08:58<00:00,  1.98batch/s]


Epoch 36/60, Test Loss: 0.7908, Test Accuracy: 95.0027, Test WER: 1.4516
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_36.pt


Epoch 37/60: 100%|██████████| 9586/9586 [1:15:38<00:00,  2.11batch/s, loss=0.768]


Epoch 37/60, Training Loss: 0.7677859992951609


Validating: 100%|██████████| 1066/1066 [08:49<00:00,  2.01batch/s]


Epoch 37/60, Test Loss: 0.7911, Test Accuracy: 94.9239, Test WER: 1.4745
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_37.pt


Epoch 38/60: 100%|██████████| 9586/9586 [1:15:31<00:00,  2.12batch/s, loss=0.748]


Epoch 38/60, Training Loss: 0.767597239393793


Validating: 100%|██████████| 1066/1066 [08:42<00:00,  2.04batch/s]


Epoch 38/60, Test Loss: 0.7907, Test Accuracy: 94.9918, Test WER: 1.4548
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_38.pt


Epoch 39/60: 100%|██████████| 9586/9586 [1:15:15<00:00,  2.12batch/s, loss=0.76] 


Epoch 39/60, Training Loss: 0.7673972663577454


Validating: 100%|██████████| 1066/1066 [08:40<00:00,  2.05batch/s]


Epoch 39/60, Test Loss: 0.7902, Test Accuracy: 95.0027, Test WER: 1.4516
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_39.pt


Epoch 40/60: 100%|██████████| 9586/9586 [1:15:01<00:00,  2.13batch/s, loss=0.75] 


Epoch 40/60, Training Loss: 0.767116001403185


Validating: 100%|██████████| 1066/1066 [08:46<00:00,  2.02batch/s]


Epoch 40/60, Test Loss: 0.7900, Test Accuracy: 95.0357, Test WER: 1.4420
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_40.pt


Epoch 41/60: 100%|██████████| 9586/9586 [1:15:13<00:00,  2.12batch/s, loss=0.76] 


Epoch 41/60, Training Loss: 0.7670364777516553


Validating: 100%|██████████| 1066/1066 [08:45<00:00,  2.03batch/s]


Epoch 41/60, Test Loss: 0.7911, Test Accuracy: 94.9404, Test WER: 1.4697
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_41.pt


Epoch 42/60: 100%|██████████| 9586/9586 [1:15:05<00:00,  2.13batch/s, loss=0.792]


Epoch 42/60, Training Loss: 0.7670867553949605


Validating: 100%|██████████| 1066/1066 [08:31<00:00,  2.08batch/s]


Epoch 42/60, Test Loss: 0.7909, Test Accuracy: 94.9514, Test WER: 1.4665
Learning rate: [5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_42.pt


Epoch 43/60: 100%|██████████| 9586/9586 [1:15:09<00:00,  2.13batch/s, loss=0.74] 


Epoch 43/60, Training Loss: 0.7669127529345946


Validating: 100%|██████████| 1066/1066 [08:50<00:00,  2.01batch/s]


Epoch 43/60, Test Loss: 0.7905, Test Accuracy: 94.9844, Test WER: 1.4569
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_43.pt


Epoch 44/60: 100%|██████████| 9586/9586 [1:15:09<00:00,  2.13batch/s, loss=0.739]


Epoch 44/60, Training Loss: 0.7664154461883638


Validating: 100%|██████████| 1066/1066 [08:41<00:00,  2.05batch/s]


Epoch 44/60, Test Loss: 0.7903, Test Accuracy: 95.0339, Test WER: 1.4425
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_44.pt


Epoch 45/60: 100%|██████████| 9586/9586 [1:14:57<00:00,  2.13batch/s, loss=0.8]  


Epoch 45/60, Training Loss: 0.7661865341335057


Validating: 100%|██████████| 1066/1066 [08:38<00:00,  2.06batch/s]


Epoch 45/60, Test Loss: 0.7903, Test Accuracy: 95.0192, Test WER: 1.4468
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_45.pt


Epoch 46/60: 100%|██████████| 9586/9586 [1:14:53<00:00,  2.13batch/s, loss=0.731]


Epoch 46/60, Training Loss: 0.7659891289968768


Validating: 100%|██████████| 1066/1066 [08:45<00:00,  2.03batch/s]


Epoch 46/60, Test Loss: 0.7899, Test Accuracy: 95.0522, Test WER: 1.4372
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_46.pt


Epoch 47/60: 100%|██████████| 9586/9586 [1:14:54<00:00,  2.13batch/s, loss=0.805]


Epoch 47/60, Training Loss: 0.7660140600168155


Validating: 100%|██████████| 1066/1066 [08:36<00:00,  2.06batch/s]


Epoch 47/60, Test Loss: 0.7893, Test Accuracy: 95.0981, Test WER: 1.4239
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_47.pt


Epoch 48/60: 100%|██████████| 9586/9586 [1:14:08<00:00,  2.15batch/s, loss=0.781]


Epoch 48/60, Training Loss: 0.7658630039767534


Validating: 100%|██████████| 1066/1066 [08:31<00:00,  2.08batch/s]


Epoch 48/60, Test Loss: 0.7901, Test Accuracy: 95.0357, Test WER: 1.4420
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_48.pt


Epoch 49/60: 100%|██████████| 9586/9586 [1:13:47<00:00,  2.16batch/s, loss=0.773]


Epoch 49/60, Training Loss: 0.7657214440646312


Validating: 100%|██████████| 1066/1066 [08:46<00:00,  2.03batch/s]


Epoch 49/60, Test Loss: 0.7897, Test Accuracy: 95.0614, Test WER: 1.4345
Learning rate: [2.5e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_49.pt


Epoch 50/60: 100%|██████████| 9586/9586 [1:14:09<00:00,  2.15batch/s, loss=0.797]


Epoch 50/60, Training Loss: 0.7656273669746856


Validating: 100%|██████████| 1066/1066 [08:22<00:00,  2.12batch/s]


Epoch 50/60, Test Loss: 0.7904, Test Accuracy: 94.9936, Test WER: 1.4542
Learning rate: [1.25e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_50.pt


Epoch 51/60: 100%|██████████| 9586/9586 [1:13:48<00:00,  2.16batch/s, loss=0.839]


Epoch 51/60, Training Loss: 0.7654969021693515


Validating: 100%|██████████| 1066/1066 [08:22<00:00,  2.12batch/s]


Epoch 51/60, Test Loss: 0.7898, Test Accuracy: 95.0632, Test WER: 1.4340
Learning rate: [1.25e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_51.pt


Epoch 52/60: 100%|██████████| 9586/9586 [1:13:49<00:00,  2.16batch/s, loss=0.793]


Epoch 52/60, Training Loss: 0.7654707088192892


Validating: 100%|██████████| 1066/1066 [08:18<00:00,  2.14batch/s]


Epoch 52/60, Test Loss: 0.7895, Test Accuracy: 95.0889, Test WER: 1.4265
Learning rate: [1.25e-06]
Model saved to conf_2\running/saved_model/sivi_model_epoch_52.pt


Epoch 53/60: 100%|██████████| 9586/9586 [1:14:02<00:00,  2.16batch/s, loss=0.779]


Epoch 53/60, Training Loss: 0.7654052534619229


Validating: 100%|██████████| 1066/1066 [08:35<00:00,  2.07batch/s]


Epoch 53/60, Test Loss: 0.7895, Test Accuracy: 95.0852, Test WER: 1.4276
Learning rate: [6.25e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_53.pt


Epoch 54/60: 100%|██████████| 9586/9586 [1:14:01<00:00,  2.16batch/s, loss=0.735]


Epoch 54/60, Training Loss: 0.7651708161820954


Validating: 100%|██████████| 1066/1066 [08:30<00:00,  2.09batch/s]


Epoch 54/60, Test Loss: 0.7895, Test Accuracy: 95.0999, Test WER: 1.4233
Learning rate: [6.25e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_54.pt


Epoch 55/60: 100%|██████████| 9586/9586 [1:14:03<00:00,  2.16batch/s, loss=0.719]


Epoch 55/60, Training Loss: 0.76511367692247


Validating: 100%|██████████| 1066/1066 [08:25<00:00,  2.11batch/s]


Epoch 55/60, Test Loss: 0.7892, Test Accuracy: 95.1274, Test WER: 1.4154
Learning rate: [6.25e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_55.pt


Epoch 56/60: 100%|██████████| 9586/9586 [1:13:58<00:00,  2.16batch/s, loss=0.76] 


Epoch 56/60, Training Loss: 0.765051692183272


Validating: 100%|██████████| 1066/1066 [08:42<00:00,  2.04batch/s]


Epoch 56/60, Test Loss: 0.7893, Test Accuracy: 95.1072, Test WER: 1.4212
Learning rate: [6.25e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_56.pt


Epoch 57/60: 100%|██████████| 9586/9586 [1:14:10<00:00,  2.15batch/s, loss=0.725]


Epoch 57/60, Training Loss: 0.7649236046867482


Validating: 100%|██████████| 1066/1066 [08:35<00:00,  2.07batch/s]


Epoch 57/60, Test Loss: 0.7894, Test Accuracy: 95.1017, Test WER: 1.4228
Learning rate: [6.25e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_57.pt


Epoch 58/60: 100%|██████████| 9586/9586 [1:13:32<00:00,  2.17batch/s, loss=0.774]


Epoch 58/60, Training Loss: 0.7650187138647568


Validating: 100%|██████████| 1066/1066 [08:17<00:00,  2.14batch/s]


Epoch 58/60, Test Loss: 0.7892, Test Accuracy: 95.1237, Test WER: 1.4164
Learning rate: [3.125e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_58.pt


Epoch 59/60: 100%|██████████| 9586/9586 [1:14:03<00:00,  2.16batch/s, loss=0.748]


Epoch 59/60, Training Loss: 0.7650118617493649


Validating: 100%|██████████| 1066/1066 [08:37<00:00,  2.06batch/s]


Epoch 59/60, Test Loss: 0.7891, Test Accuracy: 95.1274, Test WER: 1.4154
Learning rate: [3.125e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_59.pt


Epoch 60/60: 100%|██████████| 9586/9586 [1:14:10<00:00,  2.15batch/s, loss=0.756]


Epoch 60/60, Training Loss: 0.7648214054224562


Validating: 100%|██████████| 1066/1066 [08:33<00:00,  2.08batch/s]


Epoch 60/60, Test Loss: 0.7891, Test Accuracy: 95.1292, Test WER: 1.4148
Learning rate: [3.125e-07]
Model saved to conf_2\running/saved_model/sivi_model_epoch_60.pt


In [15]:
torch.cuda.empty_cache()