In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel
import torch.optim as optim
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import os
from jiwer import wer
import yaml
import json 
import math

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
####### Config #######
config_path = "conf_10_gelu"
config_file = os.path.join(config_path, "config.yml")
with open(config_file,'r') as conf:
    config = yaml.load(conf, Loader=yaml.SafeLoader)


In [3]:
class AddNorm(nn.Module):
    def __init__(self, norm_shape: int, dropout=0.2):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(norm_shape)

    def forward(self, X, Y):
        return self.ln(self.dropout(Y) + X)

In [4]:
class FeedForwardNetwork(nn.Module):
    def __init__(self, input_dim: int, hidden_ff_dim: int, dropout=0.2, use_gelu=True):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_ff_dim)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.GELU() if use_gelu else nn.ReLU()
        self.linear2 = nn.Linear(hidden_ff_dim, input_dim)

    def forward(self, x):
        return self.linear2(self.dropout(self.activation(self.linear1(x))))

In [5]:
class ShrinkNorm(nn.Module):
    def __init__(self, input_dim: int, shrink_norm_hidden: int, output_dim: int, dropout=0.2, use_gelu=True):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(input_dim, shrink_norm_hidden)
        self.activation = nn.GELU() if use_gelu else nn.ReLU()
        self.linear2 = nn.Linear(shrink_norm_hidden, output_dim)
        self.ln = nn.LayerNorm(output_dim)

    def forward(self, x):
        return self.ln(self.linear2(self.dropout(self.activation(self.linear1(x)))))

In [6]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
        super().__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)
        pe = torch.zeros(seq_len, d_model)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1) 
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 
        pe[:, 0::2] = torch.sin(position * div_term) 
        pe[:, 1::2] = torch.cos(position * div_term) 
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False) # (batch, seq_len, d_model)
        return self.dropout(x)

In [7]:
class TransformerEncoderBlock(nn.Module): 
    """The Transformer encoder block."""
    def __init__(self, model_hidden_dim, ffn_num_hiddens, num_heads, dropout, use_gelu=True):
        super().__init__()
        self.multihead_attention = nn.MultiheadAttention(embed_dim=model_hidden_dim, num_heads=num_heads, 
                                                         batch_first=True, dropout=dropout)

        self.addnorm1 = AddNorm(model_hidden_dim, dropout)
        self.ffn = FeedForwardNetwork(model_hidden_dim, ffn_num_hiddens, use_gelu)
        self.addnorm2 = AddNorm(model_hidden_dim, dropout)

    def forward(self, X, key_padding_mask):
        Y = self.addnorm1(X, self.multihead_attention(X, X, X, key_padding_mask=key_padding_mask)[0])
        return self.addnorm2(Y, self.ffn(Y))

In [8]:
class TransformerEncoder(nn.Module):  
    """The Transformer encoder."""
    def __init__(self, model_hidden_dim, hidden_ff_dim,
                 num_heads, num_blks, max_len, dropout, use_gelu=True):
        super().__init__()
        self.model_hidden_dim = model_hidden_dim
        self.pos_encoding = PositionalEncoding(d_model=model_hidden_dim,seq_len=max_len, dropout=dropout)
        self.blks = nn.Sequential()
        for i in range(num_blks):
            self.blks.add_module("block"+str(i), TransformerEncoderBlock(
                model_hidden_dim, hidden_ff_dim, num_heads, dropout, use_gelu))

    def forward(self, X, key_padding_mask):
        X = self.pos_encoding(X)
        for blk in self.blks:
            X = blk(X, key_padding_mask)
        return X

In [9]:
class ClassificationHead(nn.Module):
    def __init__(self, input_dim: int, head_hidden_dim: int, num_readings: int, dropout=0.2, use_gelu=True):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(input_dim, head_hidden_dim)
        self.activation = nn.GELU() if use_gelu else nn.ReLU()
        self.linear2 = nn.Linear(head_hidden_dim, num_readings)
        self.output_prob = nn.Softmax(dim=-1)

    def forward(self, x):
        return self.output_prob(self.linear2(self.dropout(self.activation(self.linear1(x)))))

In [10]:
class SinoVietnameseTranslator(nn.Module):
    def __init__(self, tokenizer, base_model, vocab, hidden_ff_dim=1024, model_hidden_dim=256, 
                 head_hidden_dim=128, shrink_norm_hidden=512, max_num_spellings=7, train_bert_param=True, 
                 max_len=512, num_heads=8, num_blks=6, dropout=0.2, use_gelu=True):
        super(SinoVietnameseTranslator, self).__init__()
        self.tokenizer = tokenizer
        self.bert = base_model
        self.vocab = vocab
        self.max_num_spellings = max_num_spellings
        self.max_len = max_len
        
        for param in self.bert.parameters():
            param.requires_grad = train_bert_param
        
        self.shrink_norm = ShrinkNorm(self.bert.config.hidden_size,shrink_norm_hidden, model_hidden_dim, dropout, use_gelu)
        self.encoder = TransformerEncoder(model_hidden_dim, hidden_ff_dim, num_heads, num_blks, max_len, dropout, use_gelu)
        
        self.classification_heads = nn.ModuleDict()
        for sino_word, viet_spellings in self.vocab.items():
            num_readings = len(viet_spellings)
            if num_readings > 1:
                self.classification_heads[sino_word] = ClassificationHead(model_hidden_dim, head_hidden_dim, num_readings, dropout, use_gelu)

    def forward(self, input_ids, attention_mask=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state

        sequence_output = self.shrink_norm(sequence_output)

        if attention_mask is not None:
            key_padding_mask = ~attention_mask.bool()  # Convert to bool

        projected_output = self.encoder(sequence_output, key_padding_mask)
        
        batch_size, max_len = input_ids.size()
        predictions = torch.full((batch_size, max_len, self.max_num_spellings), -1.0, device=input_ids.device)
        
        for i in range(batch_size):
            for j in range(max_len):
                token_id = input_ids[i, j].item()
                if token_id == self.tokenizer.pad_token_id:
                    continue
                    
                sino_word = self.tokenizer.convert_ids_to_tokens(token_id)
                
                if sino_word in self.classification_heads:
                    logits = self.classification_heads[sino_word](projected_output[i, j])
                    predictions[i, j, :len(logits)] = logits
                else:
                    predictions[i, j, 0] = 1.0

        return predictions

In [11]:
class SinoVietnameseDataset(Dataset):
    def __init__(self, tokenizer, data, vocab, max_len=512):
        self.data = data
        self.tokenizer = tokenizer
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sino_sent, viet_sent = self.data[idx]
        sino_tokens = self.tokenizer.encode(sino_sent, add_special_tokens=False, max_length=self.max_len, truncation=True)
        viet_spellings = viet_sent.split()

        input_ids = sino_tokens + [self.tokenizer.pad_token_id] * (self.max_len - len(sino_tokens))

        labels = []
        for i, sino_word_id in enumerate(sino_tokens):
            sino_word = self.tokenizer.convert_ids_to_tokens(sino_word_id)
            if sino_word in self.vocab:
                viet_spellings_for_word = self.vocab[sino_word]
                if len(viet_spellings_for_word) > 1:
                    label = viet_spellings_for_word.index(viet_spellings[i])
                else:
                    label = -1
            else:
                label = -1
            labels.append(label)

        labels += [-1] * (self.max_len - len(labels))  # Padding
        attention_mask = [1] * len(sino_tokens) + [0] * (self.max_len - len(sino_tokens))

        return {
            "input_ids": torch.tensor(input_ids),
            "labels": torch.tensor(labels),
            "attention_mask": torch.tensor(attention_mask),
        }


In [12]:
def load_data(data_file):
    data = []
    with open(data_file, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            if ',' not in line:
                continue
            sino_sent, viet_sent = line.strip().split(',')
            data.append((sino_sent, viet_sent))
    return data

train_data_path = "data/train.txt"
test_data_path = "data/test.txt"
train_data = load_data(train_data_path)
test_data = load_data(test_data_path)

In [13]:
with open('vocab/vocab.json', 'r') as vocab_file, open('vocab/sino_viet_words.json', 'r') as words_file:
    base_vocab = json.load(vocab_file)
    sino_viet_words = json.load(words_file)

print(type(base_vocab))
print(len(base_vocab))
print(type(sino_viet_words))
print(len(sino_viet_words))

<class 'dict'>
7688
<class 'list'>
7688


In [14]:
# Model Config
bert_model = config['model_config']['bert_model'] 

base_tokenizer = BertTokenizer.from_pretrained(bert_model)
base_tokenizer.add_tokens(sino_viet_words)

base_model = BertModel.from_pretrained(bert_model)
base_model.resize_token_embeddings(len(base_tokenizer))

Embedding(23683, 768)

In [15]:
# Data Config
batch_size = config['data_config']['batch_size']
max_len = config['data_config']['max_len']

train_dataset = SinoVietnameseDataset(base_tokenizer, train_data, base_vocab, max_len)
test_dataset = SinoVietnameseDataset(base_tokenizer, test_data, base_vocab, max_len)

print(f"Train set: {len(train_dataset)}")
print(f"Test set: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train batch num: {len(train_loader)}")
print(f"Test batch num: {len(test_loader)}")

Train set: 153372
Test set: 17042
Train batch num: 9586
Test batch num: 1066


In [16]:
def decode_predictions(predictions, input_ids, tokenizer, vocab):
    decoded_sentences = []
    for i, predicted_indices in enumerate(predictions):
        decoded_sentence = []
        for j, spelling_index in enumerate(predicted_indices):
            token = input_ids[i, j].item()
            if token == tokenizer.pad_token_id:
                continue
                
            sino_word = tokenizer.convert_ids_to_tokens(token)
            if spelling_index == -1:
                viet_spelling = vocab[sino_word][0]
            else:
                viet_spelling = vocab[sino_word][spelling_index]
            decoded_sentence.append(viet_spelling)

        decoded_sentences.append(" ".join(decoded_sentence))
    return decoded_sentences

In [17]:
def train(model, train_dataloader, test_dataloader, epochs=60, lr=1e-5, 
          max_grad_norm=1.0, model_load_path=None, config_folder_dir="config/"):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    tokenizer = model.tokenizer

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=-1)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)
    
    log_dir = os.path.join(config_folder_dir, f"running/")
    os.makedirs(log_dir, exist_ok=True)
    train_losses_dir = os.path.join(log_dir, f"train_losses.txt")
    test_losses_dir = os.path.join(log_dir, f"test_losses.txt")
    test_accuracies_dir = os.path.join(log_dir, f"test_accuracies.txt")
    test_wers_dir = os.path.join(log_dir, f"test_wers.txt")

    # Determine the starting epoch
    start_epoch = 0
    
    if model_load_path:
        checkpoint = torch.load(model_load_path)
        start_epoch = checkpoint['epoch']
        assert start_epoch < epochs, "Invalid model load path (epoch > total epochs)"
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    for epoch in range(start_epoch, epochs):
        model.train()
        total_loss = 0

        # Training loop with progress bar
        train_iterator = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}", unit="batch")
        for batch in train_iterator:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            optimizer.zero_grad()

            outputs = model(input_ids, attention_mask=attention_mask)

            # Flatten 
            preds = outputs.view(-1, outputs.size(-1))
            targets = labels.view(-1)

            loss = criterion(preds, targets) # batch loss
            total_loss += loss.item()

            loss.backward()
            clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            
            train_iterator.set_postfix(loss=loss.item())

        avg_train_loss = total_loss / len(train_dataloader)

        with open(train_losses_dir, 'a') as tl:
            tl.write(f"{avg_train_loss};")

        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss}")

        ################################## Run test ##################################
        model.eval()
        total_test_loss = 0
        correct_predictions = 0 # calculate accuracies over sino words that have multiple viet spellings only
        total_predictions = 0
        all_ground_truths = []
        all_predictions = []
        
        with torch.no_grad():
            test_iterator = tqdm(test_dataloader, desc="Validating", unit="batch")
            for batch in test_iterator:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)

                preds = outputs.view(-1, outputs.size(-1)) # Flatten
                targets = labels.view(-1)

                test_loss = criterion(preds, targets)
                total_test_loss += test_loss.item()

                predictions = torch.argmax(outputs, dim=-1)
                mask = labels != -1
                correct_predictions += (predictions[mask] == labels[mask]).sum().item()
                total_predictions += mask.sum().item()
                
                batch_predictions = decode_predictions(predictions, input_ids, tokenizer, model.vocab)
                batch_ground_truths = decode_predictions(labels, input_ids, tokenizer, model.vocab)
                all_predictions.extend(batch_predictions)
                all_ground_truths.extend(batch_ground_truths)
            
            avg_test_loss = total_test_loss / len(test_dataloader)
            with open(test_losses_dir, 'a') as tl2:
                tl2.write(f"{avg_test_loss};")
            
            test_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
            with open(test_accuracies_dir, 'a') as ta:
                ta.write(f"{test_accuracy * 100};")
            
            test_wer = wer(all_ground_truths, all_predictions)
            with open(test_wers_dir, 'a') as tw:
                tw.write(f"{test_wer * 100};")
            
            print(f"Epoch {epoch+1}/{epochs}, Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.4f}, Test WER: {test_wer * 100:.4f}")

        scheduler.step(avg_test_loss)
        cur_lr = scheduler.get_last_lr()
        lr_log_dir = os.path.join(log_dir, f"lr_log.txt")
        with open(lr_log_dir, 'a') as lr:
            lr.write(f"{epoch+1}. {cur_lr}\n")
        print(f"Learning rate: {cur_lr}")
        
        # Save the model after each epoch
        save_dir = os.path.join(log_dir, f"saved_model/")
        os.makedirs(save_dir, exist_ok=True)
        model_save_path = os.path.join(save_dir, f"sivi_model_epoch_{epoch+1}.pt")
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, model_save_path)
        print(f"Model saved to {model_save_path}")

In [18]:
# Model config
hidden_ff_dim = config['model_config']['hidden_ff_dim']
model_hidden_dim = config['model_config']['model_hidden_dim']
shrink_norm_hidden = config['model_config']['shrink_norm_hidden']
head_hidden_dim = config['model_config']['head_hidden_dim']
max_num_spellings = config['model_config']['max_num_spellings']
train_bert_param = config['model_config']['train_bert_param']
num_heads = config['model_config']['num_heads']
num_blks = config['model_config']['num_blks']
dropout = config['model_config']['dropout']
use_gelu = config['model_config']['use_gelu']

model = SinoVietnameseTranslator(base_tokenizer, base_model, base_vocab, hidden_ff_dim=hidden_ff_dim, 
                                model_hidden_dim=model_hidden_dim, shrink_norm_hidden=shrink_norm_hidden,
                                head_hidden_dim=head_hidden_dim, max_num_spellings=max_num_spellings,
                                train_bert_param=train_bert_param, num_heads=num_heads, use_gelu=use_gelu,
                                max_len=max_len, num_blks=num_blks, dropout=dropout)

num_param = sum([param.nelement() for param in model.parameters()]) / 1000000
print(f"{num_param:.1f}M params.")

152.5M params.


In [18]:
# Trainning config
num_epochs = config['training_config']['num_epochs']
learning_rate = config['training_config']['learning_rate']
model_load_path = None if config['training_config']['model_load_path'] == 'None' else config['training_config']['model_load_path']
config_folder_dir = config_path

train(model, train_loader, test_loader, epochs=num_epochs, lr=learning_rate,
    model_load_path=model_load_path, config_folder_dir=config_folder_dir)

  attn_output = torch.nn.functional.scaled_dot_product_attention(
Epoch 1/80: 100%|██████████| 9586/9586 [1:12:12<00:00,  2.21batch/s, loss=0.886]


Epoch 1/80, Training Loss: 0.8947425337853024


Validating: 100%|██████████| 1066/1066 [07:16<00:00,  2.44batch/s]


Epoch 1/80, Test Loss: 0.8286, Test Accuracy: 92.3061, Test WER: 2.2349
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_1.pt


Epoch 2/80: 100%|██████████| 9586/9586 [1:11:08<00:00,  2.25batch/s, loss=0.825]


Epoch 2/80, Training Loss: 0.82325393041091


Validating: 100%|██████████| 1066/1066 [07:06<00:00,  2.50batch/s]


Epoch 2/80, Test Loss: 0.8166, Test Accuracy: 92.8378, Test WER: 2.0804
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_2.pt


Epoch 3/80: 100%|██████████| 9586/9586 [1:11:15<00:00,  2.24batch/s, loss=0.809]


Epoch 3/80, Training Loss: 0.8123829269394257


Validating: 100%|██████████| 1066/1066 [07:07<00:00,  2.49batch/s]


Epoch 3/80, Test Loss: 0.8109, Test Accuracy: 93.2814, Test WER: 1.9516
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_3.pt


Epoch 4/80: 100%|██████████| 9586/9586 [1:11:09<00:00,  2.25batch/s, loss=0.8]  


Epoch 4/80, Training Loss: 0.8061203011547676


Validating: 100%|██████████| 1066/1066 [07:02<00:00,  2.52batch/s]


Epoch 4/80, Test Loss: 0.8093, Test Accuracy: 93.3712, Test WER: 1.9255
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_4.pt


Epoch 5/80: 100%|██████████| 9586/9586 [1:11:04<00:00,  2.25batch/s, loss=0.783]


Epoch 5/80, Training Loss: 0.8018302850140775


Validating: 100%|██████████| 1066/1066 [07:03<00:00,  2.52batch/s]


Epoch 5/80, Test Loss: 0.8071, Test Accuracy: 93.5087, Test WER: 1.8855
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_5.pt


Epoch 6/80: 100%|██████████| 9586/9586 [1:11:12<00:00,  2.24batch/s, loss=0.805]


Epoch 6/80, Training Loss: 0.7985561392375927


Validating: 100%|██████████| 1066/1066 [07:07<00:00,  2.50batch/s]


Epoch 6/80, Test Loss: 0.8055, Test Accuracy: 93.6279, Test WER: 1.8509
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_6.pt


Epoch 7/80: 100%|██████████| 9586/9586 [1:11:12<00:00,  2.24batch/s, loss=0.772]


Epoch 7/80, Training Loss: 0.7953150650541145


Validating: 100%|██████████| 1066/1066 [07:13<00:00,  2.46batch/s]


Epoch 7/80, Test Loss: 0.8032, Test Accuracy: 93.8552, Test WER: 1.7849
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_7.pt


Epoch 8/80: 100%|██████████| 9586/9586 [1:16:46<00:00,  2.08batch/s, loss=0.8]  


Epoch 8/80, Training Loss: 0.7924031920431055


Validating: 100%|██████████| 1066/1066 [08:40<00:00,  2.05batch/s]


Epoch 8/80, Test Loss: 0.8020, Test Accuracy: 93.9853, Test WER: 1.7471
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_8.pt


Epoch 9/80: 100%|██████████| 9586/9586 [1:17:22<00:00,  2.06batch/s, loss=0.739]


Epoch 9/80, Training Loss: 0.7894537474376385


Validating: 100%|██████████| 1066/1066 [08:36<00:00,  2.06batch/s]


Epoch 9/80, Test Loss: 0.8017, Test Accuracy: 93.9358, Test WER: 1.7615
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_9.pt


Epoch 10/80: 100%|██████████| 9586/9586 [1:17:07<00:00,  2.07batch/s, loss=0.786]


Epoch 10/80, Training Loss: 0.7869384683188282


Validating: 100%|██████████| 1066/1066 [08:54<00:00,  1.99batch/s]


Epoch 10/80, Test Loss: 0.7999, Test Accuracy: 94.1632, Test WER: 1.6954
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_10.pt


Epoch 11/80: 100%|██████████| 9586/9586 [1:17:05<00:00,  2.07batch/s, loss=0.745]


Epoch 11/80, Training Loss: 0.7847157891846738


Validating: 100%|██████████| 1066/1066 [08:46<00:00,  2.02batch/s]


Epoch 11/80, Test Loss: 0.7986, Test Accuracy: 94.2585, Test WER: 1.6678
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_11.pt


Epoch 12/80: 100%|██████████| 9586/9586 [1:17:21<00:00,  2.07batch/s, loss=0.772]


Epoch 12/80, Training Loss: 0.7822908505427422


Validating: 100%|██████████| 1066/1066 [08:42<00:00,  2.04batch/s]


Epoch 12/80, Test Loss: 0.7971, Test Accuracy: 94.4253, Test WER: 1.6193
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_12.pt


Epoch 13/80: 100%|██████████| 9586/9586 [1:17:19<00:00,  2.07batch/s, loss=0.795]


Epoch 13/80, Training Loss: 0.7804426941410525


Validating: 100%|██████████| 1066/1066 [08:58<00:00,  1.98batch/s]


Epoch 13/80, Test Loss: 0.7971, Test Accuracy: 94.3831, Test WER: 1.6315
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_13.pt


Epoch 14/80: 100%|██████████| 9586/9586 [1:17:13<00:00,  2.07batch/s, loss=0.736]


Epoch 14/80, Training Loss: 0.7788830925732845


Validating: 100%|██████████| 1066/1066 [08:36<00:00,  2.07batch/s]


Epoch 14/80, Test Loss: 0.7966, Test Accuracy: 94.4088, Test WER: 1.6241
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_14.pt


Epoch 15/80: 100%|██████████| 9586/9586 [1:17:17<00:00,  2.07batch/s, loss=0.728]


Epoch 15/80, Training Loss: 0.7776845339977142


Validating: 100%|██████████| 1066/1066 [08:47<00:00,  2.02batch/s]


Epoch 15/80, Test Loss: 0.7961, Test Accuracy: 94.4693, Test WER: 1.6065
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_15.pt


Epoch 16/80: 100%|██████████| 9586/9586 [1:17:15<00:00,  2.07batch/s, loss=0.771]


Epoch 16/80, Training Loss: 0.7765519181000223


Validating: 100%|██████████| 1066/1066 [08:54<00:00,  1.99batch/s]


Epoch 16/80, Test Loss: 0.7954, Test Accuracy: 94.5665, Test WER: 1.5783
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_16.pt


Epoch 17/80: 100%|██████████| 9586/9586 [1:17:02<00:00,  2.07batch/s, loss=0.794]


Epoch 17/80, Training Loss: 0.7756027263936434


Validating: 100%|██████████| 1066/1066 [08:44<00:00,  2.03batch/s]


Epoch 17/80, Test Loss: 0.7956, Test Accuracy: 94.5206, Test WER: 1.5916
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_17.pt


Epoch 18/80: 100%|██████████| 9586/9586 [1:17:15<00:00,  2.07batch/s, loss=0.766]


Epoch 18/80, Training Loss: 0.7747388694061508


Validating: 100%|██████████| 1066/1066 [08:22<00:00,  2.12batch/s]


Epoch 18/80, Test Loss: 0.7945, Test Accuracy: 94.6508, Test WER: 1.5538
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_18.pt


Epoch 19/80: 100%|██████████| 9586/9586 [1:17:17<00:00,  2.07batch/s, loss=0.766]


Epoch 19/80, Training Loss: 0.7739007625516661


Validating: 100%|██████████| 1066/1066 [08:57<00:00,  1.98batch/s]


Epoch 19/80, Test Loss: 0.7946, Test Accuracy: 94.5701, Test WER: 1.5772
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_19.pt


Epoch 20/80: 100%|██████████| 9586/9586 [1:17:31<00:00,  2.06batch/s, loss=0.74] 


Epoch 20/80, Training Loss: 0.7728787015804089


Validating: 100%|██████████| 1066/1066 [08:38<00:00,  2.06batch/s]


Epoch 20/80, Test Loss: 0.7944, Test Accuracy: 94.6214, Test WER: 1.5623
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_20.pt


Epoch 21/80: 100%|██████████| 9586/9586 [1:17:19<00:00,  2.07batch/s, loss=0.757]


Epoch 21/80, Training Loss: 0.7721501097919398


Validating: 100%|██████████| 1066/1066 [08:49<00:00,  2.01batch/s]


Epoch 21/80, Test Loss: 0.7940, Test Accuracy: 94.6544, Test WER: 1.5527
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_21.pt


Epoch 22/80: 100%|██████████| 9586/9586 [1:17:24<00:00,  2.06batch/s, loss=0.765]


Epoch 22/80, Training Loss: 0.771737481460625


Validating: 100%|██████████| 1066/1066 [08:53<00:00,  2.00batch/s]


Epoch 22/80, Test Loss: 0.7944, Test Accuracy: 94.6306, Test WER: 1.5597
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_22.pt


Epoch 23/80: 100%|██████████| 9586/9586 [1:17:00<00:00,  2.07batch/s, loss=0.733]


Epoch 23/80, Training Loss: 0.7711766431208769


Validating: 100%|██████████| 1066/1066 [08:42<00:00,  2.04batch/s]


Epoch 23/80, Test Loss: 0.7940, Test Accuracy: 94.6599, Test WER: 1.5511
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_23.pt


Epoch 24/80: 100%|██████████| 9586/9586 [1:15:57<00:00,  2.10batch/s, loss=0.772]


Epoch 24/80, Training Loss: 0.7706634378833659


Validating: 100%|██████████| 1066/1066 [07:14<00:00,  2.45batch/s]


Epoch 24/80, Test Loss: 0.7932, Test Accuracy: 94.7186, Test WER: 1.5341
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_24.pt


Epoch 25/80: 100%|██████████| 9586/9586 [1:11:11<00:00,  2.24batch/s, loss=0.798]


Epoch 25/80, Training Loss: 0.7702991323047755


Validating: 100%|██████████| 1066/1066 [07:01<00:00,  2.53batch/s]


Epoch 25/80, Test Loss: 0.7924, Test Accuracy: 94.7938, Test WER: 1.5123
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_25.pt


Epoch 26/80: 100%|██████████| 9586/9586 [1:12:03<00:00,  2.22batch/s, loss=0.749]


Epoch 26/80, Training Loss: 0.7699950070538353


Validating: 100%|██████████| 1066/1066 [08:36<00:00,  2.06batch/s]


Epoch 26/80, Test Loss: 0.7927, Test Accuracy: 94.7809, Test WER: 1.5160
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_26.pt


Epoch 27/80: 100%|██████████| 9586/9586 [1:16:18<00:00,  2.09batch/s, loss=0.809]


Epoch 27/80, Training Loss: 0.7695146055677204


Validating: 100%|██████████| 1066/1066 [08:40<00:00,  2.05batch/s]


Epoch 27/80, Test Loss: 0.7932, Test Accuracy: 94.7259, Test WER: 1.5320
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_27.pt


Epoch 28/80: 100%|██████████| 9586/9586 [1:16:29<00:00,  2.09batch/s, loss=0.726]


Epoch 28/80, Training Loss: 0.7693887931331075


Validating: 100%|██████████| 1066/1066 [08:31<00:00,  2.08batch/s]


Epoch 28/80, Test Loss: 0.7920, Test Accuracy: 94.8726, Test WER: 1.4894
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_28.pt


Epoch 29/80: 100%|██████████| 9586/9586 [1:16:40<00:00,  2.08batch/s, loss=0.777]


Epoch 29/80, Training Loss: 0.7691483116711999


Validating: 100%|██████████| 1066/1066 [08:26<00:00,  2.11batch/s]


Epoch 29/80, Test Loss: 0.7920, Test Accuracy: 94.8543, Test WER: 1.4947
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_29.pt


Epoch 30/80: 100%|██████████| 9586/9586 [1:16:43<00:00,  2.08batch/s, loss=0.78] 


Epoch 30/80, Training Loss: 0.7687266002411686


Validating: 100%|██████████| 1066/1066 [08:34<00:00,  2.07batch/s]


Epoch 30/80, Test Loss: 0.7921, Test Accuracy: 94.8579, Test WER: 1.4936
Learning rate: [1e-05]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_30.pt


Epoch 31/80: 100%|██████████| 9586/9586 [1:16:42<00:00,  2.08batch/s, loss=0.774]


Epoch 31/80, Training Loss: 0.7684663261199282


Validating: 100%|██████████| 1066/1066 [08:38<00:00,  2.05batch/s]


Epoch 31/80, Test Loss: 0.7933, Test Accuracy: 94.7168, Test WER: 1.5346
Learning rate: [5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_31.pt


Epoch 32/80: 100%|██████████| 9586/9586 [1:16:47<00:00,  2.08batch/s, loss=0.763]


Epoch 32/80, Training Loss: 0.767622467877346


Validating: 100%|██████████| 1066/1066 [08:37<00:00,  2.06batch/s]


Epoch 32/80, Test Loss: 0.7911, Test Accuracy: 94.9423, Test WER: 1.4691
Learning rate: [5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_32.pt


Epoch 33/80: 100%|██████████| 9586/9586 [1:11:46<00:00,  2.23batch/s, loss=0.744]


Epoch 33/80, Training Loss: 0.7672537927112225


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 33/80, Test Loss: 0.7897, Test Accuracy: 95.0816, Test WER: 1.4287
Learning rate: [5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_33.pt


Epoch 34/80: 100%|██████████| 9586/9586 [1:11:39<00:00,  2.23batch/s, loss=0.767]


Epoch 34/80, Training Loss: 0.7666977253359144


Validating: 100%|██████████| 1066/1066 [07:20<00:00,  2.42batch/s]


Epoch 34/80, Test Loss: 0.7902, Test Accuracy: 95.0211, Test WER: 1.4462
Learning rate: [5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_34.pt


Epoch 35/80: 100%|██████████| 9586/9586 [1:11:32<00:00,  2.23batch/s, loss=0.74] 


Epoch 35/80, Training Loss: 0.7668778558824993


Validating: 100%|██████████| 1066/1066 [07:08<00:00,  2.49batch/s]


Epoch 35/80, Test Loss: 0.7903, Test Accuracy: 95.0192, Test WER: 1.4468
Learning rate: [5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_35.pt


Epoch 36/80: 100%|██████████| 9586/9586 [1:15:34<00:00,  2.11batch/s, loss=0.773]


Epoch 36/80, Training Loss: 0.7663572363457499


Validating: 100%|██████████| 1066/1066 [08:20<00:00,  2.13batch/s]


Epoch 36/80, Test Loss: 0.7904, Test Accuracy: 95.0266, Test WER: 1.4446
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_36.pt


Epoch 37/80: 100%|██████████| 9586/9586 [1:16:41<00:00,  2.08batch/s, loss=0.73] 


Epoch 37/80, Training Loss: 0.7661345952019273


Validating: 100%|██████████| 1066/1066 [08:30<00:00,  2.09batch/s]


Epoch 37/80, Test Loss: 0.7893, Test Accuracy: 95.1072, Test WER: 1.4212
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_37.pt


Epoch 38/80: 100%|██████████| 9586/9586 [1:16:40<00:00,  2.08batch/s, loss=0.808]


Epoch 38/80, Training Loss: 0.7659134195451818


Validating: 100%|██████████| 1066/1066 [08:35<00:00,  2.07batch/s]


Epoch 38/80, Test Loss: 0.7894, Test Accuracy: 95.1182, Test WER: 1.4180
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_38.pt


Epoch 39/80: 100%|██████████| 9586/9586 [1:16:21<00:00,  2.09batch/s, loss=0.75] 


Epoch 39/80, Training Loss: 0.7658086353444665


Validating: 100%|██████████| 1066/1066 [08:31<00:00,  2.08batch/s]


Epoch 39/80, Test Loss: 0.7891, Test Accuracy: 95.1256, Test WER: 1.4159
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_39.pt


Epoch 40/80: 100%|██████████| 9586/9586 [1:16:15<00:00,  2.10batch/s, loss=0.741]


Epoch 40/80, Training Loss: 0.7656292733719819


Validating: 100%|██████████| 1066/1066 [08:12<00:00,  2.16batch/s]


Epoch 40/80, Test Loss: 0.7890, Test Accuracy: 95.1604, Test WER: 1.4058
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_40.pt


Epoch 41/80: 100%|██████████| 9586/9586 [1:16:12<00:00,  2.10batch/s, loss=0.749]


Epoch 41/80, Training Loss: 0.765338699914159


Validating: 100%|██████████| 1066/1066 [08:17<00:00,  2.14batch/s]


Epoch 41/80, Test Loss: 0.7888, Test Accuracy: 95.2081, Test WER: 1.3919
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_41.pt


Epoch 42/80: 100%|██████████| 9586/9586 [1:15:54<00:00,  2.10batch/s, loss=0.826]


Epoch 42/80, Training Loss: 0.7650312640125757


Validating: 100%|██████████| 1066/1066 [08:22<00:00,  2.12batch/s]


Epoch 42/80, Test Loss: 0.7888, Test Accuracy: 95.1696, Test WER: 1.4031
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_42.pt


Epoch 43/80: 100%|██████████| 9586/9586 [1:15:58<00:00,  2.10batch/s, loss=0.771]


Epoch 43/80, Training Loss: 0.7650470258907065


Validating: 100%|██████████| 1066/1066 [08:06<00:00,  2.19batch/s]


Epoch 43/80, Test Loss: 0.7887, Test Accuracy: 95.1751, Test WER: 1.4015
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_43.pt


Epoch 44/80: 100%|██████████| 9586/9586 [1:16:04<00:00,  2.10batch/s, loss=0.728]


Epoch 44/80, Training Loss: 0.7648996689449937


Validating: 100%|██████████| 1066/1066 [08:13<00:00,  2.16batch/s]


Epoch 44/80, Test Loss: 0.7885, Test Accuracy: 95.2117, Test WER: 1.3909
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_44.pt


Epoch 45/80: 100%|██████████| 9586/9586 [1:16:05<00:00,  2.10batch/s, loss=0.785]


Epoch 45/80, Training Loss: 0.7648981637505035


Validating: 100%|██████████| 1066/1066 [08:10<00:00,  2.17batch/s]


Epoch 45/80, Test Loss: 0.7885, Test Accuracy: 95.1861, Test WER: 1.3983
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_45.pt


Epoch 46/80: 100%|██████████| 9586/9586 [1:16:04<00:00,  2.10batch/s, loss=0.782]


Epoch 46/80, Training Loss: 0.7643771947872755


Validating: 100%|██████████| 1066/1066 [08:12<00:00,  2.16batch/s]


Epoch 46/80, Test Loss: 0.7882, Test Accuracy: 95.2227, Test WER: 1.3877
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_46.pt


Epoch 47/80: 100%|██████████| 9586/9586 [1:16:00<00:00,  2.10batch/s, loss=0.773]


Epoch 47/80, Training Loss: 0.7642519373548521


Validating: 100%|██████████| 1066/1066 [08:10<00:00,  2.17batch/s]


Epoch 47/80, Test Loss: 0.7882, Test Accuracy: 95.2227, Test WER: 1.3877
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_47.pt


Epoch 48/80: 100%|██████████| 9586/9586 [1:15:57<00:00,  2.10batch/s, loss=0.742]


Epoch 48/80, Training Loss: 0.7642086780993993


Validating: 100%|██████████| 1066/1066 [08:23<00:00,  2.12batch/s]


Epoch 48/80, Test Loss: 0.7883, Test Accuracy: 95.2246, Test WER: 1.3871
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_48.pt


Epoch 49/80: 100%|██████████| 9586/9586 [1:16:27<00:00,  2.09batch/s, loss=0.775]


Epoch 49/80, Training Loss: 0.7639504565641474


Validating: 100%|██████████| 1066/1066 [08:27<00:00,  2.10batch/s]


Epoch 49/80, Test Loss: 0.7880, Test Accuracy: 95.2594, Test WER: 1.3770
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_49.pt


Epoch 50/80: 100%|██████████| 9586/9586 [1:16:15<00:00,  2.09batch/s, loss=0.735]


Epoch 50/80, Training Loss: 0.7638814217631911


Validating: 100%|██████████| 1066/1066 [08:34<00:00,  2.07batch/s]


Epoch 50/80, Test Loss: 0.7880, Test Accuracy: 95.2576, Test WER: 1.3776
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_50.pt


Epoch 51/80: 100%|██████████| 9586/9586 [1:15:49<00:00,  2.11batch/s, loss=0.759]


Epoch 51/80, Training Loss: 0.7638725746083355


Validating: 100%|██████████| 1066/1066 [07:24<00:00,  2.40batch/s]


Epoch 51/80, Test Loss: 0.7878, Test Accuracy: 95.2686, Test WER: 1.3744
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_51.pt


Epoch 52/80: 100%|██████████| 9586/9586 [1:12:20<00:00,  2.21batch/s, loss=0.732]


Epoch 52/80, Training Loss: 0.7637994565472284


Validating: 100%|██████████| 1066/1066 [07:22<00:00,  2.41batch/s]


Epoch 52/80, Test Loss: 0.7883, Test Accuracy: 95.2227, Test WER: 1.3877
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_52.pt


Epoch 53/80: 100%|██████████| 9586/9586 [1:11:57<00:00,  2.22batch/s, loss=0.728]


Epoch 53/80, Training Loss: 0.7637652598714719


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 53/80, Test Loss: 0.7878, Test Accuracy: 95.2759, Test WER: 1.3722
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_53.pt


Epoch 54/80: 100%|██████████| 9586/9586 [1:10:46<00:00,  2.26batch/s, loss=0.748]


Epoch 54/80, Training Loss: 0.7636616283422618


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 54/80, Test Loss: 0.7875, Test Accuracy: 95.3034, Test WER: 1.3642
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_54.pt


Epoch 55/80: 100%|██████████| 9586/9586 [1:10:47<00:00,  2.26batch/s, loss=0.788]


Epoch 55/80, Training Loss: 0.7633940047183615


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 55/80, Test Loss: 0.7877, Test Accuracy: 95.2796, Test WER: 1.3712
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_55.pt


Epoch 56/80: 100%|██████████| 9586/9586 [1:10:50<00:00,  2.26batch/s, loss=0.746]


Epoch 56/80, Training Loss: 0.763496812141541


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 56/80, Test Loss: 0.7880, Test Accuracy: 95.2594, Test WER: 1.3770
Learning rate: [2.5e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_56.pt


Epoch 57/80: 100%|██████████| 9586/9586 [1:10:45<00:00,  2.26batch/s, loss=0.735]


Epoch 57/80, Training Loss: 0.7633530609908145


Validating: 100%|██████████| 1066/1066 [06:57<00:00,  2.55batch/s]


Epoch 57/80, Test Loss: 0.7877, Test Accuracy: 95.2942, Test WER: 1.3669
Learning rate: [1.25e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_57.pt


Epoch 58/80: 100%|██████████| 9586/9586 [1:10:46<00:00,  2.26batch/s, loss=0.744]


Epoch 58/80, Training Loss: 0.7630752621464557


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 58/80, Test Loss: 0.7877, Test Accuracy: 95.2796, Test WER: 1.3712
Learning rate: [1.25e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_58.pt


Epoch 59/80: 100%|██████████| 9586/9586 [1:10:57<00:00,  2.25batch/s, loss=0.779]


Epoch 59/80, Training Loss: 0.7631632188292702


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 59/80, Test Loss: 0.7880, Test Accuracy: 95.2649, Test WER: 1.3754
Learning rate: [1.25e-06]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_59.pt


Epoch 60/80: 100%|██████████| 9586/9586 [1:10:49<00:00,  2.26batch/s, loss=0.791]


Epoch 60/80, Training Loss: 0.7630370631052312


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 60/80, Test Loss: 0.7878, Test Accuracy: 95.2686, Test WER: 1.3744
Learning rate: [6.25e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_60.pt


Epoch 61/80: 100%|██████████| 9586/9586 [1:10:45<00:00,  2.26batch/s, loss=0.843]


Epoch 61/80, Training Loss: 0.7629440235813052


Validating: 100%|██████████| 1066/1066 [07:00<00:00,  2.53batch/s]


Epoch 61/80, Test Loss: 0.7876, Test Accuracy: 95.2961, Test WER: 1.3664
Learning rate: [6.25e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_61.pt


Epoch 62/80: 100%|██████████| 9586/9586 [1:10:51<00:00,  2.25batch/s, loss=0.776]


Epoch 62/80, Training Loss: 0.7628116326928511


Validating: 100%|██████████| 1066/1066 [06:58<00:00,  2.55batch/s]


Epoch 62/80, Test Loss: 0.7878, Test Accuracy: 95.2997, Test WER: 1.3653
Learning rate: [6.25e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_62.pt


Epoch 63/80: 100%|██████████| 9586/9586 [1:10:46<00:00,  2.26batch/s, loss=0.765]


Epoch 63/80, Training Loss: 0.7628127902889212


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 63/80, Test Loss: 0.7875, Test Accuracy: 95.3162, Test WER: 1.3605
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_63.pt


Epoch 64/80: 100%|██████████| 9586/9586 [1:10:48<00:00,  2.26batch/s, loss=0.785]


Epoch 64/80, Training Loss: 0.7627090332081788


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 64/80, Test Loss: 0.7876, Test Accuracy: 95.2997, Test WER: 1.3653
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_64.pt


Epoch 65/80: 100%|██████████| 9586/9586 [1:10:45<00:00,  2.26batch/s, loss=0.854]


Epoch 65/80, Training Loss: 0.762879958244547


Validating: 100%|██████████| 1066/1066 [07:00<00:00,  2.54batch/s]


Epoch 65/80, Test Loss: 0.7875, Test Accuracy: 95.3052, Test WER: 1.3637
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_65.pt


Epoch 66/80: 100%|██████████| 9586/9586 [1:10:42<00:00,  2.26batch/s, loss=0.764]


Epoch 66/80, Training Loss: 0.7626526223240281


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 66/80, Test Loss: 0.7874, Test Accuracy: 95.3126, Test WER: 1.3616
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_66.pt


Epoch 67/80: 100%|██████████| 9586/9586 [1:10:47<00:00,  2.26batch/s, loss=0.772]


Epoch 67/80, Training Loss: 0.7626331756962085


Validating: 100%|██████████| 1066/1066 [06:58<00:00,  2.54batch/s]


Epoch 67/80, Test Loss: 0.7874, Test Accuracy: 95.3016, Test WER: 1.3648
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_67.pt


Epoch 68/80: 100%|██████████| 9586/9586 [1:10:53<00:00,  2.25batch/s, loss=0.753]


Epoch 68/80, Training Loss: 0.7628855284807853


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 68/80, Test Loss: 0.7874, Test Accuracy: 95.3236, Test WER: 1.3584
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_68.pt


Epoch 69/80: 100%|██████████| 9586/9586 [1:10:50<00:00,  2.26batch/s, loss=0.762]


Epoch 69/80, Training Loss: 0.7626189693152569


Validating: 100%|██████████| 1066/1066 [06:58<00:00,  2.54batch/s]


Epoch 69/80, Test Loss: 0.7872, Test Accuracy: 95.3291, Test WER: 1.3568
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_69.pt


Epoch 70/80: 100%|██████████| 9586/9586 [1:10:50<00:00,  2.26batch/s, loss=0.749]


Epoch 70/80, Training Loss: 0.7623775979372248


Validating: 100%|██████████| 1066/1066 [06:59<00:00,  2.54batch/s]


Epoch 70/80, Test Loss: 0.7874, Test Accuracy: 95.3144, Test WER: 1.3610
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_70.pt


Epoch 71/80: 100%|██████████| 9586/9586 [1:10:51<00:00,  2.25batch/s, loss=0.741]


Epoch 71/80, Training Loss: 0.7625162731984352


Validating: 100%|██████████| 1066/1066 [07:23<00:00,  2.40batch/s]


Epoch 71/80, Test Loss: 0.7875, Test Accuracy: 95.3016, Test WER: 1.3648
Learning rate: [3.125e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_71.pt


Epoch 72/80: 100%|██████████| 9586/9586 [1:16:20<00:00,  2.09batch/s, loss=0.736]


Epoch 72/80, Training Loss: 0.7625921690685377


Validating: 100%|██████████| 1066/1066 [08:47<00:00,  2.02batch/s]


Epoch 72/80, Test Loss: 0.7873, Test Accuracy: 95.3236, Test WER: 1.3584
Learning rate: [1.5625e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_72.pt


Epoch 73/80: 100%|██████████| 9586/9586 [1:17:20<00:00,  2.07batch/s, loss=0.731]


Epoch 73/80, Training Loss: 0.7624039434948918


Validating: 100%|██████████| 1066/1066 [08:30<00:00,  2.09batch/s]


Epoch 73/80, Test Loss: 0.7873, Test Accuracy: 95.3144, Test WER: 1.3610
Learning rate: [1.5625e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_73.pt


Epoch 74/80: 100%|██████████| 9586/9586 [1:17:26<00:00,  2.06batch/s, loss=0.743]


Epoch 74/80, Training Loss: 0.7624361442917701


Validating: 100%|██████████| 1066/1066 [08:51<00:00,  2.01batch/s]


Epoch 74/80, Test Loss: 0.7872, Test Accuracy: 95.3217, Test WER: 1.3589
Learning rate: [1.5625e-07]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_74.pt


Epoch 75/80: 100%|██████████| 9586/9586 [1:17:23<00:00,  2.06batch/s, loss=0.736]


Epoch 75/80, Training Loss: 0.7622725591327759


Validating: 100%|██████████| 1066/1066 [08:42<00:00,  2.04batch/s]


Epoch 75/80, Test Loss: 0.7873, Test Accuracy: 95.3272, Test WER: 1.3573
Learning rate: [7.8125e-08]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_75.pt


Epoch 76/80: 100%|██████████| 9586/9586 [1:17:24<00:00,  2.06batch/s, loss=0.738]


Epoch 76/80, Training Loss: 0.7623415024624273


Validating: 100%|██████████| 1066/1066 [08:23<00:00,  2.12batch/s]


Epoch 76/80, Test Loss: 0.7872, Test Accuracy: 95.3309, Test WER: 1.3563
Learning rate: [7.8125e-08]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_76.pt


Epoch 77/80: 100%|██████████| 9586/9586 [1:17:34<00:00,  2.06batch/s, loss=0.776]


Epoch 77/80, Training Loss: 0.7624949832786331


Validating: 100%|██████████| 1066/1066 [08:54<00:00,  1.99batch/s]


Epoch 77/80, Test Loss: 0.7873, Test Accuracy: 95.3254, Test WER: 1.3578
Learning rate: [7.8125e-08]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_77.pt


Epoch 78/80: 100%|██████████| 9586/9586 [1:14:42<00:00,  2.14batch/s, loss=0.761]


Epoch 78/80, Training Loss: 0.7623481826097762


Validating: 100%|██████████| 1066/1066 [07:20<00:00,  2.42batch/s]


Epoch 78/80, Test Loss: 0.7872, Test Accuracy: 95.3199, Test WER: 1.3594
Learning rate: [3.90625e-08]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_78.pt


Epoch 79/80: 100%|██████████| 9586/9586 [1:12:15<00:00,  2.21batch/s, loss=0.731]


Epoch 79/80, Training Loss: 0.7624461771537535


Validating: 100%|██████████| 1066/1066 [07:19<00:00,  2.43batch/s]


Epoch 79/80, Test Loss: 0.7872, Test Accuracy: 95.3236, Test WER: 1.3584
Learning rate: [3.90625e-08]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_79.pt


Epoch 80/80: 100%|██████████| 9586/9586 [1:12:21<00:00,  2.21batch/s, loss=0.733]


Epoch 80/80, Training Loss: 0.7622776177800973


Validating: 100%|██████████| 1066/1066 [07:21<00:00,  2.41batch/s]


Epoch 80/80, Test Loss: 0.7872, Test Accuracy: 95.3199, Test WER: 1.3594
Learning rate: [3.90625e-08]
Model saved to conf_9_new\running/saved_model/sivi_model_epoch_80.pt


In [15]:
torch.cuda.empty_cache()