## CS310 Natural Language Processing
## Assignment 3. Recurrent Neural Networks for Language Modeling 

**Total points**: 

In this assignment, you will train a vanilla RNN-based language model on the Harry Potter text data. 

### 0. Import Necessary Libraries

In [None]:
import nltk
from nltk.tokenize import word_tokenize
import torch
import torch.nn as nn
import numpy as np
from collections import Counter
from torch.utils.data import Dataset, DataLoader, random_split
import logging
from datetime import datetime
import torch
import torch.nn.functional as F
import math

nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /Users/ruiyuhan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/ruiyuhan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [4]:
def read_text(file_path):
    with open(file_path,'r',encoding='utf-8')as f:
        text=f.read()
    return text

text=read_text('Harry_Potter_all_books_preprocessed.txt')
text=text.lower()
words=word_tokenize(text)
print(words[:10])

['the', 'boy', 'who', 'lived', 'mr', 'and', 'mrs', 'dursley', 'of', 'number']


In [None]:
def build_vocab(words):
    vocab={'<PAD>':0,'<UNK>':1}
    word_count=Counter(words)
    for word, _ in word_count.items():
        vocab[word]=len(vocab)
    return vocab

vocab=build_vocab(words)
print('the size of vocab:',len(vocab))

the size of vocab: 26355


In [6]:
def text_to_tensor(words,vocab):
    indices=[vocab.get(word,vocab['<UNK>']) for word in words]
    return torch.tensor(indices,dtype=torch.long)

data_tensor=text_to_tensor(words,vocab)
print("the shape of data_tensor",data_tensor.size())
print(data_tensor[:10])

the shape of data_tensor torch.Size([1105952])
tensor([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11])


In [7]:
class TextDataset(Dataset):
    def __init__(self,data_tensor,seq_length):
        self.data=data_tensor
        self.seq_length=seq_length
    def __len__(self):
        return len(self.data)-self.seq_length
    def __getitem__(self,idx):
        input_seq = self.data[idx:idx + self.seq_length]
        target_seq = self.data[idx + 1:idx + self.seq_length + 1]
        return input_seq,target_seq

### 1. Build the Model

In [8]:
class RNN_LM(nn.Module):
    def __init__(self,vocab_size,embed_size,hidden_size,num_layers,bidirectional=False):
        super(RNN_LM,self).__init__()
        self.embed=nn.Embedding(vocab_size,embed_size)
        self.rnn=nn.RNN(embed_size,hidden_size,num_layers,batch_first=True,bidirectional=bidirectional)
        self.bidirectional=bidirectional
        self.num_layers=num_layers
        self.hidden_size=hidden_size
        if bidirectional:
            self.fc=nn.Linear(hidden_size*2,vocab_size)
        else:
            self.fc=nn.Linear(hidden_size,vocab_size)
    def forward(self,x,hidden=None):
        # x: [batch_size, seq_length]
        embedded=self.embed(x)
        output,hidden=self.rnn(embedded,hidden)
        output=self.fc(output)
        return output,hidden
    def init_hidden(self, batch_size, device):
        # 初始化隐藏状态
        # 对于单向 RNN: [num_layers, batch_size, hidden_size]
        # 对于双向 RNN: [num_layers * 2, batch_size, hidden_size]
        num_directions = 2 if self.bidirectional else 1
        hidden = torch.zeros(self.num_layers * num_directions, batch_size, self.hidden_size, device=device)
        return hidden

vocab_size=len(vocab)
embed_size=128
hidden_size=256
num_layers=2
bidirectional=False

rnn_model=RNN_LM(vocab_size,embed_size,hidden_size,num_layers,bidirectional)
print(rnn_model)

RNN_LM(
  (embed): Embedding(26355, 128)
  (rnn): RNN(128, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=26355, bias=True)
)


In [9]:
seq_length=20
dataset=TextDataset(data_tensor,seq_length)
total_size=len(dataset)
train_size=int(total_size*0.9)
test_size=int(total_size*0.05)
val_size=total_size-train_size-test_size
train_dataset,val_dataset,test_dataset=random_split(dataset,[train_size,val_size,test_size])

train_loader=DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=32,shuffle=False)

print(f"size of train dataset: {len(train_dataset)}")
print(f"size of test dataset: {len(test_dataset)}")
print(f"size of val dataset: {len(val_dataset)}")

size of train dataset: 995338
size of test dataset: 55296
size of val dataset: 55298


### 2. Train and Evaluate

In [None]:
def setup_logging():
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_filename = f"training_log_{timestamp}.log"
    
    logger = logging.getLogger()
    for handler in logger.handlers[:]: 
        logger.removeHandler(handler)
    
    logger.setLevel(logging.INFO)
    
    file_handler = logging.FileHandler(log_filename)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(file_handler)
    
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(console_handler)
    
    return log_filename

def train_model(model, train_loader, epochs=3, log_interval=100, save_path=None):
    log_filename = setup_logging()
    logging.info(f"Starting training with {epochs} epoch(s), log_interval={log_interval}")
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    losses = []
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch_idx, (batch_input, batch_target) in enumerate(train_loader):
            optimizer.zero_grad()
            logits, _ = model(batch_input)
            loss = criterion(logits.view(-1, vocab_size), batch_target.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
            if (batch_idx + 1) % log_interval == 0:
                avg_loss_so_far = total_loss / (batch_idx + 1)
                log_message = (f"Epoch {epoch+1}, Batch {batch_idx+1}/{len(train_loader)}, "
                              f"Batch Loss: {loss.item():.4f}, Avg Loss So Far: {avg_loss_so_far:.4f}")
                print(log_message)
                logging.info(log_message)

        avg_loss = total_loss / len(train_loader)
        losses.append(avg_loss)
        summary_message = f"Epoch {epoch+1} Summary, Avg Loss: {avg_loss:.4f}"
        print(summary_message)
        logging.info(summary_message)
    
    if save_path is not None:
        torch.save(model.state_dict(), save_path)
        logging.info(f"Model saved to {save_path}")
        print(f"Model saved to {save_path}")
    
    logging.info("Training completed")
    print(f"Training log saved to: {log_filename}")
    return losses

print("Training RNN model...")
save_path = "rnn_model_new.pth" 
rnn_losses = train_model(rnn_model, train_loader, epochs=1, log_interval=100, save_path=save_path)

2025-03-31 00:59:20,878 - INFO - Starting training with 1 epoch(s), log_interval=100


Training RNN model...


2025-03-31 00:59:25,245 - INFO - Epoch 1, Batch 100/31105, Batch Loss: 4.2583, Avg Loss So Far: 4.2274


Epoch 1, Batch 100/31105, Batch Loss: 4.2583, Avg Loss So Far: 4.2274


2025-03-31 00:59:29,610 - INFO - Epoch 1, Batch 200/31105, Batch Loss: 4.2716, Avg Loss So Far: 4.2483


Epoch 1, Batch 200/31105, Batch Loss: 4.2716, Avg Loss So Far: 4.2483


2025-03-31 00:59:34,665 - INFO - Epoch 1, Batch 300/31105, Batch Loss: 4.3097, Avg Loss So Far: 4.2720


Epoch 1, Batch 300/31105, Batch Loss: 4.3097, Avg Loss So Far: 4.2720


2025-03-31 00:59:40,215 - INFO - Epoch 1, Batch 400/31105, Batch Loss: 4.2487, Avg Loss So Far: 4.2776


Epoch 1, Batch 400/31105, Batch Loss: 4.2487, Avg Loss So Far: 4.2776


2025-03-31 00:59:45,720 - INFO - Epoch 1, Batch 500/31105, Batch Loss: 4.5610, Avg Loss So Far: 4.2888


Epoch 1, Batch 500/31105, Batch Loss: 4.5610, Avg Loss So Far: 4.2888


2025-03-31 00:59:51,197 - INFO - Epoch 1, Batch 600/31105, Batch Loss: 4.1749, Avg Loss So Far: 4.2883


Epoch 1, Batch 600/31105, Batch Loss: 4.1749, Avg Loss So Far: 4.2883


2025-03-31 00:59:56,679 - INFO - Epoch 1, Batch 700/31105, Batch Loss: 4.3358, Avg Loss So Far: 4.2919


Epoch 1, Batch 700/31105, Batch Loss: 4.3358, Avg Loss So Far: 4.2919


2025-03-31 01:00:02,206 - INFO - Epoch 1, Batch 800/31105, Batch Loss: 4.2321, Avg Loss So Far: 4.2910


Epoch 1, Batch 800/31105, Batch Loss: 4.2321, Avg Loss So Far: 4.2910


2025-03-31 01:00:07,748 - INFO - Epoch 1, Batch 900/31105, Batch Loss: 4.3742, Avg Loss So Far: 4.2904


Epoch 1, Batch 900/31105, Batch Loss: 4.3742, Avg Loss So Far: 4.2904


2025-03-31 01:00:13,286 - INFO - Epoch 1, Batch 1000/31105, Batch Loss: 4.4616, Avg Loss So Far: 4.2894


Epoch 1, Batch 1000/31105, Batch Loss: 4.4616, Avg Loss So Far: 4.2894


2025-03-31 01:00:18,793 - INFO - Epoch 1, Batch 1100/31105, Batch Loss: 4.4173, Avg Loss So Far: 4.2876


Epoch 1, Batch 1100/31105, Batch Loss: 4.4173, Avg Loss So Far: 4.2876


2025-03-31 01:00:24,282 - INFO - Epoch 1, Batch 1200/31105, Batch Loss: 4.4667, Avg Loss So Far: 4.2864


Epoch 1, Batch 1200/31105, Batch Loss: 4.4667, Avg Loss So Far: 4.2864


2025-03-31 01:00:29,712 - INFO - Epoch 1, Batch 1300/31105, Batch Loss: 4.3479, Avg Loss So Far: 4.2840


Epoch 1, Batch 1300/31105, Batch Loss: 4.3479, Avg Loss So Far: 4.2840


2025-03-31 01:00:35,154 - INFO - Epoch 1, Batch 1400/31105, Batch Loss: 4.1559, Avg Loss So Far: 4.2824


Epoch 1, Batch 1400/31105, Batch Loss: 4.1559, Avg Loss So Far: 4.2824


2025-03-31 01:00:40,643 - INFO - Epoch 1, Batch 1500/31105, Batch Loss: 4.1666, Avg Loss So Far: 4.2798


Epoch 1, Batch 1500/31105, Batch Loss: 4.1666, Avg Loss So Far: 4.2798


2025-03-31 01:00:46,059 - INFO - Epoch 1, Batch 1600/31105, Batch Loss: 4.2744, Avg Loss So Far: 4.2757


Epoch 1, Batch 1600/31105, Batch Loss: 4.2744, Avg Loss So Far: 4.2757


2025-03-31 01:00:51,513 - INFO - Epoch 1, Batch 1700/31105, Batch Loss: 4.3687, Avg Loss So Far: 4.2737


Epoch 1, Batch 1700/31105, Batch Loss: 4.3687, Avg Loss So Far: 4.2737


2025-03-31 01:00:56,936 - INFO - Epoch 1, Batch 1800/31105, Batch Loss: 3.9981, Avg Loss So Far: 4.2708


Epoch 1, Batch 1800/31105, Batch Loss: 3.9981, Avg Loss So Far: 4.2708


2025-03-31 01:01:02,437 - INFO - Epoch 1, Batch 1900/31105, Batch Loss: 4.4004, Avg Loss So Far: 4.2686


Epoch 1, Batch 1900/31105, Batch Loss: 4.4004, Avg Loss So Far: 4.2686


2025-03-31 01:01:07,845 - INFO - Epoch 1, Batch 2000/31105, Batch Loss: 3.9435, Avg Loss So Far: 4.2657


Epoch 1, Batch 2000/31105, Batch Loss: 3.9435, Avg Loss So Far: 4.2657


2025-03-31 01:01:13,306 - INFO - Epoch 1, Batch 2100/31105, Batch Loss: 4.2238, Avg Loss So Far: 4.2624


Epoch 1, Batch 2100/31105, Batch Loss: 4.2238, Avg Loss So Far: 4.2624


2025-03-31 01:01:18,710 - INFO - Epoch 1, Batch 2200/31105, Batch Loss: 4.2773, Avg Loss So Far: 4.2599


Epoch 1, Batch 2200/31105, Batch Loss: 4.2773, Avg Loss So Far: 4.2599


2025-03-31 01:01:24,212 - INFO - Epoch 1, Batch 2300/31105, Batch Loss: 4.2610, Avg Loss So Far: 4.2576


Epoch 1, Batch 2300/31105, Batch Loss: 4.2610, Avg Loss So Far: 4.2576


2025-03-31 01:01:29,665 - INFO - Epoch 1, Batch 2400/31105, Batch Loss: 4.2754, Avg Loss So Far: 4.2554


Epoch 1, Batch 2400/31105, Batch Loss: 4.2754, Avg Loss So Far: 4.2554


2025-03-31 01:01:35,102 - INFO - Epoch 1, Batch 2500/31105, Batch Loss: 4.1724, Avg Loss So Far: 4.2527


Epoch 1, Batch 2500/31105, Batch Loss: 4.1724, Avg Loss So Far: 4.2527


2025-03-31 01:01:40,560 - INFO - Epoch 1, Batch 2600/31105, Batch Loss: 4.2392, Avg Loss So Far: 4.2511


Epoch 1, Batch 2600/31105, Batch Loss: 4.2392, Avg Loss So Far: 4.2511


2025-03-31 01:01:45,990 - INFO - Epoch 1, Batch 2700/31105, Batch Loss: 4.3471, Avg Loss So Far: 4.2491


Epoch 1, Batch 2700/31105, Batch Loss: 4.3471, Avg Loss So Far: 4.2491


2025-03-31 01:01:51,444 - INFO - Epoch 1, Batch 2800/31105, Batch Loss: 4.1269, Avg Loss So Far: 4.2463


Epoch 1, Batch 2800/31105, Batch Loss: 4.1269, Avg Loss So Far: 4.2463


2025-03-31 01:01:56,889 - INFO - Epoch 1, Batch 2900/31105, Batch Loss: 4.2251, Avg Loss So Far: 4.2438


Epoch 1, Batch 2900/31105, Batch Loss: 4.2251, Avg Loss So Far: 4.2438


2025-03-31 01:02:02,377 - INFO - Epoch 1, Batch 3000/31105, Batch Loss: 4.1959, Avg Loss So Far: 4.2409


Epoch 1, Batch 3000/31105, Batch Loss: 4.1959, Avg Loss So Far: 4.2409


2025-03-31 01:02:07,831 - INFO - Epoch 1, Batch 3100/31105, Batch Loss: 3.9834, Avg Loss So Far: 4.2380


Epoch 1, Batch 3100/31105, Batch Loss: 3.9834, Avg Loss So Far: 4.2380


2025-03-31 01:02:13,280 - INFO - Epoch 1, Batch 3200/31105, Batch Loss: 4.2085, Avg Loss So Far: 4.2349


Epoch 1, Batch 3200/31105, Batch Loss: 4.2085, Avg Loss So Far: 4.2349


2025-03-31 01:02:18,715 - INFO - Epoch 1, Batch 3300/31105, Batch Loss: 4.1597, Avg Loss So Far: 4.2324


Epoch 1, Batch 3300/31105, Batch Loss: 4.1597, Avg Loss So Far: 4.2324


2025-03-31 01:02:24,191 - INFO - Epoch 1, Batch 3400/31105, Batch Loss: 3.9302, Avg Loss So Far: 4.2298


Epoch 1, Batch 3400/31105, Batch Loss: 3.9302, Avg Loss So Far: 4.2298


2025-03-31 01:02:29,665 - INFO - Epoch 1, Batch 3500/31105, Batch Loss: 4.2586, Avg Loss So Far: 4.2273


Epoch 1, Batch 3500/31105, Batch Loss: 4.2586, Avg Loss So Far: 4.2273


2025-03-31 01:02:35,126 - INFO - Epoch 1, Batch 3600/31105, Batch Loss: 4.0210, Avg Loss So Far: 4.2246


Epoch 1, Batch 3600/31105, Batch Loss: 4.0210, Avg Loss So Far: 4.2246


2025-03-31 01:02:40,531 - INFO - Epoch 1, Batch 3700/31105, Batch Loss: 4.1089, Avg Loss So Far: 4.2221


Epoch 1, Batch 3700/31105, Batch Loss: 4.1089, Avg Loss So Far: 4.2221


2025-03-31 01:02:45,963 - INFO - Epoch 1, Batch 3800/31105, Batch Loss: 4.0385, Avg Loss So Far: 4.2196


Epoch 1, Batch 3800/31105, Batch Loss: 4.0385, Avg Loss So Far: 4.2196


2025-03-31 01:02:51,424 - INFO - Epoch 1, Batch 3900/31105, Batch Loss: 4.0630, Avg Loss So Far: 4.2170


Epoch 1, Batch 3900/31105, Batch Loss: 4.0630, Avg Loss So Far: 4.2170


2025-03-31 01:02:56,942 - INFO - Epoch 1, Batch 4000/31105, Batch Loss: 4.2917, Avg Loss So Far: 4.2148


Epoch 1, Batch 4000/31105, Batch Loss: 4.2917, Avg Loss So Far: 4.2148


2025-03-31 01:03:02,416 - INFO - Epoch 1, Batch 4100/31105, Batch Loss: 4.1135, Avg Loss So Far: 4.2124


Epoch 1, Batch 4100/31105, Batch Loss: 4.1135, Avg Loss So Far: 4.2124


2025-03-31 01:03:07,930 - INFO - Epoch 1, Batch 4200/31105, Batch Loss: 4.2954, Avg Loss So Far: 4.2101


Epoch 1, Batch 4200/31105, Batch Loss: 4.2954, Avg Loss So Far: 4.2101


2025-03-31 01:03:13,396 - INFO - Epoch 1, Batch 4300/31105, Batch Loss: 4.0895, Avg Loss So Far: 4.2077


Epoch 1, Batch 4300/31105, Batch Loss: 4.0895, Avg Loss So Far: 4.2077


2025-03-31 01:03:18,857 - INFO - Epoch 1, Batch 4400/31105, Batch Loss: 4.1002, Avg Loss So Far: 4.2052


Epoch 1, Batch 4400/31105, Batch Loss: 4.1002, Avg Loss So Far: 4.2052


2025-03-31 01:03:24,331 - INFO - Epoch 1, Batch 4500/31105, Batch Loss: 4.1011, Avg Loss So Far: 4.2028


Epoch 1, Batch 4500/31105, Batch Loss: 4.1011, Avg Loss So Far: 4.2028


2025-03-31 01:03:29,804 - INFO - Epoch 1, Batch 4600/31105, Batch Loss: 3.9709, Avg Loss So Far: 4.1997


Epoch 1, Batch 4600/31105, Batch Loss: 3.9709, Avg Loss So Far: 4.1997


2025-03-31 01:03:35,278 - INFO - Epoch 1, Batch 4700/31105, Batch Loss: 3.9901, Avg Loss So Far: 4.1972


Epoch 1, Batch 4700/31105, Batch Loss: 3.9901, Avg Loss So Far: 4.1972


2025-03-31 01:03:40,743 - INFO - Epoch 1, Batch 4800/31105, Batch Loss: 4.1457, Avg Loss So Far: 4.1946


Epoch 1, Batch 4800/31105, Batch Loss: 4.1457, Avg Loss So Far: 4.1946


2025-03-31 01:03:46,221 - INFO - Epoch 1, Batch 4900/31105, Batch Loss: 4.0033, Avg Loss So Far: 4.1920


Epoch 1, Batch 4900/31105, Batch Loss: 4.0033, Avg Loss So Far: 4.1920


2025-03-31 01:03:51,731 - INFO - Epoch 1, Batch 5000/31105, Batch Loss: 4.0334, Avg Loss So Far: 4.1893


Epoch 1, Batch 5000/31105, Batch Loss: 4.0334, Avg Loss So Far: 4.1893


2025-03-31 01:03:57,177 - INFO - Epoch 1, Batch 5100/31105, Batch Loss: 4.0453, Avg Loss So Far: 4.1871


Epoch 1, Batch 5100/31105, Batch Loss: 4.0453, Avg Loss So Far: 4.1871


2025-03-31 01:04:02,695 - INFO - Epoch 1, Batch 5200/31105, Batch Loss: 3.9192, Avg Loss So Far: 4.1847


Epoch 1, Batch 5200/31105, Batch Loss: 3.9192, Avg Loss So Far: 4.1847


2025-03-31 01:04:08,164 - INFO - Epoch 1, Batch 5300/31105, Batch Loss: 3.9825, Avg Loss So Far: 4.1824


Epoch 1, Batch 5300/31105, Batch Loss: 3.9825, Avg Loss So Far: 4.1824


2025-03-31 01:04:13,658 - INFO - Epoch 1, Batch 5400/31105, Batch Loss: 4.2023, Avg Loss So Far: 4.1803


Epoch 1, Batch 5400/31105, Batch Loss: 4.2023, Avg Loss So Far: 4.1803


2025-03-31 01:04:19,128 - INFO - Epoch 1, Batch 5500/31105, Batch Loss: 4.1980, Avg Loss So Far: 4.1779


Epoch 1, Batch 5500/31105, Batch Loss: 4.1980, Avg Loss So Far: 4.1779


2025-03-31 01:04:24,599 - INFO - Epoch 1, Batch 5600/31105, Batch Loss: 3.9667, Avg Loss So Far: 4.1757


Epoch 1, Batch 5600/31105, Batch Loss: 3.9667, Avg Loss So Far: 4.1757


2025-03-31 01:04:30,057 - INFO - Epoch 1, Batch 5700/31105, Batch Loss: 3.8836, Avg Loss So Far: 4.1732


Epoch 1, Batch 5700/31105, Batch Loss: 3.8836, Avg Loss So Far: 4.1732


2025-03-31 01:04:35,544 - INFO - Epoch 1, Batch 5800/31105, Batch Loss: 3.8034, Avg Loss So Far: 4.1710


Epoch 1, Batch 5800/31105, Batch Loss: 3.8034, Avg Loss So Far: 4.1710


2025-03-31 01:04:41,029 - INFO - Epoch 1, Batch 5900/31105, Batch Loss: 3.9468, Avg Loss So Far: 4.1687


Epoch 1, Batch 5900/31105, Batch Loss: 3.9468, Avg Loss So Far: 4.1687


2025-03-31 01:04:46,534 - INFO - Epoch 1, Batch 6000/31105, Batch Loss: 3.9429, Avg Loss So Far: 4.1665


Epoch 1, Batch 6000/31105, Batch Loss: 3.9429, Avg Loss So Far: 4.1665


2025-03-31 01:04:52,039 - INFO - Epoch 1, Batch 6100/31105, Batch Loss: 4.0723, Avg Loss So Far: 4.1644


Epoch 1, Batch 6100/31105, Batch Loss: 4.0723, Avg Loss So Far: 4.1644


2025-03-31 01:04:57,489 - INFO - Epoch 1, Batch 6200/31105, Batch Loss: 4.0640, Avg Loss So Far: 4.1620


Epoch 1, Batch 6200/31105, Batch Loss: 4.0640, Avg Loss So Far: 4.1620


2025-03-31 01:05:02,973 - INFO - Epoch 1, Batch 6300/31105, Batch Loss: 4.0954, Avg Loss So Far: 4.1602


Epoch 1, Batch 6300/31105, Batch Loss: 4.0954, Avg Loss So Far: 4.1602


2025-03-31 01:05:08,452 - INFO - Epoch 1, Batch 6400/31105, Batch Loss: 3.9503, Avg Loss So Far: 4.1581


Epoch 1, Batch 6400/31105, Batch Loss: 3.9503, Avg Loss So Far: 4.1581


2025-03-31 01:05:13,930 - INFO - Epoch 1, Batch 6500/31105, Batch Loss: 4.1449, Avg Loss So Far: 4.1561


Epoch 1, Batch 6500/31105, Batch Loss: 4.1449, Avg Loss So Far: 4.1561


2025-03-31 01:05:19,419 - INFO - Epoch 1, Batch 6600/31105, Batch Loss: 3.8927, Avg Loss So Far: 4.1540


Epoch 1, Batch 6600/31105, Batch Loss: 3.8927, Avg Loss So Far: 4.1540


2025-03-31 01:05:24,926 - INFO - Epoch 1, Batch 6700/31105, Batch Loss: 3.9952, Avg Loss So Far: 4.1521


Epoch 1, Batch 6700/31105, Batch Loss: 3.9952, Avg Loss So Far: 4.1521


2025-03-31 01:05:30,407 - INFO - Epoch 1, Batch 6800/31105, Batch Loss: 3.9074, Avg Loss So Far: 4.1500


Epoch 1, Batch 6800/31105, Batch Loss: 3.9074, Avg Loss So Far: 4.1500


2025-03-31 01:05:35,893 - INFO - Epoch 1, Batch 6900/31105, Batch Loss: 3.9735, Avg Loss So Far: 4.1480


Epoch 1, Batch 6900/31105, Batch Loss: 3.9735, Avg Loss So Far: 4.1480


2025-03-31 01:05:41,351 - INFO - Epoch 1, Batch 7000/31105, Batch Loss: 4.0347, Avg Loss So Far: 4.1464


Epoch 1, Batch 7000/31105, Batch Loss: 4.0347, Avg Loss So Far: 4.1464


2025-03-31 01:05:46,868 - INFO - Epoch 1, Batch 7100/31105, Batch Loss: 4.0704, Avg Loss So Far: 4.1445


Epoch 1, Batch 7100/31105, Batch Loss: 4.0704, Avg Loss So Far: 4.1445


2025-03-31 01:05:52,359 - INFO - Epoch 1, Batch 7200/31105, Batch Loss: 3.8307, Avg Loss So Far: 4.1424


Epoch 1, Batch 7200/31105, Batch Loss: 3.8307, Avg Loss So Far: 4.1424


2025-03-31 01:05:57,793 - INFO - Epoch 1, Batch 7300/31105, Batch Loss: 3.9268, Avg Loss So Far: 4.1407


Epoch 1, Batch 7300/31105, Batch Loss: 3.9268, Avg Loss So Far: 4.1407


2025-03-31 01:06:03,323 - INFO - Epoch 1, Batch 7400/31105, Batch Loss: 4.0558, Avg Loss So Far: 4.1386


Epoch 1, Batch 7400/31105, Batch Loss: 4.0558, Avg Loss So Far: 4.1386


2025-03-31 01:06:08,808 - INFO - Epoch 1, Batch 7500/31105, Batch Loss: 3.9280, Avg Loss So Far: 4.1367


Epoch 1, Batch 7500/31105, Batch Loss: 3.9280, Avg Loss So Far: 4.1367


2025-03-31 01:06:14,267 - INFO - Epoch 1, Batch 7600/31105, Batch Loss: 3.9284, Avg Loss So Far: 4.1346


Epoch 1, Batch 7600/31105, Batch Loss: 3.9284, Avg Loss So Far: 4.1346


2025-03-31 01:06:19,712 - INFO - Epoch 1, Batch 7700/31105, Batch Loss: 3.9923, Avg Loss So Far: 4.1328


Epoch 1, Batch 7700/31105, Batch Loss: 3.9923, Avg Loss So Far: 4.1328


2025-03-31 01:06:25,211 - INFO - Epoch 1, Batch 7800/31105, Batch Loss: 4.0528, Avg Loss So Far: 4.1310


Epoch 1, Batch 7800/31105, Batch Loss: 4.0528, Avg Loss So Far: 4.1310


2025-03-31 01:06:30,707 - INFO - Epoch 1, Batch 7900/31105, Batch Loss: 3.9635, Avg Loss So Far: 4.1293


Epoch 1, Batch 7900/31105, Batch Loss: 3.9635, Avg Loss So Far: 4.1293


2025-03-31 01:06:36,195 - INFO - Epoch 1, Batch 8000/31105, Batch Loss: 4.0797, Avg Loss So Far: 4.1274


Epoch 1, Batch 8000/31105, Batch Loss: 4.0797, Avg Loss So Far: 4.1274


2025-03-31 01:06:41,664 - INFO - Epoch 1, Batch 8100/31105, Batch Loss: 3.7373, Avg Loss So Far: 4.1255


Epoch 1, Batch 8100/31105, Batch Loss: 3.7373, Avg Loss So Far: 4.1255


2025-03-31 01:06:47,148 - INFO - Epoch 1, Batch 8200/31105, Batch Loss: 3.9959, Avg Loss So Far: 4.1237


Epoch 1, Batch 8200/31105, Batch Loss: 3.9959, Avg Loss So Far: 4.1237


2025-03-31 01:06:52,683 - INFO - Epoch 1, Batch 8300/31105, Batch Loss: 3.9231, Avg Loss So Far: 4.1221


Epoch 1, Batch 8300/31105, Batch Loss: 3.9231, Avg Loss So Far: 4.1221


2025-03-31 01:06:58,196 - INFO - Epoch 1, Batch 8400/31105, Batch Loss: 3.9578, Avg Loss So Far: 4.1203


Epoch 1, Batch 8400/31105, Batch Loss: 3.9578, Avg Loss So Far: 4.1203


2025-03-31 01:07:03,669 - INFO - Epoch 1, Batch 8500/31105, Batch Loss: 3.9180, Avg Loss So Far: 4.1183


Epoch 1, Batch 8500/31105, Batch Loss: 3.9180, Avg Loss So Far: 4.1183


2025-03-31 01:07:09,124 - INFO - Epoch 1, Batch 8600/31105, Batch Loss: 3.7549, Avg Loss So Far: 4.1164


Epoch 1, Batch 8600/31105, Batch Loss: 3.7549, Avg Loss So Far: 4.1164


2025-03-31 01:07:14,610 - INFO - Epoch 1, Batch 8700/31105, Batch Loss: 4.0663, Avg Loss So Far: 4.1145


Epoch 1, Batch 8700/31105, Batch Loss: 4.0663, Avg Loss So Far: 4.1145


2025-03-31 01:07:20,119 - INFO - Epoch 1, Batch 8800/31105, Batch Loss: 3.8106, Avg Loss So Far: 4.1127


Epoch 1, Batch 8800/31105, Batch Loss: 3.8106, Avg Loss So Far: 4.1127


2025-03-31 01:07:25,645 - INFO - Epoch 1, Batch 8900/31105, Batch Loss: 4.0756, Avg Loss So Far: 4.1110


Epoch 1, Batch 8900/31105, Batch Loss: 4.0756, Avg Loss So Far: 4.1110


2025-03-31 01:07:31,136 - INFO - Epoch 1, Batch 9000/31105, Batch Loss: 3.9092, Avg Loss So Far: 4.1091


Epoch 1, Batch 9000/31105, Batch Loss: 3.9092, Avg Loss So Far: 4.1091


2025-03-31 01:07:36,627 - INFO - Epoch 1, Batch 9100/31105, Batch Loss: 3.8649, Avg Loss So Far: 4.1074


Epoch 1, Batch 9100/31105, Batch Loss: 3.8649, Avg Loss So Far: 4.1074


2025-03-31 01:07:42,115 - INFO - Epoch 1, Batch 9200/31105, Batch Loss: 3.8729, Avg Loss So Far: 4.1057


Epoch 1, Batch 9200/31105, Batch Loss: 3.8729, Avg Loss So Far: 4.1057


2025-03-31 01:07:47,632 - INFO - Epoch 1, Batch 9300/31105, Batch Loss: 3.9949, Avg Loss So Far: 4.1040


Epoch 1, Batch 9300/31105, Batch Loss: 3.9949, Avg Loss So Far: 4.1040


2025-03-31 01:07:53,114 - INFO - Epoch 1, Batch 9400/31105, Batch Loss: 4.0640, Avg Loss So Far: 4.1024


Epoch 1, Batch 9400/31105, Batch Loss: 4.0640, Avg Loss So Far: 4.1024


2025-03-31 01:07:58,658 - INFO - Epoch 1, Batch 9500/31105, Batch Loss: 4.0693, Avg Loss So Far: 4.1008


Epoch 1, Batch 9500/31105, Batch Loss: 4.0693, Avg Loss So Far: 4.1008


2025-03-31 01:08:04,140 - INFO - Epoch 1, Batch 9600/31105, Batch Loss: 3.9465, Avg Loss So Far: 4.0994


Epoch 1, Batch 9600/31105, Batch Loss: 3.9465, Avg Loss So Far: 4.0994


2025-03-31 01:08:09,667 - INFO - Epoch 1, Batch 9700/31105, Batch Loss: 3.8396, Avg Loss So Far: 4.0977


Epoch 1, Batch 9700/31105, Batch Loss: 3.8396, Avg Loss So Far: 4.0977


2025-03-31 01:08:15,142 - INFO - Epoch 1, Batch 9800/31105, Batch Loss: 3.9694, Avg Loss So Far: 4.0960


Epoch 1, Batch 9800/31105, Batch Loss: 3.9694, Avg Loss So Far: 4.0960


2025-03-31 01:08:20,644 - INFO - Epoch 1, Batch 9900/31105, Batch Loss: 3.9927, Avg Loss So Far: 4.0944


Epoch 1, Batch 9900/31105, Batch Loss: 3.9927, Avg Loss So Far: 4.0944


2025-03-31 01:08:26,128 - INFO - Epoch 1, Batch 10000/31105, Batch Loss: 4.0121, Avg Loss So Far: 4.0930


Epoch 1, Batch 10000/31105, Batch Loss: 4.0121, Avg Loss So Far: 4.0930


2025-03-31 01:08:31,594 - INFO - Epoch 1, Batch 10100/31105, Batch Loss: 4.0621, Avg Loss So Far: 4.0916


Epoch 1, Batch 10100/31105, Batch Loss: 4.0621, Avg Loss So Far: 4.0916


2025-03-31 01:08:37,055 - INFO - Epoch 1, Batch 10200/31105, Batch Loss: 4.0382, Avg Loss So Far: 4.0901


Epoch 1, Batch 10200/31105, Batch Loss: 4.0382, Avg Loss So Far: 4.0901


2025-03-31 01:08:42,547 - INFO - Epoch 1, Batch 10300/31105, Batch Loss: 3.8493, Avg Loss So Far: 4.0886


Epoch 1, Batch 10300/31105, Batch Loss: 3.8493, Avg Loss So Far: 4.0886


2025-03-31 01:08:48,043 - INFO - Epoch 1, Batch 10400/31105, Batch Loss: 3.9173, Avg Loss So Far: 4.0872


Epoch 1, Batch 10400/31105, Batch Loss: 3.9173, Avg Loss So Far: 4.0872


2025-03-31 01:08:53,536 - INFO - Epoch 1, Batch 10500/31105, Batch Loss: 3.9041, Avg Loss So Far: 4.0856


Epoch 1, Batch 10500/31105, Batch Loss: 3.9041, Avg Loss So Far: 4.0856


2025-03-31 01:08:59,064 - INFO - Epoch 1, Batch 10600/31105, Batch Loss: 4.0545, Avg Loss So Far: 4.0840


Epoch 1, Batch 10600/31105, Batch Loss: 4.0545, Avg Loss So Far: 4.0840


2025-03-31 01:09:04,495 - INFO - Epoch 1, Batch 10700/31105, Batch Loss: 3.8122, Avg Loss So Far: 4.0824


Epoch 1, Batch 10700/31105, Batch Loss: 3.8122, Avg Loss So Far: 4.0824


2025-03-31 01:09:10,004 - INFO - Epoch 1, Batch 10800/31105, Batch Loss: 3.8762, Avg Loss So Far: 4.0808


Epoch 1, Batch 10800/31105, Batch Loss: 3.8762, Avg Loss So Far: 4.0808


2025-03-31 01:09:15,485 - INFO - Epoch 1, Batch 10900/31105, Batch Loss: 3.9913, Avg Loss So Far: 4.0793


Epoch 1, Batch 10900/31105, Batch Loss: 3.9913, Avg Loss So Far: 4.0793


2025-03-31 01:09:20,985 - INFO - Epoch 1, Batch 11000/31105, Batch Loss: 3.9896, Avg Loss So Far: 4.0778


Epoch 1, Batch 11000/31105, Batch Loss: 3.9896, Avg Loss So Far: 4.0778


2025-03-31 01:09:26,482 - INFO - Epoch 1, Batch 11100/31105, Batch Loss: 3.7896, Avg Loss So Far: 4.0763


Epoch 1, Batch 11100/31105, Batch Loss: 3.7896, Avg Loss So Far: 4.0763


2025-03-31 01:09:31,978 - INFO - Epoch 1, Batch 11200/31105, Batch Loss: 3.9908, Avg Loss So Far: 4.0750


Epoch 1, Batch 11200/31105, Batch Loss: 3.9908, Avg Loss So Far: 4.0750


2025-03-31 01:09:37,465 - INFO - Epoch 1, Batch 11300/31105, Batch Loss: 3.9821, Avg Loss So Far: 4.0736


Epoch 1, Batch 11300/31105, Batch Loss: 3.9821, Avg Loss So Far: 4.0736


2025-03-31 01:09:42,968 - INFO - Epoch 1, Batch 11400/31105, Batch Loss: 3.8903, Avg Loss So Far: 4.0721


Epoch 1, Batch 11400/31105, Batch Loss: 3.8903, Avg Loss So Far: 4.0721


2025-03-31 01:09:48,489 - INFO - Epoch 1, Batch 11500/31105, Batch Loss: 3.9528, Avg Loss So Far: 4.0705


Epoch 1, Batch 11500/31105, Batch Loss: 3.9528, Avg Loss So Far: 4.0705


2025-03-31 01:09:54,036 - INFO - Epoch 1, Batch 11600/31105, Batch Loss: 3.7653, Avg Loss So Far: 4.0690


Epoch 1, Batch 11600/31105, Batch Loss: 3.7653, Avg Loss So Far: 4.0690


2025-03-31 01:09:59,517 - INFO - Epoch 1, Batch 11700/31105, Batch Loss: 3.7743, Avg Loss So Far: 4.0675


Epoch 1, Batch 11700/31105, Batch Loss: 3.7743, Avg Loss So Far: 4.0675


2025-03-31 01:10:04,985 - INFO - Epoch 1, Batch 11800/31105, Batch Loss: 3.9788, Avg Loss So Far: 4.0659


Epoch 1, Batch 11800/31105, Batch Loss: 3.9788, Avg Loss So Far: 4.0659


2025-03-31 01:10:10,462 - INFO - Epoch 1, Batch 11900/31105, Batch Loss: 4.0173, Avg Loss So Far: 4.0648


Epoch 1, Batch 11900/31105, Batch Loss: 4.0173, Avg Loss So Far: 4.0648


2025-03-31 01:10:15,965 - INFO - Epoch 1, Batch 12000/31105, Batch Loss: 3.7587, Avg Loss So Far: 4.0632


Epoch 1, Batch 12000/31105, Batch Loss: 3.7587, Avg Loss So Far: 4.0632


2025-03-31 01:10:21,442 - INFO - Epoch 1, Batch 12100/31105, Batch Loss: 4.1810, Avg Loss So Far: 4.0619


Epoch 1, Batch 12100/31105, Batch Loss: 4.1810, Avg Loss So Far: 4.0619


2025-03-31 01:10:26,927 - INFO - Epoch 1, Batch 12200/31105, Batch Loss: 3.8731, Avg Loss So Far: 4.0604


Epoch 1, Batch 12200/31105, Batch Loss: 3.8731, Avg Loss So Far: 4.0604


2025-03-31 01:10:32,395 - INFO - Epoch 1, Batch 12300/31105, Batch Loss: 3.9356, Avg Loss So Far: 4.0590


Epoch 1, Batch 12300/31105, Batch Loss: 3.9356, Avg Loss So Far: 4.0590


2025-03-31 01:10:37,866 - INFO - Epoch 1, Batch 12400/31105, Batch Loss: 3.9034, Avg Loss So Far: 4.0575


Epoch 1, Batch 12400/31105, Batch Loss: 3.9034, Avg Loss So Far: 4.0575


2025-03-31 01:10:43,363 - INFO - Epoch 1, Batch 12500/31105, Batch Loss: 3.9159, Avg Loss So Far: 4.0560


Epoch 1, Batch 12500/31105, Batch Loss: 3.9159, Avg Loss So Far: 4.0560


2025-03-31 01:10:48,847 - INFO - Epoch 1, Batch 12600/31105, Batch Loss: 3.7296, Avg Loss So Far: 4.0546


Epoch 1, Batch 12600/31105, Batch Loss: 3.7296, Avg Loss So Far: 4.0546


2025-03-31 01:10:54,357 - INFO - Epoch 1, Batch 12700/31105, Batch Loss: 3.8079, Avg Loss So Far: 4.0531


Epoch 1, Batch 12700/31105, Batch Loss: 3.8079, Avg Loss So Far: 4.0531


2025-03-31 01:10:59,859 - INFO - Epoch 1, Batch 12800/31105, Batch Loss: 3.8270, Avg Loss So Far: 4.0517


Epoch 1, Batch 12800/31105, Batch Loss: 3.8270, Avg Loss So Far: 4.0517


2025-03-31 01:11:05,348 - INFO - Epoch 1, Batch 12900/31105, Batch Loss: 3.8202, Avg Loss So Far: 4.0502


Epoch 1, Batch 12900/31105, Batch Loss: 3.8202, Avg Loss So Far: 4.0502


2025-03-31 01:11:10,804 - INFO - Epoch 1, Batch 13000/31105, Batch Loss: 3.7227, Avg Loss So Far: 4.0489


Epoch 1, Batch 13000/31105, Batch Loss: 3.7227, Avg Loss So Far: 4.0489


2025-03-31 01:11:16,334 - INFO - Epoch 1, Batch 13100/31105, Batch Loss: 3.8853, Avg Loss So Far: 4.0475


Epoch 1, Batch 13100/31105, Batch Loss: 3.8853, Avg Loss So Far: 4.0475


2025-03-31 01:11:21,816 - INFO - Epoch 1, Batch 13200/31105, Batch Loss: 3.9326, Avg Loss So Far: 4.0462


Epoch 1, Batch 13200/31105, Batch Loss: 3.9326, Avg Loss So Far: 4.0462


2025-03-31 01:11:27,309 - INFO - Epoch 1, Batch 13300/31105, Batch Loss: 3.9090, Avg Loss So Far: 4.0450


Epoch 1, Batch 13300/31105, Batch Loss: 3.9090, Avg Loss So Far: 4.0450


2025-03-31 01:11:32,794 - INFO - Epoch 1, Batch 13400/31105, Batch Loss: 3.9480, Avg Loss So Far: 4.0436


Epoch 1, Batch 13400/31105, Batch Loss: 3.9480, Avg Loss So Far: 4.0436


2025-03-31 01:11:38,276 - INFO - Epoch 1, Batch 13500/31105, Batch Loss: 3.8408, Avg Loss So Far: 4.0422


Epoch 1, Batch 13500/31105, Batch Loss: 3.8408, Avg Loss So Far: 4.0422


2025-03-31 01:11:43,757 - INFO - Epoch 1, Batch 13600/31105, Batch Loss: 3.8736, Avg Loss So Far: 4.0409


Epoch 1, Batch 13600/31105, Batch Loss: 3.8736, Avg Loss So Far: 4.0409


2025-03-31 01:11:49,269 - INFO - Epoch 1, Batch 13700/31105, Batch Loss: 3.8813, Avg Loss So Far: 4.0394


Epoch 1, Batch 13700/31105, Batch Loss: 3.8813, Avg Loss So Far: 4.0394


2025-03-31 01:11:54,769 - INFO - Epoch 1, Batch 13800/31105, Batch Loss: 4.0382, Avg Loss So Far: 4.0382


Epoch 1, Batch 13800/31105, Batch Loss: 4.0382, Avg Loss So Far: 4.0382


2025-03-31 01:12:00,213 - INFO - Epoch 1, Batch 13900/31105, Batch Loss: 3.8528, Avg Loss So Far: 4.0369


Epoch 1, Batch 13900/31105, Batch Loss: 3.8528, Avg Loss So Far: 4.0369


2025-03-31 01:12:05,693 - INFO - Epoch 1, Batch 14000/31105, Batch Loss: 3.9024, Avg Loss So Far: 4.0356


Epoch 1, Batch 14000/31105, Batch Loss: 3.9024, Avg Loss So Far: 4.0356


2025-03-31 01:12:11,198 - INFO - Epoch 1, Batch 14100/31105, Batch Loss: 3.6329, Avg Loss So Far: 4.0342


Epoch 1, Batch 14100/31105, Batch Loss: 3.6329, Avg Loss So Far: 4.0342


2025-03-31 01:12:16,689 - INFO - Epoch 1, Batch 14200/31105, Batch Loss: 3.7277, Avg Loss So Far: 4.0326


Epoch 1, Batch 14200/31105, Batch Loss: 3.7277, Avg Loss So Far: 4.0326


2025-03-31 01:12:22,167 - INFO - Epoch 1, Batch 14300/31105, Batch Loss: 3.9420, Avg Loss So Far: 4.0312


Epoch 1, Batch 14300/31105, Batch Loss: 3.9420, Avg Loss So Far: 4.0312


2025-03-31 01:12:27,679 - INFO - Epoch 1, Batch 14400/31105, Batch Loss: 3.7187, Avg Loss So Far: 4.0298


Epoch 1, Batch 14400/31105, Batch Loss: 3.7187, Avg Loss So Far: 4.0298


2025-03-31 01:12:33,133 - INFO - Epoch 1, Batch 14500/31105, Batch Loss: 3.6157, Avg Loss So Far: 4.0286


Epoch 1, Batch 14500/31105, Batch Loss: 3.6157, Avg Loss So Far: 4.0286


2025-03-31 01:12:38,641 - INFO - Epoch 1, Batch 14600/31105, Batch Loss: 4.0150, Avg Loss So Far: 4.0273


Epoch 1, Batch 14600/31105, Batch Loss: 4.0150, Avg Loss So Far: 4.0273


2025-03-31 01:12:44,069 - INFO - Epoch 1, Batch 14700/31105, Batch Loss: 3.6977, Avg Loss So Far: 4.0262


Epoch 1, Batch 14700/31105, Batch Loss: 3.6977, Avg Loss So Far: 4.0262


2025-03-31 01:12:49,558 - INFO - Epoch 1, Batch 14800/31105, Batch Loss: 3.9323, Avg Loss So Far: 4.0250


Epoch 1, Batch 14800/31105, Batch Loss: 3.9323, Avg Loss So Far: 4.0250


2025-03-31 01:12:55,020 - INFO - Epoch 1, Batch 14900/31105, Batch Loss: 3.7196, Avg Loss So Far: 4.0236


Epoch 1, Batch 14900/31105, Batch Loss: 3.7196, Avg Loss So Far: 4.0236


2025-03-31 01:13:00,508 - INFO - Epoch 1, Batch 15000/31105, Batch Loss: 3.7790, Avg Loss So Far: 4.0222


Epoch 1, Batch 15000/31105, Batch Loss: 3.7790, Avg Loss So Far: 4.0222


2025-03-31 01:13:06,004 - INFO - Epoch 1, Batch 15100/31105, Batch Loss: 3.9758, Avg Loss So Far: 4.0210


Epoch 1, Batch 15100/31105, Batch Loss: 3.9758, Avg Loss So Far: 4.0210


2025-03-31 01:13:11,460 - INFO - Epoch 1, Batch 15200/31105, Batch Loss: 4.0130, Avg Loss So Far: 4.0197


Epoch 1, Batch 15200/31105, Batch Loss: 4.0130, Avg Loss So Far: 4.0197


2025-03-31 01:13:16,951 - INFO - Epoch 1, Batch 15300/31105, Batch Loss: 4.2632, Avg Loss So Far: 4.0185


Epoch 1, Batch 15300/31105, Batch Loss: 4.2632, Avg Loss So Far: 4.0185


2025-03-31 01:13:22,429 - INFO - Epoch 1, Batch 15400/31105, Batch Loss: 3.7944, Avg Loss So Far: 4.0173


Epoch 1, Batch 15400/31105, Batch Loss: 3.7944, Avg Loss So Far: 4.0173


2025-03-31 01:13:27,975 - INFO - Epoch 1, Batch 15500/31105, Batch Loss: 3.9791, Avg Loss So Far: 4.0159


Epoch 1, Batch 15500/31105, Batch Loss: 3.9791, Avg Loss So Far: 4.0159


2025-03-31 01:13:33,447 - INFO - Epoch 1, Batch 15600/31105, Batch Loss: 3.9763, Avg Loss So Far: 4.0146


Epoch 1, Batch 15600/31105, Batch Loss: 3.9763, Avg Loss So Far: 4.0146


2025-03-31 01:13:38,985 - INFO - Epoch 1, Batch 15700/31105, Batch Loss: 3.7882, Avg Loss So Far: 4.0134


Epoch 1, Batch 15700/31105, Batch Loss: 3.7882, Avg Loss So Far: 4.0134


2025-03-31 01:13:44,432 - INFO - Epoch 1, Batch 15800/31105, Batch Loss: 3.8540, Avg Loss So Far: 4.0123


Epoch 1, Batch 15800/31105, Batch Loss: 3.8540, Avg Loss So Far: 4.0123


2025-03-31 01:13:49,914 - INFO - Epoch 1, Batch 15900/31105, Batch Loss: 3.8585, Avg Loss So Far: 4.0110


Epoch 1, Batch 15900/31105, Batch Loss: 3.8585, Avg Loss So Far: 4.0110


2025-03-31 01:13:55,368 - INFO - Epoch 1, Batch 16000/31105, Batch Loss: 3.7554, Avg Loss So Far: 4.0098


Epoch 1, Batch 16000/31105, Batch Loss: 3.7554, Avg Loss So Far: 4.0098


2025-03-31 01:14:00,875 - INFO - Epoch 1, Batch 16100/31105, Batch Loss: 3.8011, Avg Loss So Far: 4.0086


Epoch 1, Batch 16100/31105, Batch Loss: 3.8011, Avg Loss So Far: 4.0086


2025-03-31 01:14:06,335 - INFO - Epoch 1, Batch 16200/31105, Batch Loss: 3.5953, Avg Loss So Far: 4.0073


Epoch 1, Batch 16200/31105, Batch Loss: 3.5953, Avg Loss So Far: 4.0073


2025-03-31 01:14:11,816 - INFO - Epoch 1, Batch 16300/31105, Batch Loss: 3.9450, Avg Loss So Far: 4.0061


Epoch 1, Batch 16300/31105, Batch Loss: 3.9450, Avg Loss So Far: 4.0061


2025-03-31 01:14:17,284 - INFO - Epoch 1, Batch 16400/31105, Batch Loss: 3.8347, Avg Loss So Far: 4.0049


Epoch 1, Batch 16400/31105, Batch Loss: 3.8347, Avg Loss So Far: 4.0049


2025-03-31 01:14:22,747 - INFO - Epoch 1, Batch 16500/31105, Batch Loss: 3.8361, Avg Loss So Far: 4.0037


Epoch 1, Batch 16500/31105, Batch Loss: 3.8361, Avg Loss So Far: 4.0037


2025-03-31 01:14:28,210 - INFO - Epoch 1, Batch 16600/31105, Batch Loss: 3.8258, Avg Loss So Far: 4.0026


Epoch 1, Batch 16600/31105, Batch Loss: 3.8258, Avg Loss So Far: 4.0026


2025-03-31 01:14:33,738 - INFO - Epoch 1, Batch 16700/31105, Batch Loss: 3.7659, Avg Loss So Far: 4.0013


Epoch 1, Batch 16700/31105, Batch Loss: 3.7659, Avg Loss So Far: 4.0013


2025-03-31 01:14:39,196 - INFO - Epoch 1, Batch 16800/31105, Batch Loss: 3.9279, Avg Loss So Far: 4.0001


Epoch 1, Batch 16800/31105, Batch Loss: 3.9279, Avg Loss So Far: 4.0001


2025-03-31 01:14:44,653 - INFO - Epoch 1, Batch 16900/31105, Batch Loss: 3.7909, Avg Loss So Far: 3.9990


Epoch 1, Batch 16900/31105, Batch Loss: 3.7909, Avg Loss So Far: 3.9990


2025-03-31 01:14:50,202 - INFO - Epoch 1, Batch 17000/31105, Batch Loss: 3.8937, Avg Loss So Far: 3.9979


Epoch 1, Batch 17000/31105, Batch Loss: 3.8937, Avg Loss So Far: 3.9979


2025-03-31 01:14:55,691 - INFO - Epoch 1, Batch 17100/31105, Batch Loss: 3.8095, Avg Loss So Far: 3.9967


Epoch 1, Batch 17100/31105, Batch Loss: 3.8095, Avg Loss So Far: 3.9967


2025-03-31 01:15:01,171 - INFO - Epoch 1, Batch 17200/31105, Batch Loss: 3.7850, Avg Loss So Far: 3.9956


Epoch 1, Batch 17200/31105, Batch Loss: 3.7850, Avg Loss So Far: 3.9956


2025-03-31 01:15:06,664 - INFO - Epoch 1, Batch 17300/31105, Batch Loss: 3.7903, Avg Loss So Far: 3.9944


Epoch 1, Batch 17300/31105, Batch Loss: 3.7903, Avg Loss So Far: 3.9944


2025-03-31 01:15:12,207 - INFO - Epoch 1, Batch 17400/31105, Batch Loss: 3.8265, Avg Loss So Far: 3.9933


Epoch 1, Batch 17400/31105, Batch Loss: 3.8265, Avg Loss So Far: 3.9933


2025-03-31 01:15:17,669 - INFO - Epoch 1, Batch 17500/31105, Batch Loss: 4.1457, Avg Loss So Far: 3.9921


Epoch 1, Batch 17500/31105, Batch Loss: 4.1457, Avg Loss So Far: 3.9921


2025-03-31 01:15:23,173 - INFO - Epoch 1, Batch 17600/31105, Batch Loss: 3.8558, Avg Loss So Far: 3.9908


Epoch 1, Batch 17600/31105, Batch Loss: 3.8558, Avg Loss So Far: 3.9908


2025-03-31 01:15:28,630 - INFO - Epoch 1, Batch 17700/31105, Batch Loss: 3.8414, Avg Loss So Far: 3.9897


Epoch 1, Batch 17700/31105, Batch Loss: 3.8414, Avg Loss So Far: 3.9897


2025-03-31 01:15:34,158 - INFO - Epoch 1, Batch 17800/31105, Batch Loss: 3.7953, Avg Loss So Far: 3.9886


Epoch 1, Batch 17800/31105, Batch Loss: 3.7953, Avg Loss So Far: 3.9886


2025-03-31 01:15:39,664 - INFO - Epoch 1, Batch 17900/31105, Batch Loss: 3.9870, Avg Loss So Far: 3.9875


Epoch 1, Batch 17900/31105, Batch Loss: 3.9870, Avg Loss So Far: 3.9875


2025-03-31 01:15:45,130 - INFO - Epoch 1, Batch 18000/31105, Batch Loss: 3.6570, Avg Loss So Far: 3.9864


Epoch 1, Batch 18000/31105, Batch Loss: 3.6570, Avg Loss So Far: 3.9864


2025-03-31 01:15:50,623 - INFO - Epoch 1, Batch 18100/31105, Batch Loss: 3.7986, Avg Loss So Far: 3.9854


Epoch 1, Batch 18100/31105, Batch Loss: 3.7986, Avg Loss So Far: 3.9854


2025-03-31 01:15:56,066 - INFO - Epoch 1, Batch 18200/31105, Batch Loss: 3.7017, Avg Loss So Far: 3.9842


Epoch 1, Batch 18200/31105, Batch Loss: 3.7017, Avg Loss So Far: 3.9842


2025-03-31 01:16:01,561 - INFO - Epoch 1, Batch 18300/31105, Batch Loss: 3.6096, Avg Loss So Far: 3.9831


Epoch 1, Batch 18300/31105, Batch Loss: 3.6096, Avg Loss So Far: 3.9831


2025-03-31 01:16:07,031 - INFO - Epoch 1, Batch 18400/31105, Batch Loss: 3.6850, Avg Loss So Far: 3.9819


Epoch 1, Batch 18400/31105, Batch Loss: 3.6850, Avg Loss So Far: 3.9819


2025-03-31 01:16:12,517 - INFO - Epoch 1, Batch 18500/31105, Batch Loss: 3.7240, Avg Loss So Far: 3.9808


Epoch 1, Batch 18500/31105, Batch Loss: 3.7240, Avg Loss So Far: 3.9808


2025-03-31 01:16:17,929 - INFO - Epoch 1, Batch 18600/31105, Batch Loss: 3.8337, Avg Loss So Far: 3.9797


Epoch 1, Batch 18600/31105, Batch Loss: 3.8337, Avg Loss So Far: 3.9797


2025-03-31 01:16:23,433 - INFO - Epoch 1, Batch 18700/31105, Batch Loss: 3.6905, Avg Loss So Far: 3.9787


Epoch 1, Batch 18700/31105, Batch Loss: 3.6905, Avg Loss So Far: 3.9787


2025-03-31 01:16:28,907 - INFO - Epoch 1, Batch 18800/31105, Batch Loss: 3.8064, Avg Loss So Far: 3.9775


Epoch 1, Batch 18800/31105, Batch Loss: 3.8064, Avg Loss So Far: 3.9775


2025-03-31 01:16:34,402 - INFO - Epoch 1, Batch 18900/31105, Batch Loss: 3.8470, Avg Loss So Far: 3.9763


Epoch 1, Batch 18900/31105, Batch Loss: 3.8470, Avg Loss So Far: 3.9763


2025-03-31 01:16:39,884 - INFO - Epoch 1, Batch 19000/31105, Batch Loss: 4.0358, Avg Loss So Far: 3.9753


Epoch 1, Batch 19000/31105, Batch Loss: 4.0358, Avg Loss So Far: 3.9753


2025-03-31 01:16:45,399 - INFO - Epoch 1, Batch 19100/31105, Batch Loss: 3.8406, Avg Loss So Far: 3.9741


Epoch 1, Batch 19100/31105, Batch Loss: 3.8406, Avg Loss So Far: 3.9741


2025-03-31 01:16:50,882 - INFO - Epoch 1, Batch 19200/31105, Batch Loss: 3.8950, Avg Loss So Far: 3.9729


Epoch 1, Batch 19200/31105, Batch Loss: 3.8950, Avg Loss So Far: 3.9729


2025-03-31 01:16:56,378 - INFO - Epoch 1, Batch 19300/31105, Batch Loss: 3.7920, Avg Loss So Far: 3.9718


Epoch 1, Batch 19300/31105, Batch Loss: 3.7920, Avg Loss So Far: 3.9718


2025-03-31 01:17:01,880 - INFO - Epoch 1, Batch 19400/31105, Batch Loss: 3.6987, Avg Loss So Far: 3.9707


Epoch 1, Batch 19400/31105, Batch Loss: 3.6987, Avg Loss So Far: 3.9707


2025-03-31 01:17:07,325 - INFO - Epoch 1, Batch 19500/31105, Batch Loss: 3.5883, Avg Loss So Far: 3.9696


Epoch 1, Batch 19500/31105, Batch Loss: 3.5883, Avg Loss So Far: 3.9696


2025-03-31 01:17:12,789 - INFO - Epoch 1, Batch 19600/31105, Batch Loss: 3.8723, Avg Loss So Far: 3.9685


Epoch 1, Batch 19600/31105, Batch Loss: 3.8723, Avg Loss So Far: 3.9685


2025-03-31 01:17:18,291 - INFO - Epoch 1, Batch 19700/31105, Batch Loss: 3.6432, Avg Loss So Far: 3.9674


Epoch 1, Batch 19700/31105, Batch Loss: 3.6432, Avg Loss So Far: 3.9674


2025-03-31 01:17:23,770 - INFO - Epoch 1, Batch 19800/31105, Batch Loss: 3.8102, Avg Loss So Far: 3.9663


Epoch 1, Batch 19800/31105, Batch Loss: 3.8102, Avg Loss So Far: 3.9663


2025-03-31 01:17:29,259 - INFO - Epoch 1, Batch 19900/31105, Batch Loss: 3.6025, Avg Loss So Far: 3.9653


Epoch 1, Batch 19900/31105, Batch Loss: 3.6025, Avg Loss So Far: 3.9653


2025-03-31 01:17:34,753 - INFO - Epoch 1, Batch 20000/31105, Batch Loss: 3.6882, Avg Loss So Far: 3.9642


Epoch 1, Batch 20000/31105, Batch Loss: 3.6882, Avg Loss So Far: 3.9642


2025-03-31 01:17:40,263 - INFO - Epoch 1, Batch 20100/31105, Batch Loss: 3.7089, Avg Loss So Far: 3.9633


Epoch 1, Batch 20100/31105, Batch Loss: 3.7089, Avg Loss So Far: 3.9633


2025-03-31 01:17:45,786 - INFO - Epoch 1, Batch 20200/31105, Batch Loss: 3.6762, Avg Loss So Far: 3.9623


Epoch 1, Batch 20200/31105, Batch Loss: 3.6762, Avg Loss So Far: 3.9623


2025-03-31 01:17:51,303 - INFO - Epoch 1, Batch 20300/31105, Batch Loss: 4.0520, Avg Loss So Far: 3.9612


Epoch 1, Batch 20300/31105, Batch Loss: 4.0520, Avg Loss So Far: 3.9612


2025-03-31 01:17:56,785 - INFO - Epoch 1, Batch 20400/31105, Batch Loss: 3.8011, Avg Loss So Far: 3.9601


Epoch 1, Batch 20400/31105, Batch Loss: 3.8011, Avg Loss So Far: 3.9601


2025-03-31 01:18:02,265 - INFO - Epoch 1, Batch 20500/31105, Batch Loss: 3.8100, Avg Loss So Far: 3.9591


Epoch 1, Batch 20500/31105, Batch Loss: 3.8100, Avg Loss So Far: 3.9591


2025-03-31 01:18:07,749 - INFO - Epoch 1, Batch 20600/31105, Batch Loss: 3.6501, Avg Loss So Far: 3.9580


Epoch 1, Batch 20600/31105, Batch Loss: 3.6501, Avg Loss So Far: 3.9580


2025-03-31 01:18:13,263 - INFO - Epoch 1, Batch 20700/31105, Batch Loss: 3.8134, Avg Loss So Far: 3.9570


Epoch 1, Batch 20700/31105, Batch Loss: 3.8134, Avg Loss So Far: 3.9570


2025-03-31 01:18:18,701 - INFO - Epoch 1, Batch 20800/31105, Batch Loss: 3.8095, Avg Loss So Far: 3.9560


Epoch 1, Batch 20800/31105, Batch Loss: 3.8095, Avg Loss So Far: 3.9560


2025-03-31 01:18:24,159 - INFO - Epoch 1, Batch 20900/31105, Batch Loss: 3.8832, Avg Loss So Far: 3.9550


Epoch 1, Batch 20900/31105, Batch Loss: 3.8832, Avg Loss So Far: 3.9550


2025-03-31 01:18:29,642 - INFO - Epoch 1, Batch 21000/31105, Batch Loss: 3.9449, Avg Loss So Far: 3.9539


Epoch 1, Batch 21000/31105, Batch Loss: 3.9449, Avg Loss So Far: 3.9539


2025-03-31 01:18:35,129 - INFO - Epoch 1, Batch 21100/31105, Batch Loss: 3.9755, Avg Loss So Far: 3.9529


Epoch 1, Batch 21100/31105, Batch Loss: 3.9755, Avg Loss So Far: 3.9529


2025-03-31 01:18:40,646 - INFO - Epoch 1, Batch 21200/31105, Batch Loss: 3.7077, Avg Loss So Far: 3.9519


Epoch 1, Batch 21200/31105, Batch Loss: 3.7077, Avg Loss So Far: 3.9519


2025-03-31 01:18:46,111 - INFO - Epoch 1, Batch 21300/31105, Batch Loss: 3.5144, Avg Loss So Far: 3.9509


Epoch 1, Batch 21300/31105, Batch Loss: 3.5144, Avg Loss So Far: 3.9509


2025-03-31 01:18:51,587 - INFO - Epoch 1, Batch 21400/31105, Batch Loss: 3.6306, Avg Loss So Far: 3.9498


Epoch 1, Batch 21400/31105, Batch Loss: 3.6306, Avg Loss So Far: 3.9498


2025-03-31 01:18:57,074 - INFO - Epoch 1, Batch 21500/31105, Batch Loss: 3.6114, Avg Loss So Far: 3.9488


Epoch 1, Batch 21500/31105, Batch Loss: 3.6114, Avg Loss So Far: 3.9488


2025-03-31 01:19:02,574 - INFO - Epoch 1, Batch 21600/31105, Batch Loss: 3.5904, Avg Loss So Far: 3.9478


Epoch 1, Batch 21600/31105, Batch Loss: 3.5904, Avg Loss So Far: 3.9478


2025-03-31 01:19:08,072 - INFO - Epoch 1, Batch 21700/31105, Batch Loss: 3.7916, Avg Loss So Far: 3.9468


Epoch 1, Batch 21700/31105, Batch Loss: 3.7916, Avg Loss So Far: 3.9468


2025-03-31 01:19:13,533 - INFO - Epoch 1, Batch 21800/31105, Batch Loss: 3.7671, Avg Loss So Far: 3.9459


Epoch 1, Batch 21800/31105, Batch Loss: 3.7671, Avg Loss So Far: 3.9459


2025-03-31 01:19:19,012 - INFO - Epoch 1, Batch 21900/31105, Batch Loss: 3.8681, Avg Loss So Far: 3.9449


Epoch 1, Batch 21900/31105, Batch Loss: 3.8681, Avg Loss So Far: 3.9449


2025-03-31 01:19:24,551 - INFO - Epoch 1, Batch 22000/31105, Batch Loss: 3.7465, Avg Loss So Far: 3.9439


Epoch 1, Batch 22000/31105, Batch Loss: 3.7465, Avg Loss So Far: 3.9439


2025-03-31 01:19:30,006 - INFO - Epoch 1, Batch 22100/31105, Batch Loss: 3.6520, Avg Loss So Far: 3.9429


Epoch 1, Batch 22100/31105, Batch Loss: 3.6520, Avg Loss So Far: 3.9429


2025-03-31 01:19:35,531 - INFO - Epoch 1, Batch 22200/31105, Batch Loss: 3.6759, Avg Loss So Far: 3.9420


Epoch 1, Batch 22200/31105, Batch Loss: 3.6759, Avg Loss So Far: 3.9420


2025-03-31 01:19:41,023 - INFO - Epoch 1, Batch 22300/31105, Batch Loss: 3.7494, Avg Loss So Far: 3.9409


Epoch 1, Batch 22300/31105, Batch Loss: 3.7494, Avg Loss So Far: 3.9409


2025-03-31 01:19:46,480 - INFO - Epoch 1, Batch 22400/31105, Batch Loss: 3.6658, Avg Loss So Far: 3.9399


Epoch 1, Batch 22400/31105, Batch Loss: 3.6658, Avg Loss So Far: 3.9399


2025-03-31 01:19:51,983 - INFO - Epoch 1, Batch 22500/31105, Batch Loss: 3.7158, Avg Loss So Far: 3.9388


Epoch 1, Batch 22500/31105, Batch Loss: 3.7158, Avg Loss So Far: 3.9388


2025-03-31 01:19:57,434 - INFO - Epoch 1, Batch 22600/31105, Batch Loss: 3.7294, Avg Loss So Far: 3.9378


Epoch 1, Batch 22600/31105, Batch Loss: 3.7294, Avg Loss So Far: 3.9378


2025-03-31 01:20:02,886 - INFO - Epoch 1, Batch 22700/31105, Batch Loss: 3.6877, Avg Loss So Far: 3.9369


Epoch 1, Batch 22700/31105, Batch Loss: 3.6877, Avg Loss So Far: 3.9369


2025-03-31 01:20:08,377 - INFO - Epoch 1, Batch 22800/31105, Batch Loss: 3.7974, Avg Loss So Far: 3.9359


Epoch 1, Batch 22800/31105, Batch Loss: 3.7974, Avg Loss So Far: 3.9359


2025-03-31 01:20:13,865 - INFO - Epoch 1, Batch 22900/31105, Batch Loss: 3.7777, Avg Loss So Far: 3.9349


Epoch 1, Batch 22900/31105, Batch Loss: 3.7777, Avg Loss So Far: 3.9349


2025-03-31 01:20:19,366 - INFO - Epoch 1, Batch 23000/31105, Batch Loss: 3.7453, Avg Loss So Far: 3.9339


Epoch 1, Batch 23000/31105, Batch Loss: 3.7453, Avg Loss So Far: 3.9339


2025-03-31 01:20:24,860 - INFO - Epoch 1, Batch 23100/31105, Batch Loss: 3.6566, Avg Loss So Far: 3.9330


Epoch 1, Batch 23100/31105, Batch Loss: 3.6566, Avg Loss So Far: 3.9330


2025-03-31 01:20:30,384 - INFO - Epoch 1, Batch 23200/31105, Batch Loss: 3.6149, Avg Loss So Far: 3.9320


Epoch 1, Batch 23200/31105, Batch Loss: 3.6149, Avg Loss So Far: 3.9320


2025-03-31 01:20:35,882 - INFO - Epoch 1, Batch 23300/31105, Batch Loss: 3.7198, Avg Loss So Far: 3.9310


Epoch 1, Batch 23300/31105, Batch Loss: 3.7198, Avg Loss So Far: 3.9310


2025-03-31 01:20:41,358 - INFO - Epoch 1, Batch 23400/31105, Batch Loss: 3.6177, Avg Loss So Far: 3.9301


Epoch 1, Batch 23400/31105, Batch Loss: 3.6177, Avg Loss So Far: 3.9301


2025-03-31 01:20:46,866 - INFO - Epoch 1, Batch 23500/31105, Batch Loss: 3.7721, Avg Loss So Far: 3.9292


Epoch 1, Batch 23500/31105, Batch Loss: 3.7721, Avg Loss So Far: 3.9292


2025-03-31 01:20:52,345 - INFO - Epoch 1, Batch 23600/31105, Batch Loss: 3.5827, Avg Loss So Far: 3.9283


Epoch 1, Batch 23600/31105, Batch Loss: 3.5827, Avg Loss So Far: 3.9283


2025-03-31 01:20:57,811 - INFO - Epoch 1, Batch 23700/31105, Batch Loss: 3.6973, Avg Loss So Far: 3.9273


Epoch 1, Batch 23700/31105, Batch Loss: 3.6973, Avg Loss So Far: 3.9273


2025-03-31 01:21:03,275 - INFO - Epoch 1, Batch 23800/31105, Batch Loss: 3.4683, Avg Loss So Far: 3.9264


Epoch 1, Batch 23800/31105, Batch Loss: 3.4683, Avg Loss So Far: 3.9264


2025-03-31 01:21:08,750 - INFO - Epoch 1, Batch 23900/31105, Batch Loss: 3.7362, Avg Loss So Far: 3.9255


Epoch 1, Batch 23900/31105, Batch Loss: 3.7362, Avg Loss So Far: 3.9255


2025-03-31 01:21:14,259 - INFO - Epoch 1, Batch 24000/31105, Batch Loss: 3.6721, Avg Loss So Far: 3.9246


Epoch 1, Batch 24000/31105, Batch Loss: 3.6721, Avg Loss So Far: 3.9246


2025-03-31 01:21:19,739 - INFO - Epoch 1, Batch 24100/31105, Batch Loss: 3.6881, Avg Loss So Far: 3.9237


Epoch 1, Batch 24100/31105, Batch Loss: 3.6881, Avg Loss So Far: 3.9237


2025-03-31 01:21:25,270 - INFO - Epoch 1, Batch 24200/31105, Batch Loss: 3.7408, Avg Loss So Far: 3.9228


Epoch 1, Batch 24200/31105, Batch Loss: 3.7408, Avg Loss So Far: 3.9228


2025-03-31 01:21:30,770 - INFO - Epoch 1, Batch 24300/31105, Batch Loss: 3.8447, Avg Loss So Far: 3.9219


Epoch 1, Batch 24300/31105, Batch Loss: 3.8447, Avg Loss So Far: 3.9219


2025-03-31 01:21:36,250 - INFO - Epoch 1, Batch 24400/31105, Batch Loss: 3.7754, Avg Loss So Far: 3.9209


Epoch 1, Batch 24400/31105, Batch Loss: 3.7754, Avg Loss So Far: 3.9209


2025-03-31 01:21:41,745 - INFO - Epoch 1, Batch 24500/31105, Batch Loss: 3.6130, Avg Loss So Far: 3.9199


Epoch 1, Batch 24500/31105, Batch Loss: 3.6130, Avg Loss So Far: 3.9199


2025-03-31 01:21:47,259 - INFO - Epoch 1, Batch 24600/31105, Batch Loss: 3.7036, Avg Loss So Far: 3.9191


Epoch 1, Batch 24600/31105, Batch Loss: 3.7036, Avg Loss So Far: 3.9191


2025-03-31 01:21:52,772 - INFO - Epoch 1, Batch 24700/31105, Batch Loss: 3.4668, Avg Loss So Far: 3.9182


Epoch 1, Batch 24700/31105, Batch Loss: 3.4668, Avg Loss So Far: 3.9182


2025-03-31 01:21:58,219 - INFO - Epoch 1, Batch 24800/31105, Batch Loss: 3.6517, Avg Loss So Far: 3.9173


Epoch 1, Batch 24800/31105, Batch Loss: 3.6517, Avg Loss So Far: 3.9173


2025-03-31 01:22:03,691 - INFO - Epoch 1, Batch 24900/31105, Batch Loss: 3.7367, Avg Loss So Far: 3.9163


Epoch 1, Batch 24900/31105, Batch Loss: 3.7367, Avg Loss So Far: 3.9163


2025-03-31 01:22:09,189 - INFO - Epoch 1, Batch 25000/31105, Batch Loss: 3.8165, Avg Loss So Far: 3.9154


Epoch 1, Batch 25000/31105, Batch Loss: 3.8165, Avg Loss So Far: 3.9154


2025-03-31 01:22:14,664 - INFO - Epoch 1, Batch 25100/31105, Batch Loss: 3.6830, Avg Loss So Far: 3.9144


Epoch 1, Batch 25100/31105, Batch Loss: 3.6830, Avg Loss So Far: 3.9144


2025-03-31 01:22:20,111 - INFO - Epoch 1, Batch 25200/31105, Batch Loss: 3.6651, Avg Loss So Far: 3.9135


Epoch 1, Batch 25200/31105, Batch Loss: 3.6651, Avg Loss So Far: 3.9135


2025-03-31 01:22:25,635 - INFO - Epoch 1, Batch 25300/31105, Batch Loss: 3.8454, Avg Loss So Far: 3.9126


Epoch 1, Batch 25300/31105, Batch Loss: 3.8454, Avg Loss So Far: 3.9126


2025-03-31 01:22:31,135 - INFO - Epoch 1, Batch 25400/31105, Batch Loss: 3.6366, Avg Loss So Far: 3.9117


Epoch 1, Batch 25400/31105, Batch Loss: 3.6366, Avg Loss So Far: 3.9117


2025-03-31 01:22:36,595 - INFO - Epoch 1, Batch 25500/31105, Batch Loss: 3.7575, Avg Loss So Far: 3.9108


Epoch 1, Batch 25500/31105, Batch Loss: 3.7575, Avg Loss So Far: 3.9108


2025-03-31 01:22:42,088 - INFO - Epoch 1, Batch 25600/31105, Batch Loss: 3.5056, Avg Loss So Far: 3.9099


Epoch 1, Batch 25600/31105, Batch Loss: 3.5056, Avg Loss So Far: 3.9099


2025-03-31 01:22:47,635 - INFO - Epoch 1, Batch 25700/31105, Batch Loss: 3.7121, Avg Loss So Far: 3.9091


Epoch 1, Batch 25700/31105, Batch Loss: 3.7121, Avg Loss So Far: 3.9091


2025-03-31 01:22:53,119 - INFO - Epoch 1, Batch 25800/31105, Batch Loss: 3.6086, Avg Loss So Far: 3.9082


Epoch 1, Batch 25800/31105, Batch Loss: 3.6086, Avg Loss So Far: 3.9082


2025-03-31 01:22:58,596 - INFO - Epoch 1, Batch 25900/31105, Batch Loss: 3.6371, Avg Loss So Far: 3.9073


Epoch 1, Batch 25900/31105, Batch Loss: 3.6371, Avg Loss So Far: 3.9073


2025-03-31 01:23:04,069 - INFO - Epoch 1, Batch 26000/31105, Batch Loss: 3.8384, Avg Loss So Far: 3.9065


Epoch 1, Batch 26000/31105, Batch Loss: 3.8384, Avg Loss So Far: 3.9065


2025-03-31 01:23:09,564 - INFO - Epoch 1, Batch 26100/31105, Batch Loss: 3.5278, Avg Loss So Far: 3.9055


Epoch 1, Batch 26100/31105, Batch Loss: 3.5278, Avg Loss So Far: 3.9055


2025-03-31 01:23:15,047 - INFO - Epoch 1, Batch 26200/31105, Batch Loss: 3.6941, Avg Loss So Far: 3.9046


Epoch 1, Batch 26200/31105, Batch Loss: 3.6941, Avg Loss So Far: 3.9046


2025-03-31 01:23:20,512 - INFO - Epoch 1, Batch 26300/31105, Batch Loss: 3.4132, Avg Loss So Far: 3.9037


Epoch 1, Batch 26300/31105, Batch Loss: 3.4132, Avg Loss So Far: 3.9037


2025-03-31 01:23:26,022 - INFO - Epoch 1, Batch 26400/31105, Batch Loss: 3.5604, Avg Loss So Far: 3.9027


Epoch 1, Batch 26400/31105, Batch Loss: 3.5604, Avg Loss So Far: 3.9027


2025-03-31 01:23:31,489 - INFO - Epoch 1, Batch 26500/31105, Batch Loss: 3.8420, Avg Loss So Far: 3.9019


Epoch 1, Batch 26500/31105, Batch Loss: 3.8420, Avg Loss So Far: 3.9019


2025-03-31 01:23:36,981 - INFO - Epoch 1, Batch 26600/31105, Batch Loss: 3.8287, Avg Loss So Far: 3.9010


Epoch 1, Batch 26600/31105, Batch Loss: 3.8287, Avg Loss So Far: 3.9010


2025-03-31 01:23:42,426 - INFO - Epoch 1, Batch 26700/31105, Batch Loss: 3.4582, Avg Loss So Far: 3.9001


Epoch 1, Batch 26700/31105, Batch Loss: 3.4582, Avg Loss So Far: 3.9001


2025-03-31 01:23:47,935 - INFO - Epoch 1, Batch 26800/31105, Batch Loss: 3.5074, Avg Loss So Far: 3.8993


Epoch 1, Batch 26800/31105, Batch Loss: 3.5074, Avg Loss So Far: 3.8993


2025-03-31 01:23:53,507 - INFO - Epoch 1, Batch 26900/31105, Batch Loss: 3.5820, Avg Loss So Far: 3.8983


Epoch 1, Batch 26900/31105, Batch Loss: 3.5820, Avg Loss So Far: 3.8983


2025-03-31 01:23:58,938 - INFO - Epoch 1, Batch 27000/31105, Batch Loss: 3.5401, Avg Loss So Far: 3.8975


Epoch 1, Batch 27000/31105, Batch Loss: 3.5401, Avg Loss So Far: 3.8975


2025-03-31 01:24:04,447 - INFO - Epoch 1, Batch 27100/31105, Batch Loss: 3.5916, Avg Loss So Far: 3.8967


Epoch 1, Batch 27100/31105, Batch Loss: 3.5916, Avg Loss So Far: 3.8967


2025-03-31 01:24:09,929 - INFO - Epoch 1, Batch 27200/31105, Batch Loss: 3.6894, Avg Loss So Far: 3.8959


Epoch 1, Batch 27200/31105, Batch Loss: 3.6894, Avg Loss So Far: 3.8959


2025-03-31 01:24:15,366 - INFO - Epoch 1, Batch 27300/31105, Batch Loss: 3.5188, Avg Loss So Far: 3.8950


Epoch 1, Batch 27300/31105, Batch Loss: 3.5188, Avg Loss So Far: 3.8950


2025-03-31 01:24:20,881 - INFO - Epoch 1, Batch 27400/31105, Batch Loss: 3.6098, Avg Loss So Far: 3.8941


Epoch 1, Batch 27400/31105, Batch Loss: 3.6098, Avg Loss So Far: 3.8941


2025-03-31 01:24:26,344 - INFO - Epoch 1, Batch 27500/31105, Batch Loss: 3.5505, Avg Loss So Far: 3.8933


Epoch 1, Batch 27500/31105, Batch Loss: 3.5505, Avg Loss So Far: 3.8933


2025-03-31 01:24:31,804 - INFO - Epoch 1, Batch 27600/31105, Batch Loss: 3.4273, Avg Loss So Far: 3.8924


Epoch 1, Batch 27600/31105, Batch Loss: 3.4273, Avg Loss So Far: 3.8924


2025-03-31 01:24:37,309 - INFO - Epoch 1, Batch 27700/31105, Batch Loss: 3.6996, Avg Loss So Far: 3.8916


Epoch 1, Batch 27700/31105, Batch Loss: 3.6996, Avg Loss So Far: 3.8916


2025-03-31 01:24:42,775 - INFO - Epoch 1, Batch 27800/31105, Batch Loss: 3.9148, Avg Loss So Far: 3.8908


Epoch 1, Batch 27800/31105, Batch Loss: 3.9148, Avg Loss So Far: 3.8908


2025-03-31 01:24:48,301 - INFO - Epoch 1, Batch 27900/31105, Batch Loss: 3.8058, Avg Loss So Far: 3.8899


Epoch 1, Batch 27900/31105, Batch Loss: 3.8058, Avg Loss So Far: 3.8899


2025-03-31 01:24:53,816 - INFO - Epoch 1, Batch 28000/31105, Batch Loss: 3.5036, Avg Loss So Far: 3.8891


Epoch 1, Batch 28000/31105, Batch Loss: 3.5036, Avg Loss So Far: 3.8891


2025-03-31 01:24:59,337 - INFO - Epoch 1, Batch 28100/31105, Batch Loss: 3.6414, Avg Loss So Far: 3.8883


Epoch 1, Batch 28100/31105, Batch Loss: 3.6414, Avg Loss So Far: 3.8883


2025-03-31 01:25:04,778 - INFO - Epoch 1, Batch 28200/31105, Batch Loss: 3.5682, Avg Loss So Far: 3.8875


Epoch 1, Batch 28200/31105, Batch Loss: 3.5682, Avg Loss So Far: 3.8875


2025-03-31 01:25:10,299 - INFO - Epoch 1, Batch 28300/31105, Batch Loss: 3.7428, Avg Loss So Far: 3.8867


Epoch 1, Batch 28300/31105, Batch Loss: 3.7428, Avg Loss So Far: 3.8867


2025-03-31 01:25:15,780 - INFO - Epoch 1, Batch 28400/31105, Batch Loss: 3.7160, Avg Loss So Far: 3.8858


Epoch 1, Batch 28400/31105, Batch Loss: 3.7160, Avg Loss So Far: 3.8858


2025-03-31 01:25:21,303 - INFO - Epoch 1, Batch 28500/31105, Batch Loss: 3.6614, Avg Loss So Far: 3.8850


Epoch 1, Batch 28500/31105, Batch Loss: 3.6614, Avg Loss So Far: 3.8850


2025-03-31 01:25:26,801 - INFO - Epoch 1, Batch 28600/31105, Batch Loss: 3.5349, Avg Loss So Far: 3.8842


Epoch 1, Batch 28600/31105, Batch Loss: 3.5349, Avg Loss So Far: 3.8842


2025-03-31 01:25:32,312 - INFO - Epoch 1, Batch 28700/31105, Batch Loss: 3.5839, Avg Loss So Far: 3.8835


Epoch 1, Batch 28700/31105, Batch Loss: 3.5839, Avg Loss So Far: 3.8835


2025-03-31 01:25:37,799 - INFO - Epoch 1, Batch 28800/31105, Batch Loss: 3.6943, Avg Loss So Far: 3.8826


Epoch 1, Batch 28800/31105, Batch Loss: 3.6943, Avg Loss So Far: 3.8826


2025-03-31 01:25:43,279 - INFO - Epoch 1, Batch 28900/31105, Batch Loss: 3.6227, Avg Loss So Far: 3.8818


Epoch 1, Batch 28900/31105, Batch Loss: 3.6227, Avg Loss So Far: 3.8818


2025-03-31 01:25:48,754 - INFO - Epoch 1, Batch 29000/31105, Batch Loss: 3.6350, Avg Loss So Far: 3.8810


Epoch 1, Batch 29000/31105, Batch Loss: 3.6350, Avg Loss So Far: 3.8810


2025-03-31 01:25:54,288 - INFO - Epoch 1, Batch 29100/31105, Batch Loss: 3.5710, Avg Loss So Far: 3.8802


Epoch 1, Batch 29100/31105, Batch Loss: 3.5710, Avg Loss So Far: 3.8802


2025-03-31 01:25:59,746 - INFO - Epoch 1, Batch 29200/31105, Batch Loss: 3.8101, Avg Loss So Far: 3.8794


Epoch 1, Batch 29200/31105, Batch Loss: 3.8101, Avg Loss So Far: 3.8794


2025-03-31 01:26:05,248 - INFO - Epoch 1, Batch 29300/31105, Batch Loss: 3.4417, Avg Loss So Far: 3.8786


Epoch 1, Batch 29300/31105, Batch Loss: 3.4417, Avg Loss So Far: 3.8786


2025-03-31 01:26:10,754 - INFO - Epoch 1, Batch 29400/31105, Batch Loss: 3.6754, Avg Loss So Far: 3.8778


Epoch 1, Batch 29400/31105, Batch Loss: 3.6754, Avg Loss So Far: 3.8778


2025-03-31 01:26:16,267 - INFO - Epoch 1, Batch 29500/31105, Batch Loss: 3.5361, Avg Loss So Far: 3.8769


Epoch 1, Batch 29500/31105, Batch Loss: 3.5361, Avg Loss So Far: 3.8769


2025-03-31 01:26:21,747 - INFO - Epoch 1, Batch 29600/31105, Batch Loss: 3.6072, Avg Loss So Far: 3.8761


Epoch 1, Batch 29600/31105, Batch Loss: 3.6072, Avg Loss So Far: 3.8761


2025-03-31 01:26:27,278 - INFO - Epoch 1, Batch 29700/31105, Batch Loss: 3.6635, Avg Loss So Far: 3.8753


Epoch 1, Batch 29700/31105, Batch Loss: 3.6635, Avg Loss So Far: 3.8753


2025-03-31 01:26:32,768 - INFO - Epoch 1, Batch 29800/31105, Batch Loss: 3.7354, Avg Loss So Far: 3.8744


Epoch 1, Batch 29800/31105, Batch Loss: 3.7354, Avg Loss So Far: 3.8744


2025-03-31 01:26:38,275 - INFO - Epoch 1, Batch 29900/31105, Batch Loss: 3.7158, Avg Loss So Far: 3.8737


Epoch 1, Batch 29900/31105, Batch Loss: 3.7158, Avg Loss So Far: 3.8737


2025-03-31 01:26:43,782 - INFO - Epoch 1, Batch 30000/31105, Batch Loss: 3.6133, Avg Loss So Far: 3.8729


Epoch 1, Batch 30000/31105, Batch Loss: 3.6133, Avg Loss So Far: 3.8729


2025-03-31 01:26:49,302 - INFO - Epoch 1, Batch 30100/31105, Batch Loss: 3.5615, Avg Loss So Far: 3.8721


Epoch 1, Batch 30100/31105, Batch Loss: 3.5615, Avg Loss So Far: 3.8721


2025-03-31 01:26:54,763 - INFO - Epoch 1, Batch 30200/31105, Batch Loss: 3.6940, Avg Loss So Far: 3.8712


Epoch 1, Batch 30200/31105, Batch Loss: 3.6940, Avg Loss So Far: 3.8712


2025-03-31 01:27:00,271 - INFO - Epoch 1, Batch 30300/31105, Batch Loss: 3.7143, Avg Loss So Far: 3.8705


Epoch 1, Batch 30300/31105, Batch Loss: 3.7143, Avg Loss So Far: 3.8705


2025-03-31 01:27:05,753 - INFO - Epoch 1, Batch 30400/31105, Batch Loss: 3.5816, Avg Loss So Far: 3.8696


Epoch 1, Batch 30400/31105, Batch Loss: 3.5816, Avg Loss So Far: 3.8696


2025-03-31 01:27:11,269 - INFO - Epoch 1, Batch 30500/31105, Batch Loss: 3.4680, Avg Loss So Far: 3.8688


Epoch 1, Batch 30500/31105, Batch Loss: 3.4680, Avg Loss So Far: 3.8688


2025-03-31 01:27:16,710 - INFO - Epoch 1, Batch 30600/31105, Batch Loss: 3.5479, Avg Loss So Far: 3.8681


Epoch 1, Batch 30600/31105, Batch Loss: 3.5479, Avg Loss So Far: 3.8681


2025-03-31 01:27:22,205 - INFO - Epoch 1, Batch 30700/31105, Batch Loss: 3.6060, Avg Loss So Far: 3.8673


Epoch 1, Batch 30700/31105, Batch Loss: 3.6060, Avg Loss So Far: 3.8673


2025-03-31 01:27:27,698 - INFO - Epoch 1, Batch 30800/31105, Batch Loss: 3.6954, Avg Loss So Far: 3.8665


Epoch 1, Batch 30800/31105, Batch Loss: 3.6954, Avg Loss So Far: 3.8665


2025-03-31 01:27:33,157 - INFO - Epoch 1, Batch 30900/31105, Batch Loss: 3.5182, Avg Loss So Far: 3.8657


Epoch 1, Batch 30900/31105, Batch Loss: 3.5182, Avg Loss So Far: 3.8657


2025-03-31 01:27:38,666 - INFO - Epoch 1, Batch 31000/31105, Batch Loss: 3.4214, Avg Loss So Far: 3.8649


Epoch 1, Batch 31000/31105, Batch Loss: 3.4214, Avg Loss So Far: 3.8649


2025-03-31 01:27:44,177 - INFO - Epoch 1, Batch 31100/31105, Batch Loss: 3.5912, Avg Loss So Far: 3.8641


Epoch 1, Batch 31100/31105, Batch Loss: 3.5912, Avg Loss So Far: 3.8641


2025-03-31 01:27:44,460 - INFO - Epoch 1 Summary, Avg Loss: 3.8641
2025-03-31 01:27:44,506 - INFO - Model saved to rnn_model_new
2025-03-31 01:27:44,507 - INFO - Training completed


Epoch 1 Summary, Avg Loss: 3.8641
Model saved to rnn_model_new
Training log saved to: training_log_20250331_005920.log


In [None]:
def evaluate_perplexity(model, data_loader):
    model.eval()
    total_loss = 0
    total_tokens = 0
    
    criterion = nn.CrossEntropyLoss(ignore_index=0, reduction='none')
    
    with torch.no_grad():
        for inputs, targets in data_loader:
            outputs, _ = model(inputs)
            loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1))
            mask = targets != 0
            batch_loss = loss[mask.view(-1)].sum().item()
            batch_tokens = mask.sum().item()
            total_loss += batch_loss
            total_tokens += batch_tokens
    
    if total_tokens == 0:
        raise ValueError("No non-padding tokens found in the dataset. Check your data or padding index.")
    
    avg_loss = total_loss / total_tokens
    perplexity = math.exp(avg_loss)
    print(f"Total non-padding tokens: {total_tokens}")
    print(f"Total loss: {total_loss:.4f}")
    print(f"Average loss: {avg_loss:.4f}")
    print(f"Perplexity: {perplexity:.2f}")
    return perplexity

rnn_model=RNN_LM(vocab_size,embed_size,hidden_size,num_layers,bidirectional)
rnn_model.load_state_dict(torch.load('rnn_model_new.pth'))
rnn_model.eval()
rnn_perplexity = evaluate_perplexity(rnn_model, test_loader)
print(f"RNN Test Perplexity: {rnn_perplexity:.2f}")

Total non-padding tokens: 1105920
Total loss: 3930417.2094
Average loss: 3.5540
Perplexity: 34.95
RNN Test Perplexity: 34.95


In [None]:
def generate_text(model, prefix, vocab, max_length=50, device="cpu"):
    model.eval()
    with torch.no_grad():
        prefix_words = word_tokenize(prefix.lower())
        prefix_indices = [vocab.get(word, vocab['<UNK>']) for word in prefix_words]
        input_seq = torch.tensor(prefix_indices, dtype=torch.long).unsqueeze(0).to(device)  # Add batch dimension
        
        hidden = model.init_hidden(1, device) 
        
        # Generate sequence
        generated_indices = prefix_indices.copy()
        
        for _ in range(max_length - len(prefix_indices)):
            output, hidden = model(input_seq, hidden)
            
            last_output = output[:, -1, :]  
            next_word_idx = torch.argmax(last_output, dim=-1).item()  # Greedy search
            
            generated_indices.append(next_word_idx)
            
            input_seq = torch.tensor([next_word_idx], dtype=torch.long).unsqueeze(0).to(device)
        
        generated_words = [list(vocab.keys())[list(vocab.values()).index(idx)] for idx in generated_indices]
        return " ".join(generated_words)

prefixes = [
    "Harry looked",
    "Hermione said",
    "Ron shouted",
    "Dumbledore stood",
    "Snape glared"
]

print("\nGenerating 5 pairs of sentences (RNN):")
for prefix in prefixes:
    rnn_sentence = generate_text(rnn_model, prefix, vocab, max_length=30, device='cpu')
    
    print(f"\nPrefix: {prefix}")
    print(f"RNN: {rnn_sentence}")


Generating 5 pairs of sentences (RNN):

Prefix: Harry looked
RNN: harry looked up at the ceiling and then the door swung open and he was sure that he had not seen him .he was wearing a long overcoat and a

Prefix: Hermione said
RNN: hermione said ron nodding toward the remainder of the bus .the ministry of magic confirmed that he had been discovered to act as though he had a stitch in his

Prefix: Ron shouted
RNN: ron shouted at the dangling and carrying a large suitcase and banged her eyes and scanning it to the kilted glass of the chamber of secrets and finally the dark

Prefix: Dumbledore stood
RNN: dumbledore stood up and croaked hedwig and roger davies was almost glad to see her .aberforths not supposed to be in the forest .harry felt a thrill of foreboding .avada

Prefix: Snape glared
RNN: snape glared at him as though he had a stitch in his chest was torn and his eyes were rolling madly and down the table and shouted expelliarmusv and he
