In [2]:
# GPU Memory Management
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Clear GPU cache
import torch
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    
print("‚úÖ GPU memory optimizations set")


‚úÖ GPU memory optimizations set


In [3]:
# Install required packages including PDF processing
!pip install rouge-score transformers torch sentencepiece nltk sacrebleu bert-score kagglehub PyPDF2 pdfplumber -q

print("‚úÖ All packages installed successfully!")


‚úÖ All packages installed successfully!


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import glob
from rouge_score import rouge_scorer
from transformers import AutoTokenizer, AutoModel
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.tokenize import sent_tokenize
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# PDF processing
import PyPDF2
import pdfplumber
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

print("‚úÖ All imports successful!")


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


‚úÖ All imports successful!


In [5]:
import os
import glob
import random

# Dataset path (already added as input in Kaggle)
path = '/kaggle/input/legal-dataset-sc-judgments-india-19502024'

# Use 2025 folder only
year_folder = '2025'
final_path = os.path.join(path, 'supreme_court_judgments', year_folder)

print(f"üìÅ Path to dataset files: {path}")
print(f"üéØ Using year folder: {year_folder}")
print(f"üìÇ Final path: {final_path}")


üìÅ Path to dataset files: /kaggle/input/legal-dataset-sc-judgments-india-19502024
üéØ Using year folder: 2025
üìÇ Final path: /kaggle/input/legal-dataset-sc-judgments-india-19502024/supreme_court_judgments/2025


In [6]:
def extract_text_from_pdf(pdf_path):
    """Extract text from PDF with multiple fallback methods"""
    text = ""
    
    try:
        # Method 1: pdfplumber
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
        
        if text.strip():
            return clean_pdf_text(text)
    except Exception as e:
        pass
    
    try:
        # Method 2: PyPDF2 (fallback)
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
        
        if text.strip():
            return clean_pdf_text(text)
    except Exception as e:
        pass
    
    return None

def clean_pdf_text(text):
    """Clean extracted PDF text"""
    # Remove excessive whitespace
    text = re.sub(r'\s+', ' ', text)
    
    # Remove page numbers
    text = re.sub(r'\n\s*\d+\s*\n', '\n', text)
    text = re.sub(r'Page \d+ of \d+', '', text, flags=re.IGNORECASE)
    
    # Remove headers/footers
    text = re.sub(r'Supreme Court of India.*?\n', '', text, flags=re.IGNORECASE)
    
    # Fix common PDF issues
    text = text.replace('ÔøΩ', '')
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
    text = re.sub(r'\n+', '\n', text)
    
    return text.strip()

print("‚úÖ PDF processing utilities defined")


‚úÖ PDF processing utilities defined


In [7]:
class PDFLegalDataset(Dataset):
    """
    Dataset for PDF-based Supreme Court judgments
    FIXED: Handles both .pdf and .PDF extensions
    """
    def __init__(self, pdf_source, min_words=50):
        self.aspects = ['facts', 'analysis', 'argument', 'judgement', 'statute']
        
        # Handle both directory and list of paths
        if isinstance(pdf_source, str):
            # Search for both .pdf and .PDF (case-insensitive)
            pdf_lower = glob.glob(os.path.join(pdf_source, '**/*.pdf'), recursive=True)
            pdf_upper = glob.glob(os.path.join(pdf_source, '**/*.PDF'), recursive=True)
            self.pdf_files = sorted(pdf_lower + pdf_upper)
            print(f"üìÅ Loading from directory: {pdf_source}")
        elif isinstance(pdf_source, list):
            self.pdf_files = pdf_source
            print(f"üìÅ Loading from provided list of {len(pdf_source)} PDFs")
        else:
            raise ValueError("pdf_source must be either a directory path (str) or list of PDF paths")
        
        print(f"   Found {len(self.pdf_files)} PDF files")
        print(f"   Processing ALL PDFs (no limits)...\n")
        
        self.data = []
        failed_files = []
        
        for pdf_file in tqdm(self.pdf_files, desc="Extracting PDFs"):
            try:
                judgment_text = extract_text_from_pdf(pdf_file)
                
                if judgment_text and len(judgment_text.split()) >= min_words:
                    self.data.append({
                        'judgment': judgment_text,
                        'judgment_file': os.path.basename(pdf_file),
                        'pdf_path': pdf_file
                    })
                else:
                    failed_files.append(os.path.basename(pdf_file))
            except Exception as e:
                failed_files.append(os.path.basename(pdf_file))
        
        print(f"\nüìä Dataset Statistics:")
        print(f"   ‚úÖ Successfully loaded: {len(self.data)} judgments")
        print(f"   ‚ùå Failed/too short: {len(failed_files)} files")
        
        if self.data:
            word_counts = [len(item['judgment'].split()) for item in self.data]
            sentence_counts = [len(sent_tokenize(item['judgment'])) for item in self.data]
            print(f"\n   üìà Document Statistics:")
            print(f"     Words - Min: {min(word_counts):,} | Max: {max(word_counts):,} | Avg: {int(np.mean(word_counts)):,}")
            print(f"     Sentences - Min: {min(sentence_counts):,} | Max: {max(sentence_counts):,} | Avg: {int(np.mean(sentence_counts)):,}")
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]

print("‚úÖ PDF Legal Dataset defined (supports .pdf and .PDF)")


‚úÖ PDF Legal Dataset defined (supports .pdf and .PDF)


In [8]:
# Load dataset - use 'pdf_source' parameter (not 'pdf_dir')
dataset = PDFLegalDataset(
    pdf_source=final_path,  # Changed from pdf_dir to pdf_source
    min_words=50
)

if len(dataset) == 0:
    print("\n‚ùå No valid PDFs found!")
else:
    # 3-WAY SPLIT: Train (70%) / Validation (15%) / Test (15%)
    train_size = int(0.70 * len(dataset))
    val_size = int(0.15 * len(dataset))
    test_size = len(dataset) - train_size - val_size
    
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size]
    )
    
    print(f"\nüìà 3-WAY DATASET SPLIT:")
    print(f"{'='*70}")
    print(f"   Total documents:     {len(dataset)}")
    print(f"   Training set:        {len(train_dataset)} ({len(train_dataset)/len(dataset)*100:.1f}%)")
    print(f"   Validation set:      {len(val_dataset)} ({len(val_dataset)/len(dataset)*100:.1f}%)")
    print(f"   Test set:            {len(test_dataset)} ({len(test_dataset)/len(dataset)*100:.1f}%)")
    print(f"{'='*70}")
    
    # Show sample
    print(f"\nüìÑ Sample Document:")
    print(f"   File: {dataset[0]['judgment_file']}")
    print(f"   Length: {len(dataset[0]['judgment'].split())} words")
    print(f"   Preview: {dataset[0]['judgment'][:300]}...")


üìÅ Loading from directory: /kaggle/input/legal-dataset-sc-judgments-india-19502024/supreme_court_judgments/2025
   Found 400 PDF files
   Processing ALL PDFs (no limits)...



Extracting PDFs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 400/400 [07:40<00:00,  1.15s/it]



üìä Dataset Statistics:
   ‚úÖ Successfully loaded: 400 judgments
   ‚ùå Failed/too short: 0 files

   üìà Document Statistics:
     Words - Min: 584 | Max: 41,504 | Avg: 5,656
     Sentences - Min: 32 | Max: 1,874 | Avg: 244

üìà 3-WAY DATASET SPLIT:
   Total documents:     400
   Training set:        280 (70.0%)
   Validation set:      60 (15.0%)
   Test set:            60 (15.0%)

üìÑ Sample Document:
   File: A_John_Kennedy_vs_The_State_Of_Tamil_Nadu_on_24_March_2025_1.PDF
   Length: 3381 words
   Preview: A John Kennedy vs The State Of Tamil Nadu on 24 March, 2025 A John Kennedy vs The State Of Tamil Nadu on 24 March, 2025 Author: Vikram Nath Bench: Vikram Nath 2025 INSC 443 REPORTABLE IN THE SUPREME COURT OF INDIA CIVIL APPELLATE JURISDICTION SLP(CIVIL) NO(S). 999-1001 OF 2025 A. JOHN KENNEDY ETC. ‚Ä¶...


In [9]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üîß Using device: {device}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")


üîß Using device: cuda
   GPU: Tesla T4
   Memory: 15.83 GB


In [10]:
class SentenceEncoder(nn.Module):
    def __init__(self, model_name='law-ai/InLegalBERT', hidden_dim=768):
        super(SentenceEncoder, self).__init__()
        print(f"üîÑ Loading {model_name}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.hidden_dim = hidden_dim
        
        # Freeze encoder
        for param in self.model.parameters():
            param.requires_grad = False
        
        print(f"‚úÖ {model_name} loaded successfully (hidden_dim={hidden_dim})")
    
    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output[0]
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    
    def forward(self, sentences):
        encoded = self.tokenizer(sentences, padding=True, truncation=True, 
                                max_length=512, return_tensors='pt')
        encoded = {k: v.to(device) for k, v in encoded.items()}
        
        with torch.no_grad():
            model_output = self.model(**encoded)
        
        embeddings = self.mean_pooling(model_output, encoded['attention_mask'])
        return embeddings

print("‚úÖ InLegalBERT Sentence Encoder defined")


‚úÖ InLegalBERT Sentence Encoder defined


In [11]:
class MultiAspectPolicyNetwork(nn.Module):
    def __init__(self, input_dim=768, hidden_dim=256, num_aspects=5, dropout=0.5):
        super(MultiAspectPolicyNetwork, self).__init__()
        
        self.num_aspects = num_aspects
        self.aspects = ['facts', 'analysis', 'argument', 'judgement', 'statute']
        self.hidden_dim = hidden_dim
        
        # Shared LSTM encoder
        self.shared_lstm = nn.LSTM(input_dim, hidden_dim, num_layers=2, 
                                   bidirectional=True, batch_first=True, dropout=dropout)
        
        # Position and aspect embeddings
        self.position_embedding = nn.Embedding(2000, 64)  # Increased for long documents
        self.aspect_embedding = nn.Embedding(num_aspects, hidden_dim * 2)
        
        # Attention mechanism
        self.attention = nn.MultiheadAttention(
            embed_dim=hidden_dim * 2,
            num_heads=4, 
            dropout=dropout,
            batch_first=True
        )
        
        # Aspect-specific heads
        self.aspect_heads = nn.ModuleDict()
        for aspect in self.aspects:
            self.aspect_heads[aspect] = nn.Sequential(
                nn.Linear(hidden_dim * 2 + 64 + hidden_dim * 2, 512),
                nn.LayerNorm(512),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(512, 256),
                nn.LayerNorm(256),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(256, 1)
            )
    
    def forward(self, sentence_embeddings, positions, aspect_idx):
        lstm_out, _ = self.shared_lstm(sentence_embeddings)
        
        aspect_emb_query = self.aspect_embedding(torch.tensor([aspect_idx], device=device))
        aspect_emb_query = aspect_emb_query.unsqueeze(1).expand(-1, lstm_out.size(1), -1)
        
        attended_out, _ = self.attention(aspect_emb_query, lstm_out, lstm_out)
        
        combined_lstm = lstm_out + attended_out
        pos_emb = self.position_embedding(positions)
        aspect_emb_concat = self.aspect_embedding(torch.tensor([aspect_idx], device=device))
        aspect_emb_concat = aspect_emb_concat.unsqueeze(1).expand(-1, sentence_embeddings.size(1), -1)
        
        combined = torch.cat([combined_lstm, pos_emb, aspect_emb_concat], dim=-1)
        
        aspect_name = self.aspects[aspect_idx]
        logits = self.aspect_heads[aspect_name](combined).squeeze(-1)
        
        return logits

print("‚úÖ Policy Network defined (supports long documents)")


‚úÖ Policy Network defined (supports long documents)


In [12]:
class ImprovedReferenceFreeReward:
    """Enhanced reward function with improved weights"""
    
    def __init__(self, encoder):
        self.encoder = encoder
        
    def compute_coherence(self, summary_sentences):
        if len(summary_sentences) < 2:
            return 1.0
        
        embeddings = self.encoder(summary_sentences)
        coherence_scores = []
        for i in range(len(embeddings) - 1):
            sim = torch.nn.functional.cosine_similarity(
                embeddings[i].unsqueeze(0), 
                embeddings[i+1].unsqueeze(0)
            )
            coherence_scores.append(sim.item())
        
        return np.mean(coherence_scores) if coherence_scores else 0.0
    
    def compute_coverage(self, summary_embeddings, document_embeddings):
        coverage_scores = []
        for doc_emb in document_embeddings:
            sims = torch.nn.functional.cosine_similarity(
                doc_emb.unsqueeze(0).expand(len(summary_embeddings), -1),
                summary_embeddings
            )
            coverage_scores.append(sims.max().item())
        
        return np.mean(coverage_scores)
    
    def compute_redundancy(self, summary_embeddings):
        if len(summary_embeddings) < 2:
            return 0.0
        
        redundancy_scores = []
        for i in range(len(summary_embeddings)):
            for j in range(i+1, len(summary_embeddings)):
                sim = torch.nn.functional.cosine_similarity(
                    summary_embeddings[i].unsqueeze(0),
                    summary_embeddings[j].unsqueeze(0)
                )
                redundancy_scores.append(sim.item())
        
        return np.mean(redundancy_scores) if redundancy_scores else 0.0
    
    def compute_diversity(self, summary_embeddings):
        if len(summary_embeddings) < 2:
            return 0.0
        
        distances = []
        for i in range(len(summary_embeddings)):
            for j in range(i+1, len(summary_embeddings)):
                dist = 1.0 - torch.nn.functional.cosine_similarity(
                    summary_embeddings[i].unsqueeze(0),
                    summary_embeddings[j].unsqueeze(0)
                ).item()
                distances.append(dist)
        
        return np.mean(distances) if distances else 0.0
    
    def compute_informativeness(self, summary_embeddings, document_embeddings):
        informativeness_scores = []
        for summ_emb in summary_embeddings:
            sims = torch.nn.functional.cosine_similarity(
                summ_emb.unsqueeze(0).expand(len(document_embeddings), -1),
                document_embeddings
            )
            informativeness_scores.append(sims.mean().item())
        
        return np.mean(informativeness_scores)
    
    def compute_position_bias(self, selected_indices, total_sentences):
        if len(selected_indices) == 0:
            return 0.0
        
        positions = [idx / max(total_sentences - 1, 1) for idx in selected_indices]
        
        has_beginning = any(p < 0.33 for p in positions)
        has_middle = any(0.33 <= p < 0.67 for p in positions)
        has_end = any(p >= 0.67 for p in positions)
        
        return (has_beginning + has_middle + has_end) / 3.0
    
    def compute_sentence_ordering_penalty(self, selected_indices):
        if len(selected_indices) < 2:
            return 1.0
        
        ordered = all(selected_indices[i] < selected_indices[i+1] 
                     for i in range(len(selected_indices)-1))
        
        if ordered:
            return 1.0
        else:
            inversions = 0
            for i in range(len(selected_indices)):
                for j in range(i+1, len(selected_indices)):
                    if selected_indices[i] > selected_indices[j]:
                        inversions += 1
            
            max_inversions = len(selected_indices) * (len(selected_indices) - 1) / 2
            return 1.0 - (inversions / max(max_inversions, 1))
    
    def compute_reward(self, selected_sentences, selected_indices, 
                      all_sentences, target_ratio, aspect):
        """IMPROVED reward with better weights"""
        if not selected_sentences or not all_sentences:
            return 0.0
        
        # Encode sentences
        summary_embeddings = self.encoder(selected_sentences)
        document_embeddings = self.encoder(all_sentences)
        
        # Component rewards
        coherence = self.compute_coherence(selected_sentences)
        coverage = self.compute_coverage(summary_embeddings, document_embeddings)
        redundancy = self.compute_redundancy(summary_embeddings)
        diversity = self.compute_diversity(summary_embeddings)
        informativeness = self.compute_informativeness(summary_embeddings, document_embeddings)
        position_bias = self.compute_position_bias(selected_indices, len(all_sentences))
        ordering_score = self.compute_sentence_ordering_penalty(selected_indices)
        
        # Length control
        gen_words = sum(len(s.split()) for s in selected_sentences)
        source_words = sum(len(s.split()) for s in all_sentences)
        compression_ratio = gen_words / max(source_words, 1)
        
        length_diff = abs(compression_ratio - target_ratio)
        length_reward = np.exp(-10 * length_diff)
        
        # IMPROVED WEIGHTS
        final_reward = (
            0.30 * coherence +           # Increased
            0.25 * coverage +            
            0.20 * (1 - redundancy) +    # Increased penalty
            0.15 * diversity +           # Increased
            0.05 * informativeness +     
            0.03 * position_bias +       
            0.02 * ordering_score        # New
        )
        
        final_reward = final_reward * (0.8 + 0.2 * length_reward)
        
        return final_reward

print("‚úÖ Improved Reference-Free Reward Function defined")


‚úÖ Improved Reference-Free Reward Function defined


In [13]:
class UnsupervisedRLAgent:
    """
    RL Agent with memory-efficient processing
    """
    def __init__(self, encoder, policy, learning_rate=1e-4, gamma=0.99, weight_decay=1e-5):
        self.encoder = encoder.to(device)
        self.policy = policy.to(device)
        
        self.optimizer = optim.Adam(self.policy.parameters(), 
                                    lr=learning_rate, 
                                    weight_decay=weight_decay)
        
        self.gamma = gamma
        self.aspects = ['facts', 'analysis', 'argument', 'judgement', 'statute']
        
        self.aspect_summary_ratios = {
            'facts': 0.12,
            'analysis': 0.12,
            'argument': 0.08,
            'judgement': 0.06,
            'statute': 0.08
        }
        
        self.min_summary_sentences = 3
        self.max_document_sentences = 500  # ADD THIS - Limit for GPU memory
        
        self.reward_function = ImprovedReferenceFreeReward(encoder)
        self.reward_baselines = {aspect: 0.0 for aspect in self.aspects}
        self.baseline_momentum = 0.9
        self.best_val_reward = 0.0
        self.patience = 15
        self.patience_counter = 0
    
    def preprocess_document(self, judgment_text):
        """Process sentences with memory limit"""
        sentences = sent_tokenize(judgment_text)
        sentences = [s.strip() for s in sentences if len(s.strip().split()) > 5]
        
        # LIMIT SENTENCES FOR GPU MEMORY
        if len(sentences) > self.max_document_sentences:
            print(f"   ‚ö†Ô∏è Document too long ({len(sentences)} sents), truncating to {self.max_document_sentences}", end='\r')
            sentences = sentences[:self.max_document_sentences]
        
        return sentences
    
    def encode_sentences(self, sentences):
        """Encode ALL sentences in batches"""
        if len(sentences) == 0:
            return torch.zeros(1, self.encoder.hidden_dim).to(device)
        
        batch_size = 16
        embeddings = []
        for i in range(0, len(sentences), batch_size):
            batch = sentences[i:i+batch_size]
            emb = self.encoder(batch)
            embeddings.append(emb)
        
        return torch.cat(embeddings, dim=0)
    
    def select_sentences(self, logits, num_sentences, aspect, temperature=1.0):
        logits = logits / temperature
        probs = torch.sigmoid(logits)
        
        dist = torch.distributions.Bernoulli(probs)
        actions = dist.sample()
        log_probs = dist.log_prob(actions)
        
        num_selected = actions.sum().item()
        
        aspect_ratio = self.aspect_summary_ratios[aspect]
        max_sentences = max(self.min_summary_sentences, int(num_sentences * aspect_ratio))
        
        if num_selected < self.min_summary_sentences:
            top_k_indices = torch.topk(logits, k=self.min_summary_sentences).indices
            actions = torch.zeros_like(actions)
            actions[top_k_indices] = 1
        elif num_selected > max_sentences:
            selected_indices = (actions == 1).nonzero(as_tuple=True)[0]
            selected_logits = logits[selected_indices]
            top_k = torch.topk(selected_logits, k=max_sentences).indices
            keep_indices = selected_indices[top_k]
            actions = torch.zeros_like(actions)
            actions[keep_indices] = 1
        
        return actions, log_probs
    
    def train_step(self, judgment, temperature=1.0):
        """Train with memory management"""
        self.policy.train()
        
        sentences = self.preprocess_document(judgment)
        if len(sentences) < 3:
            return 0.0, {aspect: 0.0 for aspect in self.aspects}
        
        print(f"   Processing {len(sentences)} sentences...", end='\r')
        
        # Clear cache before processing
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        sentence_embeddings = self.encode_sentences(sentences)
        sentence_embeddings = sentence_embeddings.unsqueeze(0)
        
        positions = torch.arange(min(len(sentences), 1999), device=device).unsqueeze(0)
        if len(sentences) < positions.size(1):
            positions = positions[:, :len(sentences)]
        
        total_loss = 0.0
        aspect_rewards = {}
        
        for aspect_idx, aspect in enumerate(self.aspects):
            logits = self.policy(sentence_embeddings, positions, aspect_idx).squeeze(0)
            actions, log_probs = self.select_sentences(logits, len(sentences), aspect, temperature)
            
            selected_indices = (actions == 1).nonzero(as_tuple=True)[0].cpu().numpy()
            selected_sentences = [sentences[i] for i in selected_indices]
            
            reward = self.reward_function.compute_reward(
                selected_sentences, 
                selected_indices,
                sentences,
                self.aspect_summary_ratios[aspect],
                aspect
            )
            aspect_rewards[aspect] = reward
            
            self.reward_baselines[aspect] = (
                self.baseline_momentum * self.reward_baselines[aspect] + 
                (1 - self.baseline_momentum) * reward
            )
            
            advantage = reward - self.reward_baselines[aspect]
            loss = -(log_probs * actions * advantage).sum()
            
            probs = torch.sigmoid(logits)
            entropy = -(probs * torch.log(probs + 1e-10) + 
                       (1 - probs) * torch.log(1 - probs + 1e-10)).mean()
            loss = loss - 0.15 * entropy
            
            total_loss += loss
        
        total_loss = total_loss / len(self.aspects)
        self.optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1.0)
        self.optimizer.step()
        
        # Clear cache after update
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        return total_loss.item(), aspect_rewards
    
    def generate_summaries(self, judgment, temperature=0.3):
        """Generate summaries from FULL document"""
        self.policy.eval()
        
        with torch.no_grad():
            sentences = self.preprocess_document(judgment)
            if len(sentences) < 3:
                return {aspect: ' '.join(sentences) for aspect in self.aspects}
            
            # NO LIMIT - Process ALL sentences
            sentence_embeddings = self.encode_sentences(sentences)
            sentence_embeddings = sentence_embeddings.unsqueeze(0)
            
            positions = torch.arange(min(len(sentences), 1999), device=device).unsqueeze(0)
            if len(sentences) < positions.size(1):
                positions = positions[:, :len(sentences)]
            
            summaries = {}
            
            for aspect_idx, aspect in enumerate(self.aspects):
                logits = self.policy(sentence_embeddings, positions, aspect_idx).squeeze(0)
                
                aspect_ratio = self.aspect_summary_ratios[aspect]
                num_select = max(self.min_summary_sentences, 
                               int(len(sentences) * aspect_ratio))
                
                top_k_indices = torch.topk(logits, k=num_select).indices
                top_k_indices = sorted(top_k_indices.cpu().numpy())
                
                summary = ' '.join([sentences[i] for i in top_k_indices])
                summaries[aspect] = summary
            
            return summaries
    
    def evaluate(self, val_dataset):
        self.policy.eval()
        
        aspect_total_rewards = {aspect: 0.0 for aspect in self.aspects}
        
        for item in val_dataset:
            judgment = item['judgment']
            sentences = self.preprocess_document(judgment)
            generated_summaries = self.generate_summaries(judgment)
            
            for aspect_idx, aspect in enumerate(self.aspects):
                selected_sentences = sent_tokenize(generated_summaries[aspect])
                selected_indices = list(range(len(selected_sentences)))
                
                reward = self.reward_function.compute_reward(
                    selected_sentences,
                    selected_indices,
                    sentences,
                    self.aspect_summary_ratios[aspect],
                    aspect
                )
                aspect_total_rewards[aspect] += reward
        
        aspect_avg_rewards = {
            aspect: aspect_total_rewards[aspect] / len(val_dataset)
            for aspect in self.aspects
        }
        
        overall_avg = np.mean(list(aspect_avg_rewards.values()))
        
        return overall_avg, aspect_avg_rewards
    
    def check_early_stopping(self, val_reward):
        if val_reward > self.best_val_reward:
            self.best_val_reward = val_reward
            self.patience_counter = 0
            return False
        else:
            self.patience_counter += 1
            return self.patience_counter >= self.patience

print("‚úÖ Unsupervised RL Agent defined (NO SENTENCE LIMITS)")


‚úÖ Unsupervised RL Agent defined (NO SENTENCE LIMITS)


In [14]:
def train_unsupervised_rl(agent, train_dataset, val_dataset, num_epochs=50, 
                         save_path='final_inlegalbert_model.pt'):
    """Training with warmup - processes ALL documents fully"""
    training_history = {
        'train_loss': [],
        'train_rewards': {aspect: [] for aspect in agent.aspects},
        'val_rewards': {aspect: [] for aspect in agent.aspects},
        'overall_val_reward': [],
        'learning_rate': []
    }
    
    warmup_epochs = 3
    base_lr = agent.optimizer.param_groups[0]['lr']
    
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        agent.optimizer, mode='max', factor=0.5, patience=5, verbose=True
    )
    
    for epoch in range(num_epochs):
        # Warmup
        if epoch < warmup_epochs:
            warmup_lr = base_lr * (epoch + 1) / warmup_epochs
            for param_group in agent.optimizer.param_groups:
                param_group['lr'] = warmup_lr
            print(f"   üî• Warmup: LR = {warmup_lr:.6f}")
        
        temperature = max(0.3, 1.0 - epoch / num_epochs)
        
        epoch_losses = []
        epoch_aspect_rewards = {aspect: [] for aspect in agent.aspects}
        
        pbar = tqdm(train_dataset, desc=f'Epoch {epoch+1}/{num_epochs}')
        for item in pbar:
            judgment = item['judgment']
            
            loss, aspect_rewards = agent.train_step(judgment, temperature)
            
            epoch_losses.append(loss)
            for aspect in agent.aspects:
                if aspect_rewards[aspect] > 0:
                    epoch_aspect_rewards[aspect].append(aspect_rewards[aspect])
            
            avg_reward = np.mean([r for r in aspect_rewards.values() if r > 0]) if any(aspect_rewards.values()) else 0
            pbar.set_postfix({
                'loss': f'{loss:.4f}', 
                'reward': f'{avg_reward:.4f}',
                'temp': f'{temperature:.2f}'
            })
        
        avg_train_loss = np.mean(epoch_losses) if epoch_losses else 0
        
        for aspect in agent.aspects:
            if epoch_aspect_rewards[aspect]:
                training_history['train_rewards'][aspect].append(np.mean(epoch_aspect_rewards[aspect]))
            else:
                training_history['train_rewards'][aspect].append(0.0)
        
        # Validation
        overall_val_reward, aspect_val_rewards = agent.evaluate(val_dataset)
        
        if epoch >= warmup_epochs:
            scheduler.step(overall_val_reward)
        
        training_history['train_loss'].append(avg_train_loss)
        training_history['overall_val_reward'].append(overall_val_reward)
        training_history['learning_rate'].append(agent.optimizer.param_groups[0]['lr'])
        for aspect in agent.aspects:
            training_history['val_rewards'][aspect].append(aspect_val_rewards[aspect])
        
        print(f'\n{"="*70}')
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'{"="*70}')
        print(f'  Train Loss: {avg_train_loss:.4f}')
        print(f'  Overall Val Reward: {overall_val_reward:.4f}')
        print(f'  Learning Rate: {agent.optimizer.param_groups[0]["lr"]:.6f}')
        print(f'  Temperature: {temperature:.4f}')
        print(f'\n  Aspect-wise Val Rewards:')
        for aspect in agent.aspects:
            print(f'    {aspect:12s}: {aspect_val_rewards[aspect]:.4f}')
        
        # Save best
        if overall_val_reward > agent.best_val_reward:
            agent.best_val_reward = overall_val_reward
            agent.patience_counter = 0
            torch.save({
                'epoch': epoch,
                'policy_state_dict': agent.policy.state_dict(),
                'optimizer_state_dict': agent.optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'val_reward': overall_val_reward,
                'aspect_val_rewards': aspect_val_rewards,
                'training_history': training_history,
            }, save_path)
            print(f'\n  ‚úÖ Saved best model (Val Reward: {overall_val_reward:.4f})')
        
        # Early stopping
        if agent.check_early_stopping(overall_val_reward):
            print(f'\n‚ö†Ô∏è Early stopping at epoch {epoch+1}')
            print(f'  Best val reward: {agent.best_val_reward:.4f}')
            break
        
        print(f'{"="*70}\n')
    
    return training_history

print("‚úÖ Training function defined with warmup")


‚úÖ Training function defined with warmup


In [None]:
print("\n" + "="*70)
print("üöÄ INITIALIZING INLEGALBERT MODEL")
print("="*70)
print(f"   Dataset: {len(dataset)} Supreme Court judgments")
print(f"   Training: {len(train_dataset)} documents")
print(f"   Validation: {len(val_dataset)} documents")
print(f"   Test: {len(test_dataset)} documents")
print(f"   üî• NO LIMITS - Processing ALL sentences in ALL documents")
print("="*70 + "\n")

encoder = SentenceEncoder(model_name='law-ai/InLegalBERT', hidden_dim=768)

policy = MultiAspectPolicyNetwork(
    input_dim=768,
    hidden_dim=256, 
    num_aspects=5, 
    dropout=0.5
)

agent = UnsupervisedRLAgent(
    encoder=encoder,
    policy=policy,
    learning_rate=1e-4,
    weight_decay=1e-5
)

print(f"\nüìä Model Statistics:")
print(f"   Encoder: InLegalBERT (768-dim, frozen)")
print(f"   Total params: {sum(p.numel() for p in policy.parameters()):,}")
print(f"   Trainable: {sum(p.numel() for p in policy.parameters() if p.requires_grad):,}")

print("\n" + "="*70)
print("üéØ STARTING TRAINING")
print("="*70)
print("‚ö° Improvements:")
print("   ‚Ä¢ InLegalBERT (Indian legal domain)")
print("   ‚Ä¢ Enhanced rewards (coherence‚Üë30%, redundancy‚Üì20%, diversity‚Üë15%)")
print("   ‚Ä¢ Sentence ordering penalty")
print("   ‚Ä¢ Patience: 15 epochs")
print("   ‚Ä¢ Learning rate warmup: 3 epochs")
print("   ‚Ä¢ NO SENTENCE LIMITS - Full document processing")
print("="*70 + "\n")

history = train_unsupervised_rl(
    agent=agent,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    num_epochs=10,
    save_path='final_inlegalbert_model.pt'
)

print("\n" + "="*70)
print("‚úÖ TRAINING COMPLETE!")
print("="*70)



üöÄ INITIALIZING INLEGALBERT MODEL
   Dataset: 400 Supreme Court judgments
   Training: 280 documents
   Validation: 60 documents
   Test: 60 documents
   üî• NO LIMITS - Processing ALL sentences in ALL documents

üîÑ Loading law-ai/InLegalBERT...


2025-11-14 17:06:23.552541: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763139983.570983     142 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763139983.576018     142 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

‚úÖ law-ai/InLegalBERT loaded successfully (hidden_dim=768)


model.safetensors:   0%|          | 0.00/534M [00:00<?, ?B/s]


üìä Model Statistics:
   Encoder: InLegalBERT (768-dim, frozen)
   Total params: 8,312,837
   Trainable: 8,312,837

üéØ STARTING TRAINING
‚ö° Improvements:
   ‚Ä¢ InLegalBERT (Indian legal domain)
   ‚Ä¢ Enhanced rewards (coherence‚Üë30%, redundancy‚Üì20%, diversity‚Üë15%)
   ‚Ä¢ Sentence ordering penalty
   ‚Ä¢ Patience: 15 epochs
   ‚Ä¢ Learning rate warmup: 3 epochs
   ‚Ä¢ NO SENTENCE LIMITS - Full document processing

   üî• Warmup: LR = 0.000033



Epoch 1/10:   0%|          | 0/280 [00:00<?, ?it/s][A

   Processing 333 sentences...


Epoch 1/10:   0%|          | 0/280 [00:22<?, ?it/s, loss=7.1265, reward=0.6152, temp=1.00][A
Epoch 1/10:   0%|          | 1/280 [00:22<1:46:15, 22.85s/it, loss=7.1265, reward=0.6152, temp=1.00][A

   Processing 353 sentences...


Epoch 1/10:   0%|          | 1/280 [00:42<1:46:15, 22.85s/it, loss=5.9047, reward=0.6103, temp=1.00][A
Epoch 1/10:   1%|          | 2/280 [00:42<1:36:51, 20.90s/it, loss=5.9047, reward=0.6103, temp=1.00][A

   Processing 57 sentences...


Epoch 1/10:   1%|          | 2/280 [00:48<1:36:51, 20.90s/it, loss=0.5330, reward=0.5958, temp=1.00][A
Epoch 1/10:   1%|          | 3/280 [00:48<1:04:41, 14.01s/it, loss=0.5330, reward=0.5958, temp=1.00][A

   Processing 96 sentences...


Epoch 1/10:   1%|          | 3/280 [01:00<1:04:41, 14.01s/it, loss=0.8126, reward=0.5647, temp=1.00][A
Epoch 1/10:   1%|‚ñè         | 4/280 [01:00<1:01:29, 13.37s/it, loss=0.8126, reward=0.5647, temp=1.00][A

   Processing 71 sentences...


Epoch 1/10:   1%|‚ñè         | 4/280 [01:03<1:01:29, 13.37s/it, loss=0.5338, reward=0.5901, temp=1.00][A
Epoch 1/10:   2%|‚ñè         | 5/280 [01:03<43:32,  9.50s/it, loss=0.5338, reward=0.5901, temp=1.00]  [A

   Processing 62 sentences...


Epoch 1/10:   2%|‚ñè         | 5/280 [01:05<43:32,  9.50s/it, loss=0.2612, reward=0.5880, temp=1.00][A
Epoch 1/10:   2%|‚ñè         | 6/280 [01:05<32:20,  7.08s/it, loss=0.2612, reward=0.5880, temp=1.00][A

   Processing 350 sentences...


Epoch 1/10:   2%|‚ñè         | 6/280 [01:34<32:20,  7.08s/it, loss=1.9944, reward=0.6217, temp=1.00][A
Epoch 1/10:   2%|‚ñé         | 7/280 [01:34<1:04:58, 14.28s/it, loss=1.9944, reward=0.6217, temp=1.00][A

   Processing 263 sentences...


Epoch 1/10:   2%|‚ñé         | 7/280 [01:50<1:04:58, 14.28s/it, loss=1.1591, reward=0.6124, temp=1.00][A
Epoch 1/10:   3%|‚ñé         | 8/280 [01:50<1:07:28, 14.88s/it, loss=1.1591, reward=0.6124, temp=1.00][A

   Processing 176 sentences...


Epoch 1/10:   3%|‚ñé         | 8/280 [02:00<1:07:28, 14.88s/it, loss=0.5387, reward=0.6089, temp=1.00][A
Epoch 1/10:   3%|‚ñé         | 9/280 [02:00<1:00:11, 13.33s/it, loss=0.5387, reward=0.6089, temp=1.00][A

   Processing 318 sentences...


Epoch 1/10:   3%|‚ñé         | 9/280 [03:03<1:00:11, 13.33s/it, loss=0.8934, reward=0.6140, temp=1.00][A
Epoch 1/10:   4%|‚ñé         | 10/280 [03:03<2:07:58, 28.44s/it, loss=0.8934, reward=0.6140, temp=1.00][A

   Processing 338 sentences...


Epoch 1/10:   4%|‚ñé         | 10/280 [03:27<2:07:58, 28.44s/it, loss=0.6716, reward=0.6152, temp=1.00][A
Epoch 1/10:   4%|‚ñç         | 11/280 [03:27<2:01:34, 27.12s/it, loss=0.6716, reward=0.6152, temp=1.00][A

   Processing 40 sentences...


Epoch 1/10:   4%|‚ñç         | 11/280 [03:29<2:01:34, 27.12s/it, loss=-0.0192, reward=0.5772, temp=1.00][A
Epoch 1/10:   4%|‚ñç         | 12/280 [03:29<1:27:33, 19.60s/it, loss=-0.0192, reward=0.5772, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:   4%|‚ñç         | 12/280 [04:23<1:27:33, 19.60s/it, loss=0.7269, reward=0.6157, temp=1.00] [A
Epoch 1/10:   5%|‚ñç         | 13/280 [04:23<2:13:21, 29.97s/it, loss=0.7269, reward=0.6157, temp=1.00][A

   Processing 423 sentences...


Epoch 1/10:   5%|‚ñç         | 13/280 [05:22<2:13:21, 29.97s/it, loss=0.4256, reward=0.6100, temp=1.00][A
Epoch 1/10:   5%|‚ñå         | 14/280 [05:22<2:52:08, 38.83s/it, loss=0.4256, reward=0.6100, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:   5%|‚ñå         | 14/280 [05:41<2:52:08, 38.83s/it, loss=0.5551, reward=0.6391, temp=1.00][A
Epoch 1/10:   5%|‚ñå         | 15/280 [05:41<2:24:13, 32.65s/it, loss=0.5551, reward=0.6391, temp=1.00][A

   Processing 82 sentences...


Epoch 1/10:   5%|‚ñå         | 15/280 [05:44<2:24:13, 32.65s/it, loss=-0.0013, reward=0.6084, temp=1.00][A
Epoch 1/10:   6%|‚ñå         | 16/280 [05:44<1:44:22, 23.72s/it, loss=-0.0013, reward=0.6084, temp=1.00][A

   Processing 137 sentences...


Epoch 1/10:   6%|‚ñå         | 16/280 [05:51<1:44:22, 23.72s/it, loss=0.0290, reward=0.6097, temp=1.00] [A
Epoch 1/10:   6%|‚ñå         | 17/280 [05:51<1:22:21, 18.79s/it, loss=0.0290, reward=0.6097, temp=1.00][A

   Processing 30 sentences...


Epoch 1/10:   6%|‚ñå         | 17/280 [05:53<1:22:21, 18.79s/it, loss=-0.0452, reward=0.5709, temp=1.00][A
Epoch 1/10:   6%|‚ñã         | 18/280 [05:53<1:00:48, 13.93s/it, loss=-0.0452, reward=0.5709, temp=1.00][A

   Processing 82 sentences...


Epoch 1/10:   6%|‚ñã         | 18/280 [06:02<1:00:48, 13.93s/it, loss=-0.0305, reward=0.5865, temp=1.00][A
Epoch 1/10:   7%|‚ñã         | 19/280 [06:02<52:57, 12.18s/it, loss=-0.0305, reward=0.5865, temp=1.00]  [A

   Processing 61 sentences...


Epoch 1/10:   7%|‚ñã         | 19/280 [06:05<52:57, 12.18s/it, loss=-0.0334, reward=0.5924, temp=1.00][A
Epoch 1/10:   7%|‚ñã         | 20/280 [06:05<40:47,  9.41s/it, loss=-0.0334, reward=0.5924, temp=1.00][A

   Processing 50 sentences...


Epoch 1/10:   7%|‚ñã         | 20/280 [06:07<40:47,  9.41s/it, loss=-0.0419, reward=0.5762, temp=1.00][A
Epoch 1/10:   8%|‚ñä         | 21/280 [06:07<32:11,  7.46s/it, loss=-0.0419, reward=0.5762, temp=1.00][A

   Processing 115 sentences...


Epoch 1/10:   8%|‚ñä         | 21/280 [06:16<32:11,  7.46s/it, loss=-0.0168, reward=0.6087, temp=1.00][A
Epoch 1/10:   8%|‚ñä         | 22/280 [06:16<33:41,  7.84s/it, loss=-0.0168, reward=0.6087, temp=1.00][A

   Processing 80 sentences...


Epoch 1/10:   8%|‚ñä         | 22/280 [06:19<33:41,  7.84s/it, loss=-0.0400, reward=0.5822, temp=1.00][A
Epoch 1/10:   8%|‚ñä         | 23/280 [06:19<27:34,  6.44s/it, loss=-0.0400, reward=0.5822, temp=1.00][A

   Processing 434 sentences...


Epoch 1/10:   8%|‚ñä         | 23/280 [07:46<27:34,  6.44s/it, loss=0.0661, reward=0.6134, temp=1.00] [A
Epoch 1/10:   9%|‚ñä         | 24/280 [07:46<2:10:31, 30.59s/it, loss=0.0661, reward=0.6134, temp=1.00][A

   Processing 254 sentences...


Epoch 1/10:   9%|‚ñä         | 24/280 [08:20<2:10:31, 30.59s/it, loss=0.0011, reward=0.6091, temp=1.00][A
Epoch 1/10:   9%|‚ñâ         | 25/280 [08:20<2:13:46, 31.48s/it, loss=0.0011, reward=0.6091, temp=1.00][A

   Processing 329 sentences...


Epoch 1/10:   9%|‚ñâ         | 25/280 [08:39<2:13:46, 31.48s/it, loss=0.0081, reward=0.6062, temp=1.00][A
Epoch 1/10:   9%|‚ñâ         | 26/280 [08:39<1:57:52, 27.84s/it, loss=0.0081, reward=0.6062, temp=1.00][A

   Processing 87 sentences...


Epoch 1/10:   9%|‚ñâ         | 26/280 [08:44<1:57:52, 27.84s/it, loss=-0.0387, reward=0.5917, temp=1.00][A
Epoch 1/10:  10%|‚ñâ         | 27/280 [08:44<1:28:07, 20.90s/it, loss=-0.0387, reward=0.5917, temp=1.00][A

   Processing 39 sentences...


Epoch 1/10:  10%|‚ñâ         | 27/280 [08:46<1:28:07, 20.90s/it, loss=-0.0458, reward=0.5705, temp=1.00][A
Epoch 1/10:  10%|‚ñà         | 28/280 [08:46<1:03:30, 15.12s/it, loss=-0.0458, reward=0.5705, temp=1.00][A

   Processing 83 sentences...


Epoch 1/10:  10%|‚ñà         | 28/280 [08:49<1:03:30, 15.12s/it, loss=-0.0395, reward=0.5900, temp=1.00][A
Epoch 1/10:  10%|‚ñà         | 29/280 [08:49<48:28, 11.59s/it, loss=-0.0395, reward=0.5900, temp=1.00]  [A

   Processing 177 sentences...


Epoch 1/10:  10%|‚ñà         | 29/280 [09:01<48:28, 11.59s/it, loss=-0.0236, reward=0.6100, temp=1.00][A
Epoch 1/10:  11%|‚ñà         | 30/280 [09:01<48:43, 11.69s/it, loss=-0.0236, reward=0.6100, temp=1.00][A

   Processing 82 sentences...


Epoch 1/10:  11%|‚ñà         | 30/280 [09:09<48:43, 11.69s/it, loss=-0.0397, reward=0.5950, temp=1.00][A
Epoch 1/10:  11%|‚ñà         | 31/280 [09:09<43:39, 10.52s/it, loss=-0.0397, reward=0.5950, temp=1.00][A

   Processing 136 sentences...


Epoch 1/10:  11%|‚ñà         | 31/280 [09:14<43:39, 10.52s/it, loss=-0.0358, reward=0.6003, temp=1.00][A
Epoch 1/10:  11%|‚ñà‚ñè        | 32/280 [09:14<36:34,  8.85s/it, loss=-0.0358, reward=0.6003, temp=1.00][A

   Processing 166 sentences...


Epoch 1/10:  11%|‚ñà‚ñè        | 32/280 [09:24<36:34,  8.85s/it, loss=-0.0338, reward=0.6012, temp=1.00][A
Epoch 1/10:  12%|‚ñà‚ñè        | 33/280 [09:24<37:49,  9.19s/it, loss=-0.0338, reward=0.6012, temp=1.00][A

   Processing 91 sentences...


Epoch 1/10:  12%|‚ñà‚ñè        | 33/280 [09:29<37:49,  9.19s/it, loss=-0.0434, reward=0.5846, temp=1.00][A
Epoch 1/10:  12%|‚ñà‚ñè        | 34/280 [09:29<32:53,  8.02s/it, loss=-0.0434, reward=0.5846, temp=1.00][A

   Processing 87 sentences...


Epoch 1/10:  12%|‚ñà‚ñè        | 34/280 [09:34<32:53,  8.02s/it, loss=-0.0404, reward=0.6011, temp=1.00][A
Epoch 1/10:  12%|‚ñà‚ñé        | 35/280 [09:34<28:56,  7.09s/it, loss=-0.0404, reward=0.6011, temp=1.00][A

   Processing 203 sentences...


Epoch 1/10:  12%|‚ñà‚ñé        | 35/280 [09:48<28:56,  7.09s/it, loss=-0.0314, reward=0.6072, temp=1.00][A
Epoch 1/10:  13%|‚ñà‚ñé        | 36/280 [09:48<37:22,  9.19s/it, loss=-0.0314, reward=0.6072, temp=1.00][A

   Processing 140 sentences...


Epoch 1/10:  13%|‚ñà‚ñé        | 36/280 [09:58<37:22,  9.19s/it, loss=-0.0397, reward=0.6038, temp=1.00][A
Epoch 1/10:  13%|‚ñà‚ñé        | 37/280 [09:58<38:30,  9.51s/it, loss=-0.0397, reward=0.6038, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  13%|‚ñà‚ñé        | 37/280 [10:50<38:30,  9.51s/it, loss=-0.0132, reward=0.6102, temp=1.00][A
Epoch 1/10:  14%|‚ñà‚ñé        | 38/280 [10:51<1:30:16, 22.38s/it, loss=-0.0132, reward=0.6102, temp=1.00][A

   Processing 97 sentences...


Epoch 1/10:  14%|‚ñà‚ñé        | 38/280 [10:57<1:30:16, 22.38s/it, loss=-0.0443, reward=0.5922, temp=1.00][A
Epoch 1/10:  14%|‚ñà‚ñç        | 39/280 [10:57<1:10:13, 17.48s/it, loss=-0.0443, reward=0.5922, temp=1.00][A

   Processing 182 sentences...


Epoch 1/10:  14%|‚ñà‚ñç        | 39/280 [11:09<1:10:13, 17.48s/it, loss=-0.0386, reward=0.6058, temp=1.00][A
Epoch 1/10:  14%|‚ñà‚ñç        | 40/280 [11:09<1:04:02, 16.01s/it, loss=-0.0386, reward=0.6058, temp=1.00][A

   Processing 87 sentences...


Epoch 1/10:  14%|‚ñà‚ñç        | 40/280 [11:13<1:04:02, 16.01s/it, loss=-0.0460, reward=0.5999, temp=1.00][A
Epoch 1/10:  15%|‚ñà‚ñç        | 41/280 [11:13<49:45, 12.49s/it, loss=-0.0460, reward=0.5999, temp=1.00]  [A

   Processing 235 sentences...


Epoch 1/10:  15%|‚ñà‚ñç        | 41/280 [11:48<49:45, 12.49s/it, loss=-0.0407, reward=0.6034, temp=1.00][A
Epoch 1/10:  15%|‚ñà‚ñå        | 42/280 [11:48<1:15:22, 19.00s/it, loss=-0.0407, reward=0.6034, temp=1.00][A

   Processing 50 sentences...


Epoch 1/10:  15%|‚ñà‚ñå        | 42/280 [11:50<1:15:22, 19.00s/it, loss=-0.0536, reward=0.5721, temp=1.00][A
Epoch 1/10:  15%|‚ñà‚ñå        | 43/280 [11:50<55:19, 14.01s/it, loss=-0.0536, reward=0.5721, temp=1.00]  [A

   Processing 70 sentences...


Epoch 1/10:  15%|‚ñà‚ñå        | 43/280 [11:54<55:19, 14.01s/it, loss=-0.0509, reward=0.5866, temp=1.00][A
Epoch 1/10:  16%|‚ñà‚ñå        | 44/280 [11:54<43:43, 11.11s/it, loss=-0.0509, reward=0.5866, temp=1.00][A

   Processing 65 sentences...


Epoch 1/10:  16%|‚ñà‚ñå        | 44/280 [12:00<43:43, 11.11s/it, loss=-0.0552, reward=0.5776, temp=1.00][A
Epoch 1/10:  16%|‚ñà‚ñå        | 45/280 [12:00<37:01,  9.45s/it, loss=-0.0552, reward=0.5776, temp=1.00][A

   Processing 482 sentences...


Epoch 1/10:  16%|‚ñà‚ñå        | 45/280 [12:51<37:01,  9.45s/it, loss=-0.0137, reward=0.6140, temp=1.00][A
Epoch 1/10:  16%|‚ñà‚ñã        | 46/280 [12:51<1:25:41, 21.97s/it, loss=-0.0137, reward=0.6140, temp=1.00][A

   Processing 164 sentences...


Epoch 1/10:  16%|‚ñà‚ñã        | 46/280 [13:07<1:25:41, 21.97s/it, loss=-0.0454, reward=0.6056, temp=1.00][A
Epoch 1/10:  17%|‚ñà‚ñã        | 47/280 [13:07<1:18:19, 20.17s/it, loss=-0.0454, reward=0.6056, temp=1.00][A

   Processing 102 sentences...


Epoch 1/10:  17%|‚ñà‚ñã        | 47/280 [13:13<1:18:19, 20.17s/it, loss=-0.0458, reward=0.6119, temp=1.00][A
Epoch 1/10:  17%|‚ñà‚ñã        | 48/280 [13:13<1:02:04, 16.05s/it, loss=-0.0458, reward=0.6119, temp=1.00][A

   Processing 48 sentences...


Epoch 1/10:  17%|‚ñà‚ñã        | 48/280 [13:15<1:02:04, 16.05s/it, loss=-0.0562, reward=0.5821, temp=1.00][A
Epoch 1/10:  18%|‚ñà‚ñä        | 49/280 [13:15<45:16, 11.76s/it, loss=-0.0562, reward=0.5821, temp=1.00]  [A

   Processing 74 sentences...


Epoch 1/10:  18%|‚ñà‚ñä        | 49/280 [13:21<45:16, 11.76s/it, loss=-0.0567, reward=0.6008, temp=1.00][A
Epoch 1/10:  18%|‚ñà‚ñä        | 50/280 [13:21<37:49,  9.87s/it, loss=-0.0567, reward=0.6008, temp=1.00][A

   Processing 67 sentences...


Epoch 1/10:  18%|‚ñà‚ñä        | 50/280 [13:26<37:49,  9.87s/it, loss=-0.0594, reward=0.5906, temp=1.00][A
Epoch 1/10:  18%|‚ñà‚ñä        | 51/280 [13:26<32:56,  8.63s/it, loss=-0.0594, reward=0.5906, temp=1.00][A

   Processing 31 sentences...


Epoch 1/10:  18%|‚ñà‚ñä        | 51/280 [13:28<32:56,  8.63s/it, loss=-0.0622, reward=0.5802, temp=1.00][A
Epoch 1/10:  19%|‚ñà‚ñä        | 52/280 [13:28<24:47,  6.52s/it, loss=-0.0622, reward=0.5802, temp=1.00][A

   Processing 53 sentences...


Epoch 1/10:  19%|‚ñà‚ñä        | 52/280 [13:32<24:47,  6.52s/it, loss=-0.0668, reward=0.5703, temp=1.00][A
Epoch 1/10:  19%|‚ñà‚ñâ        | 53/280 [13:32<21:32,  5.69s/it, loss=-0.0668, reward=0.5703, temp=1.00][A

   Processing 123 sentences...


Epoch 1/10:  19%|‚ñà‚ñâ        | 53/280 [13:39<21:32,  5.69s/it, loss=-0.0638, reward=0.5894, temp=1.00][A
Epoch 1/10:  19%|‚ñà‚ñâ        | 54/280 [13:39<23:33,  6.26s/it, loss=-0.0638, reward=0.5894, temp=1.00][A

   Processing 169 sentences...


Epoch 1/10:  19%|‚ñà‚ñâ        | 54/280 [13:49<23:33,  6.26s/it, loss=-0.0452, reward=0.6095, temp=1.00][A
Epoch 1/10:  20%|‚ñà‚ñâ        | 55/280 [13:49<27:05,  7.22s/it, loss=-0.0452, reward=0.6095, temp=1.00][A

   Processing 33 sentences...


Epoch 1/10:  20%|‚ñà‚ñâ        | 55/280 [13:52<27:05,  7.22s/it, loss=-0.0677, reward=0.5762, temp=1.00][A
Epoch 1/10:  20%|‚ñà‚ñà        | 56/280 [13:52<22:02,  5.90s/it, loss=-0.0677, reward=0.5762, temp=1.00][A

   Processing 190 sentences...


Epoch 1/10:  20%|‚ñà‚ñà        | 56/280 [14:03<22:02,  5.90s/it, loss=-0.0424, reward=0.6104, temp=1.00][A
Epoch 1/10:  20%|‚ñà‚ñà        | 57/280 [14:03<27:44,  7.46s/it, loss=-0.0424, reward=0.6104, temp=1.00][A

   Processing 223 sentences...


Epoch 1/10:  20%|‚ñà‚ñà        | 57/280 [14:21<27:44,  7.46s/it, loss=-0.0526, reward=0.6030, temp=1.00][A
Epoch 1/10:  21%|‚ñà‚ñà        | 58/280 [14:21<39:03, 10.56s/it, loss=-0.0526, reward=0.6030, temp=1.00][A

   Processing 57 sentences...


Epoch 1/10:  21%|‚ñà‚ñà        | 58/280 [14:23<39:03, 10.56s/it, loss=-0.0756, reward=0.5817, temp=1.00][A
Epoch 1/10:  21%|‚ñà‚ñà        | 59/280 [14:23<30:16,  8.22s/it, loss=-0.0756, reward=0.5817, temp=1.00][A

   Processing 362 sentences...


Epoch 1/10:  21%|‚ñà‚ñà        | 59/280 [14:48<30:16,  8.22s/it, loss=0.0512, reward=0.6397, temp=1.00] [A
Epoch 1/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [14:48<48:24, 13.20s/it, loss=0.0512, reward=0.6397, temp=1.00][A

   Processing 192 sentences...


Epoch 1/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:06<48:24, 13.20s/it, loss=-0.0475, reward=0.6069, temp=1.00][A
Epoch 1/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:06<53:34, 14.68s/it, loss=-0.0475, reward=0.6069, temp=1.00][A

   Processing 108 sentences...


Epoch 1/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:10<53:34, 14.68s/it, loss=-0.0724, reward=0.5985, temp=1.00][A
Epoch 1/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:10<40:54, 11.26s/it, loss=-0.0724, reward=0.5985, temp=1.00][A

   Processing 380 sentences...


Epoch 1/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:49<40:54, 11.26s/it, loss=-0.0308, reward=0.6133, temp=1.00][A
Epoch 1/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [15:49<1:11:17, 19.71s/it, loss=-0.0308, reward=0.6133, temp=1.00][A

   Processing 285 sentences...


Epoch 1/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:20<1:11:17, 19.71s/it, loss=-0.0560, reward=0.6064, temp=1.00][A
Epoch 1/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:20<1:23:05, 23.08s/it, loss=-0.0560, reward=0.6064, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [17:20<1:23:05, 23.08s/it, loss=-0.0056, reward=0.6194, temp=1.00][A
Epoch 1/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:20<2:03:00, 34.33s/it, loss=-0.0056, reward=0.6194, temp=1.00][A

   Processing 144 sentences...


Epoch 1/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:30<2:03:00, 34.33s/it, loss=-0.0779, reward=0.5929, temp=1.00][A
Epoch 1/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:30<1:35:44, 26.85s/it, loss=-0.0779, reward=0.5929, temp=1.00][A

   Processing 202 sentences...


Epoch 1/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:47<1:35:44, 26.85s/it, loss=-0.0567, reward=0.6127, temp=1.00][A
Epoch 1/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [17:47<1:25:04, 23.96s/it, loss=-0.0567, reward=0.6127, temp=1.00][A

   Processing 271 sentences...


Epoch 1/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [18:15<1:25:04, 23.96s/it, loss=-0.0700, reward=0.6042, temp=1.00][A
Epoch 1/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:15<1:29:10, 25.24s/it, loss=-0.0700, reward=0.6042, temp=1.00][A

   Processing 51 sentences...


Epoch 1/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:17<1:29:10, 25.24s/it, loss=-0.0810, reward=0.5871, temp=1.00][A
Epoch 1/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:17<1:04:22, 18.30s/it, loss=-0.0810, reward=0.5871, temp=1.00][A

   Processing 376 sentences...


Epoch 1/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:48<1:04:22, 18.30s/it, loss=-0.0313, reward=0.6164, temp=1.00][A
Epoch 1/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [18:48<1:16:27, 21.85s/it, loss=-0.0313, reward=0.6164, temp=1.00][A

   Processing 61 sentences...


Epoch 1/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [18:51<1:16:27, 21.85s/it, loss=-0.0767, reward=0.5966, temp=1.00][A
Epoch 1/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [18:51<57:08, 16.40s/it, loss=-0.0767, reward=0.5966, temp=1.00]  [A

   Processing 63 sentences...


Epoch 1/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [18:55<57:08, 16.40s/it, loss=-0.0843, reward=0.5834, temp=1.00][A
Epoch 1/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [18:55<43:16, 12.48s/it, loss=-0.0843, reward=0.5834, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [20:05<43:16, 12.48s/it, loss=-0.0625, reward=0.6039, temp=1.00][A
Epoch 1/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:05<1:42:50, 29.81s/it, loss=-0.0625, reward=0.6039, temp=1.00][A

   Processing 254 sentences...


Epoch 1/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:31<1:42:50, 29.81s/it, loss=-0.0743, reward=0.6000, temp=1.00][A
Epoch 1/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:31<1:38:53, 28.80s/it, loss=-0.0743, reward=0.6000, temp=1.00][A

   Processing 118 sentences...


Epoch 1/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:53<1:38:53, 28.80s/it, loss=-0.0653, reward=0.6104, temp=1.00][A
Epoch 1/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [20:53<1:31:23, 26.75s/it, loss=-0.0653, reward=0.6104, temp=1.00][A

   Processing 439 sentences...


Epoch 1/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:29<1:31:23, 26.75s/it, loss=-0.0425, reward=0.6083, temp=1.00][A
Epoch 1/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:29<1:39:43, 29.33s/it, loss=-0.0425, reward=0.6083, temp=1.00][A

   Processing 51 sentences...


Epoch 1/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:31<1:39:43, 29.33s/it, loss=-0.0780, reward=0.5977, temp=1.00][A
Epoch 1/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:31<1:11:37, 21.17s/it, loss=-0.0780, reward=0.5977, temp=1.00][A

   Processing 143 sentences...


Epoch 1/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:41<1:11:37, 21.17s/it, loss=-0.0781, reward=0.6041, temp=1.00][A
Epoch 1/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:41<1:00:32, 17.98s/it, loss=-0.0781, reward=0.6041, temp=1.00][A

   Processing 300 sentences...


Epoch 1/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [22:02<1:00:32, 17.98s/it, loss=-0.0328, reward=0.6186, temp=1.00][A
Epoch 1/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:02<1:03:17, 18.89s/it, loss=-0.0328, reward=0.6186, temp=1.00][A

   Processing 137 sentences...


Epoch 1/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:11<1:03:17, 18.89s/it, loss=-0.0832, reward=0.5995, temp=1.00][A
Epoch 1/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:11<52:58, 15.89s/it, loss=-0.0832, reward=0.5995, temp=1.00]  [A

   Processing 120 sentences...


Epoch 1/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:18<52:58, 15.89s/it, loss=-0.0869, reward=0.6000, temp=1.00][A
Epoch 1/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:18<43:35, 13.14s/it, loss=-0.0869, reward=0.6000, temp=1.00][A

   Processing 170 sentences...


Epoch 1/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:31<43:35, 13.14s/it, loss=-0.0907, reward=0.5974, temp=1.00][A
Epoch 1/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:31<43:10, 13.08s/it, loss=-0.0907, reward=0.5974, temp=1.00][A

   Processing 107 sentences...


Epoch 1/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:37<43:10, 13.08s/it, loss=-0.0930, reward=0.5914, temp=1.00][A
Epoch 1/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:37<36:35, 11.15s/it, loss=-0.0930, reward=0.5914, temp=1.00][A

   Processing 48 sentences...


Epoch 1/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:41<36:35, 11.15s/it, loss=-0.1001, reward=0.5676, temp=1.00][A
Epoch 1/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:41<29:22,  8.99s/it, loss=-0.1001, reward=0.5676, temp=1.00][A

   Processing 163 sentences...


Epoch 1/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:50<29:22,  8.99s/it, loss=-0.0577, reward=0.6110, temp=1.00][A
Epoch 1/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [22:50<28:44,  8.84s/it, loss=-0.0577, reward=0.6110, temp=1.00][A

   Processing 108 sentences...


Epoch 1/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [22:59<28:44,  8.84s/it, loss=-0.0855, reward=0.5988, temp=1.00][A
Epoch 1/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [22:59<29:10,  9.03s/it, loss=-0.0855, reward=0.5988, temp=1.00][A

   Processing 86 sentences...


Epoch 1/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:04<29:10,  9.03s/it, loss=-0.0841, reward=0.6019, temp=1.00][A
Epoch 1/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:04<24:39,  7.66s/it, loss=-0.0841, reward=0.6019, temp=1.00][A

   Processing 74 sentences...


Epoch 1/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:17<24:39,  7.66s/it, loss=-0.0944, reward=0.5878, temp=1.00][A
Epoch 1/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:17<29:18,  9.16s/it, loss=-0.0944, reward=0.5878, temp=1.00][A

   Processing 81 sentences...


Epoch 1/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:26<29:18,  9.16s/it, loss=-0.0918, reward=0.5950, temp=1.00][A
Epoch 1/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:26<29:40,  9.32s/it, loss=-0.0918, reward=0.5950, temp=1.00][A

   Processing 203 sentences...


Epoch 1/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:38<29:40,  9.32s/it, loss=-0.0581, reward=0.6099, temp=1.00][A
Epoch 1/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:38<31:41, 10.01s/it, loss=-0.0581, reward=0.6099, temp=1.00][A

   Processing 54 sentences...


Epoch 1/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:41<31:41, 10.01s/it, loss=-0.1073, reward=0.5731, temp=1.00][A
Epoch 1/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:41<24:34,  7.80s/it, loss=-0.1073, reward=0.5731, temp=1.00][A

   Processing 143 sentences...


Epoch 1/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:50<24:34,  7.80s/it, loss=-0.0887, reward=0.5994, temp=1.00][A
Epoch 1/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [23:50<26:09,  8.35s/it, loss=-0.0887, reward=0.5994, temp=1.00][A

   Processing 36 sentences...


Epoch 1/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [23:53<26:09,  8.35s/it, loss=-0.1020, reward=0.5807, temp=1.00][A
Epoch 1/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [23:53<20:26,  6.56s/it, loss=-0.1020, reward=0.5807, temp=1.00][A

   Processing 106 sentences...


Epoch 1/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [23:58<20:26,  6.56s/it, loss=-0.0867, reward=0.5991, temp=1.00][A
Epoch 1/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [23:58<18:52,  6.09s/it, loss=-0.0867, reward=0.5991, temp=1.00][A

   Processing 133 sentences...


Epoch 1/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:24<18:52,  6.09s/it, loss=-0.1334, reward=0.5817, temp=1.00][A
Epoch 1/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:24<37:37, 12.20s/it, loss=-0.1334, reward=0.5817, temp=1.00][A

   Processing 206 sentences...


Epoch 1/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:35<37:37, 12.20s/it, loss=-0.0586, reward=0.6004, temp=1.00][A
Epoch 1/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:35<35:53, 11.70s/it, loss=-0.0586, reward=0.6004, temp=1.00][A

   Processing 162 sentences...


Epoch 1/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:56<35:53, 11.70s/it, loss=-0.1369, reward=0.5936, temp=1.00][A
Epoch 1/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [24:56<44:50, 14.70s/it, loss=-0.1369, reward=0.5936, temp=1.00][A

   Processing 42 sentences...


Epoch 1/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [24:58<44:50, 14.70s/it, loss=-0.0917, reward=0.5898, temp=1.00][A
Epoch 1/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [24:58<32:48, 10.82s/it, loss=-0.0917, reward=0.5898, temp=1.00][A

   Processing 89 sentences...


Epoch 1/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:03<32:48, 10.82s/it, loss=-0.0866, reward=0.5942, temp=1.00][A
Epoch 1/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:03<27:00,  8.95s/it, loss=-0.0866, reward=0.5942, temp=1.00][A

   Processing 32 sentences...


Epoch 1/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:06<27:00,  8.95s/it, loss=-0.1215, reward=0.5647, temp=1.00][A
Epoch 1/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:06<22:14,  7.41s/it, loss=-0.1215, reward=0.5647, temp=1.00][A

   Processing 408 sentences...


Epoch 1/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [26:28<22:14,  7.41s/it, loss=0.0489, reward=0.6095, temp=1.00] [A
Epoch 1/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:28<1:28:06, 29.53s/it, loss=0.0489, reward=0.6095, temp=1.00][A

   Processing 35 sentences...


Epoch 1/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:30<1:28:06, 29.53s/it, loss=-0.1005, reward=0.5856, temp=1.00][A
Epoch 1/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:30<1:03:26, 21.38s/it, loss=-0.1005, reward=0.5856, temp=1.00][A

   Processing 137 sentences...


Epoch 1/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:37<1:03:26, 21.38s/it, loss=-0.0658, reward=0.6028, temp=1.00][A
Epoch 1/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:37<50:44, 17.20s/it, loss=-0.0658, reward=0.6028, temp=1.00]  [A

   Processing 105 sentences...


Epoch 1/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:46<50:44, 17.20s/it, loss=-0.0770, reward=0.5991, temp=1.00][A
Epoch 1/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [26:46<42:37, 14.53s/it, loss=-0.0770, reward=0.5991, temp=1.00][A

   Processing 124 sentences...


Epoch 1/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [26:57<42:37, 14.53s/it, loss=-0.1008, reward=0.5922, temp=1.00][A
Epoch 1/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [26:57<39:42, 13.61s/it, loss=-0.1008, reward=0.5922, temp=1.00][A

   Processing 220 sentences...


Epoch 1/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:11<39:42, 13.61s/it, loss=-0.0164, reward=0.6077, temp=1.00][A
Epoch 1/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:11<39:53, 13.76s/it, loss=-0.0164, reward=0.6077, temp=1.00][A

   Processing 98 sentences...


Epoch 1/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:16<39:53, 13.76s/it, loss=-0.0836, reward=0.6036, temp=1.00][A
Epoch 1/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:16<31:54, 11.07s/it, loss=-0.0836, reward=0.6036, temp=1.00][A

   Processing 133 sentences...


Epoch 1/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:23<31:54, 11.07s/it, loss=-0.0891, reward=0.5985, temp=1.00][A
Epoch 1/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:23<28:01,  9.78s/it, loss=-0.0891, reward=0.5985, temp=1.00][A

   Processing 315 sentences...


Epoch 1/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:52<28:01,  9.78s/it, loss=0.0155, reward=0.6026, temp=1.00] [A
Epoch 1/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [27:52<44:25, 15.59s/it, loss=0.0155, reward=0.6026, temp=1.00][A

   Processing 81 sentences...


Epoch 1/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [27:56<44:25, 15.59s/it, loss=-0.0994, reward=0.5875, temp=1.00][A
Epoch 1/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [27:56<34:03, 12.02s/it, loss=-0.0994, reward=0.5875, temp=1.00][A

   Processing 286 sentences...


Epoch 1/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:12<34:03, 12.02s/it, loss=-0.0671, reward=0.6001, temp=1.00][A
Epoch 1/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:12<37:42, 13.39s/it, loss=-0.0671, reward=0.6001, temp=1.00][A

   Processing 122 sentences...


Epoch 1/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:19<37:42, 13.39s/it, loss=-0.0835, reward=0.5943, temp=1.00][A
Epoch 1/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:19<32:00, 11.43s/it, loss=-0.0835, reward=0.5943, temp=1.00][A

   Processing 170 sentences...


Epoch 1/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:37<32:00, 11.43s/it, loss=-0.0449, reward=0.6039, temp=1.00][A
Epoch 1/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:37<37:19, 13.41s/it, loss=-0.0449, reward=0.6039, temp=1.00][A

   Processing 376 sentences...


Epoch 1/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [29:21<37:19, 13.41s/it, loss=0.0225, reward=0.6117, temp=1.00] [A
Epoch 1/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:21<1:02:49, 22.71s/it, loss=0.0225, reward=0.6117, temp=1.00][A

   Processing 334 sentences...


Epoch 1/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [30:12<1:02:49, 22.71s/it, loss=-0.0198, reward=0.6116, temp=1.00][A
Epoch 1/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:12<1:25:19, 31.03s/it, loss=-0.0198, reward=0.6116, temp=1.00][A

   Processing 102 sentences...


Epoch 1/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:23<1:25:19, 31.03s/it, loss=-0.1176, reward=0.5904, temp=1.00][A
Epoch 1/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:23<1:08:03, 24.90s/it, loss=-0.1176, reward=0.5904, temp=1.00][A

   Processing 268 sentences...


Epoch 1/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:37<1:08:03, 24.90s/it, loss=-0.0490, reward=0.6113, temp=1.00][A
Epoch 1/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:37<59:02, 21.73s/it, loss=-0.0490, reward=0.6113, temp=1.00]  [A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [32:16<59:02, 21.73s/it, loss=0.0340, reward=0.6147, temp=1.00] [A
Epoch 1/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:16<2:01:17, 44.92s/it, loss=0.0340, reward=0.6147, temp=1.00][A

   Processing 253 sentences...


Epoch 1/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:33<2:01:17, 44.92s/it, loss=-0.0617, reward=0.6099, temp=1.00][A
Epoch 1/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:33<1:38:09, 36.58s/it, loss=-0.0617, reward=0.6099, temp=1.00][A

   Processing 132 sentences...


Epoch 1/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:42<1:38:09, 36.58s/it, loss=-0.1051, reward=0.5971, temp=1.00][A
Epoch 1/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:42<1:15:45, 28.41s/it, loss=-0.1051, reward=0.5971, temp=1.00][A

   Processing 286 sentences...


Epoch 1/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [33:05<1:15:45, 28.41s/it, loss=-0.0374, reward=0.6139, temp=1.00][A
Epoch 1/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:05<1:10:30, 26.61s/it, loss=-0.0374, reward=0.6139, temp=1.00][A

   Processing 128 sentences...


Epoch 1/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:12<1:10:30, 26.61s/it, loss=-0.1000, reward=0.5951, temp=1.00][A
Epoch 1/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:12<54:54, 20.85s/it, loss=-0.1000, reward=0.5951, temp=1.00]  [A

   Processing 37 sentences...


Epoch 1/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:15<54:54, 20.85s/it, loss=-0.1039, reward=0.5795, temp=1.00][A
Epoch 1/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:15<40:04, 15.31s/it, loss=-0.1039, reward=0.5795, temp=1.00][A

   Processing 205 sentences...


Epoch 1/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:33<40:04, 15.31s/it, loss=-0.0894, reward=0.5989, temp=1.00][A
Epoch 1/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:33<42:13, 16.24s/it, loss=-0.0894, reward=0.5989, temp=1.00][A

   Processing 87 sentences...


Epoch 1/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:40<42:13, 16.24s/it, loss=-0.0866, reward=0.6010, temp=1.00][A
Epoch 1/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:40<34:43, 13.44s/it, loss=-0.0866, reward=0.6010, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [34:50<34:43, 13.44s/it, loss=0.0257, reward=0.6176, temp=1.00] [A
Epoch 1/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [34:50<1:17:55, 30.36s/it, loss=0.0257, reward=0.6176, temp=1.00][A

   Processing 144 sentences...


Epoch 1/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:02<1:17:55, 30.36s/it, loss=-0.0809, reward=0.6048, temp=1.00][A
Epoch 1/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:02<1:03:28, 24.89s/it, loss=-0.0809, reward=0.6048, temp=1.00][A

   Processing 80 sentences...


Epoch 1/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:10<1:03:28, 24.89s/it, loss=-0.0937, reward=0.5922, temp=1.00][A
Epoch 1/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:10<49:58, 19.73s/it, loss=-0.0937, reward=0.5922, temp=1.00]  [A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [36:36<49:58, 19.73s/it, loss=-0.0198, reward=0.6132, temp=1.00][A
Epoch 1/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:36<1:40:21, 39.88s/it, loss=-0.0198, reward=0.6132, temp=1.00][A

   Processing 124 sentences...


Epoch 1/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:46<1:40:21, 39.88s/it, loss=-0.0826, reward=0.6045, temp=1.00][A
Epoch 1/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [36:46<1:16:36, 30.65s/it, loss=-0.0826, reward=0.6045, temp=1.00][A

   Processing 112 sentences...


Epoch 1/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [36:56<1:16:36, 30.65s/it, loss=-0.0922, reward=0.5943, temp=1.00][A
Epoch 1/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [36:56<1:00:51, 24.51s/it, loss=-0.0922, reward=0.5943, temp=1.00][A

   Processing 75 sentences...


Epoch 1/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:00<1:00:51, 24.51s/it, loss=-0.0810, reward=0.6084, temp=1.00][A
Epoch 1/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:00<45:31, 18.46s/it, loss=-0.0810, reward=0.6084, temp=1.00]  [A

   Processing 165 sentences...


Epoch 1/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:12<45:31, 18.46s/it, loss=-0.0739, reward=0.6095, temp=1.00][A
Epoch 1/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:12<40:30, 16.53s/it, loss=-0.0739, reward=0.6095, temp=1.00][A

   Processing 146 sentences...


Epoch 1/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:30<40:30, 16.53s/it, loss=-0.0940, reward=0.6005, temp=1.00][A
Epoch 1/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:30<41:30, 17.06s/it, loss=-0.0940, reward=0.6005, temp=1.00][A

   Processing 100 sentences...


Epoch 1/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:35<41:30, 17.06s/it, loss=-0.0944, reward=0.5984, temp=1.00][A
Epoch 1/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:35<32:32, 13.47s/it, loss=-0.0944, reward=0.5984, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [39:15<32:32, 13.47s/it, loss=0.0069, reward=0.6174, temp=1.00] [A
Epoch 1/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:15<1:34:31, 39.38s/it, loss=0.0069, reward=0.6174, temp=1.00][A

   Processing 209 sentences...


Epoch 1/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:30<1:34:31, 39.38s/it, loss=-0.0799, reward=0.6086, temp=1.00][A
Epoch 1/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:30<1:16:30, 32.10s/it, loss=-0.0799, reward=0.6086, temp=1.00][A

   Processing 90 sentences...


Epoch 1/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:37<1:16:30, 32.10s/it, loss=-0.0975, reward=0.5927, temp=1.00][A
Epoch 1/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:37<57:30, 24.30s/it, loss=-0.0975, reward=0.5927, temp=1.00]  [A

   Processing 72 sentences...


Epoch 1/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:41<57:30, 24.30s/it, loss=-0.0920, reward=0.5933, temp=1.00][A
Epoch 1/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:41<43:17, 18.42s/it, loss=-0.0920, reward=0.5933, temp=1.00][A

   Processing 38 sentences...


Epoch 1/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:43<43:17, 18.42s/it, loss=-0.0968, reward=0.5802, temp=1.00][A
Epoch 1/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [39:43<31:28, 13.49s/it, loss=-0.0968, reward=0.5802, temp=1.00][A

   Processing 131 sentences...


Epoch 1/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [39:48<31:28, 13.49s/it, loss=-0.0788, reward=0.6035, temp=1.00][A
Epoch 1/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [39:48<25:31, 11.01s/it, loss=-0.0788, reward=0.6035, temp=1.00][A

   Processing 78 sentences...


Epoch 1/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [39:53<25:31, 11.01s/it, loss=-0.0932, reward=0.5932, temp=1.00][A
Epoch 1/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [39:53<20:54,  9.09s/it, loss=-0.0932, reward=0.5932, temp=1.00][A

   Processing 119 sentences...


Epoch 1/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:01<20:54,  9.09s/it, loss=-0.0845, reward=0.6037, temp=1.00][A
Epoch 1/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:01<19:47,  8.67s/it, loss=-0.0845, reward=0.6037, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [41:44<19:47,  8.67s/it, loss=0.0163, reward=0.6144, temp=1.00] [A
Epoch 1/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [41:44<1:24:06, 37.10s/it, loss=0.0163, reward=0.6144, temp=1.00][A

   Processing 133 sentences...


Epoch 1/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [41:54<1:24:06, 37.10s/it, loss=-0.0864, reward=0.6030, temp=1.00][A
Epoch 1/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [41:54<1:04:50, 28.82s/it, loss=-0.0864, reward=0.6030, temp=1.00][A

   Processing 83 sentences...


Epoch 1/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [42:00<1:04:50, 28.82s/it, loss=-0.1076, reward=0.5814, temp=1.00][A
Epoch 1/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:00<49:17, 22.07s/it, loss=-0.1076, reward=0.5814, temp=1.00]  [A

   Processing 56 sentences...


Epoch 1/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:03<49:17, 22.07s/it, loss=-0.1050, reward=0.5831, temp=1.00][A
Epoch 1/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:03<36:16, 16.37s/it, loss=-0.1050, reward=0.5831, temp=1.00][A

   Processing 133 sentences...


Epoch 1/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:10<36:16, 16.37s/it, loss=-0.0823, reward=0.6035, temp=1.00][A
Epoch 1/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:10<29:40, 13.49s/it, loss=-0.0823, reward=0.6035, temp=1.00][A

   Processing 101 sentences...


Epoch 1/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:16<29:40, 13.49s/it, loss=-0.0963, reward=0.5936, temp=1.00][A
Epoch 1/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:16<24:31, 11.24s/it, loss=-0.0963, reward=0.5936, temp=1.00][A

   Processing 151 sentences...


Epoch 1/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:27<24:31, 11.24s/it, loss=-0.0725, reward=0.6071, temp=1.00][A
Epoch 1/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:27<24:17, 11.21s/it, loss=-0.0725, reward=0.6071, temp=1.00][A

   Processing 248 sentences...


Epoch 1/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:41<24:17, 11.21s/it, loss=-0.0823, reward=0.6015, temp=1.00][A
Epoch 1/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [42:41<26:12, 12.19s/it, loss=-0.0823, reward=0.6015, temp=1.00][A

   Processing 500 sentences...sents), truncating to 500


Epoch 1/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [43:41<26:12, 12.19s/it, loss=0.0281, reward=0.6157, temp=1.00] [A
Epoch 1/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:41<56:04, 26.29s/it, loss=0.0281, reward=0.6157, temp=1.00][A

   Processing 145 sentences...


Epoch 1/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:47<56:04, 26.29s/it, loss=-0.0743, reward=0.6098, temp=1.00][A
Epoch 1/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [43:47<43:04, 20.35s/it, loss=-0.0743, reward=0.6098, temp=1.00][A

   Processing 202 sentences...


Epoch 1/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [44:04<43:04, 20.35s/it, loss=-0.0731, reward=0.6073, temp=1.00][A
Epoch 1/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:04<40:29, 19.28s/it, loss=-0.0731, reward=0.6073, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:36<40:29, 19.28s/it, loss=0.0111, reward=0.6157, temp=1.00] [A
Epoch 1/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:36<48:02, 23.06s/it, loss=0.0111, reward=0.6157, temp=1.00][A

   Processing 154 sentences...


Epoch 1/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:53<48:02, 23.06s/it, loss=-0.0884, reward=0.6057, temp=1.00][A
Epoch 1/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [44:53<44:00, 21.29s/it, loss=-0.0884, reward=0.6057, temp=1.00][A

   Processing 345 sentences...


Epoch 1/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [45:41<44:00, 21.29s/it, loss=-0.0320, reward=0.6164, temp=1.00][A
Epoch 1/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:41<1:00:16, 29.40s/it, loss=-0.0320, reward=0.6164, temp=1.00][A

   Processing 99 sentences...


Epoch 1/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:46<1:00:16, 29.40s/it, loss=-0.1099, reward=0.5904, temp=1.00][A
Epoch 1/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [45:46<44:41, 21.98s/it, loss=-0.1099, reward=0.5904, temp=1.00]  [A

   Processing 500 sentences...sents), truncating to 500


Epoch 1/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [46:35<44:41, 21.98s/it, loss=0.0235, reward=0.6215, temp=1.00] [A
Epoch 1/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:35<1:00:44, 30.12s/it, loss=0.0235, reward=0.6215, temp=1.00][A

   Processing 118 sentences...


Epoch 1/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:45<1:00:44, 30.12s/it, loss=-0.1052, reward=0.5961, temp=1.00][A
Epoch 1/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [46:45<47:53, 23.95s/it, loss=-0.1052, reward=0.5961, temp=1.00]  [A

   Processing 30 sentences...


Epoch 1/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [46:46<47:53, 23.95s/it, loss=-0.1112, reward=0.5623, temp=1.00][A
Epoch 1/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [46:46<34:13, 17.25s/it, loss=-0.1112, reward=0.5623, temp=1.00][A

   Processing 76 sentences...


Epoch 1/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [46:50<34:13, 17.25s/it, loss=-0.0890, reward=0.6014, temp=1.00][A
Epoch 1/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [46:50<25:59, 13.21s/it, loss=-0.0890, reward=0.6014, temp=1.00][A

   Processing 95 sentences...


Epoch 1/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [46:57<25:59, 13.21s/it, loss=-0.0960, reward=0.5915, temp=1.00][A
Epoch 1/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [46:57<22:10, 11.37s/it, loss=-0.0960, reward=0.5915, temp=1.00][A

   Processing 78 sentences...


Epoch 1/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:02<22:10, 11.37s/it, loss=-0.0930, reward=0.5969, temp=1.00][A
Epoch 1/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:02<18:03,  9.34s/it, loss=-0.0930, reward=0.5969, temp=1.00][A

   Processing 153 sentences...


Epoch 1/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:10<18:03,  9.34s/it, loss=-0.0725, reward=0.6075, temp=1.00][A
Epoch 1/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:10<17:09,  8.95s/it, loss=-0.0725, reward=0.6075, temp=1.00][A

   Processing 229 sentences...


Epoch 1/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:24<17:09,  8.95s/it, loss=-0.0651, reward=0.6084, temp=1.00][A
Epoch 1/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:24<20:09, 10.61s/it, loss=-0.0651, reward=0.6084, temp=1.00][A

   Processing 108 sentences...


Epoch 1/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:29<20:09, 10.61s/it, loss=-0.1006, reward=0.5928, temp=1.00][A
Epoch 1/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:29<16:46,  8.91s/it, loss=-0.1006, reward=0.5928, temp=1.00][A

   Processing 154 sentences...


Epoch 1/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:48<16:46,  8.91s/it, loss=-0.0916, reward=0.5996, temp=1.00][A
Epoch 1/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [47:48<22:26, 12.02s/it, loss=-0.0916, reward=0.5996, temp=1.00][A

   Processing 44 sentences...


Epoch 1/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [47:56<22:26, 12.02s/it, loss=-0.1006, reward=0.5863, temp=1.00][A
Epoch 1/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [47:56<19:44, 10.67s/it, loss=-0.1006, reward=0.5863, temp=1.00][A

   Processing 229 sentences...


Epoch 1/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:34<19:44, 10.67s/it, loss=-0.0899, reward=0.6026, temp=1.00][A
Epoch 1/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:34<34:28, 18.80s/it, loss=-0.0899, reward=0.6026, temp=1.00][A

   Processing 221 sentences...


Epoch 1/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:54<34:28, 18.80s/it, loss=-0.0443, reward=0.6146, temp=1.00][A
Epoch 1/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [48:54<35:13, 19.39s/it, loss=-0.0443, reward=0.6146, temp=1.00][A

   Processing 148 sentences...


Epoch 1/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [49:02<35:13, 19.39s/it, loss=-0.0729, reward=0.6116, temp=1.00][A
Epoch 1/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:02<28:19, 15.73s/it, loss=-0.0729, reward=0.6116, temp=1.00][A

   Processing 144 sentences...


Epoch 1/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:08<28:19, 15.73s/it, loss=-0.0976, reward=0.5992, temp=1.00][A
Epoch 1/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:08<22:59, 12.89s/it, loss=-0.0976, reward=0.5992, temp=1.00][A

   Processing 77 sentences...


Epoch 1/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:12<22:59, 12.89s/it, loss=-0.1003, reward=0.5898, temp=1.00][A
Epoch 1/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:12<17:53, 10.13s/it, loss=-0.1003, reward=0.5898, temp=1.00][A

   Processing 90 sentences...


Epoch 1/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:19<17:53, 10.13s/it, loss=-0.1004, reward=0.5962, temp=1.00][A
Epoch 1/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:19<16:13,  9.27s/it, loss=-0.1004, reward=0.5962, temp=1.00][A

   Processing 309 sentences...


Epoch 1/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:32<16:13,  9.27s/it, loss=-0.0453, reward=0.6096, temp=1.00][A
Epoch 1/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:32<18:03, 10.42s/it, loss=-0.0453, reward=0.6096, temp=1.00][A

   Processing 297 sentences...


Epoch 1/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:52<18:03, 10.42s/it, loss=-0.0245, reward=0.6161, temp=1.00][A
Epoch 1/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [49:52<22:54, 13.35s/it, loss=-0.0245, reward=0.6161, temp=1.00][A

   Processing 286 sentences...


Epoch 1/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [50:18<22:54, 13.35s/it, loss=-0.0260, reward=0.6156, temp=1.00][A
Epoch 1/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:18<29:10, 17.17s/it, loss=-0.0260, reward=0.6156, temp=1.00][A

   Processing 66 sentences...


Epoch 1/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:21<29:10, 17.17s/it, loss=-0.1020, reward=0.5890, temp=1.00][A
Epoch 1/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:21<21:41, 12.89s/it, loss=-0.1020, reward=0.5890, temp=1.00][A

   Processing 220 sentences...


Epoch 1/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:37<21:41, 12.89s/it, loss=-0.0683, reward=0.6102, temp=1.00][A
Epoch 1/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:37<22:59, 13.80s/it, loss=-0.0683, reward=0.6102, temp=1.00][A

   Processing 209 sentences...


Epoch 1/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:55<22:59, 13.80s/it, loss=-0.0997, reward=0.6028, temp=1.00][A
Epoch 1/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [50:55<24:42, 14.97s/it, loss=-0.0997, reward=0.6028, temp=1.00][A

   Processing 70 sentences...


Epoch 1/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [50:59<24:42, 14.97s/it, loss=-0.1146, reward=0.5852, temp=1.00][A
Epoch 1/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [50:59<19:22, 11.86s/it, loss=-0.1146, reward=0.5852, temp=1.00][A

   Processing 214 sentences...


Epoch 1/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:10<19:22, 11.86s/it, loss=0.0082, reward=0.6301, temp=1.00] [A
Epoch 1/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:10<18:31, 11.46s/it, loss=0.0082, reward=0.6301, temp=1.00][A

   Processing 213 sentences...


Epoch 1/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:21<18:31, 11.46s/it, loss=-0.0936, reward=0.6042, temp=1.00][A
Epoch 1/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:21<18:23, 11.49s/it, loss=-0.0936, reward=0.6042, temp=1.00][A

   Processing 29 sentences...


Epoch 1/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:24<18:23, 11.49s/it, loss=-0.1132, reward=0.5713, temp=1.00][A
Epoch 1/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:24<13:43,  8.66s/it, loss=-0.1132, reward=0.5713, temp=1.00][A

   Processing 91 sentences...


Epoch 1/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:28<13:43,  8.66s/it, loss=-0.0956, reward=0.5969, temp=1.00][A
Epoch 1/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:28<11:37,  7.42s/it, loss=-0.0956, reward=0.5969, temp=1.00][A

   Processing 291 sentences...


Epoch 1/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:48<11:37,  7.42s/it, loss=-0.0555, reward=0.6085, temp=1.00][A
Epoch 1/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [51:48<17:28, 11.27s/it, loss=-0.0555, reward=0.6085, temp=1.00][A

   Processing 80 sentences...


Epoch 1/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [51:58<17:28, 11.27s/it, loss=-0.0818, reward=0.6089, temp=1.00][A
Epoch 1/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [51:58<16:19, 10.65s/it, loss=-0.0818, reward=0.6089, temp=1.00][A

   Processing 43 sentences...


Epoch 1/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:00<16:19, 10.65s/it, loss=-0.0930, reward=0.6020, temp=1.00][A
Epoch 1/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:00<12:34,  8.30s/it, loss=-0.0930, reward=0.6020, temp=1.00][A

   Processing 45 sentences...


Epoch 1/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:03<12:34,  8.30s/it, loss=-0.1226, reward=0.5569, temp=1.00][A
Epoch 1/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:03<09:50,  6.56s/it, loss=-0.1226, reward=0.5569, temp=1.00][A

   Processing 89 sentences...


Epoch 1/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:07<09:50,  6.56s/it, loss=-0.0881, reward=0.6013, temp=1.00][A
Epoch 1/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:07<08:32,  5.76s/it, loss=-0.0881, reward=0.6013, temp=1.00][A

   Processing 194 sentences...


Epoch 1/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:16<08:32,  5.76s/it, loss=-0.0559, reward=0.6111, temp=1.00][A
Epoch 1/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:16<10:12,  6.96s/it, loss=-0.0559, reward=0.6111, temp=1.00][A

   Processing 137 sentences...


Epoch 1/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:27<10:12,  6.96s/it, loss=-0.1241, reward=0.5845, temp=1.00][A
Epoch 1/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:27<11:30,  7.93s/it, loss=-0.1241, reward=0.5845, temp=1.00][A

   Processing 73 sentences...


Epoch 1/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:30<11:30,  7.93s/it, loss=-0.1019, reward=0.5927, temp=1.00][A
Epoch 1/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:30<09:23,  6.56s/it, loss=-0.1019, reward=0.5927, temp=1.00][A

   Processing 158 sentences...


Epoch 1/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:44<09:23,  6.56s/it, loss=-0.0851, reward=0.6028, temp=1.00][A
Epoch 1/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:44<12:13,  8.63s/it, loss=-0.0851, reward=0.6028, temp=1.00][A

   Processing 56 sentences...


Epoch 1/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:46<12:13,  8.63s/it, loss=-0.1257, reward=0.5651, temp=1.00][A
Epoch 1/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [52:46<09:36,  6.86s/it, loss=-0.1257, reward=0.5651, temp=1.00][A

   Processing 180 sentences...


Epoch 1/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [52:57<09:36,  6.86s/it, loss=-0.0694, reward=0.6034, temp=1.00][A
Epoch 1/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [52:57<11:10,  8.08s/it, loss=-0.0694, reward=0.6034, temp=1.00][A

   Processing 338 sentences...


Epoch 1/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:17<11:10,  8.08s/it, loss=-0.0350, reward=0.6058, temp=1.00][A
Epoch 1/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:17<15:52, 11.61s/it, loss=-0.0350, reward=0.6058, temp=1.00][A

   Processing 261 sentences...


Epoch 1/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:43<15:52, 11.61s/it, loss=-0.0211, reward=0.6120, temp=1.00][A
Epoch 1/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:43<21:41, 16.07s/it, loss=-0.0211, reward=0.6120, temp=1.00][A

   Processing 156 sentences...


Epoch 1/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:50<21:41, 16.07s/it, loss=-0.0899, reward=0.5994, temp=1.00][A
Epoch 1/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [53:50<17:25, 13.06s/it, loss=-0.0899, reward=0.5994, temp=1.00][A

   Processing 43 sentences...


Epoch 1/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [53:52<17:25, 13.06s/it, loss=-0.1255, reward=0.5659, temp=1.00][A
Epoch 1/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [53:52<13:07,  9.97s/it, loss=-0.1255, reward=0.5659, temp=1.00][A

   Processing 144 sentences...


Epoch 1/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [54:05<13:07,  9.97s/it, loss=-0.0995, reward=0.5970, temp=1.00][A
Epoch 1/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:05<14:11, 10.91s/it, loss=-0.0995, reward=0.5970, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:58<14:11, 10.91s/it, loss=0.1245, reward=0.6178, temp=1.00] [A
Epoch 1/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [54:58<30:05, 23.45s/it, loss=0.1245, reward=0.6178, temp=1.00][A

   Processing 49 sentences...


Epoch 1/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:01<30:05, 23.45s/it, loss=-0.1102, reward=0.5768, temp=1.00][A
Epoch 1/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:01<21:56, 17.33s/it, loss=-0.1102, reward=0.5768, temp=1.00][A

   Processing 66 sentences...


Epoch 1/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:07<21:56, 17.33s/it, loss=-0.0954, reward=0.5914, temp=1.00][A
Epoch 1/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:07<17:32, 14.03s/it, loss=-0.0954, reward=0.5914, temp=1.00][A

   Processing 103 sentences...


Epoch 1/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:14<17:32, 14.03s/it, loss=-0.0692, reward=0.6104, temp=1.00][A
Epoch 1/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:14<14:26, 11.71s/it, loss=-0.0692, reward=0.6104, temp=1.00][A

   Processing 350 sentences...


Epoch 1/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:41<14:26, 11.71s/it, loss=-0.0041, reward=0.6134, temp=1.00][A
Epoch 1/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:41<20:00, 16.44s/it, loss=-0.0041, reward=0.6134, temp=1.00][A

   Processing 111 sentences...


Epoch 1/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:50<20:00, 16.44s/it, loss=-0.0849, reward=0.6061, temp=1.00][A
Epoch 1/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [55:50<16:59, 14.16s/it, loss=-0.0849, reward=0.6061, temp=1.00][A

   Processing 51 sentences...


Epoch 1/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [55:53<16:59, 14.16s/it, loss=-0.0924, reward=0.6029, temp=1.00][A
Epoch 1/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [55:53<12:38, 10.68s/it, loss=-0.0924, reward=0.6029, temp=1.00][A

   Processing 146 sentences...


Epoch 1/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [56:04<12:38, 10.68s/it, loss=-0.0798, reward=0.6076, temp=1.00][A
Epoch 1/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:04<12:51, 11.02s/it, loss=-0.0798, reward=0.6076, temp=1.00][A

   Processing 161 sentences...


Epoch 1/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:15<12:51, 11.02s/it, loss=-0.1013, reward=0.5999, temp=1.00][A
Epoch 1/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:15<12:23, 10.78s/it, loss=-0.1013, reward=0.5999, temp=1.00][A

   Processing 243 sentences...


Epoch 1/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:30<12:23, 10.78s/it, loss=-0.0433, reward=0.6140, temp=1.00][A
Epoch 1/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:30<13:46, 12.15s/it, loss=-0.0433, reward=0.6140, temp=1.00][A

   Processing 297 sentences...


Epoch 1/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:57<13:46, 12.15s/it, loss=-0.0623, reward=0.6072, temp=1.00][A
Epoch 1/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [56:57<18:27, 16.52s/it, loss=-0.0623, reward=0.6072, temp=1.00][A

   Processing 47 sentences...


Epoch 1/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [56:59<18:27, 16.52s/it, loss=-0.1203, reward=0.5666, temp=1.00][A
Epoch 1/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [56:59<13:36, 12.38s/it, loss=-0.1203, reward=0.5666, temp=1.00][A

   Processing 415 sentences...


Epoch 1/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [58:07<13:36, 12.38s/it, loss=-0.0279, reward=0.6096, temp=1.00][A
Epoch 1/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:07<31:21, 28.94s/it, loss=-0.0279, reward=0.6096, temp=1.00][A

   Processing 138 sentences...


Epoch 1/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:19<31:21, 28.94s/it, loss=-0.0749, reward=0.6046, temp=1.00][A
Epoch 1/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:19<25:35, 23.99s/it, loss=-0.0749, reward=0.6046, temp=1.00][A

   Processing 46 sentences...


Epoch 1/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:22<25:35, 23.99s/it, loss=-0.1026, reward=0.5841, temp=1.00][A
Epoch 1/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:22<18:20, 17.46s/it, loss=-0.1026, reward=0.5841, temp=1.00][A

   Processing 73 sentences...


Epoch 1/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:26<18:20, 17.46s/it, loss=-0.0814, reward=0.6086, temp=1.00][A
Epoch 1/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:26<13:52, 13.43s/it, loss=-0.0814, reward=0.6086, temp=1.00][A

   Processing 136 sentences...


Epoch 1/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:39<13:52, 13.43s/it, loss=-0.0875, reward=0.5980, temp=1.00][A
Epoch 1/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:39<13:36, 13.39s/it, loss=-0.0875, reward=0.5980, temp=1.00][A

   Processing 93 sentences...


Epoch 1/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:44<13:36, 13.39s/it, loss=-0.0917, reward=0.6086, temp=1.00][A
Epoch 1/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [58:44<10:58, 10.97s/it, loss=-0.0917, reward=0.6086, temp=1.00][A

   Processing 267 sentences...


Epoch 1/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [59:01<10:58, 10.97s/it, loss=-0.0860, reward=0.6035, temp=1.00][A
Epoch 1/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:01<12:32, 12.75s/it, loss=-0.0860, reward=0.6035, temp=1.00][A

   Processing 421 sentences...


Epoch 1/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [1:00:08<12:32, 12.75s/it, loss=-0.0053, reward=0.6145, temp=1.00][A
Epoch 1/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:08<27:52, 28.84s/it, loss=-0.0053, reward=0.6145, temp=1.00][A

   Processing 78 sentences...


Epoch 1/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:11<27:52, 28.84s/it, loss=-0.0975, reward=0.5983, temp=1.00][A
Epoch 1/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:11<20:11, 21.26s/it, loss=-0.0975, reward=0.5983, temp=1.00][A

   Processing 275 sentences...


Epoch 1/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:32<20:11, 21.26s/it, loss=-0.0497, reward=0.6083, temp=1.00][A
Epoch 1/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:32<19:45, 21.17s/it, loss=-0.0497, reward=0.6083, temp=1.00][A

   Processing 56 sentences...


Epoch 1/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:36<19:45, 21.17s/it, loss=-0.1220, reward=0.5709, temp=1.00][A
Epoch 1/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:36<14:31, 15.85s/it, loss=-0.1220, reward=0.5709, temp=1.00][A

   Processing 292 sentences...


Epoch 1/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:50<14:31, 15.85s/it, loss=-0.0573, reward=0.6070, temp=1.00][A
Epoch 1/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:00:50<13:54, 15.45s/it, loss=-0.0573, reward=0.6070, temp=1.00][A

   Processing 393 sentences...


Epoch 1/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:01:32<13:54, 15.45s/it, loss=-0.0256, reward=0.6127, temp=1.00][A
Epoch 1/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:32<20:34, 23.29s/it, loss=-0.0256, reward=0.6127, temp=1.00][A

   Processing 412 sentences...


Epoch 1/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:02:15<20:34, 23.29s/it, loss=-0.0669, reward=0.6080, temp=1.00][A
Epoch 1/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:15<25:30, 29.43s/it, loss=-0.0669, reward=0.6080, temp=1.00][A

   Processing 53 sentences...


Epoch 1/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:18<25:30, 29.43s/it, loss=-0.1097, reward=0.5787, temp=1.00][A
Epoch 1/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:18<18:06, 21.31s/it, loss=-0.1097, reward=0.5787, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:03:03<18:06, 21.31s/it, loss=0.0289, reward=0.6151, temp=1.00] [A
Epoch 1/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:03<23:45, 28.51s/it, loss=0.0289, reward=0.6151, temp=1.00][A

   Processing 449 sentences...


Epoch 1/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:36<23:45, 28.51s/it, loss=-0.0345, reward=0.6114, temp=1.00][A
Epoch 1/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:36<24:14, 29.69s/it, loss=-0.0345, reward=0.6114, temp=1.00][A

   Processing 81 sentences...


Epoch 1/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:40<24:14, 29.69s/it, loss=-0.1063, reward=0.5894, temp=1.00][A
Epoch 1/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:40<17:44, 22.17s/it, loss=-0.1063, reward=0.5894, temp=1.00][A

   Processing 368 sentences...


Epoch 1/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:04:03<17:44, 22.17s/it, loss=-0.0343, reward=0.6107, temp=1.00][A
Epoch 1/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:03<17:32, 22.40s/it, loss=-0.0343, reward=0.6107, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:05:07<17:32, 22.40s/it, loss=-0.0525, reward=0.6072, temp=1.00][A
Epoch 1/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:07<26:41, 34.82s/it, loss=-0.0525, reward=0.6072, temp=1.00][A

   Processing 45 sentences...


Epoch 1/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:09<26:41, 34.82s/it, loss=-0.1063, reward=0.5657, temp=1.00][A
Epoch 1/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:09<18:50, 25.13s/it, loss=-0.1063, reward=0.5657, temp=1.00][A

   Processing 45 sentences...


Epoch 1/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:12<18:50, 25.13s/it, loss=-0.0977, reward=0.5790, temp=1.00][A
Epoch 1/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:12<13:28, 18.37s/it, loss=-0.0977, reward=0.5790, temp=1.00][A

   Processing 96 sentences...


Epoch 1/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:21<13:28, 18.37s/it, loss=-0.0888, reward=0.5973, temp=1.00][A
Epoch 1/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:21<11:02, 15.42s/it, loss=-0.0888, reward=0.5973, temp=1.00][A

   Processing 184 sentences...


Epoch 1/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:33<11:02, 15.42s/it, loss=-0.0509, reward=0.6152, temp=1.00][A
Epoch 1/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:33<10:10, 14.53s/it, loss=-0.0509, reward=0.6152, temp=1.00][A

   Processing 63 sentences...


Epoch 1/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:38<10:10, 14.53s/it, loss=-0.0920, reward=0.5939, temp=1.00][A
Epoch 1/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:38<08:01, 11.73s/it, loss=-0.0920, reward=0.5939, temp=1.00][A

   Processing 123 sentences...


Epoch 1/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:45<08:01, 11.73s/it, loss=-0.0840, reward=0.6024, temp=1.00][A
Epoch 1/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:05:45<06:45, 10.13s/it, loss=-0.0840, reward=0.6024, temp=1.00][A

   Processing 72 sentences...


Epoch 1/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:05:50<06:45, 10.13s/it, loss=-0.1046, reward=0.5816, temp=1.00][A
Epoch 1/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:05:50<05:40,  8.73s/it, loss=-0.1046, reward=0.5816, temp=1.00][A

   Processing 268 sentences...


Epoch 1/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:06:37<05:40,  8.73s/it, loss=-0.0478, reward=0.6082, temp=1.00][A
Epoch 1/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:37<12:49, 20.25s/it, loss=-0.0478, reward=0.6082, temp=1.00][A

   Processing 129 sentences...


Epoch 1/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:45<12:49, 20.25s/it, loss=-0.0796, reward=0.6031, temp=1.00][A
Epoch 1/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:06:45<10:08, 16.44s/it, loss=-0.0796, reward=0.6031, temp=1.00][A

   Processing 57 sentences...


Epoch 1/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:06:48<10:08, 16.44s/it, loss=-0.0971, reward=0.5866, temp=1.00][A
Epoch 1/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:06:48<07:26, 12.40s/it, loss=-0.0971, reward=0.5866, temp=1.00][A

   Processing 267 sentences...


Epoch 1/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:07:06<07:26, 12.40s/it, loss=-0.0573, reward=0.6078, temp=1.00][A
Epoch 1/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:06<08:16, 14.19s/it, loss=-0.0573, reward=0.6078, temp=1.00][A

   Processing 84 sentences...


Epoch 1/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:10<08:16, 14.19s/it, loss=-0.0953, reward=0.5963, temp=1.00][A
Epoch 1/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:10<06:12, 10.97s/it, loss=-0.0953, reward=0.5963, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:58<06:12, 10.97s/it, loss=0.0390, reward=0.6163, temp=1.00] [A
Epoch 1/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:07:58<12:15, 22.30s/it, loss=0.0390, reward=0.6163, temp=1.00][A

   Processing 97 sentences...


Epoch 1/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:11<12:15, 22.30s/it, loss=-0.0838, reward=0.6075, temp=1.00][A
Epoch 1/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:11<10:25, 19.53s/it, loss=-0.0838, reward=0.6075, temp=1.00][A

   Processing 232 sentences...


Epoch 1/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:42<10:25, 19.53s/it, loss=-0.0557, reward=0.6118, temp=1.00][A
Epoch 1/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:08:42<11:45, 22.75s/it, loss=-0.0557, reward=0.6118, temp=1.00][A

   Processing 111 sentences...


Epoch 1/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:08:48<11:45, 22.75s/it, loss=-0.0712, reward=0.6142, temp=1.00][A
Epoch 1/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:08:48<08:58, 17.94s/it, loss=-0.0712, reward=0.6142, temp=1.00][A

   Processing 24 sentences...


Epoch 1/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:08:50<08:58, 17.94s/it, loss=-0.1252, reward=0.5328, temp=1.00][A
Epoch 1/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:08:50<06:16, 12.99s/it, loss=-0.1252, reward=0.5328, temp=1.00][A

   Processing 112 sentences...


Epoch 1/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:08:56<06:16, 12.99s/it, loss=-0.0767, reward=0.6059, temp=1.00][A
Epoch 1/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:08:56<05:06, 10.93s/it, loss=-0.0767, reward=0.6059, temp=1.00][A

   Processing 43 sentences...


Epoch 1/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:08:59<05:06, 10.93s/it, loss=-0.0981, reward=0.5842, temp=1.00][A
Epoch 1/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:08:59<03:48,  8.46s/it, loss=-0.0981, reward=0.5842, temp=1.00][A

   Processing 183 sentences...


Epoch 1/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:09<03:48,  8.46s/it, loss=-0.0708, reward=0.6048, temp=1.00][A
Epoch 1/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:09<03:58,  9.17s/it, loss=-0.0708, reward=0.6048, temp=1.00][A

   Processing 117 sentences...


Epoch 1/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:14<03:58,  9.17s/it, loss=-0.0892, reward=0.5979, temp=1.00][A
Epoch 1/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:14<03:15,  7.81s/it, loss=-0.0892, reward=0.5979, temp=1.00][A

   Processing 106 sentences...


Epoch 1/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:21<03:15,  7.81s/it, loss=-0.0932, reward=0.5967, temp=1.00][A
Epoch 1/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:21<02:58,  7.42s/it, loss=-0.0932, reward=0.5967, temp=1.00][A

   Processing 67 sentences...


Epoch 1/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:23<02:58,  7.42s/it, loss=-0.0948, reward=0.5914, temp=1.00][A
Epoch 1/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:23<02:17,  5.96s/it, loss=-0.0948, reward=0.5914, temp=1.00][A

   Processing 112 sentences...


Epoch 1/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:32<02:17,  5.96s/it, loss=-0.0984, reward=0.5923, temp=1.00][A
Epoch 1/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:32<02:28,  6.76s/it, loss=-0.0984, reward=0.5923, temp=1.00][A

   Processing 79 sentences...


Epoch 1/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:36<02:28,  6.76s/it, loss=-0.0961, reward=0.5904, temp=1.00][A
Epoch 1/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:36<02:08,  6.10s/it, loss=-0.0961, reward=0.5904, temp=1.00][A

   Processing 408 sentences...


Epoch 1/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:57<02:08,  6.10s/it, loss=0.0372, reward=0.6189, temp=1.00] [A
Epoch 1/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:09:57<03:31, 10.56s/it, loss=0.0372, reward=0.6189, temp=1.00][A

   Processing 103 sentences...


Epoch 1/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:02<03:31, 10.56s/it, loss=-0.0839, reward=0.6040, temp=1.00][A
Epoch 1/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:02<02:46,  8.76s/it, loss=-0.0839, reward=0.6040, temp=1.00][A

   Processing 103 sentences...


Epoch 1/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:09<02:46,  8.76s/it, loss=-0.0850, reward=0.6037, temp=1.00][A
Epoch 1/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:09<02:26,  8.15s/it, loss=-0.0850, reward=0.6037, temp=1.00][A

   Processing 118 sentences...


Epoch 1/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:15<02:26,  8.15s/it, loss=-0.1095, reward=0.5890, temp=1.00][A
Epoch 1/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:15<02:07,  7.50s/it, loss=-0.1095, reward=0.5890, temp=1.00][A

   Processing 249 sentences...


Epoch 1/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:34<02:07,  7.50s/it, loss=-0.0632, reward=0.6073, temp=1.00][A
Epoch 1/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:34<02:58, 11.15s/it, loss=-0.0632, reward=0.6073, temp=1.00][A

   Processing 145 sentences...


Epoch 1/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:40<02:58, 11.15s/it, loss=-0.0888, reward=0.5998, temp=1.00][A
Epoch 1/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:40<02:22,  9.50s/it, loss=-0.0888, reward=0.5998, temp=1.00][A

   Processing 294 sentences...


Epoch 1/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:11:06<02:22,  9.50s/it, loss=-0.0371, reward=0.6105, temp=1.00][A
Epoch 1/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:06<03:22, 14.48s/it, loss=-0.0371, reward=0.6105, temp=1.00][A

   Processing 147 sentences...


Epoch 1/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:14<03:22, 14.48s/it, loss=-0.1079, reward=0.5925, temp=1.00][A
Epoch 1/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:14<02:43, 12.58s/it, loss=-0.1079, reward=0.5925, temp=1.00][A

   Processing 304 sentences...


Epoch 1/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:48<02:43, 12.58s/it, loss=-0.0556, reward=0.6076, temp=1.00][A
Epoch 1/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:11:48<03:46, 18.90s/it, loss=-0.0556, reward=0.6076, temp=1.00][A

   Processing 128 sentences...


Epoch 1/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:12:01<03:46, 18.90s/it, loss=-0.0811, reward=0.6072, temp=1.00][A
Epoch 1/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:01<03:10, 17.28s/it, loss=-0.0811, reward=0.6072, temp=1.00][A

   Processing 247 sentences...


Epoch 1/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:18<03:10, 17.28s/it, loss=-0.0719, reward=0.6073, temp=1.00][A
Epoch 1/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:18<02:50, 17.09s/it, loss=-0.0719, reward=0.6073, temp=1.00][A

   Processing 165 sentences...


Epoch 1/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:35<02:50, 17.09s/it, loss=-0.0884, reward=0.6037, temp=1.00][A
Epoch 1/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:35<02:34, 17.12s/it, loss=-0.0884, reward=0.6037, temp=1.00][A

   Processing 38 sentences...


Epoch 1/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:38<02:34, 17.12s/it, loss=-0.1114, reward=0.5725, temp=1.00][A
Epoch 1/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:38<01:42, 12.81s/it, loss=-0.1114, reward=0.5725, temp=1.00][A

   Processing 63 sentences...


Epoch 1/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:41<01:42, 12.81s/it, loss=-0.1028, reward=0.5876, temp=1.00][A
Epoch 1/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:41<01:09,  9.94s/it, loss=-0.1028, reward=0.5876, temp=1.00][A

   Processing 215 sentences...


Epoch 1/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:13:06<01:09,  9.94s/it, loss=-0.0691, reward=0.6038, temp=1.00][A
Epoch 1/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:06<01:26, 14.44s/it, loss=-0.0691, reward=0.6038, temp=1.00][A

   Processing 39 sentences...


Epoch 1/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:08<01:26, 14.44s/it, loss=-0.0992, reward=0.5887, temp=1.00][A
Epoch 1/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:08<00:53, 10.72s/it, loss=-0.0992, reward=0.5887, temp=1.00][A

   Processing 105 sentences...


Epoch 1/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:13<00:53, 10.72s/it, loss=-0.1086, reward=0.5892, temp=1.00][A
Epoch 1/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:13<00:35,  8.93s/it, loss=-0.1086, reward=0.5892, temp=1.00][A

   Processing 138 sentences...


Epoch 1/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:24<00:35,  8.93s/it, loss=-0.0891, reward=0.5985, temp=1.00][A
Epoch 1/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:24<00:28,  9.54s/it, loss=-0.0891, reward=0.5985, temp=1.00][A

   Processing 161 sentences...


Epoch 1/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:34<00:28,  9.54s/it, loss=-0.0807, reward=0.6031, temp=1.00][A
Epoch 1/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:34<00:19,  9.67s/it, loss=-0.0807, reward=0.6031, temp=1.00][A

   Processing 35 sentences...


Epoch 1/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:36<00:19,  9.67s/it, loss=-0.1130, reward=0.5698, temp=1.00][A
Epoch 1/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:36<00:07,  7.34s/it, loss=-0.1130, reward=0.5698, temp=1.00][A

   Processing 500 sentences...ents), truncating to 500


Epoch 1/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:14:07<00:07,  7.34s/it, loss=0.1074, reward=0.6174, temp=1.00] [A
Epoch 1/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:07<00:00, 15.89s/it, loss=0.1074, reward=0.6174, temp=1.00][A


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 1/10:
  Train Loss: 0.0167
  Overall Val Reward: 0.5804
  Learning Rate: 0.000033
  Temperature: 1.0000

  Aspect-wise Val Rewards:
    facts       : 0.5847
    analysis    : 0.5794
    argument    : 0.5803
    judgement   : 0.5794
    statute     : 0.5784

  ‚úÖ Saved best model (Val Reward: 0.5804)

   üî• Warmup: LR = 0.000067


Epoch 2/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 2/10:   0%|          | 1/280 [00:28<2:11:51, 28.36s/it, loss=0.0098, reward=0.6161, temp=0.90]

   Processing 353 sentences...

Epoch 2/10:   1%|          | 2/280 [00:53<2:01:22, 26.20s/it, loss=-0.0377, reward=0.6099, temp=0.90]

   Processing 57 sentences...

Epoch 2/10:   1%|          | 3/280 [01:00<1:21:07, 17.57s/it, loss=-0.1202, reward=0.5658, temp=0.90]

   Processing 96 sentences...

Epoch 2/10:   1%|‚ñè         | 4/280 [01:14<1:15:04, 16.32s/it, loss=-0.0924, reward=0.6000, temp=0.90]

   Processing 71 sentences...

Epoch 2/10:   2%|‚ñè         | 5/280 [01:17<52:45, 11.51s/it, loss=-0.0904, reward=0.5991, temp=0.90]  

   Processing 62 sentences...

Epoch 2/10:   2%|‚ñè         | 6/280 [01:20<38:57,  8.53s/it, loss=-0.0940, reward=0.5911, temp=0.90]

   Processing 350 sentences...

Epoch 2/10:   2%|‚ñé         | 7/280 [01:52<1:13:23, 16.13s/it, loss=-0.0183, reward=0.6172, temp=0.90]

   Processing 263 sentences...

Epoch 2/10:   3%|‚ñé         | 8/280 [02:08<1:12:35, 16.01s/it, loss=-0.0483, reward=0.6125, temp=0.90]

   Processing 176 sentences...

Epoch 2/10:   3%|‚ñé         | 9/280 [02:17<1:03:21, 14.03s/it, loss=-0.0767, reward=0.6078, temp=0.90]

   Processing 318 sentences...

Epoch 2/10:   4%|‚ñé         | 10/280 [03:21<2:12:55, 29.54s/it, loss=-0.0807, reward=0.6031, temp=0.90]

   Processing 338 sentences...

Epoch 2/10:   4%|‚ñç         | 11/280 [03:46<2:05:24, 27.97s/it, loss=-0.0388, reward=0.6156, temp=0.90]

   Processing 40 sentences...

Epoch 2/10:   4%|‚ñç         | 12/280 [03:48<1:30:06, 20.17s/it, loss=-0.0978, reward=0.5733, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:   5%|‚ñç         | 13/280 [04:42<2:14:28, 30.22s/it, loss=-0.0373, reward=0.6110, temp=0.90]

   Processing 423 sentences...

Epoch 2/10:   5%|‚ñå         | 14/280 [05:40<2:51:56, 38.79s/it, loss=-0.0324, reward=0.6167, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:   5%|‚ñå         | 15/280 [05:59<2:24:27, 32.71s/it, loss=0.0681, reward=0.6408, temp=0.90] 

   Processing 82 sentences...

Epoch 2/10:   6%|‚ñå         | 16/280 [06:02<1:44:29, 23.75s/it, loss=-0.0881, reward=0.5999, temp=0.90]

   Processing 137 sentences...

Epoch 2/10:   6%|‚ñå         | 17/280 [06:09<1:22:28, 18.81s/it, loss=-0.0718, reward=0.6137, temp=0.90]

   Processing 30 sentences...

Epoch 2/10:   6%|‚ñã         | 18/280 [06:12<1:00:52, 13.94s/it, loss=-0.0960, reward=0.5507, temp=0.90]

   Processing 82 sentences...

Epoch 2/10:   7%|‚ñã         | 19/280 [06:20<52:48, 12.14s/it, loss=-0.0861, reward=0.5881, temp=0.90]  

   Processing 61 sentences...

Epoch 2/10:   7%|‚ñã         | 20/280 [06:23<40:40,  9.39s/it, loss=-0.0825, reward=0.5866, temp=0.90]

   Processing 50 sentences...

Epoch 2/10:   8%|‚ñä         | 21/280 [06:26<32:21,  7.49s/it, loss=-0.0927, reward=0.5723, temp=0.90]

   Processing 115 sentences...

Epoch 2/10:   8%|‚ñä         | 22/280 [06:34<33:46,  7.85s/it, loss=-0.0759, reward=0.6039, temp=0.90]

   Processing 80 sentences...

Epoch 2/10:   8%|‚ñä         | 23/280 [06:38<27:41,  6.47s/it, loss=-0.0769, reward=0.6017, temp=0.90]

   Processing 434 sentences...

Epoch 2/10:   9%|‚ñä         | 24/280 [08:00<2:04:33, 29.19s/it, loss=-0.0212, reward=0.6168, temp=0.90]

   Processing 254 sentences...

Epoch 2/10:   9%|‚ñâ         | 25/280 [08:32<2:07:24, 29.98s/it, loss=-0.0713, reward=0.6065, temp=0.90]

   Processing 329 sentences...

Epoch 2/10:   9%|‚ñâ         | 26/280 [08:51<1:53:27, 26.80s/it, loss=-0.0693, reward=0.6053, temp=0.90]

   Processing 87 sentences...

Epoch 2/10:  10%|‚ñâ         | 27/280 [08:56<1:25:02, 20.17s/it, loss=-0.0818, reward=0.6015, temp=0.90]

   Processing 39 sentences...

Epoch 2/10:  10%|‚ñà         | 28/280 [08:57<1:01:20, 14.61s/it, loss=-0.0916, reward=0.5824, temp=0.90]

   Processing 83 sentences...

Epoch 2/10:  10%|‚ñà         | 29/280 [09:01<46:59, 11.23s/it, loss=-0.0839, reward=0.6024, temp=0.90]  

   Processing 177 sentences...

Epoch 2/10:  11%|‚ñà         | 30/280 [09:12<47:23, 11.37s/it, loss=-0.0781, reward=0.6046, temp=0.90]

   Processing 82 sentences...

Epoch 2/10:  11%|‚ñà         | 31/280 [09:20<42:41, 10.29s/it, loss=-0.0929, reward=0.5898, temp=0.90]

   Processing 136 sentences...

Epoch 2/10:  11%|‚ñà‚ñè        | 32/280 [09:25<35:46,  8.66s/it, loss=-0.0945, reward=0.5933, temp=0.90]

   Processing 166 sentences...

Epoch 2/10:  12%|‚ñà‚ñè        | 33/280 [09:35<37:09,  9.03s/it, loss=-0.0829, reward=0.6021, temp=0.90]

   Processing 91 sentences...

Epoch 2/10:  12%|‚ñà‚ñè        | 34/280 [09:40<32:37,  7.96s/it, loss=-0.0918, reward=0.5964, temp=0.90]

   Processing 87 sentences...

Epoch 2/10:  12%|‚ñà‚ñé        | 35/280 [09:45<28:47,  7.05s/it, loss=-0.1006, reward=0.5892, temp=0.90]

   Processing 203 sentences...

Epoch 2/10:  13%|‚ñà‚ñé        | 36/280 [09:59<37:17,  9.17s/it, loss=-0.0881, reward=0.5977, temp=0.90]

   Processing 140 sentences...

Epoch 2/10:  13%|‚ñà‚ñé        | 37/280 [10:10<38:27,  9.50s/it, loss=-0.0752, reward=0.6063, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  14%|‚ñà‚ñé        | 38/280 [11:01<1:29:13, 22.12s/it, loss=-0.0248, reward=0.6061, temp=0.90]

   Processing 97 sentences...

Epoch 2/10:  14%|‚ñà‚ñç        | 39/280 [11:07<1:09:33, 17.32s/it, loss=-0.1011, reward=0.5949, temp=0.90]

   Processing 182 sentences...

Epoch 2/10:  14%|‚ñà‚ñç        | 40/280 [11:20<1:03:39, 15.91s/it, loss=-0.0985, reward=0.5974, temp=0.90]

   Processing 87 sentences...

Epoch 2/10:  15%|‚ñà‚ñç        | 41/280 [11:24<49:27, 12.42s/it, loss=-0.0927, reward=0.6003, temp=0.90]  

   Processing 235 sentences...

Epoch 2/10:  15%|‚ñà‚ñå        | 42/280 [11:57<1:13:51, 18.62s/it, loss=-0.0614, reward=0.6068, temp=0.90]

   Processing 50 sentences...

Epoch 2/10:  15%|‚ñà‚ñå        | 43/280 [12:00<54:17, 13.74s/it, loss=-0.1125, reward=0.5807, temp=0.90]  

   Processing 70 sentences...

Epoch 2/10:  16%|‚ñà‚ñå        | 44/280 [12:04<43:05, 10.96s/it, loss=-0.1142, reward=0.5839, temp=0.90]

   Processing 65 sentences...

Epoch 2/10:  16%|‚ñà‚ñå        | 45/280 [12:10<36:29,  9.32s/it, loss=-0.1150, reward=0.5802, temp=0.90]

   Processing 482 sentences...

Epoch 2/10:  16%|‚ñà‚ñã        | 46/280 [13:01<1:25:56, 22.03s/it, loss=0.0324, reward=0.6100, temp=0.90]

   Processing 164 sentences...

Epoch 2/10:  17%|‚ñà‚ñã        | 47/280 [13:17<1:18:01, 20.09s/it, loss=-0.0812, reward=0.6010, temp=0.90]

   Processing 102 sentences...

Epoch 2/10:  17%|‚ñà‚ñã        | 48/280 [13:23<1:01:39, 15.95s/it, loss=-0.0864, reward=0.6023, temp=0.90]

   Processing 48 sentences...

Epoch 2/10:  18%|‚ñà‚ñä        | 49/280 [13:25<44:59, 11.69s/it, loss=-0.1011, reward=0.5889, temp=0.90]  

   Processing 74 sentences...

Epoch 2/10:  18%|‚ñà‚ñä        | 50/280 [13:30<37:37,  9.81s/it, loss=-0.0948, reward=0.5974, temp=0.90]

   Processing 67 sentences...

Epoch 2/10:  18%|‚ñà‚ñä        | 51/280 [13:36<32:45,  8.58s/it, loss=-0.1058, reward=0.5884, temp=0.90]

   Processing 31 sentences...

Epoch 2/10:  19%|‚ñà‚ñä        | 52/280 [13:38<24:36,  6.48s/it, loss=-0.0967, reward=0.5927, temp=0.90]

   Processing 53 sentences...

Epoch 2/10:  19%|‚ñà‚ñâ        | 53/280 [13:41<21:17,  5.63s/it, loss=-0.1267, reward=0.5603, temp=0.90]

   Processing 123 sentences...

Epoch 2/10:  19%|‚ñà‚ñâ        | 54/280 [13:49<23:28,  6.23s/it, loss=-0.0910, reward=0.5928, temp=0.90]

   Processing 169 sentences...

Epoch 2/10:  20%|‚ñà‚ñâ        | 55/280 [13:59<27:27,  7.32s/it, loss=-0.0360, reward=0.6100, temp=0.90]

   Processing 33 sentences...

Epoch 2/10:  20%|‚ñà‚ñà        | 56/280 [14:02<22:22,  5.99s/it, loss=-0.1217, reward=0.5544, temp=0.90]

   Processing 190 sentences...

Epoch 2/10:  20%|‚ñà‚ñà        | 57/280 [14:13<27:59,  7.53s/it, loss=-0.0252, reward=0.6106, temp=0.90]

   Processing 223 sentences...

Epoch 2/10:  21%|‚ñà‚ñà        | 58/280 [14:30<38:36, 10.44s/it, loss=-0.0512, reward=0.6044, temp=0.90]

   Processing 57 sentences...

Epoch 2/10:  21%|‚ñà‚ñà        | 59/280 [14:33<29:59,  8.14s/it, loss=-0.0911, reward=0.5998, temp=0.90]

   Processing 362 sentences...

Epoch 2/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [14:58<48:29, 13.22s/it, loss=0.1339, reward=0.6377, temp=0.90] 

   Processing 192 sentences...

Epoch 2/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:16<54:10, 14.84s/it, loss=-0.0779, reward=0.6035, temp=0.90]

   Processing 108 sentences...

Epoch 2/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:20<41:19, 11.37s/it, loss=-0.0909, reward=0.6012, temp=0.90]

   Processing 380 sentences...

Epoch 2/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:00<1:12:14, 19.97s/it, loss=-0.0479, reward=0.6085, temp=0.90]

   Processing 285 sentences...

Epoch 2/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:31<1:24:14, 23.40s/it, loss=-0.0511, reward=0.6094, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:33<2:04:39, 34.79s/it, loss=-0.0255, reward=0.6127, temp=0.90]

   Processing 144 sentences...

Epoch 2/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:42<1:37:04, 27.22s/it, loss=-0.0977, reward=0.5940, temp=0.90]

   Processing 202 sentences...

Epoch 2/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [17:59<1:26:03, 24.24s/it, loss=-0.0614, reward=0.6147, temp=0.90]

   Processing 271 sentences...

Epoch 2/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:27<1:29:20, 25.28s/it, loss=-0.0916, reward=0.5991, temp=0.90]

   Processing 51 sentences...

Epoch 2/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:29<1:04:31, 18.35s/it, loss=-0.0896, reward=0.5922, temp=0.90]

   Processing 376 sentences...

Epoch 2/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [19:00<1:17:07, 22.04s/it, loss=-0.0508, reward=0.6138, temp=0.90]

   Processing 61 sentences...

Epoch 2/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:04<57:38, 16.55s/it, loss=-0.0867, reward=0.5953, temp=0.90]  

   Processing 63 sentences...

Epoch 2/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:07<43:46, 12.63s/it, loss=-0.0876, reward=0.5894, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:15<1:40:32, 29.14s/it, loss=-0.0275, reward=0.6149, temp=0.90]

   Processing 254 sentences...

Epoch 2/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:41<1:36:54, 28.23s/it, loss=-0.0665, reward=0.6107, temp=0.90]

   Processing 118 sentences...

Epoch 2/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:04<1:30:43, 26.55s/it, loss=-0.0954, reward=0.5894, temp=0.90]

   Processing 439 sentences...

Epoch 2/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:38<1:38:48, 29.06s/it, loss=-0.0414, reward=0.6152, temp=0.90]

   Processing 51 sentences...

Epoch 2/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:41<1:11:01, 20.99s/it, loss=-0.0927, reward=0.5858, temp=0.90]

   Processing 143 sentences...

Epoch 2/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:51<1:00:04, 17.85s/it, loss=-0.0924, reward=0.5936, temp=0.90]

   Processing 300 sentences...

Epoch 2/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:12<1:03:04, 18.83s/it, loss=-0.0723, reward=0.6064, temp=0.90]

   Processing 137 sentences...

Epoch 2/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:21<52:55, 15.88s/it, loss=-0.1110, reward=0.5747, temp=0.90]  

   Processing 120 sentences...

Epoch 2/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:28<43:34, 13.14s/it, loss=-0.0813, reward=0.6038, temp=0.90]

   Processing 170 sentences...

Epoch 2/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:41<43:22, 13.14s/it, loss=-0.0821, reward=0.6024, temp=0.90]

   Processing 107 sentences...

Epoch 2/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:48<36:43, 11.19s/it, loss=-0.0895, reward=0.5986, temp=0.90]

   Processing 48 sentences...

Epoch 2/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:52<29:27,  9.02s/it, loss=-0.1020, reward=0.5732, temp=0.90]

   Processing 163 sentences...

Epoch 2/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [23:00<28:50,  8.87s/it, loss=-0.0751, reward=0.6024, temp=0.90]

   Processing 108 sentences...

Epoch 2/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:10<29:13,  9.04s/it, loss=-0.0936, reward=0.5937, temp=0.90]

   Processing 86 sentences...

Epoch 2/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:14<24:37,  7.66s/it, loss=-0.0781, reward=0.6121, temp=0.90]

   Processing 74 sentences...

Epoch 2/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:27<29:13,  9.13s/it, loss=-0.0894, reward=0.6030, temp=0.90]

   Processing 81 sentences...

Epoch 2/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:37<30:21,  9.53s/it, loss=-0.1039, reward=0.5873, temp=0.90]

   Processing 203 sentences...

Epoch 2/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:49<32:12, 10.17s/it, loss=-0.0595, reward=0.6084, temp=0.90]

   Processing 54 sentences...

Epoch 2/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:51<24:54,  7.91s/it, loss=-0.1105, reward=0.5797, temp=0.90]

   Processing 143 sentences...

Epoch 2/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [24:01<26:39,  8.51s/it, loss=-0.0804, reward=0.6053, temp=0.90]

   Processing 36 sentences...

Epoch 2/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [24:04<20:47,  6.67s/it, loss=-0.1095, reward=0.5812, temp=0.90]

   Processing 106 sentences...

Epoch 2/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:09<19:01,  6.14s/it, loss=-0.0959, reward=0.5962, temp=0.90]

   Processing 133 sentences...

Epoch 2/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:35<37:13, 12.07s/it, loss=-0.1028, reward=0.5952, temp=0.90]

   Processing 206 sentences...

Epoch 2/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:45<35:16, 11.50s/it, loss=-0.0380, reward=0.6118, temp=0.90]

   Processing 162 sentences...

Epoch 2/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:07<44:30, 14.59s/it, loss=-0.0859, reward=0.6017, temp=0.90]

   Processing 42 sentences...

Epoch 2/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:08<32:35, 10.74s/it, loss=-0.1004, reward=0.5952, temp=0.90]

   Processing 89 sentences...

Epoch 2/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:13<26:54,  8.92s/it, loss=-0.0886, reward=0.6030, temp=0.90]

   Processing 32 sentences...

Epoch 2/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:17<22:10,  7.39s/it, loss=-0.1169, reward=0.5706, temp=0.90]

   Processing 408 sentences...

Epoch 2/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:41<1:30:36, 30.37s/it, loss=-0.1041, reward=0.5951, temp=0.90]

   Processing 35 sentences...

Epoch 2/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:43<1:05:11, 21.97s/it, loss=-0.1194, reward=0.5704, temp=0.90]

   Processing 137 sentences...

Epoch 2/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:51<52:11, 17.69s/it, loss=-0.0898, reward=0.5961, temp=0.90]  

   Processing 105 sentences...

Epoch 2/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [26:59<43:35, 14.86s/it, loss=-0.0847, reward=0.5974, temp=0.90]

   Processing 124 sentences...

Epoch 2/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:11<40:25, 13.86s/it, loss=-0.1039, reward=0.5892, temp=0.90]

   Processing 220 sentences...

Epoch 2/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:25<40:26, 13.94s/it, loss=-0.0246, reward=0.6070, temp=0.90]

   Processing 98 sentences...

Epoch 2/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:29<32:11, 11.16s/it, loss=-0.0992, reward=0.5943, temp=0.90]

   Processing 133 sentences...

Epoch 2/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:36<28:11,  9.83s/it, loss=-0.0458, reward=0.6115, temp=0.90]

   Processing 315 sentences...

Epoch 2/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:05<44:31, 15.62s/it, loss=-0.0113, reward=0.6095, temp=0.90]

   Processing 81 sentences...

Epoch 2/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:09<34:08, 12.05s/it, loss=-0.0895, reward=0.5993, temp=0.90]

   Processing 286 sentences...

Epoch 2/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:26<37:42, 13.39s/it, loss=-0.0415, reward=0.6100, temp=0.90]

   Processing 122 sentences...

Epoch 2/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:32<32:03, 11.45s/it, loss=-0.0845, reward=0.6054, temp=0.90]

   Processing 170 sentences...

Epoch 2/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:50<37:01, 13.30s/it, loss=-0.0868, reward=0.6029, temp=0.90]

   Processing 376 sentences...

Epoch 2/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:34<1:02:08, 22.46s/it, loss=-0.0172, reward=0.6139, temp=0.90]

   Processing 334 sentences...

Epoch 2/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:25<1:25:35, 31.12s/it, loss=-0.0449, reward=0.6114, temp=0.90]

   Processing 102 sentences...

Epoch 2/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:36<1:08:15, 24.97s/it, loss=-0.0921, reward=0.6006, temp=0.90]

   Processing 268 sentences...

Epoch 2/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:50<58:59, 21.72s/it, loss=-0.0824, reward=0.6046, temp=0.90]  

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:26<1:59:05, 44.11s/it, loss=-0.0073, reward=0.6182, temp=0.90]

   Processing 253 sentences...

Epoch 2/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:43<1:36:14, 35.87s/it, loss=-0.0818, reward=0.6063, temp=0.90]

   Processing 132 sentences...

Epoch 2/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:52<1:14:26, 27.92s/it, loss=-0.0871, reward=0.6051, temp=0.90]

   Processing 286 sentences...

Epoch 2/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:15<1:09:58, 26.41s/it, loss=-0.0689, reward=0.6131, temp=0.90]

   Processing 128 sentences...

Epoch 2/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:23<54:32, 20.72s/it, loss=-0.0869, reward=0.6022, temp=0.90]  

   Processing 37 sentences...

Epoch 2/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:25<39:46, 15.20s/it, loss=-0.0905, reward=0.5938, temp=0.90]

   Processing 205 sentences...

Epoch 2/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:43<41:48, 16.08s/it, loss=-0.0800, reward=0.6056, temp=0.90]

   Processing 87 sentences...

Epoch 2/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:50<34:15, 13.26s/it, loss=-0.0881, reward=0.6004, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:02<1:19:03, 30.80s/it, loss=-0.0584, reward=0.6109, temp=0.90]

   Processing 144 sentences...

Epoch 2/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:14<1:04:29, 25.29s/it, loss=-0.0862, reward=0.6033, temp=0.90]

   Processing 80 sentences...

Epoch 2/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:22<50:44, 20.03s/it, loss=-0.0935, reward=0.5897, temp=0.90]  

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:49<1:41:19, 40.26s/it, loss=-0.0409, reward=0.6135, temp=0.90]

   Processing 124 sentences...

Epoch 2/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [36:59<1:17:31, 31.01s/it, loss=-0.0800, reward=0.6094, temp=0.90]

   Processing 112 sentences...

Epoch 2/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:09<1:01:38, 24.82s/it, loss=-0.0980, reward=0.5926, temp=0.90]

   Processing 75 sentences...

Epoch 2/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:13<45:57, 18.63s/it, loss=-0.1032, reward=0.5824, temp=0.90]  

   Processing 165 sentences...

Epoch 2/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:25<40:42, 16.62s/it, loss=-0.0691, reward=0.6127, temp=0.90]

   Processing 146 sentences...

Epoch 2/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:43<41:22, 17.00s/it, loss=-0.0796, reward=0.6087, temp=0.90]

   Processing 100 sentences...

Epoch 2/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:48<32:31, 13.46s/it, loss=-0.0872, reward=0.6016, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:29<1:35:10, 39.65s/it, loss=-0.0214, reward=0.6156, temp=0.90]

   Processing 209 sentences...

Epoch 2/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:44<1:16:45, 32.20s/it, loss=-0.0709, reward=0.6117, temp=0.90]

   Processing 90 sentences...

Epoch 2/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:50<57:38, 24.35s/it, loss=-0.0994, reward=0.5932, temp=0.90]  

   Processing 72 sentences...

Epoch 2/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:54<43:20, 18.44s/it, loss=-0.0974, reward=0.5974, temp=0.90]

   Processing 38 sentences...

Epoch 2/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [39:56<31:28, 13.49s/it, loss=-0.1080, reward=0.5731, temp=0.90]

   Processing 131 sentences...

Epoch 2/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [40:02<25:28, 11.00s/it, loss=-0.0864, reward=0.6024, temp=0.90]

   Processing 78 sentences...

Epoch 2/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:06<20:52,  9.07s/it, loss=-0.1012, reward=0.5910, temp=0.90]

   Processing 119 sentences...

Epoch 2/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:14<19:46,  8.66s/it, loss=-0.0868, reward=0.6010, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [41:57<1:23:59, 37.05s/it, loss=0.0588, reward=0.6220, temp=0.90]

   Processing 133 sentences...

Epoch 2/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [42:07<1:04:46, 28.79s/it, loss=-0.0858, reward=0.6064, temp=0.90]

   Processing 83 sentences...

Epoch 2/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:13<49:17, 22.07s/it, loss=-0.1077, reward=0.5831, temp=0.90]  

   Processing 56 sentences...

Epoch 2/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:16<36:17, 16.37s/it, loss=-0.1058, reward=0.5846, temp=0.90]

   Processing 133 sentences...

Epoch 2/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:23<29:43, 13.51s/it, loss=-0.0904, reward=0.5995, temp=0.90]

   Processing 101 sentences...

Epoch 2/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:29<24:30, 11.23s/it, loss=-0.1140, reward=0.5846, temp=0.90]

   Processing 151 sentences...

Epoch 2/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:40<24:28, 11.30s/it, loss=-0.0854, reward=0.6017, temp=0.90]

   Processing 248 sentences...

Epoch 2/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [42:55<26:33, 12.35s/it, loss=-0.0565, reward=0.6091, temp=0.90]

   Processing 500 sentences...sents), truncating to 500

Epoch 2/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:53<55:11, 25.87s/it, loss=-0.0030, reward=0.6113, temp=0.90]

   Processing 145 sentences...

Epoch 2/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [43:59<42:28, 20.07s/it, loss=-0.0813, reward=0.6058, temp=0.90]

   Processing 202 sentences...

Epoch 2/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:16<40:07, 19.11s/it, loss=-0.0851, reward=0.6035, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:48<47:49, 22.95s/it, loss=-0.0069, reward=0.6144, temp=0.90]

   Processing 154 sentences...

Epoch 2/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [45:05<44:04, 21.33s/it, loss=-0.1029, reward=0.5974, temp=0.90]

   Processing 345 sentences...

Epoch 2/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:54<1:00:41, 29.60s/it, loss=-0.0334, reward=0.6170, temp=0.90]

   Processing 99 sentences...

Epoch 2/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [45:59<44:59, 22.12s/it, loss=-0.0960, reward=0.5983, temp=0.90]  

   Processing 500 sentences...sents), truncating to 500

Epoch 2/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:47<1:00:29, 30.00s/it, loss=0.0044, reward=0.6216, temp=0.90]

   Processing 118 sentences...

Epoch 2/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [46:57<47:47, 23.89s/it, loss=-0.1108, reward=0.5909, temp=0.90]  

   Processing 30 sentences...

Epoch 2/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [46:59<34:08, 17.21s/it, loss=-0.1025, reward=0.5608, temp=0.90]

   Processing 76 sentences...

Epoch 2/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [47:03<25:59, 13.21s/it, loss=-0.0938, reward=0.5968, temp=0.90]

   Processing 95 sentences...

Epoch 2/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:09<22:05, 11.33s/it, loss=-0.0802, reward=0.6085, temp=0.90]

   Processing 78 sentences...

Epoch 2/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:14<17:59,  9.31s/it, loss=-0.0881, reward=0.5967, temp=0.90]

   Processing 153 sentences...

Epoch 2/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:22<17:06,  8.92s/it, loss=-0.0720, reward=0.6091, temp=0.90]

   Processing 229 sentences...

Epoch 2/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:37<20:10, 10.61s/it, loss=-0.0587, reward=0.6166, temp=0.90]

   Processing 108 sentences...

Epoch 2/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:42<16:49,  8.93s/it, loss=-0.0904, reward=0.6021, temp=0.90]

   Processing 154 sentences...

Epoch 2/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [48:01<22:14, 11.92s/it, loss=-0.0841, reward=0.6037, temp=0.90]

   Processing 44 sentences...

Epoch 2/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:08<19:39, 10.63s/it, loss=-0.1068, reward=0.5614, temp=0.90]

   Processing 229 sentences...

Epoch 2/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:46<34:28, 18.81s/it, loss=-0.0928, reward=0.5976, temp=0.90]

   Processing 221 sentences...

Epoch 2/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [49:07<35:29, 19.54s/it, loss=-0.0688, reward=0.6080, temp=0.90]

   Processing 148 sentences...

Epoch 2/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:15<28:34, 15.88s/it, loss=-0.0734, reward=0.6098, temp=0.90]

   Processing 144 sentences...

Epoch 2/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:21<23:10, 12.99s/it, loss=-0.0828, reward=0.6072, temp=0.90]

   Processing 77 sentences...

Epoch 2/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:25<18:01, 10.21s/it, loss=-0.0940, reward=0.5973, temp=0.90]

   Processing 90 sentences...

Epoch 2/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:32<16:23,  9.36s/it, loss=-0.1033, reward=0.5950, temp=0.90]

   Processing 309 sentences...

Epoch 2/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:45<18:06, 10.45s/it, loss=-0.0757, reward=0.6038, temp=0.90]

   Processing 297 sentences...

Epoch 2/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [50:05<22:56, 13.36s/it, loss=-0.0288, reward=0.6171, temp=0.90]

   Processing 286 sentences...

Epoch 2/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:30<28:49, 16.96s/it, loss=-0.0775, reward=0.6082, temp=0.90]

   Processing 66 sentences...

Epoch 2/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:33<21:24, 12.72s/it, loss=-0.1116, reward=0.5818, temp=0.90]

   Processing 220 sentences...

Epoch 2/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:49<22:38, 13.58s/it, loss=-0.0784, reward=0.6070, temp=0.90]

   Processing 209 sentences...

Epoch 2/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [51:07<24:45, 15.00s/it, loss=-0.1066, reward=0.5969, temp=0.90]

   Processing 70 sentences...

Epoch 2/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:12<19:26, 11.90s/it, loss=-0.1040, reward=0.5888, temp=0.90]

   Processing 214 sentences...

Epoch 2/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:22<18:30, 11.45s/it, loss=0.0397, reward=0.6337, temp=0.90] 

   Processing 213 sentences...

Epoch 2/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:34<18:25, 11.51s/it, loss=-0.0704, reward=0.6073, temp=0.90]

   Processing 29 sentences...

Epoch 2/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:36<13:42,  8.66s/it, loss=-0.1114, reward=0.5784, temp=0.90]

   Processing 91 sentences...

Epoch 2/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:40<11:37,  7.42s/it, loss=-0.0941, reward=0.6000, temp=0.90]

   Processing 291 sentences...

Epoch 2/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [52:00<17:21, 11.20s/it, loss=-0.0300, reward=0.6166, temp=0.90]

   Processing 80 sentences...

Epoch 2/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:09<16:05, 10.49s/it, loss=-0.0970, reward=0.6027, temp=0.90]

   Processing 43 sentences...

Epoch 2/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:12<12:25,  8.19s/it, loss=-0.1068, reward=0.5836, temp=0.90]

   Processing 45 sentences...

Epoch 2/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:15<09:43,  6.49s/it, loss=-0.1073, reward=0.5793, temp=0.90]

   Processing 89 sentences...

Epoch 2/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:19<08:27,  5.70s/it, loss=-0.0902, reward=0.6025, temp=0.90]

   Processing 194 sentences...

Epoch 2/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:28<10:09,  6.92s/it, loss=-0.0686, reward=0.6090, temp=0.90]

   Processing 137 sentences...

Epoch 2/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:39<11:29,  7.92s/it, loss=-0.1079, reward=0.5917, temp=0.90]

   Processing 73 sentences...

Epoch 2/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:42<09:26,  6.59s/it, loss=-0.1042, reward=0.5873, temp=0.90]

   Processing 158 sentences...

Epoch 2/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:55<12:10,  8.59s/it, loss=-0.0765, reward=0.6052, temp=0.90]

   Processing 56 sentences...

Epoch 2/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [52:58<09:32,  6.81s/it, loss=-0.1093, reward=0.5738, temp=0.90]

   Processing 180 sentences...

Epoch 2/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:09<11:05,  8.02s/it, loss=-0.0584, reward=0.6096, temp=0.90]

   Processing 338 sentences...

Epoch 2/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:29<15:58, 11.69s/it, loss=-0.0310, reward=0.6099, temp=0.90]

   Processing 261 sentences...

Epoch 2/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:57<22:16, 16.50s/it, loss=-0.0601, reward=0.6085, temp=0.90]

   Processing 156 sentences...

Epoch 2/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [54:03<17:51, 13.39s/it, loss=-0.0893, reward=0.6017, temp=0.90]

   Processing 43 sentences...

Epoch 2/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [54:06<13:27, 10.22s/it, loss=-0.1204, reward=0.5643, temp=0.90]

   Processing 144 sentences...

Epoch 2/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:19<14:26, 11.11s/it, loss=-0.0783, reward=0.6054, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:10<29:42, 23.14s/it, loss=0.0386, reward=0.6165, temp=0.90] 

   Processing 49 sentences...

Epoch 2/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:13<21:39, 17.10s/it, loss=-0.1141, reward=0.5642, temp=0.90]

   Processing 66 sentences...

Epoch 2/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:20<17:23, 13.91s/it, loss=-0.0977, reward=0.5875, temp=0.90]

   Processing 103 sentences...

Epoch 2/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:26<14:22, 11.65s/it, loss=-0.0871, reward=0.6009, temp=0.90]

   Processing 350 sentences...

Epoch 2/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:54<20:02, 16.47s/it, loss=-0.0255, reward=0.6126, temp=0.90]

   Processing 111 sentences...

Epoch 2/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [56:02<16:54, 14.09s/it, loss=-0.1023, reward=0.5896, temp=0.90]

   Processing 51 sentences...

Epoch 2/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [56:05<12:35, 10.64s/it, loss=-0.0939, reward=0.5978, temp=0.90]

   Processing 146 sentences...

Epoch 2/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:17<12:56, 11.10s/it, loss=-0.0699, reward=0.6076, temp=0.90]

   Processing 161 sentences...

Epoch 2/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:28<12:32, 10.91s/it, loss=-0.0637, reward=0.6120, temp=0.90]

   Processing 243 sentences...

Epoch 2/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:43<13:56, 12.31s/it, loss=-0.0332, reward=0.6221, temp=0.90]

   Processing 297 sentences...

Epoch 2/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [57:10<18:45, 16.79s/it, loss=-0.0701, reward=0.6083, temp=0.90]

   Processing 47 sentences...

Epoch 2/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [57:13<13:48, 12.56s/it, loss=-0.1094, reward=0.5580, temp=0.90]

   Processing 415 sentences...

Epoch 2/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:22<31:50, 29.40s/it, loss=-0.0116, reward=0.6177, temp=0.90]

   Processing 138 sentences...

Epoch 2/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:34<25:55, 24.31s/it, loss=-0.0901, reward=0.6025, temp=0.90]

   Processing 46 sentences...

Epoch 2/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:36<18:33, 17.67s/it, loss=-0.1011, reward=0.5695, temp=0.90]

   Processing 73 sentences...

Epoch 2/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:40<13:59, 13.54s/it, loss=-0.0918, reward=0.5952, temp=0.90]

   Processing 136 sentences...

Epoch 2/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:53<13:34, 13.35s/it, loss=-0.0917, reward=0.5947, temp=0.90]

   Processing 93 sentences...

Epoch 2/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [58:59<10:57, 10.96s/it, loss=-0.0733, reward=0.6119, temp=0.90]

   Processing 267 sentences...

Epoch 2/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:15<12:27, 12.67s/it, loss=-0.0532, reward=0.6106, temp=0.90]

   Processing 421 sentences...

Epoch 2/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:23<28:18, 29.29s/it, loss=-0.0415, reward=0.6139, temp=0.90]

   Processing 78 sentences...

Epoch 2/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:27<20:30, 21.58s/it, loss=-0.0869, reward=0.6007, temp=0.90]

   Processing 275 sentences...

Epoch 2/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:47<19:47, 21.21s/it, loss=-0.0770, reward=0.6073, temp=0.90]

   Processing 56 sentences...

Epoch 2/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:51<14:33, 15.87s/it, loss=-0.0990, reward=0.5783, temp=0.90]

   Processing 292 sentences...

Epoch 2/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:01:05<13:58, 15.54s/it, loss=-0.0500, reward=0.6140, temp=0.90]

   Processing 393 sentences...

Epoch 2/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:46<20:18, 23.00s/it, loss=-0.0441, reward=0.6127, temp=0.90]

   Processing 412 sentences...

Epoch 2/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:31<25:37, 29.56s/it, loss=-0.0373, reward=0.6159, temp=0.90]

   Processing 53 sentences...

Epoch 2/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:33<18:11, 21.40s/it, loss=-0.1027, reward=0.5805, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:18<23:43, 28.46s/it, loss=-0.0432, reward=0.6098, temp=0.90]

   Processing 449 sentences...

Epoch 2/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:50<24:14, 29.69s/it, loss=-0.0233, reward=0.6177, temp=0.90]

   Processing 81 sentences...

Epoch 2/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:55<17:45, 22.21s/it, loss=-0.0931, reward=0.5964, temp=0.90]

   Processing 368 sentences...

Epoch 2/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:19<17:38, 22.53s/it, loss=-0.0765, reward=0.6083, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:22<26:47, 34.94s/it, loss=-0.0371, reward=0.6148, temp=0.90]

   Processing 45 sentences...

Epoch 2/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:25<18:54, 25.22s/it, loss=-0.0962, reward=0.5768, temp=0.90]

   Processing 45 sentences...

Epoch 2/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:27<13:30, 18.41s/it, loss=-0.1025, reward=0.5610, temp=0.90]

   Processing 96 sentences...

Epoch 2/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:36<11:07, 15.52s/it, loss=-0.0944, reward=0.5893, temp=0.90]

   Processing 184 sentences...

Epoch 2/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:49<10:16, 14.68s/it, loss=-0.0738, reward=0.6048, temp=0.90]

   Processing 63 sentences...

Epoch 2/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:54<08:06, 11.86s/it, loss=-0.0892, reward=0.5962, temp=0.90]

   Processing 123 sentences...

Epoch 2/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:06:01<06:51, 10.28s/it, loss=-0.0859, reward=0.6009, temp=0.90]

   Processing 72 sentences...

Epoch 2/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:06:06<05:42,  8.77s/it, loss=-0.0919, reward=0.5894, temp=0.90]

   Processing 268 sentences...

Epoch 2/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:55<13:07, 20.72s/it, loss=-0.0361, reward=0.6180, temp=0.90]

   Processing 129 sentences...

Epoch 2/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:07:02<10:17, 16.70s/it, loss=-0.0924, reward=0.5964, temp=0.90]

   Processing 57 sentences...

Epoch 2/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:07:05<07:32, 12.58s/it, loss=-0.0941, reward=0.5940, temp=0.90]

   Processing 267 sentences...

Epoch 2/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:23<08:15, 14.17s/it, loss=-0.0789, reward=0.6026, temp=0.90]

   Processing 84 sentences...

Epoch 2/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:26<06:12, 10.96s/it, loss=-0.1034, reward=0.5868, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:15<12:10, 22.13s/it, loss=0.0123, reward=0.6169, temp=0.90] 

   Processing 97 sentences...

Epoch 2/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:28<10:21, 19.41s/it, loss=-0.0735, reward=0.6149, temp=0.90]

   Processing 232 sentences...

Epoch 2/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:08:57<11:32, 22.33s/it, loss=-0.0810, reward=0.6060, temp=0.90]

   Processing 111 sentences...

Epoch 2/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:09:03<08:48, 17.62s/it, loss=-0.0888, reward=0.6031, temp=0.90]

   Processing 24 sentences...

Epoch 2/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:09:05<06:10, 12.76s/it, loss=-0.1208, reward=0.5522, temp=0.90]

   Processing 112 sentences...

Epoch 2/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:09:11<05:01, 10.77s/it, loss=-0.0827, reward=0.6032, temp=0.90]

   Processing 43 sentences...

Epoch 2/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:14<03:45,  8.34s/it, loss=-0.0952, reward=0.5895, temp=0.90]

   Processing 183 sentences...

Epoch 2/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:24<03:56,  9.10s/it, loss=-0.0606, reward=0.6105, temp=0.90]

   Processing 117 sentences...

Epoch 2/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:29<03:14,  7.80s/it, loss=-0.0959, reward=0.5943, temp=0.90]

   Processing 106 sentences...

Epoch 2/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:36<02:56,  7.36s/it, loss=-0.0842, reward=0.6025, temp=0.90]

   Processing 67 sentences...

Epoch 2/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:38<02:15,  5.89s/it, loss=-0.0934, reward=0.5982, temp=0.90]

   Processing 112 sentences...

Epoch 2/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:47<02:27,  6.71s/it, loss=-0.1114, reward=0.5825, temp=0.90]

   Processing 79 sentences...

Epoch 2/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:51<02:08,  6.12s/it, loss=-0.0980, reward=0.5896, temp=0.90]

   Processing 408 sentences...

Epoch 2/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:12<03:28, 10.45s/it, loss=0.0379, reward=0.6195, temp=0.90] 

   Processing 103 sentences...

Epoch 2/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:17<02:45,  8.69s/it, loss=-0.0938, reward=0.5979, temp=0.90]

   Processing 103 sentences...

Epoch 2/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:23<02:26,  8.12s/it, loss=-0.0852, reward=0.6027, temp=0.90]

   Processing 118 sentences...

Epoch 2/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:29<02:06,  7.46s/it, loss=-0.0979, reward=0.5970, temp=0.90]

   Processing 249 sentences...

Epoch 2/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:49<03:00, 11.26s/it, loss=-0.0797, reward=0.6037, temp=0.90]

   Processing 145 sentences...

Epoch 2/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:55<02:23,  9.57s/it, loss=-0.0756, reward=0.6079, temp=0.90]

   Processing 294 sentences...

Epoch 2/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:20<03:19, 14.22s/it, loss=-0.0203, reward=0.6208, temp=0.90]

   Processing 147 sentences...

Epoch 2/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:28<02:41, 12.46s/it, loss=-0.1017, reward=0.5994, temp=0.90]

   Processing 304 sentences...

Epoch 2/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:12:02<03:46, 18.92s/it, loss=-0.1049, reward=0.5974, temp=0.90]

   Processing 128 sentences...

Epoch 2/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:15<03:08, 17.16s/it, loss=-0.0851, reward=0.6070, temp=0.90]

   Processing 247 sentences...

Epoch 2/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:32<02:50, 17.08s/it, loss=-0.0585, reward=0.6099, temp=0.90]

   Processing 165 sentences...

Epoch 2/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:49<02:33, 17.11s/it, loss=-0.0779, reward=0.6093, temp=0.90]

   Processing 38 sentences...

Epoch 2/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:52<01:42, 12.83s/it, loss=-0.1189, reward=0.5559, temp=0.90]

   Processing 63 sentences...

Epoch 2/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:55<01:09,  9.92s/it, loss=-0.0982, reward=0.5903, temp=0.90]

   Processing 215 sentences...

Epoch 2/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:21<01:27, 14.59s/it, loss=-0.0929, reward=0.5988, temp=0.90]

   Processing 39 sentences...

Epoch 2/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:23<00:54, 10.80s/it, loss=-0.1027, reward=0.5736, temp=0.90]

   Processing 105 sentences...

Epoch 2/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:28<00:35,  9.00s/it, loss=-0.0793, reward=0.6053, temp=0.90]

   Processing 138 sentences...

Epoch 2/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:39<00:28,  9.57s/it, loss=-0.0697, reward=0.6103, temp=0.90]

   Processing 161 sentences...

Epoch 2/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:48<00:19,  9.66s/it, loss=-0.0809, reward=0.6052, temp=0.90]

   Processing 35 sentences...

Epoch 2/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:50<00:07,  7.35s/it, loss=-0.1141, reward=0.5738, temp=0.90]

   Processing 500 sentences...ents), truncating to 500

Epoch 2/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:23<00:00, 15.94s/it, loss=0.0605, reward=0.6182, temp=0.90] 


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 2/10:
  Train Loss: -0.0754
  Overall Val Reward: 0.5808
  Learning Rate: 0.000067
  Temperature: 0.9000

  Aspect-wise Val Rewards:
    facts       : 0.5837
    analysis    : 0.5829
    argument    : 0.5795
    judgement   : 0.5787
    statute     : 0.5790

  ‚úÖ Saved best model (Val Reward: 0.5808)

   üî• Warmup: LR = 0.000100


Epoch 3/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 3/10:   0%|          | 1/280 [00:28<2:11:21, 28.25s/it, loss=-0.0314, reward=0.6144, temp=0.80]

   Processing 353 sentences...

Epoch 3/10:   1%|          | 2/280 [00:52<1:59:09, 25.72s/it, loss=-0.0690, reward=0.6082, temp=0.80]

   Processing 57 sentences...

Epoch 3/10:   1%|          | 3/280 [00:59<1:19:50, 17.30s/it, loss=-0.1050, reward=0.5726, temp=0.80]

   Processing 96 sentences...

Epoch 3/10:   1%|‚ñè         | 4/280 [01:13<1:13:28, 15.97s/it, loss=-0.0944, reward=0.5968, temp=0.80]

   Processing 71 sentences...

Epoch 3/10:   2%|‚ñè         | 5/280 [01:16<51:50, 11.31s/it, loss=-0.0965, reward=0.5897, temp=0.80]  

   Processing 62 sentences...

Epoch 3/10:   2%|‚ñè         | 6/280 [01:19<38:21,  8.40s/it, loss=-0.0889, reward=0.5949, temp=0.80]

   Processing 350 sentences...

Epoch 3/10:   2%|‚ñé         | 7/280 [01:51<1:13:35, 16.17s/it, loss=-0.0431, reward=0.6128, temp=0.80]

   Processing 263 sentences...

Epoch 3/10:   3%|‚ñé         | 8/280 [02:07<1:12:48, 16.06s/it, loss=-0.0640, reward=0.6104, temp=0.80]

   Processing 176 sentences...

Epoch 3/10:   3%|‚ñé         | 9/280 [02:16<1:03:33, 14.07s/it, loss=-0.0773, reward=0.6068, temp=0.80]

   Processing 318 sentences...

Epoch 3/10:   4%|‚ñé         | 10/280 [03:18<2:10:01, 28.90s/it, loss=-0.0960, reward=0.5955, temp=0.80]

   Processing 338 sentences...

Epoch 3/10:   4%|‚ñç         | 11/280 [03:43<2:02:55, 27.42s/it, loss=-0.0467, reward=0.6155, temp=0.80]

   Processing 40 sentences...

Epoch 3/10:   4%|‚ñç         | 12/280 [03:45<1:28:27, 19.80s/it, loss=-0.0992, reward=0.5694, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:   5%|‚ñç         | 13/280 [04:39<2:13:39, 30.04s/it, loss=-0.0465, reward=0.6109, temp=0.80]

   Processing 423 sentences...

Epoch 3/10:   5%|‚ñå         | 14/280 [05:38<2:52:47, 38.98s/it, loss=-0.0401, reward=0.6184, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:   5%|‚ñå         | 15/280 [05:57<2:24:58, 32.83s/it, loss=0.0398, reward=0.6426, temp=0.80] 

   Processing 82 sentences...

Epoch 3/10:   6%|‚ñå         | 16/280 [06:00<1:44:48, 23.82s/it, loss=-0.0923, reward=0.5909, temp=0.80]

   Processing 137 sentences...

Epoch 3/10:   6%|‚ñå         | 17/280 [06:07<1:22:45, 18.88s/it, loss=-0.0875, reward=0.6029, temp=0.80]

   Processing 30 sentences...

Epoch 3/10:   6%|‚ñã         | 18/280 [06:10<1:00:58, 13.96s/it, loss=-0.0942, reward=0.5709, temp=0.80]

   Processing 82 sentences...

Epoch 3/10:   7%|‚ñã         | 19/280 [06:18<53:03, 12.20s/it, loss=-0.0864, reward=0.5866, temp=0.80]  

   Processing 61 sentences...

Epoch 3/10:   7%|‚ñã         | 20/280 [06:21<40:53,  9.44s/it, loss=-0.0863, reward=0.5864, temp=0.80]

   Processing 50 sentences...

Epoch 3/10:   8%|‚ñä         | 21/280 [06:24<32:24,  7.51s/it, loss=-0.0974, reward=0.5593, temp=0.80]

   Processing 115 sentences...

Epoch 3/10:   8%|‚ñä         | 22/280 [06:32<33:43,  7.84s/it, loss=-0.0779, reward=0.6012, temp=0.80]

   Processing 80 sentences...

Epoch 3/10:   8%|‚ñä         | 23/280 [06:35<27:30,  6.42s/it, loss=-0.0883, reward=0.5847, temp=0.80]

   Processing 434 sentences...

Epoch 3/10:   9%|‚ñä         | 24/280 [08:02<2:09:53, 30.44s/it, loss=-0.0366, reward=0.6164, temp=0.80]

   Processing 254 sentences...

Epoch 3/10:   9%|‚ñâ         | 25/280 [08:36<2:13:56, 31.52s/it, loss=-0.0804, reward=0.6003, temp=0.80]

   Processing 329 sentences...

Epoch 3/10:   9%|‚ñâ         | 26/280 [08:55<1:58:09, 27.91s/it, loss=-0.0600, reward=0.6097, temp=0.80]

   Processing 87 sentences...

Epoch 3/10:  10%|‚ñâ         | 27/280 [09:00<1:28:23, 20.96s/it, loss=-0.0872, reward=0.5960, temp=0.80]

   Processing 39 sentences...

Epoch 3/10:  10%|‚ñà         | 28/280 [09:02<1:03:47, 15.19s/it, loss=-0.0959, reward=0.5745, temp=0.80]

   Processing 83 sentences...

Epoch 3/10:  10%|‚ñà         | 29/280 [09:05<48:47, 11.66s/it, loss=-0.0881, reward=0.5970, temp=0.80]  

   Processing 177 sentences...

Epoch 3/10:  11%|‚ñà         | 30/280 [09:17<49:05, 11.78s/it, loss=-0.0890, reward=0.5964, temp=0.80]

   Processing 82 sentences...

Epoch 3/10:  11%|‚ñà         | 31/280 [09:25<43:53, 10.57s/it, loss=-0.0950, reward=0.5863, temp=0.80]

   Processing 136 sentences...

Epoch 3/10:  11%|‚ñà‚ñè        | 32/280 [09:30<36:54,  8.93s/it, loss=-0.0839, reward=0.5996, temp=0.80]

   Processing 166 sentences...

Epoch 3/10:  12%|‚ñà‚ñè        | 33/280 [09:40<38:09,  9.27s/it, loss=-0.0693, reward=0.6073, temp=0.80]

   Processing 91 sentences...

Epoch 3/10:  12%|‚ñà‚ñè        | 34/280 [09:46<33:28,  8.16s/it, loss=-0.0905, reward=0.5951, temp=0.80]

   Processing 87 sentences...

Epoch 3/10:  12%|‚ñà‚ñé        | 35/280 [09:51<29:22,  7.19s/it, loss=-0.0880, reward=0.6018, temp=0.80]

   Processing 203 sentences...

Epoch 3/10:  13%|‚ñà‚ñé        | 36/280 [10:05<37:19,  9.18s/it, loss=-0.0656, reward=0.6108, temp=0.80]

   Processing 140 sentences...

Epoch 3/10:  13%|‚ñà‚ñé        | 37/280 [10:15<38:48,  9.58s/it, loss=-0.0920, reward=0.6002, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  14%|‚ñà‚ñé        | 38/280 [11:06<1:28:49, 22.02s/it, loss=-0.0073, reward=0.6147, temp=0.80]

   Processing 97 sentences...

Epoch 3/10:  14%|‚ñà‚ñç        | 39/280 [11:12<1:09:13, 17.23s/it, loss=-0.1106, reward=0.5857, temp=0.80]

   Processing 182 sentences...

Epoch 3/10:  14%|‚ñà‚ñç        | 40/280 [11:25<1:03:07, 15.78s/it, loss=-0.0652, reward=0.6140, temp=0.80]

   Processing 87 sentences...

Epoch 3/10:  15%|‚ñà‚ñç        | 41/280 [11:29<48:57, 12.29s/it, loss=-0.0920, reward=0.6009, temp=0.80]  

   Processing 235 sentences...

Epoch 3/10:  15%|‚ñà‚ñå        | 42/280 [12:03<1:15:11, 18.96s/it, loss=-0.0822, reward=0.6058, temp=0.80]

   Processing 50 sentences...

Epoch 3/10:  15%|‚ñà‚ñå        | 43/280 [12:06<55:14, 13.98s/it, loss=-0.1029, reward=0.5790, temp=0.80]  

   Processing 70 sentences...

Epoch 3/10:  16%|‚ñà‚ñå        | 44/280 [12:10<43:37, 11.09s/it, loss=-0.0977, reward=0.5894, temp=0.80]

   Processing 65 sentences...

Epoch 3/10:  16%|‚ñà‚ñå        | 45/280 [12:16<37:09,  9.49s/it, loss=-0.0936, reward=0.5936, temp=0.80]

   Processing 482 sentences...

Epoch 3/10:  16%|‚ñà‚ñã        | 46/280 [13:08<1:27:25, 22.42s/it, loss=-0.0112, reward=0.6163, temp=0.80]

   Processing 164 sentences...

Epoch 3/10:  17%|‚ñà‚ñã        | 47/280 [13:24<1:19:32, 20.48s/it, loss=-0.0788, reward=0.6080, temp=0.80]

   Processing 102 sentences...

Epoch 3/10:  17%|‚ñà‚ñã        | 48/280 [13:31<1:02:58, 16.29s/it, loss=-0.0856, reward=0.6066, temp=0.80]

   Processing 48 sentences...

Epoch 3/10:  18%|‚ñà‚ñä        | 49/280 [13:33<45:53, 11.92s/it, loss=-0.1096, reward=0.5603, temp=0.80]  

   Processing 74 sentences...

Epoch 3/10:  18%|‚ñà‚ñä        | 50/280 [13:38<38:19, 10.00s/it, loss=-0.0920, reward=0.5950, temp=0.80]

   Processing 67 sentences...

Epoch 3/10:  18%|‚ñà‚ñä        | 51/280 [13:44<33:37,  8.81s/it, loss=-0.0962, reward=0.5915, temp=0.80]

   Processing 31 sentences...

Epoch 3/10:  19%|‚ñà‚ñä        | 52/280 [13:46<25:13,  6.64s/it, loss=-0.0938, reward=0.5891, temp=0.80]

   Processing 53 sentences...

Epoch 3/10:  19%|‚ñà‚ñâ        | 53/280 [13:49<21:39,  5.72s/it, loss=-0.1001, reward=0.5804, temp=0.80]

   Processing 123 sentences...

Epoch 3/10:  19%|‚ñà‚ñâ        | 54/280 [13:57<23:40,  6.28s/it, loss=-0.0848, reward=0.6014, temp=0.80]

   Processing 169 sentences...

Epoch 3/10:  20%|‚ñà‚ñâ        | 55/280 [14:07<27:34,  7.36s/it, loss=-0.0822, reward=0.6014, temp=0.80]

   Processing 33 sentences...

Epoch 3/10:  20%|‚ñà‚ñà        | 56/280 [14:10<22:28,  6.02s/it, loss=-0.1085, reward=0.5562, temp=0.80]

   Processing 190 sentences...

Epoch 3/10:  20%|‚ñà‚ñà        | 57/280 [14:21<27:54,  7.51s/it, loss=-0.0636, reward=0.6080, temp=0.80]

   Processing 223 sentences...

Epoch 3/10:  21%|‚ñà‚ñà        | 58/280 [14:38<39:01, 10.55s/it, loss=-0.0754, reward=0.6022, temp=0.80]

   Processing 57 sentences...

Epoch 3/10:  21%|‚ñà‚ñà        | 59/280 [14:41<30:15,  8.22s/it, loss=-0.0899, reward=0.5998, temp=0.80]

   Processing 362 sentences...

Epoch 3/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:07<49:11, 13.42s/it, loss=0.1288, reward=0.6522, temp=0.80] 

   Processing 192 sentences...

Epoch 3/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:24<53:57, 14.78s/it, loss=-0.0629, reward=0.6130, temp=0.80]

   Processing 108 sentences...

Epoch 3/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:28<41:10, 11.33s/it, loss=-0.0882, reward=0.6056, temp=0.80]

   Processing 380 sentences...

Epoch 3/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:07<1:11:09, 19.67s/it, loss=-0.1053, reward=0.5989, temp=0.80]

   Processing 285 sentences...

Epoch 3/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:38<1:22:39, 22.96s/it, loss=-0.0798, reward=0.6077, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:37<2:01:24, 33.88s/it, loss=-0.0063, reward=0.6223, temp=0.80]

   Processing 144 sentences...

Epoch 3/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:46<1:34:34, 26.51s/it, loss=-0.0973, reward=0.5961, temp=0.80]

   Processing 202 sentences...

Epoch 3/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [18:03<1:23:52, 23.63s/it, loss=-0.0719, reward=0.6162, temp=0.80]

   Processing 271 sentences...

Epoch 3/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:31<1:28:26, 25.03s/it, loss=-0.0885, reward=0.6033, temp=0.80]

   Processing 51 sentences...

Epoch 3/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:34<1:03:52, 18.17s/it, loss=-0.0937, reward=0.5936, temp=0.80]

   Processing 376 sentences...

Epoch 3/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [19:04<1:16:39, 21.90s/it, loss=-0.0582, reward=0.6168, temp=0.80]

   Processing 61 sentences...

Epoch 3/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:08<57:15, 16.44s/it, loss=-0.0910, reward=0.5937, temp=0.80]  

   Processing 63 sentences...

Epoch 3/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:11<43:20, 12.50s/it, loss=-0.0955, reward=0.5803, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:20<1:41:31, 29.43s/it, loss=-0.0457, reward=0.6150, temp=0.80]

   Processing 254 sentences...

Epoch 3/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:47<1:38:19, 28.64s/it, loss=-0.0871, reward=0.6021, temp=0.80]

   Processing 118 sentences...

Epoch 3/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:09<1:31:15, 26.71s/it, loss=-0.0776, reward=0.6159, temp=0.80]

   Processing 439 sentences...

Epoch 3/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:44<1:39:06, 29.15s/it, loss=-0.0792, reward=0.6074, temp=0.80]

   Processing 51 sentences...

Epoch 3/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:46<1:11:14, 21.06s/it, loss=-0.0904, reward=0.5971, temp=0.80]

   Processing 143 sentences...

Epoch 3/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:57<1:00:10, 17.87s/it, loss=-0.0906, reward=0.5980, temp=0.80]

   Processing 300 sentences...

Epoch 3/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:18<1:03:15, 18.88s/it, loss=-0.0829, reward=0.6072, temp=0.80]

   Processing 137 sentences...

Epoch 3/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:27<52:56, 15.88s/it, loss=-0.0991, reward=0.5889, temp=0.80]  

   Processing 120 sentences...

Epoch 3/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:33<43:35, 13.14s/it, loss=-0.0887, reward=0.6026, temp=0.80]

   Processing 170 sentences...

Epoch 3/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:47<43:23, 13.15s/it, loss=-0.0999, reward=0.5981, temp=0.80]

   Processing 107 sentences...

Epoch 3/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:53<36:51, 11.23s/it, loss=-0.0823, reward=0.6118, temp=0.80]

   Processing 48 sentences...

Epoch 3/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:57<29:38,  9.07s/it, loss=-0.1091, reward=0.5757, temp=0.80]

   Processing 163 sentences...

Epoch 3/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [23:06<29:02,  8.94s/it, loss=-0.0687, reward=0.6129, temp=0.80]

   Processing 108 sentences...

Epoch 3/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:15<29:20,  9.07s/it, loss=-0.1015, reward=0.5922, temp=0.80]

   Processing 86 sentences...

Epoch 3/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:20<24:38,  7.66s/it, loss=-0.0926, reward=0.5994, temp=0.80]

   Processing 74 sentences...

Epoch 3/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:32<29:10,  9.12s/it, loss=-0.1197, reward=0.5783, temp=0.80]

   Processing 81 sentences...

Epoch 3/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:42<29:40,  9.32s/it, loss=-0.1049, reward=0.5865, temp=0.80]

   Processing 203 sentences...

Epoch 3/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:54<31:35,  9.98s/it, loss=-0.0579, reward=0.6074, temp=0.80]

   Processing 54 sentences...

Epoch 3/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:56<24:26,  7.76s/it, loss=-0.1232, reward=0.5684, temp=0.80]

   Processing 143 sentences...

Epoch 3/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [24:06<25:57,  8.29s/it, loss=-0.0695, reward=0.6055, temp=0.80]

   Processing 36 sentences...

Epoch 3/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [24:08<20:16,  6.50s/it, loss=-0.1017, reward=0.5852, temp=0.80]

   Processing 106 sentences...

Epoch 3/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:13<18:40,  6.02s/it, loss=-0.0845, reward=0.6017, temp=0.80]

   Processing 133 sentences...

Epoch 3/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:39<36:49, 11.94s/it, loss=-0.1142, reward=0.5885, temp=0.80]

   Processing 206 sentences...

Epoch 3/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:49<34:54, 11.38s/it, loss=-0.0517, reward=0.6069, temp=0.80]

   Processing 162 sentences...

Epoch 3/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:11<44:18, 14.53s/it, loss=-0.0704, reward=0.6074, temp=0.80]

   Processing 42 sentences...

Epoch 3/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:12<32:27, 10.70s/it, loss=-0.0948, reward=0.5943, temp=0.80]

   Processing 89 sentences...

Epoch 3/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:17<26:36,  8.82s/it, loss=-0.1017, reward=0.5959, temp=0.80]

   Processing 32 sentences...

Epoch 3/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:21<22:08,  7.38s/it, loss=-0.1250, reward=0.5535, temp=0.80]

   Processing 408 sentences...

Epoch 3/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:40<1:26:25, 28.97s/it, loss=-0.0375, reward=0.6041, temp=0.80]

   Processing 35 sentences...

Epoch 3/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:43<1:02:14, 20.98s/it, loss=-0.1109, reward=0.5718, temp=0.80]

   Processing 137 sentences...

Epoch 3/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:50<50:01, 16.96s/it, loss=-0.0711, reward=0.6033, temp=0.80]  

   Processing 105 sentences...

Epoch 3/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [26:59<42:15, 14.41s/it, loss=-0.0973, reward=0.5869, temp=0.80]

   Processing 124 sentences...

Epoch 3/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:11<39:59, 13.71s/it, loss=-0.0892, reward=0.5915, temp=0.80]

   Processing 220 sentences...

Epoch 3/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:25<40:13, 13.87s/it, loss=-0.0334, reward=0.6138, temp=0.80]

   Processing 98 sentences...

Epoch 3/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:30<32:03, 11.12s/it, loss=-0.1030, reward=0.5884, temp=0.80]

   Processing 133 sentences...

Epoch 3/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:37<28:15,  9.86s/it, loss=-0.0887, reward=0.5964, temp=0.80]

   Processing 315 sentences...

Epoch 3/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:05<44:06, 15.48s/it, loss=-0.0749, reward=0.6008, temp=0.80]

   Processing 81 sentences...

Epoch 3/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:09<33:49, 11.94s/it, loss=-0.1038, reward=0.5830, temp=0.80]

   Processing 286 sentences...

Epoch 3/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:25<37:17, 13.24s/it, loss=-0.0546, reward=0.6060, temp=0.80]

   Processing 122 sentences...

Epoch 3/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:32<31:42, 11.33s/it, loss=-0.0808, reward=0.5991, temp=0.80]

   Processing 170 sentences...

Epoch 3/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:50<37:06, 13.33s/it, loss=-0.0693, reward=0.6113, temp=0.80]

   Processing 376 sentences...

Epoch 3/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:33<1:01:55, 22.39s/it, loss=-0.0273, reward=0.6152, temp=0.80]

   Processing 334 sentences...

Epoch 3/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:24<1:24:35, 30.76s/it, loss=-0.0763, reward=0.6101, temp=0.80]

   Processing 102 sentences...

Epoch 3/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:35<1:07:40, 24.76s/it, loss=-0.0927, reward=0.5984, temp=0.80]

   Processing 268 sentences...

Epoch 3/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:49<58:48, 21.65s/it, loss=-0.0597, reward=0.6153, temp=0.80]  

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:27<2:00:21, 44.58s/it, loss=-0.0417, reward=0.6111, temp=0.80]

   Processing 253 sentences...

Epoch 3/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:44<1:37:27, 36.32s/it, loss=-0.0821, reward=0.6040, temp=0.80]

   Processing 132 sentences...

Epoch 3/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:53<1:15:12, 28.20s/it, loss=-0.0880, reward=0.6036, temp=0.80]

   Processing 286 sentences...

Epoch 3/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:16<1:10:14, 26.50s/it, loss=-0.0748, reward=0.6101, temp=0.80]

   Processing 128 sentences...

Epoch 3/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:23<54:32, 20.71s/it, loss=-0.0818, reward=0.6060, temp=0.80]  

   Processing 37 sentences...

Epoch 3/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:25<39:46, 15.20s/it, loss=-0.0902, reward=0.5926, temp=0.80]

   Processing 205 sentences...

Epoch 3/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:44<41:49, 16.08s/it, loss=-0.0825, reward=0.6059, temp=0.80]

   Processing 87 sentences...

Epoch 3/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:50<34:10, 13.23s/it, loss=-0.0931, reward=0.5923, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:01<1:18:01, 30.40s/it, loss=-0.0571, reward=0.6120, temp=0.80]

   Processing 144 sentences...

Epoch 3/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:13<1:03:38, 24.95s/it, loss=-0.0923, reward=0.5990, temp=0.80]

   Processing 80 sentences...

Epoch 3/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:20<50:03, 19.76s/it, loss=-0.0859, reward=0.6076, temp=0.80]  

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:46<1:39:45, 39.64s/it, loss=-0.0543, reward=0.6135, temp=0.80]

   Processing 124 sentences...

Epoch 3/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [36:56<1:16:20, 30.53s/it, loss=-0.0862, reward=0.6077, temp=0.80]

   Processing 112 sentences...

Epoch 3/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:06<1:00:38, 24.42s/it, loss=-0.0978, reward=0.5929, temp=0.80]

   Processing 75 sentences...

Epoch 3/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:10<45:22, 18.39s/it, loss=-0.0963, reward=0.5926, temp=0.80]  

   Processing 165 sentences...

Epoch 3/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:23<40:37, 16.58s/it, loss=-0.0778, reward=0.6136, temp=0.80]

   Processing 146 sentences...

Epoch 3/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:40<41:15, 16.96s/it, loss=-0.0822, reward=0.6110, temp=0.80]

   Processing 100 sentences...

Epoch 3/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:46<32:26, 13.43s/it, loss=-0.0875, reward=0.6091, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:23<1:32:58, 38.74s/it, loss=-0.0400, reward=0.6161, temp=0.80]

   Processing 209 sentences...

Epoch 3/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:38<1:15:23, 31.63s/it, loss=-0.1005, reward=0.6037, temp=0.80]

   Processing 90 sentences...

Epoch 3/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:45<56:45, 23.99s/it, loss=-0.0921, reward=0.6070, temp=0.80]  

   Processing 72 sentences...

Epoch 3/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:49<42:42, 18.18s/it, loss=-0.1043, reward=0.5888, temp=0.80]

   Processing 38 sentences...

Epoch 3/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [39:51<31:04, 13.32s/it, loss=-0.1053, reward=0.5865, temp=0.80]

   Processing 131 sentences...

Epoch 3/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [39:56<25:14, 10.90s/it, loss=-0.0836, reward=0.6094, temp=0.80]

   Processing 78 sentences...

Epoch 3/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:01<20:44,  9.02s/it, loss=-0.1060, reward=0.5905, temp=0.80]

   Processing 119 sentences...

Epoch 3/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:09<19:44,  8.65s/it, loss=-0.1043, reward=0.5929, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [41:52<1:23:59, 37.06s/it, loss=0.0466, reward=0.6218, temp=0.80]

   Processing 133 sentences...

Epoch 3/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [42:02<1:04:43, 28.76s/it, loss=-0.0991, reward=0.6035, temp=0.80]

   Processing 83 sentences...

Epoch 3/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:08<49:17, 22.07s/it, loss=-0.1101, reward=0.5919, temp=0.80]  

   Processing 56 sentences...

Epoch 3/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:11<36:15, 16.36s/it, loss=-0.1069, reward=0.5883, temp=0.80]

   Processing 133 sentences...

Epoch 3/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:18<29:47, 13.54s/it, loss=-0.1031, reward=0.5962, temp=0.80]

   Processing 101 sentences...

Epoch 3/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:24<24:38, 11.29s/it, loss=-0.1050, reward=0.5891, temp=0.80]

   Processing 151 sentences...

Epoch 3/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:35<24:30, 11.31s/it, loss=-0.1004, reward=0.5947, temp=0.80]

   Processing 248 sentences...

Epoch 3/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [42:50<26:21, 12.26s/it, loss=-0.0757, reward=0.6044, temp=0.80]

   Processing 500 sentences...sents), truncating to 500

Epoch 3/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:48<55:11, 25.87s/it, loss=0.0116, reward=0.6132, temp=0.80] 

   Processing 145 sentences...

Epoch 3/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [43:54<42:29, 20.07s/it, loss=-0.0804, reward=0.6073, temp=0.80]

   Processing 202 sentences...

Epoch 3/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:11<40:05, 19.09s/it, loss=-0.0697, reward=0.6062, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:43<47:36, 22.85s/it, loss=-0.0178, reward=0.6156, temp=0.80]

   Processing 154 sentences...

Epoch 3/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [45:00<43:53, 21.24s/it, loss=-0.0891, reward=0.6057, temp=0.80]

   Processing 345 sentences...

Epoch 3/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:49<1:00:52, 29.69s/it, loss=-0.0534, reward=0.6166, temp=0.80]

   Processing 99 sentences...

Epoch 3/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [45:54<45:02, 22.15s/it, loss=-0.0979, reward=0.5933, temp=0.80]  

   Processing 500 sentences...sents), truncating to 500

Epoch 3/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:44<1:01:17, 30.39s/it, loss=-0.0286, reward=0.6204, temp=0.80]

   Processing 118 sentences...

Epoch 3/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [46:53<48:09, 24.08s/it, loss=-0.1011, reward=0.5950, temp=0.80]  

   Processing 30 sentences...

Epoch 3/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [46:55<34:25, 17.35s/it, loss=-0.1037, reward=0.5594, temp=0.80]

   Processing 76 sentences...

Epoch 3/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [46:58<26:09, 13.30s/it, loss=-0.0928, reward=0.5911, temp=0.80]

   Processing 95 sentences...

Epoch 3/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:06<22:17, 11.43s/it, loss=-0.0887, reward=0.5961, temp=0.80]

   Processing 78 sentences...

Epoch 3/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:10<18:05,  9.36s/it, loss=-0.0829, reward=0.6020, temp=0.80]

   Processing 153 sentences...

Epoch 3/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:18<17:15,  9.01s/it, loss=-0.0803, reward=0.6041, temp=0.80]

   Processing 229 sentences...

Epoch 3/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:33<20:13, 10.65s/it, loss=-0.0630, reward=0.6169, temp=0.80]

   Processing 108 sentences...

Epoch 3/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:38<16:51,  8.95s/it, loss=-0.0884, reward=0.5969, temp=0.80]

   Processing 154 sentences...

Epoch 3/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [47:57<22:22, 11.99s/it, loss=-0.0789, reward=0.6068, temp=0.80]

   Processing 44 sentences...

Epoch 3/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:04<19:39, 10.63s/it, loss=-0.0963, reward=0.5753, temp=0.80]

   Processing 229 sentences...

Epoch 3/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:41<33:55, 18.50s/it, loss=-0.0835, reward=0.6031, temp=0.80]

   Processing 221 sentences...

Epoch 3/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [49:02<34:54, 19.21s/it, loss=-0.0799, reward=0.6033, temp=0.80]

   Processing 148 sentences...

Epoch 3/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:09<28:05, 15.61s/it, loss=-0.0810, reward=0.6066, temp=0.80]

   Processing 144 sentences...

Epoch 3/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:15<22:50, 12.81s/it, loss=-0.0876, reward=0.6020, temp=0.80]

   Processing 77 sentences...

Epoch 3/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:19<17:46, 10.06s/it, loss=-0.0988, reward=0.5920, temp=0.80]

   Processing 90 sentences...

Epoch 3/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:26<16:10,  9.25s/it, loss=-0.0969, reward=0.5898, temp=0.80]

   Processing 309 sentences...

Epoch 3/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:39<17:56, 10.35s/it, loss=-0.0569, reward=0.6116, temp=0.80]

   Processing 297 sentences...

Epoch 3/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [50:00<23:03, 13.43s/it, loss=-0.0550, reward=0.6149, temp=0.80]

   Processing 286 sentences...

Epoch 3/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:26<29:19, 17.25s/it, loss=-0.0638, reward=0.6130, temp=0.80]

   Processing 66 sentences...

Epoch 3/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:29<21:47, 12.94s/it, loss=-0.1081, reward=0.5773, temp=0.80]

   Processing 220 sentences...

Epoch 3/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:44<22:46, 13.67s/it, loss=-0.0877, reward=0.6034, temp=0.80]

   Processing 209 sentences...

Epoch 3/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [51:02<24:27, 14.82s/it, loss=-0.0767, reward=0.6095, temp=0.80]

   Processing 70 sentences...

Epoch 3/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:06<19:06, 11.70s/it, loss=-0.0989, reward=0.5915, temp=0.80]

   Processing 214 sentences...

Epoch 3/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:17<18:17, 11.31s/it, loss=-0.0120, reward=0.6402, temp=0.80]

   Processing 213 sentences...

Epoch 3/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:29<18:21, 11.47s/it, loss=-0.0807, reward=0.6102, temp=0.80]

   Processing 29 sentences...

Epoch 3/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:31<13:39,  8.62s/it, loss=-0.1098, reward=0.5587, temp=0.80]

   Processing 91 sentences...

Epoch 3/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:35<11:33,  7.38s/it, loss=-0.0935, reward=0.5987, temp=0.80]

   Processing 291 sentences...

Epoch 3/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [51:55<17:29, 11.29s/it, loss=-0.0799, reward=0.6059, temp=0.80]

   Processing 80 sentences...

Epoch 3/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:04<16:14, 10.59s/it, loss=-0.0893, reward=0.6034, temp=0.80]

   Processing 43 sentences...

Epoch 3/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:07<12:31,  8.26s/it, loss=-0.0950, reward=0.5946, temp=0.80]

   Processing 45 sentences...

Epoch 3/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:10<09:47,  6.53s/it, loss=-0.1039, reward=0.5840, temp=0.80]

   Processing 89 sentences...

Epoch 3/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:14<08:29,  5.72s/it, loss=-0.0901, reward=0.6036, temp=0.80]

   Processing 194 sentences...

Epoch 3/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:24<10:18,  7.03s/it, loss=-0.0918, reward=0.6039, temp=0.80]

   Processing 137 sentences...

Epoch 3/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:34<11:45,  8.11s/it, loss=-0.1114, reward=0.5888, temp=0.80]

   Processing 73 sentences...

Epoch 3/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:38<09:34,  6.68s/it, loss=-0.0986, reward=0.5952, temp=0.80]

   Processing 158 sentences...

Epoch 3/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:51<12:23,  8.74s/it, loss=-0.0873, reward=0.5995, temp=0.80]

   Processing 56 sentences...

Epoch 3/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [52:54<09:40,  6.91s/it, loss=-0.1259, reward=0.5590, temp=0.80]

   Processing 180 sentences...

Epoch 3/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:05<11:12,  8.10s/it, loss=-0.0676, reward=0.6044, temp=0.80]

   Processing 338 sentences...

Epoch 3/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:24<15:52, 11.61s/it, loss=-0.0371, reward=0.6079, temp=0.80]

   Processing 261 sentences...

Epoch 3/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:51<21:46, 16.13s/it, loss=-0.0770, reward=0.6010, temp=0.80]

   Processing 156 sentences...

Epoch 3/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [53:57<17:30, 13.13s/it, loss=-0.0855, reward=0.6032, temp=0.80]

   Processing 43 sentences...

Epoch 3/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [54:00<13:10, 10.00s/it, loss=-0.1133, reward=0.5725, temp=0.80]

   Processing 144 sentences...

Epoch 3/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:13<14:15, 10.97s/it, loss=-0.0643, reward=0.6126, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:06<30:14, 23.57s/it, loss=0.0379, reward=0.6158, temp=0.80] 

   Processing 49 sentences...

Epoch 3/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:09<22:03, 17.42s/it, loss=-0.1128, reward=0.5705, temp=0.80]

   Processing 66 sentences...

Epoch 3/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:16<17:39, 14.13s/it, loss=-0.0995, reward=0.5883, temp=0.80]

   Processing 103 sentences...

Epoch 3/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:22<14:29, 11.74s/it, loss=-0.0836, reward=0.6012, temp=0.80]

   Processing 350 sentences...

Epoch 3/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:51<20:31, 16.87s/it, loss=-0.0395, reward=0.6093, temp=0.80]

   Processing 111 sentences...

Epoch 3/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [55:59<17:17, 14.40s/it, loss=-0.0962, reward=0.5933, temp=0.80]

   Processing 51 sentences...

Epoch 3/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [56:02<12:52, 10.88s/it, loss=-0.0936, reward=0.5889, temp=0.80]

   Processing 146 sentences...

Epoch 3/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:14<13:02, 11.18s/it, loss=-0.0781, reward=0.6093, temp=0.80]

   Processing 161 sentences...

Epoch 3/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:24<12:28, 10.85s/it, loss=-0.0772, reward=0.6101, temp=0.80]

   Processing 243 sentences...

Epoch 3/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:39<13:47, 12.16s/it, loss=-0.0442, reward=0.6201, temp=0.80]

   Processing 297 sentences...

Epoch 3/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [57:06<18:36, 16.66s/it, loss=-0.0675, reward=0.6095, temp=0.80]

   Processing 47 sentences...

Epoch 3/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [57:09<13:42, 12.46s/it, loss=-0.1049, reward=0.5740, temp=0.80]

   Processing 415 sentences...

Epoch 3/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:19<32:18, 29.82s/it, loss=-0.0591, reward=0.6075, temp=0.80]

   Processing 138 sentences...

Epoch 3/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:31<26:05, 24.47s/it, loss=-0.0825, reward=0.6054, temp=0.80]

   Processing 46 sentences...

Epoch 3/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:34<18:41, 17.80s/it, loss=-0.0926, reward=0.5889, temp=0.80]

   Processing 73 sentences...

Epoch 3/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:38<14:05, 13.64s/it, loss=-0.0906, reward=0.6016, temp=0.80]

   Processing 136 sentences...

Epoch 3/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:51<13:49, 13.59s/it, loss=-0.0915, reward=0.5979, temp=0.80]

   Processing 93 sentences...

Epoch 3/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [58:56<11:08, 11.14s/it, loss=-0.0839, reward=0.6077, temp=0.80]

   Processing 267 sentences...

Epoch 3/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:13<12:31, 12.74s/it, loss=-0.0709, reward=0.6090, temp=0.80]

   Processing 421 sentences...

Epoch 3/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:20<28:11, 29.16s/it, loss=-0.0655, reward=0.6076, temp=0.80]

   Processing 78 sentences...

Epoch 3/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:24<20:25, 21.50s/it, loss=-0.0951, reward=0.5980, temp=0.80]

   Processing 275 sentences...

Epoch 3/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:44<19:43, 21.14s/it, loss=-0.0799, reward=0.6063, temp=0.80]

   Processing 56 sentences...

Epoch 3/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:48<14:29, 15.82s/it, loss=-0.1113, reward=0.5592, temp=0.80]

   Processing 292 sentences...

Epoch 3/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:01:02<13:53, 15.43s/it, loss=-0.0721, reward=0.6060, temp=0.80]

   Processing 393 sentences...

Epoch 3/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:44<20:42, 23.44s/it, loss=-0.0603, reward=0.6088, temp=0.80]

   Processing 412 sentences...

Epoch 3/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:29<25:51, 29.84s/it, loss=-0.0420, reward=0.6144, temp=0.80]

   Processing 53 sentences...

Epoch 3/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:31<18:21, 21.60s/it, loss=-0.0916, reward=0.5988, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:17<23:55, 28.71s/it, loss=-0.0329, reward=0.6163, temp=0.80]

   Processing 449 sentences...

Epoch 3/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:49<24:11, 29.62s/it, loss=-0.0619, reward=0.6101, temp=0.80]

   Processing 81 sentences...

Epoch 3/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:53<17:43, 22.16s/it, loss=-0.0927, reward=0.5975, temp=0.80]

   Processing 368 sentences...

Epoch 3/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:16<17:29, 22.32s/it, loss=-0.0588, reward=0.6147, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:22<27:07, 35.38s/it, loss=-0.0556, reward=0.6143, temp=0.80]

   Processing 45 sentences...

Epoch 3/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:24<19:08, 25.52s/it, loss=-0.0980, reward=0.5653, temp=0.80]

   Processing 45 sentences...

Epoch 3/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:27<13:40, 18.64s/it, loss=-0.0926, reward=0.5856, temp=0.80]

   Processing 96 sentences...

Epoch 3/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:35<11:10, 15.60s/it, loss=-0.1049, reward=0.5774, temp=0.80]

   Processing 184 sentences...

Epoch 3/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:48<10:16, 14.69s/it, loss=-0.0818, reward=0.6046, temp=0.80]

   Processing 63 sentences...

Epoch 3/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:53<08:05, 11.85s/it, loss=-0.0952, reward=0.5752, temp=0.80]

   Processing 123 sentences...

Epoch 3/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:06:00<06:50, 10.26s/it, loss=-0.0828, reward=0.6051, temp=0.80]

   Processing 72 sentences...

Epoch 3/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:06:05<05:43,  8.82s/it, loss=-0.0862, reward=0.6029, temp=0.80]

   Processing 268 sentences...

Epoch 3/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:56<13:32, 21.38s/it, loss=-0.0879, reward=0.6014, temp=0.80]

   Processing 129 sentences...

Epoch 3/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:07:03<10:36, 17.20s/it, loss=-0.0940, reward=0.5931, temp=0.80]

   Processing 57 sentences...

Epoch 3/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:07:06<07:46, 12.95s/it, loss=-0.0919, reward=0.5955, temp=0.80]

   Processing 267 sentences...

Epoch 3/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:25<08:28, 14.52s/it, loss=-0.0500, reward=0.6173, temp=0.80]

   Processing 84 sentences...

Epoch 3/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:28<06:21, 11.21s/it, loss=-0.0925, reward=0.5956, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:17<12:23, 22.53s/it, loss=-0.0158, reward=0.6146, temp=0.80]

   Processing 97 sentences...

Epoch 3/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:31<10:34, 19.83s/it, loss=-0.0790, reward=0.6100, temp=0.80]

   Processing 232 sentences...

Epoch 3/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:09:00<11:42, 22.65s/it, loss=-0.0799, reward=0.6093, temp=0.80]

   Processing 111 sentences...

Epoch 3/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:09:06<08:55, 17.85s/it, loss=-0.0688, reward=0.6226, temp=0.80]

   Processing 24 sentences...

Epoch 3/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:09:08<06:14, 12.93s/it, loss=-0.1166, reward=0.5493, temp=0.80]

   Processing 112 sentences...

Epoch 3/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:09:14<05:04, 10.88s/it, loss=-0.0905, reward=0.6002, temp=0.80]

   Processing 43 sentences...

Epoch 3/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:17<03:47,  8.43s/it, loss=-0.1081, reward=0.5673, temp=0.80]

   Processing 183 sentences...

Epoch 3/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:27<03:56,  9.10s/it, loss=-0.0905, reward=0.5975, temp=0.80]

   Processing 117 sentences...

Epoch 3/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:32<03:14,  7.78s/it, loss=-0.0842, reward=0.6036, temp=0.80]

   Processing 106 sentences...

Epoch 3/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:38<02:56,  7.35s/it, loss=-0.0899, reward=0.6007, temp=0.80]

   Processing 67 sentences...

Epoch 3/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:41<02:15,  5.89s/it, loss=-0.1023, reward=0.5854, temp=0.80]

   Processing 112 sentences...

Epoch 3/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:50<02:28,  6.76s/it, loss=-0.0920, reward=0.5983, temp=0.80]

   Processing 79 sentences...

Epoch 3/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:54<02:08,  6.11s/it, loss=-0.1003, reward=0.5886, temp=0.80]

   Processing 408 sentences...

Epoch 3/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:15<03:30, 10.54s/it, loss=-0.0312, reward=0.6109, temp=0.80]

   Processing 103 sentences...

Epoch 3/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:20<02:46,  8.77s/it, loss=-0.0801, reward=0.6069, temp=0.80]

   Processing 103 sentences...

Epoch 3/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:27<02:27,  8.21s/it, loss=-0.0901, reward=0.6012, temp=0.80]

   Processing 118 sentences...

Epoch 3/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:33<02:08,  7.57s/it, loss=-0.0984, reward=0.5937, temp=0.80]

   Processing 249 sentences...

Epoch 3/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:53<03:00, 11.26s/it, loss=-0.0906, reward=0.5983, temp=0.80]

   Processing 145 sentences...

Epoch 3/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:58<02:23,  9.58s/it, loss=-0.0981, reward=0.5981, temp=0.80]

   Processing 294 sentences...

Epoch 3/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:24<03:22, 14.44s/it, loss=-0.0371, reward=0.6186, temp=0.80]

   Processing 147 sentences...

Epoch 3/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:32<02:43, 12.60s/it, loss=-0.0861, reward=0.6042, temp=0.80]

   Processing 304 sentences...

Epoch 3/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:12:05<03:44, 18.69s/it, loss=-0.0442, reward=0.6162, temp=0.80]

   Processing 128 sentences...

Epoch 3/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:18<03:07, 17.05s/it, loss=-0.1075, reward=0.5964, temp=0.80]

   Processing 247 sentences...

Epoch 3/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:35<02:49, 16.93s/it, loss=-0.0721, reward=0.6118, temp=0.80]

   Processing 165 sentences...

Epoch 3/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:52<02:32, 16.93s/it, loss=-0.0889, reward=0.6063, temp=0.80]

   Processing 38 sentences...

Epoch 3/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:55<01:41, 12.68s/it, loss=-0.1040, reward=0.5709, temp=0.80]

   Processing 63 sentences...

Epoch 3/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:58<01:08,  9.82s/it, loss=-0.0922, reward=0.5923, temp=0.80]

   Processing 215 sentences...

Epoch 3/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:23<01:25, 14.33s/it, loss=-0.0800, reward=0.6065, temp=0.80]

   Processing 39 sentences...

Epoch 3/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:25<00:53, 10.62s/it, loss=-0.0982, reward=0.5783, temp=0.80]

   Processing 105 sentences...

Epoch 3/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:30<00:35,  8.91s/it, loss=-0.0793, reward=0.6079, temp=0.80]

   Processing 138 sentences...

Epoch 3/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:41<00:28,  9.52s/it, loss=-0.0886, reward=0.6039, temp=0.80]

   Processing 161 sentences...

Epoch 3/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:51<00:19,  9.62s/it, loss=-0.1018, reward=0.5917, temp=0.80]

   Processing 35 sentences...

Epoch 3/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:52<00:07,  7.33s/it, loss=-0.1018, reward=0.5846, temp=0.80]

   Processing 500 sentences...ents), truncating to 500

Epoch 3/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:24<00:00, 15.95s/it, loss=0.0136, reward=0.6164, temp=0.80] 


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 3/10:
  Train Loss: -0.0797
  Overall Val Reward: 0.5802
  Learning Rate: 0.000100
  Temperature: 0.8000

  Aspect-wise Val Rewards:
    facts       : 0.5806
    analysis    : 0.5813
    argument    : 0.5810
    judgement   : 0.5789
    statute     : 0.5791



Epoch 4/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 4/10:   0%|          | 1/280 [00:28<2:14:32, 28.93s/it, loss=-0.0579, reward=0.6144, temp=0.70]

   Processing 353 sentences...

Epoch 4/10:   1%|          | 2/280 [00:53<2:01:13, 26.16s/it, loss=-0.0622, reward=0.6116, temp=0.70]

   Processing 57 sentences...

Epoch 4/10:   1%|          | 3/280 [01:00<1:20:20, 17.40s/it, loss=-0.1036, reward=0.5805, temp=0.70]

   Processing 96 sentences...

Epoch 4/10:   1%|‚ñè         | 4/280 [01:14<1:13:50, 16.05s/it, loss=-0.1017, reward=0.5802, temp=0.70]

   Processing 71 sentences...

Epoch 4/10:   2%|‚ñè         | 5/280 [01:17<52:06, 11.37s/it, loss=-0.0923, reward=0.5976, temp=0.70]  

   Processing 62 sentences...

Epoch 4/10:   2%|‚ñè         | 6/280 [01:19<38:30,  8.43s/it, loss=-0.0933, reward=0.5902, temp=0.70]

   Processing 350 sentences...

Epoch 4/10:   2%|‚ñé         | 7/280 [01:52<1:14:28, 16.37s/it, loss=-0.0385, reward=0.6213, temp=0.70]

   Processing 263 sentences...

Epoch 4/10:   3%|‚ñé         | 8/280 [02:08<1:14:04, 16.34s/it, loss=-0.0689, reward=0.6149, temp=0.70]

   Processing 176 sentences...

Epoch 4/10:   3%|‚ñé         | 9/280 [02:18<1:04:07, 14.20s/it, loss=-0.0858, reward=0.6066, temp=0.70]

   Processing 318 sentences...

Epoch 4/10:   4%|‚ñé         | 10/280 [03:22<2:13:38, 29.70s/it, loss=-0.0693, reward=0.6153, temp=0.70]

   Processing 338 sentences...

Epoch 4/10:   4%|‚ñç         | 11/280 [03:46<2:05:17, 27.95s/it, loss=-0.0730, reward=0.6130, temp=0.70]

   Processing 40 sentences...

Epoch 4/10:   4%|‚ñç         | 12/280 [03:49<1:30:12, 20.20s/it, loss=-0.1014, reward=0.5679, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:   5%|‚ñç         | 13/280 [04:40<2:12:16, 29.72s/it, loss=-0.0467, reward=0.6187, temp=0.70]

   Processing 423 sentences...

Epoch 4/10:   5%|‚ñå         | 14/280 [05:39<2:50:41, 38.50s/it, loss=-0.0739, reward=0.6119, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:   5%|‚ñå         | 15/280 [05:58<2:23:25, 32.47s/it, loss=-0.0125, reward=0.6436, temp=0.70]

   Processing 82 sentences...

Epoch 4/10:   6%|‚ñå         | 16/280 [06:01<1:43:43, 23.57s/it, loss=-0.0913, reward=0.5941, temp=0.70]

   Processing 137 sentences...

Epoch 4/10:   6%|‚ñå         | 17/280 [06:08<1:21:54, 18.69s/it, loss=-0.0833, reward=0.6063, temp=0.70]

   Processing 30 sentences...

Epoch 4/10:   6%|‚ñã         | 18/280 [06:10<1:00:27, 13.85s/it, loss=-0.0969, reward=0.5587, temp=0.70]

   Processing 82 sentences...

Epoch 4/10:   7%|‚ñã         | 19/280 [06:18<52:22, 12.04s/it, loss=-0.0892, reward=0.5860, temp=0.70]  

   Processing 61 sentences...

Epoch 4/10:   7%|‚ñã         | 20/280 [06:21<40:19,  9.31s/it, loss=-0.0854, reward=0.5947, temp=0.70]

   Processing 50 sentences...

Epoch 4/10:   8%|‚ñä         | 21/280 [06:24<31:58,  7.41s/it, loss=-0.0946, reward=0.5602, temp=0.70]

   Processing 115 sentences...

Epoch 4/10:   8%|‚ñä         | 22/280 [06:33<33:15,  7.74s/it, loss=-0.0844, reward=0.5964, temp=0.70]

   Processing 80 sentences...

Epoch 4/10:   8%|‚ñä         | 23/280 [06:36<27:12,  6.35s/it, loss=-0.0856, reward=0.5947, temp=0.70]

   Processing 434 sentences...

Epoch 4/10:   9%|‚ñä         | 24/280 [08:04<2:11:11, 30.75s/it, loss=-0.0530, reward=0.6188, temp=0.70]

   Processing 254 sentences...

Epoch 4/10:   9%|‚ñâ         | 25/280 [08:36<2:12:50, 31.26s/it, loss=-0.0794, reward=0.6061, temp=0.70]

   Processing 329 sentences...

Epoch 4/10:   9%|‚ñâ         | 26/280 [08:56<1:57:31, 27.76s/it, loss=-0.0722, reward=0.6111, temp=0.70]

   Processing 87 sentences...

Epoch 4/10:  10%|‚ñâ         | 27/280 [09:00<1:27:56, 20.86s/it, loss=-0.0900, reward=0.5982, temp=0.70]

   Processing 39 sentences...

Epoch 4/10:  10%|‚ñà         | 28/280 [09:02<1:03:25, 15.10s/it, loss=-0.0906, reward=0.5755, temp=0.70]

   Processing 83 sentences...

Epoch 4/10:  10%|‚ñà         | 29/280 [09:05<48:27, 11.58s/it, loss=-0.0916, reward=0.5945, temp=0.70]  

   Processing 177 sentences...

Epoch 4/10:  11%|‚ñà         | 30/280 [09:18<49:00, 11.76s/it, loss=-0.0801, reward=0.6081, temp=0.70]

   Processing 82 sentences...

Epoch 4/10:  11%|‚ñà         | 31/280 [09:25<43:47, 10.55s/it, loss=-0.0947, reward=0.5895, temp=0.70]

   Processing 136 sentences...

Epoch 4/10:  11%|‚ñà‚ñè        | 32/280 [09:30<36:40,  8.87s/it, loss=-0.0912, reward=0.5964, temp=0.70]

   Processing 166 sentences...

Epoch 4/10:  12%|‚ñà‚ñè        | 33/280 [09:40<37:59,  9.23s/it, loss=-0.0807, reward=0.6084, temp=0.70]

   Processing 91 sentences...

Epoch 4/10:  12%|‚ñà‚ñè        | 34/280 [09:46<33:14,  8.11s/it, loss=-0.0924, reward=0.5988, temp=0.70]

   Processing 87 sentences...

Epoch 4/10:  12%|‚ñà‚ñé        | 35/280 [09:51<29:12,  7.15s/it, loss=-0.0952, reward=0.5977, temp=0.70]

   Processing 203 sentences...

Epoch 4/10:  13%|‚ñà‚ñé        | 36/280 [10:05<37:47,  9.29s/it, loss=-0.0752, reward=0.6111, temp=0.70]

   Processing 140 sentences...

Epoch 4/10:  13%|‚ñà‚ñé        | 37/280 [10:15<38:49,  9.59s/it, loss=-0.0851, reward=0.6081, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  14%|‚ñà‚ñé        | 38/280 [11:06<1:28:19, 21.90s/it, loss=-0.0431, reward=0.6142, temp=0.70]

   Processing 97 sentences...

Epoch 4/10:  14%|‚ñà‚ñç        | 39/280 [11:12<1:09:08, 17.21s/it, loss=-0.1005, reward=0.5943, temp=0.70]

   Processing 182 sentences...

Epoch 4/10:  14%|‚ñà‚ñç        | 40/280 [11:25<1:03:39, 15.91s/it, loss=-0.1032, reward=0.5939, temp=0.70]

   Processing 87 sentences...

Epoch 4/10:  15%|‚ñà‚ñç        | 41/280 [11:29<49:29, 12.42s/it, loss=-0.0948, reward=0.5965, temp=0.70]  

   Processing 235 sentences...

Epoch 4/10:  15%|‚ñà‚ñå        | 42/280 [12:03<1:14:29, 18.78s/it, loss=-0.0759, reward=0.6086, temp=0.70]

   Processing 50 sentences...

Epoch 4/10:  15%|‚ñà‚ñå        | 43/280 [12:05<54:43, 13.85s/it, loss=-0.1014, reward=0.5870, temp=0.70]  

   Processing 70 sentences...

Epoch 4/10:  16%|‚ñà‚ñå        | 44/280 [12:10<43:18, 11.01s/it, loss=-0.0927, reward=0.6002, temp=0.70]

   Processing 65 sentences...

Epoch 4/10:  16%|‚ñà‚ñå        | 45/280 [12:15<36:40,  9.36s/it, loss=-0.1084, reward=0.5794, temp=0.70]

   Processing 482 sentences...

Epoch 4/10:  16%|‚ñà‚ñã        | 46/280 [13:06<1:24:52, 21.76s/it, loss=-0.0218, reward=0.6184, temp=0.70]

   Processing 164 sentences...

Epoch 4/10:  17%|‚ñà‚ñã        | 47/280 [13:22<1:17:47, 20.03s/it, loss=-0.0742, reward=0.6131, temp=0.70]

   Processing 102 sentences...

Epoch 4/10:  17%|‚ñà‚ñã        | 48/280 [13:28<1:01:47, 15.98s/it, loss=-0.0891, reward=0.6064, temp=0.70]

   Processing 48 sentences...

Epoch 4/10:  18%|‚ñà‚ñä        | 49/280 [13:30<45:03, 11.70s/it, loss=-0.1018, reward=0.5799, temp=0.70]  

   Processing 74 sentences...

Epoch 4/10:  18%|‚ñà‚ñä        | 50/280 [13:36<37:42,  9.84s/it, loss=-0.0922, reward=0.5982, temp=0.70]

   Processing 67 sentences...

Epoch 4/10:  18%|‚ñà‚ñä        | 51/280 [13:41<32:51,  8.61s/it, loss=-0.0944, reward=0.5985, temp=0.70]

   Processing 31 sentences...

Epoch 4/10:  19%|‚ñà‚ñä        | 52/280 [13:43<24:41,  6.50s/it, loss=-0.0939, reward=0.5904, temp=0.70]

   Processing 53 sentences...

Epoch 4/10:  19%|‚ñà‚ñâ        | 53/280 [13:47<21:20,  5.64s/it, loss=-0.0946, reward=0.5965, temp=0.70]

   Processing 123 sentences...

Epoch 4/10:  19%|‚ñà‚ñâ        | 54/280 [13:54<23:19,  6.19s/it, loss=-0.0920, reward=0.5981, temp=0.70]

   Processing 169 sentences...

Epoch 4/10:  20%|‚ñà‚ñâ        | 55/280 [14:04<27:06,  7.23s/it, loss=-0.0880, reward=0.5987, temp=0.70]

   Processing 33 sentences...

Epoch 4/10:  20%|‚ñà‚ñà        | 56/280 [14:07<22:12,  5.95s/it, loss=-0.1112, reward=0.5638, temp=0.70]

   Processing 190 sentences...

Epoch 4/10:  20%|‚ñà‚ñà        | 57/280 [14:18<27:37,  7.43s/it, loss=-0.0724, reward=0.6094, temp=0.70]

   Processing 223 sentences...

Epoch 4/10:  21%|‚ñà‚ñà        | 58/280 [14:35<38:27, 10.39s/it, loss=-0.0712, reward=0.6086, temp=0.70]

   Processing 57 sentences...

Epoch 4/10:  21%|‚ñà‚ñà        | 59/280 [14:38<29:51,  8.10s/it, loss=-0.0941, reward=0.5929, temp=0.70]

   Processing 362 sentences...

Epoch 4/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:03<48:18, 13.18s/it, loss=-0.0132, reward=0.6378, temp=0.70]

   Processing 192 sentences...

Epoch 4/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:21<53:31, 14.66s/it, loss=-0.0871, reward=0.6044, temp=0.70]

   Processing 108 sentences...

Epoch 4/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:24<40:54, 11.26s/it, loss=-0.0871, reward=0.6106, temp=0.70]

   Processing 380 sentences...

Epoch 4/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:04<1:11:36, 19.80s/it, loss=-0.0934, reward=0.6041, temp=0.70]

   Processing 285 sentences...

Epoch 4/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:36<1:24:32, 23.48s/it, loss=-0.0749, reward=0.6120, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:37<2:04:25, 34.72s/it, loss=-0.0578, reward=0.6157, temp=0.70]

   Processing 144 sentences...

Epoch 4/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:46<1:36:48, 27.14s/it, loss=-0.0977, reward=0.5943, temp=0.70]

   Processing 202 sentences...

Epoch 4/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [18:03<1:25:16, 24.02s/it, loss=-0.0815, reward=0.6106, temp=0.70]

   Processing 271 sentences...

Epoch 4/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:31<1:28:41, 25.10s/it, loss=-0.0893, reward=0.6054, temp=0.70]

   Processing 51 sentences...

Epoch 4/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:33<1:04:07, 18.23s/it, loss=-0.0970, reward=0.5853, temp=0.70]

   Processing 376 sentences...

Epoch 4/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [19:04<1:17:01, 22.01s/it, loss=-0.0796, reward=0.6104, temp=0.70]

   Processing 61 sentences...

Epoch 4/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:07<57:35, 16.53s/it, loss=-0.0944, reward=0.6000, temp=0.70]  

   Processing 63 sentences...

Epoch 4/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:11<43:34, 12.57s/it, loss=-0.1056, reward=0.5741, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:18<1:40:02, 29.00s/it, loss=-0.0585, reward=0.6148, temp=0.70]

   Processing 254 sentences...

Epoch 4/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:45<1:37:40, 28.45s/it, loss=-0.0858, reward=0.6069, temp=0.70]

   Processing 118 sentences...

Epoch 4/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:09<1:31:55, 26.91s/it, loss=-0.1022, reward=0.5912, temp=0.70]

   Processing 439 sentences...

Epoch 4/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:43<1:39:00, 29.12s/it, loss=-0.0646, reward=0.6098, temp=0.70]

   Processing 51 sentences...

Epoch 4/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:45<1:11:08, 21.03s/it, loss=-0.0968, reward=0.5946, temp=0.70]

   Processing 143 sentences...

Epoch 4/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:56<1:00:21, 17.93s/it, loss=-0.1015, reward=0.5937, temp=0.70]

   Processing 300 sentences...

Epoch 4/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:17<1:03:11, 18.86s/it, loss=-0.0728, reward=0.6105, temp=0.70]

   Processing 137 sentences...

Epoch 4/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:26<52:55, 15.88s/it, loss=-0.1060, reward=0.5866, temp=0.70]  

   Processing 120 sentences...

Epoch 4/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:32<43:28, 13.11s/it, loss=-0.0937, reward=0.6005, temp=0.70]

   Processing 170 sentences...

Epoch 4/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:45<43:00, 13.04s/it, loss=-0.1094, reward=0.5933, temp=0.70]

   Processing 107 sentences...

Epoch 4/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:52<36:33, 11.14s/it, loss=-0.0810, reward=0.6085, temp=0.70]

   Processing 48 sentences...

Epoch 4/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:56<29:20,  8.98s/it, loss=-0.1119, reward=0.5653, temp=0.70]

   Processing 163 sentences...

Epoch 4/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [23:04<28:48,  8.86s/it, loss=-0.0528, reward=0.6154, temp=0.70]

   Processing 108 sentences...

Epoch 4/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:14<29:13,  9.04s/it, loss=-0.0876, reward=0.6047, temp=0.70]

   Processing 86 sentences...

Epoch 4/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:18<24:34,  7.64s/it, loss=-0.0827, reward=0.6104, temp=0.70]

   Processing 74 sentences...

Epoch 4/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:31<29:05,  9.09s/it, loss=-0.1073, reward=0.5835, temp=0.70]

   Processing 81 sentences...

Epoch 4/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:40<29:28,  9.26s/it, loss=-0.1123, reward=0.5771, temp=0.70]

   Processing 203 sentences...

Epoch 4/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:52<31:38,  9.99s/it, loss=-0.0676, reward=0.6080, temp=0.70]

   Processing 54 sentences...

Epoch 4/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:55<24:29,  7.77s/it, loss=-0.1126, reward=0.5713, temp=0.70]

   Processing 143 sentences...

Epoch 4/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [24:04<26:14,  8.37s/it, loss=-0.0764, reward=0.6069, temp=0.70]

   Processing 36 sentences...

Epoch 4/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [24:07<20:29,  6.57s/it, loss=-0.1052, reward=0.5783, temp=0.70]

   Processing 106 sentences...

Epoch 4/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:12<18:47,  6.06s/it, loss=-0.0866, reward=0.6036, temp=0.70]

   Processing 133 sentences...

Epoch 4/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:38<37:37, 12.20s/it, loss=-0.1086, reward=0.5886, temp=0.70]

   Processing 206 sentences...

Epoch 4/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:49<35:44, 11.66s/it, loss=-0.0645, reward=0.6079, temp=0.70]

   Processing 162 sentences...

Epoch 4/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:10<44:47, 14.68s/it, loss=-0.1030, reward=0.5924, temp=0.70]

   Processing 42 sentences...

Epoch 4/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:12<32:46, 10.80s/it, loss=-0.0911, reward=0.5980, temp=0.70]

   Processing 89 sentences...

Epoch 4/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:17<26:54,  8.92s/it, loss=-0.0922, reward=0.6003, temp=0.70]

   Processing 32 sentences...

Epoch 4/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:20<22:09,  7.38s/it, loss=-0.1190, reward=0.5553, temp=0.70]

   Processing 408 sentences...

Epoch 4/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:44<1:29:58, 30.16s/it, loss=-0.0347, reward=0.6051, temp=0.70]

   Processing 35 sentences...

Epoch 4/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:46<1:04:49, 21.85s/it, loss=-0.1132, reward=0.5596, temp=0.70]

   Processing 137 sentences...

Epoch 4/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:54<51:40, 17.52s/it, loss=-0.1040, reward=0.5872, temp=0.70]  

   Processing 105 sentences...

Epoch 4/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [27:02<43:22, 14.78s/it, loss=-0.1060, reward=0.5863, temp=0.70]

   Processing 124 sentences...

Epoch 4/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:14<40:17, 13.82s/it, loss=-0.0748, reward=0.6035, temp=0.70]

   Processing 220 sentences...

Epoch 4/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:28<40:25, 13.94s/it, loss=-0.0455, reward=0.6147, temp=0.70]

   Processing 98 sentences...

Epoch 4/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:32<32:12, 11.17s/it, loss=-0.0891, reward=0.6000, temp=0.70]

   Processing 133 sentences...

Epoch 4/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:39<28:16,  9.86s/it, loss=-0.0894, reward=0.5986, temp=0.70]

   Processing 315 sentences...

Epoch 4/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:09<44:52, 15.74s/it, loss=-0.0620, reward=0.6064, temp=0.70]

   Processing 81 sentences...

Epoch 4/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:12<34:20, 12.12s/it, loss=-0.0910, reward=0.5979, temp=0.70]

   Processing 286 sentences...

Epoch 4/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:29<37:34, 13.34s/it, loss=-0.0652, reward=0.6116, temp=0.70]

   Processing 122 sentences...

Epoch 4/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:35<31:47, 11.35s/it, loss=-0.0920, reward=0.5906, temp=0.70]

   Processing 170 sentences...

Epoch 4/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:53<36:50, 13.24s/it, loss=-0.0847, reward=0.6045, temp=0.70]

   Processing 376 sentences...

Epoch 4/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:37<1:02:15, 22.50s/it, loss=-0.0675, reward=0.6080, temp=0.70]

   Processing 334 sentences...

Epoch 4/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:28<1:25:34, 31.12s/it, loss=-0.0654, reward=0.6158, temp=0.70]

   Processing 102 sentences...

Epoch 4/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:39<1:08:23, 25.02s/it, loss=-0.0997, reward=0.5846, temp=0.70]

   Processing 268 sentences...

Epoch 4/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:53<59:08, 21.77s/it, loss=-0.0719, reward=0.6151, temp=0.70]  

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:33<2:01:48, 45.11s/it, loss=-0.0507, reward=0.6196, temp=0.70]

   Processing 253 sentences...

Epoch 4/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:49<1:38:03, 36.54s/it, loss=-0.0830, reward=0.6060, temp=0.70]

   Processing 132 sentences...

Epoch 4/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:59<1:15:33, 28.33s/it, loss=-0.0896, reward=0.5945, temp=0.70]

   Processing 286 sentences...

Epoch 4/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:21<1:10:23, 26.56s/it, loss=-0.0770, reward=0.6103, temp=0.70]

   Processing 128 sentences...

Epoch 4/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:28<54:42, 20.78s/it, loss=-0.0848, reward=0.6072, temp=0.70]  

   Processing 37 sentences...

Epoch 4/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:31<39:55, 15.26s/it, loss=-0.0935, reward=0.5763, temp=0.70]

   Processing 205 sentences...

Epoch 4/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:49<41:49, 16.08s/it, loss=-0.0830, reward=0.6036, temp=0.70]

   Processing 87 sentences...

Epoch 4/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:55<34:13, 13.25s/it, loss=-0.0869, reward=0.6069, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:07<1:19:19, 30.90s/it, loss=-0.0574, reward=0.6156, temp=0.70]

   Processing 144 sentences...

Epoch 4/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:20<1:04:39, 25.36s/it, loss=-0.0886, reward=0.6046, temp=0.70]

   Processing 80 sentences...

Epoch 4/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:27<50:42, 20.02s/it, loss=-0.0957, reward=0.5967, temp=0.70]  

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:56<1:41:56, 40.51s/it, loss=-0.0587, reward=0.6139, temp=0.70]

   Processing 124 sentences...

Epoch 4/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [37:05<1:17:44, 31.10s/it, loss=-0.0943, reward=0.5976, temp=0.70]

   Processing 112 sentences...

Epoch 4/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:15<1:01:34, 24.80s/it, loss=-0.0952, reward=0.5966, temp=0.70]

   Processing 75 sentences...

Epoch 4/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:19<46:01, 18.66s/it, loss=-0.0974, reward=0.5941, temp=0.70]  

   Processing 165 sentences...

Epoch 4/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:31<40:49, 16.66s/it, loss=-0.0786, reward=0.6157, temp=0.70]

   Processing 146 sentences...

Epoch 4/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:49<41:29, 17.05s/it, loss=-0.0844, reward=0.6116, temp=0.70]

   Processing 100 sentences...

Epoch 4/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:54<32:35, 13.48s/it, loss=-0.0869, reward=0.6112, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:33<1:33:21, 38.90s/it, loss=-0.0701, reward=0.6119, temp=0.70]

   Processing 209 sentences...

Epoch 4/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:47<1:15:33, 31.70s/it, loss=-0.0917, reward=0.6050, temp=0.70]

   Processing 90 sentences...

Epoch 4/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:54<56:47, 24.00s/it, loss=-0.1027, reward=0.5945, temp=0.70]  

   Processing 72 sentences...

Epoch 4/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:58<42:48, 18.22s/it, loss=-0.1027, reward=0.5893, temp=0.70]

   Processing 38 sentences...

Epoch 4/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [40:00<31:05, 13.32s/it, loss=-0.0986, reward=0.5907, temp=0.70]

   Processing 131 sentences...

Epoch 4/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [40:05<25:13, 10.89s/it, loss=-0.0956, reward=0.6021, temp=0.70]

   Processing 78 sentences...

Epoch 4/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:10<20:44,  9.02s/it, loss=-0.1073, reward=0.5872, temp=0.70]

   Processing 119 sentences...

Epoch 4/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:18<19:49,  8.68s/it, loss=-0.0959, reward=0.6044, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [42:01<1:23:54, 37.01s/it, loss=0.0025, reward=0.6181, temp=0.70]

   Processing 133 sentences...

Epoch 4/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [42:11<1:04:44, 28.77s/it, loss=-0.0963, reward=0.5950, temp=0.70]

   Processing 83 sentences...

Epoch 4/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:17<49:19, 22.09s/it, loss=-0.0969, reward=0.5957, temp=0.70]  

   Processing 56 sentences...

Epoch 4/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:20<36:17, 16.37s/it, loss=-0.1029, reward=0.5884, temp=0.70]

   Processing 133 sentences...

Epoch 4/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:27<29:35, 13.45s/it, loss=-0.1165, reward=0.5841, temp=0.70]

   Processing 101 sentences...

Epoch 4/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:33<24:32, 11.24s/it, loss=-0.1283, reward=0.5785, temp=0.70]

   Processing 151 sentences...

Epoch 4/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:44<24:18, 11.22s/it, loss=-0.0829, reward=0.6022, temp=0.70]

   Processing 248 sentences...

Epoch 4/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [42:59<26:19, 12.24s/it, loss=-0.0395, reward=0.6143, temp=0.70]

   Processing 500 sentences...sents), truncating to 500

Epoch 4/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:55<54:40, 25.63s/it, loss=-0.0187, reward=0.6116, temp=0.70]

   Processing 145 sentences...

Epoch 4/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [44:02<42:05, 19.88s/it, loss=-0.0888, reward=0.5997, temp=0.70]

   Processing 202 sentences...

Epoch 4/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:19<40:05, 19.09s/it, loss=-0.1067, reward=0.5968, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:51<47:33, 22.83s/it, loss=-0.0282, reward=0.6119, temp=0.70]

   Processing 154 sentences...

Epoch 4/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [45:09<44:07, 21.35s/it, loss=-0.0969, reward=0.6018, temp=0.70]

   Processing 345 sentences...

Epoch 4/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:58<1:00:48, 29.66s/it, loss=-0.0869, reward=0.6061, temp=0.70]

   Processing 99 sentences...

Epoch 4/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [46:02<45:03, 22.16s/it, loss=-0.1105, reward=0.5846, temp=0.70]  

   Processing 500 sentences...sents), truncating to 500

Epoch 4/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:53<1:01:57, 30.72s/it, loss=-0.0431, reward=0.6164, temp=0.70]

   Processing 118 sentences...

Epoch 4/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [47:02<48:36, 24.30s/it, loss=-0.0958, reward=0.5941, temp=0.70]  

   Processing 30 sentences...

Epoch 4/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [47:04<34:41, 17.49s/it, loss=-0.0992, reward=0.5765, temp=0.70]

   Processing 76 sentences...

Epoch 4/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [47:08<26:21, 13.40s/it, loss=-0.0948, reward=0.5894, temp=0.70]

   Processing 95 sentences...

Epoch 4/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:15<22:21, 11.46s/it, loss=-0.0956, reward=0.5843, temp=0.70]

   Processing 78 sentences...

Epoch 4/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:19<18:11,  9.41s/it, loss=-0.0880, reward=0.5992, temp=0.70]

   Processing 153 sentences...

Epoch 4/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:28<17:20,  9.05s/it, loss=-0.0841, reward=0.6016, temp=0.70]

   Processing 229 sentences...

Epoch 4/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:42<20:17, 10.68s/it, loss=-0.0836, reward=0.6027, temp=0.70]

   Processing 108 sentences...

Epoch 4/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:47<16:55,  8.99s/it, loss=-0.0911, reward=0.5983, temp=0.70]

   Processing 154 sentences...

Epoch 4/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [48:06<22:19, 11.96s/it, loss=-0.0888, reward=0.5985, temp=0.70]

   Processing 44 sentences...

Epoch 4/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:14<19:48, 10.71s/it, loss=-0.1000, reward=0.5715, temp=0.70]

   Processing 229 sentences...

Epoch 4/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:51<34:16, 18.69s/it, loss=-0.0802, reward=0.6020, temp=0.70]

   Processing 221 sentences...

Epoch 4/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [49:12<35:10, 19.36s/it, loss=-0.0676, reward=0.6117, temp=0.70]

   Processing 148 sentences...

Epoch 4/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:19<28:20, 15.75s/it, loss=-0.0807, reward=0.6080, temp=0.70]

   Processing 144 sentences...

Epoch 4/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:26<22:59, 12.89s/it, loss=-0.0846, reward=0.6041, temp=0.70]

   Processing 77 sentences...

Epoch 4/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:29<17:52, 10.12s/it, loss=-0.1017, reward=0.5887, temp=0.70]

   Processing 90 sentences...

Epoch 4/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:37<16:15,  9.29s/it, loss=-0.0924, reward=0.5987, temp=0.70]

   Processing 309 sentences...

Epoch 4/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:50<18:00, 10.39s/it, loss=-0.0694, reward=0.6097, temp=0.70]

   Processing 297 sentences...

Epoch 4/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [50:09<22:39, 13.20s/it, loss=-0.0676, reward=0.6143, temp=0.70]

   Processing 286 sentences...

Epoch 4/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:35<29:02, 17.08s/it, loss=-0.0672, reward=0.6160, temp=0.70]

   Processing 66 sentences...

Epoch 4/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:38<21:36, 12.83s/it, loss=-0.1013, reward=0.5816, temp=0.70]

   Processing 220 sentences...

Epoch 4/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:54<22:44, 13.65s/it, loss=-0.0846, reward=0.6056, temp=0.70]

   Processing 209 sentences...

Epoch 4/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [51:12<24:32, 14.88s/it, loss=-0.0864, reward=0.6042, temp=0.70]

   Processing 70 sentences...

Epoch 4/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:16<19:14, 11.78s/it, loss=-0.0995, reward=0.5778, temp=0.70]

   Processing 214 sentences...

Epoch 4/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:27<18:23, 11.37s/it, loss=-0.0623, reward=0.6305, temp=0.70]

   Processing 213 sentences...

Epoch 4/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:38<18:16, 11.42s/it, loss=-0.0812, reward=0.6110, temp=0.70]

   Processing 29 sentences...

Epoch 4/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:40<13:36,  8.60s/it, loss=-0.1094, reward=0.5418, temp=0.70]

   Processing 91 sentences...

Epoch 4/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:45<11:32,  7.37s/it, loss=-0.0895, reward=0.6041, temp=0.70]

   Processing 291 sentences...

Epoch 4/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [52:05<17:32, 11.32s/it, loss=-0.0686, reward=0.6125, temp=0.70]

   Processing 80 sentences...

Epoch 4/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:14<16:11, 10.56s/it, loss=-0.0940, reward=0.5965, temp=0.70]

   Processing 43 sentences...

Epoch 4/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:17<12:28,  8.22s/it, loss=-0.0931, reward=0.5944, temp=0.70]

   Processing 45 sentences...

Epoch 4/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:19<09:45,  6.51s/it, loss=-0.1000, reward=0.5815, temp=0.70]

   Processing 89 sentences...

Epoch 4/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:23<08:26,  5.69s/it, loss=-0.0891, reward=0.6031, temp=0.70]

   Processing 194 sentences...

Epoch 4/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:33<10:04,  6.87s/it, loss=-0.0847, reward=0.6035, temp=0.70]

   Processing 137 sentences...

Epoch 4/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:43<11:23,  7.86s/it, loss=-0.0934, reward=0.5994, temp=0.70]

   Processing 73 sentences...

Epoch 4/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:46<09:17,  6.49s/it, loss=-0.0942, reward=0.5936, temp=0.70]

   Processing 158 sentences...

Epoch 4/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:59<12:01,  8.49s/it, loss=-0.0836, reward=0.6080, temp=0.70]

   Processing 56 sentences...

Epoch 4/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [53:02<09:25,  6.73s/it, loss=-0.1127, reward=0.5682, temp=0.70]

   Processing 180 sentences...

Epoch 4/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:12<10:51,  7.85s/it, loss=-0.0900, reward=0.5958, temp=0.70]

   Processing 338 sentences...

Epoch 4/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:33<15:49, 11.58s/it, loss=-0.0408, reward=0.6107, temp=0.70]

   Processing 261 sentences...

Epoch 4/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:59<21:37, 16.02s/it, loss=-0.0448, reward=0.6169, temp=0.70]

   Processing 156 sentences...

Epoch 4/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [54:05<17:26, 13.08s/it, loss=-0.1063, reward=0.5924, temp=0.70]

   Processing 43 sentences...

Epoch 4/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [54:08<13:09,  9.99s/it, loss=-0.1149, reward=0.5666, temp=0.70]

   Processing 144 sentences...

Epoch 4/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:21<14:18, 11.01s/it, loss=-0.0856, reward=0.6046, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:14<29:59, 23.36s/it, loss=-0.0145, reward=0.6172, temp=0.70]

   Processing 49 sentences...

Epoch 4/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:17<21:50, 17.25s/it, loss=-0.1062, reward=0.5673, temp=0.70]

   Processing 66 sentences...

Epoch 4/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:23<17:29, 14.00s/it, loss=-0.0964, reward=0.5849, temp=0.70]

   Processing 103 sentences...

Epoch 4/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:29<14:25, 11.69s/it, loss=-0.0812, reward=0.6064, temp=0.70]

   Processing 350 sentences...

Epoch 4/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:58<20:18, 16.70s/it, loss=-0.0530, reward=0.6125, temp=0.70]

   Processing 111 sentences...

Epoch 4/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [56:06<17:09, 14.30s/it, loss=-0.0942, reward=0.5929, temp=0.70]

   Processing 51 sentences...

Epoch 4/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [56:09<12:45, 10.78s/it, loss=-0.0981, reward=0.5777, temp=0.70]

   Processing 146 sentences...

Epoch 4/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:21<12:55, 11.08s/it, loss=-0.0795, reward=0.6093, temp=0.70]

   Processing 161 sentences...

Epoch 4/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:31<12:28, 10.85s/it, loss=-0.0826, reward=0.6033, temp=0.70]

   Processing 243 sentences...

Epoch 4/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:47<13:53, 12.26s/it, loss=-0.0683, reward=0.6190, temp=0.70]

   Processing 297 sentences...

Epoch 4/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [57:14<18:36, 16.67s/it, loss=-0.0770, reward=0.6067, temp=0.70]

   Processing 47 sentences...

Epoch 4/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [57:16<13:43, 12.48s/it, loss=-0.0961, reward=0.5831, temp=0.70]

   Processing 415 sentences...

Epoch 4/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:24<31:29, 29.07s/it, loss=-0.0637, reward=0.6132, temp=0.70]

   Processing 138 sentences...

Epoch 4/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:37<25:40, 24.08s/it, loss=-0.0869, reward=0.6053, temp=0.70]

   Processing 46 sentences...

Epoch 4/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:39<18:23, 17.51s/it, loss=-0.1015, reward=0.5681, temp=0.70]

   Processing 73 sentences...

Epoch 4/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:43<13:53, 13.45s/it, loss=-0.0939, reward=0.5915, temp=0.70]

   Processing 136 sentences...

Epoch 4/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:56<13:30, 13.29s/it, loss=-0.0883, reward=0.6003, temp=0.70]

   Processing 93 sentences...

Epoch 4/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [59:01<10:55, 10.92s/it, loss=-0.0836, reward=0.6143, temp=0.70]

   Processing 267 sentences...

Epoch 4/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:18<12:24, 12.62s/it, loss=-0.0768, reward=0.6100, temp=0.70]

   Processing 421 sentences...

Epoch 4/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:26<28:14, 29.22s/it, loss=-0.0791, reward=0.6067, temp=0.70]

   Processing 78 sentences...

Epoch 4/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:29<20:27, 21.53s/it, loss=-0.0962, reward=0.5971, temp=0.70]

   Processing 275 sentences...

Epoch 4/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:50<19:48, 21.22s/it, loss=-0.0904, reward=0.5996, temp=0.70]

   Processing 56 sentences...

Epoch 4/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:53<14:33, 15.87s/it, loss=-0.1041, reward=0.5754, temp=0.70]

   Processing 292 sentences...

Epoch 4/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:01:07<13:54, 15.46s/it, loss=-0.0753, reward=0.6106, temp=0.70]

   Processing 393 sentences...

Epoch 4/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:48<20:13, 22.91s/it, loss=-0.0483, reward=0.6141, temp=0.70]

   Processing 412 sentences...

Epoch 4/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:33<25:34, 29.51s/it, loss=-0.0558, reward=0.6135, temp=0.70]

   Processing 53 sentences...

Epoch 4/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:35<18:10, 21.37s/it, loss=-0.1020, reward=0.5827, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:21<23:54, 28.69s/it, loss=-0.0780, reward=0.6037, temp=0.70]

   Processing 449 sentences...

Epoch 4/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:53<24:17, 29.75s/it, loss=-0.0662, reward=0.6110, temp=0.70]

   Processing 81 sentences...

Epoch 4/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:58<17:47, 22.25s/it, loss=-0.0990, reward=0.5872, temp=0.70]

   Processing 368 sentences...

Epoch 4/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:21<17:34, 22.43s/it, loss=-0.0733, reward=0.6098, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:26<27:08, 35.40s/it, loss=-0.0560, reward=0.6152, temp=0.70]

   Processing 45 sentences...

Epoch 4/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:29<19:09, 25.53s/it, loss=-0.0982, reward=0.5669, temp=0.70]

   Processing 45 sentences...

Epoch 4/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:31<13:40, 18.65s/it, loss=-0.1010, reward=0.5695, temp=0.70]

   Processing 96 sentences...

Epoch 4/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:40<11:06, 15.50s/it, loss=-0.0913, reward=0.5915, temp=0.70]

   Processing 184 sentences...

Epoch 4/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:52<10:14, 14.62s/it, loss=-0.0828, reward=0.6051, temp=0.70]

   Processing 63 sentences...

Epoch 4/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:57<08:04, 11.80s/it, loss=-0.0972, reward=0.5827, temp=0.70]

   Processing 123 sentences...

Epoch 4/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:06:04<06:47, 10.19s/it, loss=-0.0869, reward=0.6002, temp=0.70]

   Processing 72 sentences...

Epoch 4/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:06:09<05:40,  8.73s/it, loss=-0.0933, reward=0.5872, temp=0.70]

   Processing 268 sentences...

Epoch 4/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:57<12:57, 20.46s/it, loss=-0.0731, reward=0.6067, temp=0.70]

   Processing 129 sentences...

Epoch 4/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:07:04<10:12, 16.57s/it, loss=-0.0924, reward=0.5968, temp=0.70]

   Processing 57 sentences...

Epoch 4/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:07:08<07:30, 12.52s/it, loss=-0.0924, reward=0.5987, temp=0.70]

   Processing 267 sentences...

Epoch 4/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:26<08:17, 14.21s/it, loss=-0.0759, reward=0.6051, temp=0.70]

   Processing 84 sentences...

Epoch 4/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:29<06:13, 10.98s/it, loss=-0.0989, reward=0.5853, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:18<12:20, 22.44s/it, loss=-0.0094, reward=0.6216, temp=0.70]

   Processing 97 sentences...

Epoch 4/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:31<10:26, 19.58s/it, loss=-0.0898, reward=0.6009, temp=0.70]

   Processing 232 sentences...

Epoch 4/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:09:00<11:33, 22.37s/it, loss=-0.0787, reward=0.6098, temp=0.70]

   Processing 111 sentences...

Epoch 4/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:09:07<08:50, 17.69s/it, loss=-0.0859, reward=0.6084, temp=0.70]

   Processing 24 sentences...

Epoch 4/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:09:08<06:11, 12.81s/it, loss=-0.1110, reward=0.5566, temp=0.70]

   Processing 112 sentences...

Epoch 4/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:09:14<05:02, 10.79s/it, loss=-0.0880, reward=0.6041, temp=0.70]

   Processing 43 sentences...

Epoch 4/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:17<03:46,  8.37s/it, loss=-0.1040, reward=0.5728, temp=0.70]

   Processing 183 sentences...

Epoch 4/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:28<03:55,  9.07s/it, loss=-0.0859, reward=0.6013, temp=0.70]

   Processing 117 sentences...

Epoch 4/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:33<03:14,  7.77s/it, loss=-0.0887, reward=0.6005, temp=0.70]

   Processing 106 sentences...

Epoch 4/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:39<02:58,  7.43s/it, loss=-0.0875, reward=0.6038, temp=0.70]

   Processing 67 sentences...

Epoch 4/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:42<02:16,  5.94s/it, loss=-0.0980, reward=0.5824, temp=0.70]

   Processing 112 sentences...

Epoch 4/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:50<02:30,  6.83s/it, loss=-0.0991, reward=0.5907, temp=0.70]

   Processing 79 sentences...

Epoch 4/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:55<02:09,  6.16s/it, loss=-0.0873, reward=0.6062, temp=0.70]

   Processing 408 sentences...

Epoch 4/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:16<03:32, 10.62s/it, loss=-0.0241, reward=0.6192, temp=0.70]

   Processing 103 sentences...

Epoch 4/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:21<02:47,  8.79s/it, loss=-0.0856, reward=0.6098, temp=0.70]

   Processing 103 sentences...

Epoch 4/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:28<02:27,  8.21s/it, loss=-0.0915, reward=0.6042, temp=0.70]

   Processing 118 sentences...

Epoch 4/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:34<02:08,  7.55s/it, loss=-0.0949, reward=0.5950, temp=0.70]

   Processing 249 sentences...

Epoch 4/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:53<02:59, 11.23s/it, loss=-0.0761, reward=0.6107, temp=0.70]

   Processing 145 sentences...

Epoch 4/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:59<02:23,  9.58s/it, loss=-0.0867, reward=0.6078, temp=0.70]

   Processing 294 sentences...

Epoch 4/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:24<03:20, 14.34s/it, loss=-0.0565, reward=0.6256, temp=0.70]

   Processing 147 sentences...

Epoch 4/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:33<02:42, 12.48s/it, loss=-0.0919, reward=0.5930, temp=0.70]

   Processing 304 sentences...

Epoch 4/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:12:05<03:42, 18.56s/it, loss=-0.0776, reward=0.6103, temp=0.70]

   Processing 128 sentences...

Epoch 4/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:19<03:06, 16.93s/it, loss=-0.0923, reward=0.6021, temp=0.70]

   Processing 247 sentences...

Epoch 4/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:36<02:49, 16.97s/it, loss=-0.0870, reward=0.6071, temp=0.70]

   Processing 165 sentences...

Epoch 4/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:52<02:32, 16.95s/it, loss=-0.0909, reward=0.6048, temp=0.70]

   Processing 38 sentences...

Epoch 4/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:55<01:41, 12.69s/it, loss=-0.1014, reward=0.5779, temp=0.70]

   Processing 63 sentences...

Epoch 4/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:58<01:08,  9.84s/it, loss=-0.1045, reward=0.5847, temp=0.70]

   Processing 215 sentences...

Epoch 4/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:23<01:26, 14.35s/it, loss=-0.0803, reward=0.6095, temp=0.70]

   Processing 39 sentences...

Epoch 4/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:25<00:53, 10.66s/it, loss=-0.1045, reward=0.5804, temp=0.70]

   Processing 105 sentences...

Epoch 4/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:30<00:35,  8.92s/it, loss=-0.0917, reward=0.5982, temp=0.70]

   Processing 138 sentences...

Epoch 4/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:41<00:28,  9.47s/it, loss=-0.0819, reward=0.6116, temp=0.70]

   Processing 161 sentences...

Epoch 4/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:51<00:19,  9.64s/it, loss=-0.1003, reward=0.5967, temp=0.70]

   Processing 35 sentences...

Epoch 4/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:53<00:07,  7.33s/it, loss=-0.1029, reward=0.5877, temp=0.70]

   Processing 500 sentences...ents), truncating to 500

Epoch 4/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:25<00:00, 15.95s/it, loss=-0.0069, reward=0.6197, temp=0.70]


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 4/10:
  Train Loss: -0.0842
  Overall Val Reward: 0.5798
  Learning Rate: 0.000100
  Temperature: 0.7000

  Aspect-wise Val Rewards:
    facts       : 0.5789
    analysis    : 0.5799
    argument    : 0.5798
    judgement   : 0.5795
    statute     : 0.5807



Epoch 5/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 5/10:   0%|          | 1/280 [00:28<2:12:05, 28.41s/it, loss=-0.0736, reward=0.6124, temp=0.60]

   Processing 353 sentences...

Epoch 5/10:   1%|          | 2/280 [00:52<2:00:20, 25.97s/it, loss=-0.0833, reward=0.6096, temp=0.60]

   Processing 57 sentences...

Epoch 5/10:   1%|          | 3/280 [00:59<1:19:55, 17.31s/it, loss=-0.0978, reward=0.5881, temp=0.60]

   Processing 96 sentences...

Epoch 5/10:   1%|‚ñè         | 4/280 [01:13<1:13:50, 16.05s/it, loss=-0.0959, reward=0.5844, temp=0.60]

   Processing 71 sentences...

Epoch 5/10:   2%|‚ñè         | 5/280 [01:16<52:08, 11.38s/it, loss=-0.0909, reward=0.6029, temp=0.60]  

   Processing 62 sentences...

Epoch 5/10:   2%|‚ñè         | 6/280 [01:19<38:35,  8.45s/it, loss=-0.0944, reward=0.5951, temp=0.60]

   Processing 350 sentences...

Epoch 5/10:   2%|‚ñé         | 7/280 [01:52<1:14:05, 16.28s/it, loss=-0.0765, reward=0.6124, temp=0.60]

   Processing 263 sentences...

Epoch 5/10:   3%|‚ñé         | 8/280 [02:08<1:13:19, 16.17s/it, loss=-0.0818, reward=0.6104, temp=0.60]

   Processing 176 sentences...

Epoch 5/10:   3%|‚ñé         | 9/280 [02:17<1:03:58, 14.17s/it, loss=-0.0848, reward=0.6080, temp=0.60]

   Processing 318 sentences...

Epoch 5/10:   4%|‚ñé         | 10/280 [03:20<2:11:48, 29.29s/it, loss=-0.0816, reward=0.6099, temp=0.60]

   Processing 338 sentences...

Epoch 5/10:   4%|‚ñç         | 11/280 [03:44<2:04:04, 27.67s/it, loss=-0.0757, reward=0.6194, temp=0.60]

   Processing 40 sentences...

Epoch 5/10:   4%|‚ñç         | 12/280 [03:47<1:29:17, 19.99s/it, loss=-0.0948, reward=0.5871, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:   5%|‚ñç         | 13/280 [04:39<2:12:48, 29.85s/it, loss=-0.0769, reward=0.6155, temp=0.60]

   Processing 423 sentences...

Epoch 5/10:   5%|‚ñå         | 14/280 [05:38<2:51:07, 38.60s/it, loss=-0.0759, reward=0.6152, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:   5%|‚ñå         | 15/280 [05:57<2:23:50, 32.57s/it, loss=-0.0615, reward=0.6408, temp=0.60]

   Processing 82 sentences...

Epoch 5/10:   6%|‚ñå         | 16/280 [06:00<1:44:00, 23.64s/it, loss=-0.0950, reward=0.5988, temp=0.60]

   Processing 137 sentences...

Epoch 5/10:   6%|‚ñå         | 17/280 [06:07<1:22:03, 18.72s/it, loss=-0.0895, reward=0.6036, temp=0.60]

   Processing 30 sentences...

Epoch 5/10:   6%|‚ñã         | 18/280 [06:10<1:00:33, 13.87s/it, loss=-0.1180, reward=0.5688, temp=0.60]

   Processing 82 sentences...

Epoch 5/10:   7%|‚ñã         | 19/280 [06:18<52:37, 12.10s/it, loss=-0.0990, reward=0.5934, temp=0.60]  

   Processing 61 sentences...

Epoch 5/10:   7%|‚ñã         | 20/280 [06:21<40:34,  9.36s/it, loss=-0.0955, reward=0.5973, temp=0.60]

   Processing 50 sentences...

Epoch 5/10:   8%|‚ñä         | 21/280 [06:23<32:09,  7.45s/it, loss=-0.0982, reward=0.5766, temp=0.60]

   Processing 115 sentences...

Epoch 5/10:   8%|‚ñä         | 22/280 [06:32<33:26,  7.78s/it, loss=-0.0981, reward=0.5921, temp=0.60]

   Processing 80 sentences...

Epoch 5/10:   8%|‚ñä         | 23/280 [06:35<27:26,  6.41s/it, loss=-0.0905, reward=0.6070, temp=0.60]

   Processing 434 sentences...

Epoch 5/10:   9%|‚ñä         | 24/280 [08:02<2:10:06, 30.50s/it, loss=-0.0736, reward=0.6169, temp=0.60]

   Processing 254 sentences...

Epoch 5/10:   9%|‚ñâ         | 25/280 [08:34<2:11:54, 31.04s/it, loss=-0.0866, reward=0.6089, temp=0.60]

   Processing 329 sentences...

Epoch 5/10:   9%|‚ñâ         | 26/280 [08:53<1:56:25, 27.50s/it, loss=-0.0805, reward=0.6113, temp=0.60]

   Processing 87 sentences...

Epoch 5/10:  10%|‚ñâ         | 27/280 [08:58<1:27:06, 20.66s/it, loss=-0.1020, reward=0.5879, temp=0.60]

   Processing 39 sentences...

Epoch 5/10:  10%|‚ñà         | 28/280 [09:00<1:02:49, 14.96s/it, loss=-0.1121, reward=0.5714, temp=0.60]

   Processing 83 sentences...

Epoch 5/10:  10%|‚ñà         | 29/280 [09:03<47:58, 11.47s/it, loss=-0.0972, reward=0.5952, temp=0.60]  

   Processing 177 sentences...

Epoch 5/10:  11%|‚ñà         | 30/280 [09:15<48:24, 11.62s/it, loss=-0.0933, reward=0.5981, temp=0.60]

   Processing 82 sentences...

Epoch 5/10:  11%|‚ñà         | 31/280 [09:23<43:42, 10.53s/it, loss=-0.1160, reward=0.5799, temp=0.60]

   Processing 136 sentences...

Epoch 5/10:  11%|‚ñà‚ñè        | 32/280 [09:28<36:38,  8.87s/it, loss=-0.0938, reward=0.6000, temp=0.60]

   Processing 166 sentences...

Epoch 5/10:  12%|‚ñà‚ñè        | 33/280 [09:38<37:37,  9.14s/it, loss=-0.0843, reward=0.5998, temp=0.60]

   Processing 91 sentences...

Epoch 5/10:  12%|‚ñà‚ñè        | 34/280 [09:43<33:01,  8.06s/it, loss=-0.1057, reward=0.5874, temp=0.60]

   Processing 87 sentences...

Epoch 5/10:  12%|‚ñà‚ñé        | 35/280 [09:48<29:12,  7.15s/it, loss=-0.0734, reward=0.6058, temp=0.60]

   Processing 203 sentences...

Epoch 5/10:  13%|‚ñà‚ñé        | 36/280 [10:03<37:42,  9.27s/it, loss=-0.0600, reward=0.6094, temp=0.60]

   Processing 140 sentences...

Epoch 5/10:  13%|‚ñà‚ñé        | 37/280 [10:13<38:56,  9.61s/it, loss=-0.0767, reward=0.6103, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  14%|‚ñà‚ñé        | 38/280 [11:06<1:30:52, 22.53s/it, loss=-0.0109, reward=0.6136, temp=0.60]

   Processing 97 sentences...

Epoch 5/10:  14%|‚ñà‚ñç        | 39/280 [11:12<1:10:42, 17.60s/it, loss=-0.1089, reward=0.5909, temp=0.60]

   Processing 182 sentences...

Epoch 5/10:  14%|‚ñà‚ñç        | 40/280 [11:24<1:04:24, 16.10s/it, loss=-0.1116, reward=0.5974, temp=0.60]

   Processing 87 sentences...

Epoch 5/10:  15%|‚ñà‚ñç        | 41/280 [11:29<49:58, 12.55s/it, loss=-0.0830, reward=0.6052, temp=0.60]  

   Processing 235 sentences...

Epoch 5/10:  15%|‚ñà‚ñå        | 42/280 [12:02<1:14:17, 18.73s/it, loss=-0.0792, reward=0.6006, temp=0.60]

   Processing 50 sentences...

Epoch 5/10:  15%|‚ñà‚ñå        | 43/280 [12:04<54:36, 13.83s/it, loss=-0.0990, reward=0.5735, temp=0.60]  

   Processing 70 sentences...

Epoch 5/10:  16%|‚ñà‚ñå        | 44/280 [12:09<43:12, 10.99s/it, loss=-0.0953, reward=0.5918, temp=0.60]

   Processing 65 sentences...

Epoch 5/10:  16%|‚ñà‚ñå        | 45/280 [12:14<36:48,  9.40s/it, loss=-0.1100, reward=0.5839, temp=0.60]

   Processing 482 sentences...

Epoch 5/10:  16%|‚ñà‚ñã        | 46/280 [13:04<1:24:11, 21.59s/it, loss=-0.0717, reward=0.6170, temp=0.60]

   Processing 164 sentences...

Epoch 5/10:  17%|‚ñà‚ñã        | 47/280 [13:20<1:16:36, 19.73s/it, loss=-0.0918, reward=0.5990, temp=0.60]

   Processing 102 sentences...

Epoch 5/10:  17%|‚ñà‚ñã        | 48/280 [13:26<1:00:56, 15.76s/it, loss=-0.0877, reward=0.6045, temp=0.60]

   Processing 48 sentences...

Epoch 5/10:  18%|‚ñà‚ñä        | 49/280 [13:28<44:28, 11.55s/it, loss=-0.0975, reward=0.5772, temp=0.60]  

   Processing 74 sentences...

Epoch 5/10:  18%|‚ñà‚ñä        | 50/280 [13:33<37:19,  9.73s/it, loss=-0.0865, reward=0.6002, temp=0.60]

   Processing 67 sentences...

Epoch 5/10:  18%|‚ñà‚ñä        | 51/280 [13:39<32:44,  8.58s/it, loss=-0.0967, reward=0.5912, temp=0.60]

   Processing 31 sentences...

Epoch 5/10:  19%|‚ñà‚ñä        | 52/280 [13:41<24:37,  6.48s/it, loss=-0.1053, reward=0.5670, temp=0.60]

   Processing 53 sentences...

Epoch 5/10:  19%|‚ñà‚ñâ        | 53/280 [13:45<21:17,  5.63s/it, loss=-0.1133, reward=0.5766, temp=0.60]

   Processing 123 sentences...

Epoch 5/10:  19%|‚ñà‚ñâ        | 54/280 [13:52<23:18,  6.19s/it, loss=-0.0882, reward=0.6034, temp=0.60]

   Processing 169 sentences...

Epoch 5/10:  20%|‚ñà‚ñâ        | 55/280 [14:02<27:04,  7.22s/it, loss=-0.0616, reward=0.6088, temp=0.60]

   Processing 33 sentences...

Epoch 5/10:  20%|‚ñà‚ñà        | 56/280 [14:05<22:04,  5.91s/it, loss=-0.1234, reward=0.5551, temp=0.60]

   Processing 190 sentences...

Epoch 5/10:  20%|‚ñà‚ñà        | 57/280 [14:16<27:45,  7.47s/it, loss=-0.0345, reward=0.6157, temp=0.60]

   Processing 223 sentences...

Epoch 5/10:  21%|‚ñà‚ñà        | 58/280 [14:33<38:41, 10.46s/it, loss=-0.0777, reward=0.6031, temp=0.60]

   Processing 57 sentences...

Epoch 5/10:  21%|‚ñà‚ñà        | 59/280 [14:36<30:00,  8.15s/it, loss=-0.0988, reward=0.5927, temp=0.60]

   Processing 362 sentences...

Epoch 5/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:01<48:11, 13.14s/it, loss=-0.0432, reward=0.6347, temp=0.60]

   Processing 192 sentences...

Epoch 5/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:19<53:55, 14.77s/it, loss=-0.0885, reward=0.6020, temp=0.60]

   Processing 108 sentences...

Epoch 5/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:22<41:08, 11.32s/it, loss=-0.0896, reward=0.6060, temp=0.60]

   Processing 380 sentences...

Epoch 5/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:03<1:12:14, 19.98s/it, loss=-0.0960, reward=0.5962, temp=0.60]

   Processing 285 sentences...

Epoch 5/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:33<1:23:04, 23.08s/it, loss=-0.0839, reward=0.6074, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:32<2:01:31, 33.91s/it, loss=-0.0767, reward=0.6125, temp=0.60]

   Processing 144 sentences...

Epoch 5/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:42<1:34:48, 26.58s/it, loss=-0.0934, reward=0.6018, temp=0.60]

   Processing 202 sentences...

Epoch 5/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [17:59<1:24:17, 23.74s/it, loss=-0.0840, reward=0.6099, temp=0.60]

   Processing 271 sentences...

Epoch 5/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:28<1:29:34, 25.35s/it, loss=-0.0827, reward=0.6074, temp=0.60]

   Processing 51 sentences...

Epoch 5/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:30<1:04:41, 18.39s/it, loss=-0.1006, reward=0.5863, temp=0.60]

   Processing 376 sentences...

Epoch 5/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [19:00<1:16:31, 21.86s/it, loss=-0.0812, reward=0.6103, temp=0.60]

   Processing 61 sentences...

Epoch 5/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:04<57:14, 16.44s/it, loss=-0.0951, reward=0.5915, temp=0.60]  

   Processing 63 sentences...

Epoch 5/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:07<43:20, 12.50s/it, loss=-0.0990, reward=0.5899, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:15<1:40:00, 28.99s/it, loss=-0.0773, reward=0.6119, temp=0.60]

   Processing 254 sentences...

Epoch 5/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:41<1:36:37, 28.14s/it, loss=-0.0814, reward=0.6121, temp=0.60]

   Processing 118 sentences...

Epoch 5/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:03<1:30:29, 26.48s/it, loss=-0.1002, reward=0.5868, temp=0.60]

   Processing 439 sentences...

Epoch 5/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:38<1:38:24, 28.94s/it, loss=-0.0709, reward=0.6107, temp=0.60]

   Processing 51 sentences...

Epoch 5/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:40<1:10:41, 20.90s/it, loss=-0.1043, reward=0.5887, temp=0.60]

   Processing 143 sentences...

Epoch 5/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:51<1:00:08, 17.86s/it, loss=-0.0908, reward=0.5992, temp=0.60]

   Processing 300 sentences...

Epoch 5/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:12<1:03:08, 18.85s/it, loss=-0.0800, reward=0.6131, temp=0.60]

   Processing 137 sentences...

Epoch 5/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:21<52:46, 15.83s/it, loss=-0.1006, reward=0.5936, temp=0.60]  

   Processing 120 sentences...

Epoch 5/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:28<43:34, 13.14s/it, loss=-0.0889, reward=0.6068, temp=0.60]

   Processing 170 sentences...

Epoch 5/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:41<43:36, 13.22s/it, loss=-0.0878, reward=0.6099, temp=0.60]

   Processing 107 sentences...

Epoch 5/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:48<36:51, 11.23s/it, loss=-0.0973, reward=0.6004, temp=0.60]

   Processing 48 sentences...

Epoch 5/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:52<29:38,  9.07s/it, loss=-0.1158, reward=0.5621, temp=0.60]

   Processing 163 sentences...

Epoch 5/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [23:00<28:57,  8.91s/it, loss=-0.0868, reward=0.6042, temp=0.60]

   Processing 108 sentences...

Epoch 5/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:10<29:15,  9.05s/it, loss=-0.0875, reward=0.6113, temp=0.60]

   Processing 86 sentences...

Epoch 5/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:14<24:36,  7.65s/it, loss=-0.0964, reward=0.5971, temp=0.60]

   Processing 74 sentences...

Epoch 5/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:27<29:10,  9.12s/it, loss=-0.1131, reward=0.5904, temp=0.60]

   Processing 81 sentences...

Epoch 5/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:36<29:32,  9.28s/it, loss=-0.1154, reward=0.5885, temp=0.60]

   Processing 203 sentences...

Epoch 5/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:48<31:40, 10.00s/it, loss=-0.0509, reward=0.6204, temp=0.60]

   Processing 54 sentences...

Epoch 5/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:51<24:34,  7.80s/it, loss=-0.1092, reward=0.5864, temp=0.60]

   Processing 143 sentences...

Epoch 5/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [24:00<26:09,  8.35s/it, loss=-0.0841, reward=0.6129, temp=0.60]

   Processing 36 sentences...

Epoch 5/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [24:03<20:25,  6.55s/it, loss=-0.1194, reward=0.5765, temp=0.60]

   Processing 106 sentences...

Epoch 5/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:07<18:43,  6.04s/it, loss=-0.0968, reward=0.5994, temp=0.60]

   Processing 133 sentences...

Epoch 5/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:33<36:47, 11.93s/it, loss=-0.0982, reward=0.5937, temp=0.60]

   Processing 206 sentences...

Epoch 5/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:43<34:54, 11.39s/it, loss=-0.0812, reward=0.6094, temp=0.60]

   Processing 162 sentences...

Epoch 5/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:04<43:48, 14.36s/it, loss=-0.0898, reward=0.6040, temp=0.60]

   Processing 42 sentences...

Epoch 5/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:06<32:07, 10.59s/it, loss=-0.0973, reward=0.5991, temp=0.60]

   Processing 89 sentences...

Epoch 5/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:11<26:24,  8.75s/it, loss=-0.0951, reward=0.5989, temp=0.60]

   Processing 32 sentences...

Epoch 5/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:15<21:48,  7.27s/it, loss=-0.1208, reward=0.5801, temp=0.60]

   Processing 408 sentences...

Epoch 5/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:34<1:26:27, 28.98s/it, loss=-0.0774, reward=0.6100, temp=0.60]

   Processing 35 sentences...

Epoch 5/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:37<1:02:19, 21.01s/it, loss=-0.1479, reward=0.5555, temp=0.60]

   Processing 137 sentences...

Epoch 5/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:44<50:09, 17.00s/it, loss=-0.0926, reward=0.5957, temp=0.60]  

   Processing 105 sentences...

Epoch 5/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [26:53<42:14, 14.40s/it, loss=-0.0990, reward=0.5940, temp=0.60]

   Processing 124 sentences...

Epoch 5/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:04<39:26, 13.52s/it, loss=-0.0926, reward=0.5972, temp=0.60]

   Processing 220 sentences...

Epoch 5/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:18<39:48, 13.73s/it, loss=-0.0717, reward=0.6120, temp=0.60]

   Processing 98 sentences...

Epoch 5/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:23<31:50, 11.04s/it, loss=-0.0904, reward=0.6001, temp=0.60]

   Processing 133 sentences...

Epoch 5/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:30<28:07,  9.81s/it, loss=-0.0589, reward=0.6113, temp=0.60]

   Processing 315 sentences...

Epoch 5/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:00<45:04, 15.82s/it, loss=-0.0811, reward=0.6102, temp=0.60]

   Processing 81 sentences...

Epoch 5/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:03<34:27, 12.16s/it, loss=-0.0949, reward=0.5969, temp=0.60]

   Processing 286 sentences...

Epoch 5/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:20<37:43, 13.39s/it, loss=-0.0797, reward=0.6098, temp=0.60]

   Processing 122 sentences...

Epoch 5/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:27<32:01, 11.44s/it, loss=-0.0820, reward=0.6054, temp=0.60]

   Processing 170 sentences...

Epoch 5/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:45<37:36, 13.51s/it, loss=-0.0878, reward=0.5991, temp=0.60]

   Processing 376 sentences...

Epoch 5/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:30<1:03:51, 23.08s/it, loss=-0.0814, reward=0.6087, temp=0.60]

   Processing 334 sentences...

Epoch 5/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:21<1:26:26, 31.43s/it, loss=-0.0807, reward=0.6133, temp=0.60]

   Processing 102 sentences...

Epoch 5/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:32<1:09:14, 25.33s/it, loss=-0.0928, reward=0.5993, temp=0.60]

   Processing 268 sentences...

Epoch 5/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:47<59:50, 22.03s/it, loss=-0.0853, reward=0.6105, temp=0.60]  

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:26<2:02:20, 45.31s/it, loss=-0.0686, reward=0.6170, temp=0.60]

   Processing 253 sentences...

Epoch 5/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:43<1:38:33, 36.73s/it, loss=-0.0920, reward=0.6066, temp=0.60]

   Processing 132 sentences...

Epoch 5/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:52<1:16:06, 28.54s/it, loss=-0.0980, reward=0.5990, temp=0.60]

   Processing 286 sentences...

Epoch 5/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:15<1:11:06, 26.84s/it, loss=-0.0881, reward=0.6086, temp=0.60]

   Processing 128 sentences...

Epoch 5/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:23<55:12, 20.97s/it, loss=-0.1004, reward=0.5940, temp=0.60]  

   Processing 37 sentences...

Epoch 5/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:25<40:16, 15.39s/it, loss=-0.0984, reward=0.5913, temp=0.60]

   Processing 205 sentences...

Epoch 5/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:43<42:08, 16.21s/it, loss=-0.0809, reward=0.6052, temp=0.60]

   Processing 87 sentences...

Epoch 5/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:50<34:29, 13.35s/it, loss=-0.1042, reward=0.5969, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [34:59<1:17:36, 30.24s/it, loss=-0.0598, reward=0.6128, temp=0.60]

   Processing 144 sentences...

Epoch 5/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:12<1:03:21, 24.85s/it, loss=-0.0884, reward=0.6109, temp=0.60]

   Processing 80 sentences...

Epoch 5/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:19<49:44, 19.63s/it, loss=-0.1031, reward=0.5964, temp=0.60]  

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:46<1:40:13, 39.83s/it, loss=-0.0729, reward=0.6061, temp=0.60]

   Processing 124 sentences...

Epoch 5/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [36:55<1:16:31, 30.61s/it, loss=-0.0950, reward=0.6039, temp=0.60]

   Processing 112 sentences...

Epoch 5/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:05<1:00:38, 24.42s/it, loss=-0.1018, reward=0.5937, temp=0.60]

   Processing 75 sentences...

Epoch 5/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:09<45:21, 18.39s/it, loss=-0.1054, reward=0.5883, temp=0.60]  

   Processing 165 sentences...

Epoch 5/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:21<40:13, 16.42s/it, loss=-0.0870, reward=0.6109, temp=0.60]

   Processing 146 sentences...

Epoch 5/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:40<41:23, 17.01s/it, loss=-0.0900, reward=0.6076, temp=0.60]

   Processing 100 sentences...

Epoch 5/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:45<32:34, 13.48s/it, loss=-0.0918, reward=0.6113, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:23<1:33:05, 38.79s/it, loss=-0.0635, reward=0.6118, temp=0.60]

   Processing 209 sentences...

Epoch 5/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:38<1:15:19, 31.60s/it, loss=-0.0961, reward=0.6019, temp=0.60]

   Processing 90 sentences...

Epoch 5/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:44<56:43, 23.97s/it, loss=-0.0958, reward=0.6034, temp=0.60]  

   Processing 72 sentences...

Epoch 5/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:48<42:40, 18.16s/it, loss=-0.1016, reward=0.5953, temp=0.60]

   Processing 38 sentences...

Epoch 5/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [39:50<31:01, 13.30s/it, loss=-0.1035, reward=0.5813, temp=0.60]

   Processing 131 sentences...

Epoch 5/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [39:56<25:10, 10.87s/it, loss=-0.0954, reward=0.6020, temp=0.60]

   Processing 78 sentences...

Epoch 5/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:00<20:40,  8.99s/it, loss=-0.1003, reward=0.5950, temp=0.60]

   Processing 119 sentences...

Epoch 5/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:08<19:38,  8.60s/it, loss=-0.0966, reward=0.6016, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [41:49<1:22:13, 36.28s/it, loss=-0.0566, reward=0.6174, temp=0.60]

   Processing 133 sentences...

Epoch 5/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [41:58<1:03:34, 28.26s/it, loss=-0.0912, reward=0.6063, temp=0.60]

   Processing 83 sentences...

Epoch 5/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:05<48:31, 21.73s/it, loss=-0.1049, reward=0.5819, temp=0.60]  

   Processing 56 sentences...

Epoch 5/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:08<35:47, 16.15s/it, loss=-0.0977, reward=0.5935, temp=0.60]

   Processing 133 sentences...

Epoch 5/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:15<29:20, 13.34s/it, loss=-0.0976, reward=0.5969, temp=0.60]

   Processing 101 sentences...

Epoch 5/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:21<24:14, 11.11s/it, loss=-0.1013, reward=0.5922, temp=0.60]

   Processing 151 sentences...

Epoch 5/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:32<24:08, 11.14s/it, loss=-0.0820, reward=0.6136, temp=0.60]

   Processing 248 sentences...

Epoch 5/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [42:47<26:19, 12.24s/it, loss=-0.0759, reward=0.6098, temp=0.60]

   Processing 500 sentences...sents), truncating to 500

Epoch 5/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:44<54:46, 25.68s/it, loss=-0.0723, reward=0.6108, temp=0.60]

   Processing 145 sentences...

Epoch 5/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [43:50<42:07, 19.90s/it, loss=-0.0956, reward=0.6048, temp=0.60]

   Processing 202 sentences...

Epoch 5/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:07<40:08, 19.12s/it, loss=-0.0968, reward=0.6009, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:39<47:34, 22.84s/it, loss=-0.0570, reward=0.6154, temp=0.60]

   Processing 154 sentences...

Epoch 5/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [44:56<43:45, 21.17s/it, loss=-0.0942, reward=0.6058, temp=0.60]

   Processing 345 sentences...

Epoch 5/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:44<1:00:06, 29.32s/it, loss=-0.0784, reward=0.6128, temp=0.60]

   Processing 99 sentences...

Epoch 5/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [45:49<44:33, 21.92s/it, loss=-0.1058, reward=0.5895, temp=0.60]  

   Processing 500 sentences...sents), truncating to 500

Epoch 5/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:40<1:01:37, 30.56s/it, loss=-0.0721, reward=0.6166, temp=0.60]

   Processing 118 sentences...

Epoch 5/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [46:49<48:20, 24.17s/it, loss=-0.0944, reward=0.5993, temp=0.60]  

   Processing 30 sentences...

Epoch 5/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [46:51<34:31, 17.41s/it, loss=-0.1459, reward=0.5535, temp=0.60]

   Processing 76 sentences...

Epoch 5/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [46:54<26:11, 13.32s/it, loss=-0.1015, reward=0.5946, temp=0.60]

   Processing 95 sentences...

Epoch 5/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:02<22:26, 11.51s/it, loss=-0.1067, reward=0.5761, temp=0.60]

   Processing 78 sentences...

Epoch 5/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:06<18:16,  9.45s/it, loss=-0.0916, reward=0.6030, temp=0.60]

   Processing 153 sentences...

Epoch 5/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:14<17:16,  9.01s/it, loss=-0.0773, reward=0.6088, temp=0.60]

   Processing 229 sentences...

Epoch 5/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:29<20:14, 10.66s/it, loss=-0.0786, reward=0.6060, temp=0.60]

   Processing 108 sentences...

Epoch 5/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:34<16:51,  8.95s/it, loss=-0.0925, reward=0.6002, temp=0.60]

   Processing 154 sentences...

Epoch 5/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [47:52<22:06, 11.85s/it, loss=-0.0865, reward=0.6078, temp=0.60]

   Processing 44 sentences...

Epoch 5/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:00<19:30, 10.55s/it, loss=-0.1125, reward=0.5800, temp=0.60]

   Processing 229 sentences...

Epoch 5/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:37<33:51, 18.47s/it, loss=-0.0846, reward=0.6067, temp=0.60]

   Processing 221 sentences...

Epoch 5/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [48:58<35:01, 19.28s/it, loss=-0.0842, reward=0.6076, temp=0.60]

   Processing 148 sentences...

Epoch 5/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:06<28:18, 15.72s/it, loss=-0.1057, reward=0.5978, temp=0.60]

   Processing 144 sentences...

Epoch 5/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:12<22:59, 12.90s/it, loss=-0.0897, reward=0.6044, temp=0.60]

   Processing 77 sentences...

Epoch 5/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:16<17:55, 10.15s/it, loss=-0.0980, reward=0.6002, temp=0.60]

   Processing 90 sentences...

Epoch 5/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:23<16:11,  9.25s/it, loss=-0.1159, reward=0.5867, temp=0.60]

   Processing 309 sentences...

Epoch 5/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:36<18:05, 10.44s/it, loss=-0.0741, reward=0.6074, temp=0.60]

   Processing 297 sentences...

Epoch 5/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [49:56<23:06, 13.46s/it, loss=-0.0706, reward=0.6204, temp=0.60]

   Processing 286 sentences...

Epoch 5/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:23<29:26, 17.31s/it, loss=-0.0788, reward=0.6142, temp=0.60]

   Processing 66 sentences...

Epoch 5/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:26<21:50, 12.98s/it, loss=-0.1266, reward=0.5763, temp=0.60]

   Processing 220 sentences...

Epoch 5/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:41<22:45, 13.66s/it, loss=-0.0877, reward=0.6074, temp=0.60]

   Processing 209 sentences...

Epoch 5/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [50:59<24:39, 14.95s/it, loss=-0.0863, reward=0.6017, temp=0.60]

   Processing 70 sentences...

Epoch 5/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:03<19:15, 11.79s/it, loss=-0.1148, reward=0.5946, temp=0.60]

   Processing 214 sentences...

Epoch 5/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:14<18:29, 11.44s/it, loss=-0.0738, reward=0.6338, temp=0.60]

   Processing 213 sentences...

Epoch 5/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:26<18:26, 11.53s/it, loss=-0.0871, reward=0.6082, temp=0.60]

   Processing 29 sentences...

Epoch 5/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:28<13:42,  8.66s/it, loss=-0.1393, reward=0.5524, temp=0.60]

   Processing 91 sentences...

Epoch 5/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:32<11:36,  7.41s/it, loss=-0.0992, reward=0.5982, temp=0.60]

   Processing 291 sentences...

Epoch 5/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [51:53<17:35, 11.35s/it, loss=-0.0817, reward=0.6099, temp=0.60]

   Processing 80 sentences...

Epoch 5/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:01<16:14, 10.59s/it, loss=-0.1036, reward=0.5937, temp=0.60]

   Processing 43 sentences...

Epoch 5/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:04<12:29,  8.24s/it, loss=-0.1359, reward=0.5860, temp=0.60]

   Processing 45 sentences...

Epoch 5/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:07<09:46,  6.52s/it, loss=-0.1500, reward=0.5745, temp=0.60]

   Processing 89 sentences...

Epoch 5/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:10<08:27,  5.71s/it, loss=-0.0844, reward=0.5976, temp=0.60]

   Processing 194 sentences...

Epoch 5/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:20<10:07,  6.91s/it, loss=0.1530, reward=0.6120, temp=0.60] 

   Processing 137 sentences...

Epoch 5/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:30<11:28,  7.92s/it, loss=-0.2104, reward=0.5835, temp=0.60]

   Processing 73 sentences...

Epoch 5/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:34<09:23,  6.55s/it, loss=-0.1023, reward=0.5890, temp=0.60]

   Processing 158 sentences...

Epoch 5/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:47<12:05,  8.54s/it, loss=-0.0213, reward=0.6033, temp=0.60]

   Processing 56 sentences...

Epoch 5/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [52:50<09:28,  6.77s/it, loss=-0.1844, reward=0.5657, temp=0.60]

   Processing 180 sentences...

Epoch 5/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:00<10:56,  7.91s/it, loss=0.0862, reward=0.6074, temp=0.60] 

   Processing 338 sentences...

Epoch 5/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:20<15:39, 11.46s/it, loss=0.2008, reward=0.6091, temp=0.60]

   Processing 261 sentences...

Epoch 5/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:47<21:40, 16.05s/it, loss=0.0270, reward=0.6069, temp=0.60]

   Processing 156 sentences...

Epoch 5/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [53:53<17:27, 13.10s/it, loss=-0.0300, reward=0.6070, temp=0.60]

   Processing 43 sentences...

Epoch 5/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [53:56<13:10, 10.01s/it, loss=-0.1315, reward=0.5752, temp=0.60]

   Processing 144 sentences...

Epoch 5/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:09<14:20, 11.03s/it, loss=-0.0783, reward=0.6025, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:02<30:16, 23.60s/it, loss=0.1095, reward=0.6195, temp=0.60] 

   Processing 49 sentences...

Epoch 5/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:05<22:04, 17.43s/it, loss=-0.1102, reward=0.5766, temp=0.60]

   Processing 66 sentences...

Epoch 5/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:12<17:41, 14.15s/it, loss=-0.1018, reward=0.5890, temp=0.60]

   Processing 103 sentences...

Epoch 5/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:18<14:32, 11.79s/it, loss=-0.0899, reward=0.5999, temp=0.60]

   Processing 350 sentences...

Epoch 5/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:45<19:57, 16.41s/it, loss=-0.0584, reward=0.6101, temp=0.60]

   Processing 111 sentences...

Epoch 5/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [55:54<16:50, 14.04s/it, loss=-0.0974, reward=0.5893, temp=0.60]

   Processing 51 sentences...

Epoch 5/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [55:56<12:32, 10.60s/it, loss=-0.0898, reward=0.5917, temp=0.60]

   Processing 146 sentences...

Epoch 5/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:08<12:46, 10.96s/it, loss=-0.0801, reward=0.6082, temp=0.60]

   Processing 161 sentences...

Epoch 5/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:18<12:18, 10.70s/it, loss=-0.0768, reward=0.6116, temp=0.60]

   Processing 243 sentences...

Epoch 5/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:33<13:41, 12.09s/it, loss=-0.0656, reward=0.6211, temp=0.60]

   Processing 297 sentences...

Epoch 5/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [57:00<18:29, 16.56s/it, loss=-0.0785, reward=0.6081, temp=0.60]

   Processing 47 sentences...

Epoch 5/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [57:03<13:37, 12.39s/it, loss=-0.0925, reward=0.5478, temp=0.60]

   Processing 415 sentences...

Epoch 5/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:11<31:35, 29.17s/it, loss=-0.0664, reward=0.6116, temp=0.60]

   Processing 138 sentences...

Epoch 5/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:23<25:34, 23.97s/it, loss=-0.0827, reward=0.6020, temp=0.60]

   Processing 46 sentences...

Epoch 5/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:25<18:18, 17.44s/it, loss=-0.0907, reward=0.5801, temp=0.60]

   Processing 73 sentences...

Epoch 5/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:29<13:51, 13.41s/it, loss=-0.0881, reward=0.5907, temp=0.60]

   Processing 136 sentences...

Epoch 5/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:42<13:30, 13.28s/it, loss=-0.0886, reward=0.5907, temp=0.60]

   Processing 93 sentences...

Epoch 5/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [58:48<10:55, 10.93s/it, loss=-0.0800, reward=0.6114, temp=0.60]

   Processing 267 sentences...

Epoch 5/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:04<12:23, 12.60s/it, loss=-0.0753, reward=0.6087, temp=0.60]

   Processing 421 sentences...

Epoch 5/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:12<28:03, 29.02s/it, loss=-0.0630, reward=0.6093, temp=0.60]

   Processing 78 sentences...

Epoch 5/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:15<20:19, 21.40s/it, loss=-0.0930, reward=0.5901, temp=0.60]

   Processing 275 sentences...

Epoch 5/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:35<19:37, 21.03s/it, loss=-0.0711, reward=0.6123, temp=0.60]

   Processing 56 sentences...

Epoch 5/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:39<14:26, 15.76s/it, loss=-0.0961, reward=0.5777, temp=0.60]

   Processing 292 sentences...

Epoch 5/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:00:53<13:50, 15.38s/it, loss=-0.0866, reward=0.6028, temp=0.60]

   Processing 393 sentences...

Epoch 5/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:34<20:18, 22.99s/it, loss=-0.0682, reward=0.6115, temp=0.60]

   Processing 412 sentences...

Epoch 5/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:18<25:21, 29.25s/it, loss=-0.0594, reward=0.6141, temp=0.60]

   Processing 53 sentences...

Epoch 5/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:20<18:00, 21.18s/it, loss=-0.0983, reward=0.5783, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:06<23:47, 28.55s/it, loss=-0.0462, reward=0.6138, temp=0.60]

   Processing 449 sentences...

Epoch 5/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:38<24:11, 29.61s/it, loss=-0.0613, reward=0.6121, temp=0.60]

   Processing 81 sentences...

Epoch 5/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:43<17:44, 22.18s/it, loss=-0.0949, reward=0.5957, temp=0.60]

   Processing 368 sentences...

Epoch 5/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:06<17:37, 22.50s/it, loss=-0.0852, reward=0.6055, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:10<26:45, 34.91s/it, loss=-0.0783, reward=0.6083, temp=0.60]

   Processing 45 sentences...

Epoch 5/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:13<18:54, 25.20s/it, loss=-0.0987, reward=0.5803, temp=0.60]

   Processing 45 sentences...

Epoch 5/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:15<13:30, 18.43s/it, loss=-0.0984, reward=0.5817, temp=0.60]

   Processing 96 sentences...

Epoch 5/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:24<11:02, 15.41s/it, loss=-0.0971, reward=0.5933, temp=0.60]

   Processing 184 sentences...

Epoch 5/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:36<10:08, 14.50s/it, loss=-0.0843, reward=0.6029, temp=0.60]

   Processing 63 sentences...

Epoch 5/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:41<08:01, 11.73s/it, loss=-0.0989, reward=0.5817, temp=0.60]

   Processing 123 sentences...

Epoch 5/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:05:48<06:48, 10.20s/it, loss=-0.0854, reward=0.6102, temp=0.60]

   Processing 72 sentences...

Epoch 5/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:05:53<05:41,  8.76s/it, loss=-0.0948, reward=0.5884, temp=0.60]

   Processing 268 sentences...

Epoch 5/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:43<13:13, 20.89s/it, loss=-0.0722, reward=0.6096, temp=0.60]

   Processing 129 sentences...

Epoch 5/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:06:50<10:25, 16.92s/it, loss=-0.0902, reward=0.6040, temp=0.60]

   Processing 57 sentences...

Epoch 5/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:06:53<07:37, 12.72s/it, loss=-0.1015, reward=0.5861, temp=0.60]

   Processing 267 sentences...

Epoch 5/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:11<08:23, 14.38s/it, loss=-0.0883, reward=0.6024, temp=0.60]

   Processing 84 sentences...

Epoch 5/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:15<06:17, 11.11s/it, loss=-0.0969, reward=0.5934, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:04<12:23, 22.54s/it, loss=-0.0298, reward=0.6169, temp=0.60]

   Processing 97 sentences...

Epoch 5/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:17<10:31, 19.74s/it, loss=-0.0973, reward=0.5962, temp=0.60]

   Processing 232 sentences...

Epoch 5/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:08:46<11:38, 22.53s/it, loss=-0.0854, reward=0.6072, temp=0.60]

   Processing 111 sentences...

Epoch 5/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:08:53<08:54, 17.82s/it, loss=-0.0862, reward=0.6155, temp=0.60]

   Processing 24 sentences...

Epoch 5/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:08:55<06:14, 12.92s/it, loss=-0.1197, reward=0.5350, temp=0.60]

   Processing 112 sentences...

Epoch 5/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:09:01<05:05, 10.89s/it, loss=-0.0923, reward=0.6003, temp=0.60]

   Processing 43 sentences...

Epoch 5/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:04<03:48,  8.45s/it, loss=-0.1021, reward=0.5766, temp=0.60]

   Processing 183 sentences...

Epoch 5/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:14<03:58,  9.16s/it, loss=-0.0842, reward=0.6054, temp=0.60]

   Processing 117 sentences...

Epoch 5/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:19<03:15,  7.82s/it, loss=-0.0851, reward=0.6031, temp=0.60]

   Processing 106 sentences...

Epoch 5/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:26<02:58,  7.43s/it, loss=-0.0939, reward=0.5983, temp=0.60]

   Processing 67 sentences...

Epoch 5/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:28<02:16,  5.94s/it, loss=-0.1048, reward=0.5836, temp=0.60]

   Processing 112 sentences...

Epoch 5/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:37<02:30,  6.83s/it, loss=-0.0958, reward=0.5927, temp=0.60]

   Processing 79 sentences...

Epoch 5/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:42<02:09,  6.17s/it, loss=-0.0894, reward=0.6081, temp=0.60]

   Processing 408 sentences...

Epoch 5/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:02<03:30, 10.52s/it, loss=-0.0413, reward=0.6157, temp=0.60]

   Processing 103 sentences...

Epoch 5/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:07<02:46,  8.76s/it, loss=-0.0955, reward=0.6013, temp=0.60]

   Processing 103 sentences...

Epoch 5/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:14<02:26,  8.16s/it, loss=-0.0926, reward=0.6057, temp=0.60]

   Processing 118 sentences...

Epoch 5/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:20<02:08,  7.55s/it, loss=-0.1033, reward=0.5952, temp=0.60]

   Processing 249 sentences...

Epoch 5/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:40<02:59, 11.22s/it, loss=-0.0817, reward=0.6074, temp=0.60]

   Processing 145 sentences...

Epoch 5/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:45<02:22,  9.53s/it, loss=-0.0967, reward=0.5983, temp=0.60]

   Processing 294 sentences...

Epoch 5/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:11<03:22, 14.46s/it, loss=-0.0717, reward=0.6156, temp=0.60]

   Processing 147 sentences...

Epoch 5/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:19<02:44, 12.63s/it, loss=-0.0891, reward=0.6056, temp=0.60]

   Processing 304 sentences...

Epoch 5/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:11:52<03:44, 18.74s/it, loss=-0.0792, reward=0.6107, temp=0.60]

   Processing 128 sentences...

Epoch 5/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:05<03:06, 16.98s/it, loss=-0.0877, reward=0.6062, temp=0.60]

   Processing 247 sentences...

Epoch 5/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:22<02:50, 17.02s/it, loss=-0.0905, reward=0.6076, temp=0.60]

   Processing 165 sentences...

Epoch 5/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:39<02:32, 16.94s/it, loss=-0.0903, reward=0.6073, temp=0.60]

   Processing 38 sentences...

Epoch 5/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:42<01:41, 12.70s/it, loss=-0.1128, reward=0.5676, temp=0.60]

   Processing 63 sentences...

Epoch 5/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:45<01:08,  9.82s/it, loss=-0.0968, reward=0.5922, temp=0.60]

   Processing 215 sentences...

Epoch 5/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:10<01:25, 14.32s/it, loss=-0.0882, reward=0.6079, temp=0.60]

   Processing 39 sentences...

Epoch 5/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:12<00:53, 10.62s/it, loss=-0.1061, reward=0.5745, temp=0.60]

   Processing 105 sentences...

Epoch 5/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:17<00:35,  8.88s/it, loss=-0.0911, reward=0.6052, temp=0.60]

   Processing 138 sentences...

Epoch 5/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:28<00:28,  9.52s/it, loss=-0.0867, reward=0.6104, temp=0.60]

   Processing 161 sentences...

Epoch 5/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:38<00:19,  9.59s/it, loss=-0.1012, reward=0.5957, temp=0.60]

   Processing 35 sentences...

Epoch 5/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:39<00:07,  7.30s/it, loss=-0.1036, reward=0.5877, temp=0.60]

   Processing 500 sentences...ents), truncating to 500

Epoch 5/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:11<00:00, 15.90s/it, loss=-0.0478, reward=0.6191, temp=0.60]


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 5/10:
  Train Loss: -0.0864
  Overall Val Reward: 0.5790
  Learning Rate: 0.000100
  Temperature: 0.6000

  Aspect-wise Val Rewards:
    facts       : 0.5812
    analysis    : 0.5751
    argument    : 0.5801
    judgement   : 0.5785
    statute     : 0.5803



Epoch 6/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 6/10:   0%|          | 1/280 [00:28<2:11:59, 28.38s/it, loss=-0.0895, reward=0.6054, temp=0.50]

   Processing 353 sentences...

Epoch 6/10:   1%|          | 2/280 [00:52<1:59:17, 25.75s/it, loss=-0.0839, reward=0.6125, temp=0.50]

   Processing 57 sentences...

Epoch 6/10:   1%|          | 3/280 [00:59<1:19:11, 17.15s/it, loss=-0.1002, reward=0.5784, temp=0.50]

   Processing 96 sentences...

Epoch 6/10:   1%|‚ñè         | 4/280 [01:13<1:12:55, 15.85s/it, loss=-0.0911, reward=0.6035, temp=0.50]

   Processing 71 sentences...

Epoch 6/10:   2%|‚ñè         | 5/280 [01:16<51:27, 11.23s/it, loss=-0.0950, reward=0.5939, temp=0.50]  

   Processing 62 sentences...

Epoch 6/10:   2%|‚ñè         | 6/280 [01:18<38:09,  8.36s/it, loss=-0.0942, reward=0.6011, temp=0.50]

   Processing 350 sentences...

Epoch 6/10:   2%|‚ñé         | 7/280 [01:51<1:13:37, 16.18s/it, loss=-0.0841, reward=0.6179, temp=0.50]

   Processing 263 sentences...

Epoch 6/10:   3%|‚ñé         | 8/280 [02:06<1:12:40, 16.03s/it, loss=-0.0861, reward=0.6137, temp=0.50]

   Processing 176 sentences...

Epoch 6/10:   3%|‚ñé         | 9/280 [02:16<1:03:19, 14.02s/it, loss=-0.0902, reward=0.6091, temp=0.50]

   Processing 318 sentences...

Epoch 6/10:   4%|‚ñé         | 10/280 [03:21<2:13:39, 29.70s/it, loss=-0.0902, reward=0.5990, temp=0.50]

   Processing 338 sentences...

Epoch 6/10:   4%|‚ñç         | 11/280 [03:45<2:05:11, 27.92s/it, loss=-0.0831, reward=0.6172, temp=0.50]

   Processing 40 sentences...

Epoch 6/10:   4%|‚ñç         | 12/280 [03:47<1:30:01, 20.15s/it, loss=-0.1070, reward=0.5781, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:   5%|‚ñç         | 13/280 [04:40<2:14:25, 30.21s/it, loss=-0.0812, reward=0.6145, temp=0.50]

   Processing 423 sentences...

Epoch 6/10:   5%|‚ñå         | 14/280 [05:42<2:55:17, 39.54s/it, loss=-0.0843, reward=0.6140, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:   5%|‚ñå         | 15/280 [06:00<2:26:33, 33.18s/it, loss=-0.0728, reward=0.6383, temp=0.50]

   Processing 82 sentences...

Epoch 6/10:   6%|‚ñå         | 16/280 [06:03<1:45:52, 24.06s/it, loss=-0.0966, reward=0.6019, temp=0.50]

   Processing 137 sentences...

Epoch 6/10:   6%|‚ñå         | 17/280 [06:10<1:23:19, 19.01s/it, loss=-0.0924, reward=0.6081, temp=0.50]

   Processing 30 sentences...

Epoch 6/10:   6%|‚ñã         | 18/280 [06:13<1:01:24, 14.06s/it, loss=-0.1171, reward=0.5650, temp=0.50]

   Processing 82 sentences...

Epoch 6/10:   7%|‚ñã         | 19/280 [06:21<53:16, 12.25s/it, loss=-0.1009, reward=0.6043, temp=0.50]  

   Processing 61 sentences...

Epoch 6/10:   7%|‚ñã         | 20/280 [06:24<41:03,  9.47s/it, loss=-0.1144, reward=0.5824, temp=0.50]

   Processing 50 sentences...

Epoch 6/10:   8%|‚ñä         | 21/280 [06:27<32:37,  7.56s/it, loss=-0.1163, reward=0.5743, temp=0.50]

   Processing 115 sentences...

Epoch 6/10:   8%|‚ñä         | 22/280 [06:35<33:53,  7.88s/it, loss=-0.0957, reward=0.6030, temp=0.50]

   Processing 80 sentences...

Epoch 6/10:   8%|‚ñä         | 23/280 [06:39<27:42,  6.47s/it, loss=-0.0955, reward=0.5950, temp=0.50]

   Processing 434 sentences...

Epoch 6/10:   9%|‚ñä         | 24/280 [08:03<2:07:58, 30.00s/it, loss=-0.0800, reward=0.6204, temp=0.50]

   Processing 254 sentences...

Epoch 6/10:   9%|‚ñâ         | 25/280 [08:37<2:11:40, 30.98s/it, loss=-0.0883, reward=0.6101, temp=0.50]

   Processing 329 sentences...

Epoch 6/10:   9%|‚ñâ         | 26/280 [08:56<1:55:59, 27.40s/it, loss=-0.0861, reward=0.6146, temp=0.50]

   Processing 87 sentences...

Epoch 6/10:  10%|‚ñâ         | 27/280 [09:01<1:26:59, 20.63s/it, loss=-0.1014, reward=0.5971, temp=0.50]

   Processing 39 sentences...

Epoch 6/10:  10%|‚ñà         | 28/280 [09:02<1:02:43, 14.93s/it, loss=-0.1247, reward=0.5694, temp=0.50]

   Processing 83 sentences...

Epoch 6/10:  10%|‚ñà         | 29/280 [09:06<47:57, 11.47s/it, loss=-0.1027, reward=0.5897, temp=0.50]  

   Processing 177 sentences...

Epoch 6/10:  11%|‚ñà         | 30/280 [09:18<48:28, 11.63s/it, loss=-0.0903, reward=0.6020, temp=0.50]

   Processing 82 sentences...

Epoch 6/10:  11%|‚ñà         | 31/280 [09:25<43:14, 10.42s/it, loss=-0.1136, reward=0.5846, temp=0.50]

   Processing 136 sentences...

Epoch 6/10:  11%|‚ñà‚ñè        | 32/280 [09:30<36:13,  8.76s/it, loss=-0.0750, reward=0.6032, temp=0.50]

   Processing 166 sentences...

Epoch 6/10:  12%|‚ñà‚ñè        | 33/280 [09:40<37:22,  9.08s/it, loss=-0.0590, reward=0.6090, temp=0.50]

   Processing 91 sentences...

Epoch 6/10:  12%|‚ñà‚ñè        | 34/280 [09:45<32:42,  7.98s/it, loss=-0.1137, reward=0.5909, temp=0.50]

   Processing 87 sentences...

Epoch 6/10:  12%|‚ñà‚ñé        | 35/280 [09:50<28:54,  7.08s/it, loss=-0.0932, reward=0.6030, temp=0.50]

   Processing 203 sentences...

Epoch 6/10:  13%|‚ñà‚ñé        | 36/280 [10:05<37:38,  9.25s/it, loss=-0.0740, reward=0.6074, temp=0.50]

   Processing 140 sentences...

Epoch 6/10:  13%|‚ñà‚ñé        | 37/280 [10:15<38:16,  9.45s/it, loss=-0.1111, reward=0.5905, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  14%|‚ñà‚ñé        | 38/280 [11:07<1:30:04, 22.33s/it, loss=-0.0665, reward=0.6086, temp=0.50]

   Processing 97 sentences...

Epoch 6/10:  14%|‚ñà‚ñç        | 39/280 [11:13<1:10:06, 17.45s/it, loss=-0.1280, reward=0.5886, temp=0.50]

   Processing 182 sentences...

Epoch 6/10:  14%|‚ñà‚ñç        | 40/280 [11:26<1:04:02, 16.01s/it, loss=-0.0686, reward=0.6049, temp=0.50]

   Processing 87 sentences...

Epoch 6/10:  15%|‚ñà‚ñç        | 41/280 [11:30<49:41, 12.47s/it, loss=-0.0948, reward=0.6019, temp=0.50]  

   Processing 235 sentences...

Epoch 6/10:  15%|‚ñà‚ñå        | 42/280 [12:04<1:15:21, 19.00s/it, loss=-0.0863, reward=0.6035, temp=0.50]

   Processing 50 sentences...

Epoch 6/10:  15%|‚ñà‚ñå        | 43/280 [12:06<55:15, 13.99s/it, loss=-0.1116, reward=0.5871, temp=0.50]  

   Processing 70 sentences...

Epoch 6/10:  16%|‚ñà‚ñå        | 44/280 [12:11<43:38, 11.10s/it, loss=-0.1142, reward=0.5828, temp=0.50]

   Processing 65 sentences...

Epoch 6/10:  16%|‚ñà‚ñå        | 45/280 [12:16<36:59,  9.45s/it, loss=-0.1053, reward=0.5923, temp=0.50]

   Processing 482 sentences...

Epoch 6/10:  16%|‚ñà‚ñã        | 46/280 [13:08<1:25:48, 22.00s/it, loss=-0.0781, reward=0.6159, temp=0.50]

   Processing 164 sentences...

Epoch 6/10:  17%|‚ñà‚ñã        | 47/280 [13:23<1:17:57, 20.07s/it, loss=-0.0964, reward=0.6040, temp=0.50]

   Processing 102 sentences...

Epoch 6/10:  17%|‚ñà‚ñã        | 48/280 [13:30<1:01:45, 15.97s/it, loss=-0.0884, reward=0.6006, temp=0.50]

   Processing 48 sentences...

Epoch 6/10:  18%|‚ñà‚ñä        | 49/280 [13:31<45:02, 11.70s/it, loss=-0.1007, reward=0.5765, temp=0.50]  

   Processing 74 sentences...

Epoch 6/10:  18%|‚ñà‚ñä        | 50/280 [13:37<37:42,  9.84s/it, loss=-0.1023, reward=0.5860, temp=0.50]

   Processing 67 sentences...

Epoch 6/10:  18%|‚ñà‚ñä        | 51/280 [13:43<32:51,  8.61s/it, loss=-0.0936, reward=0.6029, temp=0.50]

   Processing 31 sentences...

Epoch 6/10:  19%|‚ñà‚ñä        | 52/280 [13:44<24:42,  6.50s/it, loss=-0.1141, reward=0.5746, temp=0.50]

   Processing 53 sentences...

Epoch 6/10:  19%|‚ñà‚ñâ        | 53/280 [13:48<21:28,  5.67s/it, loss=-0.1087, reward=0.5843, temp=0.50]

   Processing 123 sentences...

Epoch 6/10:  19%|‚ñà‚ñâ        | 54/280 [13:56<23:31,  6.25s/it, loss=-0.0886, reward=0.6031, temp=0.50]

   Processing 169 sentences...

Epoch 6/10:  20%|‚ñà‚ñâ        | 55/280 [14:05<27:17,  7.28s/it, loss=-0.0447, reward=0.6110, temp=0.50]

   Processing 33 sentences...

Epoch 6/10:  20%|‚ñà‚ñà        | 56/280 [14:08<22:09,  5.93s/it, loss=-0.1213, reward=0.5736, temp=0.50]

   Processing 190 sentences...

Epoch 6/10:  20%|‚ñà‚ñà        | 57/280 [14:19<27:45,  7.47s/it, loss=-0.0407, reward=0.6117, temp=0.50]

   Processing 223 sentences...

Epoch 6/10:  21%|‚ñà‚ñà        | 58/280 [14:37<39:13, 10.60s/it, loss=-0.0665, reward=0.6060, temp=0.50]

   Processing 57 sentences...

Epoch 6/10:  21%|‚ñà‚ñà        | 59/280 [14:40<30:24,  8.26s/it, loss=-0.1009, reward=0.5917, temp=0.50]

   Processing 362 sentences...

Epoch 6/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:05<48:36, 13.26s/it, loss=-0.0838, reward=0.6132, temp=0.50]

   Processing 192 sentences...

Epoch 6/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:23<53:33, 14.67s/it, loss=-0.0855, reward=0.6033, temp=0.50]

   Processing 108 sentences...

Epoch 6/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:26<40:54, 11.26s/it, loss=-0.0882, reward=0.6086, temp=0.50]

   Processing 380 sentences...

Epoch 6/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:05<1:11:04, 19.65s/it, loss=-0.0811, reward=0.6087, temp=0.50]

   Processing 285 sentences...

Epoch 6/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:36<1:22:58, 23.05s/it, loss=-0.0805, reward=0.6091, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:36<2:01:44, 33.98s/it, loss=-0.0697, reward=0.6189, temp=0.50]

   Processing 144 sentences...

Epoch 6/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:45<1:35:06, 26.66s/it, loss=-0.0951, reward=0.6065, temp=0.50]

   Processing 202 sentences...

Epoch 6/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [18:02<1:24:17, 23.75s/it, loss=-0.0834, reward=0.6182, temp=0.50]

   Processing 271 sentences...

Epoch 6/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:31<1:29:31, 25.34s/it, loss=-0.0910, reward=0.6045, temp=0.50]

   Processing 51 sentences...

Epoch 6/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:33<1:04:40, 18.39s/it, loss=-0.1022, reward=0.5862, temp=0.50]

   Processing 376 sentences...

Epoch 6/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [19:05<1:18:12, 22.35s/it, loss=-0.0862, reward=0.5992, temp=0.50]

   Processing 61 sentences...

Epoch 6/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:09<58:24, 16.77s/it, loss=-0.0964, reward=0.5939, temp=0.50]  

   Processing 63 sentences...

Epoch 6/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:12<44:11, 12.75s/it, loss=-0.1077, reward=0.5687, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:20<1:41:18, 29.37s/it, loss=-0.0789, reward=0.6159, temp=0.50]

   Processing 254 sentences...

Epoch 6/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:47<1:38:30, 28.69s/it, loss=-0.0853, reward=0.6028, temp=0.50]

   Processing 118 sentences...

Epoch 6/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:09<1:30:55, 26.61s/it, loss=-0.0888, reward=0.6035, temp=0.50]

   Processing 439 sentences...

Epoch 6/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:44<1:38:54, 29.09s/it, loss=-0.0840, reward=0.6107, temp=0.50]

   Processing 51 sentences...

Epoch 6/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:46<1:11:03, 21.00s/it, loss=-0.1047, reward=0.5787, temp=0.50]

   Processing 143 sentences...

Epoch 6/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:57<1:00:12, 17.88s/it, loss=-0.1008, reward=0.5926, temp=0.50]

   Processing 300 sentences...

Epoch 6/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:18<1:03:03, 18.82s/it, loss=-0.0859, reward=0.6090, temp=0.50]

   Processing 137 sentences...

Epoch 6/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:27<53:08, 15.94s/it, loss=-0.1110, reward=0.5952, temp=0.50]  

   Processing 120 sentences...

Epoch 6/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:34<43:47, 13.20s/it, loss=-0.0975, reward=0.5927, temp=0.50]

   Processing 170 sentences...

Epoch 6/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:47<43:48, 13.27s/it, loss=-0.0990, reward=0.5904, temp=0.50]

   Processing 107 sentences...

Epoch 6/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:54<37:00, 11.27s/it, loss=-0.0926, reward=0.6018, temp=0.50]

   Processing 48 sentences...

Epoch 6/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:58<29:44,  9.10s/it, loss=-0.1112, reward=0.5683, temp=0.50]

   Processing 163 sentences...

Epoch 6/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [23:07<29:09,  8.97s/it, loss=-0.0870, reward=0.6070, temp=0.50]

   Processing 108 sentences...

Epoch 6/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:16<29:28,  9.12s/it, loss=-0.0890, reward=0.6055, temp=0.50]

   Processing 86 sentences...

Epoch 6/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:21<24:53,  7.74s/it, loss=-0.0882, reward=0.6018, temp=0.50]

   Processing 74 sentences...

Epoch 6/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:33<29:45,  9.30s/it, loss=-0.1216, reward=0.5733, temp=0.50]

   Processing 81 sentences...

Epoch 6/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:43<30:16,  9.51s/it, loss=-0.1089, reward=0.5847, temp=0.50]

   Processing 203 sentences...

Epoch 6/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:55<32:14, 10.18s/it, loss=-0.0600, reward=0.6105, temp=0.50]

   Processing 54 sentences...

Epoch 6/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:58<24:54,  7.91s/it, loss=-0.1105, reward=0.5791, temp=0.50]

   Processing 143 sentences...

Epoch 6/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [24:08<26:32,  8.47s/it, loss=-0.0836, reward=0.6093, temp=0.50]

   Processing 36 sentences...

Epoch 6/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [24:10<20:41,  6.64s/it, loss=-0.1023, reward=0.5984, temp=0.50]

   Processing 106 sentences...

Epoch 6/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:15<19:02,  6.14s/it, loss=-0.1145, reward=0.5922, temp=0.50]

   Processing 133 sentences...

Epoch 6/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:41<37:43, 12.24s/it, loss=-0.1196, reward=0.5753, temp=0.50]

   Processing 206 sentences...

Epoch 6/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:51<35:28, 11.57s/it, loss=-0.0762, reward=0.6066, temp=0.50]

   Processing 162 sentences...

Epoch 6/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:14<45:04, 14.78s/it, loss=-0.0890, reward=0.6034, temp=0.50]

   Processing 42 sentences...

Epoch 6/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:15<32:59, 10.88s/it, loss=-0.1044, reward=0.5917, temp=0.50]

   Processing 89 sentences...

Epoch 6/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:20<27:06,  8.98s/it, loss=-0.0909, reward=0.5966, temp=0.50]

   Processing 32 sentences...

Epoch 6/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:24<22:21,  7.46s/it, loss=-0.1396, reward=0.5542, temp=0.50]

   Processing 408 sentences...

Epoch 6/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:46<1:29:07, 29.87s/it, loss=-0.0823, reward=0.6035, temp=0.50]

   Processing 35 sentences...

Epoch 6/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:48<1:04:08, 21.62s/it, loss=-0.1155, reward=0.5736, temp=0.50]

   Processing 137 sentences...

Epoch 6/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:56<51:13, 17.36s/it, loss=-0.0904, reward=0.5969, temp=0.50]  

   Processing 105 sentences...

Epoch 6/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [27:04<42:56, 14.64s/it, loss=-0.1269, reward=0.5802, temp=0.50]

   Processing 124 sentences...

Epoch 6/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:16<40:07, 13.76s/it, loss=-0.0992, reward=0.5932, temp=0.50]

   Processing 220 sentences...

Epoch 6/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:30<40:12, 13.87s/it, loss=-0.0654, reward=0.6087, temp=0.50]

   Processing 98 sentences...

Epoch 6/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:35<32:06, 11.14s/it, loss=-0.1017, reward=0.5961, temp=0.50]

   Processing 133 sentences...

Epoch 6/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:42<28:17,  9.87s/it, loss=-0.0653, reward=0.6066, temp=0.50]

   Processing 315 sentences...

Epoch 6/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:11<44:21, 15.57s/it, loss=-0.0898, reward=0.6012, temp=0.50]

   Processing 81 sentences...

Epoch 6/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:14<34:00, 12.00s/it, loss=-0.1032, reward=0.5861, temp=0.50]

   Processing 286 sentences...

Epoch 6/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:31<37:37, 13.36s/it, loss=-0.0859, reward=0.6098, temp=0.50]

   Processing 122 sentences...

Epoch 6/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:38<32:01, 11.44s/it, loss=-0.0902, reward=0.5968, temp=0.50]

   Processing 170 sentences...

Epoch 6/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:55<37:08, 13.34s/it, loss=-0.0911, reward=0.6035, temp=0.50]

   Processing 376 sentences...

Epoch 6/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:39<1:02:14, 22.50s/it, loss=-0.0819, reward=0.6107, temp=0.50]

   Processing 334 sentences...

Epoch 6/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:28<1:23:21, 30.31s/it, loss=-0.0846, reward=0.6116, temp=0.50]

   Processing 102 sentences...

Epoch 6/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:39<1:06:42, 24.41s/it, loss=-0.0906, reward=0.5996, temp=0.50]

   Processing 268 sentences...

Epoch 6/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:53<58:07, 21.39s/it, loss=-0.0853, reward=0.6092, temp=0.50]  

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:33<2:01:17, 44.92s/it, loss=-0.0800, reward=0.6163, temp=0.50]

   Processing 253 sentences...

Epoch 6/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:49<1:37:34, 36.36s/it, loss=-0.0880, reward=0.6095, temp=0.50]

   Processing 132 sentences...

Epoch 6/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:58<1:15:03, 28.15s/it, loss=-0.0920, reward=0.5999, temp=0.50]

   Processing 286 sentences...

Epoch 6/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:20<1:09:55, 26.38s/it, loss=-0.0876, reward=0.6135, temp=0.50]

   Processing 128 sentences...

Epoch 6/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:28<54:18, 20.62s/it, loss=-0.0917, reward=0.6042, temp=0.50]  

   Processing 37 sentences...

Epoch 6/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:30<39:37, 15.15s/it, loss=-0.1016, reward=0.5747, temp=0.50]

   Processing 205 sentences...

Epoch 6/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:48<41:42, 16.04s/it, loss=-0.0905, reward=0.6042, temp=0.50]

   Processing 87 sentences...

Epoch 6/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:55<34:12, 13.24s/it, loss=-0.0947, reward=0.6037, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:05<1:17:35, 30.23s/it, loss=-0.0749, reward=0.6182, temp=0.50]

   Processing 144 sentences...

Epoch 6/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:17<1:03:20, 24.84s/it, loss=-0.0934, reward=0.6068, temp=0.50]

   Processing 80 sentences...

Epoch 6/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:25<49:58, 19.73s/it, loss=-0.1019, reward=0.6018, temp=0.50]  

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:52<1:40:30, 39.94s/it, loss=-0.0820, reward=0.6114, temp=0.50]

   Processing 124 sentences...

Epoch 6/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [37:01<1:16:44, 30.70s/it, loss=-0.0993, reward=0.5960, temp=0.50]

   Processing 112 sentences...

Epoch 6/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:11<1:00:49, 24.49s/it, loss=-0.1064, reward=0.5832, temp=0.50]

   Processing 75 sentences...

Epoch 6/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:15<45:29, 18.45s/it, loss=-0.1089, reward=0.5872, temp=0.50]  

   Processing 165 sentences...

Epoch 6/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:27<40:18, 16.45s/it, loss=-0.0902, reward=0.6111, temp=0.50]

   Processing 146 sentences...

Epoch 6/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:45<41:03, 16.88s/it, loss=-0.0964, reward=0.6045, temp=0.50]

   Processing 100 sentences...

Epoch 6/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:50<32:16, 13.36s/it, loss=-0.0946, reward=0.6071, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:32<1:35:29, 39.79s/it, loss=-0.0743, reward=0.6133, temp=0.50]

   Processing 209 sentences...

Epoch 6/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:47<1:17:18, 32.44s/it, loss=-0.0928, reward=0.6071, temp=0.50]

   Processing 90 sentences...

Epoch 6/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:53<58:07, 24.56s/it, loss=-0.0989, reward=0.6039, temp=0.50]  

   Processing 72 sentences...

Epoch 6/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:58<43:41, 18.59s/it, loss=-0.1140, reward=0.5879, temp=0.50]

   Processing 38 sentences...

Epoch 6/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [40:00<31:42, 13.59s/it, loss=-0.1027, reward=0.5928, temp=0.50]

   Processing 131 sentences...

Epoch 6/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [40:05<25:41, 11.09s/it, loss=-0.1030, reward=0.5993, temp=0.50]

   Processing 78 sentences...

Epoch 6/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:09<20:59,  9.13s/it, loss=-0.1107, reward=0.5909, temp=0.50]

   Processing 119 sentences...

Epoch 6/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:17<20:03,  8.78s/it, loss=-0.1060, reward=0.5993, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [41:58<1:22:33, 36.42s/it, loss=-0.0171, reward=0.6210, temp=0.50]

   Processing 133 sentences...

Epoch 6/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [42:08<1:03:54, 28.41s/it, loss=-0.0903, reward=0.6067, temp=0.50]

   Processing 83 sentences...

Epoch 6/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:14<48:40, 21.80s/it, loss=-0.1070, reward=0.5864, temp=0.50]  

   Processing 56 sentences...

Epoch 6/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:17<35:51, 16.17s/it, loss=-0.1064, reward=0.5859, temp=0.50]

   Processing 133 sentences...

Epoch 6/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:24<29:26, 13.38s/it, loss=-0.0962, reward=0.5939, temp=0.50]

   Processing 101 sentences...

Epoch 6/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:30<24:28, 11.21s/it, loss=-0.1173, reward=0.5830, temp=0.50]

   Processing 151 sentences...

Epoch 6/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:42<24:23, 11.26s/it, loss=-0.0903, reward=0.6071, temp=0.50]

   Processing 248 sentences...

Epoch 6/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [42:57<26:28, 12.32s/it, loss=-0.0506, reward=0.6114, temp=0.50]

   Processing 500 sentences...sents), truncating to 500

Epoch 6/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [43:54<54:50, 25.71s/it, loss=-0.0584, reward=0.6149, temp=0.50]

   Processing 145 sentences...

Epoch 6/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [44:00<42:12, 19.94s/it, loss=-0.0936, reward=0.5994, temp=0.50]

   Processing 202 sentences...

Epoch 6/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:17<40:03, 19.08s/it, loss=-0.0941, reward=0.6036, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:49<47:36, 22.85s/it, loss=-0.0744, reward=0.6149, temp=0.50]

   Processing 154 sentences...

Epoch 6/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [45:06<43:59, 21.29s/it, loss=-0.0948, reward=0.6066, temp=0.50]

   Processing 345 sentences...

Epoch 6/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [45:57<1:01:40, 30.08s/it, loss=-0.0894, reward=0.6074, temp=0.50]

   Processing 99 sentences...

Epoch 6/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [46:02<45:35, 22.43s/it, loss=-0.0994, reward=0.6021, temp=0.50]  

   Processing 500 sentences...sents), truncating to 500

Epoch 6/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:50<1:01:13, 30.36s/it, loss=-0.0809, reward=0.6192, temp=0.50]

   Processing 118 sentences...

Epoch 6/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [47:00<48:03, 24.03s/it, loss=-0.0913, reward=0.6033, temp=0.50]  

   Processing 30 sentences...

Epoch 6/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [47:01<34:19, 17.30s/it, loss=-0.1170, reward=0.5573, temp=0.50]

   Processing 76 sentences...

Epoch 6/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [47:05<26:04, 13.26s/it, loss=-0.1005, reward=0.6021, temp=0.50]

   Processing 95 sentences...

Epoch 6/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:12<22:10, 11.37s/it, loss=-0.0942, reward=0.5907, temp=0.50]

   Processing 78 sentences...

Epoch 6/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:17<18:06,  9.37s/it, loss=-0.0925, reward=0.6065, temp=0.50]

   Processing 153 sentences...

Epoch 6/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:25<17:09,  8.95s/it, loss=-0.0905, reward=0.6117, temp=0.50]

   Processing 229 sentences...

Epoch 6/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:39<20:12, 10.64s/it, loss=-0.0887, reward=0.6081, temp=0.50]

   Processing 108 sentences...

Epoch 6/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:44<16:49,  8.93s/it, loss=-0.0996, reward=0.5959, temp=0.50]

   Processing 154 sentences...

Epoch 6/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [48:03<22:00, 11.79s/it, loss=-0.0879, reward=0.6030, temp=0.50]

   Processing 44 sentences...

Epoch 6/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:10<19:35, 10.59s/it, loss=-0.1128, reward=0.5739, temp=0.50]

   Processing 229 sentences...

Epoch 6/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:47<33:45, 18.41s/it, loss=-0.0922, reward=0.5967, temp=0.50]

   Processing 221 sentences...

Epoch 6/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [49:08<34:37, 19.06s/it, loss=-0.1015, reward=0.5981, temp=0.50]

   Processing 148 sentences...

Epoch 6/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:15<27:58, 15.54s/it, loss=-0.0926, reward=0.6033, temp=0.50]

   Processing 144 sentences...

Epoch 6/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:21<22:42, 12.74s/it, loss=-0.0865, reward=0.6051, temp=0.50]

   Processing 77 sentences...

Epoch 6/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:25<17:41, 10.01s/it, loss=-0.1098, reward=0.5897, temp=0.50]

   Processing 90 sentences...

Epoch 6/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:32<16:01,  9.16s/it, loss=-0.1209, reward=0.5809, temp=0.50]

   Processing 309 sentences...

Epoch 6/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:45<17:51, 10.30s/it, loss=-0.0880, reward=0.6003, temp=0.50]

   Processing 297 sentences...

Epoch 6/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [50:05<22:47, 13.28s/it, loss=-0.0791, reward=0.6111, temp=0.50]

   Processing 286 sentences...

Epoch 6/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:31<28:52, 16.98s/it, loss=-0.0780, reward=0.6102, temp=0.50]

   Processing 66 sentences...

Epoch 6/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:34<21:26, 12.74s/it, loss=-0.1273, reward=0.5796, temp=0.50]

   Processing 220 sentences...

Epoch 6/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:49<22:29, 13.49s/it, loss=-0.0964, reward=0.5940, temp=0.50]

   Processing 209 sentences...

Epoch 6/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [51:07<24:22, 14.78s/it, loss=-0.0938, reward=0.5988, temp=0.50]

   Processing 70 sentences...

Epoch 6/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:11<19:09, 11.73s/it, loss=-0.1223, reward=0.5814, temp=0.50]

   Processing 214 sentences...

Epoch 6/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:22<18:18, 11.32s/it, loss=-0.0729, reward=0.6389, temp=0.50]

   Processing 213 sentences...

Epoch 6/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:34<18:20, 11.47s/it, loss=-0.0913, reward=0.6061, temp=0.50]

   Processing 29 sentences...

Epoch 6/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:36<13:39,  8.63s/it, loss=-0.1542, reward=0.5531, temp=0.50]

   Processing 91 sentences...

Epoch 6/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:40<11:35,  7.40s/it, loss=-0.0811, reward=0.6060, temp=0.50]

   Processing 291 sentences...

Epoch 6/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [52:00<17:23, 11.22s/it, loss=-0.0899, reward=0.6032, temp=0.50]

   Processing 80 sentences...

Epoch 6/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:09<16:11, 10.56s/it, loss=-0.0930, reward=0.6038, temp=0.50]

   Processing 43 sentences...

Epoch 6/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:12<12:28,  8.22s/it, loss=-0.1259, reward=0.5732, temp=0.50]

   Processing 45 sentences...

Epoch 6/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:14<09:44,  6.49s/it, loss=-0.1294, reward=0.5735, temp=0.50]

   Processing 89 sentences...

Epoch 6/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:18<08:26,  5.70s/it, loss=-0.0999, reward=0.5804, temp=0.50]

   Processing 194 sentences...

Epoch 6/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:28<10:07,  6.90s/it, loss=-0.0725, reward=0.6019, temp=0.50]

   Processing 137 sentences...

Epoch 6/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:39<11:36,  8.00s/it, loss=-0.0996, reward=0.5877, temp=0.50]

   Processing 73 sentences...

Epoch 6/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:42<09:27,  6.60s/it, loss=-0.0947, reward=0.6025, temp=0.50]

   Processing 158 sentences...

Epoch 6/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [52:55<12:14,  8.64s/it, loss=-0.0911, reward=0.6090, temp=0.50]

   Processing 56 sentences...

Epoch 6/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [52:58<09:33,  6.83s/it, loss=-0.1267, reward=0.5742, temp=0.50]

   Processing 180 sentences...

Epoch 6/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:09<11:01,  7.97s/it, loss=-0.0892, reward=0.6053, temp=0.50]

   Processing 338 sentences...

Epoch 6/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:28<15:47, 11.55s/it, loss=-0.0887, reward=0.6048, temp=0.50]

   Processing 261 sentences...

Epoch 6/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [53:55<21:50, 16.18s/it, loss=-0.0874, reward=0.6128, temp=0.50]

   Processing 156 sentences...

Epoch 6/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [54:02<17:34, 13.18s/it, loss=-0.0958, reward=0.5926, temp=0.50]

   Processing 43 sentences...

Epoch 6/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [54:04<13:14, 10.05s/it, loss=-0.1534, reward=0.5641, temp=0.50]

   Processing 144 sentences...

Epoch 6/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:17<14:13, 10.94s/it, loss=-0.0950, reward=0.5922, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:09<29:44, 23.18s/it, loss=-0.0616, reward=0.6185, temp=0.50]

   Processing 49 sentences...

Epoch 6/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:12<21:44, 17.16s/it, loss=-0.1328, reward=0.5740, temp=0.50]

   Processing 66 sentences...

Epoch 6/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:19<17:22, 13.90s/it, loss=-0.0951, reward=0.5951, temp=0.50]

   Processing 103 sentences...

Epoch 6/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:25<14:18, 11.61s/it, loss=-0.1025, reward=0.5909, temp=0.50]

   Processing 350 sentences...

Epoch 6/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [55:53<20:11, 16.59s/it, loss=-0.0842, reward=0.6107, temp=0.50]

   Processing 111 sentences...

Epoch 6/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [56:02<17:02, 14.21s/it, loss=-0.0940, reward=0.5985, temp=0.50]

   Processing 51 sentences...

Epoch 6/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [56:04<12:41, 10.72s/it, loss=-0.1216, reward=0.5771, temp=0.50]

   Processing 146 sentences...

Epoch 6/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:16<12:55, 11.07s/it, loss=-0.0861, reward=0.6078, temp=0.50]

   Processing 161 sentences...

Epoch 6/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:26<12:25, 10.80s/it, loss=-0.0902, reward=0.6008, temp=0.50]

   Processing 243 sentences...

Epoch 6/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:42<13:44, 12.13s/it, loss=-0.0845, reward=0.6155, temp=0.50]

   Processing 297 sentences...

Epoch 6/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [57:08<18:24, 16.48s/it, loss=-0.0856, reward=0.6084, temp=0.50]

   Processing 47 sentences...

Epoch 6/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [57:11<13:34, 12.34s/it, loss=-0.1995, reward=0.5608, temp=0.50]

   Processing 415 sentences...

Epoch 6/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:19<31:33, 29.14s/it, loss=-0.0822, reward=0.6175, temp=0.50]

   Processing 138 sentences...

Epoch 6/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:32<25:41, 24.09s/it, loss=-0.1039, reward=0.5953, temp=0.50]

   Processing 46 sentences...

Epoch 6/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:34<18:25, 17.54s/it, loss=-0.1358, reward=0.5843, temp=0.50]

   Processing 73 sentences...

Epoch 6/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:38<13:54, 13.46s/it, loss=-0.0958, reward=0.5970, temp=0.50]

   Processing 136 sentences...

Epoch 6/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:51<13:40, 13.45s/it, loss=-0.1282, reward=0.5900, temp=0.50]

   Processing 93 sentences...

Epoch 6/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [58:57<11:02, 11.04s/it, loss=0.0667, reward=0.6196, temp=0.50] 

   Processing 267 sentences...

Epoch 6/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:13<12:28, 12.69s/it, loss=0.0339, reward=0.6057, temp=0.50]

   Processing 421 sentences...

Epoch 6/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:19<27:44, 28.69s/it, loss=-0.0852, reward=0.6029, temp=0.50]

   Processing 78 sentences...

Epoch 6/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:23<20:06, 21.17s/it, loss=-0.1339, reward=0.5937, temp=0.50]

   Processing 275 sentences...

Epoch 6/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:43<19:34, 20.98s/it, loss=0.0166, reward=0.6060, temp=0.50] 

   Processing 56 sentences...

Epoch 6/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:47<14:24, 15.72s/it, loss=-0.1885, reward=0.5791, temp=0.50]

   Processing 292 sentences...

Epoch 6/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:01:01<13:53, 15.44s/it, loss=-0.0864, reward=0.6108, temp=0.50]

   Processing 393 sentences...

Epoch 6/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:42<20:18, 22.99s/it, loss=0.3667, reward=0.6163, temp=0.50] 

   Processing 412 sentences...

Epoch 6/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:27<25:32, 29.47s/it, loss=0.2220, reward=0.6123, temp=0.50]

   Processing 53 sentences...

Epoch 6/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:29<18:08, 21.34s/it, loss=-0.1074, reward=0.5896, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:14<23:46, 28.53s/it, loss=-0.0485, reward=0.6096, temp=0.50]

   Processing 449 sentences...

Epoch 6/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:47<24:14, 29.69s/it, loss=-0.0879, reward=0.6068, temp=0.50]

   Processing 81 sentences...

Epoch 6/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:52<17:45, 22.21s/it, loss=-0.0939, reward=0.5912, temp=0.50]

   Processing 368 sentences...

Epoch 6/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:14<17:31, 22.36s/it, loss=-0.0865, reward=0.6091, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:18<26:43, 34.85s/it, loss=-0.0837, reward=0.6163, temp=0.50]

   Processing 45 sentences...

Epoch 6/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:21<18:52, 25.16s/it, loss=-0.1362, reward=0.5775, temp=0.50]

   Processing 45 sentences...

Epoch 6/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:23<13:29, 18.39s/it, loss=-0.1062, reward=0.5880, temp=0.50]

   Processing 96 sentences...

Epoch 6/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:32<11:01, 15.38s/it, loss=-0.0947, reward=0.5909, temp=0.50]

   Processing 184 sentences...

Epoch 6/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:44<10:09, 14.50s/it, loss=-0.0902, reward=0.6060, temp=0.50]

   Processing 63 sentences...

Epoch 6/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:49<07:59, 11.71s/it, loss=-0.1546, reward=0.5865, temp=0.50]

   Processing 123 sentences...

Epoch 6/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:05:56<06:46, 10.16s/it, loss=-0.0912, reward=0.5987, temp=0.50]

   Processing 72 sentences...

Epoch 6/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:06:01<05:39,  8.71s/it, loss=-0.0922, reward=0.6037, temp=0.50]

   Processing 268 sentences...

Epoch 6/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:49<13:01, 20.56s/it, loss=-0.0851, reward=0.6121, temp=0.50]

   Processing 129 sentences...

Epoch 6/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:06:57<10:15, 16.64s/it, loss=-0.0942, reward=0.5966, temp=0.50]

   Processing 57 sentences...

Epoch 6/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:07:00<07:31, 12.54s/it, loss=-0.1096, reward=0.5960, temp=0.50]

   Processing 267 sentences...

Epoch 6/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:18<08:15, 14.15s/it, loss=-0.0889, reward=0.6114, temp=0.50]

   Processing 84 sentences...

Epoch 6/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:21<06:11, 10.93s/it, loss=-0.0968, reward=0.5882, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:09<12:09, 22.11s/it, loss=-0.0727, reward=0.6118, temp=0.50]

   Processing 97 sentences...

Epoch 6/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:23<10:23, 19.49s/it, loss=-0.0954, reward=0.5991, temp=0.50]

   Processing 232 sentences...

Epoch 6/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:08:52<11:36, 22.48s/it, loss=-0.0985, reward=0.5986, temp=0.50]

   Processing 111 sentences...

Epoch 6/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:08:59<08:52, 17.74s/it, loss=-0.0851, reward=0.6088, temp=0.50]

   Processing 24 sentences...

Epoch 6/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:09:00<06:12, 12.85s/it, loss=-0.1996, reward=0.5540, temp=0.50]

   Processing 112 sentences...

Epoch 6/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:09:07<05:03, 10.84s/it, loss=-0.0855, reward=0.6013, temp=0.50]

   Processing 43 sentences...

Epoch 6/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:09<03:46,  8.39s/it, loss=-0.1591, reward=0.5733, temp=0.50]

   Processing 183 sentences...

Epoch 6/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:20<03:59,  9.19s/it, loss=-0.0864, reward=0.6024, temp=0.50]

   Processing 117 sentences...

Epoch 6/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:25<03:15,  7.83s/it, loss=-0.0969, reward=0.5992, temp=0.50]

   Processing 106 sentences...

Epoch 6/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:31<02:58,  7.43s/it, loss=-0.0934, reward=0.5992, temp=0.50]

   Processing 67 sentences...

Epoch 6/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:34<02:16,  5.95s/it, loss=-0.0868, reward=0.5946, temp=0.50]

   Processing 112 sentences...

Epoch 6/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:43<02:31,  6.87s/it, loss=-0.0968, reward=0.5975, temp=0.50]

   Processing 79 sentences...

Epoch 6/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:48<02:10,  6.20s/it, loss=-0.0990, reward=0.5871, temp=0.50]

   Processing 408 sentences...

Epoch 6/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:08<03:29, 10.49s/it, loss=-0.0766, reward=0.6160, temp=0.50]

   Processing 103 sentences...

Epoch 6/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:13<02:45,  8.72s/it, loss=-0.0993, reward=0.5970, temp=0.50]

   Processing 103 sentences...

Epoch 6/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:20<02:27,  8.17s/it, loss=-0.0985, reward=0.5985, temp=0.50]

   Processing 118 sentences...

Epoch 6/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:26<02:08,  7.55s/it, loss=-0.0950, reward=0.5993, temp=0.50]

   Processing 249 sentences...

Epoch 6/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:46<02:59, 11.24s/it, loss=-0.0897, reward=0.6089, temp=0.50]

   Processing 145 sentences...

Epoch 6/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:51<02:23,  9.54s/it, loss=-0.0917, reward=0.6042, temp=0.50]

   Processing 294 sentences...

Epoch 6/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:16<03:17, 14.12s/it, loss=-0.0846, reward=0.6185, temp=0.50]

   Processing 147 sentences...

Epoch 6/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:24<02:40, 12.36s/it, loss=-0.0926, reward=0.6011, temp=0.50]

   Processing 304 sentences...

Epoch 6/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:11:58<03:44, 18.71s/it, loss=-0.0886, reward=0.6090, temp=0.50]

   Processing 128 sentences...

Epoch 6/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:11<03:06, 16.99s/it, loss=-0.0900, reward=0.6071, temp=0.50]

   Processing 247 sentences...

Epoch 6/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:27<02:48, 16.90s/it, loss=-0.0886, reward=0.6103, temp=0.50]

   Processing 165 sentences...

Epoch 6/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:44<02:31, 16.84s/it, loss=-0.0921, reward=0.6085, temp=0.50]

   Processing 38 sentences...

Epoch 6/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:47<01:40, 12.62s/it, loss=-0.1270, reward=0.5766, temp=0.50]

   Processing 63 sentences...

Epoch 6/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:50<01:08,  9.77s/it, loss=-0.1009, reward=0.5880, temp=0.50]

   Processing 215 sentences...

Epoch 6/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:15<01:25, 14.26s/it, loss=-0.0908, reward=0.6086, temp=0.50]

   Processing 39 sentences...

Epoch 6/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:17<00:53, 10.61s/it, loss=-0.1393, reward=0.5786, temp=0.50]

   Processing 105 sentences...

Epoch 6/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:22<00:35,  8.87s/it, loss=-0.0992, reward=0.5925, temp=0.50]

   Processing 138 sentences...

Epoch 6/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:33<00:28,  9.54s/it, loss=-0.0972, reward=0.5985, temp=0.50]

   Processing 161 sentences...

Epoch 6/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:42<00:19,  9.62s/it, loss=-0.1378, reward=0.5963, temp=0.50]

   Processing 35 sentences...

Epoch 6/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:44<00:07,  7.32s/it, loss=-0.1640, reward=0.5722, temp=0.50]

   Processing 500 sentences...ents), truncating to 500

Epoch 6/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:16<00:00, 15.92s/it, loss=-0.0511, reward=0.6202, temp=0.50]


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 6/10:
  Train Loss: -0.0922
  Overall Val Reward: 0.5787
  Learning Rate: 0.000100
  Temperature: 0.5000

  Aspect-wise Val Rewards:
    facts       : 0.5777
    analysis    : 0.5784
    argument    : 0.5780
    judgement   : 0.5782
    statute     : 0.5813



Epoch 7/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 7/10:   0%|          | 1/280 [00:28<2:11:54, 28.37s/it, loss=-0.0985, reward=0.5967, temp=0.40]

   Processing 353 sentences...

Epoch 7/10:   1%|          | 2/280 [00:52<1:59:53, 25.87s/it, loss=-0.0890, reward=0.6072, temp=0.40]

   Processing 57 sentences...

Epoch 7/10:   1%|          | 3/280 [00:59<1:19:45, 17.28s/it, loss=-0.1295, reward=0.5766, temp=0.40]

   Processing 96 sentences...

Epoch 7/10:   1%|‚ñè         | 4/280 [01:14<1:14:35, 16.22s/it, loss=-0.2049, reward=0.5837, temp=0.40]

   Processing 71 sentences...

Epoch 7/10:   2%|‚ñè         | 5/280 [01:17<52:31, 11.46s/it, loss=-0.0772, reward=0.5960, temp=0.40]  

   Processing 62 sentences...

Epoch 7/10:   2%|‚ñè         | 6/280 [01:19<38:46,  8.49s/it, loss=-0.1399, reward=0.5837, temp=0.40]

   Processing 350 sentences...

Epoch 7/10:   2%|‚ñé         | 7/280 [01:52<1:14:24, 16.35s/it, loss=0.0036, reward=0.6086, temp=0.40]

   Processing 263 sentences...

Epoch 7/10:   3%|‚ñé         | 8/280 [02:08<1:13:43, 16.26s/it, loss=-0.0720, reward=0.6067, temp=0.40]

   Processing 176 sentences...

Epoch 7/10:   3%|‚ñé         | 9/280 [02:17<1:03:49, 14.13s/it, loss=-0.1601, reward=0.6024, temp=0.40]

   Processing 318 sentences...

Epoch 7/10:   4%|‚ñé         | 10/280 [03:23<2:14:48, 29.96s/it, loss=-0.0905, reward=0.5936, temp=0.40]

   Processing 338 sentences...

Epoch 7/10:   4%|‚ñç         | 11/280 [03:46<2:04:43, 27.82s/it, loss=-0.2633, reward=0.6012, temp=0.40]

   Processing 40 sentences...

Epoch 7/10:   4%|‚ñç         | 12/280 [03:48<1:29:43, 20.09s/it, loss=-0.1333, reward=0.5754, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:   5%|‚ñç         | 13/280 [04:43<2:16:45, 30.73s/it, loss=-0.0591, reward=0.6128, temp=0.40]

   Processing 423 sentences...

Epoch 7/10:   5%|‚ñå         | 14/280 [05:41<2:52:38, 38.94s/it, loss=-0.3180, reward=0.5955, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:   5%|‚ñå         | 15/280 [05:59<2:24:03, 32.62s/it, loss=0.1857, reward=0.6206, temp=0.40] 

   Processing 82 sentences...

Epoch 7/10:   6%|‚ñå         | 16/280 [06:02<1:44:04, 23.65s/it, loss=-0.0914, reward=0.5857, temp=0.40]

   Processing 137 sentences...

Epoch 7/10:   6%|‚ñå         | 17/280 [06:09<1:22:06, 18.73s/it, loss=-0.0940, reward=0.5970, temp=0.40]

   Processing 30 sentences...

Epoch 7/10:   6%|‚ñã         | 18/280 [06:12<1:00:33, 13.87s/it, loss=-0.1340, reward=0.5516, temp=0.40]

   Processing 82 sentences...

Epoch 7/10:   7%|‚ñã         | 19/280 [06:20<52:26, 12.06s/it, loss=-0.0862, reward=0.5905, temp=0.40]  

   Processing 61 sentences...

Epoch 7/10:   7%|‚ñã         | 20/280 [06:23<40:25,  9.33s/it, loss=-0.0996, reward=0.5739, temp=0.40]

   Processing 50 sentences...

Epoch 7/10:   8%|‚ñä         | 21/280 [06:26<32:00,  7.41s/it, loss=-0.1106, reward=0.5646, temp=0.40]

   Processing 115 sentences...

Epoch 7/10:   8%|‚ñä         | 22/280 [06:34<33:05,  7.69s/it, loss=-0.0905, reward=0.5776, temp=0.40]

   Processing 80 sentences...

Epoch 7/10:   8%|‚ñä         | 23/280 [06:37<27:02,  6.31s/it, loss=-0.0933, reward=0.5845, temp=0.40]

   Processing 434 sentences...

Epoch 7/10:   9%|‚ñä         | 24/280 [08:03<2:09:10, 30.28s/it, loss=-0.0804, reward=0.6177, temp=0.40]

   Processing 254 sentences...

Epoch 7/10:   9%|‚ñâ         | 25/280 [08:35<2:10:52, 30.79s/it, loss=-0.0829, reward=0.6075, temp=0.40]

   Processing 329 sentences...

Epoch 7/10:   9%|‚ñâ         | 26/280 [08:55<1:55:47, 27.35s/it, loss=-0.0833, reward=0.6091, temp=0.40]

   Processing 87 sentences...

Epoch 7/10:  10%|‚ñâ         | 27/280 [09:00<1:26:55, 20.61s/it, loss=-0.0900, reward=0.5958, temp=0.40]

   Processing 39 sentences...

Epoch 7/10:  10%|‚ñà         | 28/280 [09:01<1:02:41, 14.93s/it, loss=-0.0943, reward=0.5797, temp=0.40]

   Processing 83 sentences...

Epoch 7/10:  10%|‚ñà         | 29/280 [09:05<47:52, 11.44s/it, loss=-0.0890, reward=0.5819, temp=0.40]  

   Processing 177 sentences...

Epoch 7/10:  11%|‚ñà         | 30/280 [09:16<48:16, 11.59s/it, loss=-0.0888, reward=0.6033, temp=0.40]

   Processing 82 sentences...

Epoch 7/10:  11%|‚ñà         | 31/280 [09:24<43:10, 10.40s/it, loss=-0.0998, reward=0.5828, temp=0.40]

   Processing 136 sentences...

Epoch 7/10:  11%|‚ñà‚ñè        | 32/280 [09:29<36:12,  8.76s/it, loss=-0.0896, reward=0.5996, temp=0.40]

   Processing 166 sentences...

Epoch 7/10:  12%|‚ñà‚ñè        | 33/280 [09:39<37:30,  9.11s/it, loss=-0.0928, reward=0.6053, temp=0.40]

   Processing 91 sentences...

Epoch 7/10:  12%|‚ñà‚ñè        | 34/280 [09:44<32:42,  7.98s/it, loss=-0.0992, reward=0.5952, temp=0.40]

   Processing 87 sentences...

Epoch 7/10:  12%|‚ñà‚ñé        | 35/280 [09:49<28:44,  7.04s/it, loss=-0.0919, reward=0.5908, temp=0.40]

   Processing 203 sentences...

Epoch 7/10:  13%|‚ñà‚ñé        | 36/280 [10:03<36:42,  9.03s/it, loss=-0.0876, reward=0.6098, temp=0.40]

   Processing 140 sentences...

Epoch 7/10:  13%|‚ñà‚ñé        | 37/280 [10:13<38:03,  9.40s/it, loss=-0.0922, reward=0.5983, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  14%|‚ñà‚ñé        | 38/280 [11:04<1:28:23, 21.92s/it, loss=-0.0844, reward=0.6120, temp=0.40]

   Processing 97 sentences...

Epoch 7/10:  14%|‚ñà‚ñç        | 39/280 [11:10<1:08:52, 17.15s/it, loss=-0.0982, reward=0.5763, temp=0.40]

   Processing 182 sentences...

Epoch 7/10:  14%|‚ñà‚ñç        | 40/280 [11:23<1:03:17, 15.82s/it, loss=-0.0947, reward=0.5971, temp=0.40]

   Processing 87 sentences...

Epoch 7/10:  15%|‚ñà‚ñç        | 41/280 [11:27<49:06, 12.33s/it, loss=-0.0867, reward=0.5847, temp=0.40]  

   Processing 235 sentences...

Epoch 7/10:  15%|‚ñà‚ñå        | 42/280 [12:00<1:13:44, 18.59s/it, loss=-0.0913, reward=0.6107, temp=0.40]

   Processing 50 sentences...

Epoch 7/10:  15%|‚ñà‚ñå        | 43/280 [12:03<54:07, 13.70s/it, loss=-0.1084, reward=0.5731, temp=0.40]  

   Processing 70 sentences...

Epoch 7/10:  16%|‚ñà‚ñå        | 44/280 [12:07<42:45, 10.87s/it, loss=-0.0841, reward=0.5872, temp=0.40]

   Processing 65 sentences...

Epoch 7/10:  16%|‚ñà‚ñå        | 45/280 [12:12<36:12,  9.24s/it, loss=-0.0948, reward=0.5797, temp=0.40]

   Processing 482 sentences...

Epoch 7/10:  16%|‚ñà‚ñã        | 46/280 [13:04<1:26:13, 22.11s/it, loss=-0.0834, reward=0.6174, temp=0.40]

   Processing 164 sentences...

Epoch 7/10:  17%|‚ñà‚ñã        | 47/280 [13:20<1:18:45, 20.28s/it, loss=-0.0875, reward=0.6030, temp=0.40]

   Processing 102 sentences...

Epoch 7/10:  17%|‚ñà‚ñã        | 48/280 [13:27<1:02:27, 16.15s/it, loss=-0.0952, reward=0.5981, temp=0.40]

   Processing 48 sentences...

Epoch 7/10:  18%|‚ñà‚ñä        | 49/280 [13:29<45:31, 11.82s/it, loss=-0.1038, reward=0.5866, temp=0.40]  

   Processing 74 sentences...

Epoch 7/10:  18%|‚ñà‚ñä        | 50/280 [13:34<37:56,  9.90s/it, loss=-0.0981, reward=0.5844, temp=0.40]

   Processing 67 sentences...

Epoch 7/10:  18%|‚ñà‚ñä        | 51/280 [13:40<33:21,  8.74s/it, loss=-0.0995, reward=0.5784, temp=0.40]

   Processing 31 sentences...

Epoch 7/10:  19%|‚ñà‚ñä        | 52/280 [13:42<25:03,  6.60s/it, loss=-0.0995, reward=0.5598, temp=0.40]

   Processing 53 sentences...

Epoch 7/10:  19%|‚ñà‚ñâ        | 53/280 [13:45<21:32,  5.70s/it, loss=-0.1085, reward=0.5681, temp=0.40]

   Processing 123 sentences...

Epoch 7/10:  19%|‚ñà‚ñâ        | 54/280 [13:53<23:38,  6.28s/it, loss=-0.0913, reward=0.6070, temp=0.40]

   Processing 169 sentences...

Epoch 7/10:  20%|‚ñà‚ñâ        | 55/280 [14:03<27:34,  7.35s/it, loss=-0.0958, reward=0.5968, temp=0.40]

   Processing 33 sentences...

Epoch 7/10:  20%|‚ñà‚ñà        | 56/280 [14:06<22:28,  6.02s/it, loss=-0.1149, reward=0.5702, temp=0.40]

   Processing 190 sentences...

Epoch 7/10:  20%|‚ñà‚ñà        | 57/280 [14:17<28:03,  7.55s/it, loss=-0.0829, reward=0.6100, temp=0.40]

   Processing 223 sentences...

Epoch 7/10:  21%|‚ñà‚ñà        | 58/280 [14:34<38:49, 10.49s/it, loss=-0.0925, reward=0.6019, temp=0.40]

   Processing 57 sentences...

Epoch 7/10:  21%|‚ñà‚ñà        | 59/280 [14:37<30:09,  8.19s/it, loss=-0.0973, reward=0.5949, temp=0.40]

   Processing 362 sentences...

Epoch 7/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:02<48:24, 13.20s/it, loss=-0.0810, reward=0.6299, temp=0.40]

   Processing 192 sentences...

Epoch 7/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:20<53:57, 14.78s/it, loss=-0.0927, reward=0.6071, temp=0.40]

   Processing 108 sentences...

Epoch 7/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:24<41:15, 11.35s/it, loss=-0.0964, reward=0.5992, temp=0.40]

   Processing 380 sentences...

Epoch 7/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:04<1:12:34, 20.07s/it, loss=-0.0864, reward=0.6141, temp=0.40]

   Processing 285 sentences...

Epoch 7/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:35<1:23:59, 23.33s/it, loss=-0.0908, reward=0.6107, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:34<2:01:51, 34.01s/it, loss=-0.0889, reward=0.6140, temp=0.40]

   Processing 144 sentences...

Epoch 7/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:44<1:35:04, 26.66s/it, loss=-0.0975, reward=0.5913, temp=0.40]

   Processing 202 sentences...

Epoch 7/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [18:01<1:24:23, 23.77s/it, loss=-0.0919, reward=0.6097, temp=0.40]

   Processing 271 sentences...

Epoch 7/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:28<1:28:14, 24.98s/it, loss=-0.0937, reward=0.6028, temp=0.40]

   Processing 51 sentences...

Epoch 7/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:31<1:03:47, 18.14s/it, loss=-0.1065, reward=0.5867, temp=0.40]

   Processing 376 sentences...

Epoch 7/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [19:01<1:16:27, 21.84s/it, loss=-0.0904, reward=0.6107, temp=0.40]

   Processing 61 sentences...

Epoch 7/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:05<57:10, 16.41s/it, loss=-0.1026, reward=0.5955, temp=0.40]  

   Processing 63 sentences...

Epoch 7/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:08<43:17, 12.49s/it, loss=-0.1105, reward=0.5780, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:17<1:41:03, 29.29s/it, loss=-0.0871, reward=0.6130, temp=0.40]

   Processing 254 sentences...

Epoch 7/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:43<1:37:51, 28.50s/it, loss=-0.0925, reward=0.6038, temp=0.40]

   Processing 118 sentences...

Epoch 7/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:05<1:30:50, 26.59s/it, loss=-0.0891, reward=0.6164, temp=0.40]

   Processing 439 sentences...

Epoch 7/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:41<1:39:27, 29.25s/it, loss=-0.0893, reward=0.6074, temp=0.40]

   Processing 51 sentences...

Epoch 7/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:43<1:11:23, 21.10s/it, loss=-0.1145, reward=0.5860, temp=0.40]

   Processing 143 sentences...

Epoch 7/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:54<1:00:24, 17.94s/it, loss=-0.1138, reward=0.5919, temp=0.40]

   Processing 300 sentences...

Epoch 7/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:15<1:03:28, 18.95s/it, loss=-0.0927, reward=0.6084, temp=0.40]

   Processing 137 sentences...

Epoch 7/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:24<52:58, 15.89s/it, loss=-0.0922, reward=0.5977, temp=0.40]  

   Processing 120 sentences...

Epoch 7/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:30<43:41, 13.17s/it, loss=-0.0890, reward=0.6062, temp=0.40]

   Processing 170 sentences...

Epoch 7/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:44<44:06, 13.37s/it, loss=-0.0979, reward=0.5978, temp=0.40]

   Processing 107 sentences...

Epoch 7/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:51<37:14, 11.34s/it, loss=-0.1101, reward=0.5861, temp=0.40]

   Processing 48 sentences...

Epoch 7/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:55<29:54,  9.15s/it, loss=-0.1482, reward=0.5706, temp=0.40]

   Processing 163 sentences...

Epoch 7/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [23:04<29:14,  9.00s/it, loss=-0.0890, reward=0.6071, temp=0.40]

   Processing 108 sentences...

Epoch 7/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:13<29:33,  9.14s/it, loss=-0.1067, reward=0.5906, temp=0.40]

   Processing 86 sentences...

Epoch 7/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:17<24:52,  7.73s/it, loss=-0.0644, reward=0.6033, temp=0.40]

   Processing 74 sentences...

Epoch 7/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:30<29:31,  9.23s/it, loss=-0.1749, reward=0.5527, temp=0.40]

   Processing 81 sentences...

Epoch 7/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:40<30:06,  9.46s/it, loss=-0.1264, reward=0.5718, temp=0.40]

   Processing 203 sentences...

Epoch 7/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:52<31:58, 10.10s/it, loss=-0.0739, reward=0.6017, temp=0.40]

   Processing 54 sentences...

Epoch 7/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:54<24:44,  7.86s/it, loss=-0.1464, reward=0.5616, temp=0.40]

   Processing 143 sentences...

Epoch 7/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [24:04<26:21,  8.41s/it, loss=-0.0777, reward=0.5999, temp=0.40]

   Processing 36 sentences...

Epoch 7/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [24:06<20:33,  6.60s/it, loss=-0.0879, reward=0.5954, temp=0.40]

   Processing 106 sentences...

Epoch 7/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:11<18:57,  6.12s/it, loss=-0.0598, reward=0.6076, temp=0.40]

   Processing 133 sentences...

Epoch 7/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:37<36:52, 11.96s/it, loss=-0.0967, reward=0.5918, temp=0.40]

   Processing 206 sentences...

Epoch 7/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:47<35:05, 11.45s/it, loss=-0.0882, reward=0.6080, temp=0.40]

   Processing 162 sentences...

Epoch 7/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:08<43:50, 14.38s/it, loss=-0.0942, reward=0.5932, temp=0.40]

   Processing 42 sentences...

Epoch 7/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:10<32:05, 10.58s/it, loss=-0.0967, reward=0.5926, temp=0.40]

   Processing 89 sentences...

Epoch 7/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:15<26:25,  8.76s/it, loss=-0.0935, reward=0.6071, temp=0.40]

   Processing 32 sentences...

Epoch 7/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:19<21:49,  7.28s/it, loss=-0.2024, reward=0.5627, temp=0.40]

   Processing 408 sentences...

Epoch 7/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:44<1:31:37, 30.71s/it, loss=-0.0857, reward=0.6077, temp=0.40]

   Processing 35 sentences...

Epoch 7/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:46<1:05:56, 22.23s/it, loss=-0.1118, reward=0.5517, temp=0.40]

   Processing 137 sentences...

Epoch 7/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:54<52:32, 17.81s/it, loss=-0.0930, reward=0.5977, temp=0.40]  

   Processing 105 sentences...

Epoch 7/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [27:02<43:56, 14.98s/it, loss=-0.0962, reward=0.5937, temp=0.40]

   Processing 124 sentences...

Epoch 7/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:14<40:47, 13.99s/it, loss=-0.0933, reward=0.5956, temp=0.40]

   Processing 220 sentences...

Epoch 7/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:28<40:32, 13.98s/it, loss=-0.0903, reward=0.6078, temp=0.40]

   Processing 98 sentences...

Epoch 7/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:33<32:19, 11.21s/it, loss=-0.0948, reward=0.5977, temp=0.40]

   Processing 133 sentences...

Epoch 7/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:39<28:18,  9.87s/it, loss=-0.0926, reward=0.5988, temp=0.40]

   Processing 315 sentences...

Epoch 7/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:09<44:50, 15.73s/it, loss=-0.0882, reward=0.6146, temp=0.40]

   Processing 81 sentences...

Epoch 7/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:13<34:22, 12.13s/it, loss=-0.0966, reward=0.5938, temp=0.40]

   Processing 286 sentences...

Epoch 7/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:29<37:57, 13.48s/it, loss=-0.0914, reward=0.6057, temp=0.40]

   Processing 122 sentences...

Epoch 7/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:36<32:10, 11.49s/it, loss=-0.0951, reward=0.6022, temp=0.40]

   Processing 170 sentences...

Epoch 7/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:54<37:36, 13.51s/it, loss=-0.0947, reward=0.6055, temp=0.40]

   Processing 376 sentences...

Epoch 7/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:38<1:02:11, 22.48s/it, loss=-0.0881, reward=0.6114, temp=0.40]

   Processing 334 sentences...

Epoch 7/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:26<1:23:32, 30.38s/it, loss=-0.0930, reward=0.6077, temp=0.40]

   Processing 102 sentences...

Epoch 7/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:37<1:06:57, 24.50s/it, loss=-0.0968, reward=0.6008, temp=0.40]

   Processing 268 sentences...

Epoch 7/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:51<58:09, 21.41s/it, loss=-0.0922, reward=0.6108, temp=0.40]  

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:31<2:01:10, 44.88s/it, loss=-0.0836, reward=0.6135, temp=0.40]

   Processing 253 sentences...

Epoch 7/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:48<1:38:03, 36.54s/it, loss=-0.0909, reward=0.6155, temp=0.40]

   Processing 132 sentences...

Epoch 7/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:57<1:15:27, 28.30s/it, loss=-0.1009, reward=0.5952, temp=0.40]

   Processing 286 sentences...

Epoch 7/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:19<1:10:10, 26.48s/it, loss=-0.0942, reward=0.6091, temp=0.40]

   Processing 128 sentences...

Epoch 7/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:27<54:32, 20.71s/it, loss=-0.0977, reward=0.5930, temp=0.40]  

   Processing 37 sentences...

Epoch 7/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:29<39:50, 15.23s/it, loss=-0.1006, reward=0.5776, temp=0.40]

   Processing 205 sentences...

Epoch 7/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:47<41:45, 16.06s/it, loss=-0.0930, reward=0.6102, temp=0.40]

   Processing 87 sentences...

Epoch 7/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:54<34:09, 13.22s/it, loss=-0.0998, reward=0.5909, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:07<1:20:10, 31.24s/it, loss=-0.0855, reward=0.6123, temp=0.40]

   Processing 144 sentences...

Epoch 7/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:19<1:05:13, 25.58s/it, loss=-0.0950, reward=0.6101, temp=0.40]

   Processing 80 sentences...

Epoch 7/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:27<51:04, 20.16s/it, loss=-0.1021, reward=0.5886, temp=0.40]  

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:54<1:41:11, 40.21s/it, loss=-0.0911, reward=0.6082, temp=0.40]

   Processing 124 sentences...

Epoch 7/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [37:03<1:17:08, 30.86s/it, loss=-0.0988, reward=0.5968, temp=0.40]

   Processing 112 sentences...

Epoch 7/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:13<1:01:15, 24.67s/it, loss=-0.0994, reward=0.5950, temp=0.40]

   Processing 75 sentences...

Epoch 7/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:17<45:48, 18.57s/it, loss=-0.0988, reward=0.5957, temp=0.40]  

   Processing 165 sentences...

Epoch 7/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:30<40:53, 16.69s/it, loss=-0.0897, reward=0.6155, temp=0.40]

   Processing 146 sentences...

Epoch 7/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:48<41:25, 17.02s/it, loss=-0.1021, reward=0.5959, temp=0.40]

   Processing 100 sentences...

Epoch 7/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:53<32:33, 13.48s/it, loss=-0.0952, reward=0.6057, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:35<1:36:03, 40.02s/it, loss=-0.0781, reward=0.6114, temp=0.40]

   Processing 209 sentences...

Epoch 7/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:50<1:17:33, 32.54s/it, loss=-0.0901, reward=0.6132, temp=0.40]

   Processing 90 sentences...

Epoch 7/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:56<58:13, 24.60s/it, loss=-0.1003, reward=0.5967, temp=0.40]  

   Processing 72 sentences...

Epoch 7/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [40:00<43:41, 18.59s/it, loss=-0.1039, reward=0.5859, temp=0.40]

   Processing 38 sentences...

Epoch 7/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [40:02<31:43, 13.60s/it, loss=-0.1006, reward=0.5914, temp=0.40]

   Processing 131 sentences...

Epoch 7/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [40:08<25:36, 11.05s/it, loss=-0.1051, reward=0.5876, temp=0.40]

   Processing 78 sentences...

Epoch 7/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:12<20:57,  9.11s/it, loss=-0.1000, reward=0.5992, temp=0.40]

   Processing 119 sentences...

Epoch 7/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:20<19:46,  8.66s/it, loss=-0.0999, reward=0.6020, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 144/280 [42:03<1:24:17, 37.19s/it, loss=-0.0757, reward=0.6115, temp=0.40]

   Processing 133 sentences...

Epoch 7/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 145/280 [42:13<1:04:47, 28.80s/it, loss=-0.1011, reward=0.5964, temp=0.40]

   Processing 83 sentences...

Epoch 7/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 146/280 [42:19<49:20, 22.10s/it, loss=-0.1032, reward=0.5890, temp=0.40]  

   Processing 56 sentences...

Epoch 7/10:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 147/280 [42:22<36:19, 16.39s/it, loss=-0.1028, reward=0.5795, temp=0.40]

   Processing 133 sentences...

Epoch 7/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 148/280 [42:29<29:46, 13.53s/it, loss=-0.0974, reward=0.5985, temp=0.40]

   Processing 101 sentences...

Epoch 7/10:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 149/280 [42:35<24:41, 11.31s/it, loss=-0.1036, reward=0.5918, temp=0.40]

   Processing 151 sentences...

Epoch 7/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 150/280 [42:47<24:34, 11.34s/it, loss=-0.0869, reward=0.6072, temp=0.40]

   Processing 248 sentences...

Epoch 7/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 151/280 [43:01<26:39, 12.40s/it, loss=-0.0849, reward=0.6050, temp=0.40]

   Processing 500 sentences...sents), truncating to 500

Epoch 7/10:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 152/280 [44:00<55:46, 26.14s/it, loss=-0.0698, reward=0.6171, temp=0.40]

   Processing 145 sentences...

Epoch 7/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 153/280 [44:06<42:54, 20.28s/it, loss=-0.0848, reward=0.6131, temp=0.40]

   Processing 202 sentences...

Epoch 7/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 154/280 [44:23<40:36, 19.33s/it, loss=-0.1012, reward=0.6015, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 155/280 [44:55<47:50, 22.96s/it, loss=-0.0810, reward=0.6187, temp=0.40]

   Processing 154 sentences...

Epoch 7/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 156/280 [45:13<44:34, 21.57s/it, loss=-0.0978, reward=0.6053, temp=0.40]

   Processing 345 sentences...

Epoch 7/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 157/280 [46:03<1:01:17, 29.90s/it, loss=-0.0899, reward=0.6143, temp=0.40]

   Processing 99 sentences...

Epoch 7/10:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 158/280 [46:07<45:23, 22.32s/it, loss=-0.1131, reward=0.5838, temp=0.40]  

   Processing 500 sentences...sents), truncating to 500

Epoch 7/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 159/280 [46:57<1:01:57, 30.72s/it, loss=-0.0886, reward=0.6176, temp=0.40]

   Processing 118 sentences...

Epoch 7/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 160/280 [47:07<48:42, 24.35s/it, loss=-0.0963, reward=0.5950, temp=0.40]  

   Processing 30 sentences...

Epoch 7/10:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 161/280 [47:09<34:48, 17.55s/it, loss=-0.1300, reward=0.5529, temp=0.40]

   Processing 76 sentences...

Epoch 7/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 162/280 [47:13<26:29, 13.47s/it, loss=-0.0980, reward=0.5893, temp=0.40]

   Processing 95 sentences...

Epoch 7/10:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 163/280 [47:20<22:25, 11.50s/it, loss=-0.0962, reward=0.5939, temp=0.40]

   Processing 78 sentences...

Epoch 7/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 164/280 [47:24<18:13,  9.43s/it, loss=-0.1018, reward=0.5949, temp=0.40]

   Processing 153 sentences...

Epoch 7/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 165/280 [47:32<17:20,  9.05s/it, loss=-0.0933, reward=0.6057, temp=0.40]

   Processing 229 sentences...

Epoch 7/10:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 166/280 [47:47<20:19, 10.70s/it, loss=-0.0894, reward=0.6103, temp=0.40]

   Processing 108 sentences...

Epoch 7/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 167/280 [47:52<16:55,  8.98s/it, loss=-0.0911, reward=0.5987, temp=0.40]

   Processing 154 sentences...

Epoch 7/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 168/280 [48:11<22:13, 11.90s/it, loss=-0.0915, reward=0.6073, temp=0.40]

   Processing 44 sentences...

Epoch 7/10:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 169/280 [48:18<19:43, 10.67s/it, loss=-0.1083, reward=0.5677, temp=0.40]

   Processing 229 sentences...

Epoch 7/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 170/280 [48:55<33:55, 18.51s/it, loss=-0.0929, reward=0.6080, temp=0.40]

   Processing 221 sentences...

Epoch 7/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 171/280 [49:16<34:45, 19.13s/it, loss=-0.0934, reward=0.6048, temp=0.40]

   Processing 148 sentences...

Epoch 7/10:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 172/280 [49:23<27:58, 15.54s/it, loss=-0.0978, reward=0.5989, temp=0.40]

   Processing 144 sentences...

Epoch 7/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 173/280 [49:29<22:44, 12.75s/it, loss=-0.0922, reward=0.6012, temp=0.40]

   Processing 77 sentences...

Epoch 7/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 174/280 [49:33<17:43, 10.04s/it, loss=-0.1316, reward=0.5886, temp=0.40]

   Processing 90 sentences...

Epoch 7/10:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 175/280 [49:40<16:10,  9.24s/it, loss=-0.1002, reward=0.6000, temp=0.40]

   Processing 309 sentences...

Epoch 7/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 176/280 [49:53<17:45, 10.24s/it, loss=-0.0942, reward=0.6032, temp=0.40]

   Processing 297 sentences...

Epoch 7/10:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 177/280 [50:13<22:54, 13.35s/it, loss=-0.0826, reward=0.6130, temp=0.40]

   Processing 286 sentences...

Epoch 7/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 178/280 [50:40<29:31, 17.37s/it, loss=-0.0930, reward=0.6070, temp=0.40]

   Processing 66 sentences...

Epoch 7/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 179/280 [50:43<21:54, 13.01s/it, loss=-0.1655, reward=0.5712, temp=0.40]

   Processing 220 sentences...

Epoch 7/10:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 180/280 [50:58<22:38, 13.59s/it, loss=-0.0952, reward=0.6023, temp=0.40]

   Processing 209 sentences...

Epoch 7/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 181/280 [51:15<24:23, 14.79s/it, loss=-0.0960, reward=0.6025, temp=0.40]

   Processing 70 sentences...

Epoch 7/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 182/280 [51:20<19:07, 11.71s/it, loss=-0.3213, reward=0.5723, temp=0.40]

   Processing 214 sentences...

Epoch 7/10:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 183/280 [51:30<18:20, 11.35s/it, loss=-0.0799, reward=0.6268, temp=0.40]

   Processing 213 sentences...

Epoch 7/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 184/280 [51:42<18:21, 11.47s/it, loss=-0.0855, reward=0.6180, temp=0.40]

   Processing 29 sentences...

Epoch 7/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 185/280 [51:44<13:40,  8.63s/it, loss=-0.1984, reward=0.5568, temp=0.40]

   Processing 91 sentences...

Epoch 7/10:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 186/280 [51:49<11:37,  7.42s/it, loss=-0.0803, reward=0.5991, temp=0.40]

   Processing 291 sentences...

Epoch 7/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 187/280 [52:09<17:17, 11.15s/it, loss=-0.0920, reward=0.6087, temp=0.40]

   Processing 80 sentences...

Epoch 7/10:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 188/280 [52:18<16:03, 10.47s/it, loss=-0.0929, reward=0.6084, temp=0.40]

   Processing 43 sentences...

Epoch 7/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 189/280 [52:20<12:22,  8.16s/it, loss=-0.1414, reward=0.5728, temp=0.40]

   Processing 45 sentences...

Epoch 7/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 190/280 [52:23<09:42,  6.47s/it, loss=-0.1629, reward=0.5799, temp=0.40]

   Processing 89 sentences...

Epoch 7/10:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 191/280 [52:27<08:25,  5.68s/it, loss=-0.0984, reward=0.5929, temp=0.40]

   Processing 194 sentences...

Epoch 7/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 192/280 [52:36<10:07,  6.90s/it, loss=-0.0928, reward=0.6038, temp=0.40]

   Processing 137 sentences...

Epoch 7/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 193/280 [52:47<11:36,  8.01s/it, loss=-0.1389, reward=0.5785, temp=0.40]

   Processing 73 sentences...

Epoch 7/10:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 194/280 [52:50<09:27,  6.60s/it, loss=-0.1117, reward=0.5860, temp=0.40]

   Processing 158 sentences...

Epoch 7/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 195/280 [53:04<12:09,  8.58s/it, loss=-0.0806, reward=0.5995, temp=0.40]

   Processing 56 sentences...

Epoch 7/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 196/280 [53:06<09:30,  6.79s/it, loss=-0.2374, reward=0.5539, temp=0.40]

   Processing 180 sentences...

Epoch 7/10:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 197/280 [53:17<11:00,  7.96s/it, loss=-0.0742, reward=0.5955, temp=0.40]

   Processing 338 sentences...

Epoch 7/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 198/280 [53:37<15:42, 11.49s/it, loss=-0.0250, reward=0.6114, temp=0.40]

   Processing 261 sentences...

Epoch 7/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 199/280 [54:03<21:39, 16.04s/it, loss=-0.0897, reward=0.6065, temp=0.40]

   Processing 156 sentences...

Epoch 7/10:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 200/280 [54:09<17:25, 13.07s/it, loss=-0.1004, reward=0.5969, temp=0.40]

   Processing 43 sentences...

Epoch 7/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 201/280 [54:12<13:07,  9.97s/it, loss=-0.1218, reward=0.5592, temp=0.40]

   Processing 144 sentences...

Epoch 7/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 202/280 [54:24<13:53, 10.68s/it, loss=-0.1419, reward=0.5781, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 203/280 [55:17<29:46, 23.20s/it, loss=-0.0863, reward=0.6131, temp=0.40]

   Processing 49 sentences...

Epoch 7/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 204/280 [55:20<21:41, 17.13s/it, loss=-0.1236, reward=0.5618, temp=0.40]

   Processing 66 sentences...

Epoch 7/10:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 205/280 [55:26<17:20, 13.87s/it, loss=-0.1244, reward=0.5740, temp=0.40]

   Processing 103 sentences...

Epoch 7/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 206/280 [55:32<14:07, 11.46s/it, loss=-0.1394, reward=0.5583, temp=0.40]

   Processing 350 sentences...

Epoch 7/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 207/280 [56:00<19:50, 16.30s/it, loss=-0.0879, reward=0.6108, temp=0.40]

   Processing 111 sentences...

Epoch 7/10:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 208/280 [56:08<16:38, 13.87s/it, loss=-0.3525, reward=0.5595, temp=0.40]

   Processing 51 sentences...

Epoch 7/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 209/280 [56:10<12:22, 10.46s/it, loss=-0.1281, reward=0.5647, temp=0.40]

   Processing 146 sentences...

Epoch 7/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 210/280 [56:22<12:42, 10.89s/it, loss=-0.0903, reward=0.6113, temp=0.40]

   Processing 161 sentences...

Epoch 7/10:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 211/280 [56:32<12:20, 10.73s/it, loss=-0.0955, reward=0.6011, temp=0.40]

   Processing 243 sentences...

Epoch 7/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 212/280 [56:48<13:37, 12.03s/it, loss=-0.0890, reward=0.6162, temp=0.40]

   Processing 297 sentences...

Epoch 7/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 213/280 [57:15<18:33, 16.62s/it, loss=-0.0898, reward=0.6054, temp=0.40]

   Processing 47 sentences...

Epoch 7/10:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 214/280 [57:17<13:39, 12.41s/it, loss=-0.1114, reward=0.5518, temp=0.40]

   Processing 415 sentences...

Epoch 7/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 215/280 [58:25<31:22, 28.97s/it, loss=-0.0833, reward=0.6161, temp=0.40]

   Processing 138 sentences...

Epoch 7/10:  77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 216/280 [58:37<25:21, 23.78s/it, loss=-0.0032, reward=0.5706, temp=0.40]

   Processing 46 sentences...

Epoch 7/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 217/280 [58:39<18:09, 17.30s/it, loss=-0.1485, reward=0.5413, temp=0.40]

   Processing 73 sentences...

Epoch 7/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 218/280 [58:43<13:41, 13.25s/it, loss=-0.0857, reward=0.5666, temp=0.40]

   Processing 136 sentences...

Epoch 7/10:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 219/280 [58:56<13:23, 13.17s/it, loss=-0.1117, reward=0.5656, temp=0.40]

   Processing 93 sentences...

Epoch 7/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 220/280 [59:01<10:46, 10.78s/it, loss=0.1035, reward=0.5871, temp=0.40] 

   Processing 267 sentences...

Epoch 7/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 221/280 [59:18<12:21, 12.56s/it, loss=-0.0889, reward=0.6058, temp=0.40]

   Processing 421 sentences...

Epoch 7/10:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 222/280 [1:00:24<27:42, 28.66s/it, loss=-0.0868, reward=0.6118, temp=0.40]

   Processing 78 sentences...

Epoch 7/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 223/280 [1:00:27<20:03, 21.11s/it, loss=-0.1467, reward=0.5656, temp=0.40]

   Processing 275 sentences...

Epoch 7/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 224/280 [1:00:47<19:25, 20.81s/it, loss=-0.1682, reward=0.5817, temp=0.40]

   Processing 56 sentences...

Epoch 7/10:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 225/280 [1:00:51<14:16, 15.58s/it, loss=-0.1996, reward=0.5454, temp=0.40]

   Processing 292 sentences...

Epoch 7/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 226/280 [1:01:05<13:45, 15.28s/it, loss=-0.0873, reward=0.6129, temp=0.40]

   Processing 393 sentences...

Epoch 7/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 227/280 [1:01:45<20:01, 22.68s/it, loss=-0.2374, reward=0.5768, temp=0.40]

   Processing 412 sentences...

Epoch 7/10:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 228/280 [1:02:29<25:06, 28.98s/it, loss=-0.0866, reward=0.6139, temp=0.40]

   Processing 53 sentences...

Epoch 7/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 229/280 [1:02:31<17:50, 20.99s/it, loss=-0.1038, reward=0.5837, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 230/280 [1:03:17<23:35, 28.31s/it, loss=-0.0860, reward=0.6137, temp=0.40]

   Processing 449 sentences...

Epoch 7/10:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 231/280 [1:03:49<24:03, 29.47s/it, loss=-0.0875, reward=0.6098, temp=0.40]

   Processing 81 sentences...

Epoch 7/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 232/280 [1:03:53<17:35, 21.99s/it, loss=-0.2154, reward=0.5750, temp=0.40]

   Processing 368 sentences...

Epoch 7/10:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 233/280 [1:04:17<17:28, 22.30s/it, loss=-0.0887, reward=0.6088, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 234/280 [1:05:22<26:56, 35.15s/it, loss=-0.0873, reward=0.6111, temp=0.40]

   Processing 45 sentences...

Epoch 7/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 235/280 [1:05:24<19:01, 25.37s/it, loss=-0.1566, reward=0.5464, temp=0.40]

   Processing 45 sentences...

Epoch 7/10:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 236/280 [1:05:27<13:35, 18.53s/it, loss=-0.1373, reward=0.5668, temp=0.40]

   Processing 96 sentences...

Epoch 7/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 237/280 [1:05:35<11:02, 15.40s/it, loss=-0.0248, reward=0.5833, temp=0.40]

   Processing 184 sentences...

Epoch 7/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 238/280 [1:05:47<10:01, 14.33s/it, loss=-0.0067, reward=0.5761, temp=0.40]

   Processing 63 sentences...

Epoch 7/10:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 239/280 [1:05:52<07:54, 11.58s/it, loss=-0.0843, reward=0.5705, temp=0.40]

   Processing 123 sentences...

Epoch 7/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 240/280 [1:05:58<06:40, 10.01s/it, loss=-0.0463, reward=0.5812, temp=0.40]

   Processing 72 sentences...

Epoch 7/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 241/280 [1:06:03<05:34,  8.58s/it, loss=-0.0540, reward=0.5913, temp=0.40]

   Processing 268 sentences...

Epoch 7/10:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 242/280 [1:06:52<12:56, 20.44s/it, loss=-0.0045, reward=0.5765, temp=0.40]

   Processing 129 sentences...

Epoch 7/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 243/280 [1:06:59<10:10, 16.49s/it, loss=-0.0929, reward=0.5714, temp=0.40]

   Processing 57 sentences...

Epoch 7/10:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 244/280 [1:07:02<07:27, 12.44s/it, loss=-0.1030, reward=0.5785, temp=0.40]

   Processing 267 sentences...

Epoch 7/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 245/280 [1:07:19<08:05, 13.88s/it, loss=-0.0265, reward=0.5717, temp=0.40]

   Processing 84 sentences...

Epoch 7/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 246/280 [1:07:22<06:04, 10.72s/it, loss=-0.1264, reward=0.5667, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 247/280 [1:08:09<11:49, 21.51s/it, loss=-0.0509, reward=0.5834, temp=0.40]

   Processing 97 sentences...

Epoch 7/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 248/280 [1:08:22<10:08, 19.02s/it, loss=-0.1178, reward=0.5773, temp=0.40]

   Processing 232 sentences...

Epoch 7/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 249/280 [1:08:51<11:16, 21.83s/it, loss=-0.1085, reward=0.5801, temp=0.40]

   Processing 111 sentences...

Epoch 7/10:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 250/280 [1:08:57<08:36, 17.23s/it, loss=-0.0967, reward=0.5839, temp=0.40]

   Processing 24 sentences...

Epoch 7/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 251/280 [1:08:59<06:02, 12.50s/it, loss=-0.1127, reward=0.5572, temp=0.40]

   Processing 112 sentences...

Epoch 7/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 252/280 [1:09:05<04:55, 10.55s/it, loss=-0.0817, reward=0.5806, temp=0.40]

   Processing 43 sentences...

Epoch 7/10:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 253/280 [1:09:07<03:41,  8.20s/it, loss=-0.0770, reward=0.5713, temp=0.40]

   Processing 183 sentences...

Epoch 7/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 254/280 [1:09:18<03:50,  8.87s/it, loss=-0.1921, reward=0.5701, temp=0.40]

   Processing 117 sentences...

Epoch 7/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 255/280 [1:09:22<03:09,  7.59s/it, loss=-0.0925, reward=0.5753, temp=0.40]

   Processing 106 sentences...

Epoch 7/10:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 256/280 [1:09:29<02:52,  7.19s/it, loss=-0.0572, reward=0.5806, temp=0.40]

   Processing 67 sentences...

Epoch 7/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 257/280 [1:09:31<02:12,  5.78s/it, loss=-0.0876, reward=0.5700, temp=0.40]

   Processing 112 sentences...

Epoch 7/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 258/280 [1:09:40<02:24,  6.58s/it, loss=-0.0229, reward=0.5746, temp=0.40]

   Processing 79 sentences...

Epoch 7/10:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 259/280 [1:09:44<02:05,  5.96s/it, loss=-0.0929, reward=0.5739, temp=0.40]

   Processing 408 sentences...

Epoch 7/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 260/280 [1:10:04<03:25, 10.28s/it, loss=-0.0849, reward=0.6089, temp=0.40]

   Processing 103 sentences...

Epoch 7/10:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 261/280 [1:10:09<02:42,  8.53s/it, loss=-0.1158, reward=0.5778, temp=0.40]

   Processing 103 sentences...

Epoch 7/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 262/280 [1:10:16<02:23,  7.96s/it, loss=-0.0548, reward=0.5754, temp=0.40]

   Processing 118 sentences...

Epoch 7/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 263/280 [1:10:21<02:04,  7.33s/it, loss=-0.0804, reward=0.5721, temp=0.40]

   Processing 249 sentences...

Epoch 7/10:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 264/280 [1:10:41<02:55, 10.98s/it, loss=0.1489, reward=0.5891, temp=0.40] 

   Processing 145 sentences...

Epoch 7/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 265/280 [1:10:46<02:19,  9.32s/it, loss=-0.1033, reward=0.5679, temp=0.40]

   Processing 294 sentences...

Epoch 7/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 266/280 [1:11:11<03:14, 13.87s/it, loss=0.5141, reward=0.6097, temp=0.40] 

   Processing 147 sentences...

Epoch 7/10:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 267/280 [1:11:19<02:38, 12.20s/it, loss=-0.0881, reward=0.5928, temp=0.40]

   Processing 304 sentences...

Epoch 7/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 268/280 [1:11:53<03:44, 18.70s/it, loss=0.1155, reward=0.6031, temp=0.40] 

   Processing 128 sentences...

Epoch 7/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 269/280 [1:12:07<03:09, 17.18s/it, loss=-0.0879, reward=0.5918, temp=0.40]

   Processing 247 sentences...

Epoch 7/10:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 270/280 [1:12:24<02:51, 17.12s/it, loss=-0.0905, reward=0.6026, temp=0.40]

   Processing 165 sentences...

Epoch 7/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 271/280 [1:12:40<02:32, 16.96s/it, loss=-0.0858, reward=0.6068, temp=0.40]

   Processing 38 sentences...

Epoch 7/10:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 272/280 [1:12:43<01:41, 12.73s/it, loss=-0.0956, reward=0.5665, temp=0.40]

   Processing 63 sentences...

Epoch 7/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 273/280 [1:12:46<01:08,  9.84s/it, loss=-0.0907, reward=0.5960, temp=0.40]

   Processing 215 sentences...

Epoch 7/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 274/280 [1:13:11<01:26, 14.38s/it, loss=-0.0865, reward=0.6053, temp=0.40]

   Processing 39 sentences...

Epoch 7/10:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 275/280 [1:13:13<00:53, 10.68s/it, loss=-0.0939, reward=0.5715, temp=0.40]

   Processing 105 sentences...

Epoch 7/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 276/280 [1:13:18<00:35,  8.93s/it, loss=-0.0881, reward=0.6036, temp=0.40]

   Processing 138 sentences...

Epoch 7/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 277/280 [1:13:29<00:28,  9.59s/it, loss=-0.0895, reward=0.6032, temp=0.40]

   Processing 161 sentences...

Epoch 7/10:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 278/280 [1:13:39<00:19,  9.78s/it, loss=-0.0860, reward=0.5972, temp=0.40]

   Processing 35 sentences...

Epoch 7/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 279/280 [1:13:41<00:07,  7.42s/it, loss=-0.0888, reward=0.5909, temp=0.40]

   Processing 500 sentences...ents), truncating to 500

Epoch 7/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 280/280 [1:14:13<00:00, 15.91s/it, loss=-0.0812, reward=0.6173, temp=0.40]


   ‚ö†Ô∏è Document too long (1424 sents), truncating to 500
Epoch 7/10:
  Train Loss: -0.0956
  Overall Val Reward: 0.5793
  Learning Rate: 0.000100
  Temperature: 0.4000

  Aspect-wise Val Rewards:
    facts       : 0.5792
    analysis    : 0.5822
    argument    : 0.5778
    judgement   : 0.5779
    statute     : 0.5794



Epoch 8/10:   0%|          | 0/280 [00:00<?, ?it/s]

   Processing 333 sentences...

Epoch 8/10:   0%|          | 1/280 [00:27<2:05:41, 27.03s/it, loss=-0.0914, reward=0.6173, temp=0.30]

   Processing 353 sentences...

Epoch 8/10:   1%|          | 2/280 [00:50<1:56:28, 25.14s/it, loss=-0.0926, reward=0.6100, temp=0.30]

   Processing 57 sentences...

Epoch 8/10:   1%|          | 3/280 [00:57<1:18:01, 16.90s/it, loss=-0.0951, reward=0.5796, temp=0.30]

   Processing 96 sentences...

Epoch 8/10:   1%|‚ñè         | 4/280 [01:11<1:12:33, 15.77s/it, loss=-0.0948, reward=0.5844, temp=0.30]

   Processing 71 sentences...

Epoch 8/10:   2%|‚ñè         | 5/280 [01:14<51:10, 11.17s/it, loss=-0.0959, reward=0.5991, temp=0.30]  

   Processing 62 sentences...

Epoch 8/10:   2%|‚ñè         | 6/280 [01:17<37:56,  8.31s/it, loss=-0.0967, reward=0.5988, temp=0.30]

   Processing 350 sentences...

Epoch 8/10:   2%|‚ñé         | 7/280 [01:49<1:13:15, 16.10s/it, loss=-0.0912, reward=0.6178, temp=0.30]

   Processing 263 sentences...

Epoch 8/10:   3%|‚ñé         | 8/280 [02:05<1:12:57, 16.10s/it, loss=-0.0945, reward=0.6071, temp=0.30]

   Processing 176 sentences...

Epoch 8/10:   3%|‚ñé         | 9/280 [02:15<1:03:41, 14.10s/it, loss=-0.0958, reward=0.6107, temp=0.30]

   Processing 318 sentences...

Epoch 8/10:   4%|‚ñé         | 10/280 [03:19<2:12:11, 29.38s/it, loss=-0.0925, reward=0.6063, temp=0.30]

   Processing 338 sentences...

Epoch 8/10:   4%|‚ñç         | 11/280 [03:43<2:04:40, 27.81s/it, loss=-0.0912, reward=0.6141, temp=0.30]

   Processing 40 sentences...

Epoch 8/10:   4%|‚ñç         | 12/280 [03:45<1:29:47, 20.10s/it, loss=-0.1029, reward=0.5643, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:   5%|‚ñç         | 13/280 [04:38<2:12:41, 29.82s/it, loss=-0.0924, reward=0.6102, temp=0.30]

   Processing 423 sentences...

Epoch 8/10:   5%|‚ñå         | 14/280 [05:36<2:50:46, 38.52s/it, loss=-0.0945, reward=0.6103, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:   5%|‚ñå         | 15/280 [05:55<2:23:25, 32.47s/it, loss=-0.0828, reward=0.6402, temp=0.30]

   Processing 82 sentences...

Epoch 8/10:   6%|‚ñå         | 16/280 [05:58<1:43:42, 23.57s/it, loss=-0.0981, reward=0.5996, temp=0.30]

   Processing 137 sentences...

Epoch 8/10:   6%|‚ñå         | 17/280 [06:05<1:21:45, 18.65s/it, loss=-0.0969, reward=0.6050, temp=0.30]

   Processing 30 sentences...

Epoch 8/10:   6%|‚ñã         | 18/280 [06:07<1:00:19, 13.81s/it, loss=-0.1055, reward=0.5434, temp=0.30]

   Processing 82 sentences...

Epoch 8/10:   7%|‚ñã         | 19/280 [06:15<52:20, 12.03s/it, loss=-0.0966, reward=0.5903, temp=0.30]  

   Processing 61 sentences...

Epoch 8/10:   7%|‚ñã         | 20/280 [06:18<40:19,  9.31s/it, loss=-0.0969, reward=0.5863, temp=0.30]

   Processing 50 sentences...

Epoch 8/10:   8%|‚ñä         | 21/280 [06:21<31:59,  7.41s/it, loss=-0.0987, reward=0.5598, temp=0.30]

   Processing 115 sentences...

Epoch 8/10:   8%|‚ñä         | 22/280 [06:30<33:25,  7.77s/it, loss=-0.0954, reward=0.6112, temp=0.30]

   Processing 80 sentences...

Epoch 8/10:   8%|‚ñä         | 23/280 [06:33<27:17,  6.37s/it, loss=-0.0973, reward=0.5904, temp=0.30]

   Processing 434 sentences...

Epoch 8/10:   9%|‚ñä         | 24/280 [07:58<2:08:29, 30.12s/it, loss=-0.0888, reward=0.6132, temp=0.30]

   Processing 254 sentences...

Epoch 8/10:   9%|‚ñâ         | 25/280 [08:31<2:11:07, 30.85s/it, loss=-0.0942, reward=0.6113, temp=0.30]

   Processing 329 sentences...

Epoch 8/10:   9%|‚ñâ         | 26/280 [08:50<1:55:51, 27.37s/it, loss=-0.0939, reward=0.6095, temp=0.30]

   Processing 87 sentences...

Epoch 8/10:  10%|‚ñâ         | 27/280 [08:55<1:26:50, 20.59s/it, loss=-0.0989, reward=0.5986, temp=0.30]

   Processing 39 sentences...

Epoch 8/10:  10%|‚ñà         | 28/280 [08:57<1:02:41, 14.93s/it, loss=-0.1023, reward=0.5749, temp=0.30]

   Processing 83 sentences...

Epoch 8/10:  10%|‚ñà         | 29/280 [09:00<47:54, 11.45s/it, loss=-0.0986, reward=0.5981, temp=0.30]  

   Processing 177 sentences...

Epoch 8/10:  11%|‚ñà         | 30/280 [09:12<48:19, 11.60s/it, loss=-0.0974, reward=0.5974, temp=0.30]

   Processing 82 sentences...

Epoch 8/10:  11%|‚ñà         | 31/280 [09:20<43:27, 10.47s/it, loss=-0.0997, reward=0.5810, temp=0.30]

   Processing 136 sentences...

Epoch 8/10:  11%|‚ñà‚ñè        | 32/280 [09:25<36:37,  8.86s/it, loss=-0.0982, reward=0.5969, temp=0.30]

   Processing 166 sentences...

Epoch 8/10:  12%|‚ñà‚ñè        | 33/280 [09:35<37:35,  9.13s/it, loss=-0.0980, reward=0.6030, temp=0.30]

   Processing 91 sentences...

Epoch 8/10:  12%|‚ñà‚ñè        | 34/280 [09:40<33:02,  8.06s/it, loss=-0.0981, reward=0.5983, temp=0.30]

   Processing 87 sentences...

Epoch 8/10:  12%|‚ñà‚ñé        | 35/280 [09:45<29:01,  7.11s/it, loss=-0.0976, reward=0.6022, temp=0.30]

   Processing 203 sentences...

Epoch 8/10:  13%|‚ñà‚ñé        | 36/280 [09:59<37:30,  9.22s/it, loss=-0.0943, reward=0.6116, temp=0.30]

   Processing 140 sentences...

Epoch 8/10:  13%|‚ñà‚ñé        | 37/280 [10:10<38:50,  9.59s/it, loss=-0.0962, reward=0.6089, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  14%|‚ñà‚ñé        | 38/280 [11:02<1:30:41, 22.49s/it, loss=-0.0883, reward=0.6088, temp=0.30]

   Processing 97 sentences...

Epoch 8/10:  14%|‚ñà‚ñç        | 39/280 [11:09<1:10:40, 17.59s/it, loss=-0.1018, reward=0.5887, temp=0.30]

   Processing 182 sentences...

Epoch 8/10:  14%|‚ñà‚ñç        | 40/280 [11:21<1:04:22, 16.09s/it, loss=-0.0974, reward=0.6062, temp=0.30]

   Processing 87 sentences...

Epoch 8/10:  15%|‚ñà‚ñç        | 41/280 [11:25<49:55, 12.53s/it, loss=-0.0991, reward=0.6027, temp=0.30]  

   Processing 235 sentences...

Epoch 8/10:  15%|‚ñà‚ñå        | 42/280 [11:59<1:15:21, 19.00s/it, loss=-0.0960, reward=0.6034, temp=0.30]

   Processing 50 sentences...

Epoch 8/10:  15%|‚ñà‚ñå        | 43/280 [12:02<55:18, 14.00s/it, loss=-0.1073, reward=0.5694, temp=0.30]  

   Processing 70 sentences...

Epoch 8/10:  16%|‚ñà‚ñå        | 44/280 [12:06<43:46, 11.13s/it, loss=-0.0981, reward=0.6038, temp=0.30]

   Processing 65 sentences...

Epoch 8/10:  16%|‚ñà‚ñå        | 45/280 [12:12<37:10,  9.49s/it, loss=-0.1027, reward=0.5850, temp=0.30]

   Processing 482 sentences...

Epoch 8/10:  16%|‚ñà‚ñã        | 46/280 [13:04<1:26:54, 22.28s/it, loss=-0.0840, reward=0.6135, temp=0.30]

   Processing 164 sentences...

Epoch 8/10:  17%|‚ñà‚ñã        | 47/280 [13:20<1:18:34, 20.23s/it, loss=-0.0991, reward=0.6027, temp=0.30]

   Processing 102 sentences...

Epoch 8/10:  17%|‚ñà‚ñã        | 48/280 [13:26<1:02:21, 16.13s/it, loss=-0.0987, reward=0.6055, temp=0.30]

   Processing 48 sentences...

Epoch 8/10:  18%|‚ñà‚ñä        | 49/280 [13:28<45:28, 11.81s/it, loss=-0.1033, reward=0.5760, temp=0.30]  

   Processing 74 sentences...

Epoch 8/10:  18%|‚ñà‚ñä        | 50/280 [13:33<37:59,  9.91s/it, loss=-0.0985, reward=0.6018, temp=0.30]

   Processing 67 sentences...

Epoch 8/10:  18%|‚ñà‚ñä        | 51/280 [13:39<33:03,  8.66s/it, loss=-0.0982, reward=0.6041, temp=0.30]

   Processing 31 sentences...

Epoch 8/10:  19%|‚ñà‚ñä        | 52/280 [13:41<24:49,  6.53s/it, loss=-0.0983, reward=0.5868, temp=0.30]

   Processing 53 sentences...

Epoch 8/10:  19%|‚ñà‚ñâ        | 53/280 [13:44<21:23,  5.65s/it, loss=-0.1044, reward=0.5763, temp=0.30]

   Processing 123 sentences...

Epoch 8/10:  19%|‚ñà‚ñâ        | 54/280 [13:52<23:24,  6.21s/it, loss=-0.0984, reward=0.6014, temp=0.30]

   Processing 169 sentences...

Epoch 8/10:  20%|‚ñà‚ñâ        | 55/280 [14:02<27:20,  7.29s/it, loss=-0.0994, reward=0.6010, temp=0.30]

   Processing 33 sentences...

Epoch 8/10:  20%|‚ñà‚ñà        | 56/280 [14:04<22:17,  5.97s/it, loss=-0.1037, reward=0.5743, temp=0.30]

   Processing 190 sentences...

Epoch 8/10:  20%|‚ñà‚ñà        | 57/280 [14:15<27:45,  7.47s/it, loss=-0.0935, reward=0.6107, temp=0.30]

   Processing 223 sentences...

Epoch 8/10:  21%|‚ñà‚ñà        | 58/280 [14:33<38:32, 10.41s/it, loss=-0.0994, reward=0.5992, temp=0.30]

   Processing 57 sentences...

Epoch 8/10:  21%|‚ñà‚ñà        | 59/280 [14:35<29:55,  8.12s/it, loss=-0.0978, reward=0.6097, temp=0.30]

   Processing 362 sentences...

Epoch 8/10:  21%|‚ñà‚ñà‚ñè       | 60/280 [15:00<48:02, 13.10s/it, loss=-0.0668, reward=0.6397, temp=0.30]

   Processing 192 sentences...

Epoch 8/10:  22%|‚ñà‚ñà‚ñè       | 61/280 [15:18<53:11, 14.57s/it, loss=-0.0983, reward=0.6062, temp=0.30]

   Processing 108 sentences...

Epoch 8/10:  22%|‚ñà‚ñà‚ñè       | 62/280 [15:21<40:39, 11.19s/it, loss=-0.0983, reward=0.6057, temp=0.30]

   Processing 380 sentences...

Epoch 8/10:  22%|‚ñà‚ñà‚ñé       | 63/280 [16:00<1:10:18, 19.44s/it, loss=-0.0989, reward=0.6030, temp=0.30]

   Processing 285 sentences...

Epoch 8/10:  23%|‚ñà‚ñà‚ñé       | 64/280 [16:30<1:21:45, 22.71s/it, loss=-0.0982, reward=0.6061, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  23%|‚ñà‚ñà‚ñé       | 65/280 [17:30<2:00:37, 33.66s/it, loss=-0.0905, reward=0.6177, temp=0.30]

   Processing 144 sentences...

Epoch 8/10:  24%|‚ñà‚ñà‚ñé       | 66/280 [17:39<1:34:07, 26.39s/it, loss=-0.1019, reward=0.5813, temp=0.30]

   Processing 202 sentences...

Epoch 8/10:  24%|‚ñà‚ñà‚ñç       | 67/280 [17:56<1:23:26, 23.51s/it, loss=-0.0964, reward=0.6093, temp=0.30]

   Processing 271 sentences...

Epoch 8/10:  24%|‚ñà‚ñà‚ñç       | 68/280 [18:24<1:27:56, 24.89s/it, loss=-0.0972, reward=0.6053, temp=0.30]

   Processing 51 sentences...

Epoch 8/10:  25%|‚ñà‚ñà‚ñç       | 69/280 [18:26<1:03:31, 18.07s/it, loss=-0.1002, reward=0.5942, temp=0.30]

   Processing 376 sentences...

Epoch 8/10:  25%|‚ñà‚ñà‚ñå       | 70/280 [18:57<1:16:13, 21.78s/it, loss=-0.0949, reward=0.6110, temp=0.30]

   Processing 61 sentences...

Epoch 8/10:  25%|‚ñà‚ñà‚ñå       | 71/280 [19:00<56:59, 16.36s/it, loss=-0.0985, reward=0.6005, temp=0.30]  

   Processing 63 sentences...

Epoch 8/10:  26%|‚ñà‚ñà‚ñå       | 72/280 [19:04<43:11, 12.46s/it, loss=-0.1005, reward=0.5899, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  26%|‚ñà‚ñà‚ñå       | 73/280 [20:12<1:40:34, 29.15s/it, loss=-0.0926, reward=0.6152, temp=0.30]

   Processing 254 sentences...

Epoch 8/10:  26%|‚ñà‚ñà‚ñã       | 74/280 [20:39<1:37:58, 28.54s/it, loss=-0.0968, reward=0.6048, temp=0.30]

   Processing 118 sentences...

Epoch 8/10:  27%|‚ñà‚ñà‚ñã       | 75/280 [21:01<1:30:48, 26.58s/it, loss=-0.0982, reward=0.6002, temp=0.30]

   Processing 439 sentences...

Epoch 8/10:  27%|‚ñà‚ñà‚ñã       | 76/280 [21:35<1:37:40, 28.73s/it, loss=-0.0951, reward=0.6105, temp=0.30]

   Processing 51 sentences...

Epoch 8/10:  28%|‚ñà‚ñà‚ñä       | 77/280 [21:37<1:10:12, 20.75s/it, loss=-0.0995, reward=0.5971, temp=0.30]

   Processing 143 sentences...

Epoch 8/10:  28%|‚ñà‚ñà‚ñä       | 78/280 [21:47<59:29, 17.67s/it, loss=-0.1005, reward=0.5965, temp=0.30]  

   Processing 300 sentences...

Epoch 8/10:  28%|‚ñà‚ñà‚ñä       | 79/280 [22:09<1:03:01, 18.81s/it, loss=-0.0949, reward=0.6146, temp=0.30]

   Processing 137 sentences...

Epoch 8/10:  29%|‚ñà‚ñà‚ñä       | 80/280 [22:18<53:00, 15.90s/it, loss=-0.1001, reward=0.5983, temp=0.30]  

   Processing 120 sentences...

Epoch 8/10:  29%|‚ñà‚ñà‚ñâ       | 81/280 [22:25<43:39, 13.17s/it, loss=-0.0997, reward=0.6023, temp=0.30]

   Processing 170 sentences...

Epoch 8/10:  29%|‚ñà‚ñà‚ñâ       | 82/280 [22:38<43:13, 13.10s/it, loss=-0.0998, reward=0.5978, temp=0.30]

   Processing 107 sentences...

Epoch 8/10:  30%|‚ñà‚ñà‚ñâ       | 83/280 [22:44<36:41, 11.17s/it, loss=-0.1019, reward=0.5947, temp=0.30]

   Processing 48 sentences...

Epoch 8/10:  30%|‚ñà‚ñà‚ñà       | 84/280 [22:48<29:23,  9.00s/it, loss=-0.1034, reward=0.5755, temp=0.30]

   Processing 163 sentences...

Epoch 8/10:  30%|‚ñà‚ñà‚ñà       | 85/280 [22:57<28:48,  8.86s/it, loss=-0.0959, reward=0.6100, temp=0.30]

   Processing 108 sentences...

Epoch 8/10:  31%|‚ñà‚ñà‚ñà       | 86/280 [23:06<29:13,  9.04s/it, loss=-0.0997, reward=0.5973, temp=0.30]

   Processing 86 sentences...

Epoch 8/10:  31%|‚ñà‚ñà‚ñà       | 87/280 [23:11<24:39,  7.67s/it, loss=-0.1048, reward=0.5925, temp=0.30]

   Processing 74 sentences...

Epoch 8/10:  31%|‚ñà‚ñà‚ñà‚ñè      | 88/280 [23:23<29:14,  9.14s/it, loss=-0.1073, reward=0.5760, temp=0.30]

   Processing 81 sentences...

Epoch 8/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 89/280 [23:33<29:42,  9.33s/it, loss=-0.1039, reward=0.5918, temp=0.30]

   Processing 203 sentences...

Epoch 8/10:  32%|‚ñà‚ñà‚ñà‚ñè      | 90/280 [23:45<31:49, 10.05s/it, loss=-0.0887, reward=0.6106, temp=0.30]

   Processing 54 sentences...

Epoch 8/10:  32%|‚ñà‚ñà‚ñà‚ñé      | 91/280 [23:47<24:40,  7.84s/it, loss=-0.1065, reward=0.5873, temp=0.30]

   Processing 143 sentences...

Epoch 8/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 92/280 [23:57<26:15,  8.38s/it, loss=-0.0947, reward=0.6062, temp=0.30]

   Processing 36 sentences...

Epoch 8/10:  33%|‚ñà‚ñà‚ñà‚ñé      | 93/280 [23:59<20:30,  6.58s/it, loss=-0.1054, reward=0.5811, temp=0.30]

   Processing 106 sentences...

Epoch 8/10:  34%|‚ñà‚ñà‚ñà‚ñé      | 94/280 [24:04<18:48,  6.07s/it, loss=-0.1048, reward=0.5912, temp=0.30]

   Processing 133 sentences...

Epoch 8/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 95/280 [24:30<36:36, 11.87s/it, loss=-0.1100, reward=0.5822, temp=0.30]

   Processing 206 sentences...

Epoch 8/10:  34%|‚ñà‚ñà‚ñà‚ñç      | 96/280 [24:40<34:46, 11.34s/it, loss=-0.0852, reward=0.6085, temp=0.30]

   Processing 162 sentences...

Epoch 8/10:  35%|‚ñà‚ñà‚ñà‚ñç      | 97/280 [25:01<43:27, 14.25s/it, loss=-0.0973, reward=0.6044, temp=0.30]

   Processing 42 sentences...

Epoch 8/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 98/280 [25:03<31:50, 10.50s/it, loss=-0.1031, reward=0.5898, temp=0.30]

   Processing 89 sentences...

Epoch 8/10:  35%|‚ñà‚ñà‚ñà‚ñå      | 99/280 [25:07<26:18,  8.72s/it, loss=-0.0992, reward=0.5935, temp=0.30]

   Processing 32 sentences...

Epoch 8/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 100/280 [25:11<21:50,  7.28s/it, loss=-0.1265, reward=0.5485, temp=0.30]

   Processing 408 sentences...

Epoch 8/10:  36%|‚ñà‚ñà‚ñà‚ñå      | 101/280 [26:37<1:32:32, 31.02s/it, loss=-0.0869, reward=0.5999, temp=0.30]

   Processing 35 sentences...

Epoch 8/10:  36%|‚ñà‚ñà‚ñà‚ñã      | 102/280 [26:40<1:06:31, 22.42s/it, loss=-0.1059, reward=0.5789, temp=0.30]

   Processing 137 sentences...

Epoch 8/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 103/280 [26:47<52:50, 17.91s/it, loss=-0.0982, reward=0.5928, temp=0.30]  

   Processing 105 sentences...

Epoch 8/10:  37%|‚ñà‚ñà‚ñà‚ñã      | 104/280 [26:56<44:06, 15.04s/it, loss=-0.0999, reward=0.5909, temp=0.30]

   Processing 124 sentences...

Epoch 8/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 105/280 [27:07<40:52, 14.02s/it, loss=-0.0928, reward=0.5992, temp=0.30]

   Processing 220 sentences...

Epoch 8/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 106/280 [27:21<40:46, 14.06s/it, loss=-0.0825, reward=0.6128, temp=0.30]

   Processing 98 sentences...

Epoch 8/10:  38%|‚ñà‚ñà‚ñà‚ñä      | 107/280 [27:26<32:30, 11.27s/it, loss=-0.0978, reward=0.6026, temp=0.30]

   Processing 133 sentences...

Epoch 8/10:  39%|‚ñà‚ñà‚ñà‚ñä      | 108/280 [27:33<28:28,  9.93s/it, loss=-0.0955, reward=0.6085, temp=0.30]

   Processing 315 sentences...

Epoch 8/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 109/280 [28:02<44:59, 15.79s/it, loss=-0.0943, reward=0.6045, temp=0.30]

   Processing 81 sentences...

Epoch 8/10:  39%|‚ñà‚ñà‚ñà‚ñâ      | 110/280 [28:06<34:30, 12.18s/it, loss=-0.1057, reward=0.5872, temp=0.30]

   Processing 286 sentences...

Epoch 8/10:  40%|‚ñà‚ñà‚ñà‚ñâ      | 111/280 [28:23<37:55, 13.47s/it, loss=-0.0916, reward=0.6094, temp=0.30]

   Processing 122 sentences...

Epoch 8/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 112/280 [28:29<32:08, 11.48s/it, loss=-0.0979, reward=0.6042, temp=0.30]

   Processing 170 sentences...

Epoch 8/10:  40%|‚ñà‚ñà‚ñà‚ñà      | 113/280 [28:48<37:28, 13.47s/it, loss=-0.0966, reward=0.6092, temp=0.30]

   Processing 376 sentences...

Epoch 8/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 114/280 [29:31<1:02:20, 22.53s/it, loss=-0.0928, reward=0.6161, temp=0.30]

   Processing 334 sentences...

Epoch 8/10:  41%|‚ñà‚ñà‚ñà‚ñà      | 115/280 [30:21<1:24:29, 30.73s/it, loss=-0.0946, reward=0.6148, temp=0.30]

   Processing 102 sentences...

Epoch 8/10:  41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 116/280 [30:32<1:07:40, 24.76s/it, loss=-0.0978, reward=0.6066, temp=0.30]

   Processing 268 sentences...

Epoch 8/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 117/280 [30:46<58:45, 21.63s/it, loss=-0.0959, reward=0.6076, temp=0.30]  

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 118/280 [32:26<2:01:57, 45.17s/it, loss=-0.0931, reward=0.6192, temp=0.30]

   Processing 253 sentences...

Epoch 8/10:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 119/280 [32:43<1:38:36, 36.75s/it, loss=-0.0963, reward=0.6101, temp=0.30]

   Processing 132 sentences...

Epoch 8/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 120/280 [32:53<1:16:04, 28.53s/it, loss=-0.0979, reward=0.5924, temp=0.30]

   Processing 286 sentences...

Epoch 8/10:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 121/280 [33:15<1:10:52, 26.75s/it, loss=-0.0951, reward=0.6142, temp=0.30]

   Processing 128 sentences...

Epoch 8/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 122/280 [33:23<55:04, 20.92s/it, loss=-0.0970, reward=0.6070, temp=0.30]  

   Processing 37 sentences...

Epoch 8/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 123/280 [33:25<40:09, 15.35s/it, loss=-0.1041, reward=0.5856, temp=0.30]

   Processing 205 sentences...

Epoch 8/10:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 124/280 [33:43<41:54, 16.12s/it, loss=-0.0957, reward=0.6128, temp=0.30]

   Processing 87 sentences...

Epoch 8/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 125/280 [33:50<34:13, 13.25s/it, loss=-0.1070, reward=0.5960, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 126/280 [35:02<1:19:28, 30.96s/it, loss=-0.0942, reward=0.6118, temp=0.30]

   Processing 144 sentences...

Epoch 8/10:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 127/280 [35:14<1:04:29, 25.29s/it, loss=-0.0997, reward=0.5911, temp=0.30]

   Processing 80 sentences...

Epoch 8/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 128/280 [35:21<50:35, 19.97s/it, loss=-0.1100, reward=0.5902, temp=0.30]  

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 129/280 [36:48<1:40:45, 40.03s/it, loss=-0.0929, reward=0.6157, temp=0.30]

   Processing 124 sentences...

Epoch 8/10:  46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 130/280 [36:58<1:17:00, 30.80s/it, loss=-0.0976, reward=0.6043, temp=0.30]

   Processing 112 sentences...

Epoch 8/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 131/280 [37:08<1:01:02, 24.58s/it, loss=-0.0997, reward=0.5926, temp=0.30]

   Processing 75 sentences...

Epoch 8/10:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 132/280 [37:12<45:38, 18.50s/it, loss=-0.1003, reward=0.6000, temp=0.30]  

   Processing 165 sentences...

Epoch 8/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 133/280 [37:24<40:30, 16.53s/it, loss=-0.0953, reward=0.6131, temp=0.30]

   Processing 146 sentences...

Epoch 8/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 134/280 [37:42<41:07, 16.90s/it, loss=-0.0977, reward=0.6050, temp=0.30]

   Processing 100 sentences...

Epoch 8/10:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 135/280 [37:47<32:17, 13.36s/it, loss=-0.0928, reward=0.6101, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

Epoch 8/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 136/280 [39:25<1:32:50, 38.69s/it, loss=-0.0895, reward=0.6183, temp=0.30]

   Processing 209 sentences...

Epoch 8/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 137/280 [39:40<1:15:16, 31.58s/it, loss=-0.0971, reward=0.6092, temp=0.30]

   Processing 90 sentences...

Epoch 8/10:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 138/280 [39:46<56:37, 23.93s/it, loss=-0.1041, reward=0.5910, temp=0.30]  

   Processing 72 sentences...

Epoch 8/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 139/280 [39:50<42:36, 18.13s/it, loss=-0.1043, reward=0.5870, temp=0.30]

   Processing 38 sentences...

Epoch 8/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 140/280 [39:52<30:58, 13.27s/it, loss=-0.1063, reward=0.5794, temp=0.30]

   Processing 131 sentences...

Epoch 8/10:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 141/280 [39:57<25:08, 10.85s/it, loss=-0.1009, reward=0.5989, temp=0.30]

   Processing 78 sentences...

Epoch 8/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 142/280 [40:02<20:37,  8.97s/it, loss=-0.1071, reward=0.5913, temp=0.30]

   Processing 119 sentences...

Epoch 8/10:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 143/280 [40:10<19:32,  8.56s/it, loss=-0.0995, reward=0.6011, temp=0.30]

   Processing 500 sentences...ents), truncating to 500

In [1]:
def load_trained_model(checkpoint_path='final_inlegalbert_model.pt'):
    """Load the trained model from checkpoint"""
    print("\n" + "="*70)
    print("üìÇ LOADING TRAINED MODEL")
    print("="*70)
    
    encoder = SentenceEncoder(model_name='law-ai/InLegalBERT', hidden_dim=768)
    policy = MultiAspectPolicyNetwork(
        input_dim=768, 
        hidden_dim=256, 
        num_aspects=5, 
        dropout=0.5
    )
    
    checkpoint = torch.load(checkpoint_path, map_location=device)
    policy.load_state_dict(checkpoint['policy_state_dict'])
    
    agent = UnsupervisedRLAgent(
        encoder=encoder,
        policy=policy,
        learning_rate=1e-4,
        weight_decay=1e-5
    )
    
    agent.policy.load_state_dict(checkpoint['policy_state_dict'])
    agent.policy.eval()
    
    print(f"\n‚úÖ Model loaded successfully!")
    print(f"   Training completed at epoch: {checkpoint['epoch']}")
    print(f"   Best validation reward: {checkpoint['val_reward']:.4f}")
    print(f"\n   Aspect-wise Validation Rewards:")
    for aspect, reward in checkpoint['aspect_val_rewards'].items():
        print(f"     {aspect:12s}: {reward:.4f}")
    print("="*70 + "\n")
    
    return agent, checkpoint

# Load the trained model
agent, checkpoint = load_trained_model('final_inlegalbert_model.pt')



üìÇ LOADING TRAINED MODEL


NameError: name 'SentenceEncoder' is not defined

In [None]:
def print_test_sample_summaries(agent, test_dataset, num_samples=5):
    """
    Generate and print sample summaries from TEST SET (unseen data)
    """
    print("\n" + "="*80)
    print(f"üìù SAMPLE SUMMARIES FROM TEST SET (UNSEEN DATA)")
    print(f"   Total test documents: {len(test_dataset)}")
    print("="*80 + "\n")
    
    # Random samples from test set
    sample_indices = random.sample(range(len(test_dataset)), min(num_samples, len(test_dataset)))
    
    for idx, sample_idx in enumerate(sample_indices, 1):
        item = test_dataset[sample_idx]
        judgment = item['judgment']
        judgment_file = item['judgment_file']
        
        print(f"\n{'='*80}")
        print(f"TEST SAMPLE {idx}/{num_samples}: {judgment_file}")
        print(f"{'='*80}\n")
        
        # Original document info
        sentences = agent.preprocess_document(judgment)
        original_words = sum(len(s.split()) for s in sentences)
        
        print(f"üìÑ ORIGINAL DOCUMENT:")
        print(f"   Total sentences: {len(sentences)}")
        print(f"   Total words: {original_words:,}")
        print(f"\n   First 3 sentences:")
        for i, sent in enumerate(sentences[:3], 1):
            truncated = sent[:250] + "..." if len(sent) > 250 else sent
            print(f"   {i}. {truncated}")
        
        # Generate summaries
        print(f"\n{'‚îÄ'*80}")
        print(f"ü§ñ GENERATED SUMMARIES:")
        print(f"{'‚îÄ'*80}\n")
        
        summaries = agent.generate_summaries(judgment)
        
        # Compute quality metrics for each aspect
        for aspect in agent.aspects:
            summary = summaries[aspect]
            summary_sentences = sent_tokenize(summary)
            word_count = len(summary.split())
            
            # Compute quality metrics
            if summary_sentences and sentences:
                summary_embeddings = agent.encoder(summary_sentences)
                document_embeddings = agent.encoder(sentences)
                
                coherence = agent.reward_function.compute_coherence(summary_sentences)
                coverage = agent.reward_function.compute_coverage(summary_embeddings, document_embeddings)
                redundancy = agent.reward_function.compute_redundancy(summary_embeddings)
                diversity = agent.reward_function.compute_diversity(summary_embeddings)
                
                print(f"üìå {aspect.upper()}:")
                print(f"   Length: {len(summary_sentences)} sentences | {word_count} words")
                print(f"   Quality Metrics:")
                print(f"     ‚Ä¢ Coherence:      {coherence:.3f}")
                print(f"     ‚Ä¢ Coverage:       {coverage:.3f}")
                print(f"     ‚Ä¢ Redundancy:     {redundancy:.3f} (lower is better)")
                print(f"     ‚Ä¢ Diversity:      {diversity:.3f}")
                print(f"\n   Summary Text:")
                for i, sent in enumerate(summary_sentences, 1):
                    print(f"     {i}. {sent}")
                print()
        
        # Overall compression statistics
        total_summary_words = sum(len(summaries[a].split()) for a in agent.aspects)
        overall_compression = total_summary_words / original_words
        
        print(f"{'‚îÄ'*80}")
        print(f"üìä COMPRESSION STATISTICS:")
        print(f"{'‚îÄ'*80}")
        print(f"   Original Document:     {original_words:6,} words")
        print(f"   Combined Summary:      {total_summary_words:6,} words")
        print(f"   Overall Compression:   {overall_compression:7.2%}")
        print(f"\n   Aspect-wise Breakdown:")
        for aspect in agent.aspects:
            aspect_words = len(summaries[aspect].split())
            aspect_ratio = aspect_words / original_words
            print(f"     {aspect:12s}: {aspect_words:5,} words ({aspect_ratio:6.2%})")
        
        print(f"\n{'='*80}\n")

# Generate and print sample summaries from test set
print_test_sample_summaries(agent, test_dataset, num_samples=5)


In [None]:
def evaluate_test_set_metrics(agent, test_dataset):
    """
    Compute comprehensive reference-free metrics on TEST SET
    """
    print("\n" + "="*70)
    print("üìä COMPUTING REFERENCE-FREE METRICS ON TEST SET")
    print("="*70 + "\n")
    
    aspect_metrics = {aspect: [] for aspect in agent.aspects}
    
    for item in tqdm(test_dataset, desc="Evaluating test set"):
        judgment = item['judgment']
        sentences = agent.preprocess_document(judgment)
        generated_summaries = agent.generate_summaries(judgment)
        
        for aspect in agent.aspects:
            summary = generated_summaries[aspect]
            summary_sentences = sent_tokenize(summary)
            
            if not summary_sentences or not sentences:
                continue
            
            # Encode for semantic analysis
            summary_embeddings = agent.encoder(summary_sentences)
            document_embeddings = agent.encoder(sentences)
            
            # Find approximate selected indices
            selected_indices = []
            for summ_sent in summary_sentences:
                for idx, orig_sent in enumerate(sentences):
                    if summ_sent.strip()[:50] in orig_sent or orig_sent[:50] in summ_sent.strip():
                        selected_indices.append(idx)
                        break
            
            if not selected_indices:
                selected_indices = list(range(len(summary_sentences)))
            
            # Compute all reference-free metrics
            coherence = agent.reward_function.compute_coherence(summary_sentences)
            coverage = agent.reward_function.compute_coverage(summary_embeddings, document_embeddings)
            redundancy = agent.reward_function.compute_redundancy(summary_embeddings)
            diversity = agent.reward_function.compute_diversity(summary_embeddings)
            informativeness = agent.reward_function.compute_informativeness(summary_embeddings, document_embeddings)
            position_bias = agent.reward_function.compute_position_bias(selected_indices, len(sentences))
            ordering_score = agent.reward_function.compute_sentence_ordering_penalty(selected_indices)
            
            # Length metrics
            gen_words = len(summary.split())
            source_words = sum(len(s.split()) for s in sentences)
            compression_ratio = gen_words / max(source_words, 1)
            
            # Total reward
            total_reward = agent.reward_function.compute_reward(
                summary_sentences, selected_indices, sentences,
                agent.aspect_summary_ratios[aspect], aspect
            )
            
            metrics = {
                'total_reward': total_reward,
                'coherence': coherence,
                'coverage': coverage,
                'redundancy': redundancy,
                'diversity': diversity,
                'informativeness': informativeness,
                'position_bias': position_bias,
                'ordering_score': ordering_score,
                'compression_ratio': compression_ratio,
                'summary_words': gen_words,
                'source_words': source_words,
                'summary_sentences': len(summary_sentences),
                'source_sentences': len(sentences)
            }
            
            aspect_metrics[aspect].append(metrics)
    
    # Print comprehensive summary
    print("\n" + "="*70)
    print("üìà TEST SET RESULTS (FINAL EVALUATION)")
    print("="*70 + "\n")
    
    for aspect in agent.aspects:
        if aspect_metrics[aspect]:
            avg_metrics = {
                k: np.mean([m[k] for m in aspect_metrics[aspect]])
                for k in aspect_metrics[aspect][0].keys()
            }
            
            print(f"üìå {aspect.upper()}")
            print(f"   {'‚îÄ'*60}")
            print(f"   Total Reward:      {avg_metrics['total_reward']:.4f}")
            print(f"   ‚îú‚îÄ Coherence:      {avg_metrics['coherence']:.4f}  (semantic flow)")
            print(f"   ‚îú‚îÄ Coverage:       {avg_metrics['coverage']:.4f}  (content coverage)")
            print(f"   ‚îú‚îÄ Diversity:      {avg_metrics['diversity']:.4f}  (information variety)")
            print(f"   ‚îú‚îÄ Informativeness:{avg_metrics['informativeness']:.4f}  (importance)")
            print(f"   ‚îú‚îÄ Redundancy:     {avg_metrics['redundancy']:.4f}  (lower = better)")
            print(f"   ‚îú‚îÄ Position Bias:  {avg_metrics['position_bias']:.4f}  (doc coverage)")
            print(f"   ‚îî‚îÄ Ordering Score: {avg_metrics['ordering_score']:.4f}  (sentence order)")
            print(f"\n   Length Statistics:")
            print(f"     Compression:     {avg_metrics['compression_ratio']:.2%}")
            print(f"     Avg Summary:     {avg_metrics['summary_words']:.0f} words, {avg_metrics['summary_sentences']:.0f} sents")
            print(f"     Avg Source:      {avg_metrics['source_words']:.0f} words, {avg_metrics['source_sentences']:.0f} sents")
            print()
    
    return aspect_metrics

# Compute metrics on test set
test_metrics = evaluate_test_set_metrics(agent, test_dataset)


In [None]:
def export_test_metrics_to_csv(test_metrics, filename='test_set_metrics.csv'):
    """
    Export test set metrics to CSV with detailed statistics
    """
    print(f"\nüíæ Exporting test metrics to {filename}...")
    
    data = []
    for aspect in agent.aspects:
        for idx, metrics in enumerate(test_metrics[aspect]):
            row = {'sample_id': idx, 'aspect': aspect, **metrics}
            data.append(row)
    
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    
    print(f"‚úÖ Saved {len(df)} test metric records\n")
    
    # Detailed statistical summary
    print("="*90)
    print("üìä DETAILED TEST SET STATISTICS BY ASPECT")
    print("="*90 + "\n")
    
    summary = df.groupby('aspect')[['total_reward', 'coherence', 'coverage', 
                                     'diversity', 'informativeness', 'redundancy',
                                     'ordering_score', 'position_bias',
                                     'compression_ratio']].agg(['mean', 'std', 'min', 'max'])
    
    print(summary.round(4))
    print("\n" + "="*90 + "\n")
    
    # Overall statistics across all aspects
    print("üìà OVERALL TEST SET STATISTICS (All Aspects Combined):")
    print("="*90)
    overall_stats = df[['total_reward', 'coherence', 'coverage', 'diversity', 
                        'redundancy', 'compression_ratio']].describe()
    print(overall_stats.round(4))
    print("="*90 + "\n")
    
    return df

# Export test metrics
test_df = export_test_metrics_to_csv(test_metrics)


In [None]:
def create_test_set_visualizations(test_metrics, test_df):
    """
    Create comprehensive visualizations for test set evaluation
    """
    print("\nüìä Generating comprehensive test set visualizations...\n")
    
    aspects = agent.aspects
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8']
    
    # Aggregate metrics
    agg_data = {}
    for aspect in aspects:
        if test_metrics[aspect]:
            agg_data[aspect] = {
                k: np.mean([m[k] for m in test_metrics[aspect]])
                for k in test_metrics[aspect][0].keys()
            }
    
    # Create comprehensive figure
    fig = plt.figure(figsize=(20, 14))
    gs = fig.add_gridspec(4, 3, hspace=0.35, wspace=0.3)
    
    # 1. Total Reward by Aspect
    ax1 = fig.add_subplot(gs[0, 0])
    total_rewards = [agg_data[a]['total_reward'] for a in aspects]
    bars = ax1.bar(aspects, total_rewards, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax1.set_title('Total Reward by Aspect (Test Set)', fontweight='bold', fontsize=12)
    ax1.set_ylabel('Reward Score')
    ax1.set_xticklabels(aspects, rotation=45, ha='right')
    ax1.grid(True, alpha=0.3, axis='y')
    for bar in bars:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}', ha='center', va='bottom', fontsize=9)
    
    # 2. Quality Metrics Comparison
    ax2 = fig.add_subplot(gs[0, 1])
    metrics_plot = ['coherence', 'coverage', 'diversity', 'informativeness']
    df_plot = pd.DataFrame({
        a: [agg_data[a][m] for m in metrics_plot] for a in aspects
    }, index=metrics_plot)
    df_plot.T.plot(kind='bar', ax=ax2, alpha=0.8, width=0.8)
    ax2.set_title('Quality Metrics by Aspect', fontweight='bold', fontsize=12)
    ax2.set_ylabel('Score')
    ax2.legend(title='Metrics', fontsize=8, loc='upper right')
    ax2.set_xticklabels(aspects, rotation=45, ha='right')
    ax2.grid(True, alpha=0.3)
    
    # 3. Coherence vs Coverage Scatter
    ax3 = fig.add_subplot(gs[0, 2])
    for idx, aspect in enumerate(aspects):
        ax3.scatter(agg_data[aspect]['coherence'], agg_data[aspect]['coverage'],
                   label=aspect, color=colors[idx], s=250, alpha=0.7,
                   edgecolor='black', linewidth=1.5)
    ax3.set_title('Coherence vs Coverage', fontweight='bold', fontsize=12)
    ax3.set_xlabel('Coherence Score')
    ax3.set_ylabel('Coverage Score')
    ax3.legend(fontsize=9)
    ax3.grid(True, alpha=0.3)
    ax3.plot([0, 1], [0, 1], 'k--', alpha=0.3)
    
    # 4. Redundancy by Aspect
    ax4 = fig.add_subplot(gs[1, 0])
    redundancy = [agg_data[a]['redundancy'] for a in aspects]
    bars = ax4.bar(aspects, redundancy, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax4.set_title('Redundancy by Aspect (Lower = Better)', fontweight='bold', fontsize=12)
    ax4.set_ylabel('Redundancy Score')
    ax4.set_xticklabels(aspects, rotation=45, ha='right')
    ax4.axhline(y=0.2, color='red', linestyle='--', alpha=0.5, label='Target < 0.20')
    ax4.legend(fontsize=9)
    ax4.grid(True, alpha=0.3, axis='y')
    for bar in bars:
        height = bar.get_height()
        ax4.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}', ha='center', va='bottom', fontsize=9)
    
    # 5. Compression Ratios
    ax5 = fig.add_subplot(gs[1, 1])
    compression = [agg_data[a]['compression_ratio'] for a in aspects]
    bars = ax5.bar(aspects, compression, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax5.set_title('Compression Ratios by Aspect', fontweight='bold', fontsize=12)
    ax5.set_ylabel('Compression Ratio')
    ax5.set_xticklabels(aspects, rotation=45, ha='right')
    ax5.grid(True, alpha=0.3, axis='y')
    for bar in bars:
        height = bar.get_height()
        ax5.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.1%}', ha='center', va='bottom', fontsize=9)
    
    # 6. Ordering Score
    ax6 = fig.add_subplot(gs[1, 2])
    ordering = [agg_data[a]['ordering_score'] for a in aspects]
    bars = ax6.bar(aspects, ordering, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax6.set_title('Sentence Ordering Score', fontweight='bold', fontsize=12)
    ax6.set_ylabel('Ordering Score (1.0 = Perfect)')
    ax6.set_xticklabels(aspects, rotation=45, ha='right')
    ax6.grid(True, alpha=0.3, axis='y')
    for bar in bars:
        height = bar.get_height()
        ax6.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}', ha='center', va='bottom', fontsize=9)
    
    # 7. Reward Distribution (Box Plot)
    ax7 = fig.add_subplot(gs[2, 0])
    reward_data = [test_df[test_df['aspect'] == a]['total_reward'].values for a in aspects]
    bp = ax7.boxplot(reward_data, labels=aspects, patch_artist=True)
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)
    ax7.set_title('Reward Distribution by Aspect', fontweight='bold', fontsize=12)
    ax7.set_ylabel('Total Reward')
    ax7.set_xticklabels(aspects, rotation=45, ha='right')
    ax7.grid(True, alpha=0.3, axis='y')
    
    # 8. Correlation Heatmap
    ax8 = fig.add_subplot(gs[2, 1])
    corr_cols = ['coherence', 'coverage', 'diversity', 'informativeness', 'redundancy']
    corr_data = test_df[corr_cols].corr()
    sns.heatmap(corr_data, annot=True, fmt='.2f', cmap='coolwarm', center=0,
                ax=ax8, cbar_kws={'label': 'Correlation'}, linewidths=1, vmin=-1, vmax=1)
    ax8.set_title('Metric Correlations', fontweight='bold', fontsize=12)
    
    # 9. Summary Length Distribution
    ax9 = fig.add_subplot(gs[2, 2])
    for idx, aspect in enumerate(aspects):
        aspect_data = test_df[test_df['aspect'] == aspect]['summary_words'].values
        ax9.hist(aspect_data, alpha=0.6, label=aspect, color=colors[idx], 
                bins=10, edgecolor='black', linewidth=0.5)
    ax9.set_title('Summary Word Count Distribution', fontweight='bold', fontsize=12)
    ax9.set_xlabel('Number of Words')
    ax9.set_ylabel('Frequency')
    ax9.legend(fontsize=9)
    ax9.grid(True, alpha=0.3)
    
    # 10. Position Bias
    ax10 = fig.add_subplot(gs[3, 0])
    position_bias = [agg_data[a]['position_bias'] for a in aspects]
    bars = ax10.bar(aspects, position_bias, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax10.set_title('Position Bias (Document Coverage)', fontweight='bold', fontsize=12)
    ax10.set_ylabel('Position Score')
    ax10.set_xticklabels(aspects, rotation=45, ha='right')
    ax10.grid(True, alpha=0.3, axis='y')
    ax10.axhline(y=1.0, color='green', linestyle='--', alpha=0.5, label='Perfect (1.0)')
    ax10.legend(fontsize=9)
    
    # 11. Informativeness
    ax11 = fig.add_subplot(gs[3, 1])
    informativeness = [agg_data[a]['informativeness'] for a in aspects]
    bars = ax11.bar(aspects, informativeness, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    ax11.set_title('Informativeness by Aspect', fontweight='bold', fontsize=12)
    ax11.set_ylabel('Informativeness Score')
    ax11.set_xticklabels(aspects, rotation=45, ha='right')
    ax11.grid(True, alpha=0.3, axis='y')
    
    # 12. Overall Comparison (Radar Chart Style)
    ax12 = fig.add_subplot(gs[3, 2])
    metrics_compare = ['coherence', 'coverage', 'diversity', 'informativeness']
    x_pos = np.arange(len(metrics_compare))
    width = 0.15
    for idx, aspect in enumerate(aspects):
        values = [agg_data[aspect][m] for m in metrics_compare]
        ax12.bar(x_pos + idx*width, values, width, label=aspect, 
                color=colors[idx], alpha=0.8, edgecolor='black', linewidth=0.5)
    ax12.set_title('Overall Metric Comparison', fontweight='bold', fontsize=12)
    ax12.set_ylabel('Score')
    ax12.set_xticks(x_pos + width * 2)
    ax12.set_xticklabels(metrics_compare, rotation=45, ha='right')
    ax12.legend(fontsize=8)
    ax12.grid(True, alpha=0.3, axis='y')
    
    plt.suptitle('InLegalBERT Model - Test Set Comprehensive Evaluation\n(Reference-Free Metrics on Unseen Data)', 
                 fontsize=16, fontweight='bold', y=0.998)
    
    plt.savefig('test_set_comprehensive_evaluation.png', dpi=300, bbox_inches='tight')
    print("‚úÖ Saved visualization to 'test_set_comprehensive_evaluation.png'\n")
    plt.show()

# Create visualizations
create_test_set_visualizations(test_metrics, test_df)


In [None]:
def print_final_summary_table(test_metrics):
    """
    Print a comprehensive summary table of test results
    """
    print("\n" + "="*90)
    print("üìã FINAL TEST SET EVALUATION SUMMARY")
    print("="*90 + "\n")
    
    # Calculate averages for each aspect
    print(f"{'Aspect':<12} {'Reward':>8} {'Coher':>7} {'Cover':>7} {'Redun':>7} {'Diver':>7} {'Order':>7} {'Compr':>7}")
    print("="*90)
    
    overall_rewards = []
    
    for aspect in agent.aspects:
        if test_metrics[aspect]:
            avg = {k: np.mean([m[k] for m in test_metrics[aspect]]) 
                  for k in test_metrics[aspect][0].keys()}
            
            overall_rewards.append(avg['total_reward'])
            
            print(f"{aspect:<12} "
                  f"{avg['total_reward']:>8.4f} "
                  f"{avg['coherence']:>7.3f} "
                  f"{avg['coverage']:>7.3f} "
                  f"{avg['redundancy']:>7.3f} "
                  f"{avg['diversity']:>7.3f} "
                  f"{avg['ordering_score']:>7.3f} "
                  f"{avg['compression_ratio']:>6.1%}")
    
    print("="*90)
    print(f"{'AVERAGE':<12} {np.mean(overall_rewards):>8.4f}")
    print("="*90 + "\n")
    
    # Key findings
    print("üéØ KEY FINDINGS:")
    print("-" * 90)
    
    all_coherence = [np.mean([m['coherence'] for m in test_metrics[a]]) for a in agent.aspects if test_metrics[a]]
    all_redundancy = [np.mean([m['redundancy'] for m in test_metrics[a]]) for a in agent.aspects if test_metrics[a]]
    all_coverage = [np.mean([m['coverage'] for m in test_metrics[a]]) for a in agent.aspects if test_metrics[a]]
    
    print(f"  ‚Ä¢ Average Coherence:      {np.mean(all_coherence):.4f}")
    print(f"  ‚Ä¢ Average Coverage:       {np.mean(all_coverage):.4f}")
    print(f"  ‚Ä¢ Average Redundancy:     {np.mean(all_redundancy):.4f} (target: < 0.20)")
    print(f"  ‚Ä¢ Best Aspect:            {agent.aspects[np.argmax(overall_rewards)].upper()} "
          f"(reward: {max(overall_rewards):.4f})")
    print(f"  ‚Ä¢ Total Test Documents:   {len(test_dataset)}")
    print(f"\n  ‚úÖ Model successfully evaluated on UNSEEN test data!")
    print("="*90 + "\n")

# Print final summary
print_final_summary_table(test_metrics)


In [None]:
def export_test_summaries_to_file(agent, test_dataset, num_samples=10, 
                                   output_file='test_set_sample_summaries.txt'):
    """
    Export sample summaries from test set to text file
    """
    print(f"\nüíæ Exporting {num_samples} test sample summaries to {output_file}...")
    
    sample_indices = random.sample(range(len(test_dataset)), min(num_samples, len(test_dataset)))
    
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write("INLEGALBERT MODEL - TEST SET SAMPLE SUMMARIES\n")
        f.write("Reference-Free Unsupervised RL Training\n")
        f.write(f"Test Set Size: {len(test_dataset)} documents\n")
        f.write("="*80 + "\n\n")
        
        for idx, sample_idx in enumerate(sample_indices, 1):
            item = test_dataset[sample_idx]
            judgment = item['judgment']
            judgment_file = item['judgment_file']
            
            sentences = agent.preprocess_document(judgment)
            summaries = agent.generate_summaries(judgment)
            
            original_words = sum(len(s.split()) for s in sentences)
            total_summary_words = sum(len(summaries[a].split()) for a in agent.aspects)
            
            f.write(f"\n{'='*80}\n")
            f.write(f"TEST SAMPLE {idx}: {judgment_file}\n")
            f.write(f"{'='*80}\n\n")
            f.write(f"Original: {len(sentences)} sentences, {original_words:,} words\n")
            f.write(f"Summary: {total_summary_words:,} words ({total_summary_words/original_words:.2%} compression)\n\n")
            
            for aspect in agent.aspects:
                f.write(f"\n{aspect.upper()}:\n")
                f.write(f"{'-'*80}\n")
                f.write(f"{summaries[aspect]}\n")
            
            f.write("\n")
    
    print(f"‚úÖ Exported {num_samples} test summaries to {output_file}\n")

# Export test summaries
export_test_summaries_to_file(agent, test_dataset, num_samples=10)
