### Ver3 Optimized

In [1]:
# ===========================================
# Cell 1. Import Library dan Setup Environment
# ===========================================

import os
import json
import random
from pathlib import Path
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Select the compute device: CUDA when available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device yang digunakan:", device)

# Report the name and total memory of GPU 0 (only when CUDA is available)
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

Device yang digunakan: cuda
GPU: NVIDIA GeForce RTX 4080 SUPER
Total Memory: 17.17 GB


In [2]:
# ===========================================
# Cell 2. Fungsi Bantuan Umum
# ===========================================

from matplotlib.colors import ListedColormap

def seed_everything(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU generator; when CUDA is available, all CUDA generators are seeded too.

    Args:
        seed: Integer seed shared by all generators.
    """
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.manual_seed_all(seed)


# Fix the seed once, right after the helper is defined.
seed_everything(42)

def visualize_tile(x_tile, y_true=None, y_pred=None, json_path=None, class_names=None, idx=0):
    """Show a tile as pseudo-RGB next to its ground-truth and predicted masks.

    Both masks are drawn with the same fixed colour scale (class id ``k`` maps
    to colour ``k``), so a class has the same colour in the "Ground Truth" and
    "Prediksi" panels regardless of which classes happen to be present in
    each mask. Class ids beyond the last colour are clipped to the last colour.

    Args:
        x_tile: Hyperspectral tile. A ``torch.Tensor`` is treated as
            band-first ``[B, H, W]`` and transposed to ``[H, W, B]``; any other
            object is used as-is and must already be indexable as ``[H, W, B]``.
        y_true: Optional 2-D label mask (array or tensor).
        y_pred: Optional 2-D predicted mask (array or tensor).
        json_path: Optional path to a JSON list whose items have a ``"color"``
            string; its first 7 characters are used as the colour of the class
            at that position. When missing/unreadable, ``tab20`` is used.
        class_names: Accepted for interface compatibility; currently unused.
        idx: Accepted for interface compatibility; currently unused.
    """
    def _as_numpy(arr):
        # Masks may arrive as (possibly CUDA) tensors; imshow needs host arrays.
        if isinstance(arr, torch.Tensor):
            return arr.detach().cpu().numpy()
        return arr

    if isinstance(x_tile, torch.Tensor):
        # [B, H, W] -> [H, W, B]
        x = np.transpose(x_tile.detach().cpu().numpy(), (1, 2, 0))
    else:
        x = x_tile

    # Pseudo-RGB: pick three bands at 5%, 50% and 90% of the spectral axis.
    B = x.shape[2]
    b1, b2, b3 = int(B*0.05), int(B*0.5), int(B*0.9)
    rgb = x[..., [b1, b2, b3]]
    rgb_norm = (rgb - rgb.min()) / (rgb.max() - rgb.min() + 1e-9)

    # Try to read the class colours from the JSON file (UTF-8, matching
    # load_label_mapping); an empty colour list is treated like a missing file.
    custom_colors = []
    if json_path and os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            label_info = json.load(f)
        custom_colors = [c["color"][:7] for c in label_info]

    if custom_colors:
        cmap = ListedColormap(custom_colors)
        n_colors = len(custom_colors)
    else:
        print("File json tidak terbaca, menggunakan cmap tab20")
        cmap = "tab20"  # fallback
        n_colors = 20

    # Without explicit vmin/vmax, imshow rescales each mask to its own min/max,
    # which assigns different colours to the same class in different panels.
    # "nearest" keeps class ids from being blended when the image is resampled.
    mask_style = dict(cmap=cmap, vmin=0, vmax=n_colors - 1, interpolation="nearest")

    plt.figure(figsize=(12,4))
    plt.subplot(1,3,1); plt.imshow(rgb_norm); plt.title("Citra (Pseudo-RGB)")
    if y_true is not None:
        plt.subplot(1,3,2); plt.imshow(_as_numpy(y_true), **mask_style); plt.title("Ground Truth")
    if y_pred is not None:
        plt.subplot(1,3,3); plt.imshow(_as_numpy(y_pred), **mask_style); plt.title("Prediksi")
    plt.show()

In [3]:
# ===========================================
# Cell 3. Dataset Loader
# ===========================================

def load_label_mapping(json_path):
    """Build an ``{index: class name}`` dictionary from the label JSON file.

    The file is read as UTF-8 and must contain a list of objects that each
    have a ``"name"`` key; the list position becomes the dictionary key.

    Args:
        json_path: Path to the label-classes JSON file.

    Returns:
        dict mapping the 0-based list position to the entry's ``"name"``.
    """
    with open(json_path, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)

    names_by_index = {}
    for position, entry in enumerate(entries):
        names_by_index[position] = entry["name"]
    return names_by_index

def normalize_reflectance(cube):
    """Min-max scale one tile to the 0-1 range as float32.

    Memory behaviour:
      * a read-only array (for example a slice of an ``np.load(...,
        mmap_mode="r")`` memmap) or a non-float32 array is converted to a new
        float32 array, so the input is left untouched;
      * a writeable float32 array is normalised IN PLACE and returned.

    NaN values are replaced with 0 (and infinities with large finite values,
    per ``np.nan_to_num``) before the range is computed. A constant tile
    (max == min) becomes all zeros so the result always lies in [0, 1]; an
    empty tile is returned unchanged.

    Args:
        cube: NumPy array holding one tile.

    Returns:
        float32 NumPy array with the same shape as ``cube``.
    """
    if not cube.flags.writeable:
        # Cannot modify a read-only buffer: take a private float32 copy.
        cube = cube.astype(np.float32, copy=True)
    elif cube.dtype != np.float32:
        cube = cube.astype(np.float32, copy=False)

    if cube.size == 0:
        # min()/max() are undefined for empty arrays.
        return cube

    np.nan_to_num(cube, copy=False)

    # No NaNs remain after nan_to_num, so plain min/max are sufficient.
    min_val = cube.min()
    max_val = cube.max()

    # Always shift so the minimum is 0; only rescale when there is a range,
    # which turns a constant tile into zeros instead of leaving raw values.
    cube -= min_val
    if max_val > min_val:
        cube /= (max_val - min_val + 1e-8)

    return cube


class SeaweedDataset(Dataset):
    """Memory-efficient tile dataset backed by converted ``.npy`` files.

    Each ``*_x.npy`` file (indexed as ``[H, W, bands]``) is paired with the
    ``*_y.npy`` label file next to it. Files are opened with ``mmap_mode="r"``
    and only the requested tile is read, so whole cubes never sit in RAM.
    Tiles are non-overlapping ``tile_size`` x ``tile_size`` windows; partial
    windows at the right/bottom edge are dropped.

    Args:
        data_files: Iterable of paths (str or path-like). Entries not ending in
            ``_x.npy`` or without a matching ``_y.npy`` on disk are skipped.
        label_map: Stored on the instance; not used by the class itself.
        tile_size: Side length of the square tiles.
        normalize: When True, ``normalize_reflectance`` is applied to each tile.
        label_remap: Optional ``{original label: new index}`` dict. Labels not
            listed in it become 0.
        max_resample: How many times ``__getitem__`` draws a random replacement
            tile when the current one has no label > 0 (default 3). The
            replacement index comes from NumPy's global RNG. If every attempt
            is empty, the last drawn (empty) tile is returned.
    """
    def __init__(self, data_files, label_map, tile_size=64, normalize=True, label_remap=None, max_resample=3):
        self.data_files = data_files
        self.label_map = label_map
        self.tile_size = tile_size
        self.normalize = normalize
        self.label_remap = label_remap
        self.max_resample = max_resample

        # List of (file_x, file_y) pairs whose label file exists
        self.pairs = []
        for f in data_files:
            fx = os.fspath(f)  # accept pathlib.Path as well as str
            if fx.endswith("_x.npy"):
                fy = fx.replace("_x.npy", "_y.npy")
                if os.path.exists(fy):
                    self.pairs.append((fx, fy))

        # Store only (file index, row, col) per tile; shapes come from the
        # memory-mapped header, no pixel data is read here.
        self.index = []
        for file_idx, (fx, fy) in enumerate(self.pairs):
            x = np.load(fx, mmap_mode="r")
            H, W, _ = x.shape
            for i in range(0, H - tile_size + 1, tile_size):
                for j in range(0, W - tile_size + 1, tile_size):
                    self.index.append((file_idx, i, j))
            del x

        print(f"[INFO] Total tile terdaftar: {len(self.index)} dari {len(self.pairs)} file")

    def __len__(self):
        return len(self.index)

    def _read_tile(self, file_idx, i, j):
        """Return the (x, y) memmap slices of the tile whose top-left corner is (i, j)."""
        fx, fy = self.pairs[file_idx]
        ts = self.tile_size
        x = np.load(fx, mmap_mode="r")[i:i+ts, j:j+ts, :]
        y = np.load(fy, mmap_mode="r")[i:i+ts, j:j+ts]
        return x, y

    def __getitem__(self, idx):
        x, y = self._read_tile(*self.index[idx])

        # Replace tiles without any label > 0 by a random tile (bounded retries)
        for _ in range(self.max_resample):
            if np.any(y > 0):
                break
            x, y = self._read_tile(*self.index[np.random.randint(0, len(self.index))])

        if self.normalize:
            x = normalize_reflectance(x)

        # Remap labels when a mapping is given; unlisted labels stay 0
        if self.label_remap is not None:
            y_remap = np.zeros_like(y, dtype=np.int64)
            for orig_label, new_idx in self.label_remap.items():
                y_remap[y == orig_label] = new_idx
            y = y_remap
        else:
            y = y.astype(np.int64)

        # [H, W, bands] -> [bands, H, W]; torch.tensor copies the data
        x_tensor = torch.tensor(x.transpose(2, 0, 1), dtype=torch.float32)
        y_tensor = torch.tensor(y, dtype=torch.long)
        return x_tensor, y_tensor


def detect_actual_classes(pairs):
    """Scan every label file and return the sorted class ids that occur.

    Values are converted to built-in Python numbers (via ``ndarray.tolist``),
    so the result and the log line show ``0, 8, ...`` rather than NumPy scalar
    reprs, and ids from files with different integer dtypes merge cleanly.

    Args:
        pairs: Iterable of ``(x_path, y_path)`` tuples; only ``y_path`` is read
            (memory-mapped).

    Returns:
        Sorted list of the distinct label values found across all files.
    """
    found = set()
    for _, fy in pairs:
        labels = np.load(fy, mmap_mode="r")
        found.update(np.unique(labels).tolist())
    found = sorted(found)
    print(f"[INFO] Kelas AKTUAL yang ditemukan di dataset: {found}")
    return found

In [4]:
# ===========================================
# Cell 4. Load Dataset dan Splitting
# ===========================================

data_dir = "../data/npy_converted"
label_json_path = "../data/annotation/segmentation_masks/label_classes.json"

label_map = load_label_mapping(label_json_path)
print(f"Jumlah total kelas di JSON: {len(label_map)}")

# Collect every *_x.npy file and keep those that have a matching *_y.npy
all_x_files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith("_x.npy")])
pairs = [(fx, fx.replace("_x.npy", "_y.npy")) for fx in all_x_files if os.path.exists(fx.replace("_x.npy", "_y.npy"))]

print(f"Total pasangan file X-Y ditemukan: {len(pairs)}")

# Deterministic split by sorted file name: first 11 train, next 5 val, rest test
train_pairs = pairs[:11]
val_pairs   = pairs[11:16]
test_pairs  = pairs[16:]

print("\n=== FINAL SPLIT PER FILE ===")
print(f"Train : {len(train_pairs)}")
print(f"Val   : {len(val_pairs)}")
print(f"Test  : {len(test_pairs)}")

# Detect the classes that actually occur and remap them to contiguous indices
actual_classes = detect_actual_classes(train_pairs + val_pairs + test_pairs)
orig_classes = [int(x) for x in actual_classes]
label_remap = {orig: idx for idx, orig in enumerate(orig_classes)}
print(f"[INFO] Label remap (orig -> new): {label_remap}")

# Tile size is 32 (reduced from 64) to save memory
TILE_SIZE = 32

# All three splits must use the same preprocessing. The test split previously
# disabled normalisation, so test inputs had a different value range than the
# data the model is trained and validated on.
train_dataset = SeaweedDataset([p[0] for p in train_pairs], label_map, tile_size=TILE_SIZE, label_remap=label_remap)
val_dataset   = SeaweedDataset([p[0] for p in val_pairs], label_map, tile_size=TILE_SIZE, label_remap=label_remap)
test_dataset  = SeaweedDataset([p[0] for p in test_pairs], label_map, tile_size=TILE_SIZE, label_remap=label_remap)

# Count pixels per (remapped) class over the training label files
counter = Counter()
for _, fy in train_pairs:
    y = np.load(fy, mmap_mode="r")
    for orig, new in label_remap.items():
        cnt = int((y == orig).sum())
        counter[new] += cnt

print(f"[INFO] Pixel counts per class: {dict(counter)}")

# Inverse-frequency class weights, scaled so their mean is 1 (the mean is
# taken before the background weight is zeroed)
counts = np.array([counter.get(i, 0) for i in range(len(label_remap))], dtype=np.float64)
eps = 1e-6
inv_freq = 1.0 / (counts + eps)
inv_freq = inv_freq / np.mean(inv_freq)
inv_freq[0] = 0.0  # ignore background

class_weights_np = inv_freq.astype(np.float32)
print(f"[INFO] Class weights: {class_weights_np}")

num_classes_actual = len(label_remap)
print(f"\nTotal TILE train: {len(train_dataset)}, val: {len(val_dataset)}, test: {len(test_dataset)}")

Jumlah total kelas di JSON: 41
Total pasangan file X-Y ditemukan: 18

=== FINAL SPLIT PER FILE ===
Train : 11
Val   : 5
Test  : 2
[INFO] Kelas AKTUAL yang ditemukan di dataset: [np.int32(0), np.int32(8), np.int32(12), np.int32(13), np.int32(14), np.int32(18), np.int32(38)]
[INFO] Label remap (orig -> new): {0: 0, 8: 1, 12: 2, 13: 3, 14: 4, 18: 5, 38: 6}
[INFO] Total tile terdaftar: 18200 dari 11 file
[INFO] Total tile terdaftar: 8596 dari 5 file
[INFO] Total tile terdaftar: 2660 dari 2 file
[INFO] Pixel counts per class: {0: 14185638, 1: 840140, 2: 1566138, 3: 808104, 4: 36978, 5: 139337, 6: 1297565}
[INFO] Class weights: [0.         0.2185292  0.11722793 0.22719245 4.9649825  1.3176336
 0.14149204]

Total TILE train: 18200, val: 8596, test: 2660


In [5]:
# ===========================================
# Cell 5. Model OPTIMIZED HybridSN
# ===========================================

class OptimizedFCHybridSN(nn.Module):
    """Fully-convolutional HybridSN variant with spectral pooling.

    Pipeline: three 3D conv + BatchNorm + ReLU stages (spectral kernels 7, 5
    and 3, unpadded along the spectral axis, spatially padded), adaptive
    average pooling of the spectral axis down to 8 slices, three 2D conv
    stages and a 1x1 convolution producing per-pixel class logits. Spatial
    size is preserved end to end.

    Args:
        in_bands: Number of input bands. Accepted for interface compatibility;
            no layer size depends on it because of the adaptive pooling.
        num_classes: Number of output channels of the classifier.
    """
    def __init__(self, in_bands=300, num_classes=7):
        super().__init__()
        pooled_depth = 8  # spectral slices kept by the adaptive pooling

        # --- 3D stage: joint spectral/spatial features ---
        self.conv3d_1 = nn.Conv3d(1, 16, kernel_size=(7, 3, 3), padding=(0, 1, 1))
        self.bn3d_1 = nn.BatchNorm3d(16)
        self.conv3d_2 = nn.Conv3d(16, 32, kernel_size=(5, 3, 3), padding=(0, 1, 1))
        self.bn3d_2 = nn.BatchNorm3d(32)
        self.conv3d_3 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=(0, 1, 1))
        self.bn3d_3 = nn.BatchNorm3d(64)

        # Collapse the spectral axis to a fixed depth; H and W are untouched
        self.spectral_pool = nn.AdaptiveAvgPool3d((pooled_depth, None, None))

        # --- 2D stage: channels = 64 feature maps x pooled spectral depth ---
        self.conv2d_1 = nn.Conv2d(64 * pooled_depth, 256, kernel_size=3, padding=1)
        self.bn2d_1 = nn.BatchNorm2d(256)
        self.dropout1 = nn.Dropout2d(p=0.3)

        self.conv2d_2 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.bn2d_2 = nn.BatchNorm2d(128)
        self.dropout2 = nn.Dropout2d(p=0.3)

        self.conv2d_3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.bn2d_3 = nn.BatchNorm2d(64)

        # Per-pixel classification head
        self.classifier = nn.Conv2d(64, num_classes, kernel_size=1)

    def forward(self, x):
        """Map a ``[batch, bands, H, W]`` tensor to ``[batch, num_classes, H, W]`` logits."""
        # Unpacking four values enforces a 4-D input.
        _batch, _bands, _height, _width = x.shape

        # Add a singleton channel axis so the bands act as the 3D depth axis
        feat3d = x.unsqueeze(1)
        stages3d = (
            (self.conv3d_1, self.bn3d_1),
            (self.conv3d_2, self.bn3d_2),
            (self.conv3d_3, self.bn3d_3),
        )
        for conv, norm in stages3d:
            feat3d = F.relu(norm(conv(feat3d)))

        feat3d = self.spectral_pool(feat3d)

        # Fold (channels, depth) into one channel axis for the 2D stage
        n, channels, depth, height, width = feat3d.shape
        feat2d = feat3d.view(n, channels * depth, height, width)

        feat2d = F.relu(self.bn2d_1(self.conv2d_1(feat2d)))
        feat2d = self.dropout1(feat2d)
        feat2d = F.relu(self.bn2d_2(self.conv2d_2(feat2d)))
        feat2d = self.dropout2(feat2d)
        feat2d = F.relu(self.bn2d_3(self.conv2d_3(feat2d)))

        return self.classifier(feat2d)


# Read the number of bands from the first training cube (size of axis 2 of
# the memory-mapped array)
sample_x = np.load(train_pairs[0][0], mmap_mode="r")
in_bands_actual = sample_x.shape[2]
print(f"Band input aktual: {in_bands_actual}")

# Build the network for the detected band/class counts and move it to `device`
model = OptimizedFCHybridSN(in_bands=in_bands_actual, num_classes=num_classes_actual).to(device)

# Count the parameters and print the layer summary
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")
print(model)

Band input aktual: 300
Total parameters: 1,629,767
OptimizedFCHybridSN(
  (conv3d_1): Conv3d(1, 16, kernel_size=(7, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1))
  (bn3d_1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3d_2): Conv3d(16, 32, kernel_size=(5, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1))
  (bn3d_2): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3d_3): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1))
  (bn3d_3): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (spectral_pool): AdaptiveAvgPool3d(output_size=(8, None, None))
  (conv2d_1): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2d_1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout2d(p=0.3, inplace=False)
  (conv2d_2): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2d_2): Batch

In [None]:
# Scratch check of the class weights computed in Cell 4; Cell 6 prints this same line again.
print(f"[INFO] Original class weights: {class_weights_np}")

In [7]:
# ===========================================
# Cell 6. Loss, Optimizer, dan Metrics (FIXED)
# ===========================================

import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Enable cudnn benchmark
torch.backends.cudnn.benchmark = True

# Hyperparameters
LR = 5e-5 # lowered from 1e-4
WEIGHT_DECAY = 1e-5
BATCH_SIZE = 1
ACCUMULATION_STEPS = 8  # effective batch size = 8
CLIP_NORM = 0.5 # lowered from 1.0

# DataLoaders (only the training loader shuffles)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)

# Smoothed class weights for training stability
print(f"[INFO] Original class weights: {class_weights_np}")

# Smoothing variant 1 (disabled): square root of the weights
# class_weights_smoothed = np.sqrt(class_weights_np)
# class_weights_smoothed[0] = 0.0

# Smoothing variant 2 (active): clip the weights to the explicit range [0.0, 2.0]
class_weights_smoothed = np.clip(class_weights_np, 0.0, 2.0)
class_weights_smoothed[0] = 0.0

print(f"[INFO] Smoothed class weights: {class_weights_smoothed}")


# Loss with class weights; label 0 (background) is excluded via ignore_index
weight_tensor = torch.from_numpy(class_weights_smoothed).to(device)
criterion = nn.CrossEntropyLoss(weight=weight_tensor, ignore_index=0, label_smoothing=0.1)

# Alternative loss without class weights (disabled)
# criterion = nn.CrossEntropyLoss(ignore_index=0, label_smoothing=0.1)

# Optimizer & scheduler. mode='max' because the scheduler is stepped with the
# validation mIoU in the training loop.
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch releases —
# confirm the installed version still accepts it.
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-8)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True, min_lr=1e-7)

# PERBAIKAN: Metrik evaluasi yang benar
class SegmentationMetrics:
    """Accumulates pixel accuracy and per-class IoU over many batches.

    Pixels whose target equals ``ignore_index`` are excluded from everything.
    Intersection/union counts are kept as int64 so they stay exact for any
    number of pixels (float32 counters stop incrementing correctly above
    2**24).

    Args:
        num_classes: Number of classes; IoU is tracked for ids 0..num_classes-1.
        ignore_index: Target value to exclude from the statistics.
    """
    def __init__(self, num_classes, ignore_index=0):
        self.num_classes = num_classes
        self.ignore_index = ignore_index
        self.reset()

    def reset(self):
        """Clear all accumulated counts."""
        self.total_intersection = torch.zeros(self.num_classes, dtype=torch.int64)
        self.total_union = torch.zeros(self.num_classes, dtype=torch.int64)
        self.total_correct = 0
        self.total_pixels = 0

    def update(self, pred, target):
        """Add one batch of predictions and targets (same shape, class ids)."""
        valid = (target != self.ignore_index)
        pred = pred[valid]
        target = target[valid]

        # Pixel accuracy
        self.total_correct += (pred == target).sum().item()
        self.total_pixels += valid.sum().item()

        # Per-class IoU counts for all classes at once: row c of each mask
        # marks the pixels equal to class c. Only two device->host transfers
        # are needed instead of two per class.
        class_ids = torch.arange(self.num_classes, device=pred.device).unsqueeze(1)
        pred_mask = pred.unsqueeze(0) == class_ids
        target_mask = target.unsqueeze(0) == class_ids

        self.total_intersection += (pred_mask & target_mask).sum(dim=1).cpu()
        self.total_union += (pred_mask | target_mask).sum(dim=1).cpu()

    def get_metrics(self):
        """Return ``(pixel_acc, mean_iou, iou_per_class)``.

        ``pixel_acc`` and ``mean_iou`` are plain Python floats (safe to store
        in checkpoints/JSON). ``mean_iou`` averages classes 1..num_classes-1
        that have a non-zero union (class 0 is always left out) and is 0.0 when
        no such class exists. ``iou_per_class`` is a float32 NumPy array.
        """
        pixel_acc = self.total_correct / (self.total_pixels + 1e-9)

        intersection = self.total_intersection.to(torch.float64)
        union = self.total_union.to(torch.float64)
        iou_per_class = (intersection / (union + 1e-9)).to(torch.float32)

        # Exclude background (index 0) and classes that never appeared
        valid_ious = [
            iou_per_class[i].item()
            for i in range(1, self.num_classes)
            if self.total_union[i] > 0
        ]

        mean_iou = float(np.mean(valid_ious)) if valid_ious else 0.0

        return pixel_acc, mean_iou, iou_per_class.numpy()

# Marks the end of the Cell 6 setup (loss, optimizer, scheduler, metrics).
print("Setup selesai!")

[INFO] Original class weights: [0.         0.2185292  0.11722793 0.22719245 4.9649825  1.3176336
 0.14149204]
[INFO] Smoothed class weights: [0.         0.2185292  0.11722793 0.22719245 2.         1.3176336
 0.14149204]
Setup selesai!




In [None]:
# MEMORY TEST - run before training.
# Pushes one random tile-shaped batch through the model without gradients,
# prints the output shape and the currently allocated CUDA memory, then frees
# the dummy tensors. Note: this leaves the model in eval mode.
model.eval()
with torch.no_grad():
    dummy_x = torch.randn(1, in_bands_actual, TILE_SIZE, TILE_SIZE).to(device)
    dummy_y = model(dummy_x)
    print(f"Test passed! Output shape: {dummy_y.shape}")
    print(f"Memory used: {torch.cuda.memory_allocated()/1e9:.2f} GB")
    del dummy_x, dummy_y
    torch.cuda.empty_cache()

In [10]:
# ===========================================
# Cell 7. Training Loop (OPTIMIZED)
# ===========================================

from tqdm import tqdm
import time

START_EPOCH = 1
NUM_EPOCHS = 2
best_val_miou = 0.0

checkpoint_path = "hybridsn_sgmt_ver3_checkpoint.pth"
best_model_path = "hybridsn_sgmt_ver3_best_model.pth"

# Start with an empty history; it is replaced by the saved one when resuming
history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": [], "val_miou": []}

# Resume from the last checkpoint if one exists
if os.path.exists(checkpoint_path):
    # The checkpoint holds non-tensor Python objects (history, scores), which
    # the weights-only loader of newer PyTorch versions may reject.
    # NOTE(security): weights_only=False unpickles arbitrary objects — only
    # load checkpoint files written by this notebook.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)
    model.load_state_dict(checkpoint["model_state"])
    optimizer.load_state_dict(checkpoint["optimizer_state"])
    # The scheduler state (best score, patience counter) is saved with every
    # checkpoint; restore it so the LR schedule continues instead of restarting.
    if "scheduler_state" in checkpoint:
        scheduler.load_state_dict(checkpoint["scheduler_state"])
    START_EPOCH = checkpoint["epoch"] + 1
    best_val_miou = float(checkpoint.get("best_val_miou", 0.0))
    # Keep the curves of the epochs that already ran
    history.update(checkpoint.get("history") or {})
    print(f"[INFO] Resume dari epoch {START_EPOCH}")
else:
    print("[INFO] Training dari awal")

def train_one_epoch(model, loader, criterion, optimizer, metrics, device):
    """Train for one epoch with gradient accumulation.

    Gradients of ``ACCUMULATION_STEPS`` usable batches are accumulated before
    each clipped optimizer step. Batches whose loss is NaN/Inf are skipped
    without touching the accumulation counter, so a skipped batch can no
    longer swallow an optimizer step. Gradients left over at the end of the
    epoch are applied in a final step (they come from fewer than
    ``ACCUMULATION_STEPS`` batches, so that step uses a smaller summed
    gradient).

    A non-finite loss on a batch whose targets all equal the criterion's
    ``ignore_index`` is expected (mean over zero pixels) and is only counted;
    any other non-finite loss is reported in detail.

    Args:
        model: Network to train.
        loader: DataLoader yielding ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(logits, targets)``.
        optimizer: Optimizer updating ``model``'s parameters.
        metrics: Object with ``reset()``, ``update(pred, target)`` and
            ``get_metrics()``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, pixel_acc)`` where ``avg_loss`` is the mean per-sample
        loss over the batches that were actually used.
    """
    model.train()
    metrics.reset()
    optimizer.zero_grad()

    ignore_index = getattr(criterion, "ignore_index", None)
    running_loss = 0.0
    samples_used = 0
    pending = 0          # batches accumulated since the last optimizer step
    skipped_empty = 0    # batches without any non-ignored target pixel

    pbar = tqdm(loader, desc="Training", leave=False)

    for i, (xb, yb) in enumerate(pbar):
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        logits = model(xb)
        loss = criterion(logits, yb) / ACCUMULATION_STEPS

        if not torch.isfinite(loss):
            if ignore_index is not None and bool((yb == ignore_index).all()):
                # Nothing to learn from this batch; avoid flooding the log.
                skipped_empty += 1
            else:
                print(f"\n[WARNING] NaN/Inf detected at batch {i}!")
                print(f"Logits stats - min: {logits.min():.4f}, max: {logits.max():.4f}")
                print(f"Target unique values: {torch.unique(yb)}")
            continue  # skip this batch

        loss.backward()
        pending += 1

        # Step once enough usable batches have been accumulated
        if pending == ACCUMULATION_STEPS:
            torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_NORM)
            optimizer.step()
            optimizer.zero_grad()
            pending = 0

        batch_loss = loss.item() * ACCUMULATION_STEPS
        running_loss += batch_loss * xb.size(0)
        samples_used += xb.size(0)

        preds = logits.argmax(dim=1)
        metrics.update(preds, yb)

        pbar.set_postfix({"loss": f"{batch_loss:.4f}"})

    # Apply gradients of an incomplete final accumulation group
    if pending > 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_NORM)
        optimizer.step()
        optimizer.zero_grad()

    if skipped_empty:
        print(f"[INFO] Skipped {skipped_empty} batches without labelled pixels")

    avg_loss = running_loss / max(samples_used, 1)
    pixel_acc, _, _ = metrics.get_metrics()

    return avg_loss, pixel_acc

def validate(model, loader, criterion, metrics, device):
    """Evaluate the model on ``loader`` without gradient tracking.

    Batches whose loss is not finite (for example when every target pixel
    equals the criterion's ignore index, so the mean runs over zero pixels) are
    left out of the loss average; otherwise a single such batch would turn the
    whole validation loss into NaN. Metrics are still updated for every batch.

    Args:
        model: Network to evaluate (switched to eval mode).
        loader: DataLoader yielding ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(logits, targets)``.
        metrics: Object with ``reset()``, ``update(pred, target)`` and
            ``get_metrics()`` returning ``(pixel_acc, mean_iou, iou_per_class)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, pixel_acc, mean_iou, iou_per_class)``; ``avg_loss`` is the
        mean per-sample loss over the batches with a finite loss.
    """
    model.eval()
    metrics.reset()
    running_loss = 0.0
    samples_used = 0

    with torch.no_grad():
        for xb, yb in tqdm(loader, desc="Validation", leave=False):
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)

            logits = model(xb)
            loss = criterion(logits, yb)

            if torch.isfinite(loss):
                running_loss += loss.item() * xb.size(0)
                samples_used += xb.size(0)

            preds = logits.argmax(dim=1)
            metrics.update(preds, yb)

    avg_loss = running_loss / max(samples_used, 1)
    pixel_acc, mean_iou, iou_per_class = metrics.get_metrics()

    return avg_loss, pixel_acc, mean_iou, iou_per_class


# Training loop: train, validate, step the LR scheduler on the validation
# mIoU, then write a resumable checkpoint (and a copy when the mIoU improves).
for epoch in range(START_EPOCH, NUM_EPOCHS + 1):
    print(f"\n{'='*60}")
    print(f"Epoch {epoch}/{NUM_EPOCHS}")
    print(f"{'='*60}")

    start_time = time.time()

    # Monitor GPU memory
    if torch.cuda.is_available():
        print(f"GPU Memory: {torch.cuda.memory_allocated()/1e9:.2f} GB allocated")

    # Training
    train_metrics = SegmentationMetrics(num_classes_actual, ignore_index=0)
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, train_metrics, device)

    # Validation
    val_metrics = SegmentationMetrics(num_classes_actual, ignore_index=0)
    val_loss, val_acc, val_miou, val_iou_per_class = validate(model, val_loader, criterion, val_metrics, device)

    # Store a plain Python float so history/checkpoints contain no NumPy scalars
    val_miou = float(val_miou)

    # Scheduler step
    scheduler.step(val_miou)

    # Logging
    elapsed = time.time() - start_time
    print(f"\nResults:")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val Loss  : {val_loss:.4f} | Val Acc  : {val_acc:.4f}")
    print(f"  Val mIoU  : {val_miou:.4f}")
    print(f"  Time      : {elapsed/60:.2f} min")
    print(f"  IoU per class: {val_iou_per_class[1:]}")

    # Save history
    history["train_loss"].append(train_loss)
    history["val_loss"].append(val_loss)
    history["train_acc"].append(train_acc)
    history["val_acc"].append(val_acc)
    history["val_miou"].append(val_miou)

    # Update the best score BEFORE building the checkpoint; otherwise both
    # files store the previous best and a resumed run could overwrite the best
    # model with a worse one.
    is_best = val_miou > best_val_miou
    if is_best:
        best_val_miou = val_miou

    # Checkpoint
    checkpoint = {
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "best_val_miou": best_val_miou,
        "history": history
    }
    torch.save(checkpoint, checkpoint_path)

    # Save best model
    if is_best:
        torch.save(checkpoint, best_model_path)
        print(f"[OK] Best model saved! (mIoU: {best_val_miou:.4f})")

    # Clear cache
    torch.cuda.empty_cache()

print("\n" + "="*60)
print("Training selesai!")
print(f"Best validation mIoU: {best_val_miou:.4f}")

[INFO] Training dari awal

Epoch 1/2
GPU Memory: 0.03 GB allocated


Training:   0%|                        | 8/18200 [01:10<26:22:38,  5.22s/it, loss=1.7117]


Logits stats - min: -4.3477, max: 1.3833
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                        | 20/18200 [01:10<5:31:54,  1.10s/it, loss=1.7252]


Logits stats - min: -4.4659, max: 2.4693
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                        | 28/18200 [01:10<2:31:35,  2.00it/s, loss=1.7171]


Logits stats - min: -3.7230, max: 2.1938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3658, max: 1.3689
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 40/18200 [01:10<54:55,  5.51it/s, loss=1.8335]


Logits stats - min: -3.6204, max: 2.4010
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0083, max: 2.6685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8922, max: 1.6062
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 49/18200 [01:11<28:46, 10.51it/s, loss=1.7328]


Logits stats - min: -4.5669, max: 2.5238
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3480, max: 2.2659
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 60/18200 [01:11<15:44, 19.22it/s, loss=1.6943]


Logits stats - min: -4.1681, max: 2.5144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3398, max: 1.5551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9723, max: 1.5580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2601, max: 2.1601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5090, max: 1.6124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.4405, max: 1.6321
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 70/18200 [01:11<12:17, 24.58it/s, loss=1.6394]


Logits stats - min: -3.7745, max: 1.5081
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 78/18200 [01:11<10:31, 28.68it/s, loss=1.6408]


Logits stats - min: -3.9044, max: 1.7652
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9628, max: 1.6126
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 86/18200 [01:12<09:36, 31.42it/s, loss=1.7151]


Logits stats - min: -4.1778, max: 2.4049
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3495, max: 1.5210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9545, max: 1.4844
Target unique values: tensor([0], device='cuda:0')


Training:   0%|▏                         | 90/18200 [01:12<09:29, 31.78it/s, loss=2.2370]


Logits stats - min: -5.0388, max: 2.4866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7065, max: 2.4311
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 109/18200 [01:12<08:12, 36.70it/s, loss=2.1705]


Logits stats - min: -3.9665, max: 2.3264
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 121/18200 [01:13<08:48, 34.18it/s, loss=1.6887]


Logits stats - min: -3.8524, max: 1.3756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9873, max: 1.5757
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 129/18200 [01:13<08:40, 34.72it/s, loss=1.7061]


Logits stats - min: -4.5843, max: 2.1603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.5204, max: 2.2364
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 138/18200 [01:13<08:18, 36.27it/s, loss=2.6720]


Logits stats - min: -4.0673, max: 2.3030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9305, max: 1.7755
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1570, max: 1.5016
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1847, max: 1.5057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3753, max: 2.3967
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 149/18200 [01:13<07:25, 40.53it/s, loss=2.2487]


Logits stats - min: -5.6782, max: 2.0264
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3353, max: 2.3283
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 158/18200 [01:14<08:09, 36.89it/s, loss=1.8274]


Logits stats - min: -4.2885, max: 1.4552
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 166/18200 [01:14<08:30, 35.30it/s, loss=1.6959]


Logits stats - min: -4.0417, max: 1.5249
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3739, max: 1.4751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8572, max: 1.8494
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 188/18200 [01:14<07:48, 38.47it/s, loss=1.7105]


Logits stats - min: -3.4533, max: 1.7593
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1331, max: 1.6062
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9786, max: 1.3296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2043, max: 1.4654
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 198/18200 [01:15<07:04, 42.40it/s, loss=1.7174]


Logits stats - min: -4.2555, max: 1.4328
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5671, max: 2.2523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7565, max: 2.2533
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 208/18200 [01:15<07:51, 38.12it/s, loss=1.6978]


Logits stats - min: -3.9002, max: 2.3009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6223, max: 1.5099
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 217/18200 [01:15<08:01, 37.37it/s, loss=1.8094]


Logits stats - min: -4.0345, max: 1.3270
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2185, max: 1.3159
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 244/18200 [01:16<07:55, 37.76it/s, loss=1.6524]


Logits stats - min: -4.1186, max: 2.3344
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1253, max: 1.4477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4412, max: 2.5063
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 253/18200 [01:16<07:55, 37.76it/s, loss=2.3310]


Logits stats - min: -4.1817, max: 2.0841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0296, max: 1.7430
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 267/18200 [01:17<07:34, 39.49it/s, loss=2.2111]


Logits stats - min: -4.6799, max: 2.4880
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6953, max: 1.5912
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0884, max: 2.5401
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5645, max: 2.4099
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9845, max: 1.2947
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 275/18200 [01:17<07:57, 37.57it/s, loss=1.8258]


Logits stats - min: -4.2858, max: 2.4110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1236, max: 2.4011
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 284/18200 [01:17<07:18, 40.86it/s, loss=1.7855]


Logits stats - min: -4.0809, max: 1.4960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6824, max: 1.4746
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 301/18200 [01:17<07:56, 37.59it/s, loss=1.9416]


Logits stats - min: -3.9568, max: 2.7086
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5366, max: 2.4289
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4610, max: 1.3987
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 309/18200 [01:18<08:02, 37.10it/s, loss=1.9702]


Logits stats - min: -4.5760, max: 2.3543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4671, max: 1.5769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5348, max: 1.5046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7833, max: 2.4996
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 318/18200 [01:18<07:45, 38.41it/s, loss=1.8006]


Logits stats - min: -4.2762, max: 2.4982
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 327/18200 [01:18<07:29, 39.80it/s, loss=2.3410]


Logits stats - min: -4.0562, max: 2.5896
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0381, max: 2.3491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0599, max: 2.3258
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 337/18200 [01:18<07:04, 42.07it/s, loss=1.6687]


Logits stats - min: -5.5155, max: 2.2416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8960, max: 1.4544
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0048, max: 1.5362
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 347/18200 [01:19<07:16, 40.93it/s, loss=1.8249]


Logits stats - min: -4.0202, max: 1.7169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7528, max: 1.7192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6273, max: 2.5289
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 352/18200 [01:19<07:59, 37.22it/s, loss=1.6867]


Logits stats - min: -4.1027, max: 1.9408
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 362/18200 [01:19<07:25, 40.06it/s, loss=1.6755]


Logits stats - min: -4.3963, max: 1.6980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8128, max: 2.6156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.4493, max: 2.2796
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 372/18200 [01:19<07:37, 38.93it/s, loss=1.8172]


Logits stats - min: -4.2040, max: 1.5013
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8591, max: 1.4883
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9107, max: 2.3304
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 386/18200 [01:20<07:11, 41.29it/s, loss=1.7079]


Logits stats - min: -4.0312, max: 1.6295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6105, max: 1.8995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8201, max: 2.2777
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0370, max: 1.5688
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 391/18200 [01:20<07:43, 38.46it/s, loss=1.6956]


Logits stats - min: -4.1931, max: 1.7525
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0386, max: 2.5994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6252, max: 1.4766
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 400/18200 [01:20<07:41, 38.55it/s, loss=2.6345]


Logits stats - min: -3.6522, max: 1.8212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6740, max: 2.3023
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7482, max: 2.3511
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 410/18200 [01:20<07:20, 40.42it/s, loss=1.7005]


Logits stats - min: -4.4005, max: 1.4196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.5925, max: 2.2779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7536, max: 2.5285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0497, max: 2.3040
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 434/18200 [01:21<07:54, 37.47it/s, loss=1.8129]


Logits stats - min: -4.1983, max: 1.4392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3945, max: 1.5929
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 439/18200 [01:21<07:43, 38.34it/s, loss=2.2128]


Logits stats - min: -4.4077, max: 1.8669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4743, max: 2.1988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6511, max: 1.4307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.4651, max: 1.2683
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0819, max: 1.4393
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 448/18200 [01:21<07:45, 38.10it/s, loss=1.8073]


Logits stats - min: -4.2310, max: 1.4608
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3621, max: 1.6852
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 474/18200 [01:22<07:18, 40.41it/s, loss=1.6930]


Logits stats - min: -3.7860, max: 2.4580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1395, max: 1.4351
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6199, max: 2.3679
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 483/18200 [01:22<07:58, 37.03it/s, loss=1.8083]


Logits stats - min: -3.5975, max: 2.2785
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0475, max: 1.5399
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 493/18200 [01:22<07:11, 41.07it/s, loss=1.6840]


Logits stats - min: -3.7760, max: 1.2628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9842, max: 1.6378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5688, max: 2.2182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8770, max: 1.5486
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 503/18200 [01:23<07:12, 40.89it/s, loss=2.0866]


Logits stats - min: -4.2796, max: 1.6460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8251, max: 1.6750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6963, max: 1.6271
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 513/18200 [01:23<07:30, 39.28it/s, loss=1.6400]


Logits stats - min: -3.5477, max: 1.4700
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 519/18200 [01:23<07:00, 42.00it/s, loss=2.2230]


Logits stats - min: -4.0170, max: 2.3669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5741, max: 2.3821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8508, max: 2.3115
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 524/18200 [01:23<07:13, 40.79it/s, loss=1.7173]


Logits stats - min: -4.0004, max: 2.2656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2830, max: 2.1947
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 538/18200 [01:24<07:33, 38.96it/s, loss=2.2239]


Logits stats - min: -4.3141, max: 2.4318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3147, max: 2.3075
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 544/18200 [01:24<07:02, 41.75it/s, loss=1.6726]


Logits stats - min: -4.2662, max: 2.0021
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8488, max: 1.6061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9823, max: 2.1487
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2963, max: 2.1622
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 554/18200 [01:24<07:07, 41.25it/s, loss=1.6886]


Logits stats - min: -5.0123, max: 2.2361
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2893, max: 1.2492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8742, max: 1.3234
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1190, max: 2.4472
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 564/18200 [01:24<07:11, 40.83it/s, loss=2.1981]


Logits stats - min: -3.8226, max: 1.3104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8747, max: 1.6769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1657, max: 2.3932
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9383, max: 2.5166
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7713, max: 1.9442
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4897, max: 2.6563
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 575/18200 [01:24<06:56, 42.27it/s, loss=1.6815]


Logits stats - min: -4.3271, max: 1.3330
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8313, max: 1.3665
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8406, max: 1.4230
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 585/18200 [01:25<07:25, 39.56it/s, loss=1.7957]


Logits stats - min: -4.6871, max: 2.4894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1728, max: 2.3191
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 599/18200 [01:25<07:38, 38.42it/s, loss=1.7961]


Logits stats - min: -4.2553, max: 1.6323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3053, max: 1.4484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1584, max: 1.4399
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5908, max: 2.3571
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 608/18200 [01:25<07:45, 37.83it/s, loss=2.5921]


Logits stats - min: -4.9045, max: 2.4554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9466, max: 1.3897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7170, max: 2.3419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3324, max: 2.3994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9025, max: 2.3041
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2106, max: 1.4825
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 619/18200 [01:26<07:18, 40.12it/s, loss=1.6932]


Logits stats - min: -4.2786, max: 1.1924
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5190, max: 1.4415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4163, max: 2.4378
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 634/18200 [01:26<07:28, 39.15it/s, loss=1.7096]


Logits stats - min: -3.7793, max: 1.5197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3084, max: 2.2160
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4726, max: 2.1676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2586, max: 2.1517
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 644/18200 [01:26<07:06, 41.18it/s, loss=1.7216]


Logits stats - min: -4.0775, max: 1.4509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4034, max: 1.5819
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 654/18200 [01:26<07:06, 41.16it/s, loss=1.6912]


Logits stats - min: -4.0548, max: 1.4293
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1738, max: 2.2469
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9024, max: 2.1888
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 667/18200 [01:27<07:47, 37.50it/s, loss=2.2652]


Logits stats - min: -3.8860, max: 1.3333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7452, max: 2.6258
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 681/18200 [01:27<07:59, 36.57it/s, loss=1.7838]


Logits stats - min: -4.0813, max: 1.4523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0464, max: 2.1836
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 695/18200 [01:27<07:30, 38.85it/s, loss=1.8014]


Logits stats - min: -4.4665, max: 2.1833
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2816, max: 1.5129
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2101, max: 1.4385
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 703/18200 [01:28<07:35, 38.41it/s, loss=1.7651]


Logits stats - min: -3.7565, max: 2.3064
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7428, max: 2.4944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2512, max: 1.6771
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 708/18200 [01:28<07:49, 37.25it/s, loss=2.2057]


Logits stats - min: -4.0407, max: 1.3954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0533, max: 1.5937
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7821, max: 1.5600
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 727/18200 [01:28<07:21, 39.58it/s, loss=1.7839]


Logits stats - min: -4.1723, max: 1.3304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2921, max: 1.4936
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8265, max: 1.2400
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 739/18200 [01:29<07:38, 38.12it/s, loss=1.7901]


Logits stats - min: -3.8374, max: 1.4125
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9579, max: 1.4032
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 748/18200 [01:29<07:48, 37.28it/s, loss=1.7945]


Logits stats - min: -4.4842, max: 2.4774
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 768/18200 [01:30<08:35, 33.84it/s, loss=1.6836]


Logits stats - min: -4.3389, max: 2.2676
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 778/18200 [01:30<07:44, 37.50it/s, loss=1.6854]


Logits stats - min: -4.7559, max: 1.8371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9431, max: 2.4083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0676, max: 1.6379
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 785/18200 [01:30<06:55, 41.91it/s, loss=1.6785]


Logits stats - min: -3.7967, max: 1.5776
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8689, max: 1.5137
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9777, max: 1.4379
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2886, max: 2.4070
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6454, max: 1.2619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1881, max: 2.1329
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 800/18200 [01:30<07:17, 39.73it/s, loss=1.6817]


Logits stats - min: -4.0106, max: 1.2865
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2944, max: 2.4542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2377, max: 1.2110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0522, max: 1.4364
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 810/18200 [01:31<06:45, 42.88it/s, loss=2.1957]


Logits stats - min: -3.8341, max: 1.5791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1804, max: 1.4872
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2156, max: 1.4899
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█▏                       | 819/18200 [01:31<07:23, 39.20it/s, loss=1.7786]


Logits stats - min: -4.2786, max: 1.5021
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8036, max: 1.2535
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0170, max: 1.4958
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 829/18200 [01:31<07:16, 39.81it/s, loss=2.0305]


Logits stats - min: -3.8818, max: 2.2370
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7580, max: 2.5379
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 839/18200 [01:31<07:19, 39.50it/s, loss=2.7115]


Logits stats - min: -4.1071, max: 1.6998
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9531, max: 1.4883
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 852/18200 [01:32<07:23, 39.11it/s, loss=1.6947]


Logits stats - min: -4.3800, max: 2.1735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3771, max: 1.6182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9595, max: 2.2118
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 866/18200 [01:32<07:20, 39.34it/s, loss=1.6625]


Logits stats - min: -4.3467, max: 1.5836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8458, max: 1.9143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7975, max: 2.2783
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 876/18200 [01:32<06:47, 42.56it/s, loss=1.6814]


Logits stats - min: -4.1548, max: 1.4407
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2309, max: 1.4069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6566, max: 2.3216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.5053, max: 2.4054
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 886/18200 [01:32<06:55, 41.63it/s, loss=2.1261]


Logits stats - min: -3.8377, max: 1.5032
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 895/18200 [01:33<07:57, 36.28it/s, loss=1.6900]


Logits stats - min: -4.0042, max: 1.4837
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 904/18200 [01:33<07:46, 37.10it/s, loss=1.6731]


Logits stats - min: -4.6001, max: 2.5165
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 913/18200 [01:33<07:47, 36.97it/s, loss=1.6821]


Logits stats - min: -4.0191, max: 1.9890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9546, max: 2.4853
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5920, max: 2.2991
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 923/18200 [01:33<06:57, 41.40it/s, loss=1.6601]


Logits stats - min: -3.9460, max: 1.6938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5615, max: 1.4009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5380, max: 1.9563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3142, max: 1.8945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9919, max: 2.0637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6543, max: 2.3833
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 933/18200 [01:34<06:38, 43.36it/s, loss=2.6367]


Logits stats - min: -4.0545, max: 2.2220
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1477, max: 1.5258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7727, max: 1.9513
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3314, max: 2.5120
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 947/18200 [01:34<07:31, 38.19it/s, loss=1.7739]


Logits stats - min: -4.5629, max: 2.2155
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3356, max: 2.2551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0878, max: 1.3719
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9947, max: 1.4839
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 962/18200 [01:34<07:26, 38.61it/s, loss=1.6867]


Logits stats - min: -4.2961, max: 1.3366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1137, max: 2.1338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5269, max: 2.3374
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 980/18200 [01:35<06:57, 41.20it/s, loss=1.6916]


Logits stats - min: -3.8845, max: 1.4205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.4083, max: 1.5705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0663, max: 2.3007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9461, max: 1.3871
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 986/18200 [01:35<06:33, 43.72it/s, loss=1.6463]


Logits stats - min: -4.1841, max: 1.5553
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1492, max: 1.3178
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8496, max: 1.7822
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1004/18200 [01:36<08:00, 35.76it/s, loss=1.7091]


Logits stats - min: -4.5138, max: 2.5133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9209, max: 2.0896
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1013/18200 [01:36<07:16, 39.37it/s, loss=2.0963]


Logits stats - min: -4.2109, max: 1.5594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5613, max: 2.2297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6741, max: 2.2132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9335, max: 2.1795
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1033/18200 [01:36<07:04, 40.41it/s, loss=2.2020]


Logits stats - min: -4.5693, max: 1.3629
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4632, max: 2.4840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9253, max: 1.5039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5254, max: 1.5411
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1039/18200 [01:36<06:56, 41.23it/s, loss=2.0799]


Logits stats - min: -4.3508, max: 1.4372
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1048/18200 [01:37<07:48, 36.61it/s, loss=1.6659]


Logits stats - min: -4.5655, max: 2.3297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6566, max: 2.3798
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4831, max: 2.1938
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1061/18200 [01:37<07:13, 39.53it/s, loss=2.0271]


Logits stats - min: -4.0243, max: 1.3517
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7074, max: 1.7936
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9474, max: 2.2527
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1071/18200 [01:37<06:57, 41.05it/s, loss=2.1838]


Logits stats - min: -3.9875, max: 1.7937
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5148, max: 1.5819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4455, max: 1.3932
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2175, max: 1.9902
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1086/18200 [01:38<07:21, 38.76it/s, loss=1.6931]


Logits stats - min: -3.6829, max: 1.2781
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1094/18200 [01:38<07:58, 35.74it/s, loss=1.6440]


Logits stats - min: -4.5951, max: 2.4415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9317, max: 1.7606
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1108/18200 [01:38<07:09, 39.77it/s, loss=1.5920]


Logits stats - min: -4.5452, max: 2.2354
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5649, max: 1.2994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3016, max: 1.5488
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1122/18200 [01:38<07:06, 40.08it/s, loss=2.0492]


Logits stats - min: -4.0742, max: 2.1916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3865, max: 2.4395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8613, max: 2.2964
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1132/18200 [01:39<06:42, 42.44it/s, loss=1.9336]


Logits stats - min: -4.0645, max: 1.2549
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4814, max: 1.6341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6426, max: 1.9885
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9107, max: 1.4427
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1151/18200 [01:39<07:37, 37.26it/s, loss=1.6737]


Logits stats - min: -4.0313, max: 1.6922
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8490, max: 2.2015
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1166/18200 [01:40<06:57, 40.82it/s, loss=1.6746]


Logits stats - min: -4.2109, max: 1.3962
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6932, max: 1.4374
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4222, max: 2.0890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8152, max: 1.8393
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9537, max: 1.4404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2345, max: 1.3733
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1183/18200 [01:40<06:11, 45.86it/s, loss=1.6984]


Logits stats - min: -3.8887, max: 1.4405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1729, max: 1.4322
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3168, max: 1.5901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5079, max: 2.2577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3215, max: 1.4859
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1193/18200 [01:40<07:08, 39.70it/s, loss=2.0889]


Logits stats - min: -4.7007, max: 1.7222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5351, max: 1.6873
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1208/18200 [01:41<06:48, 41.60it/s, loss=1.6823]


Logits stats - min: -3.9962, max: 1.2841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6446, max: 1.4507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2255, max: 2.2827
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1223/18200 [01:41<07:01, 40.24it/s, loss=2.2227]


Logits stats - min: -4.0360, max: 1.5790
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1233/18200 [01:41<06:55, 40.80it/s, loss=1.7045]


Logits stats - min: -3.9882, max: 1.4661
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7793, max: 1.3372
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1238/18200 [01:41<07:20, 38.48it/s, loss=1.9334]


Logits stats - min: -4.2730, max: 2.3065
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1246/18200 [01:42<07:46, 36.36it/s, loss=1.7642]


Logits stats - min: -4.0608, max: 1.5307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1992, max: 2.2358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6832, max: 2.2632
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1261/18200 [01:42<06:53, 40.99it/s, loss=1.7639]


Logits stats - min: -4.0710, max: 1.5122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7267, max: 2.1002
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2516, max: 2.4307
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1271/18200 [01:42<07:07, 39.63it/s, loss=1.6878]


Logits stats - min: -4.4118, max: 2.5256
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1276/18200 [01:42<06:52, 41.01it/s, loss=1.6695]


Logits stats - min: -4.1753, max: 1.9873
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1289/18200 [01:43<08:00, 35.17it/s, loss=2.1685]


Logits stats - min: -4.6469, max: 2.2875
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1302/18200 [01:43<07:46, 36.23it/s, loss=1.6382]


Logits stats - min: -4.2676, max: 1.5363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0368, max: 2.3340
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7911, max: 2.3976
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1317/18200 [01:43<07:15, 38.79it/s, loss=2.1668]


Logits stats - min: -4.9948, max: 2.4741
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8354, max: 1.4750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5811, max: 2.4445
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1326/18200 [01:44<07:19, 38.43it/s, loss=1.6827]


Logits stats - min: -4.2058, max: 1.3373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0498, max: 2.3144
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1336/18200 [01:44<07:09, 39.30it/s, loss=1.9753]


Logits stats - min: -4.8892, max: 2.3583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3804, max: 1.7288
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1344/18200 [01:44<07:36, 36.93it/s, loss=2.7370]


Logits stats - min: -4.8453, max: 2.3348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0115, max: 1.6761
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1354/18200 [01:44<07:01, 40.00it/s, loss=1.6798]


Logits stats - min: -4.0348, max: 1.3221
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1368/18200 [01:45<06:51, 40.89it/s, loss=1.6801]


Logits stats - min: -4.1849, max: 2.1462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.5970, max: 2.2134
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1624, max: 1.4103
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6646, max: 2.1134
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1383/18200 [01:45<07:23, 37.93it/s, loss=1.7526]


Logits stats - min: -4.1527, max: 1.4275
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0720, max: 2.4712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2448, max: 1.6882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8842, max: 2.1979
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1391/18200 [01:45<07:49, 35.77it/s, loss=2.1752]


Logits stats - min: -3.7881, max: 1.6938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3804, max: 2.0525
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1409/18200 [01:46<06:41, 41.77it/s, loss=1.9727]


Logits stats - min: -3.4446, max: 1.3319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9024, max: 2.2967
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0476, max: 2.2584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9725, max: 1.5793
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1426/18200 [01:46<07:17, 38.37it/s, loss=1.6429]


Logits stats - min: -4.2518, max: 2.3244
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8770, max: 2.2829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9703, max: 2.4837
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8278, max: 1.5197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1624, max: 2.4509
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1436/18200 [01:47<06:53, 40.53it/s, loss=1.6565]


Logits stats - min: -3.9646, max: 1.4674
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2921, max: 1.3008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4686, max: 1.8599
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1450/18200 [01:47<06:54, 40.39it/s, loss=1.7729]


Logits stats - min: -4.4981, max: 2.0388
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6761, max: 2.1602
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5793, max: 2.1471
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1460/18200 [01:47<06:50, 40.81it/s, loss=1.8271]


Logits stats - min: -4.3737, max: 1.6626
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6960, max: 2.4487
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5098, max: 1.4280
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1470/18200 [01:47<06:46, 41.12it/s, loss=1.7629]


Logits stats - min: -3.8583, max: 1.5343
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8990, max: 1.4951
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1479/18200 [01:48<07:15, 38.39it/s, loss=1.9333]


Logits stats - min: -3.6082, max: 1.6716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7732, max: 1.7840
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1500/18200 [01:48<07:21, 37.79it/s, loss=2.0427]


Logits stats - min: -4.5747, max: 1.3815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5422, max: 1.6619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3337, max: 1.5492
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1512/18200 [01:48<07:39, 36.34it/s, loss=2.1640]


Logits stats - min: -5.7625, max: 1.8769
Target unique values: tensor([0], device='cuda:0')


Training:   8%|██                      | 1517/18200 [01:49<07:28, 37.22it/s, loss=1.7724]


Logits stats - min: -3.8794, max: 2.0845
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7683, max: 1.3997
Target unique values: tensor([0], device='cuda:0')


Training:   8%|██                      | 1534/18200 [01:49<07:30, 36.98it/s, loss=1.7935]


Logits stats - min: -4.0761, max: 1.4696
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7605, max: 2.2585
Target unique values: tensor([0], device='cuda:0')


Training:   8%|██                      | 1539/18200 [01:49<07:21, 37.70it/s, loss=1.7478]


Logits stats - min: -4.4749, max: 1.9718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2814, max: 1.6727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0569, max: 1.8816
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6920, max: 1.4909
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1549/18200 [01:50<06:58, 39.80it/s, loss=1.7695]


Logits stats - min: -3.4837, max: 1.9922
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7385, max: 2.3586
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1576/18200 [01:50<07:09, 38.70it/s, loss=1.7520]


Logits stats - min: -4.6046, max: 1.4974
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2086, max: 1.5417
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1584/18200 [01:50<07:22, 37.51it/s, loss=1.6288]


Logits stats - min: -4.6159, max: 1.7866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5019, max: 1.4338
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1597/18200 [01:51<07:23, 37.47it/s, loss=2.7588]


Logits stats - min: -4.0518, max: 1.5613
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1606/18200 [01:51<06:47, 40.68it/s, loss=1.6495]


Logits stats - min: -4.3909, max: 2.1207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1320, max: 1.6048
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2557, max: 1.8885
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7186, max: 1.4656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4404, max: 1.9596
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1646/18200 [01:52<06:47, 40.67it/s, loss=1.7622]


Logits stats - min: -4.7483, max: 1.5915
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3289, max: 1.7441
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0951, max: 1.9868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1682, max: 2.1920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3466, max: 2.0254
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1669/18200 [01:53<06:58, 39.46it/s, loss=1.7944]


Logits stats - min: -4.5954, max: 1.3207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1785, max: 2.1618
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1674/18200 [01:53<06:47, 40.51it/s, loss=1.7901]


Logits stats - min: -4.6776, max: 1.5709
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7689, max: 1.6649
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1687/18200 [01:53<07:25, 37.03it/s, loss=2.0849]


Logits stats - min: -4.5838, max: 2.2955
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6089, max: 2.1192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6309, max: 2.3202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6817, max: 1.8024
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1701/18200 [01:54<07:27, 36.83it/s, loss=1.6011]


Logits stats - min: -4.0492, max: 1.3892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6203, max: 1.4911
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▎                     | 1711/18200 [01:54<06:48, 40.34it/s, loss=1.6292]


Logits stats - min: -4.3723, max: 1.5907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5331, max: 1.3789
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5733, max: 1.4404
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▎                     | 1721/18200 [01:54<06:43, 40.86it/s, loss=2.1608]


Logits stats - min: -4.5290, max: 2.2484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5064, max: 1.9608
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1731/18200 [01:54<07:16, 37.71it/s, loss=1.7970]


Logits stats - min: -4.5486, max: 2.2773
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7789, max: 2.1768
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0708, max: 1.4722
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7038, max: 1.5472
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1748/18200 [01:55<07:12, 38.04it/s, loss=1.6368]


Logits stats - min: -4.0796, max: 2.2941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9559, max: 2.3432
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1757/18200 [01:55<07:11, 38.14it/s, loss=2.1634]


Logits stats - min: -3.8178, max: 1.3983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2907, max: 1.3634
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1772/18200 [01:55<06:28, 42.26it/s, loss=2.1087]


Logits stats - min: -5.1666, max: 2.2884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4969, max: 2.1514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3610, max: 2.2658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3878, max: 2.3651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9304, max: 1.7139
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1777/18200 [01:55<06:46, 40.38it/s, loss=1.6739]


Logits stats - min: -4.0427, max: 1.4628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0544, max: 1.4579
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1998, max: 2.1703
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1787/18200 [01:56<06:47, 40.23it/s, loss=2.0557]


Logits stats - min: -5.7410, max: 2.0559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2640, max: 2.2849
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1802/18200 [01:56<06:14, 43.80it/s, loss=1.9643]


Logits stats - min: -4.4057, max: 1.4969
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3627, max: 1.2959
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2709, max: 1.5144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5612, max: 1.2720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4966, max: 1.5590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9936, max: 1.7241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7059, max: 1.4596
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1817/18200 [01:56<06:28, 42.17it/s, loss=1.6216]


Logits stats - min: -4.5475, max: 1.9746
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3284, max: 1.4072
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1831/18200 [01:57<07:14, 37.65it/s, loss=1.7359]


Logits stats - min: -5.6802, max: 1.9712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0474, max: 2.3459
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1835/18200 [01:57<07:18, 37.33it/s, loss=1.9787]


Logits stats - min: -4.4823, max: 1.7492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6815, max: 1.5151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4721, max: 1.1948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3345, max: 1.5593
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1845/18200 [01:57<06:41, 40.69it/s, loss=1.6293]


Logits stats - min: -3.7212, max: 1.6304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3933, max: 1.5998
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4533, max: 2.2894
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1855/18200 [01:57<06:37, 41.07it/s, loss=1.7323]


Logits stats - min: -4.3200, max: 1.8304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2778, max: 2.1587
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1865/18200 [01:58<06:35, 41.27it/s, loss=1.9842]


Logits stats - min: -4.0004, max: 1.8489
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1878/18200 [01:58<07:19, 37.17it/s, loss=1.7525]


Logits stats - min: -4.3763, max: 1.5524
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4018, max: 2.2502
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1886/18200 [01:58<07:37, 35.65it/s, loss=1.6103]


Logits stats - min: -4.4301, max: 1.5257
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▌                     | 1900/18200 [01:58<06:52, 39.48it/s, loss=1.6577]


Logits stats - min: -4.4128, max: 1.5767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1268, max: 1.4456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2831, max: 1.3397
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▌                     | 1910/18200 [01:59<06:23, 42.45it/s, loss=1.7409]


Logits stats - min: -4.7628, max: 1.9986
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5839, max: 2.3122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0854, max: 1.5242
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1920/18200 [01:59<06:18, 42.99it/s, loss=2.0739]


Logits stats - min: -4.1406, max: 2.2948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1955, max: 2.1500
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.2924, max: 2.0443
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6911, max: 1.6972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0449, max: 2.1027
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1939/18200 [02:00<07:14, 37.38it/s, loss=1.7546]


Logits stats - min: -4.9718, max: 1.5179
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8666, max: 1.4245
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1948/18200 [02:00<06:45, 40.09it/s, loss=1.7368]


Logits stats - min: -5.1563, max: 2.2793
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7059, max: 1.9775
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3418, max: 1.6119
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1958/18200 [02:00<06:32, 41.43it/s, loss=2.3319]


Logits stats - min: -4.2774, max: 1.3856
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6106, max: 1.4051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0229, max: 1.8484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2196, max: 2.2574
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1968/18200 [02:00<06:45, 40.01it/s, loss=1.7467]


Logits stats - min: -4.4114, max: 1.5714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2814, max: 2.3522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5450, max: 1.2733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1645, max: 2.0951
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1979/18200 [02:00<06:41, 40.43it/s, loss=2.0052]


Logits stats - min: -4.1693, max: 1.3028
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1988/18200 [02:01<07:08, 37.82it/s, loss=2.1528]


Logits stats - min: -4.2518, max: 1.8233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5694, max: 1.7676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1699, max: 2.1486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9739, max: 2.1123
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2004/18200 [02:01<06:24, 42.17it/s, loss=1.6335]


Logits stats - min: -4.0302, max: 2.2374
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3256, max: 1.2829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1090, max: 1.7909
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1627, max: 1.8188
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2019/18200 [02:01<06:46, 39.85it/s, loss=1.6684]


Logits stats - min: -4.0263, max: 2.6087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8834, max: 2.3199
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2029/18200 [02:02<06:42, 40.13it/s, loss=1.6762]


Logits stats - min: -4.1217, max: 2.1414
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3791, max: 2.0815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2473, max: 1.4678
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5526, max: 2.3914
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2039/18200 [02:02<06:35, 40.86it/s, loss=1.7275]


Logits stats - min: -6.0357, max: 1.8857
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.4104, max: 1.8278
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2049/18200 [02:02<06:43, 40.04it/s, loss=1.7420]


Logits stats - min: -4.4866, max: 1.3339
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2054/18200 [02:02<06:36, 40.73it/s, loss=1.7486]


Logits stats - min: -4.8984, max: 2.4679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5417, max: 1.3760
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2069/18200 [02:03<06:28, 41.53it/s, loss=1.7387]


Logits stats - min: -4.8490, max: 2.0698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5158, max: 1.7276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6296, max: 1.6908
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8008, max: 1.4707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7010, max: 1.4204
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2079/18200 [02:03<06:44, 39.90it/s, loss=2.0740]


Logits stats - min: -5.3948, max: 2.1727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7026, max: 2.1249
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▊                     | 2089/18200 [02:03<06:36, 40.61it/s, loss=1.7415]


Logits stats - min: -3.8909, max: 2.2079
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0994, max: 1.3551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9102, max: 1.3540
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2638, max: 1.4546
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2099/18200 [02:03<06:28, 41.48it/s, loss=1.8347]


Logits stats - min: -4.3434, max: 1.6038
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0996, max: 2.2977
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2109/18200 [02:04<06:46, 39.58it/s, loss=1.6514]


Logits stats - min: -4.5101, max: 2.2653
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9076, max: 1.6787
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2119/18200 [02:04<06:41, 40.09it/s, loss=1.6605]


Logits stats - min: -4.3840, max: 1.4715
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6646, max: 2.0960
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2132/18200 [02:04<07:03, 37.97it/s, loss=1.6258]


Logits stats - min: -4.1413, max: 1.3245
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7927, max: 2.1425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0052, max: 1.2975
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2136/18200 [02:05<07:19, 36.54it/s, loss=1.9844]


Logits stats - min: -4.2242, max: 1.3512
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2212, max: 2.0755
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2159/18200 [02:05<06:47, 39.37it/s, loss=1.7882]


Logits stats - min: -4.8219, max: 2.4147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8460, max: 2.2266
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2169/18200 [02:05<06:31, 40.94it/s, loss=2.0365]


Logits stats - min: -3.8436, max: 2.2169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5334, max: 2.1529
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0312, max: 1.6569
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2178/18200 [02:06<07:01, 37.97it/s, loss=1.6597]


Logits stats - min: -5.2007, max: 2.5807
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5925, max: 1.4528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2770, max: 1.5229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4452, max: 1.5715
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2193/18200 [02:06<06:39, 40.10it/s, loss=1.6952]


Logits stats - min: -4.0732, max: 1.1996
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9759, max: 2.4919
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0635, max: 2.5018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0898, max: 1.4035
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2209/18200 [02:06<06:19, 42.18it/s, loss=2.0981]


Logits stats - min: -5.7027, max: 1.7415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6581, max: 1.4972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6060, max: 1.3815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5078, max: 2.3781
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2219/18200 [02:06<06:24, 41.54it/s, loss=1.6666]


Logits stats - min: -4.2709, max: 1.6543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0526, max: 2.5973
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2224/18200 [02:07<06:42, 39.70it/s, loss=1.8297]


Logits stats - min: -4.5768, max: 2.1005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3162, max: 2.5901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1265, max: 1.5647
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2237/18200 [02:07<07:00, 37.98it/s, loss=1.6375]


Logits stats - min: -4.6648, max: 2.6338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8169, max: 1.4004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0450, max: 1.6975
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2251/18200 [02:07<06:23, 41.57it/s, loss=1.6486]


Logits stats - min: -4.8912, max: 1.4314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8339, max: 1.2904
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2468, max: 2.3091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6414, max: 1.6009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3703, max: 1.3336
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9744, max: 1.9259
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2261/18200 [02:08<06:02, 43.96it/s, loss=2.0994]


Logits stats - min: -4.4508, max: 2.0912
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6534, max: 1.5484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4018, max: 1.6306
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2696, max: 2.0447
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2271/18200 [02:08<06:09, 43.14it/s, loss=1.7230]


Logits stats - min: -4.5168, max: 1.3121
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5151, max: 1.3148
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7156, max: 1.5864
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2281/18200 [02:08<06:37, 40.09it/s, loss=1.7517]


Logits stats - min: -4.2962, max: 1.3902
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8763, max: 1.6118
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2291/18200 [02:08<06:49, 38.80it/s, loss=1.7442]


Logits stats - min: -4.1483, max: 2.0066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4772, max: 2.2175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4113, max: 1.4253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4861, max: 1.6429
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2301/18200 [02:09<06:12, 42.70it/s, loss=1.6775]


Logits stats - min: -3.9791, max: 1.3451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4650, max: 1.5010
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1800, max: 1.5171
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2311/18200 [02:09<06:16, 42.16it/s, loss=1.6423]


Logits stats - min: -4.5656, max: 2.1885
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2020, max: 1.6007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6412, max: 1.4227
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2323/18200 [02:09<06:05, 43.44it/s, loss=1.6428]


Logits stats - min: -3.8923, max: 1.5976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1270, max: 2.3297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6634, max: 1.4521
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4477, max: 2.2306
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1655, max: 2.1102
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2333/18200 [02:09<06:24, 41.26it/s, loss=1.7548]


Logits stats - min: -4.0371, max: 1.4826
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4353, max: 1.2288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8933, max: 1.4151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0887, max: 1.4521
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8819, max: 1.4212
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2348/18200 [02:10<06:34, 40.16it/s, loss=2.1318]


Logits stats - min: -5.2965, max: 2.1973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2555, max: 1.3656
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2359/18200 [02:10<06:03, 43.64it/s, loss=2.1717]


Logits stats - min: -3.9831, max: 2.0501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2996, max: 1.2196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5507, max: 1.3928
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6185, max: 1.5332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2613, max: 2.1694
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2369/18200 [02:10<06:51, 38.49it/s, loss=1.9725]


Logits stats - min: -4.4827, max: 1.5949
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2962, max: 2.3874
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2388/18200 [02:11<06:59, 37.70it/s, loss=1.6278]


Logits stats - min: -4.5291, max: 2.5100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4532, max: 2.4455
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2398/18200 [02:11<06:46, 38.90it/s, loss=1.6694]


Logits stats - min: -4.1884, max: 1.6224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9808, max: 1.4732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0591, max: 2.2218
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2408/18200 [02:11<06:34, 40.06it/s, loss=1.6920]


Logits stats - min: -3.7356, max: 1.4533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0466, max: 1.4922
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2422/18200 [02:12<06:40, 39.38it/s, loss=1.7289]


Logits stats - min: -4.6080, max: 1.7118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0068, max: 2.3348
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2430/18200 [02:12<06:54, 38.03it/s, loss=1.6424]


Logits stats - min: -4.2739, max: 2.0968
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6889, max: 2.2387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4382, max: 2.1971
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2438/18200 [02:12<06:51, 38.29it/s, loss=1.7261]


Logits stats - min: -4.4167, max: 1.4100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3432, max: 1.8834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3734, max: 1.4918
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2452/18200 [02:12<06:29, 40.48it/s, loss=1.6681]


Logits stats - min: -3.8229, max: 1.3769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5310, max: 1.3632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7556, max: 2.5340
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▏                    | 2457/18200 [02:13<06:38, 39.46it/s, loss=2.0398]


Logits stats - min: -4.2642, max: 1.5253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9370, max: 1.8841
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2467/18200 [02:13<06:18, 41.54it/s, loss=1.7365]


Logits stats - min: -4.6222, max: 2.4749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4357, max: 2.1336
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2477/18200 [02:13<06:10, 42.48it/s, loss=2.1632]


Logits stats - min: -4.2848, max: 2.4185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2921, max: 2.1825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3356, max: 1.2002
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2486/18200 [02:13<07:04, 37.01it/s, loss=1.5965]


Logits stats - min: -3.8861, max: 2.0501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1989, max: 1.8331
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7536, max: 1.6886
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9412, max: 1.3166
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2496/18200 [02:13<06:27, 40.52it/s, loss=1.6717]


Logits stats - min: -4.3715, max: 2.2101
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2506/18200 [02:14<06:38, 39.39it/s, loss=1.6388]


Logits stats - min: -4.6838, max: 1.6423
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1971, max: 1.8578
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0326, max: 1.4122
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2516/18200 [02:14<06:25, 40.72it/s, loss=1.9691]


Logits stats - min: -4.2092, max: 1.1727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5605, max: 1.5094
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2521/18200 [02:14<06:32, 39.93it/s, loss=1.6517]


Logits stats - min: -4.3052, max: 1.3882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5169, max: 2.4403
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2531/18200 [02:14<06:36, 39.53it/s, loss=1.6577]


Logits stats - min: -4.1658, max: 1.5942
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8286, max: 1.4845
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2545/18200 [02:15<06:45, 38.63it/s, loss=2.7454]


Logits stats - min: -4.6002, max: 1.5122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3408, max: 1.4836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7998, max: 2.2361
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1655, max: 2.3660
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2556/18200 [02:15<06:28, 40.26it/s, loss=1.6633]


Logits stats - min: -4.4137, max: 1.4747
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7356, max: 1.7284
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2569/18200 [02:15<05:27, 47.75it/s, loss=1.7239]


Logits stats - min: -4.9421, max: 2.2449
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4738, max: 1.2736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9772, max: 1.7596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0751, max: 1.6239
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4619, max: 1.4226
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0362, max: 1.8104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7279, max: 1.4593
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5900, max: 2.1773
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2579/18200 [02:16<06:23, 40.72it/s, loss=1.5786]


Logits stats - min: -4.4563, max: 1.4131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5290, max: 1.5082
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2593/18200 [02:16<06:42, 38.77it/s, loss=1.6288]


Logits stats - min: -4.5602, max: 1.5257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4707, max: 1.4543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0081, max: 1.8742
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2609/18200 [02:16<06:15, 41.55it/s, loss=1.6528]


Logits stats - min: -4.4550, max: 1.6545
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6422, max: 1.5125
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9638, max: 1.3886
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2619/18200 [02:16<06:30, 39.94it/s, loss=1.7269]


Logits stats - min: -4.5472, max: 1.7065
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6491, max: 1.5806
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2629/18200 [02:17<06:20, 40.87it/s, loss=2.0227]


Logits stats - min: -4.7163, max: 1.4787
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6436, max: 2.2531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0784, max: 2.1874
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▍                    | 2644/18200 [02:17<06:25, 40.30it/s, loss=1.7186]


Logits stats - min: -4.0267, max: 1.5050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5523, max: 1.5408
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7091, max: 2.3117
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▍                    | 2654/18200 [02:17<06:22, 40.67it/s, loss=1.6644]


Logits stats - min: -4.3010, max: 1.1303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7902, max: 2.8425
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2659/18200 [02:18<06:29, 39.91it/s, loss=1.7369]


Logits stats - min: -4.5628, max: 1.4844
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2672/18200 [02:18<07:01, 36.83it/s, loss=1.8956]


Logits stats - min: -4.6036, max: 1.5665
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7871, max: 2.0911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6806, max: 2.1816
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4060, max: 1.7202
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2685/18200 [02:18<06:30, 39.71it/s, loss=1.6629]


Logits stats - min: -4.6260, max: 2.3873
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4313, max: 1.7124
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2694/18200 [02:18<06:22, 40.58it/s, loss=1.7380]


Logits stats - min: -4.2188, max: 1.5242
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2914, max: 2.4327
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2933, max: 1.4972
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2704/18200 [02:19<06:25, 40.20it/s, loss=1.7300]


Logits stats - min: -4.8288, max: 1.4463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3824, max: 1.5253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1380, max: 1.6112
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2719/18200 [02:19<06:43, 38.33it/s, loss=1.7783]


Logits stats - min: -4.0474, max: 2.3430
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3751, max: 1.4615
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4694, max: 1.4875
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2734/18200 [02:19<06:35, 39.07it/s, loss=2.0272]


Logits stats - min: -4.1367, max: 2.2124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7399, max: 2.2433
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2742/18200 [02:20<06:40, 38.64it/s, loss=2.1760]


Logits stats - min: -5.1632, max: 1.3735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0221, max: 1.4779
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2751/18200 [02:20<06:12, 41.43it/s, loss=1.6537]


Logits stats - min: -4.7636, max: 2.0792
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4418, max: 1.6914
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3175, max: 1.6738
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2765/18200 [02:20<06:43, 38.26it/s, loss=1.5843]


Logits stats - min: -4.5240, max: 1.4185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5556, max: 2.2354
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2776/18200 [02:21<06:07, 41.95it/s, loss=1.7258]


Logits stats - min: -5.1092, max: 2.2196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6504, max: 1.7546
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6021, max: 1.6779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6654, max: 2.1680
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2786/18200 [02:21<05:56, 43.20it/s, loss=1.7284]


Logits stats - min: -4.2569, max: 2.2406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2562, max: 2.1437
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5546, max: 2.2802
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2936, max: 2.1472
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2804/18200 [02:21<06:34, 39.04it/s, loss=1.7637]


Logits stats - min: -4.1512, max: 2.0398
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2817/18200 [02:22<06:36, 38.80it/s, loss=2.0596]


Logits stats - min: -3.9590, max: 2.4706
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4887, max: 2.3266
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1824, max: 1.5186
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▋                    | 2830/18200 [02:22<06:24, 39.92it/s, loss=1.6638]


Logits stats - min: -4.4685, max: 2.0130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8425, max: 2.1332
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▋                    | 2839/18200 [02:22<06:40, 38.35it/s, loss=1.8259]


Logits stats - min: -4.5149, max: 1.4662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2913, max: 1.4650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6081, max: 2.1104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2135, max: 2.1318
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2850/18200 [02:22<06:04, 42.12it/s, loss=1.6150]


Logits stats - min: -4.5298, max: 1.4249
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5899, max: 2.1579
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2867/18200 [02:23<06:53, 37.11it/s, loss=1.7259]


Logits stats - min: -4.2899, max: 1.6333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0423, max: 1.6638
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0573, max: 2.1919
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6507, max: 2.4024
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2882/18200 [02:23<06:01, 42.32it/s, loss=1.7362]


Logits stats - min: -4.2276, max: 1.2737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9844, max: 1.5883
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5197, max: 1.4988
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2896/18200 [02:24<06:52, 37.12it/s, loss=1.6580]


Logits stats - min: -3.9933, max: 1.5646
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2911/18200 [02:24<06:06, 41.71it/s, loss=1.6483]


Logits stats - min: -4.7438, max: 2.2800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9245, max: 2.1330
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8125, max: 1.8609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1207, max: 1.4526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5483, max: 2.2495
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2921/18200 [02:24<06:19, 40.29it/s, loss=1.7215]


Logits stats - min: -4.2414, max: 1.4190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6947, max: 2.2474
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2953/18200 [02:25<07:00, 36.22it/s, loss=1.7190]


Logits stats - min: -4.7426, max: 1.5895
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2965/18200 [02:25<06:48, 37.33it/s, loss=1.6550]


Logits stats - min: -4.1237, max: 2.1225
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2729, max: 1.2367
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9546, max: 2.4491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0159, max: 1.2818
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2976/18200 [02:26<05:55, 42.86it/s, loss=1.7205]


Logits stats - min: -4.9012, max: 2.2473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8824, max: 2.3381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4047, max: 1.3967
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2986/18200 [02:26<06:36, 38.38it/s, loss=1.6484]


Logits stats - min: -4.5660, max: 2.1675
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4776, max: 2.1164
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 3000/18200 [02:26<06:28, 39.08it/s, loss=1.5861]


Logits stats - min: -4.6969, max: 1.4567
Target unique values: tensor([0], device='cuda:0')


Training:  17%|███▉                    | 3008/18200 [02:27<07:03, 35.90it/s, loss=1.6045]


Logits stats - min: -4.2399, max: 1.2762
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2827, max: 1.6127
Target unique values: tensor([0], device='cuda:0')


Training:  17%|███▉                    | 3018/18200 [02:27<06:16, 40.34it/s, loss=1.6476]


Logits stats - min: -5.2868, max: 2.2794
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2971, max: 1.2218
Target unique values: tensor([0], device='cuda:0')


Training:  17%|███▉                    | 3028/18200 [02:27<06:13, 40.58it/s, loss=2.2018]


Logits stats - min: -4.2239, max: 1.4412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8684, max: 1.5652
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2375, max: 1.3733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8029, max: 2.0695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5208, max: 1.1996
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3038/18200 [02:27<06:02, 41.84it/s, loss=1.6584]


Logits stats - min: -4.5444, max: 2.2203
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5608, max: 2.2071
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3048/18200 [02:27<06:14, 40.43it/s, loss=1.6421]


Logits stats - min: -4.7901, max: 2.3466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6962, max: 2.0520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4035, max: 1.3595
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3058/18200 [02:28<06:09, 41.00it/s, loss=1.6583]


Logits stats - min: -4.1729, max: 1.4858
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1743, max: 1.5358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4461, max: 2.1380
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4788, max: 2.0193
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3068/18200 [02:28<06:21, 39.71it/s, loss=1.6733]


Logits stats - min: -4.3204, max: 2.0961
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3082/18200 [02:28<06:20, 39.75it/s, loss=1.7100]


Logits stats - min: -4.6904, max: 2.5130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8321, max: 1.9229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4337, max: 1.3938
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3091/18200 [02:28<06:19, 39.77it/s, loss=1.6599]


Logits stats - min: -4.9927, max: 2.3565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7790, max: 1.2948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8695, max: 2.0583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3505, max: 1.4071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5544, max: 1.5693
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3099/18200 [02:29<06:21, 39.63it/s, loss=1.7130]


Logits stats - min: -4.7557, max: 2.1818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1537, max: 1.3636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1364, max: 2.0729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1693, max: 2.3006
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3114/18200 [02:29<05:55, 42.38it/s, loss=1.7929]


Logits stats - min: -4.4850, max: 1.4734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2359, max: 1.3342
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1510, max: 1.6089
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3119/18200 [02:29<05:44, 43.79it/s, loss=1.7258]


Logits stats - min: -4.3424, max: 1.3837
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3134/18200 [02:29<05:52, 42.70it/s, loss=1.7157]


Logits stats - min: -4.9721, max: 2.0817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9952, max: 1.6373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9139, max: 1.5014
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6172, max: 2.3286
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4285, max: 1.4382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2532, max: 1.4727
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3139/18200 [02:30<05:58, 41.95it/s, loss=1.6505]


Logits stats - min: -4.6138, max: 2.2129
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4705, max: 2.0324
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3149/18200 [02:30<05:48, 43.15it/s, loss=1.7141]


Logits stats - min: -4.6201, max: 2.1277
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6963, max: 2.1840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3347, max: 1.9013
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3164/18200 [02:30<06:06, 41.03it/s, loss=1.6007]


Logits stats - min: -4.9711, max: 2.4015
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4513, max: 2.3381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2954, max: 1.5085
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3173/18200 [02:31<06:36, 37.92it/s, loss=1.6003]


Logits stats - min: -3.8741, max: 1.4687
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3109, max: 2.4590
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3183/18200 [02:31<05:59, 41.76it/s, loss=1.6336]


Logits stats - min: -4.2736, max: 1.5161
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1604, max: 1.2712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7198, max: 1.6095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0705, max: 1.9479
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9453, max: 2.1387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3082, max: 1.4660
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▏                   | 3198/18200 [02:31<06:14, 40.05it/s, loss=1.8122]


Logits stats - min: -4.6538, max: 1.5382
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▏                   | 3208/18200 [02:31<06:20, 39.38it/s, loss=1.6589]


Logits stats - min: -4.0284, max: 2.1732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3917, max: 1.4996
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▏                   | 3217/18200 [02:32<06:35, 37.89it/s, loss=1.6631]


Logits stats - min: -4.4733, max: 2.2795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0809, max: 2.1309
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8348, max: 1.2243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3820, max: 2.3523
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3227/18200 [02:32<06:23, 39.00it/s, loss=1.6043]


Logits stats - min: -3.9619, max: 1.7983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1608, max: 2.1927
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3235/18200 [02:32<06:23, 39.07it/s, loss=1.6588]


Logits stats - min: -4.5353, max: 1.4563
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3258/18200 [02:33<06:00, 41.46it/s, loss=1.6511]


Logits stats - min: -4.1197, max: 2.1433
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8670, max: 1.3825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3044, max: 2.0790
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3268/18200 [02:33<06:09, 40.43it/s, loss=2.1265]


Logits stats - min: -4.4937, max: 1.1996
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0814, max: 1.3024
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3285/18200 [02:33<05:20, 46.61it/s, loss=1.6074]


Logits stats - min: -4.5659, max: 1.6278
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6527, max: 2.3224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5703, max: 2.3021
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4968, max: 2.2728
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8282, max: 2.1102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7961, max: 1.5457
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3295/18200 [02:34<05:49, 42.69it/s, loss=1.6359]


Logits stats - min: -4.2076, max: 2.4771
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3858, max: 2.5302
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3305/18200 [02:34<05:52, 42.25it/s, loss=1.6821]


Logits stats - min: -4.1890, max: 1.4473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9128, max: 2.2516
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0103, max: 1.8862
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3315/18200 [02:34<05:55, 41.81it/s, loss=1.6140]


Logits stats - min: -4.4579, max: 2.0884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5048, max: 1.4796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5758, max: 1.5307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1223, max: 1.9086
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8787, max: 2.3023


Training:  18%|████▍                   | 3320/18200 [02:34<05:58, 41.47it/s, loss=1.6631]

Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3581, max: 2.5073
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0462, max: 2.3654
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3334/18200 [02:34<06:05, 40.66it/s, loss=1.6654]


Logits stats - min: -4.6022, max: 1.4404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4461, max: 1.4913
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.6357, max: 2.0158
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4398, max: 1.3546
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9689, max: 1.9050
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3344/18200 [02:35<05:55, 41.73it/s, loss=1.6035]


Logits stats - min: -4.6506, max: 2.0266
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4325, max: 1.5300
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3355/18200 [02:35<05:38, 43.81it/s, loss=1.6388]


Logits stats - min: -4.0548, max: 2.1571
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8364, max: 1.7932
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5607, max: 2.0611
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2433, max: 2.3430
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3365/18200 [02:35<05:53, 42.02it/s, loss=1.7064]


Logits stats - min: -4.2868, max: 1.4710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7889, max: 1.3924
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4793, max: 1.5552
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3375/18200 [02:35<06:14, 39.57it/s, loss=1.7166]


Logits stats - min: -5.2487, max: 2.2230
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3385/18200 [02:36<05:59, 41.25it/s, loss=1.6572]


Logits stats - min: -4.3647, max: 1.6140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6677, max: 1.7029
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2969, max: 1.3596
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3395/18200 [02:36<06:06, 40.43it/s, loss=1.5819]


Logits stats - min: -4.3968, max: 1.7500
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5951, max: 1.2115
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3411/18200 [02:36<05:43, 43.00it/s, loss=1.7075]


Logits stats - min: -4.4907, max: 1.4719
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6995, max: 2.2923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3150, max: 1.9160
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3421/18200 [02:37<06:30, 37.81it/s, loss=1.6588]


Logits stats - min: -4.0911, max: 2.1678
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6007, max: 1.4166
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7258, max: 1.9753
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3437/18200 [02:37<05:35, 44.07it/s, loss=1.6568]


Logits stats - min: -4.6684, max: 1.5605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7000, max: 1.6528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2889, max: 1.4563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5047, max: 1.7096
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2786, max: 2.3005
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3442/18200 [02:37<05:43, 42.99it/s, loss=1.6505]


Logits stats - min: -4.7735, max: 1.5341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3960, max: 1.6560
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3457/18200 [02:37<05:53, 41.72it/s, loss=1.7791]


Logits stats - min: -4.6024, max: 1.4254
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5928, max: 1.5072
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3471/18200 [02:38<06:35, 37.20it/s, loss=1.6458]


Logits stats - min: -4.1149, max: 2.4409
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3481/18200 [02:38<05:58, 41.07it/s, loss=1.6474]


Logits stats - min: -4.0945, max: 1.3557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7072, max: 2.4087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1757, max: 1.5234
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3500/18200 [02:39<06:20, 38.62it/s, loss=1.7330]


Logits stats - min: -4.5283, max: 1.4471
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4106, max: 2.0462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8264, max: 1.1494
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4801, max: 2.3126
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3518/18200 [02:39<06:00, 40.71it/s, loss=1.6630]


Logits stats - min: -5.1976, max: 2.0363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6329, max: 1.4230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5739, max: 2.3402
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4749, max: 2.0876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2652, max: 1.5109
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3528/18200 [02:39<06:15, 39.07it/s, loss=1.9121]


Logits stats - min: -4.4896, max: 1.5599
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5234, max: 1.5756
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3534/18200 [02:39<06:00, 40.67it/s, loss=1.6347]


Logits stats - min: -4.3707, max: 2.4785
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5512, max: 1.3322
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3551/18200 [02:40<06:36, 36.91it/s, loss=1.6666]


Logits stats - min: -4.1826, max: 1.2746
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0294, max: 2.1446
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3560/18200 [02:40<06:29, 37.54it/s, loss=1.6422]


Logits stats - min: -4.7292, max: 1.2207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9232, max: 1.8308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6334, max: 1.4189
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3565/18200 [02:40<06:20, 38.49it/s, loss=2.1786]


Logits stats - min: -4.5477, max: 1.7158
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8417, max: 1.9416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2413, max: 1.6191
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3579/18200 [02:41<06:04, 40.11it/s, loss=1.6263]


Logits stats - min: -4.1519, max: 2.2710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6009, max: 1.4516
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3589/18200 [02:41<05:50, 41.64it/s, loss=1.5942]


Logits stats - min: -4.2865, max: 1.9244
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5677, max: 1.6847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7975, max: 2.3785
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3594/18200 [02:41<06:06, 39.88it/s, loss=2.4039]


Logits stats - min: -3.9809, max: 1.5297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1196, max: 1.6197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0598, max: 1.6218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6849, max: 2.2417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2295, max: 1.5513
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3609/18200 [02:41<06:27, 37.68it/s, loss=2.5638]


Logits stats - min: -4.4292, max: 1.4808
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3618/18200 [02:42<06:17, 38.68it/s, loss=1.6708]


Logits stats - min: -4.5958, max: 1.5828
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3248, max: 1.8637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8298, max: 1.3836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3745, max: 2.1462
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3634/18200 [02:42<05:31, 43.89it/s, loss=1.6008]


Logits stats - min: -4.2664, max: 1.5336
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7886, max: 2.0355
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8455, max: 1.6701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5632, max: 2.2705
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3644/18200 [02:42<05:29, 44.18it/s, loss=1.7593]


Logits stats - min: -4.5658, max: 1.4484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9627, max: 2.4733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0090, max: 2.2028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3806, max: 1.5588
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3663/18200 [02:43<06:21, 38.11it/s, loss=1.7517]


Logits stats - min: -4.8674, max: 2.2485
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5855, max: 1.5451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6296, max: 1.4878
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3676/18200 [02:43<06:30, 37.22it/s, loss=1.6558]


Logits stats - min: -4.2572, max: 1.4509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1463, max: 2.5793
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3689/18200 [02:43<05:53, 41.03it/s, loss=1.6412]


Logits stats - min: -4.7296, max: 2.2218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4855, max: 1.4439
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3023, max: 1.5382
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3694/18200 [02:44<05:58, 40.44it/s, loss=1.7086]


Logits stats - min: -4.5099, max: 2.1990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3945, max: 1.5427
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▉                   | 3703/18200 [02:44<06:36, 36.59it/s, loss=2.3312]


Logits stats - min: -4.3054, max: 1.3437
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6593, max: 1.4778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3832, max: 1.2530
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▉                   | 3712/18200 [02:44<06:10, 39.10it/s, loss=1.6407]


Logits stats - min: -4.2811, max: 2.3255
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2943, max: 1.5722
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4773, max: 2.1730
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▉                   | 3722/18200 [02:44<05:38, 42.80it/s, loss=1.6619]


Logits stats - min: -5.1153, max: 2.4942
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2384, max: 1.5318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5649, max: 1.3605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0332, max: 2.6683
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3737/18200 [02:45<05:23, 44.71it/s, loss=1.6381]


Logits stats - min: -5.0804, max: 1.5801
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4531, max: 2.1046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6460, max: 2.0356
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4053, max: 1.4872
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3742/18200 [02:45<05:33, 43.40it/s, loss=1.6401]


Logits stats - min: -4.5316, max: 2.2143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7227, max: 1.4461
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3756/18200 [02:45<06:23, 37.65it/s, loss=1.6450]


Logits stats - min: -4.7039, max: 2.1863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5647, max: 2.4709
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6890, max: 2.3765
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3767/18200 [02:45<06:01, 39.89it/s, loss=1.7356]


Logits stats - min: -4.8046, max: 1.3629
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1703, max: 1.7878
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3790/18200 [02:46<05:47, 41.49it/s, loss=1.6489]


Logits stats - min: -4.5844, max: 1.5501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6937, max: 2.3292
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9085, max: 2.3971
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3800/18200 [02:46<06:10, 38.89it/s, loss=2.2565]


Logits stats - min: -4.8790, max: 2.2652
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3815/18200 [02:47<06:06, 39.20it/s, loss=1.6230]


Logits stats - min: -4.1944, max: 2.4704
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3831/18200 [02:47<05:33, 43.05it/s, loss=1.7171]


Logits stats - min: -5.0788, max: 2.0023
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1159, max: 2.4541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3011, max: 2.3555
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5649, max: 1.5660
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3842/18200 [02:47<05:11, 46.13it/s, loss=1.8886]


Logits stats - min: -5.9447, max: 2.1425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0501, max: 1.3412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5476, max: 2.2107
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4821, max: 2.4601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9939, max: 1.6142
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5222, max: 2.2968
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3857/18200 [02:47<05:25, 44.07it/s, loss=2.0477]


Logits stats - min: -4.3967, max: 1.5732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7361, max: 1.3112
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5611, max: 1.4190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1040, max: 2.3046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5959, max: 1.5605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8597, max: 2.2858
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3869/18200 [02:48<04:56, 48.39it/s, loss=1.6833]


Logits stats - min: -4.6081, max: 1.6319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5780, max: 1.7772
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1119, max: 1.2526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1875, max: 1.4950
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9328, max: 1.4228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1159, max: 1.2731
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9305, max: 2.4332
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3879/18200 [02:48<05:20, 44.71it/s, loss=1.6456]


Logits stats - min: -4.2699, max: 1.5795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3721, max: 1.4368
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5053, max: 1.5260
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████▏                  | 3894/18200 [02:48<05:28, 43.50it/s, loss=1.7181]


Logits stats - min: -5.2762, max: 2.1637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2219, max: 1.3781
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████▏                  | 3904/18200 [02:49<05:43, 41.68it/s, loss=2.6668]


Logits stats - min: -4.5548, max: 1.4152
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1265, max: 1.5629
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4412, max: 1.3844
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3919/18200 [02:49<05:40, 41.98it/s, loss=2.2044]


Logits stats - min: -5.0006, max: 2.4250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7044, max: 1.6046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5371, max: 2.3339
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3929/18200 [02:49<05:49, 40.81it/s, loss=1.6092]


Logits stats - min: -4.0220, max: 2.4257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0906, max: 2.3015
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3940/18200 [02:49<05:23, 44.03it/s, loss=1.7012]


Logits stats - min: -4.6938, max: 2.0971
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5198, max: 2.2537
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1021, max: 1.4865
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3950/18200 [02:50<05:50, 40.66it/s, loss=1.7054]


Logits stats - min: -4.6515, max: 1.6366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7528, max: 1.4728
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6203, max: 1.8687
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3960/18200 [02:50<06:14, 38.01it/s, loss=1.6636]


Logits stats - min: -6.0532, max: 2.1247
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7570, max: 1.9322
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0011, max: 2.0894
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 3982/18200 [02:50<05:36, 42.24it/s, loss=1.5907]


Logits stats - min: -6.7910, max: 1.8226
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5908, max: 1.4933
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0883, max: 2.3631
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8968, max: 1.5230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4818, max: 1.4289
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2208, max: 2.1067
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4001/18200 [02:51<06:13, 38.02it/s, loss=1.6935]


Logits stats - min: -4.7452, max: 2.3100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0666, max: 2.1364
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4032/18200 [02:52<05:58, 39.56it/s, loss=1.7101]


Logits stats - min: -4.3399, max: 1.8001
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2810, max: 1.6189
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4044/18200 [02:52<06:18, 37.37it/s, loss=1.6322]


Logits stats - min: -3.8949, max: 1.3767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9141, max: 2.2453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2784, max: 1.5102
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4055/18200 [02:52<05:38, 41.79it/s, loss=2.1248]


Logits stats - min: -4.6091, max: 1.3672
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4166, max: 2.1903
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4658, max: 1.6079
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4582, max: 2.3822
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4065/18200 [02:53<05:24, 43.53it/s, loss=1.5966]


Logits stats - min: -5.9132, max: 2.9820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9841, max: 1.9927
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5936, max: 1.4514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2078, max: 1.5200
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4070/18200 [02:53<05:28, 43.05it/s, loss=1.6775]


Logits stats - min: -4.8733, max: 1.4604
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9627, max: 2.3977
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▍                  | 4080/18200 [02:53<05:40, 41.51it/s, loss=2.1897]


Logits stats - min: -5.2492, max: 2.1985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5322, max: 2.1524
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1515, max: 1.1609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4310, max: 1.4768
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▍                  | 4095/18200 [02:53<06:04, 38.71it/s, loss=1.6743]


Logits stats - min: -4.4535, max: 1.6192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6168, max: 2.4175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9005, max: 2.2656
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4107/18200 [02:54<05:31, 42.49it/s, loss=1.6405]


Logits stats - min: -5.4625, max: 2.4778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3863, max: 1.5619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6253, max: 1.8369
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4556, max: 1.6225
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4117/18200 [02:54<05:39, 41.45it/s, loss=1.7070]


Logits stats - min: -4.3291, max: 1.3013
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1519, max: 1.8632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8162, max: 2.2253
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4132/18200 [02:54<05:54, 39.71it/s, loss=2.1985]


Logits stats - min: -4.2283, max: 1.5251
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4137/18200 [02:54<05:35, 41.87it/s, loss=1.6929]


Logits stats - min: -5.8239, max: 2.3945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4307, max: 1.6078
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9396, max: 2.2935
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7669, max: 1.8501
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4147/18200 [02:55<05:59, 39.08it/s, loss=1.6406]


Logits stats - min: -4.7357, max: 1.4750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1734, max: 2.5444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3182, max: 1.7986
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4153/18200 [02:55<05:37, 41.56it/s, loss=1.4006]


Logits stats - min: -4.9288, max: 1.3241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0260, max: 2.0309
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4163/18200 [02:55<05:17, 44.21it/s, loss=1.6560]


Logits stats - min: -4.4258, max: 1.4565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7943, max: 1.4205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9620, max: 1.2676
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4173/18200 [02:55<05:52, 39.77it/s, loss=2.1676]


Logits stats - min: -5.0458, max: 1.5562
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4184/18200 [02:55<05:34, 41.90it/s, loss=1.6943]


Logits stats - min: -5.1145, max: 2.2664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0582, max: 1.3459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1366, max: 1.5102
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4199/18200 [02:56<05:41, 41.01it/s, loss=1.6510]


Logits stats - min: -4.2171, max: 1.5820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9410, max: 2.4113
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3403, max: 1.6410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9145, max: 2.2301
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4209/18200 [02:56<05:36, 41.61it/s, loss=2.1681]


Logits stats - min: -4.9052, max: 2.0802
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1333, max: 1.3062
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4220/18200 [02:56<05:33, 41.94it/s, loss=1.6225]


Logits stats - min: -4.4155, max: 1.5941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4658, max: 2.3096
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7455, max: 1.4622
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4235/18200 [02:57<05:54, 39.36it/s, loss=2.3032]


Logits stats - min: -5.8848, max: 1.8872
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6845, max: 2.0044
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8059, max: 2.1111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6001, max: 2.2981
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4245/18200 [02:57<06:00, 38.76it/s, loss=2.1774]


Logits stats - min: -4.2853, max: 1.8373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3789, max: 2.4730
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4250/18200 [02:57<05:53, 39.47it/s, loss=1.7055]


Logits stats - min: -4.3050, max: 1.3647
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7384, max: 2.0700
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7595, max: 1.6470
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4261/18200 [02:57<05:31, 41.99it/s, loss=1.6388]


Logits stats - min: -5.0748, max: 2.0899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6228, max: 2.1968
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▋                  | 4276/18200 [02:58<05:51, 39.64it/s, loss=1.6196]


Logits stats - min: -6.0832, max: 2.1598
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9561, max: 2.2742
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4284/18200 [02:58<05:54, 39.20it/s, loss=1.6301]


Logits stats - min: -4.8863, max: 1.6228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2987, max: 1.7164
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1627, max: 2.3514
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4297/18200 [02:58<05:51, 39.58it/s, loss=1.6935]


Logits stats - min: -4.8464, max: 2.3429
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9200, max: 1.6085
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4307/18200 [02:59<05:30, 41.99it/s, loss=2.1897]


Logits stats - min: -3.9886, max: 2.3572
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3904, max: 1.3326
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5876, max: 1.5667
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7341, max: 1.4177
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4322/18200 [02:59<05:37, 41.10it/s, loss=2.1515]


Logits stats - min: -4.5118, max: 1.6502
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3147, max: 1.2197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7933, max: 1.4231
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4334/18200 [02:59<05:04, 45.55it/s, loss=2.3376]


Logits stats - min: -4.5961, max: 1.5420
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7004, max: 2.0868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8085, max: 2.1621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6726, max: 1.3659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5641, max: 1.3683
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4344/18200 [02:59<05:41, 40.55it/s, loss=1.6141]


Logits stats - min: -4.7664, max: 2.1678
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6162, max: 2.2045
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4354/18200 [03:00<05:41, 40.55it/s, loss=1.6374]


Logits stats - min: -4.5440, max: 2.1262
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8583, max: 1.6130
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4366/18200 [03:00<04:53, 47.11it/s, loss=1.6582]


Logits stats - min: -6.5354, max: 1.6978
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1926, max: 1.6007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7045, max: 1.5963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8628, max: 1.7566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9745, max: 2.0892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4587, max: 1.3948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4438, max: 1.5844
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4376/18200 [03:00<05:17, 43.57it/s, loss=1.6465]


Logits stats - min: -4.2237, max: 1.4534
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7817, max: 1.5550
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4386/18200 [03:00<05:31, 41.66it/s, loss=1.6375]


Logits stats - min: -4.4647, max: 1.4550
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6070, max: 1.6383
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4399/18200 [03:01<05:58, 38.47it/s, loss=1.6210]


Logits stats - min: -5.0366, max: 2.1727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8068, max: 2.0482
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4410/18200 [03:01<05:10, 44.36it/s, loss=1.6911]


Logits stats - min: -4.2285, max: 1.6338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1773, max: 1.3844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7591, max: 1.6297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4410, max: 1.7047
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4420/18200 [03:01<05:16, 43.49it/s, loss=1.6430]


Logits stats - min: -4.6777, max: 2.1845
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3682, max: 1.5437
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6716, max: 2.3577
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4430/18200 [03:01<05:12, 44.02it/s, loss=1.6879]


Logits stats - min: -4.7253, max: 1.5515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3994, max: 1.5521
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3082, max: 1.4311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7615, max: 2.0363
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4440/18200 [03:02<05:26, 42.11it/s, loss=1.6643]


Logits stats - min: -5.1793, max: 2.4798
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4450/18200 [03:02<05:40, 40.38it/s, loss=1.7402]


Logits stats - min: -5.2714, max: 2.4990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5443, max: 2.3075
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4479/18200 [03:03<05:24, 42.34it/s, loss=2.6213]


Logits stats - min: -4.7383, max: 2.2618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6369, max: 1.3541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6772, max: 1.4857
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0493, max: 1.4153
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6686, max: 1.9730
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4484/18200 [03:03<05:22, 42.55it/s, loss=1.6893]


Logits stats - min: -4.9622, max: 2.1127
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4498/18200 [03:03<05:40, 40.24it/s, loss=1.6377]


Logits stats - min: -5.2496, max: 2.1508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8056, max: 1.6316
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4503/18200 [03:03<05:30, 41.45it/s, loss=1.5585]


Logits stats - min: -4.7499, max: 1.3854
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3341, max: 2.0709
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2156, max: 2.1373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6796, max: 1.4735
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4514/18200 [03:03<05:03, 45.03it/s, loss=1.6440]


Logits stats - min: -4.1744, max: 2.1867
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1531, max: 2.0923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6811, max: 2.1566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7380, max: 1.5376
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4525/18200 [03:04<05:09, 44.12it/s, loss=1.6192]


Logits stats - min: -4.3215, max: 2.2355
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6708, max: 1.4172
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5035, max: 1.5546
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4530/18200 [03:04<05:25, 42.00it/s, loss=1.6680]


Logits stats - min: -4.7031, max: 1.3640
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5016, max: 2.3749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1935, max: 1.3599
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4545/18200 [03:04<05:25, 42.01it/s, loss=1.6143]


Logits stats - min: -5.0368, max: 1.6123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6310, max: 3.1270
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2749, max: 2.3204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1036, max: 1.3697
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4555/18200 [03:04<05:30, 41.30it/s, loss=1.5945]


Logits stats - min: -4.5974, max: 1.5769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2560, max: 1.3294
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4579/18200 [03:05<05:29, 41.39it/s, loss=1.7742]


Logits stats - min: -4.3749, max: 1.3404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0044, max: 2.2274
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1752, max: 1.2671
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7923, max: 2.2490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3154, max: 2.0994
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4589/18200 [03:05<05:35, 40.57it/s, loss=2.1515]


Logits stats - min: -4.5542, max: 1.3986
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4599/18200 [03:06<05:51, 38.69it/s, loss=1.6997]


Logits stats - min: -5.0121, max: 1.5940
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4610/18200 [03:06<05:20, 42.34it/s, loss=1.7000]


Logits stats - min: -4.3676, max: 2.0391
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9974, max: 1.4007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6719, max: 2.1619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5582, max: 1.7142
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1180, max: 2.2515
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4620/18200 [03:06<05:04, 44.62it/s, loss=1.7171]


Logits stats - min: -4.7466, max: 2.0731
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0298, max: 1.5124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2281, max: 2.2357
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4635/18200 [03:06<05:49, 38.82it/s, loss=1.6008]


Logits stats - min: -4.5119, max: 1.6070
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4640/18200 [03:07<06:00, 37.61it/s, loss=1.6020]


Logits stats - min: -4.6848, max: 1.6800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2137, max: 1.7651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4445, max: 2.2090
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2402, max: 2.2828
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4659/18200 [03:07<05:41, 39.64it/s, loss=1.6853]


Logits stats - min: -4.7324, max: 2.2208
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4669/18200 [03:07<05:14, 43.04it/s, loss=1.6146]


Logits stats - min: -5.4802, max: 1.8929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7793, max: 1.4965
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8036, max: 1.6115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2317, max: 2.1714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8619, max: 1.1913
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8986, max: 2.2462
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4690/18200 [03:08<05:41, 39.53it/s, loss=1.7007]


Logits stats - min: -4.3291, max: 2.2664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7939, max: 1.5437
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4710/18200 [03:08<05:24, 41.54it/s, loss=1.5809]


Logits stats - min: -4.7795, max: 1.4583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2426, max: 2.2778
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4716/18200 [03:08<05:02, 44.63it/s, loss=1.6853]


Logits stats - min: -4.1730, max: 1.4387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9910, max: 1.3346
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8715, max: 2.1501
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4732/18200 [03:09<05:04, 44.22it/s, loss=1.6608]


Logits stats - min: -5.5235, max: 2.1741
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8394, max: 2.2309
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8250, max: 1.4533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6989, max: 1.5518
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4757/18200 [03:09<05:38, 39.70it/s, loss=2.0218]


Logits stats - min: -4.4653, max: 1.1353
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7676, max: 2.4539
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1664, max: 1.5549
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4767/18200 [03:10<05:22, 41.66it/s, loss=2.0987]


Logits stats - min: -4.2444, max: 1.1602
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0403, max: 2.0420
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4783/18200 [03:10<05:19, 41.95it/s, loss=1.7539]


Logits stats - min: -4.7511, max: 1.3559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3565, max: 1.7257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3492, max: 2.5928
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4793/18200 [03:10<05:40, 39.32it/s, loss=2.0416]


Logits stats - min: -4.1586, max: 2.0882
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4811/18200 [03:11<05:23, 41.36it/s, loss=2.1805]


Logits stats - min: -4.4853, max: 2.1749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3139, max: 2.0991
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3221, max: 1.5815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6028, max: 2.0886
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4822/18200 [03:11<05:16, 42.21it/s, loss=2.1790]


Logits stats - min: -4.8787, max: 1.1942
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5049, max: 1.5050
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4837/18200 [03:11<05:11, 42.87it/s, loss=2.7333]


Logits stats - min: -4.9007, max: 1.4935
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8459, max: 2.1094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2175, max: 2.4968
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2877, max: 2.1647
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4842/18200 [03:11<05:12, 42.76it/s, loss=1.5606]


Logits stats - min: -5.1602, max: 2.1940
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9430, max: 1.7673
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4768, max: 1.6085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8766, max: 2.2080
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4852/18200 [03:12<05:24, 41.15it/s, loss=2.1299]


Logits stats - min: -3.9689, max: 1.4726
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3855, max: 2.2695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2441, max: 1.4665
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4862/18200 [03:12<05:09, 43.08it/s, loss=1.6264]


Logits stats - min: -4.6309, max: 1.4995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9048, max: 1.8260
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6440, max: 1.5395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4589, max: 1.2133
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4872/18200 [03:12<05:16, 42.10it/s, loss=1.8856]


Logits stats - min: -4.7523, max: 1.5528
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4888/18200 [03:13<05:03, 43.83it/s, loss=1.8248]


Logits stats - min: -4.6317, max: 2.2677
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4485, max: 1.5698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7321, max: 1.4400
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9526, max: 2.3651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2858, max: 2.3848
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4893/18200 [03:13<05:13, 42.44it/s, loss=2.1716]


Logits stats - min: -4.2117, max: 1.5193
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3346, max: 1.4746
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4913/18200 [03:13<05:24, 41.01it/s, loss=1.6900]


Logits stats - min: -4.1603, max: 1.7417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4005, max: 1.9376
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4923/18200 [03:13<05:19, 41.59it/s, loss=1.6474]


Logits stats - min: -5.1517, max: 1.3894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9830, max: 2.5173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5286, max: 1.5292
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4934/18200 [03:14<04:54, 45.00it/s, loss=2.1719]


Logits stats - min: -5.0319, max: 2.4036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4271, max: 2.3564
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7530, max: 1.4061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6694, max: 1.6888
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8326, max: 2.2478
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4944/18200 [03:14<04:54, 45.08it/s, loss=1.6871]


Logits stats - min: -4.7141, max: 2.2507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7762, max: 1.5811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7837, max: 2.4287
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9563, max: 2.2789
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5214, max: 1.7756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9632, max: 2.1071
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4954/18200 [03:14<05:17, 41.72it/s, loss=1.6453]


Logits stats - min: -5.4748, max: 2.4203
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1889, max: 1.2068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4532, max: 1.5231
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4964/18200 [03:14<05:12, 42.29it/s, loss=1.6836]


Logits stats - min: -4.2040, max: 1.1578
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7427, max: 1.7794
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4979/18200 [03:15<05:04, 43.44it/s, loss=1.6375]


Logits stats - min: -4.7607, max: 1.4469
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7647, max: 1.7197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8739, max: 1.6420
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4990/18200 [03:15<04:36, 47.85it/s, loss=1.6131]


Logits stats - min: -4.6484, max: 2.2928
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3319, max: 1.2395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7979, max: 1.4444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7232, max: 1.3353
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9422, max: 1.5961
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2237, max: 1.6473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6315, max: 1.4535
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 5000/18200 [03:15<05:02, 43.68it/s, loss=1.6951]


Logits stats - min: -4.6044, max: 1.8337
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0886, max: 2.3169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6875, max: 2.4606
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▌                 | 5010/18200 [03:15<05:20, 41.12it/s, loss=1.6176]


Logits stats - min: -4.7312, max: 2.7083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4707, max: 2.0148
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6710, max: 1.7066
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▌                 | 5020/18200 [03:16<05:20, 41.12it/s, loss=2.0299]


Logits stats - min: -4.6014, max: 1.6222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6636, max: 1.2204
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5030/18200 [03:16<04:59, 43.91it/s, loss=1.6231]


Logits stats - min: -4.6609, max: 1.5347
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2353, max: 2.5883
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6467, max: 1.3962
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5041/18200 [03:16<04:43, 46.36it/s, loss=1.6131]


Logits stats - min: -4.6706, max: 1.4455
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0792, max: 2.1633
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7455, max: 2.3727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1762, max: 2.2824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0038, max: 1.3531
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5056/18200 [03:16<05:00, 43.74it/s, loss=1.9244]


Logits stats - min: -4.7284, max: 1.7381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8929, max: 2.0006
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5061/18200 [03:17<05:13, 41.94it/s, loss=1.6861]


Logits stats - min: -5.4118, max: 2.6831
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1268, max: 2.2438
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1295, max: 2.0705
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5071/18200 [03:17<05:20, 40.93it/s, loss=1.6023]


Logits stats - min: -4.6692, max: 1.7069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6735, max: 2.5906
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0181, max: 1.5408
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5086/18200 [03:17<05:15, 41.54it/s, loss=1.6469]


Logits stats - min: -4.5772, max: 2.4409
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7722, max: 2.1251
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5096/18200 [03:17<05:06, 42.76it/s, loss=2.1406]


Logits stats - min: -4.5268, max: 1.5695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9744, max: 2.3089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1419, max: 1.4335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9507, max: 2.5423
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6130, max: 1.4709
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5107/18200 [03:18<04:54, 44.39it/s, loss=2.1745]


Logits stats - min: -4.5844, max: 1.4921
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5261, max: 2.3104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1580, max: 1.4750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4111, max: 1.5112
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7179, max: 1.3936
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5113/18200 [03:18<04:54, 44.50it/s, loss=1.6162]


Logits stats - min: -5.0228, max: 2.3218
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5128/18200 [03:18<05:11, 41.95it/s, loss=1.5957]


Logits stats - min: -5.2715, max: 1.2265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8101, max: 1.5756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1416, max: 2.5051
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5138/18200 [03:18<05:18, 40.97it/s, loss=1.5884]


Logits stats - min: -4.5831, max: 1.4682
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0259, max: 1.4260
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5148/18200 [03:19<05:17, 41.07it/s, loss=2.0678]


Logits stats - min: -4.1826, max: 1.4420
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4136, max: 2.1625
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5160/18200 [03:19<04:50, 44.89it/s, loss=1.6855]


Logits stats - min: -5.4707, max: 1.9292
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4595, max: 2.5352
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6543, max: 1.4710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7540, max: 1.6425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1766, max: 2.5079
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4865, max: 1.3463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5272, max: 1.5955
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5179/18200 [03:19<05:33, 39.07it/s, loss=1.6770]


Logits stats - min: -4.4013, max: 1.6060
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5184/18200 [03:20<05:23, 40.27it/s, loss=2.1791]


Logits stats - min: -4.9778, max: 2.1545
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5185, max: 1.9853
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▊                 | 5199/18200 [03:20<05:23, 40.19it/s, loss=1.6648]


Logits stats - min: -4.5949, max: 1.6423
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4636, max: 2.6100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1526, max: 2.1106
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▊                 | 5209/18200 [03:20<05:21, 40.44it/s, loss=1.5978]


Logits stats - min: -4.2525, max: 2.1459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5762, max: 2.4491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5306, max: 1.8865
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5220/18200 [03:20<05:07, 42.26it/s, loss=1.6287]


Logits stats - min: -3.7784, max: 1.7456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.9269, max: 1.5055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8394, max: 1.6417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3623, max: 1.6189
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5230/18200 [03:21<05:08, 42.10it/s, loss=1.6778]


Logits stats - min: -4.8118, max: 2.0233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3422, max: 1.3746
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0172, max: 2.3760
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5250/18200 [03:21<05:21, 40.25it/s, loss=1.8984]


Logits stats - min: -4.8802, max: 1.4022
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4637, max: 1.8327
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5260/18200 [03:21<05:07, 42.07it/s, loss=2.0465]


Logits stats - min: -4.6915, max: 2.1815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7475, max: 2.4269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8611, max: 1.6246
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5275/18200 [03:22<04:51, 44.30it/s, loss=1.6192]


Logits stats - min: -4.2395, max: 2.2882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4906, max: 1.6463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0371, max: 1.5411
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5086, max: 1.4957
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8614, max: 1.4928
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7623, max: 1.7309
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5285/18200 [03:22<05:09, 41.72it/s, loss=1.7009]


Logits stats - min: -5.1023, max: 1.3751
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5290/18200 [03:22<05:11, 41.48it/s, loss=1.6377]


Logits stats - min: -4.7651, max: 1.5437
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6302, max: 1.4832
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5304/18200 [03:22<05:21, 40.12it/s, loss=1.5664]


Logits stats - min: -4.3391, max: 1.4817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9532, max: 2.2086
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5315/18200 [03:23<04:56, 43.44it/s, loss=1.7104]


Logits stats - min: -5.6519, max: 2.5066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2631, max: 1.5879
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4503, max: 1.4604
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2523, max: 2.6245
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5325/18200 [03:23<05:02, 42.61it/s, loss=2.1146]


Logits stats - min: -4.5478, max: 1.5542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3389, max: 2.1834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8645, max: 2.3040
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0182, max: 1.1373
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5335/18200 [03:23<05:18, 40.34it/s, loss=1.6291]


Logits stats - min: -4.9676, max: 1.6660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9529, max: 2.2245
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5345/18200 [03:24<05:16, 40.65it/s, loss=1.6319]


Logits stats - min: -4.3585, max: 1.2925
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5929, max: 2.2987
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5361/18200 [03:24<05:01, 42.52it/s, loss=1.6822]


Logits stats - min: -5.0523, max: 2.1855
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7406, max: 2.1928
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9977, max: 1.0805
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5372/18200 [03:24<04:44, 45.09it/s, loss=1.6175]


Logits stats - min: -4.4954, max: 1.7315
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1429, max: 2.3090
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8588, max: 2.3388
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0047, max: 2.0910
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5382/18200 [03:24<05:17, 40.32it/s, loss=1.5541]


Logits stats - min: -4.2632, max: 2.3774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3620, max: 1.7552
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5391/18200 [03:25<05:30, 38.73it/s, loss=1.9530]


Logits stats - min: -5.2540, max: 2.3121
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2763, max: 2.0628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4282, max: 1.5138
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5401/18200 [03:25<05:16, 40.40it/s, loss=1.6827]


Logits stats - min: -4.5082, max: 2.0357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1064, max: 2.3335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5189, max: 2.2425
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5411/18200 [03:25<04:52, 43.79it/s, loss=1.8675]


Logits stats - min: -4.4764, max: 1.5220
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5577, max: 1.6571
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4911, max: 1.6988
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5421/18200 [03:25<04:56, 43.14it/s, loss=2.0707]


Logits stats - min: -4.5322, max: 1.6280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5768, max: 2.2313
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8523, max: 2.2587
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5436/18200 [03:26<05:17, 40.20it/s, loss=1.6160]


Logits stats - min: -4.6799, max: 1.5234
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7286, max: 2.2100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4939, max: 2.5516
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5451/18200 [03:26<05:19, 39.88it/s, loss=1.6441]


Logits stats - min: -4.8025, max: 2.0385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6789, max: 1.4372
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5461/18200 [03:26<04:50, 43.78it/s, loss=2.0648]


Logits stats - min: -4.3397, max: 1.5628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9532, max: 2.2628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0090, max: 1.5621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6217, max: 1.3577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1513, max: 1.3388
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5472/18200 [03:26<04:43, 44.90it/s, loss=1.6377]


Logits stats - min: -4.0922, max: 1.4825
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5491/18200 [03:27<05:25, 39.06it/s, loss=1.6199]


Logits stats - min: -6.4340, max: 1.9809
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5505/18200 [03:27<05:09, 41.05it/s, loss=1.5887]


Logits stats - min: -4.6469, max: 1.5711
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8008, max: 1.7297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1446, max: 2.3784
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5510/18200 [03:27<05:03, 41.78it/s, loss=2.1330]


Logits stats - min: -5.4702, max: 2.1245
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6324, max: 2.1789
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8284, max: 1.3082
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5525/18200 [03:28<04:43, 44.72it/s, loss=1.6182]


Logits stats - min: -4.7571, max: 2.2974
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6297, max: 1.6720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1980, max: 1.6197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6599, max: 1.5212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3051, max: 2.1188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4463, max: 1.2763
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5536/18200 [03:28<04:40, 45.07it/s, loss=1.6149]


Logits stats - min: -4.4028, max: 1.6240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2575, max: 1.8111
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5551/18200 [03:28<05:01, 41.95it/s, loss=1.6430]


Logits stats - min: -4.5650, max: 2.1832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6525, max: 2.0412
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▎                | 5571/18200 [03:29<04:54, 42.85it/s, loss=1.5607]


Logits stats - min: -4.6228, max: 2.1071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7662, max: 2.6187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3391, max: 2.0895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6560, max: 2.0683
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▎                | 5581/18200 [03:29<04:40, 44.97it/s, loss=1.5615]


Logits stats - min: -4.9869, max: 2.3981
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6327, max: 1.5030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9438, max: 1.4902
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5525, max: 1.7769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2307, max: 1.5719
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▎                | 5592/18200 [03:29<04:49, 43.60it/s, loss=1.6170]


Logits stats - min: -4.4934, max: 1.5452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6658, max: 2.1056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1771, max: 1.2419
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5602/18200 [03:30<05:08, 40.79it/s, loss=1.6977]


Logits stats - min: -4.9551, max: 1.6906
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2782, max: 2.3601
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5619/18200 [03:30<04:21, 48.10it/s, loss=1.6791]


Logits stats - min: -4.3197, max: 1.3039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1491, max: 2.1729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0083, max: 2.4934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1526, max: 2.3436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1610, max: 1.1253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0477, max: 2.1707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6231, max: 1.6660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9926, max: 2.0915
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2829, max: 2.1369
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5626/18200 [03:30<04:11, 49.90it/s, loss=1.6054]


Logits stats - min: -4.7892, max: 1.6196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2420, max: 1.3739
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7542, max: 1.4408
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5642/18200 [03:30<04:26, 47.19it/s, loss=1.6975]


Logits stats - min: -5.2995, max: 2.4976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9039, max: 2.4161
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6674, max: 1.5965
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5657/18200 [03:31<05:07, 40.80it/s, loss=2.1454]


Logits stats - min: -4.9670, max: 1.6062
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5672/18200 [03:31<04:59, 41.83it/s, loss=1.6918]


Logits stats - min: -4.9477, max: 2.6304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4253, max: 2.4119
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5682/18200 [03:31<04:58, 41.93it/s, loss=1.6352]


Logits stats - min: -5.4966, max: 2.0202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9884, max: 2.2371
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5701/18200 [03:32<05:16, 39.53it/s, loss=1.6861]


Logits stats - min: -4.5698, max: 1.5613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4812, max: 1.7901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4033, max: 1.5847
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5711/18200 [03:32<05:09, 40.39it/s, loss=2.1468]


Logits stats - min: -4.6431, max: 1.5853
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6020, max: 1.5061
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5721/18200 [03:32<04:59, 41.66it/s, loss=1.7572]


Logits stats - min: -4.6863, max: 1.4111
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5731/18200 [03:33<04:56, 42.00it/s, loss=2.6372]


Logits stats - min: -5.2621, max: 2.1597
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5746/18200 [03:33<05:04, 40.87it/s, loss=2.1027]


Logits stats - min: -5.1018, max: 1.2589
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6640, max: 1.4573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3520, max: 2.1228


Training:  32%|███████▌                | 5751/18200 [03:33<05:02, 41.17it/s, loss=1.5550]

Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5446, max: 1.4267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0344, max: 2.2622
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4834, max: 1.3912
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2067, max: 2.2119
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5767/18200 [03:33<04:58, 41.65it/s, loss=1.5058]


Logits stats - min: -5.0792, max: 2.1292
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5777/18200 [03:34<05:03, 40.91it/s, loss=1.6055]


Logits stats - min: -4.8126, max: 1.5105
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5791/18200 [03:34<05:13, 39.61it/s, loss=2.1839]


Logits stats - min: -4.7038, max: 1.6625
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5808/18200 [03:35<05:12, 39.71it/s, loss=1.6838]


Logits stats - min: -4.6818, max: 2.3803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4273, max: 2.7158
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0041, max: 1.8242
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5817/18200 [03:35<05:05, 40.52it/s, loss=1.5535]


Logits stats - min: -4.4004, max: 1.6292
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2699, max: 2.4143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1330, max: 2.3935
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3605, max: 2.4097
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5833/18200 [03:35<04:27, 46.15it/s, loss=1.6310]


Logits stats - min: -4.4402, max: 1.7490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6366, max: 1.7761
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6403, max: 1.5628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2351, max: 2.4175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9190, max: 2.2391
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5848/18200 [03:35<04:51, 42.37it/s, loss=1.6184]


Logits stats - min: -4.8279, max: 1.7636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1850, max: 1.5080
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5863/18200 [03:36<05:26, 37.83it/s, loss=1.6346]


Logits stats - min: -5.3309, max: 2.3445
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5877/18200 [03:36<05:13, 39.26it/s, loss=1.5995]


Logits stats - min: -5.4683, max: 2.2779
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▊                | 5886/18200 [03:36<05:12, 39.35it/s, loss=1.7237]


Logits stats - min: -5.0576, max: 2.3663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5701, max: 2.2878
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▊                | 5895/18200 [03:37<04:54, 41.85it/s, loss=1.6266]


Logits stats - min: -5.1311, max: 2.2613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5323, max: 2.1934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4137, max: 1.6451
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▊                | 5905/18200 [03:37<05:07, 39.95it/s, loss=1.6743]


Logits stats - min: -4.8001, max: 1.5401
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3156, max: 1.8357
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5918/18200 [03:37<05:24, 37.80it/s, loss=2.1569]


Logits stats - min: -5.2156, max: 2.1173
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5927/18200 [03:37<05:00, 40.86it/s, loss=1.6629]


Logits stats - min: -4.4630, max: 1.8419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0637, max: 2.2913
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5937/18200 [03:38<04:56, 41.41it/s, loss=2.0972]


Logits stats - min: -4.0957, max: 1.6253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5208, max: 1.4177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2264, max: 2.2005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8260, max: 2.2737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5675, max: 2.2613
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5947/18200 [03:38<04:46, 42.71it/s, loss=1.6760]


Logits stats - min: -4.4565, max: 1.6299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9820, max: 1.6070
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5962/18200 [03:38<05:02, 40.42it/s, loss=1.6162]


Logits stats - min: -5.2877, max: 2.3568
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5967/18200 [03:38<05:08, 39.59it/s, loss=2.6605]


Logits stats - min: -5.6140, max: 2.4956
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 5976/18200 [03:39<04:51, 41.87it/s, loss=1.6043]


Logits stats - min: -5.0826, max: 2.4247
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6940, max: 1.7487
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6040, max: 1.4210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3092, max: 1.4485
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 5986/18200 [03:39<04:45, 42.74it/s, loss=1.5355]


Logits stats - min: -5.0152, max: 1.6378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4987, max: 1.5512
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6001/18200 [03:39<04:53, 41.51it/s, loss=1.6270]


Logits stats - min: -4.9771, max: 2.2415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5074, max: 1.5846
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7489, max: 1.4827
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5368, max: 1.4406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2084, max: 1.6048


Training:  33%|███████▉                | 6012/18200 [03:39<04:40, 43.45it/s, loss=1.6220]

Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9635, max: 1.4432
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6017/18200 [03:40<04:43, 43.01it/s, loss=1.9281]


Logits stats - min: -5.1909, max: 2.2566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1633, max: 2.2734
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6052/18200 [03:40<04:33, 44.46it/s, loss=2.0867]


Logits stats - min: -4.8970, max: 2.0704
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2660, max: 2.2763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7967, max: 2.4613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2439, max: 2.1766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2101, max: 1.3040
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6063/18200 [03:41<04:10, 48.40it/s, loss=1.7772]


Logits stats - min: -5.0864, max: 2.4486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8418, max: 1.7895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9278, max: 1.4464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3475, max: 2.4853
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3268, max: 1.3736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8760, max: 1.6937
Target unique values: tensor([0], device='cuda:0')


Training:  33%|████████                | 6073/18200 [03:41<04:34, 44.13it/s, loss=2.0370]


Logits stats - min: -5.7342, max: 2.2954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4229, max: 2.2864
Target unique values: tensor([0], device='cuda:0')


Training:  33%|████████                | 6083/18200 [03:41<04:45, 42.40it/s, loss=1.6265]


Logits stats - min: -6.2018, max: 1.5814
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5358, max: 1.4191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1215, max: 1.5739
Target unique values: tensor([0], device='cuda:0')


Training:  33%|████████                | 6093/18200 [03:41<04:50, 41.64it/s, loss=1.6376]


Logits stats - min: -5.3168, max: 2.4054
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6098/18200 [03:42<04:52, 41.41it/s, loss=1.5990]


Logits stats - min: -5.0559, max: 1.4658
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6110/18200 [03:42<04:32, 44.37it/s, loss=1.6776]


Logits stats - min: -5.2815, max: 2.2985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9021, max: 2.1653
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7655, max: 2.6341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8868, max: 1.4563
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6120/18200 [03:42<04:29, 44.82it/s, loss=1.7369]


Logits stats - min: -4.5567, max: 1.5482
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7219, max: 1.7213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4969, max: 2.3464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3870, max: 2.4436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0303, max: 1.8959
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6130/18200 [03:42<04:25, 45.39it/s, loss=1.6321]


Logits stats - min: -4.0837, max: 1.8020
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9252, max: 1.5905
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3148, max: 2.1487
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6135/18200 [03:42<04:51, 41.35it/s, loss=1.5249]


Logits stats - min: -4.8935, max: 1.5623
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1096, max: 1.2958
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2449, max: 1.6431
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6150/18200 [03:43<04:33, 44.04it/s, loss=1.6840]


Logits stats - min: -5.5486, max: 2.1821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4101, max: 2.6116
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7377, max: 1.5511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7405, max: 2.3911
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6170/18200 [03:43<04:38, 43.17it/s, loss=1.6702]


Logits stats - min: -4.8278, max: 1.4989
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6549, max: 1.4821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6682, max: 1.6685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7577, max: 1.1731
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6181/18200 [03:43<04:26, 45.11it/s, loss=1.6051]


Logits stats - min: -5.1173, max: 1.5751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1181, max: 1.4923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8291, max: 1.4853
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6191/18200 [03:44<04:36, 43.48it/s, loss=2.1241]


Logits stats - min: -5.3521, max: 1.5409
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3044, max: 1.2656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2596, max: 2.1286
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6201/18200 [03:44<04:53, 40.91it/s, loss=1.6115]


Logits stats - min: -5.9691, max: 2.2059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9063, max: 2.0664
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6211/18200 [03:44<04:44, 42.18it/s, loss=1.5393]


Logits stats - min: -5.0363, max: 2.2735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0739, max: 2.4083
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6226/18200 [03:44<04:26, 44.89it/s, loss=1.6071]


Logits stats - min: -4.5053, max: 1.5038
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2146, max: 2.1406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7272, max: 2.0578
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5378, max: 2.0883
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6241/18200 [03:45<04:40, 42.58it/s, loss=1.9289]


Logits stats - min: -5.1185, max: 1.4918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2132, max: 1.6310
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7552, max: 2.2239
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6251/18200 [03:45<04:37, 43.06it/s, loss=1.6179]


Logits stats - min: -5.0611, max: 1.5737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9207, max: 1.5174
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8115, max: 2.0889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1771, max: 2.1590
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▎               | 6261/18200 [03:45<04:42, 42.24it/s, loss=2.1948]


Logits stats - min: -4.6694, max: 1.6075
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▎               | 6272/18200 [03:46<04:24, 45.07it/s, loss=2.0396]


Logits stats - min: -5.6801, max: 2.3876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3349, max: 1.4543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9582, max: 1.6693
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4330, max: 2.3253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1683, max: 1.7553
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6282/18200 [03:46<04:23, 45.26it/s, loss=1.6377]


Logits stats - min: -4.8977, max: 2.1187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2755, max: 2.2376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8406, max: 2.2273
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3238, max: 2.5515
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6297/18200 [03:46<04:55, 40.31it/s, loss=1.5592]


Logits stats - min: -5.3346, max: 2.1451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8004, max: 1.5023
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6312/18200 [03:47<04:38, 42.71it/s, loss=1.6206]


Logits stats - min: -5.5663, max: 1.4954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9632, max: 2.5099
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8424, max: 1.3817
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6327/18200 [03:47<04:50, 40.83it/s, loss=1.6142]


Logits stats - min: -5.0152, max: 2.1287
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5802, max: 2.3962
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6342/18200 [03:47<04:47, 41.30it/s, loss=1.6233]


Logits stats - min: -4.0821, max: 1.3158
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0560, max: 1.6285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8993, max: 1.4703
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2470, max: 2.1970
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6357/18200 [03:48<04:38, 42.60it/s, loss=1.7290]


Logits stats - min: -5.3849, max: 1.7241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9616, max: 1.7828
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5973, max: 1.5577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3629, max: 2.3141
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6367/18200 [03:48<04:39, 42.38it/s, loss=1.6647]


Logits stats - min: -4.1895, max: 1.4855
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6057, max: 1.9844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0519, max: 2.3012
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6377/18200 [03:48<04:31, 43.57it/s, loss=1.7697]


Logits stats - min: -4.8418, max: 2.0688
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9852, max: 1.2424
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6387/18200 [03:48<04:34, 42.96it/s, loss=1.6148]


Logits stats - min: -4.9858, max: 1.5438
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0279, max: 1.6252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5849, max: 1.9533
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6397/18200 [03:48<04:46, 41.26it/s, loss=1.5936]


Logits stats - min: -5.2256, max: 1.5377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6776, max: 1.8582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0569, max: 1.6130
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6407/18200 [03:49<04:57, 39.59it/s, loss=1.5361]


Logits stats - min: -4.3548, max: 1.5257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3007, max: 2.0241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9190, max: 1.3043
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3143, max: 1.5029
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6424/18200 [03:49<04:25, 44.31it/s, loss=2.0353]


Logits stats - min: -5.1652, max: 2.3578
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6718, max: 1.4847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2921, max: 2.1269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3695, max: 2.1386
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6436/18200 [03:49<03:58, 49.38it/s, loss=1.6600]


Logits stats - min: -5.4495, max: 2.2672
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6911, max: 2.4429
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6271, max: 1.6100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0065, max: 2.2412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5178, max: 2.1413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6838, max: 1.4842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9377, max: 1.5778
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▌               | 6451/18200 [03:50<04:42, 41.55it/s, loss=1.9626]


Logits stats - min: -4.8779, max: 1.5513
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2554, max: 2.3062
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6466/18200 [03:50<04:44, 41.28it/s, loss=1.5906]


Logits stats - min: -5.5139, max: 2.0972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6882, max: 1.5459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5152, max: 1.2214
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6477/18200 [03:50<04:15, 45.94it/s, loss=1.9777]


Logits stats - min: -5.1999, max: 2.1457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0581, max: 2.3729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7608, max: 2.3115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8256, max: 2.5905
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2999, max: 2.3456
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6492/18200 [03:51<04:27, 43.79it/s, loss=2.0217]


Logits stats - min: -5.3572, max: 2.1540
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0971, max: 1.2489
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6497/18200 [03:51<04:37, 42.21it/s, loss=2.7834]


Logits stats - min: -5.8156, max: 2.3380
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7182, max: 1.5509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5150, max: 2.1652
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6522/18200 [03:51<04:50, 40.24it/s, loss=1.6128]


Logits stats - min: -5.5202, max: 2.2724
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6532/18200 [03:52<04:32, 42.75it/s, loss=2.1861]


Logits stats - min: -4.3984, max: 2.5754
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8487, max: 2.6505
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6542/18200 [03:52<04:38, 41.81it/s, loss=1.6840]


Logits stats - min: -4.9938, max: 1.5738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7317, max: 1.6432
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6557/18200 [03:52<04:36, 42.14it/s, loss=1.6180]


Logits stats - min: -5.4362, max: 2.4069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6714, max: 1.5094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5437, max: 1.7017
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0800, max: 2.3371
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6567/18200 [03:53<04:26, 43.67it/s, loss=1.9452]


Logits stats - min: -5.4367, max: 2.5769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.0521, max: 1.2605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3649, max: 1.2045
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6577/18200 [03:53<04:21, 44.52it/s, loss=1.5593]


Logits stats - min: -6.8413, max: 1.8236
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3656, max: 2.2799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8257, max: 1.6573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7414, max: 2.2386
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6582/18200 [03:53<04:21, 44.36it/s, loss=2.0379]


Logits stats - min: -4.8173, max: 2.5387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1668, max: 1.5141
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6592/18200 [03:53<04:28, 43.26it/s, loss=1.6080]


Logits stats - min: -5.6174, max: 2.2274
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1155, max: 2.4523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9845, max: 2.4180
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6607/18200 [03:53<04:54, 39.31it/s, loss=1.6907]


Logits stats - min: -5.7038, max: 2.3394
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6618/18200 [03:54<04:35, 41.97it/s, loss=2.1252]


Logits stats - min: -5.5512, max: 2.7205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9465, max: 1.5381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0956, max: 2.0615
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2915, max: 2.4302
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6623/18200 [03:54<04:33, 42.38it/s, loss=2.0553]


Logits stats - min: -5.2432, max: 2.3051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3310, max: 1.3712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1587, max: 2.3718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3630, max: 1.4056
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▊               | 6638/18200 [03:54<04:44, 40.58it/s, loss=1.6005]


Logits stats - min: -4.9990, max: 2.4012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3630, max: 2.5523
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6649/18200 [03:54<04:30, 42.71it/s, loss=1.9550]


Logits stats - min: -4.5131, max: 1.5690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9845, max: 1.7638
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8036, max: 1.6055
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6665/18200 [03:55<04:29, 42.82it/s, loss=1.6710]


Logits stats - min: -4.5862, max: 1.2651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0213, max: 2.2946
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8905, max: 1.5519
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.1181, max: 1.4498
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7678, max: 1.6778
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6671/18200 [03:55<04:12, 45.65it/s, loss=1.9237]


Logits stats - min: -5.3539, max: 1.4824
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6681/18200 [03:55<04:48, 39.90it/s, loss=1.6168]


Logits stats - min: -5.3843, max: 2.3769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0774, max: 1.9093
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1555, max: 2.2056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3999, max: 1.5683
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6692/18200 [03:55<04:31, 42.37it/s, loss=1.5512]


Logits stats - min: -5.3142, max: 2.3448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9787, max: 2.2573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0829, max: 1.6789
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.8484, max: 1.3914
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1936, max: 1.8400
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1606, max: 2.3034
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6705/18200 [03:56<04:03, 47.24it/s, loss=1.6065]


Logits stats - min: -5.4336, max: 2.6239
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4766, max: 1.6757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7653, max: 2.2919
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8608, max: 2.3681
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6725/18200 [03:56<04:28, 42.74it/s, loss=1.6168]


Logits stats - min: -6.4778, max: 1.8027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8957, max: 2.2604
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0272, max: 2.0995
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6735/18200 [03:56<05:02, 37.93it/s, loss=1.6726]


Logits stats - min: -4.8758, max: 2.3157
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6750/18200 [03:57<04:43, 40.45it/s, loss=1.5370]


Logits stats - min: -4.5160, max: 1.3261
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6760/18200 [03:57<04:43, 40.39it/s, loss=1.5608]


Logits stats - min: -4.9398, max: 2.0805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1535, max: 1.5464
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6770/18200 [03:57<04:32, 42.00it/s, loss=1.6241]


Logits stats - min: -5.2766, max: 2.8965
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6376, max: 2.2667
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7317, max: 1.5317
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9571, max: 1.4872
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6786/18200 [03:58<04:25, 42.96it/s, loss=1.5657]


Logits stats - min: -7.4399, max: 2.0041
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2743, max: 2.0944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6568, max: 1.4879
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6796/18200 [03:58<04:38, 40.97it/s, loss=1.5862]


Logits stats - min: -4.8075, max: 1.5386
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6807/18200 [03:58<04:20, 43.73it/s, loss=1.5380]


Logits stats - min: -4.8557, max: 1.6546
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4522, max: 2.4443
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6430, max: 2.0541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1505, max: 2.0531
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6822/18200 [03:59<04:32, 41.69it/s, loss=1.6200]


Logits stats - min: -4.5946, max: 2.3305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4943, max: 1.4485
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4123, max: 2.1818
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6832/18200 [03:59<04:35, 41.23it/s, loss=1.7582]


Logits stats - min: -4.9695, max: 1.5759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9726, max: 1.8092
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7826, max: 1.9280
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6843/18200 [03:59<04:27, 42.42it/s, loss=1.6673]


Logits stats - min: -5.2846, max: 1.8098
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8880, max: 2.0535
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9084, max: 1.4107
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6868/18200 [04:00<04:41, 40.24it/s, loss=1.6297]


Logits stats - min: -4.7283, max: 2.3220
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6873/18200 [04:00<04:35, 41.19it/s, loss=1.9352]


Logits stats - min: -6.4200, max: 1.9347
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4620, max: 1.3379
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6888/18200 [04:00<04:26, 42.44it/s, loss=1.5944]


Logits stats - min: -5.3936, max: 2.2453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1608, max: 1.2639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7479, max: 1.4048
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5318, max: 2.1826
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2596, max: 2.1338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3857, max: 2.5669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9285, max: 2.3268
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6899/18200 [04:00<04:06, 45.80it/s, loss=1.6171]


Logits stats - min: -5.0530, max: 2.3394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8053, max: 1.7257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1994, max: 2.2335
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6904/18200 [04:01<04:11, 44.90it/s, loss=1.6097]


Logits stats - min: -5.3269, max: 2.2063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8744, max: 1.1527
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6914/18200 [04:01<04:13, 44.61it/s, loss=1.6706]


Logits stats - min: -5.0228, max: 1.4788
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3628, max: 2.5356
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6924/18200 [04:01<04:41, 40.01it/s, loss=1.6108]


Logits stats - min: -4.8120, max: 1.6730
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7442, max: 2.1522
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6929/18200 [04:01<04:30, 41.71it/s, loss=1.6116]


Logits stats - min: -5.0549, max: 2.3355
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7763, max: 1.4675
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6951/18200 [04:02<04:48, 39.03it/s, loss=1.6745]


Logits stats - min: -5.2150, max: 2.1677
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7680, max: 2.3498
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6973/18200 [04:02<04:37, 40.42it/s, loss=1.6426]


Logits stats - min: -5.1624, max: 1.4201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4707, max: 1.8039
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6978/18200 [04:02<04:58, 37.62it/s, loss=1.6775]


Logits stats - min: -4.6540, max: 2.3353
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0827, max: 2.3914
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6987/18200 [04:03<04:53, 38.25it/s, loss=1.6207]


Logits stats - min: -5.7222, max: 2.0265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0130, max: 1.4486
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6997/18200 [04:03<04:35, 40.69it/s, loss=1.6015]


Logits stats - min: -4.6608, max: 2.2758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8505, max: 1.3770
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 7007/18200 [04:03<04:33, 40.85it/s, loss=1.6042]


Logits stats - min: -4.5885, max: 2.2827
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8371, max: 2.2311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1550, max: 1.6304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3859, max: 2.2792
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7017/18200 [04:03<04:24, 42.25it/s, loss=1.6727]


Logits stats - min: -5.0183, max: 1.4676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -3.7853, max: 1.1717
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0925, max: 1.4800
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7037/18200 [04:04<04:29, 41.49it/s, loss=1.6144]


Logits stats - min: -4.8505, max: 1.6300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3295, max: 1.4970
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7047/18200 [04:04<04:34, 40.69it/s, loss=1.5810]


Logits stats - min: -5.9799, max: 2.2174
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0617, max: 1.3904
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7057/18200 [04:04<04:33, 40.79it/s, loss=1.5249]


Logits stats - min: -5.1074, max: 1.4761
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1560, max: 2.2585
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5242, max: 2.2837
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7068/18200 [04:05<04:12, 44.16it/s, loss=1.5339]


Logits stats - min: -4.3777, max: 1.5205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1754, max: 2.0005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7392, max: 1.6227
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3966, max: 1.6901
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7078/18200 [04:05<04:25, 41.91it/s, loss=1.6240]


Logits stats - min: -5.1747, max: 1.3971
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4220, max: 1.2939
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5618, max: 2.2306
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7093/18200 [04:05<04:15, 43.51it/s, loss=2.7048]


Logits stats - min: -6.1904, max: 1.8277
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8952, max: 1.5430
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7103/18200 [04:05<04:41, 39.42it/s, loss=1.6047]


Logits stats - min: -5.0247, max: 2.3364
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7366, max: 2.0166
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7112/18200 [04:06<04:41, 39.45it/s, loss=1.5331]


Logits stats - min: -4.9979, max: 1.5550
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6561, max: 1.7842
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7127/18200 [04:06<04:28, 41.24it/s, loss=1.6394]


Logits stats - min: -4.8840, max: 1.8569
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7142/18200 [04:06<04:15, 43.32it/s, loss=1.5919]


Logits stats - min: -5.9111, max: 2.1381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0480, max: 2.2259
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7152/18200 [04:07<04:29, 40.94it/s, loss=1.9973]


Logits stats - min: -5.3037, max: 2.1759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9904, max: 1.6188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7521, max: 2.6263
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7157/18200 [04:07<04:24, 41.77it/s, loss=1.6014]


Logits stats - min: -4.8533, max: 1.6656
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7167/18200 [04:07<04:15, 43.21it/s, loss=1.5275]


Logits stats - min: -4.7391, max: 1.7522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1583, max: 1.5918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5966, max: 2.2686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6476, max: 2.5751
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7182/18200 [04:07<04:07, 44.46it/s, loss=1.5524]


Logits stats - min: -5.9435, max: 2.6929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9115, max: 1.5920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7404, max: 1.7569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2645, max: 2.6005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4429, max: 1.7945
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▍              | 7192/18200 [04:07<04:00, 45.68it/s, loss=2.1771]


Logits stats - min: -5.7752, max: 2.3464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9680, max: 2.2333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3831, max: 2.5230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9598, max: 2.4353
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▍              | 7203/18200 [04:08<04:04, 44.90it/s, loss=1.5327]


Logits stats - min: -4.5589, max: 1.5246
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6455, max: 1.2956
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8327, max: 2.3031
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7213/18200 [04:08<04:06, 44.64it/s, loss=1.5951]


Logits stats - min: -5.0251, max: 1.5606
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0775, max: 1.6286
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2292, max: 1.7817
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7228/18200 [04:08<04:17, 42.54it/s, loss=1.6070]


Logits stats - min: -5.1837, max: 2.0831
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9487, max: 1.6130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5899, max: 1.6350
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7238/18200 [04:09<04:14, 43.00it/s, loss=1.5716]


Logits stats - min: -5.4311, max: 2.5132
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7248/18200 [04:09<04:12, 43.45it/s, loss=2.7504]


Logits stats - min: -4.9399, max: 1.5734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1959, max: 2.2315
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7265/18200 [04:09<03:57, 46.06it/s, loss=1.6185]


Logits stats - min: -4.8253, max: 1.4212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3154, max: 2.2199
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1152, max: 2.1296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5217, max: 1.9713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4351, max: 1.8966
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1412, max: 2.1822
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7275/18200 [04:09<04:05, 44.48it/s, loss=1.6674]


Logits stats - min: -4.9229, max: 1.4232
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2320, max: 2.2545
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7285/18200 [04:10<04:14, 42.85it/s, loss=1.6036]


Logits stats - min: -5.5396, max: 2.5589
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2037, max: 1.8756
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7295/18200 [04:10<04:17, 42.38it/s, loss=1.5767]


Logits stats - min: -4.6834, max: 2.2765
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0238, max: 1.6180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3943, max: 1.8046
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7317/18200 [04:10<04:10, 43.48it/s, loss=1.5924]


Logits stats - min: -5.2202, max: 1.6413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3894, max: 2.3032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5390, max: 2.1560
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1710, max: 2.1533
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7327/18200 [04:11<04:14, 42.67it/s, loss=1.6640]


Logits stats - min: -5.7196, max: 2.1446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6232, max: 2.1132
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7342/18200 [04:11<04:25, 40.92it/s, loss=1.5072]


Logits stats - min: -5.3435, max: 1.6999
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7357/18200 [04:11<04:21, 41.48it/s, loss=1.5202]


Logits stats - min: -5.7877, max: 2.3458
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1912, max: 2.4487
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3500, max: 1.3799
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7367/18200 [04:12<04:11, 43.14it/s, loss=1.6611]


Logits stats - min: -5.1492, max: 2.1298
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1928, max: 1.6183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2870, max: 1.5123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3686, max: 2.3293
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▋              | 7377/18200 [04:12<04:14, 42.58it/s, loss=1.7054]


Logits stats - min: -4.8170, max: 1.4975
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4793, max: 1.8464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4901, max: 2.4630
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▋              | 7387/18200 [04:12<04:19, 41.60it/s, loss=1.5431]


Logits stats - min: -5.2052, max: 1.5609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4173, max: 1.6989
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7402/18200 [04:12<04:21, 41.26it/s, loss=1.6739]


Logits stats - min: -4.8272, max: 1.7683
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9652, max: 2.4239
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7412/18200 [04:13<04:22, 41.14it/s, loss=1.6095]


Logits stats - min: -4.7342, max: 1.7658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0423, max: 2.2882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7033, max: 1.7455
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1789, max: 2.3936
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7740, max: 1.9293
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7425/18200 [04:13<04:45, 37.74it/s, loss=1.6641]


Logits stats - min: -4.9395, max: 1.7637
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7439/18200 [04:13<04:38, 38.65it/s, loss=1.9521]


Logits stats - min: -5.4115, max: 2.1845
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9315, max: 1.7088
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6827, max: 2.0769
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7445/18200 [04:14<04:07, 43.48it/s, loss=1.6196]


Logits stats - min: -5.9082, max: 1.9797
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9566, max: 2.1597
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7456/18200 [04:14<04:06, 43.60it/s, loss=1.5740]


Logits stats - min: -5.3133, max: 2.0973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4611, max: 2.2163
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8387, max: 2.5860
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4839, max: 2.1587
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7471/18200 [04:14<04:25, 40.40it/s, loss=1.6634]


Logits stats - min: -5.4460, max: 2.3358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7342, max: 1.7702
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7481/18200 [04:15<04:15, 42.03it/s, loss=1.6070]


Logits stats - min: -5.3551, max: 2.3970
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1356, max: 1.7735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7412, max: 1.5492
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7491/18200 [04:15<04:11, 42.52it/s, loss=2.0185]


Logits stats - min: -6.1378, max: 2.1363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4332, max: 1.8786
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0864, max: 1.6229
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7506/18200 [04:15<04:09, 42.78it/s, loss=1.6075]


Logits stats - min: -5.3948, max: 2.4148
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6097, max: 2.3607
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9764, max: 1.6065
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7516/18200 [04:15<04:03, 43.96it/s, loss=1.6610]


Logits stats - min: -5.4053, max: 2.1760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1824, max: 1.7334
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2398, max: 1.9753
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7532/18200 [04:16<04:01, 44.22it/s, loss=1.9225]


Logits stats - min: -5.4403, max: 2.1041
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1878, max: 1.5224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1128, max: 1.4717
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9948, max: 2.4044
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7542/18200 [04:16<04:18, 41.20it/s, loss=1.5434]


Logits stats - min: -4.9397, max: 2.4578
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4729, max: 1.8822
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7547/18200 [04:16<04:09, 42.77it/s, loss=1.6082]


Logits stats - min: -5.0377, max: 2.1981
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3406, max: 2.3393
Target unique values: tensor([0], device='cuda:0')


Training:  42%|█████████▉              | 7557/18200 [04:16<04:03, 43.71it/s, loss=2.3087]


Logits stats - min: -4.9467, max: 1.2185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0995, max: 1.5899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8045, max: 1.7312
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8738, max: 2.2493
Target unique values: tensor([0], device='cuda:0')


Training:  42%|█████████▉              | 7567/18200 [04:16<03:52, 45.81it/s, loss=1.6080]


Logits stats - min: -4.8998, max: 1.6649
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5213, max: 2.1584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0954, max: 1.6561
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7588/18200 [04:17<03:49, 46.30it/s, loss=2.1893]


Logits stats - min: -5.2978, max: 1.2735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4906, max: 1.8550
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7001, max: 1.7091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2363, max: 1.6314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9909, max: 1.9128
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2216, max: 1.5221
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6181, max: 2.0299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3535, max: 2.1731
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7599/18200 [04:17<03:48, 46.39it/s, loss=1.6258]


Logits stats - min: -4.7128, max: 1.6695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9102, max: 2.4943
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1927, max: 1.9754
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0044, max: 1.6512
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7614/18200 [04:17<04:04, 43.38it/s, loss=2.1164]


Logits stats - min: -4.2574, max: 1.9594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6912, max: 2.4289
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9143, max: 2.5455
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7620/18200 [04:18<03:55, 44.94it/s, loss=1.7168]


Logits stats - min: -5.0779, max: 2.5273
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8645, max: 2.1865
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5085, max: 1.9731
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7635/18200 [04:18<04:17, 41.01it/s, loss=1.6042]


Logits stats - min: -5.2433, max: 1.7206
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5750, max: 2.3692
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7641/18200 [04:18<04:06, 42.79it/s, loss=1.6130]


Logits stats - min: -5.2806, max: 1.4530
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8927, max: 2.4077
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2819, max: 2.2700
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7656/18200 [04:18<04:18, 40.78it/s, loss=2.1434]


Logits stats - min: -5.0118, max: 1.5304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8883, max: 1.6164
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7666/18200 [04:19<04:03, 43.22it/s, loss=2.1854]


Logits stats - min: -4.9371, max: 1.6394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0794, max: 1.3855
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3320, max: 1.5708
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8617, max: 1.2820
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7681/18200 [04:19<04:01, 43.57it/s, loss=1.5253]


Logits stats - min: -4.7210, max: 2.1832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0772, max: 2.3055
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7696/18200 [04:19<04:01, 43.44it/s, loss=1.6583]


Logits stats - min: -5.6287, max: 2.2631
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7373, max: 2.2145
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3578, max: 2.0847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9856, max: 1.6752
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1127, max: 2.0131
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7711/18200 [04:20<04:12, 41.46it/s, loss=1.6084]


Logits stats - min: -4.8481, max: 2.3103
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7721/18200 [04:20<04:23, 39.81it/s, loss=1.5177]


Logits stats - min: -5.8254, max: 1.9108
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7397, max: 1.8314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4695, max: 1.6643
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7731/18200 [04:20<04:12, 41.45it/s, loss=1.5109]


Logits stats - min: -4.9213, max: 1.5642
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5303, max: 1.9533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3357, max: 1.5625
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7741/18200 [04:20<04:10, 41.67it/s, loss=1.5009]


Logits stats - min: -5.0002, max: 1.7250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1141, max: 1.6235
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4125, max: 2.5102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2273, max: 1.9104
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7751/18200 [04:21<04:04, 42.69it/s, loss=1.5856]


Logits stats - min: -5.0563, max: 2.2496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6258, max: 2.3246
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4011, max: 1.7651
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7761/18200 [04:21<04:19, 40.19it/s, loss=1.5694]


Logits stats - min: -5.2718, max: 1.9813
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7188, max: 2.2392
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7771/18200 [04:21<04:24, 39.46it/s, loss=1.6590]


Logits stats - min: -4.6659, max: 1.3162
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7792/18200 [04:22<03:55, 44.25it/s, loss=1.6664]


Logits stats - min: -4.8321, max: 1.6543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7980, max: 1.4541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5921, max: 2.0541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1977, max: 2.1791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2210, max: 2.2927
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7802/18200 [04:22<03:57, 43.79it/s, loss=1.5411]


Logits stats - min: -6.4185, max: 2.6563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2919, max: 2.5799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7490, max: 1.7332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1843, max: 2.0686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4591, max: 2.3097
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7812/18200 [04:22<03:59, 43.35it/s, loss=1.9490]


Logits stats - min: -4.6923, max: 1.7022
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1596, max: 1.6009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6898, max: 2.2205
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7822/18200 [04:22<04:07, 41.89it/s, loss=1.6814]


Logits stats - min: -5.2957, max: 1.6879
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4043, max: 2.1978
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7832/18200 [04:23<04:15, 40.51it/s, loss=1.6857]


Logits stats - min: -4.9018, max: 1.7467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5097, max: 2.1191
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7851/18200 [04:23<04:09, 41.54it/s, loss=2.1711]


Logits stats - min: -5.0368, max: 1.7457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3860, max: 2.4526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9884, max: 2.0750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0447, max: 2.0670
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7856/18200 [04:23<04:05, 42.15it/s, loss=1.5281]


Logits stats - min: -6.1342, max: 2.6493
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4585, max: 2.4245
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7866/18200 [04:24<04:05, 42.12it/s, loss=1.5775]


Logits stats - min: -4.6735, max: 2.4891
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5277, max: 1.6674
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9521, max: 1.6868
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7876/18200 [04:24<03:57, 43.41it/s, loss=1.6605]


Logits stats - min: -5.2563, max: 1.8561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9640, max: 1.7982
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7886/18200 [04:24<03:54, 44.01it/s, loss=1.5203]


Logits stats - min: -4.5923, max: 1.7093
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9169, max: 1.8827
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7896/18200 [04:24<03:52, 44.27it/s, loss=1.5149]


Logits stats - min: -5.0543, max: 2.1980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4935, max: 1.6739
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8550, max: 1.7356
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7906/18200 [04:24<04:08, 41.46it/s, loss=1.6771]


Logits stats - min: -5.7429, max: 2.1433
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3368, max: 2.2303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3097, max: 2.9331
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7916/18200 [04:25<04:05, 41.85it/s, loss=1.6165]


Logits stats - min: -5.6399, max: 2.4887
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5712, max: 1.5780
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3307, max: 2.2520
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7926/18200 [04:25<04:09, 41.12it/s, loss=1.5181]


Logits stats - min: -5.2731, max: 2.8405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6883, max: 2.3105
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7942/18200 [04:25<03:54, 43.77it/s, loss=1.6225]


Logits stats - min: -5.4281, max: 2.2305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2923, max: 1.6257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3236, max: 1.6198
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7952/18200 [04:26<04:04, 41.86it/s, loss=1.6094]


Logits stats - min: -5.5634, max: 2.5580
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 7967/18200 [04:26<04:04, 41.83it/s, loss=1.6606]


Logits stats - min: -5.7760, max: 2.4584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.3706, max: 1.5994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2847, max: 1.9875
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 7982/18200 [04:26<04:08, 41.08it/s, loss=1.5357]


Logits stats - min: -7.3924, max: 1.9998
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7446, max: 1.8405
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 7992/18200 [04:26<03:56, 43.24it/s, loss=1.6510]


Logits stats - min: -5.2165, max: 1.6295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6477, max: 2.2198
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0590, max: 1.5774
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 7998/18200 [04:27<03:42, 45.76it/s, loss=1.6032]


Logits stats - min: -4.8257, max: 1.8689
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1712, max: 1.6089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6126, max: 2.2521
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8008/18200 [04:27<04:01, 42.12it/s, loss=1.7011]


Logits stats - min: -5.0877, max: 1.5156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2262, max: 1.9595
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0107, max: 2.4258
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8023/18200 [04:27<04:03, 41.84it/s, loss=1.6908]


Logits stats - min: -5.9296, max: 2.2603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1482, max: 2.4901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4433, max: 2.2691
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8033/18200 [04:27<03:54, 43.44it/s, loss=1.6005]


Logits stats - min: -5.3497, max: 1.4094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6057, max: 2.1507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7755, max: 2.2898
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8048/18200 [04:28<03:52, 43.70it/s, loss=1.6593]


Logits stats - min: -5.6036, max: 2.9223
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9680, max: 2.0991
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8058/18200 [04:28<03:58, 42.44it/s, loss=1.5753]


Logits stats - min: -5.2461, max: 1.9414
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2672, max: 1.5572
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8068/18200 [04:28<03:51, 43.84it/s, loss=1.6678]


Logits stats - min: -4.5148, max: 1.2844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0583, max: 1.7059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1478, max: 1.7714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8465, max: 1.7393
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4789, max: 2.2744
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9509, max: 1.6281
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8083/18200 [04:29<03:55, 42.99it/s, loss=1.7387]


Logits stats - min: -4.7897, max: 1.9043
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8093/18200 [04:29<03:57, 42.61it/s, loss=1.6975]


Logits stats - min: -5.4386, max: 2.0165
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6634, max: 1.6063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3146, max: 2.1117
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8104/18200 [04:29<03:53, 43.20it/s, loss=1.6872]


Logits stats - min: -4.9045, max: 1.6810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.2640, max: 1.1567
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1110, max: 2.2600
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8119/18200 [04:29<03:59, 42.13it/s, loss=2.1716]


Logits stats - min: -5.2053, max: 1.7105
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6393, max: 2.2658
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8134/18200 [04:30<04:22, 38.32it/s, loss=2.1199]


Logits stats - min: -5.2925, max: 1.6702
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8376, max: 2.1319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6791, max: 2.2793
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2843, max: 1.4714
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8150/18200 [04:30<03:50, 43.57it/s, loss=1.6763]


Logits stats - min: -4.9029, max: 2.0653
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4716, max: 2.1383
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8160/18200 [04:30<03:40, 45.47it/s, loss=2.1288]


Logits stats - min: -5.4874, max: 1.7678
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3979, max: 1.9037
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8215, max: 2.3315
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8180/18200 [04:31<04:04, 41.00it/s, loss=1.6532]


Logits stats - min: -5.4085, max: 2.1319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6061, max: 1.4849
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8195/18200 [04:31<04:03, 41.08it/s, loss=1.6611]


Logits stats - min: -5.5205, max: 1.6072
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3760, max: 1.7828
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0023, max: 1.7624
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6946, max: 1.6929
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8205/18200 [04:31<03:58, 41.82it/s, loss=1.7880]


Logits stats - min: -5.3718, max: 1.6632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2494, max: 1.4584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0727, max: 1.7175
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8221/18200 [04:32<04:03, 40.92it/s, loss=1.5353]


Logits stats - min: -5.5819, max: 2.0717
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8231/18200 [04:32<04:01, 41.35it/s, loss=1.6098]


Logits stats - min: -5.1147, max: 1.3626
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2968, max: 1.7633
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3943, max: 2.2950
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8241/18200 [04:32<04:05, 40.50it/s, loss=1.6153]


Logits stats - min: -5.8034, max: 2.3207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2047, max: 1.6533
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8253/18200 [04:33<03:48, 43.51it/s, loss=1.5115]


Logits stats - min: -5.2829, max: 1.6364
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3831, max: 1.8723
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4925, max: 2.3282
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3793, max: 2.5048
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8268/18200 [04:33<03:51, 42.84it/s, loss=1.4989]


Logits stats - min: -4.7007, max: 2.2413
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8278/18200 [04:33<04:06, 40.23it/s, loss=2.6289]


Logits stats - min: -5.1065, max: 1.6519
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8293/18200 [04:34<04:09, 39.77it/s, loss=1.6048]


Logits stats - min: -5.2513, max: 1.5395
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8303/18200 [04:34<03:48, 43.38it/s, loss=1.5145]


Logits stats - min: -6.1031, max: 2.5082
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9802, max: 2.3455
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5029, max: 1.4169
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8318/18200 [04:34<03:48, 43.27it/s, loss=1.5776]


Logits stats - min: -5.1014, max: 1.5613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1998, max: 1.6134
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2796, max: 1.5503
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8338/18200 [04:35<03:56, 41.72it/s, loss=1.6601]


Logits stats - min: -6.0081, max: 2.2421
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3768, max: 1.5723
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1464, max: 2.1423
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1064, max: 2.4592
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7546, max: 2.0346
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8354/18200 [04:35<03:33, 46.08it/s, loss=1.6067]


Logits stats - min: -5.5578, max: 1.5063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2907, max: 2.1173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4783, max: 2.0614
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3361, max: 1.6875
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4703, max: 1.7157
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8125, max: 1.6549
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8365/18200 [04:35<03:25, 47.77it/s, loss=1.4790]


Logits stats - min: -5.7565, max: 2.2145
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8854, max: 2.3570
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6707, max: 1.2816
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6456, max: 2.4420
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8380/18200 [04:36<03:51, 42.40it/s, loss=1.6535]


Logits stats - min: -4.8713, max: 1.6025
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4784, max: 1.5993
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9387, max: 1.9461
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0373, max: 2.3767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7884, max: 2.2621
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8401/18200 [04:36<03:42, 44.03it/s, loss=1.6206]


Logits stats - min: -5.2728, max: 1.8687
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8744, max: 2.1809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1969, max: 2.3303
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8412/18200 [04:36<03:38, 44.81it/s, loss=1.5211]


Logits stats - min: -5.5769, max: 2.2879
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4252, max: 1.5068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5954, max: 2.4546
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8422/18200 [04:37<03:39, 44.51it/s, loss=2.1872]


Logits stats - min: -5.7374, max: 2.4818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1526, max: 2.3074
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8432/18200 [04:37<03:46, 43.18it/s, loss=1.6530]


Logits stats - min: -4.8237, max: 1.2393
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████▏            | 8452/18200 [04:37<04:06, 39.57it/s, loss=1.6193]


Logits stats - min: -5.3259, max: 1.6427
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1585, max: 1.6465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0558, max: 1.5134
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████▏            | 8462/18200 [04:38<03:54, 41.59it/s, loss=1.4967]


Logits stats - min: -4.9074, max: 1.5769
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8472/18200 [04:38<03:41, 43.98it/s, loss=2.1549]


Logits stats - min: -5.4914, max: 1.6181
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5709, max: 1.6153
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5305, max: 1.7704
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3577, max: 2.5012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4889, max: 1.4928
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8483/18200 [04:38<03:52, 41.87it/s, loss=1.6087]


Logits stats - min: -6.0788, max: 2.5094
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8493/18200 [04:38<03:47, 42.59it/s, loss=1.5863]


Logits stats - min: -5.2263, max: 2.3315
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2116, max: 2.2441
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8504/18200 [04:39<03:42, 43.51it/s, loss=1.6168]


Logits stats - min: -5.4992, max: 2.3305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3993, max: 2.2764
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4825, max: 1.5967
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2337, max: 2.4122
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8521/18200 [04:39<03:20, 48.18it/s, loss=1.5627]


Logits stats - min: -5.7985, max: 2.3741
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2039, max: 2.3882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9402, max: 2.5190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4281, max: 2.4527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1475, max: 2.0931
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8531/18200 [04:39<03:36, 44.76it/s, loss=1.8958]


Logits stats - min: -4.9759, max: 1.7083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2288, max: 2.5604
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8541/18200 [04:39<03:54, 41.28it/s, loss=1.5078]


Logits stats - min: -5.2382, max: 1.7953
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6136, max: 2.0279
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8556/18200 [04:40<03:44, 42.98it/s, loss=1.6636]


Logits stats - min: -5.6366, max: 2.4276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8629, max: 1.6601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5830, max: 1.1320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5775, max: 1.6942
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8577/18200 [04:40<03:47, 42.31it/s, loss=2.2147]


Logits stats - min: -5.2285, max: 2.2027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6362, max: 1.5981
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8587/18200 [04:40<03:52, 41.44it/s, loss=1.6630]


Logits stats - min: -5.5433, max: 1.8346
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9294, max: 2.7576
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9784, max: 1.6222
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8597/18200 [04:41<03:57, 40.39it/s, loss=1.4819]


Logits stats - min: -6.1331, max: 2.4140
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8612/18200 [04:41<03:54, 40.89it/s, loss=1.4929]


Logits stats - min: -5.1503, max: 1.8196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5790, max: 2.4228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9966, max: 1.7533
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▍            | 8627/18200 [04:41<03:45, 42.48it/s, loss=1.4876]


Logits stats - min: -5.2795, max: 1.8428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4881, max: 1.6314
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▍            | 8643/18200 [04:42<03:32, 44.88it/s, loss=2.1545]


Logits stats - min: -4.6148, max: 1.5119
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5970, max: 2.1314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6545, max: 1.7131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1169, max: 2.2589
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1673, max: 2.2850
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8658/18200 [04:42<03:44, 42.47it/s, loss=1.5408]


Logits stats - min: -5.3079, max: 2.1659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5526, max: 2.2511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3867, max: 2.2386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3838, max: 2.2646
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8669/18200 [04:42<03:35, 44.15it/s, loss=1.6400]


Logits stats - min: -5.3755, max: 2.1436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8835, max: 2.2020
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3854, max: 2.2589
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5960, max: 2.2051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7914, max: 1.4615
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8680/18200 [04:43<03:27, 45.84it/s, loss=1.5655]


Logits stats - min: -5.8721, max: 1.7920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6546, max: 1.7721
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8690/18200 [04:43<03:47, 41.78it/s, loss=1.6643]


Logits stats - min: -4.6467, max: 2.0155
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7538, max: 2.2676
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8700/18200 [04:43<03:42, 42.73it/s, loss=1.5827]


Logits stats - min: -5.7712, max: 2.2675
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2542, max: 2.2233
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8715/18200 [04:44<03:36, 43.76it/s, loss=1.9848]


Logits stats - min: -5.0531, max: 1.7377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0430, max: 1.8951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0177, max: 1.6154
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4264, max: 1.8407
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8725/18200 [04:44<03:37, 43.64it/s, loss=1.7591]


Logits stats - min: -5.5913, max: 2.3450
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8663, max: 2.3282
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9661, max: 2.2551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7111, max: 1.8354
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8736/18200 [04:44<03:27, 45.56it/s, loss=2.0480]


Logits stats - min: -5.1509, max: 1.5867
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9602, max: 2.1943
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8751/18200 [04:44<03:42, 42.53it/s, loss=1.6589]


Logits stats - min: -5.6873, max: 1.9941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4525, max: 1.4506
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5693, max: 1.6010
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8762/18200 [04:45<03:21, 46.77it/s, loss=1.6712]


Logits stats - min: -5.1272, max: 1.2483
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3827, max: 1.7717
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2917, max: 1.6464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6149, max: 2.1353
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6628, max: 1.4314
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8778/18200 [04:45<03:30, 44.77it/s, loss=1.6258]


Logits stats - min: -6.0081, max: 2.4429
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4799, max: 1.6471
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8788/18200 [04:45<03:50, 40.80it/s, loss=1.6177]


Logits stats - min: -5.9992, max: 2.2219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2522, max: 2.2124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9748, max: 2.2784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8806, max: 1.8331
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8809/18200 [04:46<03:45, 41.73it/s, loss=1.6227]


Logits stats - min: -5.6861, max: 2.1342
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8829/18200 [04:46<03:48, 41.04it/s, loss=1.6538]


Logits stats - min: -6.0988, max: 2.3870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4250, max: 1.2523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1231, max: 1.6618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0258, max: 2.4713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4890, max: 2.1188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7980, max: 1.8718
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8844/18200 [04:46<03:47, 41.20it/s, loss=1.5643]


Logits stats - min: -6.0501, max: 2.5317
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8043, max: 2.1350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4891, max: 1.8094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3720, max: 1.7634
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8854/18200 [04:47<03:49, 40.79it/s, loss=1.4863]


Logits stats - min: -6.0171, max: 2.7160
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8869/18200 [04:47<03:32, 43.94it/s, loss=1.8207]


Logits stats - min: -5.5202, max: 2.2357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1004, max: 2.1971
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1145, max: 1.6402
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6136, max: 1.8549
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8884/18200 [04:48<03:52, 39.99it/s, loss=1.6565]


Logits stats - min: -6.4344, max: 2.4741
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8898/18200 [04:48<03:51, 40.26it/s, loss=1.4764]


Logits stats - min: -5.4880, max: 2.3170
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5250, max: 1.5909
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2413, max: 1.6887
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8908/18200 [04:48<03:50, 40.31it/s, loss=2.1126]


Logits stats - min: -5.8676, max: 1.8729
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8918/18200 [04:48<03:43, 41.50it/s, loss=1.4883]


Logits stats - min: -5.0271, max: 1.9569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2552, max: 2.1649
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4529, max: 1.6186
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8933/18200 [04:49<03:31, 43.73it/s, loss=1.6057]


Logits stats - min: -5.7037, max: 1.9089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0463, max: 1.5990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4011, max: 1.6353
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8948/18200 [04:49<03:45, 41.09it/s, loss=1.6319]


Logits stats - min: -5.4276, max: 2.1087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6647, max: 1.5443
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1957, max: 1.8378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1418, max: 2.2975
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8958/18200 [04:49<03:33, 43.37it/s, loss=1.6022]


Logits stats - min: -5.7195, max: 1.5258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6331, max: 1.6320
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8968/18200 [04:49<03:32, 43.50it/s, loss=1.6042]


Logits stats - min: -4.6172, max: 1.8465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0245, max: 2.0956
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3652, max: 1.6583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9253, max: 2.5373
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8979/18200 [04:50<03:24, 45.07it/s, loss=1.5679]


Logits stats - min: -5.0102, max: 1.7134
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8989/18200 [04:50<03:40, 41.78it/s, loss=1.6877]


Logits stats - min: -5.8261, max: 2.3894
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 9004/18200 [04:50<03:53, 39.41it/s, loss=1.6135]


Logits stats - min: -5.2228, max: 1.6179
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9013/18200 [04:51<03:41, 41.50it/s, loss=2.1590]


Logits stats - min: -6.0586, max: 2.3495
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7767, max: 2.3550
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9023/18200 [04:51<03:36, 42.43it/s, loss=1.4219]


Logits stats - min: -4.8316, max: 1.3490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5814, max: 1.5406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1977, max: 2.0438
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9034/18200 [04:51<03:30, 43.49it/s, loss=1.6607]


Logits stats - min: -4.8746, max: 1.6537
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1234, max: 1.6893
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9039/18200 [04:51<03:35, 42.47it/s, loss=1.5918]


Logits stats - min: -5.6892, max: 2.3119
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9049/18200 [04:51<03:48, 40.05it/s, loss=2.1522]


Logits stats - min: -4.3654, max: 1.4621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3543, max: 2.2834
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9064/18200 [04:52<03:42, 41.04it/s, loss=1.5722]


Logits stats - min: -5.1426, max: 1.6351
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9079/18200 [04:52<03:34, 42.60it/s, loss=1.5319]


Logits stats - min: -5.5859, max: 1.5874
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0954, max: 1.6515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5256, max: 1.6799
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9089/18200 [04:52<03:30, 43.22it/s, loss=1.6066]


Logits stats - min: -5.1420, max: 1.4313
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0701, max: 2.3268
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9094/18200 [04:52<03:30, 43.30it/s, loss=2.1769]


Logits stats - min: -5.6298, max: 1.2204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9783, max: 2.2777
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9104/18200 [04:53<03:32, 42.73it/s, loss=1.6595]


Logits stats - min: -6.4901, max: 2.5467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4517, max: 2.1086
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8896, max: 2.5061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2605, max: 1.9181
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9115/18200 [04:53<03:23, 44.69it/s, loss=1.6727]


Logits stats - min: -6.0870, max: 2.1245
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5692, max: 1.6115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1830, max: 1.5734
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9125/18200 [04:53<03:22, 44.79it/s, loss=1.5809]


Logits stats - min: -5.1609, max: 1.9251
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2827, max: 2.3563
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9131/18200 [04:53<03:10, 47.64it/s, loss=1.6381]


Logits stats - min: -5.3665, max: 1.8705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7321, max: 1.9507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9018, max: 1.5650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1832, max: 1.6462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0865, max: 2.4783
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9141/18200 [04:54<03:33, 42.43it/s, loss=1.6935]


Logits stats - min: -5.3791, max: 1.6583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5997, max: 1.6336
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9151/18200 [04:54<03:40, 41.09it/s, loss=1.5022]


Logits stats - min: -5.8109, max: 2.6392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2388, max: 1.5186
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9162/18200 [04:54<03:27, 43.60it/s, loss=1.6734]


Logits stats - min: -6.0955, max: 2.1450
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6408, max: 1.5669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2961, max: 2.5060
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1364, max: 2.3292
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9178/18200 [04:54<03:18, 45.45it/s, loss=1.6465]


Logits stats - min: -5.4659, max: 1.6473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2769, max: 1.8531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0964, max: 2.3164
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2366, max: 2.2722
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3241, max: 1.7989
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9184/18200 [04:55<03:10, 47.29it/s, loss=1.6593]


Logits stats - min: -5.4738, max: 2.5809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9293, max: 2.1915
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4064, max: 2.4354
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9199/18200 [04:55<03:13, 46.48it/s, loss=1.6478]


Logits stats - min: -5.0563, max: 1.6477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4140, max: 1.7837
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7905, max: 1.6918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5833, max: 2.8178
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9210/18200 [04:55<03:18, 45.29it/s, loss=1.5060]


Logits stats - min: -5.4490, max: 1.9069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9540, max: 1.6426
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9788, max: 2.3828
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9220/18200 [04:55<03:22, 44.34it/s, loss=1.6040]


Logits stats - min: -4.6916, max: 2.1053
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5714, max: 2.6215
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9230/18200 [04:56<03:20, 44.67it/s, loss=1.6044]


Logits stats - min: -5.4452, max: 1.6019
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9717, max: 2.4030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1157, max: 2.1791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1739, max: 1.6936
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9241/18200 [04:56<03:15, 45.93it/s, loss=2.1327]


Logits stats - min: -6.4671, max: 1.3954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4127, max: 1.7733
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9256/18200 [04:56<03:21, 44.39it/s, loss=1.6479]


Logits stats - min: -6.4730, max: 1.8818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7143, max: 1.9220
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6173, max: 2.1347
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8150, max: 2.0983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7051, max: 2.3494
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0672, max: 2.2909
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9266/18200 [04:56<03:31, 42.21it/s, loss=1.6582]


Logits stats - min: -5.5775, max: 2.2844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3395, max: 1.8092
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1039, max: 1.7438
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9271/18200 [04:56<03:30, 42.51it/s, loss=1.6279]


Logits stats - min: -5.8385, max: 1.7693
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6944, max: 1.5132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9983, max: 2.3889
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9281/18200 [04:57<03:24, 43.62it/s, loss=1.4931]


Logits stats - min: -5.1077, max: 1.8645
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2532, max: 2.5151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6920, max: 2.1324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4194, max: 2.1729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6493, max: 2.2345
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9292/18200 [04:57<03:23, 43.78it/s, loss=1.6464]


Logits stats - min: -5.8042, max: 1.7480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4901, max: 1.8052
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5802, max: 1.6469
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9302/18200 [04:57<03:26, 43.19it/s, loss=1.6627]


Logits stats - min: -5.2083, max: 1.6584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1790, max: 1.8979
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9316/18200 [04:58<03:46, 39.26it/s, loss=1.6581]


Logits stats - min: -5.2131, max: 2.2609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4665, max: 2.3293
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4372, max: 1.7294
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7056, max: 1.7852
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9326/18200 [04:58<03:32, 41.82it/s, loss=2.0026]


Logits stats - min: -6.1172, max: 2.4703
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6789, max: 2.2228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0789, max: 2.7073
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9337/18200 [04:58<03:24, 43.40it/s, loss=2.1545]


Logits stats - min: -5.9536, max: 2.3295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5060, max: 2.2162
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3259, max: 1.8636
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9347/18200 [04:58<03:26, 42.83it/s, loss=1.9944]


Logits stats - min: -4.9690, max: 1.9456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3101, max: 2.1217
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9362/18200 [04:59<03:26, 42.84it/s, loss=1.6611]


Logits stats - min: -5.6085, max: 2.5200
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7161, max: 1.8095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7866, max: 2.2272
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9372/18200 [04:59<03:31, 41.80it/s, loss=1.6107]


Logits stats - min: -6.3164, max: 2.6081
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5919, max: 1.7580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8302, max: 2.4490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9557, max: 1.6932
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9387/18200 [04:59<03:30, 41.81it/s, loss=1.6640]


Logits stats - min: -6.8014, max: 1.9461
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0788, max: 2.8177
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9408/18200 [05:00<03:28, 42.07it/s, loss=1.6007]


Logits stats - min: -5.7000, max: 2.3519
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5055, max: 2.0127
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9418/18200 [05:00<03:34, 41.04it/s, loss=1.6010]


Logits stats - min: -5.6037, max: 1.9929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6872, max: 2.2618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4660, max: 2.0945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3551, max: 2.0127
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9433/18200 [05:00<03:36, 40.58it/s, loss=1.5135]


Logits stats - min: -4.5589, max: 1.6891
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9443/18200 [05:01<03:37, 40.33it/s, loss=1.8539]


Logits stats - min: -6.9903, max: 1.7373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4100, max: 1.4606
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9454/18200 [05:01<03:18, 44.15it/s, loss=2.0947]


Logits stats - min: -5.0264, max: 1.6527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5674, max: 2.2202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5231, max: 1.9281
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9469/18200 [05:01<03:29, 41.74it/s, loss=2.1311]


Logits stats - min: -5.7576, max: 2.2599
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9484/18200 [05:02<03:27, 42.08it/s, loss=2.1345]


Logits stats - min: -5.4330, max: 1.6676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9271, max: 2.3954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0327, max: 2.1138
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8241, max: 1.6858
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2784, max: 1.9505
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9494/18200 [05:02<03:18, 43.82it/s, loss=2.1930]


Logits stats - min: -4.7254, max: 1.6452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7927, max: 1.8331
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5589, max: 1.7423
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9504/18200 [05:02<03:21, 43.18it/s, loss=1.6566]


Logits stats - min: -5.0196, max: 1.9532
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7281, max: 2.4152
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4345, max: 1.7472
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3744, max: 2.3763
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9520/18200 [05:02<03:13, 44.85it/s, loss=1.6661]


Logits stats - min: -4.4147, max: 1.2108
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8111, max: 1.6919
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3226, max: 2.2883
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9530/18200 [05:03<03:15, 44.28it/s, loss=2.1244]


Logits stats - min: -5.3051, max: 2.0349
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7187, max: 1.7105
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0765, max: 1.7186
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9545/18200 [05:03<03:17, 43.91it/s, loss=1.5465]


Logits stats - min: -4.9871, max: 1.6810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8647, max: 1.2109
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0516, max: 1.7395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4529, max: 1.6959
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▌           | 9556/18200 [05:03<03:13, 44.63it/s, loss=1.5117]


Logits stats - min: -6.1859, max: 2.4112
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3137, max: 2.4094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3796, max: 1.7643
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▌           | 9566/18200 [05:03<03:10, 45.37it/s, loss=1.5061]


Logits stats - min: -5.9744, max: 2.2779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4663, max: 1.8129
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6735, max: 1.9085
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9581/18200 [05:04<03:14, 44.20it/s, loss=1.9722]


Logits stats - min: -5.1842, max: 2.3371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9834, max: 1.5779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2338, max: 1.6164
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9230, max: 1.7399
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9592/18200 [05:04<03:15, 43.97it/s, loss=1.9051]


Logits stats - min: -5.8389, max: 2.3699
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7630, max: 1.7694
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2186, max: 2.3959
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3507, max: 1.7644
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9597/18200 [05:04<03:12, 44.77it/s, loss=1.6152]


Logits stats - min: -4.7105, max: 1.6677
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9849, max: 2.0945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9471, max: 2.3159
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9607/18200 [05:04<03:19, 42.97it/s, loss=1.6550]


Logits stats - min: -5.5673, max: 2.4196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3938, max: 2.3090
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1463, max: 1.7201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0530, max: 2.5618
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9623/18200 [05:05<03:18, 43.23it/s, loss=1.6221]


Logits stats - min: -4.9814, max: 1.8501
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9638/18200 [05:05<03:19, 42.85it/s, loss=1.3015]


Logits stats - min: -5.1615, max: 2.2441
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3737, max: 2.4622
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3394, max: 2.1214
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9649/18200 [05:05<03:20, 42.59it/s, loss=1.4726]


Logits stats - min: -5.6534, max: 1.6605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5235, max: 1.7758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1777, max: 1.6775
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3759, max: 1.6643
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9659/18200 [05:06<03:07, 45.66it/s, loss=1.4900]


Logits stats - min: -5.9074, max: 2.2341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2232, max: 2.0606
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7480, max: 2.2694
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9665/18200 [05:06<02:59, 47.68it/s, loss=1.6525]


Logits stats - min: -5.1027, max: 1.8651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7036, max: 2.2140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3193, max: 1.5707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6726, max: 2.2365
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5928, max: 1.8583
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9680/18200 [05:06<03:09, 44.95it/s, loss=1.6049]


Logits stats - min: -5.8003, max: 2.3386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9931, max: 2.0379
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5459, max: 2.0105
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1757, max: 2.2822
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1155, max: 1.2972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3059, max: 1.5041
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9696/18200 [05:06<03:15, 43.59it/s, loss=1.6397]


Logits stats - min: -5.0796, max: 1.8032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7073, max: 2.3249
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9706/18200 [05:07<03:18, 42.86it/s, loss=1.6610]


Logits stats - min: -5.5971, max: 2.3255
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4234, max: 2.3324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5991, max: 1.6758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2312, max: 2.5570
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9726/18200 [05:07<03:17, 42.90it/s, loss=1.4649]


Logits stats - min: -5.5059, max: 1.1976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9276, max: 2.1567
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▊           | 9737/18200 [05:07<03:12, 43.95it/s, loss=1.5396]


Logits stats - min: -5.9293, max: 2.3989
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3766, max: 2.3181
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5909, max: 2.5510
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▊           | 9747/18200 [05:07<03:12, 44.00it/s, loss=1.6121]


Logits stats - min: -5.5661, max: 1.7760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1486, max: 2.3746
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▊           | 9762/18200 [05:08<03:21, 41.90it/s, loss=2.1190]


Logits stats - min: -6.6199, max: 2.3550
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3847, max: 1.6552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5272, max: 1.8596
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9772/18200 [05:08<03:12, 43.89it/s, loss=1.4595]


Logits stats - min: -5.3382, max: 2.4582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2356, max: 2.4116
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1640, max: 1.6599
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9782/18200 [05:08<03:16, 42.80it/s, loss=1.5984]


Logits stats - min: -5.6565, max: 1.6532
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6584, max: 1.8622
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4992, max: 1.3156
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9798/18200 [05:09<03:07, 44.84it/s, loss=1.6072]


Logits stats - min: -5.8601, max: 2.5770
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6159, max: 2.1561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4026, max: 2.3289
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0488, max: 2.5146
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9808/18200 [05:09<03:15, 42.83it/s, loss=1.4729]


Logits stats - min: -5.5755, max: 2.4114
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9438, max: 1.3749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2171, max: 1.6561
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9818/18200 [05:09<03:16, 42.76it/s, loss=1.5574]


Logits stats - min: -5.4326, max: 1.6065
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6958, max: 2.4300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2466, max: 1.6045
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9833/18200 [05:10<03:29, 39.84it/s, loss=1.6258]


Logits stats - min: -5.7347, max: 2.3265
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9843/18200 [05:10<03:20, 41.78it/s, loss=1.6168]


Logits stats - min: -5.3966, max: 1.6399
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1397, max: 2.3749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0724, max: 2.1085
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9853/18200 [05:10<03:18, 42.00it/s, loss=1.4542]


Logits stats - min: -5.3081, max: 1.5375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6814, max: 1.8758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1844, max: 2.4689
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9863/18200 [05:10<03:28, 39.96it/s, loss=1.9490]


Logits stats - min: -4.9084, max: 1.5500
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9873/18200 [05:11<03:25, 40.44it/s, loss=1.9548]


Logits stats - min: -5.3991, max: 1.7047
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9892/18200 [05:11<03:20, 41.49it/s, loss=1.6461]


Logits stats - min: -5.4634, max: 1.5514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4604, max: 1.5811
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9902/18200 [05:11<03:15, 42.39it/s, loss=1.6389]


Logits stats - min: -5.9666, max: 1.8949
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3996, max: 1.8462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7807, max: 2.1867
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9912/18200 [05:11<03:32, 38.96it/s, loss=1.4994]


Logits stats - min: -4.8031, max: 1.5409
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7885, max: 2.5034
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9917/18200 [05:12<03:26, 40.14it/s, loss=1.5963]


Logits stats - min: -5.7746, max: 2.2973
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████           | 9927/18200 [05:12<03:30, 39.36it/s, loss=1.5349]


Logits stats - min: -4.9631, max: 1.4987
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████           | 9937/18200 [05:12<03:21, 41.02it/s, loss=1.5926]


Logits stats - min: -5.7076, max: 1.6358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8943, max: 1.5670
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9962/18200 [05:13<03:25, 40.17it/s, loss=2.6957]


Logits stats - min: -5.6943, max: 2.1950
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1002, max: 1.6481
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7620, max: 2.6454
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3219, max: 2.1524
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9972/18200 [05:13<03:17, 41.67it/s, loss=1.5451]


Logits stats - min: -5.3948, max: 1.8536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0183, max: 1.6323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4759, max: 1.7376
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9983/18200 [05:13<03:12, 42.64it/s, loss=1.6434]


Logits stats - min: -5.2106, max: 2.1820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8796, max: 1.7251
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9998/18200 [05:14<03:12, 42.64it/s, loss=1.8916]


Logits stats - min: -5.0358, max: 1.9965
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5476, max: 2.5509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1791, max: 2.0773
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6072, max: 1.8928
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10008/18200 [05:14<03:12, 42.64it/s, loss=2.0767]


Logits stats - min: -5.2901, max: 1.7022
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7281, max: 2.3901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4400, max: 2.3326
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10018/18200 [05:14<03:13, 42.32it/s, loss=1.5438]


Logits stats - min: -5.5731, max: 2.2841
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10038/18200 [05:15<03:09, 43.06it/s, loss=1.4899]


Logits stats - min: -5.3680, max: 1.6662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1715, max: 1.7355
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5868, max: 2.3343
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10048/18200 [05:15<03:11, 42.49it/s, loss=1.3979]


Logits stats - min: -5.1522, max: 1.9781
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10058/18200 [05:15<03:12, 42.36it/s, loss=2.0240]


Logits stats - min: -5.2431, max: 1.4416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8370, max: 2.2083
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10068/18200 [05:15<03:17, 41.09it/s, loss=1.6292]


Logits stats - min: -5.8769, max: 1.7310
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10073/18200 [05:15<03:21, 40.43it/s, loss=1.6459]


Logits stats - min: -5.7044, max: 1.5453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2367, max: 2.1159
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8876, max: 2.3068
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10089/18200 [05:16<03:03, 44.12it/s, loss=1.8268]


Logits stats - min: -5.5447, max: 1.9957
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7131, max: 1.8001
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2660, max: 1.6756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6050, max: 1.5950
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10104/18200 [05:16<03:14, 41.62it/s, loss=1.6473]


Logits stats - min: -5.8156, max: 2.2308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3918, max: 2.2825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9564, max: 2.6869
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10119/18200 [05:16<03:14, 41.59it/s, loss=2.2108]


Logits stats - min: -5.4252, max: 2.2916
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10129/18200 [05:17<03:24, 39.46it/s, loss=1.6215]


Logits stats - min: -4.8530, max: 2.2457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0699, max: 1.9888
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4765, max: 1.7867
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10144/18200 [05:17<03:01, 44.43it/s, loss=1.6424]


Logits stats - min: -5.6123, max: 2.4395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6388, max: 1.8679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7278, max: 2.1188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2977, max: 1.5920
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10160/18200 [05:17<03:00, 44.42it/s, loss=2.1510]


Logits stats - min: -6.4376, max: 2.5324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9855, max: 2.2112
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0640, max: 1.6537
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10175/18200 [05:18<03:02, 43.89it/s, loss=2.1353]


Logits stats - min: -5.7831, max: 2.2811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2825, max: 1.3299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6441, max: 2.4351
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10185/18200 [05:18<03:05, 43.27it/s, loss=2.6766]


Logits stats - min: -5.5273, max: 2.4314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2796, max: 1.7477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7812, max: 2.5160
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10196/18200 [05:18<03:06, 42.84it/s, loss=2.1211]


Logits stats - min: -5.8177, max: 2.3442
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10211/18200 [05:19<03:05, 42.97it/s, loss=1.6500]


Logits stats - min: -5.0273, max: 1.6846
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1998, max: 2.3210
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10226/18200 [05:19<03:10, 41.84it/s, loss=1.9324]


Logits stats - min: -6.0092, max: 2.6051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7729, max: 2.5084
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3523, max: 2.2428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1876, max: 2.2828
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10237/18200 [05:19<03:13, 41.17it/s, loss=1.7101]


Logits stats - min: -5.5869, max: 2.3071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5178, max: 1.8567
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2403, max: 2.3473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5779, max: 1.8708
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10248/18200 [05:19<02:57, 44.74it/s, loss=1.6012]


Logits stats - min: -6.5888, max: 2.2084
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1137, max: 2.1696
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1029, max: 1.6069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7116, max: 1.5806
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10264/18200 [05:20<02:52, 45.97it/s, loss=1.5510]


Logits stats - min: -6.3704, max: 1.7732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5448, max: 1.7230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3058, max: 1.7138
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1521, max: 1.8822
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2273, max: 2.0955
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10279/18200 [05:20<02:58, 44.34it/s, loss=1.8819]


Logits stats - min: -5.3400, max: 1.8150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9317, max: 1.8578
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10289/18200 [05:20<03:10, 41.59it/s, loss=1.6181]


Logits stats - min: -5.8392, max: 2.1769
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10299/18200 [05:21<03:08, 41.89it/s, loss=1.6745]


Logits stats - min: -5.7851, max: 1.7066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0459, max: 1.8083
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10315/18200 [05:21<02:57, 44.40it/s, loss=1.8254]


Logits stats - min: -6.2799, max: 2.3433
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3184, max: 1.2586
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9336, max: 2.3871
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5521, max: 2.3123
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10325/18200 [05:21<02:58, 44.11it/s, loss=1.4995]


Logits stats - min: -5.9766, max: 2.6019
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1767, max: 1.7500
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6210, max: 2.5371
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10335/18200 [05:21<03:01, 43.34it/s, loss=1.5624]


Logits stats - min: -4.9264, max: 1.9053
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6856, max: 2.2176
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9027, max: 1.5690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6346, max: 1.5813
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10345/18200 [05:22<03:05, 42.35it/s, loss=1.5060]


Logits stats - min: -5.5649, max: 2.0490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8486, max: 2.3322
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10371/18200 [05:22<03:04, 42.46it/s, loss=1.6476]


Logits stats - min: -5.0768, max: 1.8171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7687, max: 1.9468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1642, max: 1.6913
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10386/18200 [05:23<03:14, 40.27it/s, loss=1.6668]


Logits stats - min: -6.8665, max: 2.4717
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10391/18200 [05:23<03:07, 41.66it/s, loss=1.5747]


Logits stats - min: -5.8502, max: 2.4299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0654, max: 2.2055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8384, max: 2.2344
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10407/18200 [05:23<02:54, 44.60it/s, loss=1.5626]


Logits stats - min: -6.0461, max: 2.3275
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2748, max: 1.9222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3317, max: 1.4200
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5587, max: 2.4191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6456, max: 2.3893
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2099, max: 2.3554
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10439/18200 [05:24<03:05, 41.87it/s, loss=1.6491]


Logits stats - min: -5.7585, max: 1.7945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3821, max: 1.5992
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6171, max: 1.8342
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10450/18200 [05:24<02:57, 43.55it/s, loss=1.6478]


Logits stats - min: -4.6530, max: 1.5800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9937, max: 2.3783
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6906, max: 2.9453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6714, max: 2.6022
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10460/18200 [05:24<02:52, 44.80it/s, loss=1.6411]


Logits stats - min: -5.1786, max: 2.4392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3155, max: 2.8267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1867, max: 2.5434
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3915, max: 1.4846
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5657, max: 1.7903
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▏         | 10475/18200 [05:25<03:02, 42.34it/s, loss=1.7660]


Logits stats - min: -5.5929, max: 1.7508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7683, max: 2.2609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6135, max: 1.7488
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5589, max: 1.6917
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10486/18200 [05:25<02:54, 44.22it/s, loss=1.8002]


Logits stats - min: -5.3232, max: 1.5337
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2282, max: 1.8013
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8291, max: 2.3969
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10497/18200 [05:25<02:45, 46.44it/s, loss=1.6083]


Logits stats - min: -5.3784, max: 1.8859
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8995, max: 2.7940
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5051, max: 1.4510
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6012, max: 2.2890
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10512/18200 [05:26<02:50, 44.97it/s, loss=2.1712]


Logits stats - min: -5.4520, max: 1.7960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9365, max: 2.2132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9960, max: 2.3533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6188, max: 2.0989
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5360, max: 2.2978
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5260, max: 2.5754
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10522/18200 [05:26<02:58, 42.90it/s, loss=1.5841]


Logits stats - min: -6.2088, max: 2.3671
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10532/18200 [05:26<03:01, 42.27it/s, loss=1.6386]


Logits stats - min: -6.2792, max: 2.4189
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10548/18200 [05:26<02:52, 44.26it/s, loss=2.6949]


Logits stats - min: -4.9937, max: 1.7601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2751, max: 1.6272
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3649, max: 2.4212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0558, max: 2.5866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3924, max: 1.8191
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10563/18200 [05:27<03:02, 41.94it/s, loss=1.5281]


Logits stats - min: -5.6403, max: 1.7554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4209, max: 2.5401
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0329, max: 2.3054
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10579/18200 [05:27<02:54, 43.66it/s, loss=1.6497]


Logits stats - min: -5.5730, max: 1.8226
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6664, max: 1.7916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2366, max: 2.4653
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10589/18200 [05:27<02:57, 42.94it/s, loss=1.7361]


Logits stats - min: -5.7493, max: 1.7179
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4143, max: 2.4035
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10610/18200 [05:28<02:45, 45.83it/s, loss=1.6453]


Logits stats - min: -5.1719, max: 1.6307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8611, max: 2.3226
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6413, max: 2.5442
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6147, max: 1.5533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6239, max: 2.4847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2306, max: 2.0082
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10615/18200 [05:28<02:53, 43.76it/s, loss=1.8475]


Logits stats - min: -5.2568, max: 1.6981
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7129, max: 1.7763
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10630/18200 [05:28<02:59, 42.28it/s, loss=1.6239]


Logits stats - min: -5.7354, max: 2.1607
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10641/18200 [05:29<02:45, 45.69it/s, loss=1.6051]


Logits stats - min: -5.5129, max: 1.9203
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2941, max: 2.2442
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4486, max: 1.6673
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2059, max: 1.7318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1484, max: 1.7619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6308, max: 1.6060
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10656/18200 [05:29<02:55, 42.92it/s, loss=1.5739]


Logits stats - min: -6.1875, max: 2.2131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5105, max: 1.7948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5222, max: 1.3758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9187, max: 1.5554
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10662/18200 [05:29<02:49, 44.51it/s, loss=1.6063]


Logits stats - min: -5.4272, max: 1.5313
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6748, max: 2.3692
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10672/18200 [05:29<02:50, 44.26it/s, loss=1.6321]


Logits stats - min: -6.4009, max: 2.3614
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5241, max: 2.5519
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4206, max: 2.1570
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10682/18200 [05:30<02:53, 43.39it/s, loss=1.6327]


Logits stats - min: -5.4009, max: 1.6345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5883, max: 1.5581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0702, max: 2.2164
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10697/18200 [05:30<02:50, 43.94it/s, loss=1.4722]


Logits stats - min: -4.8372, max: 1.4953
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5325, max: 1.4674
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10707/18200 [05:30<02:58, 41.86it/s, loss=1.6110]


Logits stats - min: -6.3821, max: 2.3216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6080, max: 2.3336
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10718/18200 [05:30<02:54, 42.87it/s, loss=1.6450]


Logits stats - min: -5.5444, max: 1.6797
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2368, max: 1.5805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8950, max: 2.2876
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10728/18200 [05:31<02:53, 43.00it/s, loss=1.6087]


Logits stats - min: -6.2275, max: 2.4044
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5683, max: 2.4345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2139, max: 1.6707
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10743/18200 [05:31<02:57, 42.02it/s, loss=1.6168]


Logits stats - min: -5.4501, max: 1.7574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3358, max: 1.5165
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10753/18200 [05:31<02:57, 41.85it/s, loss=1.6337]


Logits stats - min: -5.9397, max: 2.4777
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1239, max: 1.9745
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10763/18200 [05:31<02:56, 42.19it/s, loss=1.7205]


Logits stats - min: -5.2763, max: 1.7880
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6759, max: 1.4682
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8596, max: 2.0997
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4054, max: 1.5550
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10778/18200 [05:32<02:51, 43.33it/s, loss=1.6344]


Logits stats - min: -5.0194, max: 1.7543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4031, max: 2.4842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9492, max: 1.4198
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6044, max: 1.6996
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10793/18200 [05:32<02:55, 42.13it/s, loss=1.6406]


Logits stats - min: -5.0083, max: 2.1628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1548, max: 1.8866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2138, max: 1.7244
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8339, max: 2.2718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1114, max: 1.5814
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4233, max: 2.3505
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10808/18200 [05:32<02:51, 42.98it/s, loss=1.6041]


Logits stats - min: -5.2635, max: 1.4641
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4869, max: 1.4244
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2494, max: 2.4618
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10813/18200 [05:33<02:52, 42.91it/s, loss=1.4429]


Logits stats - min: -5.6332, max: 1.7261
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0771, max: 2.4347
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10823/18200 [05:33<02:52, 42.65it/s, loss=1.7825]


Logits stats - min: -5.5764, max: 2.1757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1179, max: 1.3258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3705, max: 1.7430
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10833/18200 [05:33<03:01, 40.68it/s, loss=1.6190]


Logits stats - min: -5.9928, max: 1.6889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8771, max: 1.7648
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10843/18200 [05:33<02:58, 41.25it/s, loss=1.5966]


Logits stats - min: -5.5737, max: 1.5640
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10853/18200 [05:34<03:05, 39.52it/s, loss=1.5504]


Logits stats - min: -5.6704, max: 1.7256
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10863/18200 [05:34<03:05, 39.66it/s, loss=1.6116]


Logits stats - min: -5.4027, max: 2.1584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6791, max: 2.1520
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10878/18200 [05:34<02:49, 43.17it/s, loss=1.6048]


Logits stats - min: -5.3546, max: 2.2809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9142, max: 2.2298
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9446, max: 1.6338
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10888/18200 [05:34<02:55, 41.77it/s, loss=1.4903]


Logits stats - min: -5.9029, max: 2.1637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5103, max: 1.6350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8002, max: 1.4387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1416, max: 2.3684
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10903/18200 [05:35<02:57, 41.05it/s, loss=1.4574]


Logits stats - min: -5.2053, max: 1.7464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7314, max: 1.5361
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3715, max: 2.2575
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10918/18200 [05:35<02:59, 40.62it/s, loss=1.4349]


Logits stats - min: -5.5169, max: 1.7531
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10929/18200 [05:35<02:35, 46.81it/s, loss=1.5979]


Logits stats - min: -5.3728, max: 1.5581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6261, max: 2.1333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2765, max: 1.6651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2204, max: 2.4552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4144, max: 1.7970
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8947, max: 1.6831
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10944/18200 [05:36<02:53, 41.91it/s, loss=1.6472]


Logits stats - min: -7.6014, max: 1.9057
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10954/18200 [05:36<02:56, 41.03it/s, loss=1.4368]


Logits stats - min: -4.7818, max: 1.4237
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1826, max: 2.2677
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10960/18200 [05:36<02:47, 43.28it/s, loss=1.4698]


Logits stats - min: -6.0606, max: 2.2619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6993, max: 1.4509
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10970/18200 [05:36<02:57, 40.70it/s, loss=1.6350]


Logits stats - min: -5.2287, max: 2.2690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4465, max: 1.5607
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▉         | 10985/18200 [05:37<02:51, 42.01it/s, loss=1.5985]


Logits stats - min: -5.0249, max: 1.5265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2478, max: 1.5461
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8621, max: 1.7289
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▉         | 11005/18200 [05:37<02:53, 41.43it/s, loss=1.6322]


Logits stats - min: -5.5422, max: 1.6449
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3731, max: 1.6414
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11020/18200 [05:38<02:45, 43.31it/s, loss=1.4349]


Logits stats - min: -5.3832, max: 1.7109
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8070, max: 2.2616
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5539, max: 2.6287
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11030/18200 [05:38<02:46, 43.03it/s, loss=1.6620]


Logits stats - min: -5.5556, max: 1.7632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5437, max: 1.7715
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11040/18200 [05:38<02:43, 43.76it/s, loss=1.6374]


Logits stats - min: -5.5049, max: 1.5780
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5290, max: 1.7135
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6246, max: 1.7208
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11055/18200 [05:38<02:44, 43.32it/s, loss=1.6358]


Logits stats - min: -5.3153, max: 1.6970
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2546, max: 1.7610
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5559, max: 2.2695
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11065/18200 [05:39<02:44, 43.28it/s, loss=1.6333]


Logits stats - min: -5.4615, max: 1.9533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8638, max: 2.3061
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11080/18200 [05:39<02:50, 41.78it/s, loss=1.6894]


Logits stats - min: -5.6837, max: 2.2596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7531, max: 1.5266
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4628, max: 2.4579
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4348, max: 1.8234
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2109, max: 1.9170
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11092/18200 [05:39<02:32, 46.48it/s, loss=2.1872]


Logits stats - min: -5.2634, max: 1.7614
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6977, max: 1.8036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2621, max: 1.6779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3828, max: 2.2921
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11102/18200 [05:39<02:35, 45.52it/s, loss=1.5388]


Logits stats - min: -5.6016, max: 1.5678
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4761, max: 2.3172
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1189, max: 1.9550
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11117/18200 [05:40<02:49, 41.79it/s, loss=1.4910]


Logits stats - min: -7.0392, max: 1.9331
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8421, max: 1.7453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1356, max: 1.7246
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11132/18200 [05:40<02:46, 42.45it/s, loss=1.5230]


Logits stats - min: -6.5389, max: 2.2350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1840, max: 2.3739
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1781, max: 2.2886
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11143/18200 [05:40<02:40, 44.01it/s, loss=1.6093]


Logits stats - min: -5.8491, max: 2.2062
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3608, max: 1.9598
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4401, max: 1.7695
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11153/18200 [05:41<02:38, 44.52it/s, loss=2.5981]


Logits stats - min: -5.6350, max: 1.5660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6305, max: 1.6420
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2270, max: 1.4323
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11163/18200 [05:41<02:51, 40.99it/s, loss=1.4413]


Logits stats - min: -5.8786, max: 1.7159
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████▏        | 11178/18200 [05:41<02:49, 41.50it/s, loss=1.6428]


Logits stats - min: -5.6440, max: 2.4877
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6591, max: 2.1917
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████▏        | 11183/18200 [05:41<02:58, 39.35it/s, loss=1.5979]


Logits stats - min: -5.0510, max: 1.4267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6844, max: 2.2441
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11193/18200 [05:42<02:44, 42.48it/s, loss=1.4471]


Logits stats - min: -6.3318, max: 2.1761
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5238, max: 1.6205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7375, max: 2.3032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3961, max: 2.3049
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11214/18200 [05:42<02:47, 41.76it/s, loss=1.4485]


Logits stats - min: -4.9063, max: 1.5107
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0875, max: 1.6902
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11230/18200 [05:43<02:38, 43.99it/s, loss=1.5751]


Logits stats - min: -5.0120, max: 1.5357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4379, max: 1.7110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1273, max: 2.2708
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11246/18200 [05:43<02:26, 47.44it/s, loss=1.5933]


Logits stats - min: -5.0753, max: 1.8004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0401, max: 1.7884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6786, max: 1.6896
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1253, max: 2.3514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6916, max: 1.4185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8212, max: 2.3617
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5757, max: 1.6923
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11252/18200 [05:43<02:23, 48.47it/s, loss=1.5204]


Logits stats - min: -6.1230, max: 2.3799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0659, max: 1.6490
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11267/18200 [05:43<02:45, 41.81it/s, loss=1.6319]


Logits stats - min: -5.8344, max: 2.2827
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11273/18200 [05:44<02:32, 45.49it/s, loss=1.4372]


Logits stats - min: -5.7520, max: 1.8008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0575, max: 2.1507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9463, max: 2.3295
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11288/18200 [05:44<02:42, 42.54it/s, loss=1.6420]


Logits stats - min: -5.5236, max: 1.6908
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2429, max: 1.2775
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11318/18200 [05:45<02:46, 41.29it/s, loss=1.2481]


Logits stats - min: -5.9095, max: 2.5241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6311, max: 1.6058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7874, max: 1.8631
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7774, max: 2.4645
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11333/18200 [05:45<02:37, 43.58it/s, loss=1.6011]


Logits stats - min: -5.2220, max: 1.6901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4825, max: 1.8386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3913, max: 2.4983
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11343/18200 [05:45<02:32, 45.09it/s, loss=1.6375]


Logits stats - min: -5.5019, max: 1.7995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4120, max: 1.9193
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8520, max: 2.4952
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11353/18200 [05:45<02:36, 43.86it/s, loss=2.1077]


Logits stats - min: -5.5029, max: 1.6168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7219, max: 1.9759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2452, max: 2.2528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.4565, max: 1.6309
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11363/18200 [05:46<02:33, 44.41it/s, loss=1.4461]


Logits stats - min: -5.5084, max: 2.4057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6281, max: 1.7015
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3029, max: 2.2589
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5004, max: 1.7800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8595, max: 1.5582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6631, max: 1.6510
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1541, max: 1.8642
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3837, max: 1.4593
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3826, max: 2.3230
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11380/18200 [05:46<02:25, 46.74it/s, loss=1.6484]


Logits stats - min: -7.0195, max: 2.0101
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7276, max: 2.3019
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8315, max: 1.8685
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11385/18200 [05:46<02:25, 46.93it/s, loss=1.5289]


Logits stats - min: -5.4190, max: 2.5018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9028, max: 1.4422
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1188, max: 1.2627
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11395/18200 [05:46<02:31, 45.01it/s, loss=1.5959]


Logits stats - min: -5.1198, max: 1.8047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6586, max: 1.6883
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11410/18200 [05:47<02:41, 41.99it/s, loss=1.6160]


Logits stats - min: -5.4435, max: 2.3219
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11425/18200 [05:47<02:45, 41.01it/s, loss=1.5924]


Logits stats - min: -5.1649, max: 1.3678
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1599, max: 2.3108
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11435/18200 [05:47<02:43, 41.35it/s, loss=1.6401]


Logits stats - min: -6.6102, max: 2.4735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3908, max: 1.7420
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11446/18200 [05:48<02:32, 44.25it/s, loss=1.6417]


Logits stats - min: -6.3897, max: 2.3946
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8224, max: 1.7700
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3486, max: 1.4701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9823, max: 2.3394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7832, max: 1.5254
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11461/18200 [05:48<02:34, 43.62it/s, loss=1.6203]


Logits stats - min: -5.5199, max: 1.7148
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2296, max: 2.3209
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11471/18200 [05:48<02:37, 42.72it/s, loss=2.2452]


Logits stats - min: -6.0932, max: 2.3771
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5333, max: 2.7319
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11486/18200 [05:48<02:45, 40.52it/s, loss=1.6281]


Logits stats - min: -5.1481, max: 1.8031
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11506/18200 [05:49<02:50, 39.37it/s, loss=1.6323]


Logits stats - min: -5.1996, max: 1.7774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7063, max: 2.2668
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5625, max: 1.8685
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11527/18200 [05:49<02:34, 43.14it/s, loss=1.6506]


Logits stats - min: -5.7902, max: 1.7169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0914, max: 1.9749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8014, max: 2.2578
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11537/18200 [05:50<02:32, 43.73it/s, loss=1.6529]


Logits stats - min: -5.3675, max: 1.7167
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1833, max: 1.7080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5788, max: 1.4693
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6628, max: 2.2803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1400, max: 2.1413
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11552/18200 [05:50<02:30, 44.32it/s, loss=1.6086]


Logits stats - min: -5.4464, max: 1.5801
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2753, max: 1.5648
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6836, max: 1.7567
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▌        | 11558/18200 [05:50<02:19, 47.58it/s, loss=1.6197]


Logits stats - min: -5.6782, max: 1.8561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7217, max: 2.1058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9516, max: 1.9405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1690, max: 2.3360
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▌        | 11568/18200 [05:50<02:41, 41.04it/s, loss=1.8925]


Logits stats - min: -5.1211, max: 1.5320
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11579/18200 [05:51<02:28, 44.56it/s, loss=2.1033]


Logits stats - min: -5.7800, max: 2.1587
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1638, max: 2.0765
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8580, max: 2.1531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1617, max: 2.2625
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11594/18200 [05:51<02:32, 43.42it/s, loss=1.4582]


Logits stats - min: -5.7211, max: 1.4782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4266, max: 1.8030
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11604/18200 [05:51<02:36, 42.28it/s, loss=1.6269]


Logits stats - min: -6.3192, max: 2.3332
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11614/18200 [05:51<02:27, 44.61it/s, loss=1.6269]


Logits stats - min: -4.9666, max: 1.6899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2036, max: 1.5036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4623, max: 2.0557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4574, max: 2.6457
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11624/18200 [05:52<02:35, 42.17it/s, loss=1.4512]


Logits stats - min: -5.8377, max: 1.8590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5650, max: 2.1593
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3083, max: 1.3201
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11640/18200 [05:52<02:21, 46.23it/s, loss=1.9678]


Logits stats - min: -5.2208, max: 1.9831
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1107, max: 2.4518
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2032, max: 1.4733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7904, max: 2.0426
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11650/18200 [05:52<02:26, 44.62it/s, loss=1.6073]


Logits stats - min: -5.3288, max: 1.6182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0534, max: 1.4004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8052, max: 2.1387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2987, max: 1.7509
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11661/18200 [05:52<02:25, 44.96it/s, loss=1.5539]


Logits stats - min: -5.5133, max: 1.4054
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1395, max: 1.7567
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11671/18200 [05:53<02:33, 42.43it/s, loss=1.5918]


Logits stats - min: -4.6651, max: 1.3474
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11686/18200 [05:53<02:38, 41.16it/s, loss=1.5933]


Logits stats - min: -5.2513, max: 1.7311
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11702/18200 [05:53<02:32, 42.66it/s, loss=1.9511]


Logits stats - min: -5.6063, max: 1.6243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1268, max: 2.1370
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6911, max: 2.2820
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11717/18200 [05:54<02:35, 41.78it/s, loss=1.5446]


Logits stats - min: -5.4804, max: 1.7358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6235, max: 2.2800
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11733/18200 [05:54<02:29, 43.22it/s, loss=1.6192]


Logits stats - min: -6.3851, max: 2.3956
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0892, max: 1.7969
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7835, max: 1.8030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5952, max: 2.2027
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11743/18200 [05:54<02:30, 42.95it/s, loss=1.9330]


Logits stats - min: -6.5473, max: 2.3668
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11748/18200 [05:55<02:32, 42.43it/s, loss=1.6226]


Logits stats - min: -5.3498, max: 2.0201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6941, max: 2.1854
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11758/18200 [05:55<02:32, 42.13it/s, loss=1.6232]


Logits stats - min: -5.8860, max: 2.3429
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0013, max: 1.7040
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7349, max: 2.4633
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9919, max: 1.5320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4046, max: 1.8101
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11769/18200 [05:55<02:21, 45.44it/s, loss=1.4222]


Logits stats - min: -5.7852, max: 2.3174
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5767, max: 2.2190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8803, max: 1.9025
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11784/18200 [05:55<02:34, 41.42it/s, loss=2.1453]


Logits stats - min: -5.2792, max: 1.6649
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11789/18200 [05:56<02:27, 43.40it/s, loss=1.6221]


Logits stats - min: -5.8939, max: 1.8990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8396, max: 2.9335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7910, max: 1.8616
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11800/18200 [05:56<02:21, 45.13it/s, loss=1.6294]


Logits stats - min: -4.9940, max: 1.8187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4301, max: 1.6527
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11815/18200 [05:56<02:30, 42.45it/s, loss=1.5089]


Logits stats - min: -6.5420, max: 3.0931
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7907, max: 1.7211
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11825/18200 [05:56<02:32, 41.82it/s, loss=1.5224]


Logits stats - min: -5.4774, max: 1.5990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4453, max: 1.9850
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11835/18200 [05:57<02:35, 40.82it/s, loss=1.6939]


Logits stats - min: -5.3082, max: 1.5675
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6138, max: 2.3644
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11850/18200 [05:57<02:32, 41.69it/s, loss=1.5987]


Logits stats - min: -5.8323, max: 1.7004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9863, max: 2.2472
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11861/18200 [05:57<02:17, 46.04it/s, loss=1.4368]


Logits stats - min: -5.3874, max: 1.7389
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3062, max: 1.6268
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9122, max: 1.5085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3111, max: 1.8295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3563, max: 2.2505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0831, max: 1.8941
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11871/18200 [05:57<02:27, 43.02it/s, loss=1.6253]


Logits stats - min: -4.9806, max: 1.5299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4561, max: 2.1697
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0249, max: 1.8727
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11886/18200 [05:58<02:33, 41.21it/s, loss=1.6176]


Logits stats - min: -6.1215, max: 2.3083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7757, max: 1.7055
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11896/18200 [05:58<02:35, 40.46it/s, loss=1.5883]


Logits stats - min: -6.3583, max: 2.2753
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11906/18200 [05:58<02:28, 42.40it/s, loss=1.6277]


Logits stats - min: -5.8952, max: 1.7864
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7426, max: 1.8242
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11911/18200 [05:58<02:29, 42.02it/s, loss=1.6505]


Logits stats - min: -6.6171, max: 2.5604
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4653, max: 2.3217
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11936/18200 [05:59<02:34, 40.48it/s, loss=1.6109]


Logits stats - min: -5.3109, max: 1.6872
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9092, max: 1.5528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6426, max: 2.7451
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11946/18200 [05:59<02:30, 41.43it/s, loss=1.7062]


Logits stats - min: -5.7759, max: 1.3745
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4714, max: 2.2520
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11957/18200 [05:59<02:18, 45.06it/s, loss=1.4753]


Logits stats - min: -5.0343, max: 1.5747
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3297, max: 2.5080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3661, max: 2.5745
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4335, max: 2.7174
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8265, max: 2.1854
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11962/18200 [06:00<02:21, 44.24it/s, loss=1.4530]


Logits stats - min: -6.4001, max: 2.2403
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9944, max: 2.2690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8067, max: 1.7104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8966, max: 1.7004
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 11979/18200 [06:00<02:12, 46.83it/s, loss=1.5308]


Logits stats - min: -5.1042, max: 1.3963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4961, max: 2.7406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5907, max: 1.6562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6211, max: 1.6276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2606, max: 1.6612
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 11994/18200 [06:00<02:29, 41.65it/s, loss=1.5238]


Logits stats - min: -5.9089, max: 2.2608
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6791, max: 2.1706
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12004/18200 [06:01<02:35, 39.95it/s, loss=1.6209]


Logits stats - min: -5.3646, max: 1.6418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8275, max: 3.0929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3862, max: 1.6709
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12016/18200 [06:01<02:16, 45.25it/s, loss=1.6234]


Logits stats - min: -5.5887, max: 2.3112
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7329, max: 1.6106
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5713, max: 1.7445
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4438, max: 2.4105
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3372, max: 1.6791
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12031/18200 [06:01<02:21, 43.46it/s, loss=1.5903]


Logits stats - min: -5.7655, max: 2.5276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3761, max: 2.4081
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12042/18200 [06:01<02:23, 42.83it/s, loss=1.5848]


Logits stats - min: -6.2937, max: 1.5548
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9173, max: 1.5120
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5454, max: 2.4607
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8019, max: 1.6307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3885, max: 2.3701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3601, max: 1.8481
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▎       | 12068/18200 [06:02<02:28, 41.30it/s, loss=1.6354]


Logits stats - min: -6.2156, max: 2.2869
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▎       | 12083/18200 [06:02<02:27, 41.44it/s, loss=2.2480]


Logits stats - min: -6.8123, max: 2.6685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3893, max: 2.3103
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▎       | 12093/18200 [06:03<02:31, 40.40it/s, loss=1.5359]


Logits stats - min: -6.3603, max: 2.4002
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▎       | 12103/18200 [06:03<02:27, 41.37it/s, loss=1.5870]


Logits stats - min: -5.5228, max: 1.7799
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12113/18200 [06:03<02:28, 40.87it/s, loss=2.6089]


Logits stats - min: -5.3398, max: 1.5935
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5499, max: 1.5301
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12128/18200 [06:03<02:26, 41.57it/s, loss=2.6542]


Logits stats - min: -5.3194, max: 1.7495
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3044, max: 1.5583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6056, max: 2.1989
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12133/18200 [06:04<02:28, 40.74it/s, loss=2.6498]


Logits stats - min: -5.2717, max: 1.5494
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3859, max: 2.1898
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12143/18200 [06:04<02:30, 40.32it/s, loss=1.5912]


Logits stats - min: -5.5198, max: 1.6235
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4154, max: 2.2536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9506, max: 1.5554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5564, max: 1.6186
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12154/18200 [06:04<02:18, 43.57it/s, loss=1.5965]


Logits stats - min: -5.2973, max: 1.5639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4413, max: 2.4491
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12174/18200 [06:05<02:25, 41.36it/s, loss=1.6322]


Logits stats - min: -6.2165, max: 2.3287
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12189/18200 [06:05<02:25, 41.25it/s, loss=2.6578]


Logits stats - min: -5.9618, max: 2.5703
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6395, max: 1.7210
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12200/18200 [06:05<02:10, 46.10it/s, loss=1.5883]


Logits stats - min: -5.6141, max: 1.7556
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3037, max: 2.3539
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4155, max: 2.3950
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9721, max: 2.2105
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0839, max: 2.8684
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4015, max: 1.7710
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12212/18200 [06:05<02:06, 47.28it/s, loss=1.6550]


Logits stats - min: -5.8665, max: 2.5224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8706, max: 1.8814
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5235, max: 1.7241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1653, max: 2.2076
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3793, max: 2.2784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2522, max: 1.5192
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12222/18200 [06:06<02:12, 45.14it/s, loss=1.6412]


Logits stats - min: -5.5976, max: 1.8269
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12238/18200 [06:06<02:04, 47.76it/s, loss=1.6664]


Logits stats - min: -6.3955, max: 2.8827
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1954, max: 1.7216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8064, max: 2.0349
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9885, max: 2.1224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3640, max: 2.3378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0783, max: 2.2376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6901, max: 2.6653
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6442, max: 2.6109
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12248/18200 [06:06<02:13, 44.43it/s, loss=1.6388]


Logits stats - min: -5.2326, max: 1.7006
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3215, max: 1.7522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2527, max: 2.7658
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12258/18200 [06:07<02:13, 44.56it/s, loss=1.8410]


Logits stats - min: -5.3071, max: 2.1202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1166, max: 2.3252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4288, max: 1.8915
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1148, max: 1.3746
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3426, max: 1.5086
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5859, max: 2.2680
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▌       | 12275/18200 [06:07<02:05, 47.38it/s, loss=1.6002]


Logits stats - min: -5.8533, max: 2.1234
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9651, max: 1.7452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5565, max: 2.2840
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12291/18200 [06:07<02:09, 45.76it/s, loss=1.6385]


Logits stats - min: -5.0724, max: 1.5670
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7280, max: 1.7836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7565, max: 2.4279
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12301/18200 [06:08<02:25, 40.54it/s, loss=2.3301]


Logits stats - min: -5.8221, max: 1.7782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3213, max: 1.8549
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12311/18200 [06:08<02:14, 43.81it/s, loss=1.6076]


Logits stats - min: -5.0741, max: 1.4893
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6224, max: 1.5829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5830, max: 1.7793
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5103, max: 1.7262
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12321/18200 [06:08<02:12, 44.35it/s, loss=1.5682]


Logits stats - min: -5.8811, max: 2.6464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.5877, max: 1.3218
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12331/18200 [06:08<02:12, 44.25it/s, loss=1.6085]


Logits stats - min: -6.5461, max: 2.4634
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6843, max: 1.7856
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9783, max: 1.6349
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7853, max: 2.4075
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12346/18200 [06:09<02:17, 42.59it/s, loss=2.1693]


Logits stats - min: -6.2786, max: 2.1774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8062, max: 1.8007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4350, max: 2.2951
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12357/18200 [06:09<02:05, 46.71it/s, loss=1.6147]


Logits stats - min: -5.2393, max: 2.4803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2380, max: 1.7058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3927, max: 2.2749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2712, max: 1.9294
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4816, max: 2.5299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7185, max: 2.5542
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12372/18200 [06:09<02:09, 45.03it/s, loss=1.5319]


Logits stats - min: -6.0956, max: 2.4115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7644, max: 1.6632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0257, max: 1.8063
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12387/18200 [06:09<02:16, 42.62it/s, loss=1.5103]


Logits stats - min: -5.9578, max: 2.4085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6852, max: 2.2230
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12397/18200 [06:10<02:17, 42.07it/s, loss=1.5768]


Logits stats - min: -5.3918, max: 1.8140
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12407/18200 [06:10<02:12, 43.69it/s, loss=1.6406]


Logits stats - min: -5.1729, max: 1.6606
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7453, max: 2.0232
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5682, max: 2.2971
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5587, max: 1.3208
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12417/18200 [06:10<02:10, 44.37it/s, loss=1.5137]


Logits stats - min: -5.7084, max: 2.2221
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9791, max: 2.3468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9397, max: 1.8231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9221, max: 2.1816
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12433/18200 [06:10<02:06, 45.48it/s, loss=1.6276]


Logits stats - min: -5.7138, max: 2.0143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6001, max: 2.7699
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0068, max: 1.7529
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12448/18200 [06:11<02:18, 41.57it/s, loss=1.6360]


Logits stats - min: -6.3445, max: 2.3213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9631, max: 1.6189
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2382, max: 1.4715
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12458/18200 [06:11<02:19, 41.19it/s, loss=1.5863]


Logits stats - min: -5.8881, max: 1.7295
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12468/18200 [06:11<02:19, 40.98it/s, loss=1.5749]


Logits stats - min: -6.6147, max: 2.1279
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6368, max: 1.5739
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12478/18200 [06:12<02:16, 41.95it/s, loss=1.6234]


Logits stats - min: -6.6018, max: 2.5417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8561, max: 2.1103
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12483/18200 [06:12<02:14, 42.59it/s, loss=1.5096]


Logits stats - min: -4.8139, max: 1.3257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9520, max: 1.6290
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12498/18200 [06:12<02:21, 40.38it/s, loss=1.4183]


Logits stats - min: -6.0240, max: 1.5219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7316, max: 1.8789
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12514/18200 [06:12<02:06, 44.84it/s, loss=2.2304]


Logits stats - min: -5.3732, max: 1.3752
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6946, max: 2.4044
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8535, max: 2.4824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6056, max: 1.7419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2576, max: 2.3003
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12524/18200 [06:13<02:11, 43.02it/s, loss=2.2545]


Logits stats - min: -6.2424, max: 2.2683
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8190, max: 1.7404
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12539/18200 [06:13<02:07, 44.51it/s, loss=1.4311]


Logits stats - min: -5.1562, max: 1.2832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1805, max: 2.2792
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6817, max: 2.3093
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12550/18200 [06:13<02:10, 43.18it/s, loss=1.6847]


Logits stats - min: -5.7215, max: 2.3489
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2992, max: 2.5068
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12560/18200 [06:13<02:12, 42.64it/s, loss=2.3288]


Logits stats - min: -6.5626, max: 2.3766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1645, max: 1.7734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3882, max: 2.4986
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0668, max: 2.3664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5629, max: 1.8409
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12575/18200 [06:14<02:19, 40.39it/s, loss=1.6793]


Logits stats - min: -5.4848, max: 1.7849
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5828, max: 2.1492
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12590/18200 [06:14<02:10, 42.90it/s, loss=1.5745]


Logits stats - min: -5.9675, max: 1.5767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5234, max: 1.7527
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12600/18200 [06:14<02:13, 41.93it/s, loss=2.1790]


Logits stats - min: -5.6767, max: 1.8897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7485, max: 1.6721
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1539, max: 1.6962
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5625, max: 2.6317
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12621/18200 [06:15<02:08, 43.58it/s, loss=1.6293]


Logits stats - min: -5.8242, max: 1.7453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7154, max: 1.8674
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7624, max: 2.7417
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12636/18200 [06:15<02:08, 43.29it/s, loss=1.6415]


Logits stats - min: -5.6097, max: 1.7710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6067, max: 2.6428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7147, max: 2.3957
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3186, max: 1.9979
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12646/18200 [06:16<02:10, 42.49it/s, loss=2.2043]


Logits stats - min: -5.3696, max: 2.1657
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5148, max: 1.4743
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12661/18200 [06:16<02:10, 42.45it/s, loss=1.6347]


Logits stats - min: -5.2546, max: 1.5751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4051, max: 2.2101
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12686/18200 [06:16<02:10, 42.32it/s, loss=1.8483]


Logits stats - min: -5.7424, max: 2.2605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7531, max: 1.4362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2417, max: 2.4600
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9996, max: 2.4746
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12710/18200 [06:17<02:14, 40.95it/s, loss=2.1871]


Logits stats - min: -5.7932, max: 1.7784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6303, max: 1.9497
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5749, max: 1.7806
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9767, max: 1.9259
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12720/18200 [06:17<02:04, 44.10it/s, loss=1.6107]


Logits stats - min: -5.2964, max: 1.9157
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8433, max: 2.5351
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7318, max: 2.6466
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12731/18200 [06:18<02:03, 44.28it/s, loss=1.6391]


Logits stats - min: -5.3507, max: 2.1648
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4826, max: 2.4762
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12741/18200 [06:18<02:05, 43.52it/s, loss=1.5740]


Logits stats - min: -5.4203, max: 1.6966
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7988, max: 1.7457
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12751/18200 [06:18<02:08, 42.46it/s, loss=2.1777]


Logits stats - min: -5.6184, max: 1.8983
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12756/18200 [06:18<02:08, 42.22it/s, loss=1.5799]


Logits stats - min: -5.8656, max: 1.9765
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12766/18200 [06:18<02:08, 42.24it/s, loss=2.6689]


Logits stats - min: -6.3034, max: 2.4652
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6820, max: 2.4012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5575, max: 2.1201
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12777/18200 [06:19<01:56, 46.40it/s, loss=1.6213]


Logits stats - min: -4.9063, max: 1.3111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8155, max: 2.4121
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0092, max: 1.8953
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8654, max: 1.9085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7843, max: 2.6988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7278, max: 2.2112
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12792/18200 [06:19<01:59, 45.42it/s, loss=1.4204]


Logits stats - min: -5.9506, max: 2.3884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1339, max: 1.4630
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9911, max: 1.7974
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12797/18200 [06:19<01:59, 45.03it/s, loss=1.4163]


Logits stats - min: -5.8586, max: 1.7074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8060, max: 1.1902
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12812/18200 [06:19<02:08, 41.97it/s, loss=1.6022]


Logits stats - min: -5.1542, max: 1.9396
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3476, max: 2.3569
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12822/18200 [06:20<02:02, 44.07it/s, loss=1.5750]


Logits stats - min: -5.1868, max: 2.2213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5544, max: 2.7034
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5286, max: 2.3574
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▏      | 12833/18200 [06:20<01:57, 45.69it/s, loss=2.1857]


Logits stats - min: -5.2650, max: 1.9128
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6358, max: 1.8289
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3244, max: 1.8489
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5538, max: 2.3624
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▏      | 12844/18200 [06:20<01:55, 46.47it/s, loss=1.8264]


Logits stats - min: -5.5644, max: 1.6984
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7379, max: 1.7965
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8418, max: 2.2277
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12859/18200 [06:20<02:06, 42.14it/s, loss=1.5658]


Logits stats - min: -5.7020, max: 2.2689
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4634, max: 2.0718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0544, max: 2.1004
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12870/18200 [06:21<01:53, 46.78it/s, loss=1.4421]


Logits stats - min: -6.4879, max: 2.3509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5722, max: 2.1176
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6110, max: 2.2260
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0901, max: 2.1839
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6099, max: 2.4291
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5474, max: 1.4392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1110, max: 2.1455
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12875/18200 [06:21<01:58, 44.93it/s, loss=1.5425]


Logits stats - min: -5.1184, max: 1.6128
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12890/18200 [06:21<01:59, 44.38it/s, loss=1.5551]


Logits stats - min: -5.8520, max: 2.2427
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4902, max: 1.7631
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12895/18200 [06:21<01:55, 45.90it/s, loss=1.4331]


Logits stats - min: -6.5205, max: 2.2224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6736, max: 2.0420
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2237, max: 2.2515
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12915/18200 [06:22<02:07, 41.55it/s, loss=1.5073]


Logits stats - min: -5.9666, max: 1.9345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5353, max: 1.6230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5172, max: 2.4658
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12926/18200 [06:22<02:02, 43.02it/s, loss=1.6426]


Logits stats - min: -6.7454, max: 2.4453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9836, max: 2.3177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4995, max: 1.9641
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12932/18200 [06:22<01:58, 44.63it/s, loss=1.5621]


Logits stats - min: -6.6871, max: 2.6033
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0683, max: 2.2122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1836, max: 1.9074
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12952/18200 [06:23<02:03, 42.42it/s, loss=1.5008]


Logits stats - min: -5.5380, max: 1.9259
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7240, max: 1.9848
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12963/18200 [06:23<01:57, 44.43it/s, loss=1.5409]


Logits stats - min: -6.5589, max: 2.5195
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3375, max: 2.6704
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4512, max: 2.3231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6717, max: 1.5735
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12974/18200 [06:23<01:57, 44.33it/s, loss=1.5607]


Logits stats - min: -6.3005, max: 2.2367
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5153, max: 1.6826
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2448, max: 1.9058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4239, max: 2.4262
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12979/18200 [06:23<01:56, 44.65it/s, loss=1.5656]


Logits stats - min: -5.6239, max: 2.0337
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3341, max: 2.2160
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12989/18200 [06:24<02:00, 43.31it/s, loss=2.2916]


Logits stats - min: -5.7931, max: 1.6809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4676, max: 2.4192
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 13009/18200 [06:24<02:13, 38.94it/s, loss=1.5589]


Logits stats - min: -5.0406, max: 1.3913
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3288, max: 2.1380
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13019/18200 [06:24<02:04, 41.71it/s, loss=1.5575]


Logits stats - min: -5.0513, max: 1.8445
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0235, max: 1.8992
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5040, max: 2.3453
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13029/18200 [06:24<02:04, 41.39it/s, loss=1.5451]


Logits stats - min: -5.7816, max: 1.8593
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7436, max: 2.3727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7739, max: 2.1984
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13039/18200 [06:25<02:05, 41.04it/s, loss=2.1779]


Logits stats - min: -6.1396, max: 1.6080
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13049/18200 [06:25<02:00, 42.73it/s, loss=1.6212]


Logits stats - min: -5.5078, max: 1.4319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2051, max: 2.4808
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0668, max: 1.9377
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13056/18200 [06:25<01:49, 46.82it/s, loss=1.6168]


Logits stats - min: -6.2463, max: 2.3376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4959, max: 2.3204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1497, max: 1.4796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1707, max: 1.9729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5742, max: 2.3525
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4815, max: 2.2819
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13066/18200 [06:25<01:58, 43.47it/s, loss=1.6170]


Logits stats - min: -5.0318, max: 1.3304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3810, max: 2.3753
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13087/18200 [06:26<01:51, 45.93it/s, loss=1.3640]


Logits stats - min: -6.1394, max: 1.8514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1314, max: 1.3689
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6622, max: 2.4020
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8468, max: 2.3453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4778, max: 1.9620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0389, max: 1.9571
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13098/18200 [06:26<01:47, 47.52it/s, loss=1.5181]


Logits stats - min: -5.3613, max: 1.6634
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3888, max: 1.8074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2246, max: 1.8184
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3104, max: 2.4764
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13108/18200 [06:26<01:54, 44.35it/s, loss=1.5472]


Logits stats - min: -7.8917, max: 2.1800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1923, max: 1.2951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6650, max: 2.5373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3339, max: 2.2907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7256, max: 2.0180


Training:  72%|████████████████▌      | 13118/18200 [06:26<01:57, 43.18it/s, loss=1.6303]

Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1358, max: 1.7808
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9632, max: 2.0192
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13133/18200 [06:27<02:01, 41.79it/s, loss=1.4898]


Logits stats - min: -6.3309, max: 2.3773
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0547, max: 2.3766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5177, max: 2.0080
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13143/18200 [06:27<01:55, 43.60it/s, loss=1.4395]


Logits stats - min: -5.6987, max: 1.7994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0056, max: 1.2059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6819, max: 2.4228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6909, max: 1.8112
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1896, max: 2.2197
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13154/18200 [06:27<01:46, 47.31it/s, loss=2.2123]


Logits stats - min: -5.9336, max: 1.8938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3776, max: 1.6297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5581, max: 1.9145
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13169/18200 [06:28<01:48, 46.43it/s, loss=1.5638]


Logits stats - min: -5.5580, max: 2.4214
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3175, max: 1.2478
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3250, max: 2.6666
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8606, max: 1.8462
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13180/18200 [06:28<01:49, 45.82it/s, loss=2.1929]


Logits stats - min: -5.9818, max: 2.4191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4231, max: 2.5858
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8355, max: 2.5991
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1046, max: 2.2621
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13190/18200 [06:28<01:59, 41.89it/s, loss=1.5632]


Logits stats - min: -5.6686, max: 2.4064
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2626, max: 2.2648
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6784, max: 2.0556
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13200/18200 [06:28<01:50, 45.09it/s, loss=1.5901]


Logits stats - min: -6.8124, max: 2.5332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1434, max: 2.3657
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7673, max: 1.4864
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13211/18200 [06:29<01:43, 48.14it/s, loss=1.6219]


Logits stats - min: -7.8678, max: 1.8642
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3228, max: 1.9321
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0017, max: 1.7639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4231, max: 2.7596
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13221/18200 [06:29<01:54, 43.45it/s, loss=1.5444]


Logits stats - min: -5.6014, max: 1.7786
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2194, max: 1.4995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9602, max: 1.4008
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13231/18200 [06:29<01:56, 42.50it/s, loss=1.6157]


Logits stats - min: -5.4135, max: 2.0202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9912, max: 2.2135
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13241/18200 [06:29<02:02, 40.54it/s, loss=1.5911]


Logits stats - min: -6.0022, max: 1.8082
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13251/18200 [06:30<01:59, 41.37it/s, loss=1.5666]


Logits stats - min: -6.0708, max: 1.8973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5670, max: 2.3959
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13266/18200 [06:30<01:59, 41.12it/s, loss=2.1623]


Logits stats - min: -5.7095, max: 2.1852
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7319, max: 2.2422
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13276/18200 [06:30<01:55, 42.62it/s, loss=2.1910]


Logits stats - min: -5.2718, max: 1.9097
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6549, max: 1.8407
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13286/18200 [06:30<01:53, 43.45it/s, loss=1.5305]


Logits stats - min: -5.6985, max: 1.7296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9639, max: 2.0124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7717, max: 1.6925
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7953, max: 2.3149
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6213, max: 1.9547
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13301/18200 [06:31<01:48, 45.35it/s, loss=1.5603]


Logits stats - min: -5.2962, max: 1.7985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4922, max: 1.8354
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0260, max: 2.4936
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9487, max: 1.8670
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6889, max: 2.2722
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13312/18200 [06:31<01:48, 44.93it/s, loss=1.5521]


Logits stats - min: -5.9120, max: 2.2373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8126, max: 2.2067
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8813, max: 2.3077
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13317/18200 [06:31<01:46, 45.82it/s, loss=2.0928]


Logits stats - min: -7.9527, max: 2.0049
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0839, max: 1.9530
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9034, max: 1.6539
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13332/18200 [06:31<01:49, 44.47it/s, loss=1.5544]


Logits stats - min: -6.6088, max: 2.4506
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8545, max: 1.8453
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13347/18200 [06:32<01:52, 42.98it/s, loss=1.5686]


Logits stats - min: -5.6264, max: 1.7703
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8432, max: 2.2020
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4943, max: 2.4430
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▉      | 13357/18200 [06:32<01:57, 41.12it/s, loss=1.4702]


Logits stats - min: -6.2011, max: 2.3223
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3978, max: 2.2824
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▉      | 13362/18200 [06:32<01:55, 41.73it/s, loss=1.5379]


Logits stats - min: -6.3950, max: 2.2515
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13382/18200 [06:33<01:55, 41.54it/s, loss=1.5718]


Logits stats - min: -5.4062, max: 1.8266
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4886, max: 2.6803
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13392/18200 [06:33<01:56, 41.18it/s, loss=1.6096]


Logits stats - min: -5.8477, max: 1.7270
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13402/18200 [06:33<01:58, 40.64it/s, loss=1.5572]


Logits stats - min: -5.5966, max: 1.7026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9136, max: 2.2831
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13412/18200 [06:33<01:58, 40.48it/s, loss=1.5199]


Logits stats - min: -5.4436, max: 2.2119
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9757, max: 2.1039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6972, max: 1.2772
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4021, max: 2.0583
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13422/18200 [06:34<01:52, 42.46it/s, loss=1.5555]


Logits stats - min: -5.5812, max: 1.8110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9264, max: 1.4057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8297, max: 1.8325
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13437/18200 [06:34<01:49, 43.37it/s, loss=1.5619]


Logits stats - min: -5.8737, max: 1.7960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2168, max: 1.7643
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13448/18200 [06:34<01:44, 45.47it/s, loss=1.7101]


Logits stats - min: -6.0233, max: 2.3613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6641, max: 1.7019
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5924, max: 2.4462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6040, max: 1.5996
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4281, max: 1.9052
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13463/18200 [06:34<01:49, 43.34it/s, loss=1.7229]


Logits stats - min: -7.4041, max: 2.0821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7846, max: 2.4461
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5620, max: 2.2231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0168, max: 2.1771
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3781, max: 2.3898
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13483/18200 [06:35<01:47, 43.86it/s, loss=1.6147]


Logits stats - min: -6.9948, max: 2.8177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6581, max: 2.6547
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5088, max: 1.9430
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13488/18200 [06:35<01:48, 43.55it/s, loss=1.7575]


Logits stats - min: -6.5754, max: 2.2007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6173, max: 2.3343
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13499/18200 [06:35<01:45, 44.72it/s, loss=2.9414]


Logits stats - min: -7.5312, max: 1.9894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4723, max: 2.1240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9494, max: 2.1843
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13510/18200 [06:36<01:45, 44.37it/s, loss=2.0903]


Logits stats - min: -6.6799, max: 2.2403
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1380, max: 2.1988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0256, max: 1.9380
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7341, max: 1.7773
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13520/18200 [06:36<01:52, 41.56it/s, loss=1.6136]


Logits stats - min: -6.3233, max: 2.3237
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13535/18200 [06:36<01:48, 43.08it/s, loss=1.5879]


Logits stats - min: -5.6436, max: 1.8356
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5836, max: 1.3458
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7855, max: 2.2157
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13545/18200 [06:36<01:47, 43.20it/s, loss=1.6302]


Logits stats - min: -6.1499, max: 1.8302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6579, max: 1.7257
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13560/18200 [06:37<01:48, 42.71it/s, loss=1.5224]


Logits stats - min: -5.6275, max: 1.9436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0389, max: 2.3776
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9103, max: 2.2903
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13570/18200 [06:37<01:51, 41.45it/s, loss=1.5622]


Logits stats - min: -5.9874, max: 1.5985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7530, max: 1.7624
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13585/18200 [06:37<01:49, 42.20it/s, loss=1.5815]


Logits stats - min: -6.5161, max: 2.2923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8129, max: 1.7440
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5182, max: 1.8205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6158, max: 1.8956
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13596/18200 [06:38<01:48, 42.33it/s, loss=2.8114]


Logits stats - min: -5.8719, max: 1.8187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6525, max: 1.7277
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13607/18200 [06:38<01:43, 44.23it/s, loss=1.4373]


Logits stats - min: -5.3028, max: 1.7029
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1578, max: 1.9297
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13617/18200 [06:38<01:40, 45.73it/s, loss=1.5576]


Logits stats - min: -4.9115, max: 1.2303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2654, max: 1.9563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0452, max: 2.3027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8674, max: 1.6488
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13633/18200 [06:38<01:38, 46.26it/s, loss=1.9080]


Logits stats - min: -5.9725, max: 1.8017
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5835, max: 2.4008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6229, max: 1.7074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5004, max: 2.2167
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13644/18200 [06:39<01:37, 46.55it/s, loss=1.6156]


Logits stats - min: -7.3265, max: 2.0140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9743, max: 1.6083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3093, max: 2.2401
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0337, max: 1.5110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6211, max: 1.8227
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13650/18200 [06:39<01:37, 46.54it/s, loss=1.6247]


Logits stats - min: -5.7862, max: 1.8836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9921, max: 1.3947
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8166, max: 2.6289
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13666/18200 [06:39<01:33, 48.24it/s, loss=1.4507]


Logits stats - min: -5.4803, max: 1.7662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1576, max: 2.2316
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3786, max: 2.2710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9695, max: 1.7759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3243, max: 2.1981
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4354, max: 1.5209
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13671/18200 [06:39<01:35, 47.19it/s, loss=1.6267]


Logits stats - min: -6.4210, max: 2.5555
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5326, max: 2.6389
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13686/18200 [06:40<01:44, 43.31it/s, loss=1.5259]


Logits stats - min: -6.1120, max: 1.7028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7922, max: 1.8326
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4046, max: 1.8488
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7268, max: 2.3280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2675, max: 1.7750
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13698/18200 [06:40<01:36, 46.76it/s, loss=1.6123]


Logits stats - min: -5.7422, max: 1.8707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2548, max: 2.2502
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9223, max: 2.4328
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13708/18200 [06:40<01:40, 44.91it/s, loss=1.5532]


Logits stats - min: -5.7331, max: 1.8612
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4028, max: 1.4196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5853, max: 2.4999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0732, max: 1.3079
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13718/18200 [06:40<01:37, 45.78it/s, loss=2.1807]


Logits stats - min: -5.7423, max: 1.7042
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5209, max: 2.3748
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9263, max: 1.8145
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13728/18200 [06:41<01:43, 43.01it/s, loss=2.1148]


Logits stats - min: -6.1526, max: 2.1686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4981, max: 1.8208
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13738/18200 [06:41<01:50, 40.54it/s, loss=1.5053]


Logits stats - min: -6.0224, max: 1.6358
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▎     | 13748/18200 [06:41<01:47, 41.52it/s, loss=2.1554]


Logits stats - min: -5.1825, max: 1.7643
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8892, max: 2.1932
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1556, max: 1.7213
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13758/18200 [06:41<01:45, 42.16it/s, loss=1.5600]


Logits stats - min: -5.4425, max: 1.7961
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8095, max: 1.7387
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13774/18200 [06:42<01:41, 43.55it/s, loss=1.4203]


Logits stats - min: -6.3344, max: 2.4253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6865, max: 2.3055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2776, max: 1.5637
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13785/18200 [06:42<01:42, 43.07it/s, loss=1.5464]


Logits stats - min: -6.5626, max: 2.2267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9703, max: 1.9733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3445, max: 2.6679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7402, max: 2.2182
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13800/18200 [06:42<01:40, 43.61it/s, loss=1.4106]


Logits stats - min: -6.1736, max: 1.9189
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8123, max: 1.8606
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4523, max: 1.8231
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13816/18200 [06:43<01:36, 45.22it/s, loss=1.6224]


Logits stats - min: -6.5031, max: 2.2626
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1623, max: 1.6288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9157, max: 1.7357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0715, max: 2.4537
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13831/18200 [06:43<01:40, 43.57it/s, loss=1.6223]


Logits stats - min: -5.6249, max: 1.9224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9968, max: 2.2297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5260, max: 1.6994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2474, max: 2.2954
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13846/18200 [06:43<01:41, 42.82it/s, loss=2.1677]


Logits stats - min: -5.7219, max: 2.3493
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2426, max: 2.2268
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13856/18200 [06:43<01:40, 43.07it/s, loss=1.6061]


Logits stats - min: -5.9219, max: 1.7600
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8692, max: 1.8131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9690, max: 2.3640
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13876/18200 [06:44<01:40, 43.18it/s, loss=1.5707]


Logits stats - min: -5.8794, max: 2.4805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5676, max: 1.5297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7925, max: 1.8259
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13891/18200 [06:44<01:39, 43.28it/s, loss=1.6238]


Logits stats - min: -5.2393, max: 1.8340
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3845, max: 1.5309
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13911/18200 [06:45<01:43, 41.48it/s, loss=1.5630]


Logits stats - min: -6.1724, max: 2.2921
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1578, max: 2.4582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1259, max: 2.1848
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▌     | 13926/18200 [06:45<01:37, 44.01it/s, loss=1.6132]


Logits stats - min: -5.8450, max: 1.6885
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3580, max: 1.8707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.6579, max: 1.7398
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▌     | 13931/18200 [06:45<01:37, 43.64it/s, loss=2.1517]


Logits stats - min: -6.0843, max: 1.9024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5013, max: 2.5287
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4044, max: 1.5715
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4570, max: 2.2099
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13947/18200 [06:46<01:34, 45.09it/s, loss=1.6071]


Logits stats - min: -6.0293, max: 2.4068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2000, max: 1.8529
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0898, max: 1.7701
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13958/18200 [06:46<01:38, 43.05it/s, loss=1.9262]


Logits stats - min: -5.7428, max: 2.3529
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0157, max: 2.5301
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13968/18200 [06:46<01:38, 42.99it/s, loss=1.5390]


Logits stats - min: -4.9756, max: 1.3720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8397, max: 1.8581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2798, max: 2.3512
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13979/18200 [06:46<01:36, 43.72it/s, loss=1.5813]


Logits stats - min: -6.7344, max: 2.5833
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7362, max: 2.4529
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8011, max: 2.5533
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13994/18200 [06:47<01:38, 42.78it/s, loss=1.2718]


Logits stats - min: -5.3129, max: 1.7123
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14004/18200 [06:47<01:37, 42.85it/s, loss=1.5416]


Logits stats - min: -6.7770, max: 2.5404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8863, max: 2.2658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6991, max: 1.6981
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14014/18200 [06:47<01:45, 39.67it/s, loss=1.6085]


Logits stats - min: -5.0564, max: 1.4378
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14025/18200 [06:48<01:33, 44.74it/s, loss=1.5736]


Logits stats - min: -5.2561, max: 1.7094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6792, max: 1.7675
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3109, max: 1.8267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1918, max: 1.7941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2950, max: 2.5068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8260, max: 1.7370
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14035/18200 [06:48<01:32, 45.17it/s, loss=1.5513]


Logits stats - min: -7.0810, max: 2.7277
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0315, max: 2.6185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5191, max: 1.5089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2565, max: 1.6159
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6527, max: 2.6092
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14048/18200 [06:48<01:24, 49.12it/s, loss=1.5656]


Logits stats - min: -5.5062, max: 1.8259
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6432, max: 2.4314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1021, max: 2.1867
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14058/18200 [06:48<01:26, 48.01it/s, loss=1.6501]


Logits stats - min: -7.0040, max: 2.7714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3720, max: 1.4360
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8590, max: 2.1977
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14068/18200 [06:48<01:28, 46.74it/s, loss=1.6125]


Logits stats - min: -5.6796, max: 1.6345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1508, max: 3.1263
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5609, max: 1.5059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5979, max: 1.9969
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14078/18200 [06:49<01:33, 44.23it/s, loss=1.5712]


Logits stats - min: -6.8175, max: 2.6481
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0573, max: 1.8790
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3932, max: 1.8151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8098, max: 2.4347
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14093/18200 [06:49<01:33, 43.84it/s, loss=1.6301]


Logits stats - min: -7.1678, max: 2.7435
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7271, max: 2.3566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8362, max: 1.6600
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5435, max: 2.2270
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14104/18200 [06:49<01:27, 46.67it/s, loss=2.1720]


Logits stats - min: -6.1862, max: 2.4946
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0192, max: 2.5709
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7924, max: 2.4701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5667, max: 1.7012
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▊     | 14129/18200 [06:50<01:37, 41.81it/s, loss=1.6080]


Logits stats - min: -6.0773, max: 2.2590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5407, max: 2.5000
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6973, max: 1.4333
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▊     | 14134/18200 [06:50<01:33, 43.35it/s, loss=1.5914]


Logits stats - min: -5.4856, max: 1.9954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4719, max: 2.4433
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0984, max: 2.3252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8814, max: 2.3818
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▊     | 14144/18200 [06:50<01:30, 44.86it/s, loss=1.4310]


Logits stats - min: -5.8464, max: 1.7348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1396, max: 2.6443
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14169/18200 [06:51<01:36, 41.83it/s, loss=1.4732]


Logits stats - min: -5.8882, max: 1.7942
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2848, max: 2.4291
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5434, max: 1.8388
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14179/18200 [06:51<01:37, 41.13it/s, loss=1.5791]


Logits stats - min: -7.9416, max: 2.0007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5988, max: 1.8298
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7641, max: 2.2742
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14199/18200 [06:51<01:33, 42.67it/s, loss=1.6352]


Logits stats - min: -6.7973, max: 2.5670
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5408, max: 1.3059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6441, max: 1.8434
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14205/18200 [06:52<01:27, 45.60it/s, loss=2.1424]


Logits stats - min: -5.7293, max: 1.7563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9552, max: 1.9175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1311, max: 1.4441
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14220/18200 [06:52<01:30, 43.78it/s, loss=1.6038]


Logits stats - min: -6.1914, max: 2.5222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2858, max: 1.8156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1808, max: 2.2620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8645, max: 2.3214
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14236/18200 [06:52<01:34, 42.01it/s, loss=1.5281]


Logits stats - min: -7.2206, max: 2.8202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0943, max: 2.5313
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5344, max: 1.7889
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14247/18200 [06:53<01:30, 43.69it/s, loss=1.5964]


Logits stats - min: -6.2231, max: 2.4193
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7768, max: 1.7404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3283, max: 1.9118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6098, max: 1.6528
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14258/18200 [06:53<01:22, 47.51it/s, loss=1.5018]


Logits stats - min: -6.3441, max: 2.3596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9382, max: 1.7923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8244, max: 1.8035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0682, max: 1.3322
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5293, max: 1.7429
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5351, max: 1.7189
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14269/18200 [06:53<01:22, 47.62it/s, loss=1.6152]


Logits stats - min: -5.5380, max: 2.0233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4923, max: 2.4401
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9333, max: 2.2138
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14279/18200 [06:53<01:27, 44.84it/s, loss=1.7628]


Logits stats - min: -5.2878, max: 1.4010
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14289/18200 [06:53<01:30, 43.43it/s, loss=1.6004]


Logits stats - min: -6.7146, max: 2.3721
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5414, max: 2.5437
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14300/18200 [06:54<01:25, 45.44it/s, loss=2.1770]


Logits stats - min: -5.8416, max: 2.4378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9754, max: 1.6794
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0407, max: 2.6552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3495, max: 2.8381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8902, max: 2.5087
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14315/18200 [06:54<01:29, 43.48it/s, loss=1.5608]


Logits stats - min: -5.9472, max: 1.8156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7303, max: 2.1036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4971, max: 1.8111
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14326/18200 [06:54<01:23, 46.35it/s, loss=1.5605]


Logits stats - min: -5.4103, max: 1.6195
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9233, max: 1.6714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2756, max: 1.8773
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14346/18200 [06:55<01:28, 43.47it/s, loss=1.5353]


Logits stats - min: -6.2950, max: 2.1819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2178, max: 2.6162
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8745, max: 2.5328
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4486, max: 2.3366
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14357/18200 [06:55<01:27, 43.90it/s, loss=1.5591]


Logits stats - min: -5.9192, max: 1.7194
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1312, max: 1.6890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6769, max: 1.3286
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0468, max: 1.8518
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14368/18200 [06:55<01:20, 47.87it/s, loss=2.4087]


Logits stats - min: -5.8978, max: 1.7505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6318, max: 2.0206
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7632, max: 2.5821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8651, max: 2.2999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5081, max: 1.7735
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14378/18200 [06:55<01:24, 44.99it/s, loss=2.6379]


Logits stats - min: -5.3717, max: 1.8611
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0654, max: 2.6411
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9028, max: 1.9482
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14389/18200 [06:56<01:23, 45.44it/s, loss=1.6168]


Logits stats - min: -5.8290, max: 1.6991
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9182, max: 2.1601
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14404/18200 [06:56<01:29, 42.64it/s, loss=1.5428]


Logits stats - min: -6.4063, max: 2.4248
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9740, max: 2.5758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0224, max: 1.6762
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14429/18200 [06:57<01:31, 41.11it/s, loss=1.6126]


Logits stats - min: -6.1864, max: 1.7423
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14435/18200 [06:57<01:28, 42.50it/s, loss=1.6136]


Logits stats - min: -6.7965, max: 2.6152
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9776, max: 1.7695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0318, max: 1.7624
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7757, max: 2.3343
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8655, max: 2.1649
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▎    | 14451/18200 [06:57<01:27, 42.65it/s, loss=1.5280]


Logits stats - min: -6.7200, max: 2.3123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2993, max: 2.5116
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▎    | 14466/18200 [06:57<01:31, 40.97it/s, loss=1.6175]


Logits stats - min: -6.1317, max: 1.7531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8150, max: 1.6792
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14476/18200 [06:58<01:27, 42.43it/s, loss=1.5205]


Logits stats - min: -6.2471, max: 2.4183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9362, max: 2.3080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7755, max: 1.3294
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14486/18200 [06:58<01:26, 42.98it/s, loss=1.5384]


Logits stats - min: -5.3296, max: 1.7233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7133, max: 2.2034
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7789, max: 2.1710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9376, max: 1.7283
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14498/18200 [06:58<01:19, 46.32it/s, loss=1.6999]


Logits stats - min: -5.7614, max: 1.8981
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6278, max: 2.0036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8833, max: 2.4041
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14509/18200 [06:58<01:21, 45.50it/s, loss=2.3539]


Logits stats - min: -5.5075, max: 1.5986
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5818, max: 1.7491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3040, max: 2.0952
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14519/18200 [06:59<01:24, 43.35it/s, loss=1.5752]


Logits stats - min: -6.1574, max: 2.2302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7274, max: 1.7298
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5100, max: 2.3904
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14534/18200 [06:59<01:24, 43.25it/s, loss=1.5611]


Logits stats - min: -6.0021, max: 1.7963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4775, max: 1.3607
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7711, max: 1.8923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5036, max: 2.3477
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14545/18200 [06:59<01:23, 43.61it/s, loss=1.5566]


Logits stats - min: -6.2797, max: 2.2077
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6965, max: 2.2610
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14555/18200 [07:00<01:21, 44.77it/s, loss=1.5024]


Logits stats - min: -6.5621, max: 2.2426
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2074, max: 2.0936
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1518, max: 1.4275
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14565/18200 [07:00<01:22, 43.98it/s, loss=1.6120]


Logits stats - min: -5.6503, max: 1.7270
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8225, max: 2.3384
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0141, max: 1.7110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4930, max: 1.3513
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14580/18200 [07:00<01:26, 42.04it/s, loss=1.6192]


Logits stats - min: -5.9771, max: 1.7918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8105, max: 2.3734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4481, max: 1.5502
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5008, max: 2.3617
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14587/18200 [07:00<01:17, 46.82it/s, loss=1.6049]


Logits stats - min: -5.2724, max: 1.8357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8413, max: 1.8363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0447, max: 1.5605
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14597/18200 [07:01<01:22, 43.73it/s, loss=1.5579]


Logits stats - min: -5.4880, max: 1.8770
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8475, max: 1.7818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1312, max: 2.2054
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14607/18200 [07:01<01:26, 41.70it/s, loss=2.4682]


Logits stats - min: -5.9099, max: 1.7450
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9381, max: 2.3782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6512, max: 2.4595
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4265, max: 2.2556
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14618/18200 [07:01<01:19, 45.06it/s, loss=2.0878]


Logits stats - min: -6.2950, max: 1.7853
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1917, max: 2.1751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2468, max: 1.5748
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1225, max: 2.6392
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14628/18200 [07:01<01:25, 41.90it/s, loss=1.6000]


Logits stats - min: -6.8856, max: 2.3565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7986, max: 2.2890
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14638/18200 [07:01<01:18, 45.14it/s, loss=1.5101]


Logits stats - min: -6.2062, max: 1.9182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0414, max: 1.8767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0752, max: 2.4109
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0134, max: 1.9332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9493, max: 1.8679
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▌    | 14649/18200 [07:02<01:20, 44.07it/s, loss=2.1819]


Logits stats - min: -5.3143, max: 2.2459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6045, max: 1.6082
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14664/18200 [07:02<01:23, 42.51it/s, loss=1.5757]


Logits stats - min: -6.0267, max: 2.5003
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7902, max: 2.2204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4400, max: 1.3891
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4810, max: 2.4280
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14671/18200 [07:02<01:15, 47.00it/s, loss=1.4030]


Logits stats - min: -5.9574, max: 1.9005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8063, max: 1.7037
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9345, max: 1.6456
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14681/18200 [07:02<01:20, 43.51it/s, loss=1.2905]


Logits stats - min: -6.0484, max: 1.8477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1050, max: 2.3680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2827, max: 2.3362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8192, max: 1.3945
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14696/18200 [07:03<01:20, 43.69it/s, loss=2.1918]


Logits stats - min: -7.1185, max: 2.2122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1894, max: 2.1830
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14701/18200 [07:03<01:24, 41.65it/s, loss=2.2047]


Logits stats - min: -5.8282, max: 1.8032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1849, max: 2.2216
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14711/18200 [07:03<01:21, 42.86it/s, loss=1.9574]


Logits stats - min: -6.1136, max: 2.3186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4003, max: 2.5220
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8580, max: 1.4482
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4596, max: 2.2000
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14728/18200 [07:03<01:16, 45.67it/s, loss=1.6134]


Logits stats - min: -5.9956, max: 1.8424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3531, max: 1.5795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5033, max: 1.5317
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3084, max: 2.5538
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14743/18200 [07:04<01:24, 40.93it/s, loss=1.5462]


Logits stats - min: -6.0069, max: 2.3054
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2418, max: 2.6576
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14753/18200 [07:04<01:22, 41.74it/s, loss=1.5209]


Logits stats - min: -5.7023, max: 2.2451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6052, max: 1.7958
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14769/18200 [07:04<01:18, 43.61it/s, loss=1.4091]


Logits stats - min: -5.3144, max: 1.9743
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7881, max: 2.6063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3491, max: 1.6522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1968, max: 2.6493
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3517, max: 2.4457
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14780/18200 [07:05<01:15, 45.46it/s, loss=1.5090]


Logits stats - min: -5.7776, max: 2.1620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7332, max: 2.1781
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4842, max: 1.4591
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5154, max: 1.8276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8398, max: 2.1024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4029, max: 2.3799
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14806/18200 [07:05<01:19, 42.47it/s, loss=1.5329]


Logits stats - min: -4.9349, max: 1.4538
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14826/18200 [07:06<01:20, 41.73it/s, loss=1.5522]


Logits stats - min: -5.4384, max: 1.7501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4686, max: 1.5703
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2769, max: 2.3041
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8103, max: 2.9537
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14837/18200 [07:06<01:14, 45.18it/s, loss=1.5602]


Logits stats - min: -5.8319, max: 1.4708
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6258, max: 2.0150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3368, max: 1.7731
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14847/18200 [07:06<01:16, 44.10it/s, loss=1.6053]


Logits stats - min: -6.1784, max: 1.6500
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9745, max: 2.6041
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4992, max: 2.3698
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14862/18200 [07:07<01:20, 41.28it/s, loss=1.6045]


Logits stats - min: -6.0640, max: 1.9300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6461, max: 1.6544
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2099, max: 2.7173
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14872/18200 [07:07<01:20, 41.22it/s, loss=1.4047]


Logits stats - min: -6.2856, max: 1.6997
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6205, max: 2.4091
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14878/18200 [07:07<01:17, 42.98it/s, loss=1.5477]


Logits stats - min: -7.1588, max: 2.3106
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14888/18200 [07:07<01:17, 42.55it/s, loss=1.6148]


Logits stats - min: -6.6128, max: 2.4366
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14898/18200 [07:08<01:18, 42.33it/s, loss=2.0897]


Logits stats - min: -6.2668, max: 2.4636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1433, max: 2.3052
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14908/18200 [07:08<01:19, 41.23it/s, loss=2.1985]


Logits stats - min: -5.7267, max: 1.6575
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5457, max: 2.4135
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14919/18200 [07:08<01:12, 45.37it/s, loss=2.1331]


Logits stats - min: -5.6853, max: 1.8047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0100, max: 2.4304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2105, max: 2.3843
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9163, max: 1.4256
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6758, max: 1.7659
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14935/18200 [07:08<01:13, 44.45it/s, loss=1.3942]


Logits stats - min: -5.6257, max: 1.6332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5938, max: 2.1219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0890, max: 1.8175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1450, max: 1.6507
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14945/18200 [07:09<01:13, 44.22it/s, loss=1.5739]


Logits stats - min: -6.4204, max: 2.5804
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14960/18200 [07:09<01:17, 41.83it/s, loss=1.5149]


Logits stats - min: -6.2087, max: 2.1201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5703, max: 1.4126
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7913, max: 1.4321
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14970/18200 [07:09<01:15, 42.58it/s, loss=1.8602]


Logits stats - min: -6.9325, max: 2.4716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7639, max: 1.9139
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14981/18200 [07:09<01:13, 43.95it/s, loss=1.5047]


Logits stats - min: -9.0211, max: 2.1395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0092, max: 1.9502
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14991/18200 [07:10<01:13, 43.40it/s, loss=1.5309]


Logits stats - min: -5.9908, max: 1.8526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0930, max: 1.8069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1007, max: 1.8965
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 15001/18200 [07:10<01:13, 43.71it/s, loss=1.9493]


Logits stats - min: -5.9441, max: 2.3166
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2104, max: 1.8728
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8277, max: 2.4213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7430, max: 1.6801
Target unique values: tensor([0], device='cuda:0')


Training:  83%|██████████████████▉    | 15016/18200 [07:10<01:13, 43.60it/s, loss=1.6183]


Logits stats - min: -6.0472, max: 2.4016
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6651, max: 1.8847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1708, max: 1.7729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7466, max: 1.6243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9551, max: 1.8646
Target unique values: tensor([0], device='cuda:0')


Training:  83%|██████████████████▉    | 15026/18200 [07:10<01:14, 42.66it/s, loss=1.5768]


Logits stats - min: -5.9148, max: 1.9449
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5152, max: 2.3534
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15036/18200 [07:11<01:11, 44.34it/s, loss=1.5075]


Logits stats - min: -5.8236, max: 1.9311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5646, max: 1.9250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5588, max: 2.4480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7281, max: 1.5232
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7217, max: 2.5427
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15052/18200 [07:11<01:11, 43.88it/s, loss=1.5439]


Logits stats - min: -5.7864, max: 1.7648
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8124, max: 2.3798
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15062/18200 [07:11<01:11, 43.74it/s, loss=1.9221]


Logits stats - min: -5.8260, max: 1.7760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4156, max: 1.8408
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9483, max: 1.9286
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7469, max: 2.4482
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15080/18200 [07:12<01:06, 47.24it/s, loss=1.5991]


Logits stats - min: -5.6402, max: 1.6420
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7548, max: 1.8918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5001, max: 2.4040
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0801, max: 2.3567
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5681, max: 2.5967
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5263, max: 1.9343
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15090/18200 [07:12<01:05, 47.66it/s, loss=2.0086]


Logits stats - min: -5.8225, max: 1.8362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9854, max: 2.0942
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5066, max: 2.1276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7648, max: 2.3683
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6574, max: 2.3116
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15102/18200 [07:12<01:01, 50.40it/s, loss=1.9331]


Logits stats - min: -6.1195, max: 2.2988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2886, max: 1.8285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0916, max: 1.8888
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0764, max: 2.4287
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15113/18200 [07:12<01:06, 46.71it/s, loss=1.4925]


Logits stats - min: -6.0543, max: 1.7860
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15123/18200 [07:13<01:09, 44.49it/s, loss=1.5774]


Logits stats - min: -5.8882, max: 2.1800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9099, max: 1.7810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6404, max: 2.4885
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15134/18200 [07:13<01:07, 45.50it/s, loss=2.1789]


Logits stats - min: -6.8592, max: 2.2147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9916, max: 1.8285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1085, max: 2.2016
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5014, max: 2.3786
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15144/18200 [07:13<01:08, 44.47it/s, loss=1.5469]


Logits stats - min: -6.0175, max: 1.7319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6647, max: 2.3563
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15164/18200 [07:13<01:11, 42.25it/s, loss=1.6054]


Logits stats - min: -6.0602, max: 2.1743
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15174/18200 [07:14<01:08, 44.01it/s, loss=1.5212]


Logits stats - min: -5.3525, max: 1.5491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0594, max: 2.7948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9521, max: 1.8497
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15184/18200 [07:14<01:10, 42.69it/s, loss=1.6111]


Logits stats - min: -6.4953, max: 2.4647
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3882, max: 2.2023
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15199/18200 [07:14<01:08, 43.75it/s, loss=1.5580]


Logits stats - min: -5.9157, max: 2.2590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0235, max: 1.8344
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8268, max: 1.9158
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15204/18200 [07:14<01:08, 43.76it/s, loss=1.5488]


Logits stats - min: -6.1551, max: 2.0569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9819, max: 2.6986
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15214/18200 [07:15<01:07, 44.19it/s, loss=1.3994]


Logits stats - min: -5.9284, max: 1.8880
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15224/18200 [07:15<01:08, 43.69it/s, loss=1.5921]


Logits stats - min: -6.8733, max: 2.8279
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9096, max: 1.7532
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15235/18200 [07:15<01:05, 45.02it/s, loss=2.2100]


Logits stats - min: -6.1448, max: 1.7212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8983, max: 1.8230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6748, max: 2.5874
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3008, max: 2.2475
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5405, max: 1.9164
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15250/18200 [07:16<01:08, 43.31it/s, loss=1.6027]


Logits stats - min: -7.2000, max: 2.7533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1774, max: 2.0545
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9995, max: 1.9833
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15266/18200 [07:16<01:04, 45.48it/s, loss=1.5314]


Logits stats - min: -6.2602, max: 2.6024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9609, max: 2.4285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7923, max: 2.4721
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15276/18200 [07:16<01:05, 44.74it/s, loss=1.6576]


Logits stats - min: -5.8253, max: 1.9925
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0358, max: 2.0297
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15286/18200 [07:16<01:09, 41.84it/s, loss=1.9491]


Logits stats - min: -5.5103, max: 2.0063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9765, max: 1.6993
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3769, max: 2.0347
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3536, max: 2.4117
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3000, max: 2.2375
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15313/18200 [07:17<01:04, 44.67it/s, loss=2.1775]


Logits stats - min: -5.7079, max: 2.2439
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1554, max: 2.6269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9408, max: 1.9128
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1764, max: 1.9034
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9611, max: 1.6944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5140, max: 2.4619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7546, max: 1.7247
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15329/18200 [07:17<01:02, 46.30it/s, loss=1.3946]


Logits stats - min: -5.0534, max: 1.2796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7311, max: 1.6379
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7932, max: 1.7886
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▍   | 15349/18200 [07:18<01:07, 42.26it/s, loss=1.5480]


Logits stats - min: -5.7078, max: 2.1173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5833, max: 2.5076
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6841, max: 1.9817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2373, max: 2.2435
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▍   | 15354/18200 [07:18<01:04, 44.00it/s, loss=1.6062]


Logits stats - min: -6.6955, max: 2.3796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5778, max: 2.1304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1307, max: 1.8644
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8768, max: 1.9202
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▍   | 15374/18200 [07:18<01:07, 42.06it/s, loss=1.5723]


Logits stats - min: -6.1512, max: 2.0511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8721, max: 2.1727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5261, max: 2.0083
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15384/18200 [07:19<01:07, 41.99it/s, loss=1.5364]


Logits stats - min: -5.5532, max: 1.8363
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15394/18200 [07:19<01:10, 39.75it/s, loss=1.8614]


Logits stats - min: -5.4434, max: 1.9447
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2344, max: 1.7888
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15405/18200 [07:19<01:02, 44.64it/s, loss=1.5293]


Logits stats - min: -5.9586, max: 1.8014
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4022, max: 1.7680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6813, max: 2.3990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8316, max: 2.3873
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3777, max: 2.3260
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15420/18200 [07:19<01:03, 43.87it/s, loss=1.4086]


Logits stats - min: -7.0325, max: 2.2258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6934, max: 2.9675
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8937, max: 2.6050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6104, max: 2.4849
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15440/18200 [07:20<01:02, 44.35it/s, loss=1.6184]


Logits stats - min: -5.3716, max: 1.8350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7140, max: 1.8225
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3274, max: 1.6803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7108, max: 1.5701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8298, max: 1.8240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3134, max: 1.3290
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15451/18200 [07:20<00:59, 46.40it/s, loss=1.5414]


Logits stats - min: -5.9504, max: 1.3687
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2961, max: 2.3346
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3299, max: 2.4379
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8221, max: 1.7171
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15456/18200 [07:20<00:58, 47.27it/s, loss=1.5547]


Logits stats - min: -7.4409, max: 2.4316
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1702, max: 1.6592
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15471/18200 [07:21<00:58, 46.37it/s, loss=1.9429]


Logits stats - min: -5.6862, max: 1.7419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6912, max: 2.3834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3985, max: 1.7483
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5765, max: 2.3552
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15481/18200 [07:21<01:02, 43.61it/s, loss=1.5103]


Logits stats - min: -5.9638, max: 1.9480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8085, max: 1.6216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6362, max: 1.7160
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15492/18200 [07:21<00:56, 47.63it/s, loss=1.8571]


Logits stats - min: -6.4848, max: 2.2846
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9837, max: 1.8492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5640, max: 2.3382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0381, max: 3.0491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5811, max: 2.5827
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15502/18200 [07:21<01:00, 44.51it/s, loss=1.5217]


Logits stats - min: -5.7214, max: 1.6618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8901, max: 1.7399
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8970, max: 2.3305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9829, max: 2.0080
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15517/18200 [07:22<00:59, 44.97it/s, loss=2.0557]


Logits stats - min: -5.2412, max: 1.7519
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6986, max: 2.6955
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15527/18200 [07:22<01:06, 40.41it/s, loss=1.5719]


Logits stats - min: -5.4058, max: 1.9357
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▋   | 15537/18200 [07:22<01:06, 40.11it/s, loss=1.6002]


Logits stats - min: -6.9086, max: 2.3792
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▋   | 15547/18200 [07:22<01:04, 41.15it/s, loss=1.5413]


Logits stats - min: -5.6369, max: 1.8884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4836, max: 1.7730
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0856, max: 1.7709
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▋   | 15557/18200 [07:23<01:01, 42.87it/s, loss=1.9518]


Logits stats - min: -6.2803, max: 2.2860
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0863, max: 2.3091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9210, max: 2.5551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7908, max: 1.7056
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15563/18200 [07:23<00:56, 46.58it/s, loss=1.6115]


Logits stats - min: -5.7058, max: 1.7377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1698, max: 1.3362
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15578/18200 [07:23<01:00, 43.40it/s, loss=1.5097]


Logits stats - min: -5.7973, max: 1.6469
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9572, max: 1.7470
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0351, max: 1.7729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7910, max: 2.4468
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15583/18200 [07:23<01:00, 43.47it/s, loss=1.5971]


Logits stats - min: -5.2652, max: 1.7661
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1253, max: 2.8024
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15593/18200 [07:23<01:01, 42.36it/s, loss=1.5444]


Logits stats - min: -6.0194, max: 1.4243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4562, max: 2.5466
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15603/18200 [07:24<01:00, 42.94it/s, loss=1.5853]


Logits stats - min: -5.4774, max: 1.7371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9677, max: 2.1810
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15614/18200 [07:24<00:54, 47.29it/s, loss=2.1968]


Logits stats - min: -5.2342, max: 1.9581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4255, max: 2.2486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6319, max: 2.5909
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9710, max: 1.7686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9942, max: 2.2930
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0933, max: 1.4930
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9871, max: 2.2410
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15636/18200 [07:24<00:54, 47.17it/s, loss=1.5508]


Logits stats - min: -7.1701, max: 2.9849
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3974, max: 2.3182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1813, max: 2.5868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7144, max: 1.8254
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0076, max: 1.8613
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15646/18200 [07:25<00:59, 42.89it/s, loss=2.1802]


Logits stats - min: -6.0975, max: 1.9490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8939, max: 2.6558
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15656/18200 [07:25<01:04, 39.71it/s, loss=1.4207]


Logits stats - min: -5.7358, max: 2.2160
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8777, max: 1.8140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9044, max: 2.3343
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15672/18200 [07:25<00:57, 44.02it/s, loss=1.3869]


Logits stats - min: -6.2472, max: 2.4188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9297, max: 2.6082
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4847, max: 2.4912
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15682/18200 [07:25<00:56, 44.37it/s, loss=1.9688]


Logits stats - min: -5.6788, max: 1.8485
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5531, max: 2.6064
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15692/18200 [07:26<00:56, 44.71it/s, loss=1.6977]


Logits stats - min: -6.1217, max: 1.7394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8333, max: 1.6664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1013, max: 2.0251
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7004, max: 2.2537
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0655, max: 1.7728
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15707/18200 [07:26<00:59, 41.74it/s, loss=1.5609]


Logits stats - min: -5.8543, max: 1.9392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0455, max: 2.2824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1862, max: 2.4830
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8361, max: 1.8425
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15717/18200 [07:26<00:59, 41.84it/s, loss=1.5550]


Logits stats - min: -6.1501, max: 1.9888
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9071, max: 1.7889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2392, max: 2.1842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1448, max: 1.6421
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▉   | 15734/18200 [07:27<00:52, 46.82it/s, loss=1.3729]


Logits stats - min: -6.6597, max: 2.2190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9701, max: 1.8691
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4613, max: 1.7561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0877, max: 2.1567
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15744/18200 [07:27<00:55, 44.31it/s, loss=1.5599]


Logits stats - min: -6.4926, max: 2.4903
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3363, max: 2.4816
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15754/18200 [07:27<00:57, 42.52it/s, loss=1.8789]


Logits stats - min: -6.3583, max: 2.1658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6349, max: 1.3050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5597, max: 1.8168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5852, max: 1.5781
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0984, max: 1.7794
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15764/18200 [07:27<00:58, 41.52it/s, loss=1.6279]


Logits stats - min: -6.9389, max: 2.4143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5788, max: 2.2991
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15774/18200 [07:28<00:59, 40.45it/s, loss=1.5353]


Logits stats - min: -6.1539, max: 2.7819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9600, max: 2.5049
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15784/18200 [07:28<00:59, 40.29it/s, loss=1.6003]


Logits stats - min: -6.3579, max: 2.3602
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15794/18200 [07:28<00:57, 42.21it/s, loss=1.4353]


Logits stats - min: -6.0462, max: 2.2952
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2457, max: 1.6123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6876, max: 2.3241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5976, max: 1.5077
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15810/18200 [07:28<00:53, 44.31it/s, loss=1.6049]


Logits stats - min: -6.0398, max: 2.2023
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3285, max: 1.4931
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4624, max: 2.5839
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7937, max: 1.8183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6272, max: 2.0552
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15820/18200 [07:29<00:54, 43.45it/s, loss=1.5627]


Logits stats - min: -6.0717, max: 1.7673
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2903, max: 1.5566
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15830/18200 [07:29<00:53, 44.48it/s, loss=1.4148]


Logits stats - min: -6.3231, max: 2.4842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0602, max: 2.3283
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2678, max: 2.3869
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15840/18200 [07:29<00:53, 44.02it/s, loss=1.5570]


Logits stats - min: -5.8036, max: 1.3960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9482, max: 2.5386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3806, max: 1.9316
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15851/18200 [07:29<00:50, 46.73it/s, loss=1.3991]


Logits stats - min: -5.8366, max: 1.7032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1188, max: 1.9719
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7936, max: 2.4059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0851, max: 2.4207
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15861/18200 [07:30<00:54, 43.12it/s, loss=1.2418]


Logits stats - min: -6.6268, max: 2.5416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9533, max: 2.3073
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15871/18200 [07:30<00:53, 43.59it/s, loss=1.6716]


Logits stats - min: -5.9690, max: 1.7637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8125, max: 2.4266
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8893, max: 1.5486
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15886/18200 [07:30<00:52, 44.29it/s, loss=1.7645]


Logits stats - min: -5.5195, max: 1.7659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6540, max: 1.4847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5242, max: 2.5576
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9565, max: 2.6122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6114, max: 2.5106
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15898/18200 [07:30<00:48, 47.66it/s, loss=1.4089]


Logits stats - min: -5.5874, max: 1.3753
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1148, max: 1.7830
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6875, max: 1.7375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6063, max: 2.3203
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4479, max: 2.5908
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15918/18200 [07:31<00:55, 41.38it/s, loss=1.6111]


Logits stats - min: -6.0691, max: 2.0225
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4095, max: 1.7531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9624, max: 1.7593
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15933/18200 [07:31<00:53, 42.56it/s, loss=1.4905]


Logits stats - min: -8.5512, max: 1.8974
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6305, max: 2.3238
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4843, max: 1.8367
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15944/18200 [07:31<00:48, 46.50it/s, loss=1.5891]


Logits stats - min: -5.4982, max: 1.8326
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2512, max: 2.0648
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5541, max: 1.8999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6485, max: 2.0564
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15959/18200 [07:32<00:48, 45.81it/s, loss=1.6114]


Logits stats - min: -6.0891, max: 1.9154
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2182, max: 1.8209
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9898, max: 2.4244
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15969/18200 [07:32<00:51, 42.92it/s, loss=1.5450]


Logits stats - min: -6.5449, max: 1.2938
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15989/18200 [07:32<00:54, 40.76it/s, loss=2.1864]


Logits stats - min: -7.1300, max: 2.3722
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15999/18200 [07:33<00:52, 41.97it/s, loss=1.5103]


Logits stats - min: -6.8159, max: 2.4253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9464, max: 1.8525
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 16009/18200 [07:33<00:49, 44.40it/s, loss=1.4095]


Logits stats - min: -5.4586, max: 2.2749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2065, max: 1.9359
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3088, max: 2.4719
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16029/18200 [07:33<00:51, 42.13it/s, loss=1.3843]


Logits stats - min: -6.5116, max: 2.3189
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3841, max: 1.9267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2038, max: 2.3018
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16040/18200 [07:34<00:48, 44.61it/s, loss=1.6031]


Logits stats - min: -6.6533, max: 2.6229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5052, max: 1.8936
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1121, max: 2.4840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2026, max: 1.4441
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16050/18200 [07:34<00:50, 42.85it/s, loss=1.3550]


Logits stats - min: -5.8289, max: 1.4093
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2842, max: 1.3214
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1429, max: 1.8778
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16071/18200 [07:34<00:48, 44.28it/s, loss=2.7973]


Logits stats - min: -6.3728, max: 2.5815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6389, max: 2.5424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4441, max: 2.4183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4861, max: 1.9028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7931, max: 1.8877
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0559, max: 2.4300
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16096/18200 [07:35<00:50, 41.37it/s, loss=1.5381]


Logits stats - min: -6.0001, max: 1.7807
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1632, max: 1.7232
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16106/18200 [07:35<00:47, 44.25it/s, loss=1.3804]


Logits stats - min: -6.2858, max: 1.8690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1304, max: 1.8686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9607, max: 2.5858
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7323, max: 2.3695
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▎  | 16116/18200 [07:35<00:47, 43.64it/s, loss=1.4026]


Logits stats - min: -6.1297, max: 2.2735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7588, max: 2.3592
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16126/18200 [07:36<00:48, 42.87it/s, loss=1.5458]


Logits stats - min: -6.3009, max: 1.7384
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7434, max: 2.5196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6718, max: 1.6501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1193, max: 1.9046
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16145/18200 [07:36<00:51, 39.56it/s, loss=1.5979]


Logits stats - min: -5.9467, max: 1.8585
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8996, max: 1.7072
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16155/18200 [07:36<00:47, 42.99it/s, loss=1.5065]


Logits stats - min: -6.1420, max: 1.6523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3924, max: 1.9845
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3103, max: 1.8796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3727, max: 2.0113
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6990, max: 2.7714
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16166/18200 [07:37<00:44, 45.99it/s, loss=1.5377]


Logits stats - min: -6.2944, max: 2.2203
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5071, max: 1.7938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6477, max: 2.9153
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7524, max: 2.3806
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16186/18200 [07:37<00:46, 43.65it/s, loss=1.3969]


Logits stats - min: -6.8381, max: 2.2827
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0095, max: 2.6562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8031, max: 2.4574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2567, max: 1.7372
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16196/18200 [07:37<00:46, 43.12it/s, loss=1.6029]


Logits stats - min: -6.9572, max: 2.4665
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6843, max: 2.3441
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7281, max: 1.8761
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16211/18200 [07:38<00:45, 43.89it/s, loss=2.1828]


Logits stats - min: -6.3084, max: 2.2863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8218, max: 1.8735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7727, max: 1.7983
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16221/18200 [07:38<00:43, 45.38it/s, loss=1.3840]


Logits stats - min: -7.1880, max: 2.0685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9066, max: 1.9033
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2016, max: 2.4891
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16236/18200 [07:38<00:46, 42.51it/s, loss=1.8850]


Logits stats - min: -5.6212, max: 1.5065
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2269, max: 2.9145
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7276, max: 2.5379
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16246/18200 [07:39<00:46, 41.74it/s, loss=1.5021]


Logits stats - min: -5.7040, max: 2.0184
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7808, max: 2.4982
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16256/18200 [07:39<00:47, 41.33it/s, loss=1.5446]


Logits stats - min: -5.7321, max: 1.8043
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6475, max: 1.8934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9164, max: 2.1483
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16271/18200 [07:39<00:46, 41.80it/s, loss=1.5323]


Logits stats - min: -5.8275, max: 1.8997
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6812, max: 1.7226
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16286/18200 [07:40<00:47, 40.41it/s, loss=1.9008]


Logits stats - min: -5.9417, max: 1.5632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9029, max: 2.0659
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▌  | 16302/18200 [07:40<00:45, 41.45it/s, loss=1.5503]


Logits stats - min: -6.0829, max: 2.0405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2189, max: 2.4224
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9434, max: 1.9188
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▌  | 16314/18200 [07:40<00:39, 47.18it/s, loss=1.3849]


Logits stats - min: -6.7846, max: 2.5141
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2118, max: 1.8030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2492, max: 1.9027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8561, max: 2.3044
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9466, max: 1.6174
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16324/18200 [07:40<00:41, 44.73it/s, loss=1.5965]


Logits stats - min: -6.7830, max: 2.3514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8299, max: 1.7988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7415, max: 2.0179
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8277, max: 1.8554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4944, max: 2.3632
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16339/18200 [07:41<00:41, 44.88it/s, loss=1.5296]


Logits stats - min: -6.5030, max: 2.5056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1769, max: 2.3582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7582, max: 1.8807
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16359/18200 [07:41<00:43, 42.27it/s, loss=1.3735]


Logits stats - min: -5.9525, max: 1.9434
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7933, max: 1.7293
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16369/18200 [07:41<00:43, 42.16it/s, loss=1.5346]


Logits stats - min: -7.1503, max: 2.8693
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4810, max: 2.4237
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5468, max: 2.2910
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16379/18200 [07:42<00:43, 41.61it/s, loss=1.6082]


Logits stats - min: -5.4870, max: 1.5468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7024, max: 1.8598
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16389/18200 [07:42<00:43, 41.34it/s, loss=1.3605]


Logits stats - min: -5.6250, max: 1.7491
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16404/18200 [07:42<00:43, 41.13it/s, loss=1.5503]


Logits stats - min: -5.7975, max: 1.8542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5594, max: 2.0937
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1775, max: 2.0377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9798, max: 1.9236
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16414/18200 [07:43<00:42, 41.73it/s, loss=2.1246]


Logits stats - min: -5.9255, max: 1.8965
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0276, max: 1.7321
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16429/18200 [07:43<00:40, 43.74it/s, loss=2.2316]


Logits stats - min: -5.7261, max: 2.0309
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3086, max: 2.3448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4897, max: 1.9515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7199, max: 2.6093
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16440/18200 [07:43<00:39, 44.13it/s, loss=1.3729]


Logits stats - min: -5.4086, max: 1.7468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1104, max: 1.9095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2471, max: 2.1945
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16450/18200 [07:43<00:41, 42.43it/s, loss=1.6039]


Logits stats - min: -6.4931, max: 2.1938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0842, max: 2.5274
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16460/18200 [07:44<00:42, 40.60it/s, loss=1.6062]


Logits stats - min: -5.7725, max: 1.5323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8572, max: 2.0151
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16470/18200 [07:44<00:41, 42.16it/s, loss=1.6100]


Logits stats - min: -7.0960, max: 2.5205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9482, max: 2.0001
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9950, max: 1.7465
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16486/18200 [07:44<00:39, 43.21it/s, loss=1.5094]


Logits stats - min: -6.1784, max: 1.9188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9385, max: 1.3061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4867, max: 1.7465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8476, max: 2.3008
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16496/18200 [07:44<00:39, 43.10it/s, loss=2.1759]


Logits stats - min: -7.0475, max: 2.3165
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8424, max: 1.8988
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16501/18200 [07:45<00:39, 43.14it/s, loss=1.6091]


Logits stats - min: -6.8863, max: 2.3073
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4643, max: 1.8075
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7927, max: 2.7133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1634, max: 1.9740
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16512/18200 [07:45<00:36, 46.32it/s, loss=1.5446]


Logits stats - min: -5.9004, max: 1.4413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7091, max: 1.9151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0130, max: 1.6602
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16522/18200 [07:45<00:38, 43.53it/s, loss=1.5054]


Logits stats - min: -6.1029, max: 1.3969
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5088, max: 1.9803
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16532/18200 [07:45<00:39, 42.46it/s, loss=1.6014]


Logits stats - min: -6.5512, max: 1.4436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6316, max: 1.8004
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16542/18200 [07:45<00:38, 42.79it/s, loss=2.1415]


Logits stats - min: -5.9882, max: 1.7374
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6107, max: 1.9425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8011, max: 1.9497
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0075, max: 1.8119
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16552/18200 [07:46<00:38, 43.21it/s, loss=1.3802]


Logits stats - min: -5.9771, max: 1.8352
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16567/18200 [07:46<00:38, 42.21it/s, loss=1.6072]


Logits stats - min: -6.6440, max: 2.1505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0241, max: 1.9451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4840, max: 1.7926
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7946, max: 2.3596
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16577/18200 [07:46<00:37, 42.93it/s, loss=1.5124]


Logits stats - min: -6.3494, max: 2.1263
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0359, max: 2.4616
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9205, max: 2.0692
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16587/18200 [07:47<00:37, 42.57it/s, loss=1.5622]


Logits stats - min: -5.7050, max: 1.8032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9543, max: 1.9317
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16597/18200 [07:47<00:36, 44.20it/s, loss=1.5812]


Logits stats - min: -5.7823, max: 2.1029
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7798, max: 1.7698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8384, max: 1.9014
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7678, max: 1.9557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8941, max: 2.0815
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16617/18200 [07:47<00:37, 42.28it/s, loss=1.5559]


Logits stats - min: -6.4012, max: 2.3684
Target unique values: tensor([0], device='cuda:0')


Training:  91%|█████████████████████  | 16627/18200 [07:47<00:38, 40.65it/s, loss=2.1570]


Logits stats - min: -5.3716, max: 1.8816
Target unique values: tensor([0], device='cuda:0')


Training:  91%|█████████████████████  | 16642/18200 [07:48<00:37, 41.11it/s, loss=1.5533]


Logits stats - min: -6.6977, max: 1.4786
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0109, max: 1.8443
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6714, max: 2.1371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3358, max: 2.5965
Target unique values: tensor([0], device='cuda:0')


Training:  91%|█████████████████████  | 16652/18200 [07:48<00:35, 43.17it/s, loss=1.5407]


Logits stats - min: -6.1553, max: 1.9499
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0986, max: 1.3961
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.7846, max: 1.7132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2417, max: 2.5029
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16668/18200 [07:48<00:35, 42.84it/s, loss=1.5626]


Logits stats - min: -6.3921, max: 2.0599
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16683/18200 [07:49<00:36, 41.46it/s, loss=1.5910]


Logits stats - min: -5.2275, max: 1.4297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4215, max: 2.3575
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3127, max: 1.8737
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16699/18200 [07:49<00:34, 43.33it/s, loss=1.6407]


Logits stats - min: -7.4464, max: 2.6496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9521, max: 2.3015
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9254, max: 1.8983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9956, max: 2.2310
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3090, max: 2.3442
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16709/18200 [07:49<00:33, 45.18it/s, loss=1.6138]


Logits stats - min: -6.5885, max: 2.5140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3955, max: 2.4647
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2830, max: 2.0909
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5657, max: 2.4964
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4841, max: 1.6347
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16725/18200 [07:50<00:34, 42.82it/s, loss=1.6101]


Logits stats - min: -7.3981, max: 2.3804
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16730/18200 [07:50<00:33, 43.71it/s, loss=1.5651]


Logits stats - min: -5.3341, max: 1.4489
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7635, max: 1.6343
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2292, max: 2.4877
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16740/18200 [07:50<00:35, 40.73it/s, loss=1.3606]


Logits stats - min: -5.7473, max: 1.7172
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7124, max: 2.3162
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6964, max: 2.0229
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16750/18200 [07:50<00:34, 41.51it/s, loss=2.1517]


Logits stats - min: -5.2367, max: 1.2953
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4640, max: 2.2112
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16760/18200 [07:51<00:33, 43.18it/s, loss=1.3692]


Logits stats - min: -6.2006, max: 1.7661
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5090, max: 1.4159
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6729, max: 2.8761
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2455, max: 2.1259
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16771/18200 [07:51<00:31, 45.78it/s, loss=1.4912]


Logits stats - min: -6.6913, max: 2.2686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8351, max: 2.0892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0133, max: 2.4422
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16786/18200 [07:51<00:33, 41.79it/s, loss=2.1804]


Logits stats - min: -7.0976, max: 2.2862
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8311, max: 1.3580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9303, max: 1.6146
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16791/18200 [07:51<00:33, 41.91it/s, loss=2.0962]


Logits stats - min: -5.9585, max: 1.9220
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16806/18200 [07:52<00:33, 41.26it/s, loss=2.2072]


Logits stats - min: -10.1436, max: 1.6602
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2674, max: 2.3707
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▎ | 16821/18200 [07:52<00:33, 41.46it/s, loss=2.3088]


Logits stats - min: -6.7761, max: 2.2375
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▎ | 16832/18200 [07:52<00:30, 45.02it/s, loss=2.2579]


Logits stats - min: -5.5738, max: 1.8555
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9778, max: 2.4803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5171, max: 2.0626
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9900, max: 1.4571
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6745, max: 1.8810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7905, max: 2.2434
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16837/18200 [07:52<00:29, 45.86it/s, loss=1.5432]


Logits stats - min: -6.0150, max: 2.3917
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7904, max: 2.6055
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16852/18200 [07:53<00:31, 42.73it/s, loss=1.5383]


Logits stats - min: -5.6994, max: 2.0797
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6454, max: 2.4441
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1825, max: 1.7564
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5853, max: 2.0193
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5073, max: 1.3212
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16863/18200 [07:53<00:30, 43.44it/s, loss=1.6150]


Logits stats - min: -5.5029, max: 2.0120
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8504, max: 2.6547
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16873/18200 [07:53<00:29, 44.65it/s, loss=2.1748]


Logits stats - min: -8.2482, max: 3.2460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8476, max: 1.8588
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1352, max: 2.4144
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16879/18200 [07:53<00:28, 45.87it/s, loss=1.5149]


Logits stats - min: -6.0752, max: 2.0552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9727, max: 2.5285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7103, max: 1.9030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9736, max: 2.1488
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16889/18200 [07:54<00:30, 43.12it/s, loss=1.5950]


Logits stats - min: -7.5048, max: 2.9424
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16900/18200 [07:54<00:28, 45.97it/s, loss=2.1512]


Logits stats - min: -5.7225, max: 1.9350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3908, max: 1.8994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4637, max: 2.3843
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0099, max: 2.0817
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16915/18200 [07:54<00:28, 44.57it/s, loss=1.5149]


Logits stats - min: -6.6917, max: 2.4795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1168, max: 1.3241
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16925/18200 [07:54<00:30, 41.59it/s, loss=1.5830]


Logits stats - min: -6.3591, max: 2.0097
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8631, max: 2.4793
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16935/18200 [07:55<00:29, 42.96it/s, loss=1.5513]


Logits stats - min: -6.4765, max: 2.5112
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16945/18200 [07:55<00:29, 42.20it/s, loss=1.5905]


Logits stats - min: -5.8967, max: 1.9972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9997, max: 2.3299
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16955/18200 [07:55<00:29, 42.40it/s, loss=2.1867]


Logits stats - min: -7.6604, max: 2.0140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7734, max: 2.3338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0297, max: 2.4286
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6391, max: 1.4155
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16970/18200 [07:55<00:28, 43.71it/s, loss=1.3672]


Logits stats - min: -6.5206, max: 2.3845
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0171, max: 1.9596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9264, max: 2.4404
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16980/18200 [07:56<00:27, 44.96it/s, loss=1.5433]


Logits stats - min: -5.7475, max: 2.3051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9208, max: 2.0809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4470, max: 2.3340
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1236, max: 2.5975
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16991/18200 [07:56<00:27, 44.37it/s, loss=1.5932]


Logits stats - min: -5.7740, max: 2.2271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2159, max: 2.5928
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 17011/18200 [07:56<00:28, 41.85it/s, loss=1.5479]


Logits stats - min: -6.0371, max: 2.3757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3815, max: 1.9161
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6958, max: 1.9694
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▌ | 17016/18200 [07:57<00:28, 42.19it/s, loss=1.5345]


Logits stats - min: -6.3923, max: 1.7878
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3368, max: 2.0229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8728, max: 2.3331
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17041/18200 [07:57<00:27, 41.78it/s, loss=1.3618]


Logits stats - min: -6.6131, max: 1.6679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5200, max: 2.3591
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17051/18200 [07:57<00:28, 40.21it/s, loss=1.3937]


Logits stats - min: -6.1376, max: 2.2670
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6276, max: 2.3560
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17057/18200 [07:57<00:26, 43.83it/s, loss=2.2174]


Logits stats - min: -6.1804, max: 2.5872
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6159, max: 2.2777
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7188, max: 2.0341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0165, max: 1.8810
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17077/18200 [07:58<00:25, 43.32it/s, loss=1.5752]


Logits stats - min: -6.8872, max: 2.3735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7176, max: 2.0980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9906, max: 2.0008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0913, max: 1.9306
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17097/18200 [07:58<00:25, 43.20it/s, loss=1.0876]


Logits stats - min: -7.3612, max: 2.3897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0039, max: 2.0277
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1657, max: 1.9796
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17107/18200 [07:59<00:27, 40.23it/s, loss=2.8124]


Logits stats - min: -6.3570, max: 1.8663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2316, max: 1.9838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8486, max: 2.3033
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17118/18200 [07:59<00:25, 42.09it/s, loss=1.3620]


Logits stats - min: -4.5247, max: 1.3571
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4208, max: 1.5985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8536, max: 1.6107
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17129/18200 [07:59<00:24, 44.07it/s, loss=1.5936]


Logits stats - min: -6.6053, max: 2.3912
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0134, max: 2.7564
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8580, max: 2.3528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9560, max: 2.4258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6543, max: 1.9076
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17145/18200 [08:00<00:24, 43.77it/s, loss=1.3773]


Logits stats - min: -7.3536, max: 2.5400
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3446, max: 2.0288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3132, max: 1.3214
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9889, max: 1.8466
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17156/18200 [08:00<00:21, 47.86it/s, loss=2.2253]


Logits stats - min: -6.4521, max: 2.4216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5432, max: 2.0823
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0987, max: 1.4025
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.8677, max: 1.5069
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17166/18200 [08:00<00:25, 41.24it/s, loss=2.1854]


Logits stats - min: -5.9790, max: 1.7205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4936, max: 2.1784
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17181/18200 [08:00<00:26, 38.78it/s, loss=1.5319]


Logits stats - min: -8.0304, max: 2.0775
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17191/18200 [08:01<00:24, 41.55it/s, loss=1.8870]


Logits stats - min: -5.5997, max: 2.0542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.5439, max: 2.5023
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0330, max: 2.3939
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17196/18200 [08:01<00:23, 42.22it/s, loss=1.5449]


Logits stats - min: -4.7798, max: 1.6899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5518, max: 2.2933
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1174, max: 2.3966
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▋ | 17206/18200 [08:01<00:22, 43.62it/s, loss=1.6977]


Logits stats - min: -5.9247, max: 2.1924
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2868, max: 1.6285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9472, max: 2.1630
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0401, max: 1.8011
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17226/18200 [08:02<00:23, 40.79it/s, loss=1.7138]


Logits stats - min: -5.9795, max: 1.9131
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17242/18200 [08:02<00:23, 41.64it/s, loss=1.5864]


Logits stats - min: -6.1549, max: 1.9464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3672, max: 1.9296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9933, max: 2.3586
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9715, max: 2.1581
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17257/18200 [08:02<00:22, 42.55it/s, loss=2.1439]


Logits stats - min: -6.3674, max: 1.8313
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3418, max: 1.8658
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17277/18200 [08:03<00:22, 40.72it/s, loss=1.6122]


Logits stats - min: -6.6544, max: 2.0667
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4956, max: 1.7733
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17297/18200 [08:03<00:21, 41.27it/s, loss=1.5326]


Logits stats - min: -6.3872, max: 1.9461
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0370, max: 2.3175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1509, max: 1.2569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0563, max: 2.3723
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17312/18200 [08:04<00:22, 39.63it/s, loss=1.5285]


Logits stats - min: -6.5786, max: 2.4190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7545, max: 2.6371
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17322/18200 [08:04<00:20, 43.32it/s, loss=1.5816]


Logits stats - min: -7.5414, max: 2.4102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4053, max: 1.3925
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4348, max: 2.2345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1697, max: 2.0338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8267, max: 2.0591
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17332/18200 [08:04<00:19, 44.47it/s, loss=1.5247]


Logits stats - min: -5.8194, max: 1.7118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0743, max: 1.7408
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17337/18200 [08:04<00:20, 41.97it/s, loss=1.3515]


Logits stats - min: -7.0520, max: 2.4214
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3919, max: 1.7318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7603, max: 1.7305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3144, max: 2.6279
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17352/18200 [08:04<00:19, 42.81it/s, loss=1.5208]


Logits stats - min: -6.9822, max: 2.4876
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17362/18200 [08:05<00:19, 43.06it/s, loss=1.6381]


Logits stats - min: -7.1842, max: 2.2287
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7330, max: 2.2061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4738, max: 2.2943
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17372/18200 [08:05<00:19, 42.54it/s, loss=1.5291]


Logits stats - min: -5.9699, max: 1.4756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9069, max: 2.4897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2926, max: 2.2360
Target unique values: tensor([0], device='cuda:0')


Training:  96%|█████████████████████▉ | 17383/18200 [08:05<00:18, 45.18it/s, loss=2.2339]


Logits stats - min: -6.9567, max: 2.5211
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0454, max: 1.8257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0056, max: 2.0460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4180, max: 2.6343
Target unique values: tensor([0], device='cuda:0')


Training:  96%|█████████████████████▉ | 17393/18200 [08:05<00:18, 44.09it/s, loss=1.5687]


Logits stats - min: -5.9276, max: 1.3127
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -4.9295, max: 1.2300
Target unique values: tensor([0], device='cuda:0')


Training:  96%|█████████████████████▉ | 17408/18200 [08:06<00:18, 43.62it/s, loss=2.2177]


Logits stats - min: -6.8796, max: 2.4302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5435, max: 2.1050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1101, max: 1.8102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6269, max: 2.6056
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17423/18200 [08:06<00:18, 41.31it/s, loss=1.5161]


Logits stats - min: -6.6695, max: 1.7681
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9675, max: 2.5875
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17433/18200 [08:06<00:18, 40.88it/s, loss=1.6167]


Logits stats - min: -6.4788, max: 1.7923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2818, max: 1.9172
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17448/18200 [08:07<00:18, 40.30it/s, loss=1.5288]


Logits stats - min: -6.2780, max: 2.0444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9078, max: 1.9302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8131, max: 2.1748
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17459/18200 [08:07<00:16, 43.73it/s, loss=1.3627]


Logits stats - min: -6.4397, max: 2.3009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8028, max: 1.8541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1974, max: 2.7873
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1516, max: 2.4205
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17469/18200 [08:07<00:17, 41.75it/s, loss=1.5220]


Logits stats - min: -6.6804, max: 2.1541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2989, max: 1.9362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0724, max: 1.8230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0271, max: 1.7475
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17476/18200 [08:07<00:15, 47.75it/s, loss=1.5203]


Logits stats - min: -6.0253, max: 2.0025
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9607, max: 1.7389
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.0151, max: 1.3933
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5181, max: 2.3335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3445, max: 1.8112
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17488/18200 [08:08<00:14, 49.63it/s, loss=1.5919]


Logits stats - min: -6.2568, max: 2.4340
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6247, max: 1.3791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3406, max: 1.6959
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4182, max: 2.7611
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6261, max: 2.2261
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17503/18200 [08:08<00:16, 42.82it/s, loss=1.5368]


Logits stats - min: -5.8048, max: 1.9298
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6730, max: 2.3407
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8421, max: 1.8357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3295, max: 2.5525
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17519/18200 [08:08<00:15, 44.42it/s, loss=1.4915]


Logits stats - min: -7.7504, max: 2.5278
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3734, max: 1.8899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0983, max: 2.8549
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17530/18200 [08:09<00:13, 47.91it/s, loss=1.5394]


Logits stats - min: -7.0573, max: 2.3721
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1096, max: 1.7300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5612, max: 2.3394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3923, max: 1.7671
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3395, max: 1.9391
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17545/18200 [08:09<00:14, 45.26it/s, loss=1.5283]


Logits stats - min: -6.4091, max: 2.4972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7458, max: 2.3367
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6406, max: 1.3242
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17555/18200 [08:09<00:14, 43.22it/s, loss=1.8229]


Logits stats - min: -5.7493, max: 2.0219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8274, max: 2.1131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3959, max: 2.0544
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17570/18200 [08:10<00:14, 42.23it/s, loss=2.4382]


Logits stats - min: -6.4759, max: 1.9303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4047, max: 2.6242
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17580/18200 [08:10<00:15, 41.23it/s, loss=1.5185]


Logits stats - min: -6.5456, max: 1.9300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8125, max: 1.9601
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17600/18200 [08:10<00:14, 41.05it/s, loss=1.5851]


Logits stats - min: -7.4198, max: 2.3479
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9394, max: 1.8714
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17610/18200 [08:10<00:14, 41.51it/s, loss=2.9387]


Logits stats - min: -6.2054, max: 2.2324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2278, max: 2.3491
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17625/18200 [08:11<00:14, 40.72it/s, loss=1.7296]


Logits stats - min: -6.6666, max: 2.5028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3867, max: 2.3514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9795, max: 2.0036
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17647/18200 [08:11<00:12, 44.42it/s, loss=1.5506]


Logits stats - min: -6.9974, max: 2.3562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5924, max: 2.3051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5662, max: 2.3407
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1861, max: 1.9409
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9555, max: 1.9605
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17657/18200 [08:12<00:12, 42.67it/s, loss=1.5290]


Logits stats - min: -5.8929, max: 1.8791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0829, max: 1.9032
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17667/18200 [08:12<00:13, 40.44it/s, loss=1.7338]


Logits stats - min: -6.0086, max: 2.0935
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7709, max: 2.6534
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0011, max: 2.0874
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17683/18200 [08:12<00:12, 40.54it/s, loss=2.8731]


Logits stats - min: -4.8962, max: 1.4125
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17693/18200 [08:12<00:12, 40.92it/s, loss=1.5340]


Logits stats - min: -8.4635, max: 1.9559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2767, max: 2.0464
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17702/18200 [08:13<00:12, 38.82it/s, loss=1.5730]


Logits stats - min: -5.9848, max: 1.8436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2979, max: 1.8934
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▍| 17712/18200 [08:13<00:12, 39.62it/s, loss=1.7140]


Logits stats - min: -7.8485, max: 2.2511
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▍| 17722/18200 [08:13<00:12, 39.26it/s, loss=1.4948]


Logits stats - min: -5.2469, max: 1.4515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4668, max: 2.4202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7893, max: 2.0247
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▍| 17742/18200 [08:14<00:11, 39.56it/s, loss=1.5906]


Logits stats - min: -6.6058, max: 2.4362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1083, max: 1.8587
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17757/18200 [08:14<00:10, 42.49it/s, loss=1.7578]


Logits stats - min: -7.7024, max: 2.7306
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1221, max: 2.1480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3848, max: 1.9342
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17767/18200 [08:14<00:10, 41.40it/s, loss=1.5075]


Logits stats - min: -7.4430, max: 3.2218
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17777/18200 [08:15<00:10, 41.70it/s, loss=1.5836]


Logits stats - min: -6.9746, max: 2.4569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0509, max: 1.7029
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17787/18200 [08:15<00:09, 43.26it/s, loss=1.5849]


Logits stats - min: -7.7942, max: 2.8517
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9133, max: 2.0767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1979, max: 2.2496
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17802/18200 [08:15<00:09, 42.21it/s, loss=1.3862]


Logits stats - min: -6.5133, max: 2.0237
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9755, max: 1.7373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0527, max: 2.1314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2795, max: 2.4985
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17813/18200 [08:15<00:08, 45.40it/s, loss=2.8706]


Logits stats - min: -5.9527, max: 2.1884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1233, max: 2.3686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0235, max: 2.3228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9059, max: 2.4543
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17828/18200 [08:16<00:09, 40.87it/s, loss=1.3603]


Logits stats - min: -6.7177, max: 2.0787
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17838/18200 [08:16<00:08, 42.92it/s, loss=2.2302]


Logits stats - min: -6.3146, max: 1.9463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5242, max: 1.4100
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17849/18200 [08:16<00:07, 44.11it/s, loss=1.5252]


Logits stats - min: -7.5496, max: 2.1818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7866, max: 1.9751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3504, max: 2.4102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0025, max: 2.2132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6220, max: 2.7722
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17864/18200 [08:17<00:07, 44.15it/s, loss=1.5839]


Logits stats - min: -5.8947, max: 1.8795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8646, max: 2.0491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8508, max: 2.3956
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17875/18200 [08:17<00:06, 47.12it/s, loss=1.5805]


Logits stats - min: -7.6254, max: 2.7670
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7054, max: 2.1452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1987, max: 2.4509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2673, max: 2.5870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9203, max: 2.2750
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17890/18200 [08:17<00:07, 42.04it/s, loss=1.5101]


Logits stats - min: -6.0055, max: 2.1330
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17900/18200 [08:17<00:07, 41.45it/s, loss=1.5885]


Logits stats - min: -7.1575, max: 1.9779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7281, max: 2.1202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7786, max: 1.8120
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▋| 17911/18200 [08:18<00:06, 43.74it/s, loss=2.1245]


Logits stats - min: -6.4270, max: 1.9529
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5332, max: 1.4121
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7873, max: 2.6451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6928, max: 2.8832
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▋| 17916/18200 [08:18<00:06, 44.65it/s, loss=1.9890]


Logits stats - min: -5.7309, max: 2.5446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1249, max: 2.3222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1474, max: 2.5741
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▋| 17926/18200 [08:18<00:06, 42.87it/s, loss=1.3853]


Logits stats - min: -7.1708, max: 2.3143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0168, max: 1.9057
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17936/18200 [08:18<00:06, 40.06it/s, loss=1.5326]


Logits stats - min: -6.0228, max: 2.1318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8283, max: 2.0656
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17947/18200 [08:18<00:05, 44.19it/s, loss=1.4888]


Logits stats - min: -7.3748, max: 2.5634
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2457, max: 2.1881
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1796, max: 1.8342
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7658, max: 2.0267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9557, max: 2.5500
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17962/18200 [08:19<00:05, 44.13it/s, loss=2.0031]


Logits stats - min: -6.3261, max: 2.0855
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0367, max: 1.9508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9833, max: 1.8167
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2499, max: 2.4245
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17982/18200 [08:19<00:05, 41.32it/s, loss=2.1354]


Logits stats - min: -6.5899, max: 2.6495
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7562, max: 2.5110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7438, max: 1.8502
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1764, max: 1.9108
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17993/18200 [08:20<00:04, 43.56it/s, loss=1.5393]


Logits stats - min: -5.9614, max: 1.8841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3664, max: 2.2708
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4973, max: 2.5146
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8482, max: 2.0264
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1686, max: 2.0075
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18003/18200 [08:20<00:04, 43.58it/s, loss=1.5523]


Logits stats - min: -5.7399, max: 1.6851
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6509, max: 1.9568
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18014/18200 [08:20<00:04, 44.98it/s, loss=1.9379]


Logits stats - min: -7.4440, max: 1.6811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9137, max: 2.7444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2464, max: 1.9012
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18029/18200 [08:20<00:04, 41.64it/s, loss=1.5407]


Logits stats - min: -6.3010, max: 1.9783
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2308, max: 2.0082
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1727, max: 2.4302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7408, max: 2.3726
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18045/18200 [08:21<00:03, 40.65it/s, loss=1.5367]


Logits stats - min: -5.9772, max: 1.8199
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6801, max: 2.5645
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2957, max: 1.9231
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18064/18200 [08:21<00:03, 40.55it/s, loss=1.5177]


Logits stats - min: -6.2259, max: 2.0446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9868, max: 2.4604
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18069/18200 [08:21<00:03, 41.54it/s, loss=1.5753]


Logits stats - min: -6.8435, max: 2.4618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0714, max: 1.8508
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18079/18200 [08:22<00:02, 43.69it/s, loss=1.5315]


Logits stats - min: -8.4533, max: 2.6345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6532, max: 1.9258
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18089/18200 [08:22<00:02, 43.99it/s, loss=1.5867]


Logits stats - min: -6.0766, max: 1.8527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0947, max: 2.5109
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▉| 18104/18200 [08:22<00:02, 41.46it/s, loss=1.3497]


Logits stats - min: -6.8727, max: 1.6672
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6334, max: 2.3789
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0795, max: 1.7618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8495, max: 2.4232
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18115/18200 [08:22<00:01, 45.22it/s, loss=1.5838]


Logits stats - min: -6.1783, max: 1.9453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7728, max: 2.5889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0761, max: 1.8727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0988, max: 2.0421
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18131/18200 [08:23<00:01, 45.24it/s, loss=2.2271]


Logits stats - min: -6.7375, max: 2.4836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5402, max: 2.3788
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9092, max: 2.2047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9787, max: 2.2055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4218, max: 2.0498
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5103, max: 2.1837
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18146/18200 [08:23<00:01, 42.56it/s, loss=1.5266]


Logits stats - min: -6.3645, max: 2.0620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7140, max: 2.1209
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18157/18200 [08:23<00:01, 41.90it/s, loss=1.9719]


Logits stats - min: -5.8478, max: 1.5005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1284, max: 2.5287
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6142, max: 2.1877
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18177/18200 [08:24<00:00, 44.44it/s, loss=1.5264]


Logits stats - min: -5.4359, max: 1.3766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8162, max: 1.9144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1401, max: 2.0899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1204, max: 2.2290
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18187/18200 [08:24<00:00, 41.75it/s, loss=1.3655]


Logits stats - min: -7.4013, max: 2.4790
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0103, max: 1.8845
Target unique values: tensor([0], device='cuda:0')


                                                                                         


Logits stats - min: -7.3613, max: 2.3787
Target unique values: tensor([0], device='cuda:0')


                                                                                         


Results:
  Train Loss: 1.3139 | Train Acc: 0.7950
  Val Loss  : nan | Val Acc  : 0.3008
  Val mIoU  : 0.0649
  Time      : 10.80 min
  IoU per class: [0.         0.01704518 0.00913785 0.         0.         0.36337712]
[OK] Best model saved! (mIoU: 0.0649)

Epoch 2/2
GPU Memory: 0.02 GB allocated


Training:   0%|                           | 3/18200 [00:00<10:22, 29.22it/s, loss=1.5430]


Logits stats - min: -6.1220, max: 1.7863
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                           | 6/18200 [00:00<11:05, 27.35it/s, loss=1.5232]


Logits stats - min: -5.9157, max: 2.3085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2888, max: 1.8331
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7288, max: 1.5846
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 16/18200 [00:00<08:04, 37.51it/s, loss=1.7330]


Logits stats - min: -8.0078, max: 1.8667
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 16/18200 [00:00<08:04, 37.51it/s, loss=1.3587]


Logits stats - min: -5.8740, max: 2.0823
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3621, max: 2.4966
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 22/18200 [00:00<07:02, 43.00it/s, loss=1.5391]


Logits stats - min: -5.9367, max: 1.8755
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 51/18200 [00:01<07:26, 40.61it/s, loss=1.5311]


Logits stats - min: -6.9155, max: 2.6487
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9272, max: 2.5124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8411, max: 2.4211
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 62/18200 [00:01<07:01, 43.02it/s, loss=1.5077]


Logits stats - min: -6.9480, max: 2.1553
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1771, max: 1.9890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2221, max: 2.1177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8826, max: 2.5175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6793, max: 2.3917
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 77/18200 [00:01<06:46, 44.58it/s, loss=1.3600]


Logits stats - min: -6.4341, max: 2.3210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7507, max: 1.7619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8898, max: 2.0414
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7906, max: 2.1434
Target unique values: tensor([0], device='cuda:0')


Training:   0%|                          | 87/18200 [00:02<06:53, 43.76it/s, loss=1.5338]


Logits stats - min: -6.6374, max: 2.5253
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4785, max: 1.8506
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7033, max: 2.5746
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                         | 97/18200 [00:02<07:02, 42.81it/s, loss=1.5331]


Logits stats - min: -8.5779, max: 2.4106
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0348, max: 2.3341
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 107/18200 [00:02<07:06, 42.45it/s, loss=1.5220]


Logits stats - min: -7.1970, max: 2.3686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5038, max: 2.8659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4707, max: 2.4900
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 127/18200 [00:03<07:01, 42.90it/s, loss=1.6174]


Logits stats - min: -6.5411, max: 2.4659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0927, max: 1.8839
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3391, max: 2.2251
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7462, max: 2.1916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6419, max: 2.7397
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7958, max: 2.4388
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 132/18200 [00:03<06:47, 44.31it/s, loss=1.6592]


Logits stats - min: -5.8677, max: 2.3286
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2554, max: 2.0046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0200, max: 2.5069
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 147/18200 [00:03<06:44, 44.61it/s, loss=1.5421]


Logits stats - min: -6.2146, max: 2.0209
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2589, max: 2.5687
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4981, max: 2.6722
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0175, max: 2.6173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1125, max: 2.2570
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 162/18200 [00:03<07:17, 41.19it/s, loss=1.4946]


Logits stats - min: -10.0318, max: 2.3911
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 172/18200 [00:04<07:01, 42.73it/s, loss=1.5817]


Logits stats - min: -7.5600, max: 2.3997
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2281, max: 1.9100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2328, max: 2.3810
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▏                        | 177/18200 [00:04<07:06, 42.27it/s, loss=1.5736]


Logits stats - min: -7.3247, max: 2.5099
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 187/18200 [00:04<07:07, 42.09it/s, loss=1.9053]


Logits stats - min: -6.1351, max: 1.9293
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4331, max: 2.2029
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3524, max: 2.4346
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 197/18200 [00:04<07:09, 41.91it/s, loss=1.6583]


Logits stats - min: -5.9642, max: 1.9118
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 207/18200 [00:05<07:02, 42.55it/s, loss=1.3336]


Logits stats - min: -5.7902, max: 1.4308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5536, max: 1.6590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0661, max: 2.2323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4211, max: 2.1220
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 222/18200 [00:05<07:07, 42.07it/s, loss=1.5357]


Logits stats - min: -7.1372, max: 2.6028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4747, max: 1.5436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2943, max: 1.3044
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 242/18200 [00:05<07:30, 39.90it/s, loss=1.5393]


Logits stats - min: -6.4639, max: 2.1910
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8055, max: 2.3199
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 252/18200 [00:06<07:10, 41.66it/s, loss=1.5282]


Logits stats - min: -5.8956, max: 2.5717
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6808, max: 2.7871
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7958, max: 2.1268
Target unique values: tensor([0], device='cuda:0')


Training:   1%|▎                        | 262/18200 [00:06<07:08, 41.82it/s, loss=1.8366]


Logits stats - min: -7.0714, max: 2.5094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8523, max: 2.6320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0060, max: 1.9385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5232, max: 2.4867
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 278/18200 [00:06<07:01, 42.56it/s, loss=1.4030]


Logits stats - min: -7.1527, max: 2.6035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7175, max: 2.0889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8069, max: 2.2660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9158, max: 1.4703
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 283/18200 [00:06<06:49, 43.74it/s, loss=1.5022]


Logits stats - min: -5.4694, max: 2.0820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0173, max: 2.1668
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0540, max: 2.3960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4930, max: 2.5896
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 293/18200 [00:07<07:10, 41.61it/s, loss=1.5200]


Logits stats - min: -7.1095, max: 2.5719
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9853, max: 2.1986
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 303/18200 [00:07<07:00, 42.53it/s, loss=1.3545]


Logits stats - min: -6.0210, max: 2.6769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8126, max: 2.1369
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 313/18200 [00:07<07:13, 41.26it/s, loss=1.3774]


Logits stats - min: -6.5493, max: 2.1641
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8201, max: 1.5402
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 328/18200 [00:07<07:03, 42.18it/s, loss=1.5766]


Logits stats - min: -6.1671, max: 2.3568
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3514, max: 1.4110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3187, max: 2.0246
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 348/18200 [00:08<06:46, 43.90it/s, loss=1.5773]


Logits stats - min: -6.7627, max: 2.4527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4417, max: 2.2829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1146, max: 1.4820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3853, max: 2.5125
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8475, max: 1.3452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9931, max: 2.1285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7447, max: 2.2947
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▍                        | 359/18200 [00:08<06:37, 44.83it/s, loss=1.5845]


Logits stats - min: -5.4183, max: 1.9884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8248, max: 1.9655
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 374/18200 [00:09<07:17, 40.74it/s, loss=1.5108]


Logits stats - min: -6.5652, max: 1.9337
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1213, max: 2.2081
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7195, max: 2.4035
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 384/18200 [00:09<06:58, 42.56it/s, loss=1.5123]


Logits stats - min: -6.9716, max: 2.2784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2270, max: 1.8585
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 399/18200 [00:09<06:59, 42.47it/s, loss=1.5899]


Logits stats - min: -6.1366, max: 2.0982
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2622, max: 2.0156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7294, max: 1.9771
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5470, max: 1.3412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6107, max: 2.1844
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 414/18200 [00:09<07:09, 41.44it/s, loss=2.6770]


Logits stats - min: -6.6229, max: 2.0740
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 419/18200 [00:10<07:06, 41.65it/s, loss=1.5792]


Logits stats - min: -6.6148, max: 1.3091
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 434/18200 [00:10<06:53, 42.94it/s, loss=1.5034]


Logits stats - min: -6.0607, max: 2.0927
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3045, max: 2.0175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2531, max: 2.5136
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 444/18200 [00:10<07:00, 42.20it/s, loss=1.5551]


Logits stats - min: -7.3452, max: 2.4864
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3977, max: 2.5480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4172, max: 1.2696
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1029, max: 2.2203
Target unique values: tensor([0], device='cuda:0')


Training:   2%|▌                        | 454/18200 [00:10<07:03, 41.87it/s, loss=2.1403]


Logits stats - min: -7.2832, max: 2.4486
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 465/18200 [00:11<06:58, 42.37it/s, loss=1.5218]


Logits stats - min: -6.1963, max: 2.0963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7678, max: 2.7745
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 475/18200 [00:11<07:20, 40.19it/s, loss=2.6885]


Logits stats - min: -6.4936, max: 1.9690
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 490/18200 [00:11<07:20, 40.17it/s, loss=1.5275]


Logits stats - min: -7.2796, max: 2.3250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5615, max: 1.9694
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5277, max: 2.0955
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1140, max: 2.1137
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7914, max: 1.9927
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 500/18200 [00:12<07:01, 41.98it/s, loss=1.5010]


Logits stats - min: -7.6298, max: 2.3133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7685, max: 2.7571
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1053, max: 1.9972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8382, max: 2.0974
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 515/18200 [00:12<06:56, 42.46it/s, loss=2.0577]


Logits stats - min: -7.6225, max: 2.2279
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4167, max: 2.3376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0279, max: 1.4769
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 525/18200 [00:12<07:08, 41.27it/s, loss=1.5202]


Logits stats - min: -5.9391, max: 2.0747
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9285, max: 2.4934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8763, max: 2.1630
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▋                        | 535/18200 [00:12<06:52, 42.79it/s, loss=1.5655]


Logits stats - min: -7.0150, max: 2.4191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4703, max: 1.4312
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1420, max: 1.8947
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7577, max: 2.6382
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 551/18200 [00:13<06:31, 45.12it/s, loss=1.5529]


Logits stats - min: -6.3811, max: 1.9801
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5499, max: 2.0396
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7621, max: 2.4819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0166, max: 2.3531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5039, max: 2.4781
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0282, max: 2.2242
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 561/18200 [00:13<06:44, 43.65it/s, loss=1.4804]


Logits stats - min: -6.3071, max: 1.5320
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 572/18200 [00:13<06:36, 44.41it/s, loss=2.7322]


Logits stats - min: -6.4821, max: 2.1179
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3080, max: 2.0252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6465, max: 2.6691
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2279, max: 2.4198
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9558, max: 2.4094
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 582/18200 [00:13<06:39, 44.05it/s, loss=1.3363]


Logits stats - min: -5.2102, max: 1.3621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4818, max: 2.0233
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 593/18200 [00:14<06:58, 42.04it/s, loss=1.5234]


Logits stats - min: -6.3721, max: 2.5658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9110, max: 2.1658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2642, max: 2.4054
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 608/18200 [00:14<06:56, 42.26it/s, loss=1.5830]


Logits stats - min: -6.3019, max: 1.9720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3490, max: 1.9760
Target unique values: tensor([0], device='cuda:0')


Training:   3%|▊                        | 618/18200 [00:14<06:55, 42.29it/s, loss=1.5864]


Logits stats - min: -5.6606, max: 1.7092
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0704, max: 2.0142
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 639/18200 [00:15<06:35, 44.40it/s, loss=2.1672]


Logits stats - min: -5.9165, max: 2.1638
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8132, max: 1.9707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6345, max: 2.0329
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3017, max: 1.3788
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0006, max: 2.4841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8552, max: 1.4627
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 650/18200 [00:15<06:49, 42.90it/s, loss=2.2501]


Logits stats - min: -7.3431, max: 2.9066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6481, max: 2.7195
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5183, max: 2.2194
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7303, max: 2.8018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2400, max: 1.9289
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 660/18200 [00:15<06:59, 41.86it/s, loss=1.5200]


Logits stats - min: -6.4269, max: 2.3413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6098, max: 2.4662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6484, max: 1.4908
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 670/18200 [00:15<06:45, 43.27it/s, loss=1.5795]


Logits stats - min: -6.2243, max: 1.8948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5458, max: 2.6684
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8700, max: 2.1904
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 690/18200 [00:16<07:06, 41.07it/s, loss=1.7645]


Logits stats - min: -6.3934, max: 1.6836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1567, max: 1.9754
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3239, max: 2.0463
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 705/18200 [00:16<06:45, 43.13it/s, loss=1.5826]


Logits stats - min: -6.1123, max: 1.8929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3320, max: 1.8929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7196, max: 2.4650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2084, max: 1.7532
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8293, max: 2.4623
Target unique values: tensor([0], device='cuda:0')


Training:   4%|▉                        | 715/18200 [00:17<06:36, 44.08it/s, loss=1.3876]


Logits stats - min: -6.8917, max: 2.4817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8900, max: 1.9229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2462, max: 2.0232
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 730/18200 [00:17<06:43, 43.29it/s, loss=1.5734]


Logits stats - min: -6.2480, max: 2.0362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0094, max: 2.1300
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 740/18200 [00:17<06:52, 42.29it/s, loss=1.3644]


Logits stats - min: -6.5158, max: 2.6216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9454, max: 2.4139
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9672, max: 2.5360
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 750/18200 [00:17<06:55, 41.98it/s, loss=1.3303]


Logits stats - min: -6.3293, max: 2.0563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9428, max: 2.3748
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2605, max: 2.5952
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 760/18200 [00:18<06:53, 42.20it/s, loss=1.5732]


Logits stats - min: -6.1863, max: 1.9602
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2838, max: 2.4171
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 767/18200 [00:18<06:12, 46.81it/s, loss=1.4917]


Logits stats - min: -6.0231, max: 1.8985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5958, max: 1.9626
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2362, max: 2.0528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8142, max: 2.3796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3286, max: 2.4055
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 782/18200 [00:18<06:35, 44.00it/s, loss=1.5175]


Logits stats - min: -6.9080, max: 2.2676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2649, max: 2.4243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6642, max: 2.0451
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 797/18200 [00:18<06:41, 43.36it/s, loss=1.5821]


Logits stats - min: -7.6537, max: 2.4352
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0129, max: 1.8239
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9980, max: 2.3970
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 807/18200 [00:19<06:50, 42.41it/s, loss=1.5724]


Logits stats - min: -6.8974, max: 2.4488
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9036, max: 1.7802
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2018, max: 1.9008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2567, max: 2.2393
Target unique values: tensor([0], device='cuda:0')


Training:   4%|█                        | 817/18200 [00:19<06:59, 41.45it/s, loss=1.3408]


Logits stats - min: -7.4625, max: 2.7467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3535, max: 2.1385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5971, max: 2.5858
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 827/18200 [00:19<06:53, 42.00it/s, loss=2.0883]


Logits stats - min: -6.6488, max: 2.4529
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 837/18200 [00:19<06:50, 42.26it/s, loss=1.5335]


Logits stats - min: -6.6450, max: 2.4674
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9810, max: 2.5693
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 842/18200 [00:20<06:46, 42.70it/s, loss=2.0072]


Logits stats - min: -6.2845, max: 1.9025
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2034, max: 1.9697
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 852/18200 [00:20<06:47, 42.54it/s, loss=2.7188]


Logits stats - min: -5.9175, max: 1.9517
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5441, max: 2.4134
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0923, max: 2.4715
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7576, max: 2.4238
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 868/18200 [00:20<06:18, 45.75it/s, loss=1.1541]


Logits stats - min: -6.0668, max: 2.2079
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1928, max: 2.6700
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4145, max: 2.4430
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4808, max: 2.6815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4704, max: 2.1798
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 878/18200 [00:20<06:27, 44.67it/s, loss=1.3293]


Logits stats - min: -6.1631, max: 2.0409
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4674, max: 2.9121
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 893/18200 [00:21<06:47, 42.51it/s, loss=1.5544]


Logits stats - min: -6.3538, max: 1.4627
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▏                       | 903/18200 [00:21<06:45, 42.71it/s, loss=1.9263]


Logits stats - min: -7.1322, max: 2.6219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0953, max: 2.5760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8037, max: 2.1555
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 928/18200 [00:22<06:38, 43.31it/s, loss=1.5856]


Logits stats - min: -5.7925, max: 1.8531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9025, max: 2.0652
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9597, max: 1.3910
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2326, max: 2.4258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9681, max: 2.3074
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 938/18200 [00:22<06:27, 44.59it/s, loss=2.0271]


Logits stats - min: -6.1343, max: 2.1720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0943, max: 2.5422
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9428, max: 1.8784
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 943/18200 [00:22<06:29, 44.26it/s, loss=1.5823]


Logits stats - min: -6.3049, max: 1.7814
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 953/18200 [00:22<07:09, 40.17it/s, loss=1.8884]


Logits stats - min: -6.8381, max: 2.3186
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 963/18200 [00:22<07:04, 40.63it/s, loss=1.8078]


Logits stats - min: -5.9493, max: 1.9798
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0268, max: 2.6330
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0554, max: 2.7675
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7227, max: 2.8160
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 973/18200 [00:23<07:00, 41.00it/s, loss=1.6834]


Logits stats - min: -6.2459, max: 1.8458
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3845, max: 2.3635
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9214, max: 1.8797
Target unique values: tensor([0], device='cuda:0')


Training:   5%|█▎                       | 988/18200 [00:23<07:08, 40.21it/s, loss=1.2475]


Logits stats - min: -7.5265, max: 2.3631
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9937, max: 1.8963
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1003/18200 [00:23<06:39, 43.00it/s, loss=1.5327]


Logits stats - min: -5.8280, max: 1.9166
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0427, max: 2.8754
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1406, max: 2.1595
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1240, max: 2.1104
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1018/18200 [00:24<06:41, 42.78it/s, loss=1.4875]


Logits stats - min: -7.1748, max: 2.3076
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5467, max: 2.2288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2636, max: 2.3452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2780, max: 1.4401
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▎                      | 1028/18200 [00:24<06:40, 42.83it/s, loss=1.5014]


Logits stats - min: -5.4982, max: 1.4856
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9577, max: 2.8611
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7979, max: 2.2378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5319, max: 2.4186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7566, max: 2.0650
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1044/18200 [00:24<06:25, 44.49it/s, loss=1.4925]


Logits stats - min: -5.5545, max: 2.3922
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1732, max: 2.1748
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8871, max: 2.0510
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1054/18200 [00:25<06:25, 44.47it/s, loss=1.3337]


Logits stats - min: -5.3056, max: 1.9256
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3373, max: 2.5909
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1069/18200 [00:25<06:57, 41.00it/s, loss=2.0107]


Logits stats - min: -5.4325, max: 2.1213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6104, max: 2.5342
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8877, max: 2.5679
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1079/18200 [00:25<07:09, 39.86it/s, loss=2.2550]


Logits stats - min: -5.6967, max: 1.9577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2603, max: 2.2359
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4916, max: 2.6025
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1094/18200 [00:25<06:36, 43.13it/s, loss=1.5761]


Logits stats - min: -7.2057, max: 2.6028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1683, max: 2.1344
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7488, max: 2.1412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8306, max: 2.1043
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1109/18200 [00:26<07:10, 39.69it/s, loss=1.5747]


Logits stats - min: -5.8843, max: 2.1303
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1120/18200 [00:26<06:20, 44.90it/s, loss=1.5851]


Logits stats - min: -6.2985, max: 1.9736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9973, max: 2.1865
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4239, max: 2.5893
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8279, max: 2.0012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0489, max: 2.1504
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8899, max: 2.7660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9259, max: 2.3695
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▍                      | 1135/18200 [00:26<06:34, 43.29it/s, loss=1.3922]


Logits stats - min: -7.7822, max: 2.6656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0110, max: 2.5114
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5318, max: 2.2120
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5564, max: 2.8229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9674, max: 1.3312
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1145/18200 [00:27<06:33, 43.36it/s, loss=1.5286]


Logits stats - min: -6.6876, max: 2.3097
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1160/18200 [00:27<06:46, 41.96it/s, loss=1.5111]


Logits stats - min: -7.3000, max: 2.4202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2077, max: 2.5779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2319, max: 2.0455
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7936, max: 2.0512
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8656, max: 3.1513
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1170/18200 [00:27<06:41, 42.44it/s, loss=1.3354]


Logits stats - min: -7.0192, max: 2.3129
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1970, max: 2.4299
Target unique values: tensor([0], device='cuda:0')


Training:   6%|█▌                      | 1180/18200 [00:28<06:38, 42.74it/s, loss=1.9752]


Logits stats - min: -6.2911, max: 2.3817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0176, max: 1.9544
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7617, max: 2.3220
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1190/18200 [00:28<06:26, 44.03it/s, loss=1.9328]


Logits stats - min: -5.6911, max: 2.1001
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0325, max: 2.4149
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1739, max: 2.6762
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6743, max: 2.2891
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1210/18200 [00:28<06:38, 42.62it/s, loss=2.2340]


Logits stats - min: -7.8519, max: 2.9615
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2408, max: 1.9656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3205, max: 1.9232
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1220/18200 [00:29<06:57, 40.72it/s, loss=1.5227]


Logits stats - min: -6.3930, max: 2.4104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8818, max: 2.2175
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▌                      | 1230/18200 [00:29<06:52, 41.19it/s, loss=1.9551]


Logits stats - min: -5.8636, max: 2.1006
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4696, max: 1.9769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7687, max: 2.0958
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1245/18200 [00:29<06:36, 42.76it/s, loss=1.6110]


Logits stats - min: -9.5633, max: 2.0980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3769, max: 2.7115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9667, max: 2.1919
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1260/18200 [00:29<06:47, 41.56it/s, loss=1.3217]


Logits stats - min: -5.9797, max: 1.6430
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0680, max: 2.2615
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9336, max: 2.3283
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1270/18200 [00:30<06:50, 41.27it/s, loss=2.1398]


Logits stats - min: -6.3574, max: 1.9617
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3142, max: 1.9499
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1280/18200 [00:30<06:54, 40.83it/s, loss=1.3520]


Logits stats - min: -7.4749, max: 2.4966
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1286/18200 [00:30<06:40, 42.19it/s, loss=1.5028]


Logits stats - min: -8.3621, max: 2.8968
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2478, max: 1.3552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2151, max: 2.3925
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1296/18200 [00:30<06:44, 41.74it/s, loss=1.4898]


Logits stats - min: -6.6520, max: 2.1102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8236, max: 2.4934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5939, max: 2.0552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5257, max: 2.8492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5038, max: 2.6120
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1307/18200 [00:31<06:24, 43.97it/s, loss=2.1847]


Logits stats - min: -7.8953, max: 2.4506
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0824, max: 2.1438
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▋                      | 1322/18200 [00:31<06:56, 40.53it/s, loss=2.6927]


Logits stats - min: -7.4203, max: 2.5943
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1332/18200 [00:31<06:36, 42.59it/s, loss=1.5087]


Logits stats - min: -6.5276, max: 2.0562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.5812, max: 1.5618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0029, max: 2.0561
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1342/18200 [00:31<06:52, 40.89it/s, loss=1.3544]


Logits stats - min: -6.2570, max: 2.1073
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5500, max: 2.7097
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2469, max: 2.5296
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1352/18200 [00:32<06:47, 41.36it/s, loss=1.5037]


Logits stats - min: -6.2283, max: 2.3067
Target unique values: tensor([0], device='cuda:0')


Training:   7%|█▊                      | 1362/18200 [00:32<06:42, 41.79it/s, loss=1.5212]


Logits stats - min: -6.1655, max: 2.1177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1573, max: 2.5839
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3282, max: 1.7402
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7931, max: 2.0973
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1373/18200 [00:32<06:05, 45.99it/s, loss=1.3205]


Logits stats - min: -6.3411, max: 1.8285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4568, max: 2.4773
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8063, max: 1.9995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0742, max: 1.8363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2476, max: 2.0289
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1389/18200 [00:33<06:31, 42.91it/s, loss=1.5376]


Logits stats - min: -6.3560, max: 1.6863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9592, max: 1.9448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4922, max: 2.4279
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1399/18200 [00:33<06:45, 41.44it/s, loss=1.5304]


Logits stats - min: -6.3307, max: 2.2128
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2882, max: 2.0252
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▊                      | 1414/18200 [00:33<06:24, 43.61it/s, loss=1.5813]


Logits stats - min: -6.5070, max: 2.2585
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0050, max: 1.9527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2714, max: 2.2570
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6391, max: 2.2890
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1429/18200 [00:34<07:00, 39.85it/s, loss=1.3608]


Logits stats - min: -6.3864, max: 2.2228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0599, max: 2.1240
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1439/18200 [00:34<06:41, 41.70it/s, loss=1.5164]


Logits stats - min: -8.0662, max: 2.3585
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1499, max: 2.5308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7370, max: 2.8151
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1449/18200 [00:34<06:35, 42.30it/s, loss=1.7943]


Logits stats - min: -8.7095, max: 3.0637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8223, max: 2.8058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5354, max: 2.0532
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1464/18200 [00:34<06:44, 41.39it/s, loss=1.5000]


Logits stats - min: -5.6939, max: 1.5967
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9617, max: 2.4811
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1474/18200 [00:35<06:38, 41.95it/s, loss=1.5753]


Logits stats - min: -6.1708, max: 3.0053
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0111, max: 2.5647
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2511, max: 1.5393
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1484/18200 [00:35<06:30, 42.75it/s, loss=1.5245]


Logits stats - min: -6.0966, max: 2.5830
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1310, max: 2.4072
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3726, max: 2.4168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9558, max: 2.5666
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1500/18200 [00:35<06:15, 44.42it/s, loss=1.4966]


Logits stats - min: -6.8316, max: 1.9933
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1313, max: 1.3511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0371, max: 2.0818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1257, max: 2.0556
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1245, max: 2.4583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6971, max: 3.0343
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1505/18200 [00:35<06:31, 42.68it/s, loss=1.7331]


Logits stats - min: -8.2108, max: 2.2025
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3581, max: 2.3882
Target unique values: tensor([0], device='cuda:0')


Training:   8%|█▉                      | 1515/18200 [00:36<06:32, 42.50it/s, loss=1.5214]


Logits stats - min: -6.5742, max: 2.0052
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3527, max: 2.3034
Target unique values: tensor([0], device='cuda:0')


Training:   8%|██                      | 1530/18200 [00:36<06:31, 42.61it/s, loss=1.3275]


Logits stats - min: -6.3812, max: 2.0859
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3212, max: 2.3944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5706, max: 2.2724
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9324, max: 2.4823
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8956, max: 3.4442
Target unique values: tensor([0], device='cuda:0')


Training:   8%|██                      | 1540/18200 [00:36<06:47, 40.92it/s, loss=1.3215]


Logits stats - min: -6.2827, max: 2.0040
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1556/18200 [00:36<06:32, 42.40it/s, loss=1.5444]


Logits stats - min: -7.3813, max: 2.3465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9068, max: 2.1814
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4315, max: 1.8037
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3577, max: 2.0417
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1561/18200 [00:37<06:39, 41.70it/s, loss=1.5961]


Logits stats - min: -6.7472, max: 2.0207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5378, max: 2.0575
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4468, max: 2.0210
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1576/18200 [00:37<06:40, 41.46it/s, loss=1.5254]


Logits stats - min: -8.0107, max: 2.6810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1556, max: 1.4856
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1587/18200 [00:37<06:09, 44.96it/s, loss=1.5105]


Logits stats - min: -7.3344, max: 1.4738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9382, max: 1.9052
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6104, max: 2.2426
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3834, max: 2.3528
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2291, max: 2.6349
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1597/18200 [00:38<06:39, 41.59it/s, loss=1.5788]


Logits stats - min: -6.3466, max: 2.1661
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6225, max: 1.9913
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██                      | 1607/18200 [00:38<06:15, 44.20it/s, loss=2.1103]


Logits stats - min: -6.5848, max: 2.3399
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4908, max: 2.4166
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5804, max: 2.0504
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1612/18200 [00:38<06:31, 42.34it/s, loss=2.1192]


Logits stats - min: -7.1120, max: 2.0905
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3505, max: 2.2314
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1622/18200 [00:38<06:22, 43.34it/s, loss=1.5587]


Logits stats - min: -6.5446, max: 2.2365
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3925, max: 2.0411
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1632/18200 [00:38<06:57, 39.67it/s, loss=1.4857]


Logits stats - min: -7.1024, max: 2.5973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6806, max: 2.6585
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1647/18200 [00:39<06:25, 42.97it/s, loss=1.3128]


Logits stats - min: -6.9281, max: 2.3621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3391, max: 1.6794
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9948, max: 2.4290
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7147, max: 1.6686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2420, max: 1.7970
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3826, max: 2.0120
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1658/18200 [00:39<06:02, 45.65it/s, loss=1.4728]


Logits stats - min: -7.9166, max: 2.5484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9644, max: 2.4477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6766, max: 2.2225
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1673/18200 [00:39<06:41, 41.16it/s, loss=1.5908]


Logits stats - min: -6.5018, max: 2.0958
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4202, max: 2.0789
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1684/18200 [00:40<06:21, 43.27it/s, loss=2.1858]


Logits stats - min: -6.5811, max: 1.9975
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8122, max: 1.5273
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9000, max: 2.1686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5002, max: 2.3201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1205, max: 2.3492
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▏                     | 1700/18200 [00:40<06:27, 42.55it/s, loss=2.0272]


Logits stats - min: -7.3553, max: 2.5012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9760, max: 2.6770
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4691, max: 2.2823
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▎                     | 1710/18200 [00:40<06:33, 41.93it/s, loss=1.8374]


Logits stats - min: -7.0456, max: 2.3494
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8201, max: 2.0282
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2717, max: 1.9892
Target unique values: tensor([0], device='cuda:0')


Training:   9%|██▎                     | 1720/18200 [00:40<06:25, 42.73it/s, loss=2.1647]


Logits stats - min: -7.9059, max: 2.1752
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3931, max: 1.9792
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2352, max: 2.4579
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6906, max: 2.4103
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1730/18200 [00:41<06:14, 43.99it/s, loss=1.5731]


Logits stats - min: -7.2483, max: 2.5755
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1455, max: 1.9415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2130, max: 2.5176
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6836, max: 1.9805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0436, max: 2.1072
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1740/18200 [00:41<06:15, 43.86it/s, loss=1.5044]


Logits stats - min: -6.7283, max: 1.5339
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9486, max: 2.3284
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1751/18200 [00:41<06:05, 45.02it/s, loss=2.7158]


Logits stats - min: -5.8837, max: 2.1254
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2455, max: 2.4435
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4425, max: 2.1168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4097, max: 2.3781
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1756/18200 [00:41<06:04, 45.08it/s, loss=1.4617]


Logits stats - min: -6.4256, max: 2.0173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5362, max: 2.7496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3017, max: 2.5651
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1768/18200 [00:41<05:47, 47.27it/s, loss=1.3421]


Logits stats - min: -5.3152, max: 1.5378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2025, max: 2.1206
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3373, max: 1.9664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1251, max: 2.3005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2388, max: 2.3971
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1778/18200 [00:42<06:14, 43.84it/s, loss=1.5245]


Logits stats - min: -8.1881, max: 2.7915
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9570, max: 2.5645
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2185, max: 2.3459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4428, max: 2.0810
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▎                     | 1793/18200 [00:42<06:12, 44.06it/s, loss=1.6053]


Logits stats - min: -6.0944, max: 2.6600
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0790, max: 2.0377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2698, max: 2.0999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7758, max: 2.0250
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1803/18200 [00:42<06:12, 44.00it/s, loss=1.5672]


Logits stats - min: -6.0216, max: 2.0463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7745, max: 2.7115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2307, max: 2.0531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4206, max: 2.6684
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1813/18200 [00:43<06:19, 43.19it/s, loss=1.5249]


Logits stats - min: -7.3023, max: 2.6141
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8559, max: 1.9920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0056, max: 1.9383
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8423, max: 2.1896
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4127, max: 2.3132
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1833/18200 [00:43<06:28, 42.16it/s, loss=2.7579]


Logits stats - min: -6.1549, max: 2.2464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3029, max: 2.3021
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1838/18200 [00:43<06:16, 43.45it/s, loss=1.3417]


Logits stats - min: -5.8141, max: 1.3461
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3474, max: 2.2910
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5117, max: 2.0703
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1848/18200 [00:43<06:25, 42.40it/s, loss=1.5986]


Logits stats - min: -6.3825, max: 2.0751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3560, max: 2.3146
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1858/18200 [00:43<06:29, 41.96it/s, loss=1.9484]


Logits stats - min: -6.0360, max: 2.1903
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7093, max: 2.2584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0439, max: 2.9525
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1868/18200 [00:44<06:48, 39.96it/s, loss=1.3454]


Logits stats - min: -6.9745, max: 2.5831
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7420, max: 1.9292
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1883/18200 [00:44<06:53, 39.45it/s, loss=2.1916]


Logits stats - min: -6.3045, max: 2.3424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5370, max: 1.7489
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2500, max: 2.0810
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▍                     | 1892/18200 [00:44<06:58, 39.00it/s, loss=1.5161]


Logits stats - min: -6.3179, max: 2.0019
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▌                     | 1901/18200 [00:45<06:35, 41.20it/s, loss=2.7756]


Logits stats - min: -7.5831, max: 2.3992
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3635, max: 2.3735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6969, max: 2.2437
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0803, max: 2.6418
Target unique values: tensor([0], device='cuda:0')


Training:  10%|██▌                     | 1911/18200 [00:45<06:29, 41.77it/s, loss=1.5355]


Logits stats - min: -6.3085, max: 1.8042
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1926/18200 [00:45<06:28, 41.89it/s, loss=1.9505]


Logits stats - min: -6.2579, max: 1.9050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2189, max: 1.9922
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3453, max: 2.1246
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1931/18200 [00:45<06:28, 41.85it/s, loss=1.5790]


Logits stats - min: -7.6874, max: 2.7296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6796, max: 2.3422
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1941/18200 [00:46<06:39, 40.68it/s, loss=1.5231]


Logits stats - min: -7.0340, max: 2.5954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9702, max: 2.9260
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5260, max: 1.9280
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1953/18200 [00:46<06:07, 44.15it/s, loss=2.7411]


Logits stats - min: -6.1757, max: 2.2085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0691, max: 1.9494
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7756, max: 1.8698
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1968/18200 [00:46<06:24, 42.19it/s, loss=1.5202]


Logits stats - min: -6.7384, max: 2.2891
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▌                     | 1983/18200 [00:47<06:43, 40.22it/s, loss=1.3364]


Logits stats - min: -6.5775, max: 2.2171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4404, max: 2.3031
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7614, max: 1.9664
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 1993/18200 [00:47<06:45, 39.96it/s, loss=2.1759]


Logits stats - min: -7.8183, max: 2.5293
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6197, max: 1.9884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8162, max: 2.4401
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2003/18200 [00:47<06:21, 42.40it/s, loss=1.5878]


Logits stats - min: -7.5582, max: 2.3599
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2013/18200 [00:47<06:16, 42.94it/s, loss=1.4864]


Logits stats - min: -8.1778, max: 2.8685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8926, max: 2.2634
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3843, max: 2.0360
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2023/18200 [00:48<06:21, 42.43it/s, loss=1.5022]


Logits stats - min: -7.0722, max: 2.4947
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8429, max: 2.7614
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3490, max: 2.6466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3633, max: 2.3374
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2038/18200 [00:48<06:31, 41.31it/s, loss=1.5169]


Logits stats - min: -7.1598, max: 2.3999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2544, max: 2.2299
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2048/18200 [00:48<06:12, 43.41it/s, loss=1.5011]


Logits stats - min: -7.5652, max: 2.5468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1959, max: 2.0248
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1018, max: 2.4184
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2063/18200 [00:48<05:58, 44.97it/s, loss=1.5780]


Logits stats - min: -7.2910, max: 2.4125
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5535, max: 2.4089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9567, max: 2.1212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5610, max: 2.4271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4639, max: 2.1590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2167, max: 2.2146
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▋                     | 2073/18200 [00:49<06:20, 42.43it/s, loss=1.5704]


Logits stats - min: -6.1639, max: 2.1245
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1519, max: 2.1573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3129, max: 2.0568
Target unique values: tensor([0], device='cuda:0')


Training:  11%|██▊                     | 2089/18200 [00:49<06:07, 43.79it/s, loss=1.3279]


Logits stats - min: -6.5379, max: 2.2422
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3754, max: 2.0495
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2563, max: 2.2811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1712, max: 2.2020
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2109/18200 [00:50<06:05, 44.04it/s, loss=1.5998]


Logits stats - min: -5.8872, max: 1.8964
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2318, max: 2.0550
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1288, max: 2.4696
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5301, max: 2.6459
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2119/18200 [00:50<06:20, 42.25it/s, loss=1.5694]


Logits stats - min: -5.9734, max: 2.6712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5238, max: 2.3688
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4941, max: 2.1630
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2130/18200 [00:50<06:19, 42.31it/s, loss=1.4876]


Logits stats - min: -6.8783, max: 2.0680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6390, max: 1.9384
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2140/18200 [00:50<06:22, 42.01it/s, loss=1.8026]


Logits stats - min: -6.1534, max: 1.9291
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2150/18200 [00:51<06:26, 41.55it/s, loss=1.4889]


Logits stats - min: -5.5981, max: 1.5913
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6585, max: 2.2981
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2165/18200 [00:51<06:20, 42.14it/s, loss=2.1412]


Logits stats - min: -6.1221, max: 2.3557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9612, max: 2.1301
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▊                     | 2175/18200 [00:51<06:15, 42.67it/s, loss=1.7167]


Logits stats - min: -5.2280, max: 1.5890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6172, max: 2.2713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0244, max: 2.0252
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2185/18200 [00:51<06:39, 40.14it/s, loss=1.5733]


Logits stats - min: -7.9690, max: 2.2161
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5398, max: 2.0588
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4654, max: 2.1563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5227, max: 2.3440
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2195/18200 [00:52<06:32, 40.77it/s, loss=1.3150]


Logits stats - min: -6.5637, max: 1.9676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3226, max: 2.1588
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2205/18200 [00:52<06:04, 43.89it/s, loss=1.5717]


Logits stats - min: -7.5864, max: 2.5083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3009, max: 2.0384
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6275, max: 2.5101
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0420, max: 2.3341
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2215/18200 [00:52<06:22, 41.83it/s, loss=1.7152]


Logits stats - min: -6.0310, max: 2.3421
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3842, max: 2.0984
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1833, max: 2.0368
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2226/18200 [00:52<05:44, 46.31it/s, loss=1.6157]


Logits stats - min: -7.2632, max: 2.5714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2503, max: 2.5557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7138, max: 2.2851
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4129, max: 2.3520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5514, max: 1.9856
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2232/18200 [00:52<05:48, 45.87it/s, loss=1.5164]


Logits stats - min: -7.2968, max: 2.2183
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2247/18200 [00:53<06:29, 40.99it/s, loss=1.5827]


Logits stats - min: -7.5303, max: 2.4100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2734, max: 2.3532
Target unique values: tensor([0], device='cuda:0')


Training:  12%|██▉                     | 2258/18200 [00:53<06:04, 43.73it/s, loss=1.3526]


Logits stats - min: -6.7004, max: 1.8557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6510, max: 2.2456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2466, max: 2.0520
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2278/18200 [00:54<06:24, 41.41it/s, loss=1.3214]


Logits stats - min: -6.0689, max: 1.6705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1900, max: 2.1361
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2288/18200 [00:54<06:32, 40.56it/s, loss=2.0173]


Logits stats - min: -6.5463, max: 2.1140
Target unique values: tensor([0], device='cuda:0')



Training:  13%|███                     | 2298/18200 [00:54<06:28, 40.95it/s, loss=1.4604]

Logits stats - min: -5.9786, max: 1.9241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2562, max: 2.0209
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6417, max: 2.2305
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2308/18200 [00:54<06:16, 42.19it/s, loss=1.3187]


Logits stats - min: -6.0232, max: 1.8654
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8507, max: 2.6164
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5432, max: 2.1094
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2318/18200 [00:55<06:21, 41.59it/s, loss=1.3176]


Logits stats - min: -6.8543, max: 2.5133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3907, max: 2.6353
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2333/18200 [00:55<06:27, 40.91it/s, loss=2.1753]


Logits stats - min: -6.5363, max: 1.9878
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2343/18200 [00:55<06:54, 38.28it/s, loss=1.3024]


Logits stats - min: -7.9493, max: 2.6404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7471, max: 2.0628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3853, max: 2.5304
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2358/18200 [00:56<06:15, 42.22it/s, loss=2.2264]


Logits stats - min: -6.6393, max: 2.4122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6526, max: 2.0258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4474, max: 2.0186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8572, max: 1.4278
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███                     | 2368/18200 [00:56<06:38, 39.72it/s, loss=1.5869]


Logits stats - min: -5.6428, max: 1.6873
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2392/18200 [00:56<06:43, 39.15it/s, loss=1.6475]


Logits stats - min: -5.3294, max: 1.3974
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2401/18200 [00:57<06:22, 41.36it/s, loss=1.5739]


Logits stats - min: -6.4885, max: 2.2073
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7938, max: 2.4984
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7707, max: 2.4777
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3012, max: 2.5032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4132, max: 2.2826
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2416/18200 [00:57<06:26, 40.83it/s, loss=2.1401]


Logits stats - min: -7.3863, max: 2.6468
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2426/18200 [00:57<06:07, 42.93it/s, loss=1.3385]


Logits stats - min: -7.4971, max: 2.4812
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0607, max: 2.2485
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7799, max: 2.2159
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2441/18200 [00:58<06:33, 40.00it/s, loss=1.5692]


Logits stats - min: -7.0387, max: 2.4131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7049, max: 2.0914
Target unique values: tensor([0], device='cuda:0')


Training:  13%|███▏                    | 2451/18200 [00:58<06:11, 42.45it/s, loss=1.5652]


Logits stats - min: -7.3387, max: 2.4698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9594, max: 2.4438
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5325, max: 1.9690
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2466/18200 [00:58<06:12, 42.23it/s, loss=1.5287]


Logits stats - min: -6.7903, max: 1.9078
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7878, max: 2.5460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3711, max: 2.0886
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2481/18200 [00:59<06:37, 39.54it/s, loss=1.6792]


Logits stats - min: -6.2721, max: 2.2905
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6063, max: 2.4070
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7652, max: 1.8612
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2501/18200 [00:59<06:21, 41.14it/s, loss=1.5192]


Logits stats - min: -7.5124, max: 2.4191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7795, max: 1.9248
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9933, max: 2.2106
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2511/18200 [00:59<06:20, 41.18it/s, loss=2.2318]


Logits stats - min: -7.4340, max: 2.5837
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2521/18200 [01:00<06:43, 38.90it/s, loss=1.4632]


Logits stats - min: -7.9208, max: 2.9562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0270, max: 2.7305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5120, max: 2.4087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1434, max: 1.5179
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2532/18200 [01:00<06:02, 43.27it/s, loss=1.3156]


Logits stats - min: -7.6483, max: 2.5810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3072, max: 2.7849
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9862, max: 2.5457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1287, max: 2.2738
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▎                    | 2557/18200 [01:00<06:29, 40.19it/s, loss=1.5670]


Logits stats - min: -7.5683, max: 1.3056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2301, max: 1.9244
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2567/18200 [01:01<06:15, 41.66it/s, loss=1.5159]


Logits stats - min: -6.4914, max: 2.0422
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4755, max: 2.7925
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2577/18200 [01:01<06:26, 40.46it/s, loss=1.6071]


Logits stats - min: -7.1694, max: 2.5338
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2587/18200 [01:01<06:21, 40.91it/s, loss=2.0773]


Logits stats - min: -7.2650, max: 2.5025
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3279, max: 1.8902
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3297, max: 1.9648
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2597/18200 [01:01<06:20, 41.00it/s, loss=1.4634]


Logits stats - min: -6.6023, max: 1.9268
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0562, max: 2.4712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5212, max: 2.2852
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2612/18200 [01:02<05:58, 43.50it/s, loss=1.5646]


Logits stats - min: -7.4227, max: 2.7080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6102, max: 1.4928
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9215, max: 1.6072
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2628/18200 [01:02<05:56, 43.65it/s, loss=1.4658]


Logits stats - min: -7.2545, max: 2.3202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4020, max: 1.4990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3937, max: 2.1152
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2645, max: 1.6378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5322, max: 1.9357
Target unique values: tensor([0], device='cuda:0')


Training:  14%|███▍                    | 2634/18200 [01:02<05:41, 45.52it/s, loss=1.2989]


Logits stats - min: -6.4019, max: 1.8771
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8188, max: 2.6590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9380, max: 1.6726
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▍                    | 2649/18200 [01:03<06:24, 40.49it/s, loss=1.4668]


Logits stats - min: -5.9728, max: 1.6366
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2668/18200 [01:03<06:37, 39.09it/s, loss=1.5150]


Logits stats - min: -5.8693, max: 1.6417
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2677/18200 [01:03<06:24, 40.39it/s, loss=1.5126]


Logits stats - min: -6.1847, max: 1.8692
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5094, max: 2.2976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3363, max: 2.6249
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1245, max: 3.0267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1314, max: 2.3038
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2687/18200 [01:04<06:18, 40.96it/s, loss=1.5272]


Logits stats - min: -6.4078, max: 2.3556
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1023, max: 1.5824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3334, max: 2.2832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1639, max: 2.3236
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5782, max: 2.0559
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2697/18200 [01:04<06:08, 42.11it/s, loss=1.6802]


Logits stats - min: -8.3617, max: 2.3350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5342, max: 2.0301
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2712/18200 [01:04<06:10, 41.79it/s, loss=1.2958]


Logits stats - min: -7.6283, max: 1.6847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6331, max: 2.4760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1420, max: 2.3920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8345, max: 2.4703
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2722/18200 [01:04<05:51, 44.02it/s, loss=1.7191]


Logits stats - min: -6.5955, max: 2.0195
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8551, max: 2.0004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5142, max: 2.2738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7176, max: 1.8766
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2738/18200 [01:05<05:40, 45.47it/s, loss=1.5136]


Logits stats - min: -7.0377, max: 1.9574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3383, max: 2.2956
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1769, max: 2.0982
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0477, max: 1.5274
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4832, max: 2.2508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9428, max: 1.3211
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8680, max: 2.1035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1377, max: 2.0651
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▌                    | 2748/18200 [01:05<05:43, 45.01it/s, loss=1.6469]


Logits stats - min: -7.2521, max: 2.3241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8902, max: 2.4670
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2758/18200 [01:05<06:01, 42.71it/s, loss=1.5402]


Logits stats - min: -6.1770, max: 2.3408
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7930, max: 2.2564
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2773/18200 [01:06<06:10, 41.63it/s, loss=1.5624]


Logits stats - min: -6.6235, max: 1.9560
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2294, max: 2.3931
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8081, max: 2.0047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3081, max: 2.0907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1084, max: 2.3554
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2783/18200 [01:06<06:01, 42.59it/s, loss=1.1346]


Logits stats - min: -7.9886, max: 2.2904
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2793/18200 [01:06<05:59, 42.90it/s, loss=1.3366]


Logits stats - min: -6.7399, max: 2.3188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9974, max: 1.9428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8270, max: 2.5957
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1334, max: 1.8718
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2803/18200 [01:06<05:45, 44.60it/s, loss=1.5492]


Logits stats - min: -7.5685, max: 2.2542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7094, max: 2.2895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4199, max: 1.7627
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2297, max: 2.2834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1832, max: 2.0293
Target unique values: tensor([0], device='cuda:0')


Training:  15%|███▋                    | 2813/18200 [01:06<05:51, 43.77it/s, loss=1.5726]


Logits stats - min: -6.0749, max: 2.0386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2339, max: 2.3645
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▋                    | 2828/18200 [01:07<06:01, 42.49it/s, loss=1.5668]


Logits stats - min: -7.8344, max: 2.3778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6430, max: 2.4231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4918, max: 2.0823
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2062, max: 2.4938
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▋                    | 2838/18200 [01:07<05:56, 43.08it/s, loss=1.5844]


Logits stats - min: -6.2222, max: 2.1713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3499, max: 2.1299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2535, max: 2.6443
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2853/18200 [01:07<06:16, 40.76it/s, loss=1.3452]


Logits stats - min: -8.0310, max: 2.5865
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1247, max: 3.1041
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6113, max: 1.6297
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2863/18200 [01:08<05:51, 43.64it/s, loss=1.5101]


Logits stats - min: -6.6391, max: 2.1059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9490, max: 2.5976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4282, max: 2.1895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6444, max: 2.5645
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2873/18200 [01:08<06:09, 41.43it/s, loss=1.4503]


Logits stats - min: -6.2671, max: 1.5603
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2883/18200 [01:08<06:20, 40.26it/s, loss=2.3807]


Logits stats - min: -6.4997, max: 2.1950
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6440, max: 2.6666
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9267, max: 2.6243
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2893/18200 [01:08<06:11, 41.19it/s, loss=1.5616]


Logits stats - min: -6.1992, max: 1.8508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3600, max: 1.5448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0657, max: 2.1130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7616, max: 2.4098
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2903/18200 [01:09<06:01, 42.27it/s, loss=1.4474]


Logits stats - min: -9.1543, max: 2.2706
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1548, max: 2.2588
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8580, max: 2.6999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3540, max: 2.1265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8214, max: 2.4392
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▊                    | 2929/18200 [01:09<06:02, 42.11it/s, loss=1.5027]


Logits stats - min: -7.5522, max: 2.4703
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4290, max: 2.6732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8889, max: 2.1473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5516, max: 1.5299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7525, max: 2.8877
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2944/18200 [01:10<06:16, 40.57it/s, loss=2.1705]


Logits stats - min: -6.8315, max: 2.3890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7976, max: 2.3759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2234, max: 2.2902
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2964/18200 [01:10<06:06, 41.53it/s, loss=1.4559]


Logits stats - min: -7.5990, max: 2.6752
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9166, max: 2.6177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5554, max: 2.9115
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2974/18200 [01:10<05:58, 42.51it/s, loss=1.3198]


Logits stats - min: -6.5258, max: 1.8620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3851, max: 2.3772
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0985, max: 1.6372
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2574, max: 1.9084
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2984/18200 [01:11<06:14, 40.62it/s, loss=2.7202]


Logits stats - min: -7.3713, max: 2.0033
Target unique values: tensor([0], device='cuda:0')


Training:  16%|███▉                    | 2999/18200 [01:11<05:56, 42.69it/s, loss=1.5136]


Logits stats - min: -7.0546, max: 2.8319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6121, max: 2.3419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5998, max: 2.5295
Target unique values: tensor([0], device='cuda:0')


Training:  17%|███▉                    | 3019/18200 [01:11<06:00, 42.12it/s, loss=1.5613]


Logits stats - min: -6.3791, max: 2.4303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6654, max: 1.6028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0235, max: 2.6241
Target unique values: tensor([0], device='cuda:0')


Training:  17%|███▉                    | 3029/18200 [01:12<06:05, 41.52it/s, loss=1.3537]


Logits stats - min: -6.4766, max: 1.9539
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2954, max: 1.8082
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3059/18200 [01:12<06:16, 40.25it/s, loss=1.4540]


Logits stats - min: -7.6944, max: 2.5231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9438, max: 2.7444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0276, max: 2.5138
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3584, max: 2.0525
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3070/18200 [01:13<05:42, 44.12it/s, loss=1.5271]


Logits stats - min: -7.1241, max: 2.3018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0784, max: 2.5844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2820, max: 2.3873
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8428, max: 1.9535
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3080/18200 [01:13<05:35, 45.02it/s, loss=1.5270]


Logits stats - min: -5.9024, max: 2.0696
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2936, max: 2.5824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2437, max: 2.4055
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3095/18200 [01:13<05:57, 42.20it/s, loss=1.8160]


Logits stats - min: -6.7560, max: 2.3201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6176, max: 2.2561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4197, max: 2.2051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7486, max: 2.6199
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0347, max: 2.0601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1795, max: 2.0715
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3116/18200 [01:14<06:00, 41.88it/s, loss=2.1092]


Logits stats - min: -6.9152, max: 2.0075
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3317, max: 2.0950
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████                    | 3126/18200 [01:14<06:16, 40.03it/s, loss=1.5227]


Logits stats - min: -7.2124, max: 2.1123
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3136/18200 [01:14<06:25, 39.09it/s, loss=1.5679]


Logits stats - min: -9.5882, max: 1.5425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9280, max: 2.3503
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3154/18200 [01:15<05:59, 41.84it/s, loss=2.8128]


Logits stats - min: -6.1494, max: 2.0648
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1005, max: 2.1166
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6069, max: 2.4462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9312, max: 2.6549
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3164/18200 [01:15<05:45, 43.58it/s, loss=1.5195]


Logits stats - min: -6.6648, max: 2.1275
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1311, max: 2.3963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1230, max: 1.8812
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3169/18200 [01:15<06:07, 40.95it/s, loss=2.0204]


Logits stats - min: -7.9392, max: 2.5782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0524, max: 2.9308
Target unique values: tensor([0], device='cuda:0')


Training:  17%|████▏                   | 3184/18200 [01:15<05:57, 41.96it/s, loss=1.8373]


Logits stats - min: -6.6568, max: 2.2050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5227, max: 2.4917
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2865, max: 2.4715
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▏                   | 3194/18200 [01:16<05:54, 42.36it/s, loss=1.5747]


Logits stats - min: -6.7656, max: 1.4898
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6045, max: 2.2531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6633, max: 2.6081
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▏                   | 3204/18200 [01:16<05:48, 42.98it/s, loss=1.5645]


Logits stats - min: -5.9889, max: 1.6907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5077, max: 1.5904
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▏                   | 3214/18200 [01:16<06:02, 41.32it/s, loss=1.5163]


Logits stats - min: -6.7484, max: 2.1066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7160, max: 2.5455
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3234/18200 [01:17<06:01, 41.35it/s, loss=2.1839]


Logits stats - min: -8.9517, max: 2.0795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3277, max: 2.2751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4682, max: 1.8609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5382, max: 1.8967
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3244/18200 [01:17<05:58, 41.72it/s, loss=1.4285]


Logits stats - min: -6.8091, max: 2.8003
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4704, max: 2.2379
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3613, max: 2.1417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5649, max: 2.5113
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4681, max: 1.9601
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3254/18200 [01:17<06:00, 41.48it/s, loss=1.1971]


Logits stats - min: -7.5557, max: 2.5217
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3873, max: 2.4990
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3264/18200 [01:17<06:02, 41.24it/s, loss=2.7346]


Logits stats - min: -6.5933, max: 2.1480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0459, max: 2.2211
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3269/18200 [01:18<06:01, 41.36it/s, loss=1.5716]


Logits stats - min: -6.9362, max: 2.3229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0776, max: 2.2614
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3280/18200 [01:18<05:33, 44.76it/s, loss=1.5312]


Logits stats - min: -6.9569, max: 2.6026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5926, max: 1.9559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1736, max: 2.3418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0543, max: 1.7271
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3290/18200 [01:18<05:34, 44.58it/s, loss=1.6730]


Logits stats - min: -6.3345, max: 2.0010
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3298, max: 2.0219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1949, max: 1.6403
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8431, max: 1.9219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2537, max: 3.0877
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2736, max: 1.6273
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8914, max: 1.9740
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3301/18200 [01:18<05:22, 46.17it/s, loss=1.5582]


Logits stats - min: -7.2698, max: 2.2405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3007, max: 2.5561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5184, max: 1.7312
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1179, max: 2.1385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4145, max: 1.5587
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5560, max: 2.0972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5597, max: 2.1487
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▎                   | 3311/18200 [01:18<05:28, 45.27it/s, loss=2.1992]


Logits stats - min: -7.5107, max: 2.5282
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3321/18200 [01:19<06:11, 40.01it/s, loss=1.5829]


Logits stats - min: -6.5608, max: 2.0511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6821, max: 1.5615
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7864, max: 2.4767
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3345/18200 [01:19<06:28, 38.19it/s, loss=1.5285]


Logits stats - min: -6.8111, max: 2.0189
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3354/18200 [01:20<05:58, 41.43it/s, loss=2.1967]


Logits stats - min: -7.4567, max: 2.5536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5429, max: 2.4001
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4027, max: 3.2576
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2544, max: 2.6154
Target unique values: tensor([0], device='cuda:0')


Training:  18%|████▍                   | 3364/18200 [01:20<06:04, 40.65it/s, loss=2.1048]


Logits stats - min: -6.0111, max: 1.9171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2242, max: 2.3861
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3374/18200 [01:20<06:07, 40.32it/s, loss=1.5802]


Logits stats - min: -8.2352, max: 2.7778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6435, max: 1.4730
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3384/18200 [01:20<06:09, 40.15it/s, loss=1.4660]


Logits stats - min: -6.2031, max: 1.9113
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0305, max: 2.4681
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6395, max: 2.5104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8453, max: 1.5514
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▍                   | 3400/18200 [01:21<05:34, 44.23it/s, loss=1.5670]


Logits stats - min: -6.4146, max: 2.1012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3627, max: 1.5533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1258, max: 2.0772
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3418/18200 [01:21<06:22, 38.69it/s, loss=1.7898]


Logits stats - min: -6.3822, max: 2.2091
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3436/18200 [01:22<06:30, 37.82it/s, loss=1.5426]


Logits stats - min: -6.6223, max: 2.0501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9483, max: 2.5617
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3445/18200 [01:22<06:30, 37.80it/s, loss=2.7533]


Logits stats - min: -6.5112, max: 2.3844
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3470/18200 [01:22<06:15, 39.19it/s, loss=1.5213]


Logits stats - min: -7.3338, max: 2.2586
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2302, max: 2.5734
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3482/18200 [01:23<06:24, 38.26it/s, loss=2.0939]


Logits stats - min: -6.1164, max: 2.1028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6654, max: 2.5868
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▌                   | 3497/18200 [01:23<06:05, 40.18it/s, loss=1.5588]


Logits stats - min: -6.0794, max: 2.0144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3889, max: 1.6276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0537, max: 2.0481
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3511/18200 [01:24<06:14, 39.17it/s, loss=1.5196]


Logits stats - min: -6.2471, max: 2.0117
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8017, max: 2.3372
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9223, max: 2.1456
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3526/18200 [01:24<05:48, 42.06it/s, loss=2.2299]


Logits stats - min: -5.9514, max: 2.2630
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9130, max: 2.3680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6366, max: 2.5106
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3536/18200 [01:24<05:57, 40.99it/s, loss=1.2887]


Logits stats - min: -6.2553, max: 2.2691
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7716, max: 3.1208
Target unique values: tensor([0], device='cuda:0')


Training:  19%|████▋                   | 3546/18200 [01:24<05:49, 41.97it/s, loss=1.5657]


Logits stats - min: -6.0583, max: 1.9531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8588, max: 1.8999
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7936, max: 2.0053
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3323, max: 2.2654
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3561/18200 [01:25<05:48, 41.95it/s, loss=1.5742]


Logits stats - min: -5.7431, max: 2.0404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4551, max: 2.4218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5516, max: 2.2926
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4727, max: 2.0985
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3571/18200 [01:25<05:35, 43.56it/s, loss=1.7933]


Logits stats - min: -9.3646, max: 1.6343
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4234, max: 2.0395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3457, max: 2.3555
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7254, max: 2.9922
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3581/18200 [01:25<05:36, 43.49it/s, loss=1.5663]


Logits stats - min: -7.2761, max: 1.6503
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0257, max: 2.1382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3957, max: 2.4994
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▋                   | 3596/18200 [01:25<05:38, 43.12it/s, loss=0.9942]


Logits stats - min: -6.4069, max: 1.8243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0458, max: 1.9978
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3606/18200 [01:26<05:39, 43.02it/s, loss=1.5296]


Logits stats - min: -6.4281, max: 2.2414
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9157, max: 2.5824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0398, max: 2.4164
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3616/18200 [01:26<05:47, 41.97it/s, loss=1.5120]


Logits stats - min: -6.4740, max: 2.2916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4533, max: 1.9513
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3626/18200 [01:26<06:00, 40.47it/s, loss=1.5503]


Logits stats - min: -6.4528, max: 2.1581
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3636/18200 [01:27<06:03, 40.05it/s, loss=1.4347]


Logits stats - min: -6.8555, max: 2.3838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2634, max: 2.0673
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5611, max: 1.4881
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3646/18200 [01:27<06:00, 40.42it/s, loss=1.5620]


Logits stats - min: -9.1138, max: 2.1358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8814, max: 2.4951
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3656/18200 [01:27<05:42, 42.49it/s, loss=2.0160]


Logits stats - min: -6.3616, max: 2.1873
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2554, max: 2.3649
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3666/18200 [01:27<05:46, 41.99it/s, loss=1.3279]


Logits stats - min: -8.0815, max: 2.6038
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8310, max: 2.7559
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3676/18200 [01:27<05:52, 41.23it/s, loss=1.2897]


Logits stats - min: -5.2932, max: 1.4325
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0253, max: 2.5032
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3686/18200 [01:28<05:45, 41.96it/s, loss=1.5582]


Logits stats - min: -5.5262, max: 1.5009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9503, max: 1.9760
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▊                   | 3696/18200 [01:28<06:07, 39.45it/s, loss=2.2271]


Logits stats - min: -6.4429, max: 2.2378
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▉                   | 3713/18200 [01:28<06:19, 38.21it/s, loss=1.8573]


Logits stats - min: -5.4335, max: 1.7048
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7840, max: 2.3349
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4880, max: 1.9726
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6766, max: 2.2445
Target unique values: tensor([0], device='cuda:0')


Training:  20%|████▉                   | 3728/18200 [01:29<05:39, 42.66it/s, loss=1.5637]


Logits stats - min: -6.6471, max: 2.4576
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1370, max: 2.5165
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4600, max: 2.0151
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3738/18200 [01:29<05:42, 42.19it/s, loss=1.5258]


Logits stats - min: -6.8711, max: 2.3376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5436, max: 2.0574
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3748/18200 [01:29<05:49, 41.34it/s, loss=1.4609]


Logits stats - min: -6.6781, max: 2.1836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8380, max: 1.5924
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3758/18200 [01:29<06:18, 38.17it/s, loss=1.5297]


Logits stats - min: -6.8357, max: 2.0581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1235, max: 2.3742
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3768/18200 [01:30<05:59, 40.13it/s, loss=1.5413]


Logits stats - min: -6.4954, max: 2.1473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5703, max: 2.1311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5857, max: 3.2038
Target unique values: tensor([0], device='cuda:0')


Training:  21%|████▉                   | 3783/18200 [01:30<05:38, 42.64it/s, loss=1.8418]


Logits stats - min: -7.8440, max: 2.3663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1211, max: 2.4727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1848, max: 1.6911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5890, max: 2.8715
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3793/18200 [01:30<05:50, 41.09it/s, loss=1.5657]


Logits stats - min: -7.0446, max: 2.3459
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3803/18200 [01:31<05:49, 41.16it/s, loss=1.3145]


Logits stats - min: -7.2025, max: 2.4660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7006, max: 2.0651
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3808/18200 [01:31<05:47, 41.39it/s, loss=1.8904]


Logits stats - min: -6.9265, max: 2.3684
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7210, max: 2.5590
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1092, max: 2.2792
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3818/18200 [01:31<05:43, 41.90it/s, loss=1.5681]


Logits stats - min: -6.7303, max: 2.4485
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3828/18200 [01:31<05:25, 44.13it/s, loss=1.5148]


Logits stats - min: -9.7856, max: 1.9796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4491, max: 2.1364
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8491, max: 1.4914
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3838/18200 [01:31<05:19, 44.94it/s, loss=1.3050]


Logits stats - min: -6.4927, max: 2.3269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4334, max: 2.4784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7473, max: 2.2467
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3843/18200 [01:32<05:32, 43.19it/s, loss=2.1968]


Logits stats - min: -6.5311, max: 2.3421
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3853/18200 [01:32<05:38, 42.39it/s, loss=2.4831]


Logits stats - min: -7.8957, max: 2.4521
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9334, max: 1.8017
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3863/18200 [01:32<05:34, 42.86it/s, loss=1.4464]


Logits stats - min: -7.0527, max: 2.2210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6325, max: 2.4882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3825, max: 2.3909
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████                   | 3879/18200 [01:32<05:11, 46.01it/s, loss=1.6319]


Logits stats - min: -7.9825, max: 2.1305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1347, max: 2.3957
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0438, max: 1.7288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9243, max: 2.4767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6047, max: 2.5057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7274, max: 2.7663
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████▏                  | 3894/18200 [01:33<05:31, 43.09it/s, loss=1.4715]


Logits stats - min: -6.8897, max: 1.9208
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5967, max: 2.5475
Target unique values: tensor([0], device='cuda:0')


Training:  21%|█████▏                  | 3904/18200 [01:33<05:50, 40.75it/s, loss=1.5589]


Logits stats - min: -8.2634, max: 2.7243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0716, max: 2.6656
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3914/18200 [01:33<05:57, 39.99it/s, loss=2.2181]


Logits stats - min: -7.1530, max: 2.9514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6988, max: 2.5369
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3924/18200 [01:33<05:43, 41.57it/s, loss=1.5435]


Logits stats - min: -7.3668, max: 2.5567
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9002, max: 2.4315
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3934/18200 [01:34<05:56, 40.06it/s, loss=1.6630]


Logits stats - min: -6.8269, max: 1.8732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3964, max: 1.7956
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9057, max: 2.2887
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3944/18200 [01:34<05:46, 41.12it/s, loss=1.5830]


Logits stats - min: -5.9968, max: 1.9588
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2184, max: 2.2250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0600, max: 2.5860
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1407, max: 2.4489
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9201, max: 2.0750
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3956/18200 [01:34<05:17, 44.84it/s, loss=1.4246]


Logits stats - min: -8.4748, max: 2.4595
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7335, max: 2.4090
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3966/18200 [01:34<05:34, 42.54it/s, loss=1.5571]


Logits stats - min: -6.7302, max: 2.1146
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6597, max: 2.4533
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▏                  | 3976/18200 [01:35<05:46, 41.05it/s, loss=3.2063]


Logits stats - min: -7.2867, max: 2.4415
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 3986/18200 [01:35<05:48, 40.77it/s, loss=1.5249]


Logits stats - min: -5.7280, max: 1.8424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2569, max: 2.3961
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4001/18200 [01:35<05:50, 40.52it/s, loss=1.2910]


Logits stats - min: -7.8743, max: 2.3594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9843, max: 2.3810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9834, max: 2.3492
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4011/18200 [01:35<05:50, 40.49it/s, loss=1.5796]


Logits stats - min: -6.5009, max: 2.0960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4208, max: 2.9119
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9971, max: 2.3023
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7297, max: 2.6668
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4021/18200 [01:36<05:38, 41.87it/s, loss=1.2972]


Logits stats - min: -7.3166, max: 2.6366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9833, max: 1.9696
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3481, max: 2.2372
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4031/18200 [01:36<05:46, 40.90it/s, loss=1.5644]


Logits stats - min: -6.2562, max: 1.8982
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8382, max: 2.5618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5069, max: 3.2906
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4059/18200 [01:37<05:48, 40.59it/s, loss=1.5546]


Logits stats - min: -8.5333, max: 2.3524
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0221, max: 3.0205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9866, max: 3.2395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4430, max: 1.9190
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▎                  | 4074/18200 [01:37<05:43, 41.09it/s, loss=1.4538]


Logits stats - min: -6.7374, max: 2.0530
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0829, max: 2.3394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9326, max: 2.5651
Target unique values: tensor([0], device='cuda:0')


Training:  22%|█████▍                  | 4094/18200 [01:38<05:31, 42.61it/s, loss=1.5665]


Logits stats - min: -8.6319, max: 2.2375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3133, max: 2.9811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6393, max: 3.2965
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4100/18200 [01:38<05:20, 44.06it/s, loss=2.2108]


Logits stats - min: -9.2386, max: 2.1378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9949, max: 2.5988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6609, max: 1.7362
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4111/18200 [01:38<05:11, 45.20it/s, loss=2.1966]


Logits stats - min: -7.6187, max: 2.5111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4057, max: 1.5250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4597, max: 2.1020
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9557, max: 2.6823
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4121/18200 [01:38<05:27, 43.03it/s, loss=2.8241]


Logits stats - min: -7.7594, max: 2.5271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9030, max: 1.4776
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0459, max: 1.9409
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9326, max: 2.2352
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4146/18200 [01:39<05:35, 41.85it/s, loss=1.2859]


Logits stats - min: -9.5262, max: 2.0396
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3801, max: 2.6503
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7871, max: 2.6715
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▍                  | 4156/18200 [01:39<05:29, 42.63it/s, loss=1.5149]


Logits stats - min: -7.3731, max: 1.6660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7955, max: 2.5434
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4024, max: 2.1188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6674, max: 1.5033
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2071, max: 1.5102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8058, max: 2.1199
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4172/18200 [01:39<05:04, 46.14it/s, loss=1.2745]


Logits stats - min: -8.2175, max: 2.5179
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1408, max: 2.0415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4641, max: 2.3131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7640, max: 2.6073
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0778, max: 2.6469
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4178/18200 [01:40<04:57, 47.19it/s, loss=1.5234]


Logits stats - min: -8.4344, max: 2.5228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5823, max: 2.0766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8684, max: 2.1149
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7917, max: 2.0791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6010, max: 2.2223
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4203/18200 [01:40<05:24, 43.12it/s, loss=1.5119]


Logits stats - min: -6.7943, max: 2.3832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3019, max: 2.4079
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2457, max: 3.2018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3936, max: 2.3500
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4213/18200 [01:40<05:29, 42.42it/s, loss=1.5579]


Logits stats - min: -7.1504, max: 1.8997
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6738, max: 2.3822
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5683, max: 2.1388
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4223/18200 [01:40<05:19, 43.78it/s, loss=2.8302]


Logits stats - min: -6.6193, max: 1.5425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2872, max: 1.8757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4123, max: 2.0825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9289, max: 2.8255
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4238/18200 [01:41<05:33, 41.84it/s, loss=1.4229]


Logits stats - min: -7.3979, max: 2.3441
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7465, max: 2.4456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2084, max: 2.6993
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6736, max: 2.0317
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▌                  | 4253/18200 [01:41<05:25, 42.85it/s, loss=2.8847]


Logits stats - min: -7.2856, max: 2.3769
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7749, max: 2.9498
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9258, max: 1.9292
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▋                  | 4268/18200 [01:42<05:34, 41.71it/s, loss=1.5246]


Logits stats - min: -6.3037, max: 1.3825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8682, max: 2.0010
Target unique values: tensor([0], device='cuda:0')


Training:  23%|█████▋                  | 4273/18200 [01:42<05:34, 41.63it/s, loss=1.4199]


Logits stats - min: -7.1603, max: 1.7040
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6704, max: 2.4173
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4288/18200 [01:42<05:42, 40.64it/s, loss=1.9299]


Logits stats - min: -7.2310, max: 2.0875
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6192, max: 1.8463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4197, max: 2.2841
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4298/18200 [01:42<05:18, 43.67it/s, loss=1.2930]


Logits stats - min: -6.2972, max: 2.0047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1872, max: 2.8682
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4308/18200 [01:43<05:34, 41.48it/s, loss=1.5129]


Logits stats - min: -7.0678, max: 1.7817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6072, max: 2.4737
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4323/18200 [01:43<05:20, 43.35it/s, loss=1.2898]


Logits stats - min: -7.7510, max: 2.5371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8351, max: 1.4770
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7698, max: 2.0446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6041, max: 2.4535
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4333/18200 [01:43<05:24, 42.73it/s, loss=1.5726]


Logits stats - min: -7.5712, max: 2.4063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6053, max: 2.1320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3989, max: 2.2950
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4350/18200 [01:43<04:53, 47.11it/s, loss=1.5172]


Logits stats - min: -8.0313, max: 2.5109
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3822, max: 2.6581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5145, max: 1.5897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8795, max: 2.2004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0707, max: 2.7718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5904, max: 2.2875
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▋                  | 4355/18200 [01:44<04:54, 46.95it/s, loss=1.6086]


Logits stats - min: -6.7526, max: 2.0460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6646, max: 2.6176
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7931, max: 2.5292
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4365/18200 [01:44<05:12, 44.25it/s, loss=1.3028]


Logits stats - min: -6.4188, max: 2.1633
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8618, max: 2.1078
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4380/18200 [01:44<05:34, 41.32it/s, loss=1.4299]


Logits stats - min: -8.8984, max: 2.1076
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4390/18200 [01:44<05:36, 41.06it/s, loss=1.5075]


Logits stats - min: -6.3714, max: 1.6674
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9146, max: 1.7961
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9076, max: 1.6816
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4405/18200 [01:45<05:38, 40.75it/s, loss=1.5674]


Logits stats - min: -6.9119, max: 1.8341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6427, max: 2.3684
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4415/18200 [01:45<05:32, 41.49it/s, loss=1.5112]


Logits stats - min: -7.6036, max: 2.4579
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6130, max: 2.1451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4173, max: 2.0936
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4426/18200 [01:45<05:08, 44.68it/s, loss=1.8988]


Logits stats - min: -7.3503, max: 2.4015
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4616, max: 2.3763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9482, max: 2.1010
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9204, max: 2.1801
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9686, max: 2.7902
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4436/18200 [01:46<05:24, 42.41it/s, loss=1.5731]


Logits stats - min: -6.4050, max: 2.0995
Target unique values: tensor([0], device='cuda:0')


Training:  24%|█████▊                  | 4455/18200 [01:46<05:30, 41.59it/s, loss=2.8203]


Logits stats - min: -6.7192, max: 2.1815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0629, max: 2.9016
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4973, max: 2.0778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3265, max: 2.1787
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4465/18200 [01:46<05:26, 42.04it/s, loss=1.5696]


Logits stats - min: -7.5551, max: 2.6568
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8365, max: 2.3936
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4480/18200 [01:47<05:47, 39.45it/s, loss=1.4299]


Logits stats - min: -6.7329, max: 2.5664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1141, max: 2.0674
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6796, max: 2.9584
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4495/18200 [01:47<05:40, 40.25it/s, loss=1.5082]


Logits stats - min: -6.7698, max: 2.4206
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5703, max: 2.3036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5158, max: 2.1259
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4510/18200 [01:47<05:36, 40.69it/s, loss=1.5104]


Logits stats - min: -6.4316, max: 2.2410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5532, max: 2.1522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7580, max: 1.7528
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4516/18200 [01:48<05:24, 42.16it/s, loss=1.4157]


Logits stats - min: -6.9637, max: 2.2793
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4531/18200 [01:48<05:30, 41.40it/s, loss=1.4368]


Logits stats - min: -6.2203, max: 2.1144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4017, max: 2.1782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7348, max: 2.1948
Target unique values: tensor([0], device='cuda:0')


Training:  25%|█████▉                  | 4541/18200 [01:48<05:24, 42.15it/s, loss=1.2689]


Logits stats - min: -6.7058, max: 2.5832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7575, max: 2.3087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1021, max: 1.9292
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2246, max: 2.4167
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7788, max: 2.1492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7334, max: 2.2084
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4299, max: 2.2436
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4556/18200 [01:48<05:18, 42.85it/s, loss=1.6147]


Logits stats - min: -7.7506, max: 2.4002
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5240, max: 2.9004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1205, max: 2.5284
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4566/18200 [01:49<05:21, 42.36it/s, loss=1.5227]


Logits stats - min: -7.8044, max: 2.4588
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1833, max: 2.4941
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4572/18200 [01:49<04:52, 46.58it/s, loss=1.5692]


Logits stats - min: -9.7394, max: 2.0521
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9466, max: 1.9498
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2464, max: 2.0324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7086, max: 2.3154
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4583/18200 [01:49<04:53, 46.44it/s, loss=1.5727]


Logits stats - min: -6.9165, max: 1.9190
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5443, max: 2.7876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5032, max: 2.4122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7737, max: 2.7400
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4595/18200 [01:49<04:58, 45.55it/s, loss=1.5594]


Logits stats - min: -6.6867, max: 2.2546
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1026, max: 2.2160
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4897, max: 2.2071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5506, max: 2.4695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4408, max: 1.6275
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4605/18200 [01:50<05:13, 43.35it/s, loss=1.5724]


Logits stats - min: -8.6992, max: 2.9824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1598, max: 2.0520
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4610/18200 [01:50<05:19, 42.54it/s, loss=1.5141]


Logits stats - min: -6.1648, max: 2.1057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2808, max: 2.8220
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9900, max: 2.8727
Target unique values: tensor([0], device='cuda:0')



Training:  25%|██████                  | 4620/18200 [01:50<05:09, 43.89it/s, loss=1.8780]

Logits stats - min: -7.4934, max: 1.5181
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2818, max: 2.3818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7775, max: 2.4651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6655, max: 2.2491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5307, max: 2.0200
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0606, max: 2.8738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4348, max: 1.6880
Target unique values: tensor([0], device='cuda:0')


Training:  25%|██████                  | 4631/18200 [01:50<04:48, 47.05it/s, loss=1.5563]


Logits stats - min: -6.9997, max: 2.1040
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3059, max: 1.6150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3050, max: 2.2888
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7542, max: 2.3482
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4651/18200 [01:51<05:13, 43.26it/s, loss=1.1682]


Logits stats - min: -7.9436, max: 2.4787
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1445, max: 2.7594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4255, max: 2.4735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0125, max: 2.3136
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4656/18200 [01:51<05:22, 41.95it/s, loss=1.5143]


Logits stats - min: -7.1119, max: 2.1507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7427, max: 2.5333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3024, max: 2.1805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7029, max: 1.7998
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4687/18200 [01:51<05:36, 40.16it/s, loss=1.4107]


Logits stats - min: -7.8336, max: 2.5551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7912, max: 2.9207
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4699/18200 [01:52<04:53, 45.95it/s, loss=1.5601]


Logits stats - min: -6.0022, max: 1.4650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2662, max: 1.5314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6890, max: 1.9613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1624, max: 1.8502
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3007, max: 2.1985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5192, max: 2.3841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1919, max: 2.4338
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4709/18200 [01:52<05:05, 44.11it/s, loss=1.4382]


Logits stats - min: -7.3412, max: 1.8507
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▏                 | 4719/18200 [01:52<05:22, 41.86it/s, loss=1.4185]


Logits stats - min: -7.8087, max: 2.3357
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4744/18200 [01:53<05:30, 40.70it/s, loss=1.6991]


Logits stats - min: -7.4916, max: 1.8065
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6968, max: 2.6710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0363, max: 2.4468
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4754/18200 [01:53<05:14, 42.74it/s, loss=1.3801]


Logits stats - min: -7.1670, max: 2.0791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.6988, max: 1.5358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7085, max: 2.3667
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1752, max: 2.9354
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4764/18200 [01:53<05:23, 41.56it/s, loss=1.5940]


Logits stats - min: -7.6662, max: 2.6488
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6918, max: 2.1758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9626, max: 2.6403
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4779/18200 [01:54<05:31, 40.44it/s, loss=1.5167]


Logits stats - min: -6.2978, max: 2.2136
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4789/18200 [01:54<05:05, 43.85it/s, loss=1.5754]


Logits stats - min: -8.4114, max: 2.4786
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3434, max: 2.7618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5239, max: 2.3514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3807, max: 2.2504
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4799/18200 [01:54<05:16, 42.35it/s, loss=1.9040]


Logits stats - min: -8.9098, max: 2.8782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8444, max: 2.5216
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3717, max: 2.6108
Target unique values: tensor([0], device='cuda:0')


Training:  26%|██████▎                 | 4814/18200 [01:54<05:30, 40.54it/s, loss=1.4398]


Logits stats - min: -8.1496, max: 2.6100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0368, max: 2.0741
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7299, max: 2.4256
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▎                 | 4824/18200 [01:55<05:15, 42.40it/s, loss=1.5116]


Logits stats - min: -9.0781, max: 3.3123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5482, max: 2.4986
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9629, max: 2.3669
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4839/18200 [01:55<05:27, 40.83it/s, loss=1.5809]


Logits stats - min: -6.7540, max: 2.5236
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4848/18200 [01:55<05:40, 39.23it/s, loss=2.7526]


Logits stats - min: -7.9803, max: 2.3736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6236, max: 1.6521
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4862/18200 [01:56<05:22, 41.33it/s, loss=1.5678]


Logits stats - min: -7.7671, max: 2.6755
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2807, max: 2.6194
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4872/18200 [01:56<05:31, 40.24it/s, loss=1.3220]


Logits stats - min: -7.3084, max: 2.3562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2265, max: 2.3454
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9784, max: 2.6826
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7129, max: 2.0430
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4882/18200 [01:56<05:22, 41.28it/s, loss=1.5671]


Logits stats - min: -8.4656, max: 2.8020
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4892/18200 [01:56<05:16, 42.11it/s, loss=1.5163]


Logits stats - min: -9.1124, max: 2.5271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5880, max: 2.4838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3419, max: 1.5989
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9787, max: 2.4554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5399, max: 2.4410
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4902/18200 [01:57<05:14, 42.22it/s, loss=1.4164]


Logits stats - min: -6.4503, max: 2.1456
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4912/18200 [01:57<05:22, 41.20it/s, loss=1.5631]


Logits stats - min: -6.9288, max: 2.6184
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5176, max: 1.7697
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5393, max: 2.6701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8943, max: 2.0131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3569, max: 2.4453
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▍                 | 4924/18200 [01:57<04:55, 44.93it/s, loss=1.5087]


Logits stats - min: -7.6722, max: 2.3236
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6263, max: 1.9501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2623, max: 2.3389
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8036, max: 2.4608
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4934/18200 [01:57<04:48, 45.91it/s, loss=1.2709]


Logits stats - min: -6.1204, max: 1.7573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3117, max: 2.1374
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6199, max: 2.1375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1471, max: 2.6438
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4949/18200 [01:58<05:08, 42.94it/s, loss=2.1160]


Logits stats - min: -6.2583, max: 2.3846
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2014, max: 2.3492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6465, max: 2.0548
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7159, max: 1.9866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1783, max: 2.6448
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4960/18200 [01:58<05:02, 43.83it/s, loss=1.4168]


Logits stats - min: -6.7387, max: 2.4424
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4970/18200 [01:58<05:07, 43.08it/s, loss=1.5588]


Logits stats - min: -7.3596, max: 1.8739
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6331, max: 1.8656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8890, max: 2.2235
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4985/18200 [01:59<05:02, 43.73it/s, loss=1.2929]


Logits stats - min: -6.4147, max: 1.8763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9746, max: 2.2706
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7247, max: 2.2159
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2983, max: 2.2398
Target unique values: tensor([0], device='cuda:0')


Training:  27%|██████▌                 | 4995/18200 [01:59<05:18, 41.45it/s, loss=1.9847]


Logits stats - min: -8.0647, max: 2.5824
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▌                 | 5010/18200 [01:59<05:33, 39.51it/s, loss=1.5577]


Logits stats - min: -6.5781, max: 2.2610
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5413, max: 1.4171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3461, max: 2.0960
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▌                 | 5021/18200 [01:59<05:08, 42.73it/s, loss=2.2573]


Logits stats - min: -6.8709, max: 1.9343
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5144, max: 2.8132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4595, max: 2.2704
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5022, max: 1.4596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2500, max: 2.6727
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5027/18200 [02:00<04:52, 45.05it/s, loss=1.5087]


Logits stats - min: -5.5938, max: 1.5581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4146, max: 1.7914
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5042/18200 [02:00<04:58, 44.11it/s, loss=2.1042]


Logits stats - min: -7.7103, max: 2.3718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3593, max: 2.2225
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5932, max: 2.3705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2139, max: 2.5062
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5052/18200 [02:00<05:06, 42.94it/s, loss=1.5855]


Logits stats - min: -6.3389, max: 2.3308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1100, max: 2.1808
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5067/18200 [02:00<05:05, 42.98it/s, loss=2.7784]


Logits stats - min: -6.2533, max: 2.2406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5299, max: 2.3228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7610, max: 2.3609
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5088/18200 [02:01<05:05, 42.94it/s, loss=1.8261]


Logits stats - min: -8.6895, max: 2.8843
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1596, max: 1.4310
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2044, max: 2.3092
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2906, max: 2.3136
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5093/18200 [02:01<04:56, 44.15it/s, loss=1.1892]


Logits stats - min: -6.3505, max: 2.5145
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.3634, max: 1.5335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5284, max: 2.5183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7900, max: 2.4415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3953, max: 2.0492
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5103/18200 [02:01<05:14, 41.63it/s, loss=1.5579]


Logits stats - min: -7.0486, max: 2.6512
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5624, max: 2.1446
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▋                 | 5113/18200 [02:02<05:02, 43.27it/s, loss=1.4258]


Logits stats - min: -7.6122, max: 2.6280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8092, max: 1.5565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7773, max: 2.3907
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5123/18200 [02:02<05:13, 41.75it/s, loss=1.3184]


Logits stats - min: -8.6934, max: 2.7693
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5133/18200 [02:02<05:01, 43.37it/s, loss=1.5655]


Logits stats - min: -6.7914, max: 1.6255
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5828, max: 1.7565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7779, max: 2.5281
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9356, max: 2.1732
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5143/18200 [02:02<05:00, 43.43it/s, loss=2.8072]


Logits stats - min: -6.3833, max: 2.4007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4237, max: 2.5039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1795, max: 2.5201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5787, max: 1.7945
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5153/18200 [02:02<05:13, 41.66it/s, loss=1.8850]


Logits stats - min: -6.6165, max: 2.3120
Target unique values: tensor([0], device='cuda:0')


Training:  28%|██████▊                 | 5168/18200 [02:03<05:20, 40.64it/s, loss=1.2864]


Logits stats - min: -6.0040, max: 1.9181
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6457, max: 2.4263
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▊                 | 5188/18200 [02:03<05:26, 39.84it/s, loss=1.2863]


Logits stats - min: -6.5788, max: 2.1975
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7588, max: 2.1710
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▊                 | 5202/18200 [02:04<05:20, 40.56it/s, loss=1.5621]


Logits stats - min: -6.2069, max: 2.0005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8337, max: 1.7679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7761, max: 3.0654
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9600, max: 1.8863
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▊                 | 5207/18200 [02:04<05:14, 41.30it/s, loss=1.5071]


Logits stats - min: -6.3062, max: 2.1721
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1143, max: 2.5952
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5217/18200 [02:04<05:12, 41.52it/s, loss=1.5816]


Logits stats - min: -8.5855, max: 2.8940
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3765, max: 2.9169
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5232/18200 [02:04<04:54, 44.08it/s, loss=1.5350]


Logits stats - min: -6.4274, max: 2.0893
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8469, max: 2.6543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7079, max: 2.2638
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2743, max: 2.3462
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5237/18200 [02:05<04:51, 44.48it/s, loss=1.8502]


Logits stats - min: -8.1274, max: 2.7022
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0361, max: 2.7916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8352, max: 2.4185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6483, max: 1.8837
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7551, max: 3.1472
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5248/18200 [02:05<04:29, 48.03it/s, loss=1.5704]


Logits stats - min: -7.1778, max: 2.3109
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1681, max: 2.2271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0576, max: 3.1107
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2272, max: 2.6875
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5258/18200 [02:05<04:37, 46.64it/s, loss=1.5610]


Logits stats - min: -7.7637, max: 3.0012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2942, max: 2.1480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7303, max: 1.7224
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5269/18200 [02:05<04:43, 45.61it/s, loss=1.5201]


Logits stats - min: -6.4999, max: 2.3215
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8621, max: 2.2151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5618, max: 1.9789
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0019, max: 2.4016
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5279/18200 [02:05<04:52, 44.21it/s, loss=1.4142]


Logits stats - min: -7.2434, max: 2.3454
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1257, max: 1.5411
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8323, max: 1.4854
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5290/18200 [02:06<04:47, 44.91it/s, loss=1.3950]


Logits stats - min: -7.9268, max: 2.6069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7829, max: 2.4377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5582, max: 2.7003
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8240, max: 2.6259
Target unique values: tensor([0], device='cuda:0')


Training:  29%|██████▉                 | 5300/18200 [02:06<04:49, 44.57it/s, loss=1.2990]


Logits stats - min: -7.8461, max: 2.4424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8529, max: 2.4080
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5310/18200 [02:06<05:18, 40.43it/s, loss=1.5569]


Logits stats - min: -6.8392, max: 2.3942
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5329/18200 [02:07<05:26, 39.42it/s, loss=2.0286]


Logits stats - min: -6.7233, max: 2.1475
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9039, max: 2.1331
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5339/18200 [02:07<05:15, 40.71it/s, loss=1.5567]


Logits stats - min: -6.1849, max: 2.4811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5400, max: 2.4480
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5350/18200 [02:07<04:50, 44.20it/s, loss=1.8276]


Logits stats - min: -7.1440, max: 2.1024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2634, max: 1.9538
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5823, max: 2.4883
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6466, max: 2.0373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5795, max: 2.3625
Target unique values: tensor([0], device='cuda:0')


Training:  29%|███████                 | 5360/18200 [02:07<04:48, 44.48it/s, loss=1.5181]


Logits stats - min: -6.4617, max: 2.1757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0173, max: 2.3664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7112, max: 1.8778
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5376/18200 [02:08<04:56, 43.29it/s, loss=1.5266]


Logits stats - min: -6.0315, max: 2.1870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2540, max: 2.0301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5716, max: 2.4877
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4949, max: 1.5152
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5386/18200 [02:08<04:57, 43.02it/s, loss=1.3948]


Logits stats - min: -6.2658, max: 2.1459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3149, max: 2.2946
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████                 | 5396/18200 [02:08<05:05, 41.97it/s, loss=1.5477]


Logits stats - min: -8.1176, max: 2.4524
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3125, max: 2.0403
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6611, max: 2.2422
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1711, max: 2.0070
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4704, max: 2.3734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7567, max: 2.0907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0523, max: 1.5658
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5407/18200 [02:09<04:43, 45.11it/s, loss=1.2833]


Logits stats - min: -6.5443, max: 2.3473
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6001, max: 3.2584
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5417/18200 [02:09<04:56, 43.13it/s, loss=1.5257]


Logits stats - min: -5.9917, max: 1.5252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3715, max: 2.6149
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9543, max: 1.8044
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5427/18200 [02:09<05:05, 41.75it/s, loss=1.5230]


Logits stats - min: -9.1805, max: 2.0026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0669, max: 2.1086
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8197, max: 2.4338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3214, max: 2.2696
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5438/18200 [02:09<04:51, 43.82it/s, loss=1.2809]


Logits stats - min: -7.6563, max: 2.6452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5105, max: 2.3236
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9306, max: 2.0952
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1875, max: 2.4877
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5453/18200 [02:10<05:11, 40.95it/s, loss=1.9134]


Logits stats - min: -7.5205, max: 2.2511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2927, max: 2.0914
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5463/18200 [02:10<04:59, 42.56it/s, loss=1.5150]


Logits stats - min: -8.5971, max: 2.4587
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5424, max: 1.5638
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6536, max: 2.0472
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5232, max: 1.7699
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7698, max: 2.4903
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5479/18200 [02:10<05:16, 40.13it/s, loss=1.9742]


Logits stats - min: -8.6291, max: 2.7810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0156, max: 2.6392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4195, max: 2.6240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8694, max: 2.3829
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▏                | 5495/18200 [02:11<04:55, 43.04it/s, loss=1.5678]


Logits stats - min: -8.9293, max: 2.9602
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5457, max: 2.2875
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5505/18200 [02:11<05:03, 41.89it/s, loss=1.8638]


Logits stats - min: -8.2938, max: 2.8327
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4104, max: 2.4643
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0099, max: 2.5944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6463, max: 2.6909
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5520/18200 [02:11<05:14, 40.28it/s, loss=1.5588]


Logits stats - min: -8.6251, max: 2.8636
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5530/18200 [02:11<05:17, 39.89it/s, loss=1.5228]


Logits stats - min: -7.8511, max: 2.5311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2878, max: 1.5452
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5540/18200 [02:12<05:15, 40.14it/s, loss=1.5587]


Logits stats - min: -7.6070, max: 2.6603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0387, max: 1.5990
Target unique values: tensor([0], device='cuda:0')


Training:  30%|███████▎                | 5550/18200 [02:12<05:03, 41.63it/s, loss=1.4256]


Logits stats - min: -7.4593, max: 2.2963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5792, max: 1.5169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0278, max: 2.8642
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▎                | 5560/18200 [02:12<05:17, 39.83it/s, loss=1.5715]


Logits stats - min: -8.8490, max: 1.9478
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▎                | 5570/18200 [02:12<05:01, 41.94it/s, loss=1.3918]


Logits stats - min: -8.0006, max: 2.3557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5616, max: 2.7381
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7115, max: 2.2728
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▎                | 5580/18200 [02:13<05:10, 40.69it/s, loss=2.1694]


Logits stats - min: -6.7350, max: 2.0180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6608, max: 2.0689
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5594/18200 [02:13<05:11, 40.41it/s, loss=1.3381]


Logits stats - min: -8.5525, max: 2.7737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5179, max: 2.8820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6515, max: 2.6690
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5605/18200 [02:13<04:42, 44.66it/s, loss=1.2934]


Logits stats - min: -7.8638, max: 2.4885
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3236, max: 1.7152
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5708, max: 2.1100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6637, max: 1.9544
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7177, max: 2.4778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6885, max: 2.4452
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5620/18200 [02:14<04:51, 43.15it/s, loss=1.5096]


Logits stats - min: -7.4303, max: 2.4188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8778, max: 1.9228
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5630/18200 [02:14<05:12, 40.19it/s, loss=1.4083]


Logits stats - min: -7.5679, max: 2.4799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4464, max: 2.2868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8206, max: 2.0996
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3453, max: 2.8122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0476, max: 2.1870
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5641/18200 [02:14<04:55, 42.50it/s, loss=1.4068]


Logits stats - min: -6.4759, max: 2.5902
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1734, max: 2.3664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3881, max: 2.3043
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5651/18200 [02:14<04:41, 44.54it/s, loss=1.5171]


Logits stats - min: -6.9236, max: 2.2228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4880, max: 1.9756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8772, max: 1.6963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6927, max: 2.2714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7909, max: 1.9880
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5666/18200 [02:15<05:25, 38.53it/s, loss=2.0875]


Logits stats - min: -7.4048, max: 2.2461
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5671/18200 [02:15<05:08, 40.65it/s, loss=1.5640]


Logits stats - min: -7.0494, max: 2.4014
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2433, max: 2.4270
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▍                | 5685/18200 [02:15<05:39, 36.89it/s, loss=1.5590]


Logits stats - min: -7.8422, max: 2.8212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7000, max: 2.0273
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4342, max: 1.5491
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5694/18200 [02:15<05:09, 40.34it/s, loss=1.6270]


Logits stats - min: -6.6635, max: 2.4182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6604, max: 2.0240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4304, max: 2.0202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3146, max: 2.1424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4177, max: 2.8443
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9391, max: 2.4363
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5706/18200 [02:16<05:03, 41.15it/s, loss=1.2714]


Logits stats - min: -7.5190, max: 2.4354
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5880, max: 2.2618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1670, max: 2.2654
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5721/18200 [02:16<05:08, 40.40it/s, loss=1.5587]


Logits stats - min: -5.1195, max: 1.5288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1850, max: 2.3114
Target unique values: tensor([0], device='cuda:0')


Training:  31%|███████▌                | 5732/18200 [02:16<04:51, 42.74it/s, loss=1.5592]


Logits stats - min: -9.2816, max: 2.0311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3971, max: 2.1138
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.1647, max: 2.0239
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4739, max: 2.1820
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9684, max: 2.1667
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5742/18200 [02:17<05:05, 40.73it/s, loss=2.1896]


Logits stats - min: -7.8268, max: 2.3720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9789, max: 2.5156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8360, max: 2.0850
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5752/18200 [02:17<05:20, 38.90it/s, loss=2.8436]


Logits stats - min: -7.5733, max: 2.4568
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0537, max: 1.8074
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5765/18200 [02:17<05:13, 39.66it/s, loss=1.5498]


Logits stats - min: -7.2533, max: 2.2383
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0789, max: 2.0782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9684, max: 2.1663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7976, max: 2.2022
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▌                | 5776/18200 [02:17<04:39, 44.40it/s, loss=1.6655]


Logits stats - min: -6.6902, max: 2.0088
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1556, max: 2.8907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9493, max: 2.0667
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3706, max: 1.5174
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5791/18200 [02:18<05:04, 40.70it/s, loss=1.5342]


Logits stats - min: -6.8515, max: 2.1456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8757, max: 2.5554
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5811/18200 [02:18<05:08, 40.15it/s, loss=2.0853]


Logits stats - min: -6.0778, max: 1.5304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9542, max: 1.9110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4445, max: 2.2945
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5832/18200 [02:19<05:03, 40.80it/s, loss=1.2978]


Logits stats - min: -8.4356, max: 2.6363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4912, max: 2.9549
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5837/18200 [02:19<05:01, 41.07it/s, loss=1.5107]


Logits stats - min: -7.3628, max: 2.4303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4342, max: 2.4454
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9106, max: 2.0180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0463, max: 2.4509
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5851/18200 [02:19<05:18, 38.71it/s, loss=1.5111]


Logits stats - min: -7.3880, max: 2.4328
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8745, max: 1.9489
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5860/18200 [02:20<05:06, 40.30it/s, loss=1.3025]


Logits stats - min: -7.6621, max: 2.5291
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9566, max: 2.5109
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8027, max: 1.7920
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▋                | 5870/18200 [02:20<04:46, 42.97it/s, loss=1.5146]


Logits stats - min: -7.1647, max: 2.7595
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3066, max: 2.0962
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▊                | 5890/18200 [02:20<05:13, 39.21it/s, loss=1.6762]


Logits stats - min: -6.1828, max: 2.1859
Target unique values: tensor([0], device='cuda:0')


Training:  32%|███████▊                | 5913/18200 [02:21<05:04, 40.30it/s, loss=2.1652]


Logits stats - min: -8.2004, max: 1.5627
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4510, max: 2.3453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8311, max: 2.3824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7720, max: 2.0639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6263, max: 2.6318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3735, max: 2.1748
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5929/18200 [02:21<04:40, 43.82it/s, loss=1.5610]


Logits stats - min: -6.9615, max: 2.0352
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5407, max: 1.9561
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9169, max: 2.0509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9570, max: 1.9812
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5939/18200 [02:22<04:52, 41.92it/s, loss=1.2782]


Logits stats - min: -7.9626, max: 2.4969
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4352, max: 2.6249
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6719, max: 1.9463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0300, max: 2.5406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1432, max: 2.3833
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5954/18200 [02:22<04:47, 42.54it/s, loss=1.2954]


Logits stats - min: -7.6503, max: 2.7757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8577, max: 1.6163
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4681, max: 2.0900
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▊                | 5964/18200 [02:22<04:50, 42.18it/s, loss=1.2399]


Logits stats - min: -7.9765, max: 2.6594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1402, max: 1.7382
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 5974/18200 [02:22<04:48, 42.44it/s, loss=1.5568]


Logits stats - min: -6.4430, max: 2.0782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4971, max: 2.3729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4518, max: 2.6958
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 5989/18200 [02:23<04:41, 43.36it/s, loss=1.5626]


Logits stats - min: -8.6197, max: 3.0090
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2533, max: 2.1636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8697, max: 2.6153
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9814, max: 1.5925
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 5999/18200 [02:23<04:47, 42.50it/s, loss=1.5588]


Logits stats - min: -6.3047, max: 1.6742
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6483, max: 1.5271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2351, max: 2.0688
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5445, max: 2.0457
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6009/18200 [02:23<04:30, 45.03it/s, loss=1.4325]


Logits stats - min: -7.2531, max: 2.4250
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6617, max: 2.4065
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5917, max: 2.0922
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6019/18200 [02:23<04:48, 42.15it/s, loss=1.2761]


Logits stats - min: -6.2387, max: 1.9478
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2647, max: 3.0582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3401, max: 1.6961
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6030/18200 [02:24<04:29, 45.15it/s, loss=1.5069]


Logits stats - min: -7.6023, max: 2.0927
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6154, max: 2.7410
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6040/18200 [02:24<04:52, 41.51it/s, loss=1.2713]


Logits stats - min: -6.3628, max: 2.0715
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6050/18200 [02:24<05:03, 40.04it/s, loss=1.2909]


Logits stats - min: -6.8651, max: 2.0463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0729, max: 2.2466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8718, max: 2.4351
Target unique values: tensor([0], device='cuda:0')


Training:  33%|███████▉                | 6060/18200 [02:24<04:53, 41.35it/s, loss=1.5291]


Logits stats - min: -7.6107, max: 2.3446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4745, max: 2.2183
Target unique values: tensor([0], device='cuda:0')


Training:  33%|████████                | 6070/18200 [02:25<04:53, 41.31it/s, loss=1.5189]


Logits stats - min: -6.8784, max: 1.9715
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0139, max: 2.6545
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0129, max: 2.3711
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3458, max: 1.6755
Target unique values: tensor([0], device='cuda:0')


Training:  33%|████████                | 6085/18200 [02:25<04:53, 41.34it/s, loss=1.2577]


Logits stats - min: -8.3232, max: 2.7497
Target unique values: tensor([0], device='cuda:0')


Training:  33%|████████                | 6095/18200 [02:25<05:09, 39.06it/s, loss=1.5597]


Logits stats - min: -7.6348, max: 2.3572
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6561, max: 2.5451
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6103/18200 [02:25<05:10, 38.91it/s, loss=1.5521]


Logits stats - min: -7.0659, max: 1.4414
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7977, max: 2.7142
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1220, max: 2.5259
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6114/18200 [02:26<04:36, 43.71it/s, loss=2.7875]


Logits stats - min: -8.2134, max: 2.6233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3560, max: 2.2044
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0278, max: 2.6219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1672, max: 2.0711
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6124/18200 [02:26<04:38, 43.35it/s, loss=2.8050]


Logits stats - min: -7.6516, max: 2.2604
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9184, max: 1.8776
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2785, max: 2.5000
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6135/18200 [02:26<04:22, 46.04it/s, loss=1.3949]


Logits stats - min: -6.5264, max: 2.3087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0877, max: 2.2051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3806, max: 2.4118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9975, max: 2.0078
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6146/18200 [02:26<04:19, 46.54it/s, loss=1.5212]


Logits stats - min: -7.2686, max: 2.1324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5455, max: 1.9280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1361, max: 2.8418
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████                | 6161/18200 [02:27<04:50, 41.46it/s, loss=1.2502]


Logits stats - min: -6.1565, max: 2.1452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6090, max: 2.6524
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6171/18200 [02:27<04:46, 42.01it/s, loss=1.2653]


Logits stats - min: -7.8946, max: 2.5651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7700, max: 2.3936
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6181/18200 [02:27<04:37, 43.36it/s, loss=1.7766]


Logits stats - min: -6.4078, max: 2.2463
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8420, max: 2.2071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7938, max: 1.9368
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4820, max: 2.0869
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6192/18200 [02:27<04:27, 44.92it/s, loss=2.1071]


Logits stats - min: -6.7555, max: 1.9373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4000, max: 2.0897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6601, max: 2.3171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1242, max: 2.4971
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6202/18200 [02:28<04:50, 41.26it/s, loss=1.5172]


Logits stats - min: -7.7445, max: 2.6326
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1260, max: 1.5354
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6733, max: 1.9870
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6222/18200 [02:28<04:53, 40.84it/s, loss=1.2711]


Logits stats - min: -6.5007, max: 2.0066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1757, max: 2.2926
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2405, max: 2.3050
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6227/18200 [02:28<04:38, 42.95it/s, loss=1.4993]


Logits stats - min: -5.8346, max: 2.2319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3283, max: 2.4964
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▏               | 6242/18200 [02:29<04:48, 41.44it/s, loss=1.4075]


Logits stats - min: -6.9270, max: 1.5155
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.7751, max: 1.4357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5665, max: 2.1413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6750, max: 1.5977
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▎               | 6258/18200 [02:29<04:51, 40.94it/s, loss=1.5576]


Logits stats - min: -6.1124, max: 1.4271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0495, max: 2.5260
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▎               | 6273/18200 [02:29<04:40, 42.48it/s, loss=1.5358]


Logits stats - min: -6.7730, max: 2.0536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7082, max: 2.1862
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5574, max: 2.0558
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4767, max: 1.9749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9316, max: 3.0422
Target unique values: tensor([0], device='cuda:0')


Training:  34%|████████▎               | 6279/18200 [02:30<04:23, 45.29it/s, loss=1.9477]


Logits stats - min: -9.0862, max: 2.0819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1170, max: 2.3894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4656, max: 2.9004
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6289/18200 [02:30<04:18, 46.01it/s, loss=2.0771]


Logits stats - min: -6.9517, max: 2.0831
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3780, max: 2.4185
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6309/18200 [02:30<04:41, 42.21it/s, loss=1.9649]


Logits stats - min: -8.1576, max: 1.9015
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3600, max: 2.2133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7430, max: 2.2662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0397, max: 1.6874
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6409, max: 2.4574
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6320/18200 [02:30<04:30, 43.99it/s, loss=1.5673]


Logits stats - min: -6.5497, max: 2.1976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5797, max: 1.3008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7051, max: 2.4451
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0710, max: 2.6604
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6330/18200 [02:31<04:26, 44.50it/s, loss=1.5641]


Logits stats - min: -7.4759, max: 2.4000
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2042, max: 2.0392
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6341/18200 [02:31<04:12, 47.05it/s, loss=1.8675]


Logits stats - min: -6.7768, max: 2.2574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5674, max: 2.0362
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8331, max: 2.4240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0459, max: 2.4438
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5686, max: 1.9577
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▎               | 6351/18200 [02:31<04:19, 45.74it/s, loss=2.7799]


Logits stats - min: -8.4156, max: 2.6755
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4568, max: 2.5127
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6622, max: 2.7104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3381, max: 2.7170
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6371/18200 [02:32<04:43, 41.72it/s, loss=1.4901]


Logits stats - min: -8.2705, max: 2.4875
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8150, max: 2.5174
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6381/18200 [02:32<04:33, 43.17it/s, loss=1.5592]


Logits stats - min: -7.9603, max: 2.4334
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8206, max: 1.5060
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4654, max: 2.6024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0118, max: 2.1754
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6391/18200 [02:32<04:35, 42.83it/s, loss=1.9702]


Logits stats - min: -8.0733, max: 2.4635
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7892, max: 2.4470
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2612, max: 2.3019
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6401/18200 [02:32<04:31, 43.49it/s, loss=1.8274]


Logits stats - min: -8.6437, max: 2.6475
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4397, max: 2.4459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7736, max: 2.0574
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6411/18200 [02:33<04:35, 42.75it/s, loss=1.5136]


Logits stats - min: -8.2240, max: 2.9141
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2659, max: 2.4844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3770, max: 2.9770
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▍               | 6421/18200 [02:33<04:47, 40.94it/s, loss=1.5505]


Logits stats - min: -8.2290, max: 2.7895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6232, max: 2.3759
Target unique values: tensor([0], device='cuda:0')


Training:  35%|████████▌               | 6448/18200 [02:34<04:59, 39.19it/s, loss=1.2603]


Logits stats - min: -8.3844, max: 2.4093
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0103, max: 2.1583
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6467/18200 [02:34<04:42, 41.51it/s, loss=1.2354]


Logits stats - min: -6.9354, max: 2.1320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5085, max: 2.4483
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4884, max: 2.2737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7302, max: 2.2393
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6477/18200 [02:34<04:55, 39.65it/s, loss=1.5543]


Logits stats - min: -6.9358, max: 1.8114
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2478, max: 1.8177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9486, max: 2.6132
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3983, max: 2.9126
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6484/18200 [02:34<04:26, 43.97it/s, loss=1.5506]


Logits stats - min: -8.7684, max: 2.5824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1472, max: 2.6504
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4662, max: 2.5053
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6499/18200 [02:35<04:31, 43.11it/s, loss=1.5147]


Logits stats - min: -10.6478, max: 2.0480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7179, max: 2.8731
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6504/18200 [02:35<04:28, 43.61it/s, loss=1.3400]


Logits stats - min: -8.2874, max: 2.4634
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6519/18200 [02:35<04:38, 41.97it/s, loss=1.2711]


Logits stats - min: -8.6477, max: 2.5657
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1937, max: 2.6120
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6026, max: 2.1471
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8293, max: 2.3048
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▌               | 6529/18200 [02:35<04:38, 41.86it/s, loss=1.5027]


Logits stats - min: -7.9463, max: 2.4182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5003, max: 2.5332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9308, max: 1.4777
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6544/18200 [02:36<04:46, 40.73it/s, loss=1.3896]


Logits stats - min: -7.4940, max: 2.0929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9926, max: 2.6341
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6549/18200 [02:36<04:40, 41.52it/s, loss=1.9488]


Logits stats - min: -6.4942, max: 1.9059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6934, max: 2.3063
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6569/18200 [02:36<04:43, 41.07it/s, loss=1.5502]


Logits stats - min: -6.5377, max: 1.9582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4173, max: 2.0338
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0169, max: 2.4681
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7280, max: 1.9617
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6580/18200 [02:37<04:32, 42.69it/s, loss=1.2740]


Logits stats - min: -6.8003, max: 2.1554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9370, max: 2.6404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3668, max: 1.9528
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6595/18200 [02:37<04:41, 41.28it/s, loss=2.1865]


Logits stats - min: -9.2429, max: 2.0673
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2687, max: 2.6175
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6605/18200 [02:37<04:36, 41.90it/s, loss=1.3941]


Logits stats - min: -8.0468, max: 2.4492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3688, max: 2.4965
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6615/18200 [02:37<04:46, 40.44it/s, loss=1.5010]


Logits stats - min: -8.6521, max: 2.9606
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8129, max: 2.0163
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6620/18200 [02:38<04:32, 42.46it/s, loss=1.5169]


Logits stats - min: -7.6452, max: 2.5285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0464, max: 2.1098
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8654, max: 2.0486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4037, max: 2.0470
Target unique values: tensor([0], device='cuda:0')


Training:  36%|████████▋               | 6635/18200 [02:38<04:34, 42.18it/s, loss=2.7406]


Logits stats - min: -7.4320, max: 2.7229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9128, max: 1.7629
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9243, max: 2.4487
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7067, max: 2.0701
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6655/18200 [02:38<04:36, 41.68it/s, loss=1.3814]


Logits stats - min: -7.0094, max: 1.9058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5405, max: 2.0192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4831, max: 2.4832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6165, max: 2.1883
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0724, max: 2.4320
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6665/18200 [02:39<04:31, 42.50it/s, loss=1.5455]


Logits stats - min: -6.4230, max: 2.2973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8926, max: 2.5513
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1670, max: 2.3904
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7988, max: 2.2240
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6680/18200 [02:39<04:52, 39.43it/s, loss=1.5630]


Logits stats - min: -5.6476, max: 1.7766
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6690/18200 [02:39<04:53, 39.19it/s, loss=1.6957]


Logits stats - min: -7.8633, max: 1.7883
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6703/18200 [02:40<04:36, 41.62it/s, loss=2.1971]


Logits stats - min: -8.3597, max: 2.5863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8560, max: 2.0181
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2408, max: 2.7056
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▊               | 6713/18200 [02:40<04:46, 40.10it/s, loss=1.3977]


Logits stats - min: -8.8265, max: 2.6050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4274, max: 1.9121
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1144, max: 2.5595
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6736/18200 [02:41<04:53, 39.10it/s, loss=1.5259]


Logits stats - min: -7.9028, max: 2.6456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6956, max: 2.9470
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6750/18200 [02:41<04:42, 40.56it/s, loss=1.3950]


Logits stats - min: -6.5563, max: 2.1045
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2896, max: 2.6745
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6765/18200 [02:41<04:43, 40.40it/s, loss=1.5099]


Logits stats - min: -6.7189, max: 1.9394
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6775/18200 [02:41<04:38, 41.09it/s, loss=1.8720]


Logits stats - min: -8.1543, max: 2.4257
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8959, max: 2.2366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3571, max: 2.3903
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6877, max: 2.0444
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6789/18200 [02:42<04:53, 38.92it/s, loss=1.9981]


Logits stats - min: -7.6973, max: 2.3134
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6795/18200 [02:42<04:32, 41.79it/s, loss=1.7205]


Logits stats - min: -6.7804, max: 2.1237
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6402, max: 2.5972
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7328, max: 1.9999
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6805/18200 [02:42<04:42, 40.38it/s, loss=1.5035]


Logits stats - min: -8.6601, max: 2.4800
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0506, max: 2.0575
Target unique values: tensor([0], device='cuda:0')


Training:  37%|████████▉               | 6815/18200 [02:42<04:28, 42.37it/s, loss=1.5118]


Logits stats - min: -9.6882, max: 2.1450
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9194, max: 1.9189
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8871, max: 2.3332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8988, max: 2.4385
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6830/18200 [02:43<04:43, 40.16it/s, loss=1.4175]


Logits stats - min: -6.9413, max: 1.9855
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5778, max: 2.6562
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6840/18200 [02:43<04:29, 42.11it/s, loss=1.5717]


Logits stats - min: -8.6839, max: 2.6188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2843, max: 2.4237
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4135, max: 2.3439
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9445, max: 2.6715
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6850/18200 [02:43<04:33, 41.47it/s, loss=1.6916]


Logits stats - min: -8.2543, max: 2.4087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1525, max: 2.2801
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6860/18200 [02:44<04:35, 41.11it/s, loss=1.9105]


Logits stats - min: -8.7299, max: 2.5870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1519, max: 2.0545
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6875/18200 [02:44<04:36, 40.96it/s, loss=1.5450]


Logits stats - min: -6.8074, max: 2.0851
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8680, max: 1.9783
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6885/18200 [02:44<04:35, 41.12it/s, loss=2.8194]


Logits stats - min: -7.3124, max: 2.5513
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0651, max: 2.1496
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6895/18200 [02:44<04:36, 40.83it/s, loss=1.5122]


Logits stats - min: -8.1476, max: 2.4571
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8913, max: 2.1896
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7227, max: 2.4951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6906, max: 1.9992
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████               | 6905/18200 [02:45<04:32, 41.46it/s, loss=1.8865]


Logits stats - min: -7.6087, max: 2.3701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6394, max: 2.7625
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6924/18200 [02:45<04:44, 39.62it/s, loss=1.5614]


Logits stats - min: -8.3810, max: 2.5204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6145, max: 2.3560
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3364, max: 2.9027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3030, max: 2.2239
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6936/18200 [02:45<04:08, 45.27it/s, loss=1.5059]


Logits stats - min: -7.0075, max: 2.2055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5288, max: 2.1534
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3785, max: 2.3603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7705, max: 2.2732
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6946/18200 [02:46<04:28, 41.85it/s, loss=1.2516]


Logits stats - min: -6.6858, max: 2.1419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9413, max: 2.4570
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6951/18200 [02:46<04:30, 41.63it/s, loss=1.8834]


Logits stats - min: -6.8405, max: 2.2201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1293, max: 1.9733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7412, max: 2.1859
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6961/18200 [02:46<04:21, 42.90it/s, loss=2.3150]


Logits stats - min: -8.6431, max: 2.5102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1874, max: 2.0671
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0263, max: 2.1574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5932, max: 1.5693
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6976/18200 [02:46<04:27, 42.00it/s, loss=2.8183]


Logits stats - min: -7.4168, max: 2.1341
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6987/18200 [02:47<04:19, 43.20it/s, loss=2.1253]


Logits stats - min: -7.0012, max: 1.4758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5825, max: 1.6774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7936, max: 2.3238
Target unique values: tensor([0], device='cuda:0')


Training:  38%|█████████▏              | 6997/18200 [02:47<04:30, 41.37it/s, loss=1.5615]


Logits stats - min: -7.6417, max: 1.7650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5692, max: 2.7580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0370, max: 1.5194
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▏              | 7012/18200 [02:47<04:26, 41.94it/s, loss=1.5496]


Logits stats - min: -7.1841, max: 2.3228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0036, max: 2.1567
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5908, max: 2.3424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1296, max: 2.5278
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7023/18200 [02:47<03:54, 47.73it/s, loss=1.5638]


Logits stats - min: -10.0850, max: 2.0502
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9861, max: 2.2050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8587, max: 1.9766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0916, max: 2.4580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2344, max: 2.0027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9863, max: 2.0640
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2020, max: 2.2101
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7033/18200 [02:48<03:57, 46.98it/s, loss=1.5535]


Logits stats - min: -8.1724, max: 2.6751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8860, max: 2.0087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7724, max: 2.2249
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7048/18200 [02:48<04:26, 41.79it/s, loss=1.5032]


Logits stats - min: -6.8318, max: 2.4989
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5180, max: 2.3574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0883, max: 2.3429
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7068/18200 [02:48<04:31, 40.95it/s, loss=1.4117]


Logits stats - min: -6.6028, max: 1.9929
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5236, max: 2.3484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6266, max: 2.1671
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0052, max: 2.6075
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7078/18200 [02:49<04:13, 43.96it/s, loss=2.2468]


Logits stats - min: -5.6136, max: 1.6271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7825, max: 2.3058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3262, max: 2.8948
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7089/18200 [02:49<04:04, 45.46it/s, loss=1.2714]


Logits stats - min: -7.6516, max: 2.3174
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5441, max: 1.7960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6307, max: 2.3195
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4526, max: 2.1656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9679, max: 2.6844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2580, max: 2.6091
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▎              | 7104/18200 [02:49<04:22, 42.25it/s, loss=1.9096]


Logits stats - min: -8.6489, max: 2.5116
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8025, max: 1.7050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7570, max: 2.4219
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7115/18200 [02:50<04:07, 44.74it/s, loss=2.1757]


Logits stats - min: -6.9000, max: 1.6651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7685, max: 2.6262
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6696, max: 2.5576
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7125/18200 [02:50<04:26, 41.58it/s, loss=1.5566]


Logits stats - min: -7.8388, max: 2.8713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0684, max: 2.1305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5248, max: 2.3960
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7136/18200 [02:50<04:07, 44.68it/s, loss=1.7763]


Logits stats - min: -6.3818, max: 2.1911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8245, max: 1.8611
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0119, max: 2.4538
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6406, max: 2.5851
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7146/18200 [02:50<04:02, 45.58it/s, loss=2.2006]


Logits stats - min: -7.4320, max: 2.1259
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9519, max: 2.1312
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9018, max: 2.1986
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4591, max: 1.8625
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7161/18200 [02:51<04:31, 40.73it/s, loss=1.5597]


Logits stats - min: -9.1908, max: 2.7609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7483, max: 1.5274
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9066, max: 2.7996
Target unique values: tensor([0], device='cuda:0')


Training:  39%|█████████▍              | 7166/18200 [02:51<04:24, 41.74it/s, loss=1.5088]


Logits stats - min: -7.1294, max: 2.2639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4703, max: 2.2018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0266, max: 2.7350
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▍              | 7190/18200 [02:51<04:42, 38.92it/s, loss=1.5245]


Logits stats - min: -7.1602, max: 2.0824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5151, max: 2.1402
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▍              | 7200/18200 [02:52<04:23, 41.73it/s, loss=1.5785]


Logits stats - min: -6.7473, max: 1.4990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3165, max: 1.5633
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1580, max: 2.0884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7433, max: 2.0690
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7210/18200 [02:52<04:30, 40.68it/s, loss=2.1686]


Logits stats - min: -6.8963, max: 2.3373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1321, max: 2.2872
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7215/18200 [02:52<04:31, 40.50it/s, loss=1.5706]


Logits stats - min: -7.1554, max: 1.9815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7860, max: 1.7713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0424, max: 1.8674
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7230/18200 [02:52<04:19, 42.26it/s, loss=1.5545]


Logits stats - min: -7.7041, max: 2.4887
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5287, max: 2.4784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6620, max: 2.4564
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8572, max: 2.6335
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7240/18200 [02:53<04:16, 42.74it/s, loss=1.5430]


Logits stats - min: -7.5031, max: 2.0574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0256, max: 1.4711
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7250/18200 [02:53<04:21, 41.95it/s, loss=1.4177]


Logits stats - min: -8.2666, max: 2.7212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7879, max: 2.1087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9237, max: 2.3660
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7255/18200 [02:53<04:31, 40.34it/s, loss=1.5841]


Logits stats - min: -6.1706, max: 1.7126
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7271/18200 [02:53<04:09, 43.77it/s, loss=1.5782]


Logits stats - min: -8.6561, max: 2.7566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3426, max: 1.9871
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9538, max: 1.8709
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6728, max: 2.5460
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7286/18200 [02:54<04:21, 41.77it/s, loss=1.5562]


Logits stats - min: -7.3031, max: 2.2248
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8920, max: 2.0833
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2223, max: 2.3878
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▌              | 7296/18200 [02:54<04:33, 39.88it/s, loss=1.5178]


Logits stats - min: -8.9609, max: 2.8965
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8935, max: 2.6097
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7306/18200 [02:54<04:23, 41.30it/s, loss=1.5100]


Logits stats - min: -7.5949, max: 2.1172
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8535, max: 1.9307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9599, max: 1.9938
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7320/18200 [02:55<04:39, 38.89it/s, loss=1.3341]


Logits stats - min: -7.0983, max: 2.2491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1943, max: 1.9835
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6314, max: 2.0846
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7335/18200 [02:55<04:18, 42.06it/s, loss=1.5031]


Logits stats - min: -7.8311, max: 2.3470
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9202, max: 1.7184
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3328, max: 2.4527
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7346/18200 [02:55<03:58, 45.52it/s, loss=1.2672]


Logits stats - min: -7.1898, max: 2.1008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1724, max: 2.3115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0186, max: 2.5360
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5354, max: 2.3424
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7351/18200 [02:55<04:07, 43.77it/s, loss=1.5083]


Logits stats - min: -6.8162, max: 2.0884
Target unique values: tensor([0], device='cuda:0')


Training:  40%|█████████▋              | 7365/18200 [02:56<04:36, 39.12it/s, loss=1.4993]


Logits stats - min: -6.4450, max: 1.7862
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2627, max: 2.2745
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▋              | 7376/18200 [02:56<04:06, 44.00it/s, loss=1.5492]


Logits stats - min: -7.8893, max: 2.3973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2947, max: 2.1485
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9664, max: 2.4211
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7032, max: 1.9961
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▋              | 7386/18200 [02:56<04:19, 41.69it/s, loss=2.8239]


Logits stats - min: -6.2956, max: 1.6404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5880, max: 1.9331
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7397/18200 [02:56<03:58, 45.25it/s, loss=1.4307]


Logits stats - min: -7.4839, max: 2.1989
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1243, max: 1.6268
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6657, max: 1.9584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4907, max: 2.5751
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7417/18200 [02:57<04:24, 40.82it/s, loss=1.5564]


Logits stats - min: -8.3443, max: 2.5609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9314, max: 2.6120
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1237, max: 2.2826
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3366, max: 2.5370
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7427/18200 [02:57<04:19, 41.49it/s, loss=1.3858]


Logits stats - min: -8.0435, max: 2.4716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8911, max: 2.7348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7012, max: 2.3497
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7437/18200 [02:57<04:09, 43.08it/s, loss=1.5119]


Logits stats - min: -6.3847, max: 2.1664
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0564, max: 2.2000
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3330, max: 2.0489
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4982, max: 2.3541
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7457/18200 [02:58<04:03, 44.10it/s, loss=1.5476]


Logits stats - min: -7.5426, max: 2.1497
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5135, max: 2.4706
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8815, max: 2.4580
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9979, max: 2.0810
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7467/18200 [02:58<04:05, 43.79it/s, loss=1.5129]


Logits stats - min: -7.7869, max: 2.5283
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7359, max: 2.3968
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0127, max: 1.9376
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7477/18200 [02:58<04:15, 41.91it/s, loss=2.0936]


Logits stats - min: -7.8122, max: 2.6038
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2545, max: 2.1908
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▊              | 7487/18200 [02:58<04:23, 40.58it/s, loss=1.3656]


Logits stats - min: -7.0901, max: 2.0110
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7503/18200 [02:59<04:03, 43.86it/s, loss=1.5491]


Logits stats - min: -6.8460, max: 2.5635
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3568, max: 2.2410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0617, max: 1.9514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2014, max: 1.6329
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5060, max: 2.1704
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4885, max: 2.0738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5637, max: 2.0961
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7518/18200 [02:59<03:55, 45.32it/s, loss=1.2921]


Logits stats - min: -6.9261, max: 2.0510
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2834, max: 2.6316
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0223, max: 2.7366
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7528/18200 [02:59<04:05, 43.51it/s, loss=2.0802]


Logits stats - min: -8.7233, max: 2.4677
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4796, max: 2.0796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1006, max: 2.5600
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9611, max: 2.6864
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7538/18200 [03:00<04:08, 42.93it/s, loss=1.5177]


Logits stats - min: -8.1653, max: 2.6791
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9271, max: 2.2527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6235, max: 2.1205
Target unique values: tensor([0], device='cuda:0')


Training:  41%|█████████▉              | 7548/18200 [03:00<04:08, 42.87it/s, loss=1.4135]


Logits stats - min: -7.9529, max: 2.3622
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8763, max: 2.5933
Target unique values: tensor([0], device='cuda:0')


Training:  42%|█████████▉              | 7563/18200 [03:00<04:17, 41.27it/s, loss=2.0370]


Logits stats - min: -8.3778, max: 2.5468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5882, max: 2.5549
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5269, max: 2.6255
Target unique values: tensor([0], device='cuda:0')


Training:  42%|█████████▉              | 7573/18200 [03:00<04:21, 40.60it/s, loss=1.5151]


Logits stats - min: -8.7752, max: 2.7486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4676, max: 2.5603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9420, max: 2.2800
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7601/18200 [03:01<04:11, 42.06it/s, loss=1.3759]


Logits stats - min: -7.0732, max: 2.0531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3325, max: 2.3413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3225, max: 2.7146
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7611/18200 [03:01<04:05, 43.17it/s, loss=1.2574]


Logits stats - min: -7.0741, max: 2.0526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7033, max: 2.4162
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3724, max: 2.3154
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7621/18200 [03:02<04:03, 43.46it/s, loss=1.5267]


Logits stats - min: -8.4844, max: 2.7479
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4622, max: 2.1601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9067, max: 2.9746
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7631/18200 [03:02<04:16, 41.23it/s, loss=1.5788]


Logits stats - min: -6.7538, max: 2.1884
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5615, max: 2.2098
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7641/18200 [03:02<04:17, 40.95it/s, loss=2.7449]


Logits stats - min: -8.1390, max: 2.2958
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7660/18200 [03:03<04:16, 41.05it/s, loss=1.2604]


Logits stats - min: -6.3884, max: 2.0522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1684, max: 1.4503
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8199, max: 2.2240
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████              | 7675/18200 [03:03<04:16, 41.02it/s, loss=1.3910]


Logits stats - min: -7.5378, max: 2.7917
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1953, max: 2.1895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5849, max: 2.0815
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6801, max: 2.3911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5895, max: 2.4284
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7686/18200 [03:03<04:04, 42.95it/s, loss=1.5375]


Logits stats - min: -7.2847, max: 2.3608
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0036, max: 2.9206
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2161, max: 2.1705
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7696/18200 [03:03<04:06, 42.61it/s, loss=1.5431]


Logits stats - min: -8.6631, max: 2.3625
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3181, max: 2.2653
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7706/18200 [03:04<04:05, 42.72it/s, loss=1.5482]


Logits stats - min: -8.1318, max: 2.3243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0859, max: 2.0606
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4595, max: 2.4395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6025, max: 2.4859
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7716/18200 [03:04<04:03, 43.10it/s, loss=1.5455]


Logits stats - min: -7.6069, max: 2.2724
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9531, max: 2.1212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8933, max: 2.0303
Target unique values: tensor([0], device='cuda:0')


Training:  42%|██████████▏             | 7726/18200 [03:04<03:55, 44.50it/s, loss=2.0755]


Logits stats - min: -9.5248, max: 2.0672
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0100, max: 2.6523
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7736/18200 [03:04<04:00, 43.46it/s, loss=1.5462]


Logits stats - min: -6.9462, max: 2.1000
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6832, max: 2.6680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2466, max: 2.0783
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7751/18200 [03:05<04:28, 38.93it/s, loss=1.5042]


Logits stats - min: -8.3491, max: 2.8793
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5713, max: 2.5999
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▏             | 7765/18200 [03:05<04:22, 39.81it/s, loss=1.9942]


Logits stats - min: -8.3185, max: 2.8028
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7779/18200 [03:05<04:24, 39.42it/s, loss=2.0861]


Logits stats - min: -6.5889, max: 2.0758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6019, max: 2.3725
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7788/18200 [03:06<04:18, 40.35it/s, loss=1.5096]


Logits stats - min: -7.1680, max: 2.3911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1675, max: 2.6103
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3813, max: 2.5556
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7802/18200 [03:06<04:24, 39.25it/s, loss=1.5222]


Logits stats - min: -7.0183, max: 2.0821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6428, max: 2.5153
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7807/18200 [03:06<04:12, 41.14it/s, loss=1.5241]


Logits stats - min: -6.4981, max: 2.2951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8863, max: 1.8884
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7822/18200 [03:06<04:08, 41.76it/s, loss=1.9229]


Logits stats - min: -6.9014, max: 1.4828
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4368, max: 2.5074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2664, max: 2.3533
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7832/18200 [03:07<04:12, 41.05it/s, loss=1.5514]


Logits stats - min: -7.0725, max: 2.2225
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7842/18200 [03:07<04:11, 41.16it/s, loss=1.3733]


Logits stats - min: -7.4534, max: 2.4231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3283, max: 2.2850
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8834, max: 1.8718
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7852/18200 [03:07<04:07, 41.87it/s, loss=1.9052]


Logits stats - min: -8.2083, max: 2.4480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0461, max: 2.3763
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▎             | 7867/18200 [03:08<04:20, 39.73it/s, loss=1.5450]


Logits stats - min: -6.9934, max: 2.0354
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7884/18200 [03:08<04:07, 41.65it/s, loss=2.1792]


Logits stats - min: -6.9294, max: 1.6476
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4666, max: 2.4337
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9535, max: 2.5420
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5523, max: 2.1998
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6873, max: 1.7888
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7894/18200 [03:08<03:58, 43.19it/s, loss=1.5490]


Logits stats - min: -7.0782, max: 3.1447
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1697, max: 2.3009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2943, max: 2.4427
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8347, max: 2.2768
Target unique values: tensor([0], device='cuda:0')


Training:  43%|██████████▍             | 7909/18200 [03:09<04:11, 40.96it/s, loss=1.8368]


Logits stats - min: -6.8624, max: 2.5388
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7705, max: 2.4208
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7920/18200 [03:09<03:42, 46.18it/s, loss=1.3728]


Logits stats - min: -7.3520, max: 2.2444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2313, max: 2.0460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4104, max: 2.3133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2257, max: 2.8910
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4318, max: 2.0669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8492, max: 2.7088
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7930/18200 [03:09<03:51, 44.28it/s, loss=2.1338]


Logits stats - min: -6.9422, max: 2.1483
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1488, max: 2.0448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9933, max: 2.0713
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7940/18200 [03:09<04:02, 42.38it/s, loss=1.2534]


Logits stats - min: -7.5542, max: 1.7782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2158, max: 2.4022
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7950/18200 [03:10<04:07, 41.46it/s, loss=1.9718]


Logits stats - min: -7.7410, max: 2.6094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8834, max: 1.9214
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7421, max: 2.2532
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▍             | 7960/18200 [03:10<04:08, 41.22it/s, loss=1.5539]


Logits stats - min: -7.2543, max: 2.3954
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 7975/18200 [03:10<04:16, 39.81it/s, loss=1.3688]


Logits stats - min: -6.7852, max: 1.9902
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8001/18200 [03:11<04:06, 41.32it/s, loss=2.1739]


Logits stats - min: -7.8658, max: 2.5910
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5958, max: 2.1670
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4512, max: 2.5316
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8016/18200 [03:11<04:11, 40.52it/s, loss=1.7959]


Logits stats - min: -7.9360, max: 1.8584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3481, max: 1.6557
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7645, max: 1.9585
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8026/18200 [03:11<03:58, 42.67it/s, loss=1.5131]


Logits stats - min: -7.3530, max: 1.9716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9405, max: 1.8663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4577, max: 2.6932
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0839, max: 2.1386
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8036/18200 [03:12<03:51, 43.87it/s, loss=1.7001]


Logits stats - min: -8.5812, max: 2.5080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1481, max: 1.7094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8968, max: 1.9404
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8046/18200 [03:12<03:52, 43.71it/s, loss=1.5393]


Logits stats - min: -6.6671, max: 2.8556
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4404, max: 2.9010
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9513, max: 2.1428
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▌             | 8056/18200 [03:12<04:12, 40.16it/s, loss=1.5218]


Logits stats - min: -7.6748, max: 2.4193
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8066/18200 [03:12<04:07, 40.88it/s, loss=1.3534]


Logits stats - min: -9.0751, max: 2.7659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2012, max: 2.7233
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8076/18200 [03:13<03:59, 42.25it/s, loss=1.5548]


Logits stats - min: -7.0799, max: 1.6483
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9740, max: 2.0638
Target unique values: tensor([0], device='cuda:0')


Training:  44%|██████████▋             | 8086/18200 [03:13<04:12, 40.05it/s, loss=1.5810]


Logits stats - min: -7.0813, max: 2.0857
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8105/18200 [03:13<04:20, 38.74it/s, loss=1.2946]


Logits stats - min: -8.8089, max: 2.9130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0796, max: 2.4829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9805, max: 3.0868
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8115/18200 [03:14<04:10, 40.25it/s, loss=1.5486]


Logits stats - min: -8.5873, max: 2.5251
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8120/18200 [03:14<04:04, 41.25it/s, loss=1.5483]


Logits stats - min: -7.1000, max: 2.0869
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8130/18200 [03:14<04:11, 40.12it/s, loss=1.3678]


Logits stats - min: -8.2440, max: 2.6890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9895, max: 2.1759
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8140/18200 [03:14<04:05, 40.95it/s, loss=1.5092]


Logits stats - min: -6.5978, max: 2.1417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5286, max: 2.3841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6990, max: 2.6165
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▋             | 8150/18200 [03:15<03:54, 42.79it/s, loss=2.0989]


Logits stats - min: -8.6017, max: 2.6017
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1117, max: 2.4108
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1123, max: 2.1656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1385, max: 1.5148
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8160/18200 [03:15<04:00, 41.68it/s, loss=1.7528]


Logits stats - min: -6.8263, max: 2.0164
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2327, max: 2.1614
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8924, max: 2.5928
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8166/18200 [03:15<03:48, 43.88it/s, loss=1.5203]


Logits stats - min: -7.2434, max: 2.3037
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9719, max: 2.0123
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8176/18200 [03:15<03:51, 43.38it/s, loss=1.5172]


Logits stats - min: -11.9004, max: 2.2161
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2255, max: 2.6205
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2434, max: 2.4602
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1359, max: 2.4762
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6041, max: 2.4827
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7131, max: 2.4096
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8187/18200 [03:15<03:51, 43.18it/s, loss=2.4214]


Logits stats - min: -6.7292, max: 2.0504
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8197/18200 [03:16<03:55, 42.48it/s, loss=2.9949]


Logits stats - min: -8.6311, max: 2.5459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7939, max: 1.7212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5809, max: 1.7601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7944, max: 2.0955
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3492, max: 2.6196
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8223/18200 [03:16<04:03, 40.93it/s, loss=1.5316]


Logits stats - min: -7.5568, max: 1.6318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8710, max: 2.4697
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▊             | 8233/18200 [03:16<04:09, 40.01it/s, loss=1.4138]


Logits stats - min: -6.9212, max: 1.7112
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8248/18200 [03:17<04:17, 38.68it/s, loss=1.2994]


Logits stats - min: -7.3899, max: 1.7015
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4090, max: 2.3564
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8253/18200 [03:17<04:05, 40.48it/s, loss=1.5896]


Logits stats - min: -6.2709, max: 2.0811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4522, max: 2.2196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3582, max: 2.0155
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8268/18200 [03:17<03:59, 41.51it/s, loss=1.5245]


Logits stats - min: -7.8970, max: 2.2942
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0670, max: 2.2452
Target unique values: tensor([0], device='cuda:0')


Training:  45%|██████████▉             | 8278/18200 [03:18<04:00, 41.25it/s, loss=2.8468]


Logits stats - min: -14.7662, max: 2.7605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6971, max: 1.4335
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8288/18200 [03:18<03:56, 41.99it/s, loss=1.4877]


Logits stats - min: -8.5561, max: 2.8095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5813, max: 3.0941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7203, max: 2.0004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4772, max: 1.6037
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7684, max: 2.0920
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8298/18200 [03:18<03:56, 41.90it/s, loss=1.6889]


Logits stats - min: -8.4201, max: 2.4859
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1999, max: 2.1938
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3498, max: 2.6830
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8313/18200 [03:18<04:04, 40.42it/s, loss=1.5316]


Logits stats - min: -9.3294, max: 2.3444
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8328/18200 [03:19<03:58, 41.39it/s, loss=1.6250]


Logits stats - min: -6.9487, max: 2.4467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9923, max: 2.4030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3148, max: 1.5781
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8784, max: 2.9749
Target unique values: tensor([0], device='cuda:0')


Training:  46%|██████████▉             | 8338/18200 [03:19<03:52, 42.50it/s, loss=1.3882]


Logits stats - min: -6.7333, max: 2.1407
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2854, max: 3.1271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9891, max: 2.8959
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3267, max: 2.7566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0750, max: 2.1738
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8349/18200 [03:19<03:39, 44.82it/s, loss=1.5335]


Logits stats - min: -8.5350, max: 2.6774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9548, max: 2.5187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5809, max: 2.1364
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0078, max: 3.1070
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8354/18200 [03:19<03:51, 42.49it/s, loss=1.5290]


Logits stats - min: -7.8526, max: 2.4811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0814, max: 3.3485
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8364/18200 [03:20<03:55, 41.68it/s, loss=1.3601]


Logits stats - min: -8.5922, max: 2.9778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4702, max: 2.8536
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8379/18200 [03:20<04:07, 39.65it/s, loss=2.1730]


Logits stats - min: -7.0405, max: 2.1891
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6733, max: 2.6641
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7750, max: 2.6194
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8390/18200 [03:20<03:57, 41.30it/s, loss=1.4532]


Logits stats - min: -8.0184, max: 2.6916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4326, max: 1.4654
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9625, max: 2.4021
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8400/18200 [03:20<03:49, 42.71it/s, loss=1.8777]


Logits stats - min: -7.9732, max: 2.6885
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3515, max: 2.8504
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2373, max: 2.1042
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8421/18200 [03:21<03:38, 44.70it/s, loss=1.5483]


Logits stats - min: -7.8646, max: 1.6308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2320, max: 3.3567
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6557, max: 2.4341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0286, max: 2.0784
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████             | 8426/18200 [03:21<03:48, 42.73it/s, loss=1.9577]


Logits stats - min: -7.1948, max: 2.0952
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8116, max: 2.4477
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████▏            | 8446/18200 [03:22<03:53, 41.81it/s, loss=2.2439]


Logits stats - min: -8.6422, max: 2.5846
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7621, max: 2.2148
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0442, max: 2.2887
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7221, max: 3.0210
Target unique values: tensor([0], device='cuda:0')


Training:  46%|███████████▏            | 8456/18200 [03:22<03:56, 41.25it/s, loss=1.3570]


Logits stats - min: -7.1529, max: 2.0663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9667, max: 2.1946
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2908, max: 2.1192
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8471/18200 [03:22<04:04, 39.81it/s, loss=1.5110]


Logits stats - min: -9.9318, max: 2.3736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1068, max: 1.7490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3173, max: 2.6932
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8486/18200 [03:23<03:59, 40.64it/s, loss=1.2845]


Logits stats - min: -6.8726, max: 2.0886
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4039, max: 2.1920
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8491/18200 [03:23<03:47, 42.76it/s, loss=1.5433]


Logits stats - min: -8.8000, max: 2.3078
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6455, max: 2.7141
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3995, max: 2.3323
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8501/18200 [03:23<03:49, 42.25it/s, loss=1.4267]


Logits stats - min: -6.9470, max: 2.3068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5254, max: 1.5218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0203, max: 2.3333
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8511/18200 [03:23<03:42, 43.58it/s, loss=1.5572]


Logits stats - min: -6.1684, max: 1.6617
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4691, max: 2.0891
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2793, max: 2.0731
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8521/18200 [03:23<04:03, 39.73it/s, loss=2.1218]


Logits stats - min: -6.8772, max: 2.1210
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▏            | 8531/18200 [03:24<04:00, 40.21it/s, loss=1.5556]


Logits stats - min: -8.7606, max: 2.7979
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7929, max: 2.0990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6922, max: 1.7197
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8550/18200 [03:24<04:05, 39.35it/s, loss=1.5549]


Logits stats - min: -6.8045, max: 2.1593
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0685, max: 2.1070
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8563/18200 [03:25<04:00, 40.09it/s, loss=1.3510]


Logits stats - min: -6.9701, max: 2.1756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2834, max: 2.7793
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8573/18200 [03:25<03:45, 42.68it/s, loss=1.3829]


Logits stats - min: -8.2644, max: 2.2803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3506, max: 1.9908
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1903, max: 1.4816
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8153, max: 1.5603
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8583/18200 [03:25<03:48, 42.09it/s, loss=1.5449]


Logits stats - min: -7.6050, max: 2.1849
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3935, max: 2.6905
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7697, max: 2.1413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8460, max: 2.0376
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8593/18200 [03:25<03:45, 42.53it/s, loss=1.5475]


Logits stats - min: -8.9865, max: 2.7766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3248, max: 2.0499
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8603/18200 [03:25<03:53, 41.10it/s, loss=1.5437]


Logits stats - min: -7.7134, max: 2.2899
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▎            | 8617/18200 [03:26<03:56, 40.44it/s, loss=1.8807]


Logits stats - min: -7.2866, max: 2.2346
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2653, max: 2.0605
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▍            | 8627/18200 [03:26<04:01, 39.60it/s, loss=1.5458]


Logits stats - min: -7.8457, max: 2.0477
Target unique values: tensor([0], device='cuda:0')


Training:  47%|███████████▍            | 8637/18200 [03:26<03:47, 41.95it/s, loss=1.4979]


Logits stats - min: -7.1889, max: 2.0636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9991, max: 2.5079
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2113, max: 2.0817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3784, max: 1.9945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4764, max: 2.5231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9849, max: 2.5473
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8648/18200 [03:27<03:35, 44.31it/s, loss=1.5439]


Logits stats - min: -7.2296, max: 2.4897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2810, max: 2.2192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6133, max: 2.2337
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8659/18200 [03:27<03:41, 43.05it/s, loss=1.5424]


Logits stats - min: -7.0577, max: 2.2480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3547, max: 2.5798
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8199, max: 2.3833
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8669/18200 [03:27<03:43, 42.71it/s, loss=1.5260]


Logits stats - min: -7.4677, max: 2.2055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8041, max: 2.2817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2265, max: 2.1783
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0713, max: 2.1805
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8684/18200 [03:27<03:37, 43.71it/s, loss=1.5618]


Logits stats - min: -7.1832, max: 1.9439
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3471, max: 2.5116
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7670, max: 2.0574
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8694/18200 [03:28<03:44, 42.40it/s, loss=1.5113]


Logits stats - min: -7.1190, max: 2.3759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.7874, max: 2.3715
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8704/18200 [03:28<03:47, 41.81it/s, loss=1.5177]


Logits stats - min: -7.5268, max: 1.6951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5584, max: 2.2640
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2090, max: 2.5542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6351, max: 3.4410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1160, max: 2.4816
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▍            | 8715/18200 [03:28<03:41, 42.89it/s, loss=1.5400]


Logits stats - min: -7.2843, max: 2.1416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6303, max: 2.6975
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6770, max: 2.5465
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8725/18200 [03:28<03:40, 42.96it/s, loss=1.5327]


Logits stats - min: -7.5487, max: 2.2509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0032, max: 2.4026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0852, max: 1.6684
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8741/18200 [03:29<03:32, 44.41it/s, loss=1.2409]


Logits stats - min: -7.6468, max: 1.6850
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2321, max: 2.6106
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1930, max: 2.6825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4998, max: 2.0357
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7311, max: 2.6164
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8761/18200 [03:29<03:51, 40.74it/s, loss=1.2889]


Logits stats - min: -7.0364, max: 2.1175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0355, max: 2.0352
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8766/18200 [03:29<03:41, 42.50it/s, loss=2.0043]


Logits stats - min: -7.0863, max: 1.6898
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2532, max: 3.0538
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3210, max: 3.0296
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8776/18200 [03:30<03:36, 43.48it/s, loss=1.2484]


Logits stats - min: -8.7357, max: 2.9243
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8394, max: 2.3446
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8786/18200 [03:30<03:53, 40.24it/s, loss=1.5991]


Logits stats - min: -7.0900, max: 2.2520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0027, max: 2.4256
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9126, max: 2.6976
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▌            | 8797/18200 [03:30<03:28, 45.09it/s, loss=1.4217]


Logits stats - min: -7.1079, max: 2.1963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9066, max: 2.7888
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0273, max: 2.0385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.2945, max: 1.5654
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8711, max: 1.9515
Target unique values: tensor([0], device='cuda:0')


Training:  48%|███████████▋            | 8826/18200 [03:31<03:56, 39.60it/s, loss=1.5494]


Logits stats - min: -7.7824, max: 2.4105
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1126, max: 2.0213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2912, max: 2.1440
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6007, max: 2.2864
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8841/18200 [03:31<03:37, 43.01it/s, loss=1.5107]


Logits stats - min: -7.8923, max: 2.5310
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3640, max: 2.4810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2766, max: 2.3842
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8846/18200 [03:31<03:40, 42.41it/s, loss=1.5636]


Logits stats - min: -7.3708, max: 1.9601
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2225, max: 2.2625
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8856/18200 [03:32<03:50, 40.52it/s, loss=2.0479]


Logits stats - min: -6.8550, max: 2.1928
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8871/18200 [03:32<03:53, 39.90it/s, loss=1.5475]


Logits stats - min: -8.2122, max: 2.5562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3330, max: 2.2384
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3998, max: 2.0761
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8898/18200 [03:33<03:49, 40.55it/s, loss=2.1344]


Logits stats - min: -7.6714, max: 1.7980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2652, max: 1.6768
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9599, max: 2.0296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7043, max: 2.2844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2545, max: 2.3968
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▋            | 8908/18200 [03:33<03:47, 40.92it/s, loss=1.5130]


Logits stats - min: -9.3426, max: 2.0861
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1785, max: 2.2192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7578, max: 1.6457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7447, max: 2.1966
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8928/18200 [03:33<03:52, 39.94it/s, loss=1.5521]


Logits stats - min: -7.2939, max: 2.1400
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4365, max: 2.4241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3565, max: 2.0773
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8939/18200 [03:34<03:36, 42.74it/s, loss=1.5491]


Logits stats - min: -6.7086, max: 1.8978
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2979, max: 1.5413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4201, max: 2.0028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5951, max: 1.7615
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8954/18200 [03:34<03:44, 41.24it/s, loss=1.5128]


Logits stats - min: -8.7455, max: 2.1496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1139, max: 2.4063
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8964/18200 [03:34<03:39, 42.16it/s, loss=1.3463]


Logits stats - min: -8.3011, max: 2.5153
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9160, max: 2.1476
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4570, max: 2.2553
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8974/18200 [03:34<03:34, 43.07it/s, loss=1.5445]


Logits stats - min: -8.6892, max: 2.7692
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2934, max: 2.2876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6132, max: 2.4238
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8984/18200 [03:35<03:44, 41.04it/s, loss=1.5168]


Logits stats - min: -7.4928, max: 2.5941
Target unique values: tensor([0], device='cuda:0')


Training:  49%|███████████▊            | 8995/18200 [03:35<03:34, 42.83it/s, loss=1.5354]


Logits stats - min: -6.9946, max: 2.0840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6259, max: 2.2758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6249, max: 1.5059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4528, max: 1.7039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4262, max: 2.3549
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9018/18200 [03:35<03:56, 38.88it/s, loss=1.3639]


Logits stats - min: -8.3287, max: 2.3027
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9032/18200 [03:36<03:45, 40.70it/s, loss=2.6862]


Logits stats - min: -8.6483, max: 2.3089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7368, max: 3.2056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6149, max: 2.1792
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4089, max: 1.7894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.4440, max: 1.6934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4436, max: 2.0191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7029, max: 2.1910
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9049/18200 [03:36<03:38, 41.89it/s, loss=1.2122]


Logits stats - min: -8.8223, max: 2.3967
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5066, max: 1.9684
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2350, max: 2.1263
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9059/18200 [03:36<03:32, 43.06it/s, loss=1.5229]


Logits stats - min: -7.6786, max: 2.5856
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6811, max: 1.9638
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0796, max: 2.0997
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9069/18200 [03:37<03:27, 44.03it/s, loss=2.0346]


Logits stats - min: -6.7217, max: 1.6520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0188, max: 2.1248
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1024, max: 2.1191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5512, max: 2.5192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3806, max: 2.3687
Target unique values: tensor([0], device='cuda:0')


Training:  50%|███████████▉            | 9094/18200 [03:37<03:39, 41.53it/s, loss=2.0553]


Logits stats - min: -8.3638, max: 2.3223
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.9822, max: 1.4829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2956, max: 2.1972
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9104/18200 [03:38<03:51, 39.28it/s, loss=1.5099]


Logits stats - min: -8.2601, max: 1.5640
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3640, max: 2.0588
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9113/18200 [03:38<03:44, 40.43it/s, loss=2.6267]


Logits stats - min: -6.6085, max: 1.8670
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9132/18200 [03:38<03:43, 40.63it/s, loss=1.8897]


Logits stats - min: -7.4770, max: 2.4016
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3647, max: 2.5204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2355, max: 2.5578
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9147/18200 [03:39<03:45, 40.22it/s, loss=1.7516]


Logits stats - min: -9.2723, max: 2.8418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5969, max: 1.6390
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9534, max: 2.2906
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9157/18200 [03:39<03:41, 40.77it/s, loss=1.5096]


Logits stats - min: -8.3848, max: 2.3557
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9167/18200 [03:39<03:48, 39.47it/s, loss=1.2633]


Logits stats - min: -7.1895, max: 2.0150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0793, max: 2.0847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1594, max: 2.0584
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9177/18200 [03:39<03:48, 39.52it/s, loss=1.4996]


Logits stats - min: -6.8640, max: 2.0869
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5013, max: 1.8435
Target unique values: tensor([0], device='cuda:0')


Training:  50%|████████████            | 9190/18200 [03:40<03:38, 41.33it/s, loss=1.5497]


Logits stats - min: -6.9512, max: 2.0348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4461, max: 1.4739
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8654, max: 2.0288
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9205/18200 [03:40<03:34, 41.98it/s, loss=2.1176]


Logits stats - min: -8.5064, max: 2.5566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3266, max: 2.3307
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9220/18200 [03:40<03:45, 39.84it/s, loss=1.3736]


Logits stats - min: -8.0293, max: 2.6751
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4003, max: 2.3111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2746, max: 2.4460
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9230/18200 [03:41<03:39, 40.93it/s, loss=2.7209]


Logits stats - min: -7.5525, max: 1.5412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3436, max: 1.4916
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9240/18200 [03:41<03:27, 43.21it/s, loss=2.1632]


Logits stats - min: -6.6920, max: 2.1210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3771, max: 2.1274
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4683, max: 2.9903
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9250/18200 [03:41<03:50, 38.88it/s, loss=1.5003]


Logits stats - min: -9.0868, max: 2.7743
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9260/18200 [03:41<03:53, 38.36it/s, loss=1.3383]


Logits stats - min: -7.9869, max: 1.4862
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3026, max: 2.2695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0464, max: 3.2179
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9268/18200 [03:42<03:57, 37.54it/s, loss=1.3789]


Logits stats - min: -6.5715, max: 2.0406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7145, max: 1.8893
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7740, max: 2.3418
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9278/18200 [03:42<03:35, 41.44it/s, loss=1.5416]


Logits stats - min: -7.5688, max: 2.2519
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2443, max: 1.9586
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▏           | 9288/18200 [03:42<03:32, 41.97it/s, loss=1.5737]


Logits stats - min: -8.5155, max: 2.4302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8423, max: 2.0625
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9302/18200 [03:42<03:49, 38.79it/s, loss=1.2343]


Logits stats - min: -7.2818, max: 1.5323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2018, max: 2.0196
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9311/18200 [03:43<03:47, 39.01it/s, loss=1.2504]


Logits stats - min: -8.8169, max: 2.7405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3320, max: 1.9468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1509, max: 2.4507
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2299, max: 2.1390
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9326/18200 [03:43<03:21, 44.13it/s, loss=1.9029]


Logits stats - min: -8.4016, max: 2.5316
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2950, max: 1.3482
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2904, max: 2.0410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4357, max: 1.5418
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9336/18200 [03:43<03:32, 41.79it/s, loss=2.1002]


Logits stats - min: -7.7178, max: 2.2778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0868, max: 1.8618
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9346/18200 [03:44<03:36, 40.95it/s, loss=1.3753]


Logits stats - min: -7.7124, max: 2.2196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6805, max: 2.7419
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9355/18200 [03:44<03:48, 38.67it/s, loss=1.3840]


Logits stats - min: -9.6384, max: 2.8295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8577, max: 1.6739
Target unique values: tensor([0], device='cuda:0')


Training:  51%|████████████▎           | 9363/18200 [03:44<03:50, 38.41it/s, loss=1.3549]


Logits stats - min: -7.7065, max: 2.1887
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0822, max: 2.2123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8448, max: 2.6011
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7012, max: 2.4150
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▎           | 9375/18200 [03:44<03:17, 44.74it/s, loss=1.8761]


Logits stats - min: -7.3826, max: 2.3994
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0744, max: 2.8736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7037, max: 2.2582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2062, max: 2.3764
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8072, max: 2.7938
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9386/18200 [03:44<03:09, 46.48it/s, loss=2.7803]


Logits stats - min: -6.8672, max: 2.1103
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3757, max: 2.0766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -5.8548, max: 1.5339
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7473, max: 2.5782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6052, max: 2.6758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7031, max: 2.5569
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9402/18200 [03:45<03:18, 44.21it/s, loss=2.1353]


Logits stats - min: -7.8732, max: 2.5679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8446, max: 2.6246
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8296, max: 2.2569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8115, max: 2.4365
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9412/18200 [03:45<03:22, 43.31it/s, loss=1.4843]


Logits stats - min: -7.0829, max: 2.1886
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6824, max: 2.4899
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9422/18200 [03:45<03:30, 41.69it/s, loss=2.0835]


Logits stats - min: -6.7472, max: 2.4245
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9432/18200 [03:46<03:37, 40.29it/s, loss=1.3457]


Logits stats - min: -7.2377, max: 2.0474
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1465, max: 2.4952
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3350, max: 2.0780
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3970, max: 2.6925
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9442/18200 [03:46<03:40, 39.76it/s, loss=1.5445]


Logits stats - min: -7.7369, max: 2.3147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2449, max: 2.4114
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9731, max: 2.0268
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6989, max: 1.9226
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9453/18200 [03:46<03:28, 42.03it/s, loss=2.0420]


Logits stats - min: -7.1526, max: 2.2811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0458, max: 2.4854
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9463/18200 [03:46<03:34, 40.76it/s, loss=1.5504]


Logits stats - min: -5.8070, max: 1.6197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4236, max: 2.5887
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▍           | 9473/18200 [03:47<03:32, 41.14it/s, loss=1.3513]


Logits stats - min: -8.3948, max: 2.3840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0870, max: 1.6643
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9488/18200 [03:47<03:37, 40.10it/s, loss=1.5053]


Logits stats - min: -7.7092, max: 2.7288
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9502/18200 [03:47<03:36, 40.12it/s, loss=1.4112]


Logits stats - min: -9.0632, max: 2.6887
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7165, max: 2.2425
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9512/18200 [03:48<03:34, 40.43it/s, loss=1.4884]


Logits stats - min: -7.7249, max: 2.5308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9956, max: 2.4661
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9527/18200 [03:48<03:27, 41.72it/s, loss=1.2476]


Logits stats - min: -10.5303, max: 2.3319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6845, max: 2.2657
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5799, max: 2.6482
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9532/18200 [03:48<03:27, 41.86it/s, loss=1.5401]


Logits stats - min: -8.2727, max: 2.5035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1418, max: 1.9428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8134, max: 2.6202
Target unique values: tensor([0], device='cuda:0')


Training:  52%|████████████▌           | 9551/18200 [03:49<03:41, 38.97it/s, loss=1.5975]


Logits stats - min: -7.7264, max: 1.9895
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▌           | 9556/18200 [03:49<03:40, 39.29it/s, loss=1.5140]


Logits stats - min: -7.9698, max: 2.7573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5929, max: 2.4645
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▌           | 9566/18200 [03:49<03:28, 41.36it/s, loss=1.7335]


Logits stats - min: -8.9551, max: 2.9207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0401, max: 2.8610
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9585/18200 [03:49<03:20, 43.05it/s, loss=1.5120]


Logits stats - min: -7.2060, max: 2.0679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2624, max: 2.3147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5236, max: 2.8431
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0096, max: 2.1265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4404, max: 2.5695
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9590/18200 [03:50<03:24, 42.10it/s, loss=1.5427]


Logits stats - min: -6.7855, max: 2.2854
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6002, max: 2.5202
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8830, max: 2.6020
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9606/18200 [03:50<03:20, 42.81it/s, loss=1.5499]


Logits stats - min: -6.2996, max: 2.4305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7473, max: 2.4467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7777, max: 2.6581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9382, max: 2.0512
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8190, max: 2.3186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6407, max: 2.6776
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9617/18200 [03:50<03:13, 44.31it/s, loss=3.2079]


Logits stats - min: -8.8476, max: 2.5770
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2822, max: 2.4552
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0148, max: 2.4528
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9627/18200 [03:50<03:23, 42.22it/s, loss=1.2243]


Logits stats - min: -6.0937, max: 2.3505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2116, max: 2.0809
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9637/18200 [03:51<03:22, 42.34it/s, loss=2.3159]


Logits stats - min: -6.5734, max: 2.4311
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9647/18200 [03:51<03:36, 39.51it/s, loss=1.5326]


Logits stats - min: -6.7279, max: 1.6564
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6123, max: 2.4528
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▋           | 9656/18200 [03:51<03:36, 39.43it/s, loss=1.5328]


Logits stats - min: -9.0428, max: 2.6036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7271, max: 2.4404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2804, max: 1.9796
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9677/18200 [03:52<03:32, 40.15it/s, loss=2.1302]


Logits stats - min: -8.3604, max: 2.5949
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1356, max: 2.3446
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9690/18200 [03:52<03:47, 37.43it/s, loss=1.5261]


Logits stats - min: -8.8254, max: 2.6058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1771, max: 2.5794
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9498, max: 1.7552
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9713/18200 [03:52<03:28, 40.77it/s, loss=2.2743]


Logits stats - min: -8.2775, max: 2.6100
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9913, max: 2.6120
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4173, max: 2.4478
Target unique values: tensor([0], device='cuda:0')


Training:  53%|████████████▊           | 9728/18200 [03:53<03:24, 41.38it/s, loss=1.5406]


Logits stats - min: -7.1838, max: 2.2081
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2380, max: 1.8596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.0272, max: 2.3644
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5010, max: 2.1563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3430, max: 2.3252
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▊           | 9743/18200 [03:53<03:24, 41.26it/s, loss=1.5387]


Logits stats - min: -9.1464, max: 2.7897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2143, max: 2.4231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3073, max: 2.5301
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9771/18200 [03:54<03:34, 39.34it/s, loss=2.1823]


Logits stats - min: -6.8247, max: 2.2152
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9743, max: 2.6222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5764, max: 2.1486
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9781/18200 [03:54<03:20, 42.02it/s, loss=1.5869]


Logits stats - min: -8.4815, max: 2.6151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2272, max: 2.5371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0834, max: 2.7349
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9791/18200 [03:54<03:25, 40.89it/s, loss=1.5053]


Logits stats - min: -7.0317, max: 2.1468
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0317, max: 2.9318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6732, max: 2.2838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1607, max: 2.5447
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5999, max: 2.7347
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9796/18200 [03:55<03:30, 40.01it/s, loss=1.3380]


Logits stats - min: -8.4063, max: 2.3983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3828, max: 2.4588
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1259, max: 2.3998
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9806/18200 [03:55<03:16, 42.77it/s, loss=1.5681]


Logits stats - min: -6.9944, max: 2.1811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2674, max: 2.5296
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6455, max: 1.6431
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9817/18200 [03:55<03:14, 43.20it/s, loss=1.2382]


Logits stats - min: -7.7980, max: 2.7009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0928, max: 2.2317
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9832/18200 [03:55<03:25, 40.66it/s, loss=1.5507]


Logits stats - min: -7.8034, max: 2.3155
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7789, max: 2.4664
Target unique values: tensor([0], device='cuda:0')


Training:  54%|████████████▉           | 9842/18200 [03:56<03:40, 37.98it/s, loss=1.2138]


Logits stats - min: -7.4237, max: 2.2518
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9868/18200 [03:56<03:26, 40.32it/s, loss=2.2737]


Logits stats - min: -7.3608, max: 2.1866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1252, max: 2.6955
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9883/18200 [03:57<03:29, 39.75it/s, loss=1.2640]


Logits stats - min: -8.9185, max: 2.5545
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9205, max: 2.6301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4624, max: 2.4474
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9893/18200 [03:57<03:24, 40.58it/s, loss=1.5072]


Logits stats - min: -6.6970, max: 2.2823
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3132, max: 1.5586
Target unique values: tensor([0], device='cuda:0')


Training:  54%|█████████████           | 9904/18200 [03:57<03:11, 43.21it/s, loss=1.4875]


Logits stats - min: -6.7517, max: 2.3259
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1064, max: 2.4759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4413, max: 2.3871
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7790, max: 2.2764
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5588, max: 2.4805
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████           | 9919/18200 [03:58<03:20, 41.31it/s, loss=1.4596]


Logits stats - min: -7.6261, max: 1.9356
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████           | 9934/18200 [03:58<03:34, 38.55it/s, loss=1.6173]


Logits stats - min: -7.1606, max: 2.0543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2799, max: 2.4392
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████           | 9947/18200 [03:58<03:27, 39.80it/s, loss=1.5363]


Logits stats - min: -8.6253, max: 2.6925
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3841, max: 2.2690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1893, max: 2.4656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8905, max: 2.4665
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████           | 9952/18200 [03:59<03:19, 41.34it/s, loss=1.6270]


Logits stats - min: -8.7098, max: 2.7447
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2310, max: 1.9484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6900, max: 2.3801
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9963/18200 [03:59<03:02, 45.25it/s, loss=1.5532]


Logits stats - min: -7.1868, max: 1.8276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7722, max: 2.3036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7637, max: 2.7950
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9258, max: 2.5217
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9983/18200 [03:59<03:13, 42.38it/s, loss=1.6364]


Logits stats - min: -10.9416, max: 2.1135
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5362, max: 2.3131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8537, max: 2.6735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7626, max: 2.2219
Target unique values: tensor([0], device='cuda:0')


Training:  55%|█████████████▏          | 9993/18200 [03:59<03:07, 43.77it/s, loss=1.2491]


Logits stats - min: -8.6972, max: 2.8051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5025, max: 2.4544
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3836, max: 2.4324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0240, max: 2.3515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0560, max: 2.7529
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10013/18200 [04:00<03:18, 41.23it/s, loss=2.6710]


Logits stats - min: -8.4463, max: 2.8074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3353, max: 2.3716
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10023/18200 [04:00<03:18, 41.14it/s, loss=1.2518]


Logits stats - min: -7.7490, max: 2.4135
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10033/18200 [04:00<03:15, 41.86it/s, loss=1.3704]


Logits stats - min: -8.9454, max: 2.9822
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2818, max: 2.7115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6678, max: 2.0484
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9876, max: 2.0470
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9251, max: 2.9454
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10044/18200 [04:01<02:56, 46.16it/s, loss=1.2302]


Logits stats - min: -6.7932, max: 2.3568
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6012, max: 2.4515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3088, max: 2.7080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8111, max: 2.7471
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3822, max: 3.2466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2009, max: 2.3126
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10054/18200 [04:01<03:13, 42.04it/s, loss=1.2177]


Logits stats - min: -7.8916, max: 2.1551
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10078/18200 [04:02<03:33, 38.03it/s, loss=1.3300]


Logits stats - min: -7.0621, max: 1.8666
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▋          | 10086/18200 [04:02<03:31, 38.34it/s, loss=1.7178]


Logits stats - min: -8.8234, max: 2.2563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4968, max: 2.3646
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1742, max: 2.5036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3283, max: 2.4896
Target unique values: tensor([0], device='cuda:0')


Training:  55%|████████████▊          | 10096/18200 [04:02<03:16, 41.15it/s, loss=1.5415]


Logits stats - min: -9.3855, max: 2.3056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5995, max: 2.4524
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10119/18200 [04:03<03:18, 40.68it/s, loss=1.5300]


Logits stats - min: -7.0225, max: 2.3421
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4145, max: 2.3419
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10129/18200 [04:03<03:18, 40.70it/s, loss=2.1668]


Logits stats - min: -6.9073, max: 2.1776
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4276, max: 2.0327
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6385, max: 2.2675
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10157/18200 [04:04<03:22, 39.78it/s, loss=1.4029]


Logits stats - min: -8.3766, max: 2.6835
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8021, max: 2.7261
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3477, max: 2.7044
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10167/18200 [04:04<03:13, 41.49it/s, loss=1.2315]


Logits stats - min: -7.9719, max: 2.0394
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3155, max: 2.2447
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▊          | 10182/18200 [04:04<03:11, 41.98it/s, loss=1.3645]


Logits stats - min: -6.8718, max: 2.1796
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6292, max: 2.2677
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2493, max: 1.5877
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8160, max: 2.2740
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10198/18200 [04:04<03:01, 44.10it/s, loss=2.8394]


Logits stats - min: -8.0293, max: 1.8063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1427, max: 2.1979
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7737, max: 2.1310
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9907, max: 2.3173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2399, max: 2.1048
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10213/18200 [04:05<03:10, 41.87it/s, loss=1.5495]


Logits stats - min: -8.0919, max: 2.2007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4892, max: 2.0631
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6941, max: 2.4192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3720, max: 1.7097
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10223/18200 [04:05<03:12, 41.40it/s, loss=2.0446]


Logits stats - min: -9.2314, max: 2.8771
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10233/18200 [04:05<03:18, 40.18it/s, loss=2.0320]


Logits stats - min: -7.0082, max: 2.1522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6558, max: 2.8574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3486, max: 2.7481
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10247/18200 [04:06<03:12, 41.28it/s, loss=1.1820]


Logits stats - min: -7.9092, max: 1.6349
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3611, max: 3.1087
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5690, max: 2.0321
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10257/18200 [04:06<03:10, 41.68it/s, loss=1.3544]


Logits stats - min: -7.5121, max: 2.1441
Target unique values: tensor([0], device='cuda:0')


Training:  56%|████████████▉          | 10279/18200 [04:07<03:24, 38.78it/s, loss=1.5010]


Logits stats - min: -9.0402, max: 2.7390
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10289/18200 [04:07<03:09, 41.66it/s, loss=1.8811]


Logits stats - min: -9.7090, max: 2.8745
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8145, max: 1.6671
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5298, max: 2.9259
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7978, max: 2.3391
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4380, max: 2.0146
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10299/18200 [04:07<03:04, 42.73it/s, loss=1.5349]


Logits stats - min: -7.3751, max: 2.3619
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7265, max: 2.1807
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7418, max: 3.1047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5240, max: 2.5818
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10309/18200 [04:07<03:05, 42.56it/s, loss=1.9366]


Logits stats - min: -6.6339, max: 2.1895
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3515, max: 2.1257
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10319/18200 [04:07<03:04, 42.64it/s, loss=1.4942]


Logits stats - min: -8.6114, max: 2.6823
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1951, max: 2.0099
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10329/18200 [04:08<03:13, 40.77it/s, loss=1.5347]


Logits stats - min: -7.1059, max: 2.4169
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10343/18200 [04:08<03:13, 40.50it/s, loss=1.2355]


Logits stats - min: -8.5821, max: 2.6197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0782, max: 2.5281
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10357/18200 [04:08<03:14, 40.30it/s, loss=1.5084]


Logits stats - min: -8.6713, max: 2.8812
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2696, max: 2.2449
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6213, max: 2.6045
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9869, max: 2.1620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2782, max: 2.4940
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10367/18200 [04:09<03:07, 41.75it/s, loss=1.2128]


Logits stats - min: -6.0108, max: 1.6705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1869, max: 2.0559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9401, max: 2.1466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1576, max: 2.6706
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████          | 10377/18200 [04:09<03:04, 42.49it/s, loss=2.0757]


Logits stats - min: -8.9760, max: 2.8505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9285, max: 2.5239
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2923, max: 2.5022
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10392/18200 [04:09<03:09, 41.17it/s, loss=1.3381]


Logits stats - min: -6.8324, max: 2.1437
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0562, max: 2.0951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7350, max: 2.4778
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10397/18200 [04:09<03:11, 40.79it/s, loss=1.5431]


Logits stats - min: -8.0267, max: 2.4834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5830, max: 1.6626
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4419, max: 3.0603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1550, max: 2.5917
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1782, max: 2.4975
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3066, max: 2.6083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1961, max: 2.5576
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10423/18200 [04:10<03:13, 40.29it/s, loss=1.5061]


Logits stats - min: -10.5800, max: 2.2795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3580, max: 2.1750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3667, max: 2.2287
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10433/18200 [04:10<03:15, 39.74it/s, loss=1.8320]


Logits stats - min: -7.1586, max: 2.1646
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0315, max: 2.1091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6938, max: 2.7733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6868, max: 2.5757
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10444/18200 [04:11<02:52, 44.95it/s, loss=2.7154]


Logits stats - min: -9.1302, max: 2.6669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1690, max: 2.4470
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2311, max: 2.3686
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6346, max: 2.9956
Target unique values: tensor([0], device='cuda:0')


Training:  57%|█████████████▏         | 10459/18200 [04:11<03:04, 42.06it/s, loss=1.3631]


Logits stats - min: -6.5631, max: 1.6016
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2157, max: 2.3351
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▏         | 10474/18200 [04:11<03:14, 39.65it/s, loss=1.2288]


Logits stats - min: -6.9504, max: 2.0870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6203, max: 2.5792
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2416, max: 2.5654
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1683, max: 2.5319
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8448, max: 2.0586
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10495/18200 [04:12<02:56, 43.76it/s, loss=1.5431]


Logits stats - min: -6.9749, max: 2.3102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8201, max: 2.4022
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2178, max: 2.3442
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7486, max: 2.7506
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10506/18200 [04:12<02:57, 43.32it/s, loss=2.0039]


Logits stats - min: -6.7155, max: 2.1539
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1537, max: 2.4318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7681, max: 2.4718
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1497, max: 2.5044
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10516/18200 [04:12<03:01, 42.43it/s, loss=2.1729]


Logits stats - min: -8.5245, max: 2.4727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6883, max: 2.1039
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10526/18200 [04:12<03:05, 41.41it/s, loss=1.9537]


Logits stats - min: -7.1242, max: 2.4301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0349, max: 2.8325
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2640, max: 1.7039
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10536/18200 [04:13<03:05, 41.27it/s, loss=1.2392]


Logits stats - min: -9.8289, max: 2.4692
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5839, max: 2.5452
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9597, max: 1.8637
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3683, max: 2.4022
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10556/18200 [04:13<03:13, 39.53it/s, loss=1.8625]


Logits stats - min: -6.5997, max: 1.7183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2881, max: 2.2421
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10570/18200 [04:14<03:02, 41.89it/s, loss=1.4863]


Logits stats - min: -7.2660, max: 2.1723
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1005, max: 2.6116
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6432, max: 2.4951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5720, max: 2.0073
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7248, max: 2.4920
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▎         | 10581/18200 [04:14<02:50, 44.61it/s, loss=1.5037]


Logits stats - min: -6.9950, max: 2.2597
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5883, max: 1.7842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0073, max: 1.9870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7483, max: 2.6729
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5832, max: 2.2350
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0251, max: 2.6491
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10587/18200 [04:14<02:43, 46.68it/s, loss=2.1734]


Logits stats - min: -8.6301, max: 2.4985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7151, max: 2.9308
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10602/18200 [04:14<02:58, 42.64it/s, loss=1.5515]


Logits stats - min: -7.2533, max: 2.1778
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3310, max: 1.5599
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8442, max: 2.1610
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10613/18200 [04:15<02:46, 45.56it/s, loss=1.4891]


Logits stats - min: -8.6618, max: 2.1716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0414, max: 1.6147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5804, max: 1.5280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0945, max: 2.1811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2565, max: 2.7493
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2231, max: 2.3268
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10623/18200 [04:15<03:05, 40.88it/s, loss=1.3503]


Logits stats - min: -8.6081, max: 2.5279
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2838, max: 2.2235
Target unique values: tensor([0], device='cuda:0')


Training:  58%|█████████████▍         | 10633/18200 [04:15<03:09, 40.03it/s, loss=1.3507]


Logits stats - min: -5.9131, max: 1.5223
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8838, max: 3.2469
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10648/18200 [04:15<02:59, 42.02it/s, loss=1.8740]


Logits stats - min: -7.9785, max: 1.7982
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4231, max: 1.7918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8691, max: 2.3523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0387, max: 2.0718
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10663/18200 [04:16<03:05, 40.69it/s, loss=1.4871]


Logits stats - min: -6.9242, max: 2.1819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4363, max: 1.7039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3933, max: 2.0985
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▍         | 10673/18200 [04:16<03:04, 40.70it/s, loss=1.2324]


Logits stats - min: -8.6959, max: 2.8122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7904, max: 2.5598
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8466, max: 2.6531
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3826, max: 2.9928
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10688/18200 [04:16<03:07, 39.98it/s, loss=1.5099]


Logits stats - min: -7.4483, max: 1.9221
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9598, max: 1.9073
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10694/18200 [04:17<02:59, 41.88it/s, loss=2.8204]


Logits stats - min: -9.1376, max: 2.6743
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10709/18200 [04:17<03:04, 40.60it/s, loss=1.2444]


Logits stats - min: -7.5722, max: 1.9417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8997, max: 2.8639
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10719/18200 [04:17<03:00, 41.55it/s, loss=1.5009]


Logits stats - min: -8.8028, max: 2.4629
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8837, max: 2.9662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6057, max: 1.5937
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10735/18200 [04:17<02:48, 44.21it/s, loss=1.5342]


Logits stats - min: -6.8396, max: 2.1956
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4307, max: 2.4943
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3936, max: 2.1360
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1916, max: 1.6911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1022, max: 2.9842
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10745/18200 [04:18<02:51, 43.41it/s, loss=1.5078]


Logits stats - min: -8.5275, max: 2.4749
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2528, max: 2.4346
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4514, max: 1.8565
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10751/18200 [04:18<02:48, 44.08it/s, loss=1.5191]


Logits stats - min: -7.2470, max: 1.9417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1730, max: 2.0805
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10761/18200 [04:18<02:54, 42.65it/s, loss=1.7491]


Logits stats - min: -6.8358, max: 2.1985
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10771/18200 [04:18<03:04, 40.35it/s, loss=1.5371]


Logits stats - min: -7.4011, max: 2.3180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7570, max: 2.1615
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▌         | 10781/18200 [04:19<03:05, 40.07it/s, loss=1.3410]


Logits stats - min: -7.4604, max: 2.1455
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3177, max: 1.5486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2655, max: 2.6220
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10791/18200 [04:19<02:59, 41.16it/s, loss=1.5297]


Logits stats - min: -7.4317, max: 2.2126
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2826, max: 2.0484
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10801/18200 [04:19<03:10, 38.90it/s, loss=2.0373]


Logits stats - min: -9.5000, max: 2.6734
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10815/18200 [04:20<03:08, 39.08it/s, loss=1.5429]


Logits stats - min: -8.8399, max: 2.4257
Target unique values: tensor([0], device='cuda:0')


Training:  59%|█████████████▋         | 10827/18200 [04:20<03:20, 36.84it/s, loss=1.3536]


Logits stats - min: -7.4846, max: 2.0883
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1433, max: 1.6104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7861, max: 2.5889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5409, max: 2.8289
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10837/18200 [04:20<03:01, 40.56it/s, loss=1.7199]


Logits stats - min: -6.7517, max: 1.6308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4219, max: 2.7588
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10847/18200 [04:20<02:59, 41.01it/s, loss=1.4925]


Logits stats - min: -9.5115, max: 2.3850
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5242, max: 2.8249
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3641, max: 2.6560
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5646, max: 1.6308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1807, max: 2.3939
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10864/18200 [04:21<02:38, 46.23it/s, loss=1.5438]


Logits stats - min: -5.9555, max: 1.7809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2317, max: 2.6740
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0893, max: 1.4742
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9907, max: 2.3218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6425, max: 2.0840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9636, max: 2.3043
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10874/18200 [04:21<02:47, 43.86it/s, loss=1.5379]


Logits stats - min: -7.0868, max: 2.0559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0664, max: 2.5586
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4337, max: 1.9033
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▋         | 10879/18200 [04:21<02:51, 42.64it/s, loss=1.3495]


Logits stats - min: -7.8019, max: 2.2314
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8074, max: 1.5525
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10889/18200 [04:21<02:54, 41.82it/s, loss=1.5417]


Logits stats - min: -8.2348, max: 2.5289
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8465, max: 1.7184
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10899/18200 [04:21<02:49, 43.13it/s, loss=2.2974]


Logits stats - min: -6.9083, max: 1.9054
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5389, max: 1.8307
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5282, max: 2.6731
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2810, max: 2.3317
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8959, max: 2.8452
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10910/18200 [04:22<02:47, 43.45it/s, loss=1.3544]


Logits stats - min: -9.0741, max: 2.6339
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7646, max: 2.6819
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10920/18200 [04:22<02:45, 44.12it/s, loss=1.2120]


Logits stats - min: -7.1458, max: 1.9186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5546, max: 2.8458
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5346, max: 2.1259
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10930/18200 [04:22<02:59, 40.43it/s, loss=1.5404]


Logits stats - min: -6.8967, max: 2.0742
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4372, max: 1.6237
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1525, max: 3.1966
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7845, max: 2.4198
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10945/18200 [04:23<02:48, 43.02it/s, loss=1.7169]


Logits stats - min: -9.5303, max: 3.3980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2131, max: 2.5475
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9690, max: 1.8973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3112, max: 2.4951
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10955/18200 [04:23<02:41, 44.86it/s, loss=1.3185]


Logits stats - min: -5.8822, max: 1.5658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9048, max: 2.4186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5070, max: 3.0759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7421, max: 2.0880
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10961/18200 [04:23<02:31, 47.76it/s, loss=1.5611]


Logits stats - min: -9.1029, max: 3.0036
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6652, max: 3.1471
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6771, max: 2.7227
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8324, max: 1.8301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7588, max: 2.4750
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▊         | 10971/18200 [04:23<02:40, 45.08it/s, loss=1.5467]


Logits stats - min: -9.5849, max: 3.0613
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▉         | 10981/18200 [04:23<02:55, 41.14it/s, loss=1.2253]


Logits stats - min: -9.0147, max: 3.0189
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1632, max: 2.5577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0581, max: 2.7243
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▉         | 10991/18200 [04:24<02:56, 40.79it/s, loss=1.4969]


Logits stats - min: -9.0341, max: 2.7345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2698, max: 1.9128
Target unique values: tensor([0], device='cuda:0')


Training:  60%|█████████████▉         | 11001/18200 [04:24<02:56, 40.70it/s, loss=1.4979]


Logits stats - min: -7.7518, max: 2.0422
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5606, max: 2.4039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9317, max: 1.9535
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3954, max: 3.0570
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11018/18200 [04:24<02:33, 46.86it/s, loss=1.5336]


Logits stats - min: -9.9954, max: 2.8209
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2092, max: 1.9767
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3818, max: 2.3297
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6302, max: 2.3995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1941, max: 1.8575
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9634, max: 2.5673
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11025/18200 [04:24<02:24, 49.62it/s, loss=2.1965]


Logits stats - min: -7.5237, max: 2.2311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0170, max: 2.0108
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7124, max: 2.5056
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0987, max: 1.9985
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5811, max: 2.1395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0935, max: 2.8914
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11040/18200 [04:25<02:50, 41.99it/s, loss=1.5186]


Logits stats - min: -8.4658, max: 2.4335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1155, max: 3.1025
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11055/18200 [04:25<02:52, 41.43it/s, loss=1.5411]


Logits stats - min: -6.9347, max: 1.9069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2382, max: 3.3784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2561, max: 2.8061
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11065/18200 [04:25<02:57, 40.21it/s, loss=1.4747]


Logits stats - min: -8.7856, max: 2.6853
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1908, max: 2.7557
Target unique values: tensor([0], device='cuda:0')


Training:  61%|█████████████▉         | 11075/18200 [04:26<02:45, 43.15it/s, loss=2.1494]


Logits stats - min: -7.9359, max: 2.3498
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8795, max: 2.4111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0656, max: 2.3947
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11085/18200 [04:26<02:50, 41.79it/s, loss=1.3087]


Logits stats - min: -8.8067, max: 2.5734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4170, max: 2.4217
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0590, max: 2.1869
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2299, max: 2.6205
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11095/18200 [04:26<03:02, 38.92it/s, loss=1.4900]


Logits stats - min: -8.6778, max: 2.2663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2248, max: 2.0908
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11106/18200 [04:26<02:49, 41.89it/s, loss=1.4797]


Logits stats - min: -7.0632, max: 2.2842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2785, max: 2.6817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3706, max: 2.1788
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1119, max: 1.9559
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11116/18200 [04:27<02:53, 40.92it/s, loss=1.4946]


Logits stats - min: -6.9080, max: 2.2280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8929, max: 1.9470
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8100, max: 2.7059
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11127/18200 [04:27<02:47, 42.26it/s, loss=1.5420]


Logits stats - min: -7.5457, max: 2.1467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2276, max: 2.5188
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11132/18200 [04:27<02:46, 42.52it/s, loss=2.2024]


Logits stats - min: -6.6606, max: 2.0601
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11147/18200 [04:27<02:47, 42.06it/s, loss=1.6723]


Logits stats - min: -8.4432, max: 2.3750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0301, max: 2.1573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0257, max: 1.5382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5512, max: 2.7923
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11163/18200 [04:28<02:46, 42.24it/s, loss=2.2690]


Logits stats - min: -9.0647, max: 2.7562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7449, max: 1.8968
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0851, max: 2.5057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3092, max: 2.0382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1220, max: 2.2925
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████         | 11174/18200 [04:28<02:34, 45.57it/s, loss=1.8241]


Logits stats - min: -7.8858, max: 1.9281
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8424, max: 2.0448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9475, max: 2.5943
Target unique values: tensor([0], device='cuda:0')


Training:  61%|██████████████▏        | 11184/18200 [04:28<02:47, 41.97it/s, loss=1.2324]


Logits stats - min: -8.0072, max: 2.3827
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4902, max: 3.1437
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11194/18200 [04:28<02:54, 40.09it/s, loss=1.5520]


Logits stats - min: -7.7184, max: 2.4383
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11209/18200 [04:29<02:57, 39.28it/s, loss=1.5429]


Logits stats - min: -5.9633, max: 1.8737
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11214/18200 [04:29<02:50, 41.00it/s, loss=1.5337]


Logits stats - min: -7.2937, max: 2.2535
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8143, max: 2.1110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4905, max: 2.4713
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11235/18200 [04:29<02:37, 44.21it/s, loss=1.2076]


Logits stats - min: -7.0016, max: 2.0535
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7519, max: 2.4538
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9252, max: 2.6001
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0534, max: 2.6467
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11246/18200 [04:30<02:43, 42.55it/s, loss=1.4827]


Logits stats - min: -8.6629, max: 2.3980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3127, max: 2.5496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6827, max: 2.5643
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6036, max: 2.3467
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11256/18200 [04:30<02:49, 41.07it/s, loss=1.7822]


Logits stats - min: -7.4884, max: 1.7356
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▏        | 11266/18200 [04:30<02:53, 40.02it/s, loss=1.5365]


Logits stats - min: -9.5076, max: 3.1868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5366, max: 2.2235
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9984, max: 2.1416
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11281/18200 [04:31<02:51, 40.29it/s, loss=1.3691]


Logits stats - min: -8.9212, max: 2.9988
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11286/18200 [04:31<02:53, 39.93it/s, loss=1.5358]


Logits stats - min: -8.7733, max: 2.4913
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11295/18200 [04:31<02:57, 38.91it/s, loss=2.7761]


Logits stats - min: -8.3042, max: 2.5095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5952, max: 2.9281
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11305/18200 [04:31<02:58, 38.68it/s, loss=1.5326]


Logits stats - min: -7.2171, max: 2.0832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2911, max: 2.1744
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11319/18200 [04:31<02:41, 42.57it/s, loss=1.1352]


Logits stats - min: -9.9275, max: 3.7692
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5848, max: 2.2325
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8659, max: 2.8410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8758, max: 2.9383
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11329/18200 [04:32<02:45, 41.51it/s, loss=1.2212]


Logits stats - min: -7.3991, max: 2.6375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5535, max: 2.6316
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11339/18200 [04:32<02:53, 39.50it/s, loss=1.2342]


Logits stats - min: -9.0209, max: 2.4583
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11349/18200 [04:32<02:42, 42.22it/s, loss=2.2633]


Logits stats - min: -8.4278, max: 2.4834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0981, max: 1.7944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1748, max: 3.1181
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0160, max: 2.8809
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11354/18200 [04:32<02:44, 41.49it/s, loss=1.1993]


Logits stats - min: -7.1663, max: 2.4012
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1695, max: 1.5293
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9261, max: 2.0708
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5783, max: 2.1136
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▎        | 11364/18200 [04:33<02:47, 40.75it/s, loss=1.2207]


Logits stats - min: -9.4114, max: 2.9010
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8510, max: 2.9691
Target unique values: tensor([0], device='cuda:0')


Training:  62%|██████████████▍        | 11375/18200 [04:33<02:29, 45.70it/s, loss=1.5375]


Logits stats - min: -9.4545, max: 3.3386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1350, max: 2.6559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7838, max: 2.2035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4932, max: 2.1662
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6252, max: 2.7373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6108, max: 2.1359
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11385/18200 [04:33<02:42, 41.82it/s, loss=1.4747]


Logits stats - min: -7.7361, max: 2.5532
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2677, max: 1.9251
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2161, max: 2.1280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0119, max: 2.2231
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11413/18200 [04:34<02:53, 39.14it/s, loss=1.2125]


Logits stats - min: -7.6638, max: 2.1649
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7438, max: 2.3689
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3700, max: 2.3021
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11428/18200 [04:34<02:44, 41.06it/s, loss=1.5377]


Logits stats - min: -7.0117, max: 2.1022
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3290, max: 2.2133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9132, max: 2.1724
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0123, max: 2.2760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7566, max: 2.5175
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11439/18200 [04:34<02:32, 44.25it/s, loss=1.3438]


Logits stats - min: -7.3355, max: 2.2160
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3520, max: 2.7340
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0859, max: 2.1798
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1397, max: 1.9940
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11450/18200 [04:35<02:25, 46.32it/s, loss=1.5005]


Logits stats - min: -7.1767, max: 2.2212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4537, max: 2.7127
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0058, max: 1.9485
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.0865, max: 1.5902
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11455/18200 [04:35<02:32, 44.29it/s, loss=2.2733]


Logits stats - min: -9.2357, max: 2.7418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4689, max: 2.8336
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▍        | 11465/18200 [04:35<02:39, 42.29it/s, loss=1.4739]


Logits stats - min: -8.5054, max: 2.4811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8209, max: 2.0882
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1495, max: 1.5971
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5843, max: 2.3195
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1919, max: 1.7524
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5273, max: 2.6531
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11475/18200 [04:35<02:39, 42.25it/s, loss=1.5413]


Logits stats - min: -7.9418, max: 2.6714
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11490/18200 [04:36<02:46, 40.38it/s, loss=1.4865]


Logits stats - min: -7.2290, max: 2.1723
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11505/18200 [04:36<02:46, 40.33it/s, loss=1.4839]


Logits stats - min: -6.6324, max: 2.3353
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8804, max: 2.7622
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3985, max: 2.7198
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4947, max: 2.2938
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11515/18200 [04:36<02:37, 42.33it/s, loss=1.5129]


Logits stats - min: -10.7178, max: 2.0496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2844, max: 2.3286
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11525/18200 [04:36<02:45, 40.28it/s, loss=1.9158]


Logits stats - min: -8.1709, max: 2.3907
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11540/18200 [04:37<02:42, 41.02it/s, loss=1.4812]


Logits stats - min: -8.5907, max: 2.6400
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5213, max: 2.0620
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0817, max: 1.9253
Target unique values: tensor([0], device='cuda:0')


Training:  63%|██████████████▌        | 11555/18200 [04:37<02:48, 39.46it/s, loss=1.5347]


Logits stats - min: -8.7246, max: 2.5677
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1715, max: 2.4051
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▌        | 11566/18200 [04:38<02:41, 41.18it/s, loss=1.5396]


Logits stats - min: -8.5735, max: 2.2799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6870, max: 2.7087
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11580/18200 [04:38<02:45, 40.06it/s, loss=1.7618]


Logits stats - min: -8.7661, max: 2.7003
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11590/18200 [04:38<02:46, 39.70it/s, loss=1.2295]


Logits stats - min: -7.8708, max: 1.6970
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9171, max: 2.1512
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0435, max: 2.3890
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11605/18200 [04:39<02:41, 40.88it/s, loss=1.2048]


Logits stats - min: -7.6026, max: 2.2184
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2801, max: 2.4779
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11633/18200 [04:39<02:42, 40.35it/s, loss=2.2766]


Logits stats - min: -7.0573, max: 1.9579
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3082, max: 2.1060
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1700, max: 2.1300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6596, max: 2.4143
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11643/18200 [04:39<02:39, 41.12it/s, loss=1.9361]


Logits stats - min: -7.5690, max: 2.0758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4286, max: 2.8349
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▋        | 11658/18200 [04:40<02:37, 41.57it/s, loss=1.4717]


Logits stats - min: -7.3203, max: 2.0547
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7204, max: 2.3545
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11672/18200 [04:40<02:51, 38.16it/s, loss=1.4968]


Logits stats - min: -8.3877, max: 2.7317
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2931, max: 2.2680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1043, max: 2.7550
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11686/18200 [04:40<02:38, 41.09it/s, loss=1.4468]


Logits stats - min: -7.6174, max: 1.7467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8453, max: 2.4806
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0879, max: 2.0861
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5757, max: 2.6898
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11696/18200 [04:41<02:43, 39.82it/s, loss=2.7804]


Logits stats - min: -8.0265, max: 2.3612
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11706/18200 [04:41<02:40, 40.55it/s, loss=1.5367]


Logits stats - min: -7.4525, max: 2.4261
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5150, max: 2.7071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8264, max: 2.4954
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0388, max: 2.6443
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11725/18200 [04:42<02:58, 36.33it/s, loss=1.4915]


Logits stats - min: -9.1373, max: 2.8445
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -12.6036, max: 2.0704
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4996, max: 2.0201
Target unique values: tensor([0], device='cuda:0')


Training:  64%|██████████████▊        | 11737/18200 [04:42<02:26, 43.99it/s, loss=1.5343]


Logits stats - min: -7.3177, max: 2.0911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8688, max: 2.2876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0321, max: 2.6441
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1603, max: 2.4007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9805, max: 2.3583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3225, max: 2.1454
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11747/18200 [04:42<02:33, 42.10it/s, loss=1.4906]


Logits stats - min: -8.4845, max: 2.7477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4774, max: 2.0104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8808, max: 2.8377
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11757/18200 [04:42<02:28, 43.52it/s, loss=1.7208]


Logits stats - min: -6.3085, max: 2.1090
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2605, max: 1.5078
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▊        | 11767/18200 [04:42<02:35, 41.26it/s, loss=1.8340]


Logits stats - min: -7.9415, max: 2.4060
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11777/18200 [04:43<02:42, 39.51it/s, loss=1.3533]


Logits stats - min: -6.8308, max: 2.4573
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6612, max: 2.3172
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3606, max: 2.3850
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8752, max: 2.7123
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11787/18200 [04:43<02:33, 41.75it/s, loss=1.9855]


Logits stats - min: -8.5472, max: 1.6770
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2818, max: 2.2168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4022, max: 2.4111
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11797/18200 [04:43<02:36, 40.84it/s, loss=1.4890]


Logits stats - min: -7.1010, max: 1.6422
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11807/18200 [04:43<02:37, 40.52it/s, loss=1.2048]


Logits stats - min: -8.9413, max: 2.3511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1329, max: 2.4360
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11818/18200 [04:44<02:25, 43.94it/s, loss=1.3735]


Logits stats - min: -8.8704, max: 2.4095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7506, max: 2.4311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4644, max: 2.4608
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9109, max: 2.2639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4668, max: 2.3336
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11828/18200 [04:44<02:34, 41.13it/s, loss=1.4888]


Logits stats - min: -7.5091, max: 1.9323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4174, max: 2.3485
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11843/18200 [04:44<02:31, 42.06it/s, loss=1.5321]


Logits stats - min: -8.7800, max: 1.7835
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4043, max: 2.3716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9775, max: 2.9034
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3625, max: 1.7538
Target unique values: tensor([0], device='cuda:0')


Training:  65%|██████████████▉        | 11853/18200 [04:45<02:36, 40.51it/s, loss=1.4811]


Logits stats - min: -8.1129, max: 2.3122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1688, max: 2.4543
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11872/18200 [04:45<02:42, 38.88it/s, loss=1.5330]


Logits stats - min: -8.7898, max: 2.3695
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11884/18200 [04:45<02:52, 36.65it/s, loss=1.8928]


Logits stats - min: -7.1814, max: 2.0467
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1722, max: 2.1417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5306, max: 2.2892
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11894/18200 [04:46<02:38, 39.89it/s, loss=1.6086]


Logits stats - min: -7.4095, max: 2.4395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0755, max: 2.1100
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11904/18200 [04:46<02:36, 40.19it/s, loss=1.5360]


Logits stats - min: -8.8108, max: 2.6855
Target unique values: tensor([0], device='cuda:0')


Training:  65%|███████████████        | 11915/18200 [04:46<02:24, 43.43it/s, loss=1.3509]


Logits stats - min: -8.8976, max: 2.5852
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9146, max: 2.4692
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6272, max: 2.3133
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0640, max: 2.7872
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6637, max: 2.9165
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6678, max: 1.5915
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11930/18200 [04:46<02:28, 42.11it/s, loss=1.5349]


Logits stats - min: -7.0133, max: 1.7347
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2222, max: 1.6670
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11949/18200 [04:47<02:36, 39.85it/s, loss=1.5479]


Logits stats - min: -7.4516, max: 1.5543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2085, max: 2.1773
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0528, max: 2.6412
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████        | 11964/18200 [04:47<02:41, 38.59it/s, loss=1.5476]


Logits stats - min: -6.6955, max: 2.1132
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 11977/18200 [04:48<02:38, 39.21it/s, loss=1.2137]


Logits stats - min: -9.2887, max: 2.6502
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 11995/18200 [04:48<02:39, 39.00it/s, loss=1.5315]


Logits stats - min: -7.0791, max: 2.1515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0064, max: 2.5469
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5520, max: 2.0440
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12005/18200 [04:48<02:29, 41.51it/s, loss=1.5334]


Logits stats - min: -8.4896, max: 2.3583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5567, max: 2.6043
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1743, max: 1.9631
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1221, max: 2.0267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1737, max: 2.2020
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12015/18200 [04:49<02:32, 40.45it/s, loss=2.0502]


Logits stats - min: -8.1542, max: 2.4857
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12024/18200 [04:49<02:36, 39.42it/s, loss=1.4373]


Logits stats - min: -8.3309, max: 2.4641
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6159, max: 2.1168
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12036/18200 [04:49<02:39, 38.55it/s, loss=1.1873]


Logits stats - min: -9.1269, max: 2.5469
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12045/18200 [04:50<02:35, 39.49it/s, loss=1.4718]


Logits stats - min: -8.0855, max: 1.8309
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1109, max: 3.2921
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▏       | 12054/18200 [04:50<02:39, 38.53it/s, loss=1.9623]


Logits stats - min: -10.1651, max: 3.1803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7399, max: 2.6766
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▎       | 12076/18200 [04:50<02:23, 42.78it/s, loss=2.2503]


Logits stats - min: -7.3142, max: 2.3186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8294, max: 2.6824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8609, max: 2.5262
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0491, max: 1.9784
Target unique values: tensor([0], device='cuda:0')


Training:  66%|███████████████▎       | 12095/18200 [04:51<02:34, 39.40it/s, loss=1.4687]


Logits stats - min: -8.2077, max: 2.0660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5048, max: 2.2066
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12105/18200 [04:51<02:34, 39.36it/s, loss=1.4702]


Logits stats - min: -8.8736, max: 2.7025
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12122/18200 [04:51<02:10, 46.47it/s, loss=1.2081]


Logits stats - min: -8.9886, max: 2.0922
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0202, max: 2.3536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8718, max: 3.0819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3982, max: 2.2366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5616, max: 1.8221
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0270, max: 2.2210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2933, max: 2.3229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9880, max: 2.5352
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12142/18200 [04:52<02:34, 39.24it/s, loss=1.4744]


Logits stats - min: -6.8376, max: 2.3809
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4986, max: 2.5308
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▎       | 12157/18200 [04:52<02:31, 39.76it/s, loss=1.5215]


Logits stats - min: -7.3105, max: 2.2920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0867, max: 1.8774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8111, max: 2.5944
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12172/18200 [04:53<02:25, 41.35it/s, loss=1.4722]


Logits stats - min: -7.1182, max: 2.0283
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9892, max: 2.8399
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7219, max: 2.6278
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7619, max: 2.1295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1990, max: 1.5513
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12184/18200 [04:53<02:11, 45.72it/s, loss=1.5332]


Logits stats - min: -8.6840, max: 2.5376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4982, max: 1.5762
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6630, max: 3.5091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0436, max: 2.1841
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2321, max: 1.6026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2222, max: 1.9948
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12199/18200 [04:53<02:26, 41.00it/s, loss=1.4694]


Logits stats - min: -8.7568, max: 2.2300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6003, max: 2.0321
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12209/18200 [04:53<02:27, 40.61it/s, loss=1.5287]


Logits stats - min: -7.3445, max: 2.1099
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6602, max: 3.2649
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6163, max: 2.7630
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12224/18200 [04:54<02:20, 42.67it/s, loss=1.3239]


Logits stats - min: -7.2445, max: 2.0457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1651, max: 2.5308
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.1336, max: 1.6661
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12234/18200 [04:54<02:24, 41.30it/s, loss=1.4713]


Logits stats - min: -8.4391, max: 2.5024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2710, max: 2.6868
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12248/18200 [04:54<02:41, 36.83it/s, loss=1.2202]


Logits stats - min: -6.9104, max: 2.1175
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4160, max: 2.5330
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▍       | 12256/18200 [04:55<02:40, 37.02it/s, loss=1.5308]


Logits stats - min: -6.5919, max: 2.2044
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▌       | 12270/18200 [04:55<02:27, 40.11it/s, loss=2.2762]


Logits stats - min: -10.1869, max: 3.3323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1365, max: 2.2011
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8227, max: 2.1261
Target unique values: tensor([0], device='cuda:0')


Training:  67%|███████████████▌       | 12280/18200 [04:55<02:21, 41.89it/s, loss=1.2981]


Logits stats - min: -8.4914, max: 1.8553
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1432, max: 2.0896
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0276, max: 2.6466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8521, max: 2.5083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2492, max: 2.6646
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12295/18200 [04:56<02:23, 41.04it/s, loss=1.2057]


Logits stats - min: -9.3777, max: 2.4290
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8483, max: 2.4150
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12305/18200 [04:56<02:20, 41.94it/s, loss=1.5295]


Logits stats - min: -8.3959, max: 2.4024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1115, max: 3.2413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5287, max: 1.4824
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5539, max: 2.7387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4120, max: 2.7735
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12317/18200 [04:56<02:06, 46.50it/s, loss=1.7905]


Logits stats - min: -7.0103, max: 2.3315
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9351, max: 2.5542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1733, max: 2.6018
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4585, max: 1.5941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5835, max: 2.1856
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12332/18200 [04:56<02:16, 43.00it/s, loss=1.4696]


Logits stats - min: -8.8669, max: 2.4694
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0176, max: 2.1872
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9045, max: 1.6385
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12342/18200 [04:57<02:13, 43.95it/s, loss=1.4920]


Logits stats - min: -7.9827, max: 2.4911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8724, max: 2.3945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2235, max: 2.1874
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▌       | 12352/18200 [04:57<02:23, 40.67it/s, loss=1.5362]


Logits stats - min: -7.8408, max: 2.2592
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3160, max: 2.7417
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12368/18200 [04:57<02:08, 45.56it/s, loss=1.4648]


Logits stats - min: -6.9573, max: 2.4618
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0619, max: 2.5144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0156, max: 2.7780
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3899, max: 2.0486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2465, max: 1.9649
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1380, max: 2.1333
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12393/18200 [04:58<02:21, 41.10it/s, loss=1.4676]


Logits stats - min: -7.9172, max: 2.1168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4661, max: 2.0844
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12403/18200 [04:58<02:20, 41.33it/s, loss=1.2022]


Logits stats - min: -6.6830, max: 2.0663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5057, max: 2.1027
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12413/18200 [04:58<02:25, 39.88it/s, loss=1.5306]


Logits stats - min: -7.7532, max: 2.4061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9815, max: 2.5407
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2468, max: 2.4938
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12428/18200 [04:59<02:16, 42.41it/s, loss=1.1916]


Logits stats - min: -7.1291, max: 2.0807
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0165, max: 2.3089
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0008, max: 2.4329
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12433/18200 [04:59<02:13, 43.08it/s, loss=1.5033]


Logits stats - min: -7.6791, max: 2.0375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4019, max: 1.5151
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12448/18200 [04:59<02:25, 39.57it/s, loss=1.4734]


Logits stats - min: -8.1541, max: 1.8188
Target unique values: tensor([0], device='cuda:0')


Training:  68%|███████████████▋       | 12463/18200 [05:00<02:25, 39.38it/s, loss=1.8425]


Logits stats - min: -7.3086, max: 2.2594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1588, max: 2.2458
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12473/18200 [05:00<02:12, 43.19it/s, loss=1.5850]


Logits stats - min: -6.3916, max: 1.7272
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8667, max: 2.2156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4314, max: 2.2399
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2144, max: 2.0404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4213, max: 2.1465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5290, max: 2.2589
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12483/18200 [05:00<02:13, 42.96it/s, loss=1.3437]


Logits stats - min: -8.5495, max: 2.5178
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0699, max: 2.6732
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8032, max: 2.7668
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12493/18200 [05:00<02:17, 41.40it/s, loss=1.9074]


Logits stats - min: -8.8979, max: 2.7899
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12498/18200 [05:01<02:13, 42.68it/s, loss=1.4679]


Logits stats - min: -8.1778, max: 2.5407
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5905, max: 2.2735
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12513/18200 [05:01<02:19, 40.75it/s, loss=2.8478]


Logits stats - min: -10.2745, max: 3.0979
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2543, max: 2.7304
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1582, max: 1.7505
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12518/18200 [05:01<02:21, 40.08it/s, loss=1.5352]


Logits stats - min: -7.7060, max: 2.0769
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12536/18200 [05:01<02:21, 40.12it/s, loss=1.9312]


Logits stats - min: -7.0244, max: 2.1101
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12551/18200 [05:02<02:13, 42.32it/s, loss=2.2650]


Logits stats - min: -6.9946, max: 2.5108
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.4930, max: 1.9646
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7470, max: 2.0366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5824, max: 2.4412
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▊       | 12561/18200 [05:02<02:06, 44.41it/s, loss=1.5294]


Logits stats - min: -7.4687, max: 2.2173
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2390, max: 2.3104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8630, max: 2.6303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0925, max: 1.7456
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12576/18200 [05:02<02:18, 40.65it/s, loss=1.9264]


Logits stats - min: -7.2794, max: 2.1332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2784, max: 2.3198
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12592/18200 [05:03<02:04, 45.18it/s, loss=2.0117]


Logits stats - min: -7.2466, max: 2.1329
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2593, max: 2.5617
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5537, max: 1.9617
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1273, max: 2.8287
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8133, max: 2.3147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6560, max: 2.5521
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12607/18200 [05:03<02:08, 43.40it/s, loss=1.3537]


Logits stats - min: -8.8531, max: 2.6584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4109, max: 2.0311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1571, max: 3.1440
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12617/18200 [05:03<02:16, 40.92it/s, loss=1.2108]


Logits stats - min: -10.9508, max: 2.6861
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12622/18200 [05:04<02:21, 39.36it/s, loss=1.3376]


Logits stats - min: -7.0934, max: 2.1638
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12631/18200 [05:04<02:24, 38.55it/s, loss=1.5371]


Logits stats - min: -12.2345, max: 2.2753
Target unique values: tensor([0], device='cuda:0')


Training:  69%|███████████████▉       | 12645/18200 [05:04<02:16, 40.61it/s, loss=2.8178]


Logits stats - min: -10.5973, max: 2.1907
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2114, max: 2.9565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2994, max: 2.1535
Target unique values: tensor([0], device='cuda:0')


Training:  70%|███████████████▉       | 12650/18200 [05:04<02:16, 40.64it/s, loss=1.3457]


Logits stats - min: -8.6782, max: 2.4089
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12665/18200 [05:05<02:11, 42.04it/s, loss=1.2045]


Logits stats - min: -7.3963, max: 2.5051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4795, max: 1.5301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4106, max: 2.0425
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12680/18200 [05:05<02:20, 39.17it/s, loss=1.3411]


Logits stats - min: -9.4969, max: 2.5247
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4234, max: 2.1443
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12690/18200 [05:05<02:08, 42.94it/s, loss=1.4738]


Logits stats - min: -9.7680, max: 2.9779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2749, max: 2.0483
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3531, max: 2.0840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1023, max: 2.0969
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2348, max: 2.7354
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12706/18200 [05:05<02:00, 45.59it/s, loss=2.2272]


Logits stats - min: -7.0701, max: 2.1856
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7550, max: 3.0058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3352, max: 2.7419
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9470, max: 2.4191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5544, max: 1.8868
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12716/18200 [05:06<02:04, 43.95it/s, loss=2.2836]


Logits stats - min: -9.0461, max: 2.4753
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0605, max: 2.4660
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12726/18200 [05:06<02:09, 42.17it/s, loss=1.4750]


Logits stats - min: -7.2222, max: 2.2197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6145, max: 2.4007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4883, max: 3.0366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9376, max: 2.7088
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12742/18200 [05:06<01:57, 46.55it/s, loss=1.4592]


Logits stats - min: -9.5250, max: 2.9813
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5148, max: 1.6083
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7263, max: 2.4333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1897, max: 2.0438
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6446, max: 2.2406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3829, max: 2.5219
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████       | 12752/18200 [05:07<02:02, 44.62it/s, loss=1.9957]


Logits stats - min: -8.2471, max: 1.6009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9273, max: 2.2427
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9854, max: 2.5165
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12764/18200 [05:07<01:55, 47.16it/s, loss=1.3584]


Logits stats - min: -7.0389, max: 2.1258
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3369, max: 2.2892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3836, max: 2.6834
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7746, max: 2.6579
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7151, max: 2.1152
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12774/18200 [05:07<02:00, 44.97it/s, loss=1.3495]


Logits stats - min: -7.1720, max: 1.9860
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12789/18200 [05:07<02:15, 39.88it/s, loss=1.4705]


Logits stats - min: -7.3146, max: 2.1152
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12799/18200 [05:08<02:14, 40.20it/s, loss=1.3354]


Logits stats - min: -7.1930, max: 2.1724
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12804/18200 [05:08<02:09, 41.80it/s, loss=1.4876]


Logits stats - min: -7.3644, max: 2.0491
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12819/18200 [05:08<02:07, 42.21it/s, loss=1.5440]


Logits stats - min: -8.4559, max: 2.7291
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1642, max: 2.2669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9270, max: 2.1187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7975, max: 2.1613
Target unique values: tensor([0], device='cuda:0')


Training:  70%|████████████████▏      | 12825/18200 [05:08<01:55, 46.54it/s, loss=2.7806]


Logits stats - min: -7.6383, max: 1.6532
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6169, max: 2.2277
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5190, max: 2.4951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5118, max: 2.7494
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3239, max: 1.7461
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▏      | 12840/18200 [05:09<02:18, 38.59it/s, loss=1.3441]


Logits stats - min: -9.3009, max: 2.4076
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▏      | 12850/18200 [05:09<02:12, 40.30it/s, loss=1.5358]


Logits stats - min: -8.0963, max: 2.8457
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5831, max: 2.4296
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12860/18200 [05:09<02:13, 39.87it/s, loss=1.5242]


Logits stats - min: -9.2234, max: 2.3762
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6669, max: 2.5817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3250, max: 2.3581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3154, max: 2.1549
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12881/18200 [05:10<02:01, 43.73it/s, loss=1.4754]


Logits stats - min: -6.8832, max: 1.3877
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8736, max: 2.5295
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4614, max: 2.0319
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12886/18200 [05:10<02:07, 41.77it/s, loss=1.5272]


Logits stats - min: -8.0443, max: 2.8833
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4261, max: 2.5021
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2297, max: 1.9509
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12901/18200 [05:10<02:06, 41.93it/s, loss=1.5309]


Logits stats - min: -7.4471, max: 2.4616
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2817, max: 2.0322
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.8829, max: 2.1979
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12920/18200 [05:11<02:09, 40.79it/s, loss=1.4793]


Logits stats - min: -8.5788, max: 2.6051
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7791, max: 3.2519
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5739, max: 2.4030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3923, max: 2.5299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6316, max: 2.6510
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12931/18200 [05:11<01:56, 45.12it/s, loss=2.7948]


Logits stats - min: -7.3541, max: 1.8747
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0228, max: 2.1992
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8338, max: 2.2516
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7532, max: 2.7339
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0347, max: 2.6536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3156, max: 2.1542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3428, max: 2.0639
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5910, max: 1.9214
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12942/18200 [05:11<02:03, 42.60it/s, loss=1.1904]


Logits stats - min: -9.0971, max: 2.7304
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▎      | 12952/18200 [05:11<02:06, 41.64it/s, loss=1.2887]


Logits stats - min: -7.2773, max: 2.2341
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9791, max: 2.1469
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12962/18200 [05:12<02:08, 40.86it/s, loss=1.1949]


Logits stats - min: -7.6018, max: 1.9781
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7662, max: 2.5993
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3964, max: 2.0699
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12977/18200 [05:12<02:03, 42.19it/s, loss=1.6289]


Logits stats - min: -7.2707, max: 2.5252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8373, max: 2.4653
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0259, max: 2.0685
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 12987/18200 [05:12<02:00, 43.36it/s, loss=1.5332]


Logits stats - min: -8.7611, max: 2.5625
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0841, max: 2.4075
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0700, max: 1.9723
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 13002/18200 [05:13<02:14, 38.69it/s, loss=1.3478]


Logits stats - min: -7.5090, max: 2.3091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1472, max: 1.7992
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7839, max: 2.0066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6376, max: 2.3628
Target unique values: tensor([0], device='cuda:0')


Training:  71%|████████████████▍      | 13012/18200 [05:13<02:06, 41.15it/s, loss=1.8963]


Logits stats - min: -7.8695, max: 1.9901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5465, max: 2.7334
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8583, max: 3.2822
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13022/18200 [05:13<02:00, 43.07it/s, loss=1.9047]


Logits stats - min: -7.4378, max: 2.1317
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3242, max: 2.3916
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13032/18200 [05:13<02:00, 42.76it/s, loss=2.0815]


Logits stats - min: -7.8699, max: 2.2091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5908, max: 2.4425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4384, max: 2.6765
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▍      | 13042/18200 [05:14<02:02, 41.99it/s, loss=1.7563]


Logits stats - min: -9.9554, max: 3.1057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -16.9598, max: 2.3150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1582, max: 2.3147
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9130, max: 2.2283
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13062/18200 [05:14<02:04, 41.22it/s, loss=1.2152]


Logits stats - min: -10.4006, max: 2.2526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9316, max: 2.0645
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13086/18200 [05:15<02:01, 42.01it/s, loss=1.2135]


Logits stats - min: -9.3220, max: 2.6496
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4939, max: 1.4434
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3458, max: 2.2505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7542, max: 1.5425
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13101/18200 [05:15<02:04, 40.89it/s, loss=1.5207]


Logits stats - min: -6.7365, max: 1.5046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1660, max: 2.3067
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0495, max: 2.2514
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13124/18200 [05:16<02:00, 42.13it/s, loss=2.2296]


Logits stats - min: -9.8363, max: 2.6140
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8093, max: 2.3527
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1729, max: 2.0520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3418, max: 2.3609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8633, max: 2.0557
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13139/18200 [05:16<02:01, 41.59it/s, loss=1.7220]


Logits stats - min: -7.3331, max: 2.1653
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8242, max: 1.7652
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13144/18200 [05:16<01:58, 42.65it/s, loss=1.5682]


Logits stats - min: -7.3437, max: 2.1355
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5016, max: 2.2335
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6108, max: 2.6440
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4589, max: 2.0590
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▌      | 13154/18200 [05:16<02:05, 40.12it/s, loss=1.4869]


Logits stats - min: -7.6230, max: 1.8698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2461, max: 2.0144
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13164/18200 [05:17<02:05, 40.19it/s, loss=1.5540]


Logits stats - min: -7.2516, max: 2.2915
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13174/18200 [05:17<02:02, 40.89it/s, loss=1.1996]


Logits stats - min: -7.5976, max: 2.1977
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8641, max: 2.6223
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13184/18200 [05:17<02:02, 40.85it/s, loss=1.6652]


Logits stats - min: -8.4178, max: 2.6323
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6699, max: 2.6713
Target unique values: tensor([0], device='cuda:0')


Training:  72%|████████████████▋      | 13195/18200 [05:17<01:52, 44.53it/s, loss=2.0457]


Logits stats - min: -7.1356, max: 2.2584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8787, max: 2.2681
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7639, max: 1.7150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2026, max: 2.1615
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13211/18200 [05:18<01:46, 46.90it/s, loss=1.4804]


Logits stats - min: -9.2414, max: 2.4134
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8345, max: 3.1082
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4740, max: 3.0045
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9867, max: 2.5784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3645, max: 2.6660
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3759, max: 2.5131
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1994, max: 2.4006
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0910, max: 2.5836
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13221/18200 [05:18<01:47, 46.34it/s, loss=2.7741]


Logits stats - min: -9.5152, max: 2.8734
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9305, max: 3.0810
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13227/18200 [05:18<01:41, 49.07it/s, loss=1.3621]


Logits stats - min: -6.3738, max: 1.7029
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6108, max: 2.1415
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9495, max: 2.4700
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8129, max: 2.4212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7436, max: 2.1889
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13242/18200 [05:18<01:50, 44.80it/s, loss=1.5255]


Logits stats - min: -9.2168, max: 3.1371
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8667, max: 2.1973
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▋      | 13253/18200 [05:19<01:51, 44.33it/s, loss=1.5271]


Logits stats - min: -7.7294, max: 2.1730
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7200, max: 2.0934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3144, max: 2.6454
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13273/18200 [05:19<01:59, 41.36it/s, loss=1.4696]


Logits stats - min: -8.6185, max: 2.4269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2917, max: 2.2535
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13283/18200 [05:19<01:56, 42.09it/s, loss=1.1947]


Logits stats - min: -9.2101, max: 2.7151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9855, max: 2.4759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2116, max: 2.5351
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13293/18200 [05:20<01:55, 42.44it/s, loss=1.5277]


Logits stats - min: -8.5905, max: 2.3904
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1825, max: 2.8735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2667, max: 2.2563
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13303/18200 [05:20<01:58, 41.48it/s, loss=1.3257]


Logits stats - min: -7.4853, max: 2.2756
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2064, max: 2.2453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3390, max: 2.2727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4312, max: 2.1167
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13313/18200 [05:20<02:03, 39.48it/s, loss=1.5372]


Logits stats - min: -7.3114, max: 2.1324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4471, max: 2.8684
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13318/18200 [05:20<02:05, 38.91it/s, loss=1.5903]


Logits stats - min: -7.2548, max: 2.3639
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13327/18200 [05:20<02:01, 40.09it/s, loss=1.2172]


Logits stats - min: -7.6874, max: 2.0299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7791, max: 2.1540
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13337/18200 [05:21<02:02, 39.63it/s, loss=1.4717]


Logits stats - min: -8.5660, max: 2.0218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0813, max: 2.7970
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7044, max: 2.4323
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▊      | 13352/18200 [05:21<01:57, 41.33it/s, loss=1.4749]


Logits stats - min: -10.1314, max: 2.4185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2294, max: 2.2121
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▉      | 13362/18200 [05:21<01:54, 42.21it/s, loss=1.3610]


Logits stats - min: -8.8836, max: 2.6378
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1253, max: 2.3695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6555, max: 3.3610
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6537, max: 2.4307
Target unique values: tensor([0], device='cuda:0')


Training:  73%|████████████████▉      | 13372/18200 [05:21<01:57, 41.19it/s, loss=1.5438]


Logits stats - min: -7.6988, max: 2.0876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4803, max: 2.1239
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13382/18200 [05:22<01:52, 42.94it/s, loss=1.3281]


Logits stats - min: -9.8475, max: 3.1169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1969, max: 2.6429
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9437, max: 2.3508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7614, max: 2.3467
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13392/18200 [05:22<01:50, 43.40it/s, loss=1.4708]


Logits stats - min: -9.5766, max: 1.8428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2557, max: 2.4206
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13402/18200 [05:22<01:53, 42.21it/s, loss=2.2851]


Logits stats - min: -7.7421, max: 1.7474
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7672, max: 1.4441
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13417/18200 [05:23<02:06, 37.95it/s, loss=1.5301]


Logits stats - min: -7.7686, max: 1.8858
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13430/18200 [05:23<02:01, 39.14it/s, loss=1.5217]


Logits stats - min: -7.3660, max: 2.1738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5397, max: 2.3595
Target unique values: tensor([0], device='cuda:0')


Training:  74%|████████████████▉      | 13445/18200 [05:23<01:49, 43.38it/s, loss=1.9828]


Logits stats - min: -7.6821, max: 2.2763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7679, max: 2.0413
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9118, max: 2.1383
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9827, max: 3.0161
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13460/18200 [05:24<01:54, 41.39it/s, loss=1.5327]


Logits stats - min: -7.1501, max: 2.3577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4020, max: 2.3695
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6131, max: 3.1214
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13480/18200 [05:24<01:52, 41.99it/s, loss=1.7959]


Logits stats - min: -7.0630, max: 2.1926
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5359, max: 2.0292
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2774, max: 2.1672
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13495/18200 [05:24<01:53, 41.50it/s, loss=1.9010]


Logits stats - min: -7.6758, max: 2.2919
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1882, max: 2.3390
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5746, max: 2.5429
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13510/18200 [05:25<01:57, 40.04it/s, loss=1.4700]


Logits stats - min: -7.5787, max: 2.1390
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13520/18200 [05:25<02:00, 38.84it/s, loss=1.3390]


Logits stats - min: -6.5590, max: 2.0276
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1814, max: 2.0446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5541, max: 2.0405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1519, max: 2.3388
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13528/18200 [05:25<02:00, 38.82it/s, loss=1.5366]


Logits stats - min: -7.4158, max: 1.6063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7036, max: 2.6685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1253, max: 2.3666
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████      | 13539/18200 [05:26<01:47, 43.40it/s, loss=1.5253]


Logits stats - min: -8.9028, max: 2.3763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5338, max: 2.4993
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5198, max: 2.2869
Target unique values: tensor([0], device='cuda:0')


Training:  74%|█████████████████▏     | 13554/18200 [05:26<01:52, 41.29it/s, loss=1.4606]


Logits stats - min: -10.4712, max: 2.1365
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3358, max: 2.0578
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5819, max: 3.0532
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13569/18200 [05:26<01:57, 39.48it/s, loss=1.2134]


Logits stats - min: -8.9129, max: 2.6345
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3071, max: 2.0437
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13578/18200 [05:27<01:54, 40.25it/s, loss=1.4648]


Logits stats - min: -9.4574, max: 2.9318
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2538, max: 2.5221
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8063, max: 2.2311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9983, max: 2.1676
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13588/18200 [05:27<01:46, 43.15it/s, loss=1.5278]


Logits stats - min: -8.5382, max: 2.7027
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3999, max: 2.4699
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3834, max: 2.1357
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13603/18200 [05:27<01:47, 42.77it/s, loss=1.1764]


Logits stats - min: -9.4172, max: 2.4035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0003, max: 2.6714
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6120, max: 3.0632
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.2754, max: 1.6190
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13613/18200 [05:27<01:50, 41.49it/s, loss=1.4674]


Logits stats - min: -9.7140, max: 2.7868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7248, max: 2.9842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3668, max: 2.7233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8127, max: 2.5214
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13623/18200 [05:28<01:54, 39.84it/s, loss=1.5198]


Logits stats - min: -7.3858, max: 2.1707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2785, max: 2.0562
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13633/18200 [05:28<01:50, 41.36it/s, loss=1.4781]


Logits stats - min: -8.2212, max: 2.7033
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0528, max: 2.6518
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2635, max: 3.1838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1611, max: 2.5497
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▏     | 13644/18200 [05:28<01:39, 45.79it/s, loss=1.4845]


Logits stats - min: -8.4412, max: 1.8382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4325, max: 2.5384
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2148, max: 2.5085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7778, max: 2.5101
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13654/18200 [05:28<01:52, 40.30it/s, loss=1.3882]


Logits stats - min: -8.2010, max: 2.6303
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1316, max: 1.7445
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13664/18200 [05:29<01:49, 41.30it/s, loss=1.5455]


Logits stats - min: -7.7682, max: 2.1161
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2786, max: 1.8795
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9794, max: 2.3722
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5412, max: 3.0864
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13683/18200 [05:29<01:55, 39.10it/s, loss=2.2526]


Logits stats - min: -7.4951, max: 2.0517
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3911, max: 1.9218
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3514, max: 2.4887
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13693/18200 [05:29<01:44, 42.97it/s, loss=1.5293]


Logits stats - min: -8.2785, max: 2.7654
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7385, max: 2.6444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2400, max: 2.6263
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4955, max: 2.3141
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13711/18200 [05:30<01:54, 39.20it/s, loss=1.9924]


Logits stats - min: -7.2994, max: 2.2115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9856, max: 2.6465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5792, max: 2.1760
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9863, max: 2.1774
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1287, max: 2.5482
Target unique values: tensor([0], device='cuda:0')


Training:  75%|█████████████████▎     | 13729/18200 [05:30<01:58, 37.85it/s, loss=1.4799]


Logits stats - min: -8.8932, max: 2.4837
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1268, max: 2.4431
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3668, max: 2.0314
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▎     | 13744/18200 [05:31<01:48, 41.25it/s, loss=2.8515]


Logits stats - min: -9.2007, max: 3.1363
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0024, max: 2.7433
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4183, max: 3.1566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0286, max: 2.4515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0185, max: 2.6140
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13760/18200 [05:31<01:45, 42.12it/s, loss=1.2137]


Logits stats - min: -7.4780, max: 2.2855
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9966, max: 2.0941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9251, max: 2.6782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3613, max: 2.7061
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13770/18200 [05:31<01:45, 42.09it/s, loss=1.5317]


Logits stats - min: -7.9187, max: 1.9565
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4392, max: 2.6900
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7877, max: 2.4689
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3243, max: 2.0651
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13780/18200 [05:32<01:45, 41.71it/s, loss=1.5280]


Logits stats - min: -9.0042, max: 2.7462
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3710, max: 2.1940
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7397, max: 2.2953
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13790/18200 [05:32<01:44, 42.20it/s, loss=1.5296]


Logits stats - min: -7.3100, max: 2.2122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8783, max: 2.2512
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0343, max: 2.4004
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13805/18200 [05:32<01:47, 41.02it/s, loss=1.3118]


Logits stats - min: -6.4026, max: 1.7366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6355, max: 2.1617
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2388, max: 2.7326
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8012, max: 2.3688
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0995, max: 2.6990
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13815/18200 [05:32<01:45, 41.62it/s, loss=1.4698]


Logits stats - min: -7.2445, max: 2.2453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0590, max: 2.9819
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1553, max: 2.5395
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0326, max: 2.9567
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13825/18200 [05:33<01:48, 40.17it/s, loss=1.4710]


Logits stats - min: -8.9048, max: 2.7612
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1295, max: 1.5271
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▍     | 13840/18200 [05:33<01:48, 40.26it/s, loss=1.4738]


Logits stats - min: -8.6983, max: 2.7589
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7755, max: 3.1542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9710, max: 2.3818
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1767, max: 2.2111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6175, max: 2.3417
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13850/18200 [05:33<01:46, 40.77it/s, loss=1.4793]


Logits stats - min: -7.4040, max: 2.3118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4603, max: 2.5028
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7634, max: 2.3774
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13861/18200 [05:33<01:38, 43.98it/s, loss=1.5222]


Logits stats - min: -10.3523, max: 2.2917
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2488, max: 2.2039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8004, max: 2.0839
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5745, max: 2.3539
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13877/18200 [05:34<01:34, 45.79it/s, loss=2.2215]


Logits stats - min: -8.9885, max: 2.5911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4118, max: 1.3416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9362, max: 2.4497
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3428, max: 2.4183
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13906/18200 [05:35<01:52, 38.13it/s, loss=1.5274]


Logits stats - min: -7.4470, max: 2.1625
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9411, max: 2.3178
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3960, max: 2.5368
Target unique values: tensor([0], device='cuda:0')


Training:  76%|█████████████████▌     | 13921/18200 [05:35<01:40, 42.50it/s, loss=2.7745]


Logits stats - min: -8.8483, max: 2.7230
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1430, max: 2.1136
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6856, max: 1.9964
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6314, max: 2.3758
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5241, max: 1.4995
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5815, max: 2.2446
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▌     | 13937/18200 [05:35<01:45, 40.51it/s, loss=1.8907]


Logits stats - min: -9.4143, max: 2.6746
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8019, max: 1.6357
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13947/18200 [05:36<01:52, 37.83it/s, loss=1.1939]


Logits stats - min: -7.8245, max: 2.3252
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4763, max: 2.6049
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13957/18200 [05:36<01:44, 40.55it/s, loss=1.4792]


Logits stats - min: -9.5838, max: 3.0255
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5935, max: 2.1629
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2214, max: 2.6961
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13962/18200 [05:36<01:43, 40.79it/s, loss=1.4648]


Logits stats - min: -8.2903, max: 2.2159
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1686, max: 2.3118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7973, max: 1.6334
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8511, max: 2.5508
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13973/18200 [05:36<01:33, 44.99it/s, loss=1.4631]


Logits stats - min: -7.4567, max: 2.4117
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0701, max: 2.6501
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8687, max: 2.2833
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3564, max: 2.4215
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13988/18200 [05:36<01:41, 41.59it/s, loss=2.0510]


Logits stats - min: -7.4379, max: 1.9069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2930, max: 2.8472
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2379, max: 1.8737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2661, max: 2.4979
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2727, max: 2.6051
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 13998/18200 [05:37<01:34, 44.30it/s, loss=2.2251]


Logits stats - min: -7.6183, max: 1.6428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0660, max: 2.9305
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8145, max: 1.8513
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0373, max: 2.3348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8423, max: 2.5034
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14008/18200 [05:37<01:33, 44.95it/s, loss=1.5241]


Logits stats - min: -7.6048, max: 2.1004
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9793, max: 2.1650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2023, max: 2.2355
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5926, max: 1.7407
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14018/18200 [05:37<01:31, 45.88it/s, loss=1.3482]


Logits stats - min: -9.7725, max: 2.9126
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1094, max: 2.6029
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8291, max: 2.2614
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14028/18200 [05:37<01:44, 39.99it/s, loss=1.5156]


Logits stats - min: -9.7250, max: 2.8560
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.0780, max: 2.9219
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▋     | 14033/18200 [05:38<01:40, 41.40it/s, loss=2.7887]


Logits stats - min: -7.5652, max: 2.0804
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8620, max: 2.3169
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14052/18200 [05:38<01:46, 38.93it/s, loss=1.8308]


Logits stats - min: -8.9474, max: 2.5784
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.5766, max: 1.6232
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14057/18200 [05:38<01:44, 39.81it/s, loss=1.4697]


Logits stats - min: -9.3340, max: 2.3880
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1274, max: 2.3897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4201, max: 1.9322
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3016, max: 2.4232
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14073/18200 [05:38<01:39, 41.63it/s, loss=1.4657]


Logits stats - min: -9.2309, max: 2.4733
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14078/18200 [05:39<01:41, 40.55it/s, loss=1.5195]


Logits stats - min: -8.7790, max: 2.5527
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14088/18200 [05:39<01:39, 41.23it/s, loss=1.2240]


Logits stats - min: -8.1639, max: 2.5280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3234, max: 2.3966
Target unique values: tensor([0], device='cuda:0')


Training:  77%|█████████████████▊     | 14102/18200 [05:39<01:46, 38.37it/s, loss=1.4783]


Logits stats - min: -9.5627, max: 2.5682
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7368, max: 2.6096
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1695, max: 1.5265
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▊     | 14116/18200 [05:40<01:38, 41.26it/s, loss=1.4666]


Logits stats - min: -7.3480, max: 2.1260
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2093, max: 2.0380
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9353, max: 2.6484
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▊     | 14126/18200 [05:40<01:33, 43.42it/s, loss=1.5290]


Logits stats - min: -8.4080, max: 2.3990
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1476, max: 1.5792
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3722, max: 2.0945
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9281, max: 2.1003
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▊     | 14141/18200 [05:40<01:39, 40.64it/s, loss=2.0681]


Logits stats - min: -9.0030, max: 2.5271
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5314, max: 2.0793
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5971, max: 2.3609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8803, max: 2.5831
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14150/18200 [05:40<01:43, 39.23it/s, loss=2.2593]


Logits stats - min: -7.3874, max: 2.4211
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9916, max: 2.7946
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14165/18200 [05:41<01:39, 40.67it/s, loss=1.5704]


Logits stats - min: -7.6956, max: 2.4556
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6625, max: 2.1197
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14175/18200 [05:41<01:39, 40.56it/s, loss=1.2821]


Logits stats - min: -7.7629, max: 2.2505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4802, max: 2.3036
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14185/18200 [05:41<01:40, 39.87it/s, loss=1.3404]


Logits stats - min: -9.4172, max: 2.8187
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14203/18200 [05:42<01:36, 41.55it/s, loss=1.3410]


Logits stats - min: -9.0279, max: 2.2786
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7344, max: 2.4867
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6289, max: 2.2031
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6880, max: 2.1944
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14218/18200 [05:42<01:35, 41.73it/s, loss=1.1739]


Logits stats - min: -9.3205, max: 2.5365
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2027, max: 3.5284
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14228/18200 [05:42<01:37, 40.83it/s, loss=1.4711]


Logits stats - min: -9.3344, max: 2.6486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4617, max: 2.1515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4899, max: 2.3589
Target unique values: tensor([0], device='cuda:0')


Training:  78%|█████████████████▉     | 14239/18200 [05:43<01:32, 42.76it/s, loss=1.4737]


Logits stats - min: -10.2147, max: 3.3034
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9574, max: 2.9566
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2202, max: 2.6314
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14254/18200 [05:43<01:36, 40.69it/s, loss=1.2032]


Logits stats - min: -8.1138, max: 2.7875
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3045, max: 2.0898
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5175, max: 2.4180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4402, max: 2.4290
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8914, max: 2.1033
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14260/18200 [05:43<01:30, 43.69it/s, loss=1.5213]


Logits stats - min: -10.2520, max: 2.0916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1221, max: 2.1142
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14281/18200 [05:44<01:21, 47.94it/s, loss=1.5240]


Logits stats - min: -8.0495, max: 2.0636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0325, max: 2.4925
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1791, max: 2.0459
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9744, max: 2.0542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.0613, max: 2.2477
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5067, max: 2.8373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6177, max: 1.8760
Target unique values: tensor([0], device='cuda:0')


Training:  78%|██████████████████     | 14287/18200 [05:44<01:21, 47.88it/s, loss=1.5219]


Logits stats - min: -7.6722, max: 2.4775
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8134, max: 2.0235
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4093, max: 2.8961
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14297/18200 [05:44<01:29, 43.58it/s, loss=1.4546]


Logits stats - min: -8.8677, max: 2.5849
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14312/18200 [05:44<01:39, 39.19it/s, loss=1.4568]


Logits stats - min: -10.0633, max: 2.9457
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14321/18200 [05:45<01:36, 40.03it/s, loss=1.4750]


Logits stats - min: -7.2190, max: 2.1367
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5797, max: 2.3075
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14331/18200 [05:45<01:30, 42.52it/s, loss=1.1961]


Logits stats - min: -9.5539, max: 2.6035
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8263, max: 2.5539
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8429, max: 2.2381
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████     | 14341/18200 [05:45<01:30, 42.56it/s, loss=1.5467]


Logits stats - min: -8.7814, max: 2.5930
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1131, max: 2.2182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5990, max: 1.7634
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14356/18200 [05:45<01:34, 40.58it/s, loss=1.5348]


Logits stats - min: -9.5407, max: 2.6805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0110, max: 2.2064
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7492, max: 2.2847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1108, max: 2.2893
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14372/18200 [05:46<01:33, 41.07it/s, loss=1.4645]


Logits stats - min: -10.3136, max: 2.3375
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6898, max: 2.2944
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8524, max: 2.1976
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14383/18200 [05:46<01:25, 44.66it/s, loss=1.5259]


Logits stats - min: -7.8237, max: 2.5615
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3634, max: 2.5794
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1344, max: 2.5540
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3942, max: 2.2081
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14393/18200 [05:46<01:28, 42.83it/s, loss=1.4701]


Logits stats - min: -8.7712, max: 2.5251
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9702, max: 2.1197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8836, max: 2.7644
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14403/18200 [05:47<01:33, 40.48it/s, loss=1.4715]


Logits stats - min: -8.9217, max: 2.5329
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5704, max: 1.7772
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14413/18200 [05:47<01:37, 38.79it/s, loss=2.2271]


Logits stats - min: -8.9617, max: 2.5467
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14425/18200 [05:47<01:26, 43.42it/s, loss=1.1802]


Logits stats - min: -7.3679, max: 2.2428
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.1880, max: 2.6726
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9082, max: 2.8229
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2609, max: 2.1474
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5728, max: 2.0543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2538, max: 2.6522
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▏    | 14436/18200 [05:47<01:20, 46.77it/s, loss=1.5320]


Logits stats - min: -8.3976, max: 2.5523
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8369, max: 1.6623
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9347, max: 2.4446
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3313, max: 1.4548
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▎    | 14446/18200 [05:47<01:26, 43.44it/s, loss=1.5143]


Logits stats - min: -8.2158, max: 2.1873
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4878, max: 2.3842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5802, max: 2.5996
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▎    | 14451/18200 [05:48<01:31, 41.02it/s, loss=1.3364]


Logits stats - min: -8.8518, max: 2.8396
Target unique values: tensor([0], device='cuda:0')


Training:  79%|██████████████████▎    | 14466/18200 [05:48<01:32, 40.30it/s, loss=2.2603]


Logits stats - min: -10.1175, max: 2.1530
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1060, max: 2.4910
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14476/18200 [05:48<01:27, 42.78it/s, loss=1.2108]


Logits stats - min: -7.5723, max: 2.2612
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -13.4929, max: 2.3352
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3471, max: 2.1799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4910, max: 2.2281
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9390, max: 1.6833
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14487/18200 [05:48<01:23, 44.60it/s, loss=1.5187]


Logits stats - min: -11.1794, max: 3.3024
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6243, max: 2.5908
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14497/18200 [05:49<01:29, 41.22it/s, loss=1.4694]


Logits stats - min: -8.4500, max: 2.7821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5711, max: 2.4366
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4850, max: 2.2242
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3582, max: 2.4805
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14502/18200 [05:49<01:26, 42.60it/s, loss=1.5399]


Logits stats - min: -9.0108, max: 2.6836
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7443, max: 2.4348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2124, max: 2.5890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1469, max: 2.4610
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14517/18200 [05:49<01:28, 41.65it/s, loss=1.9424]


Logits stats - min: -8.9160, max: 2.6920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2938, max: 2.6551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.0164, max: 3.2803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9663, max: 2.7957
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3839, max: 2.6716
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14527/18200 [05:49<01:23, 43.80it/s, loss=1.3510]


Logits stats - min: -8.6640, max: 2.4241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5902, max: 2.4766
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2163, max: 1.8043
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1465, max: 2.5812
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▎    | 14537/18200 [05:50<01:24, 43.38it/s, loss=1.4679]


Logits stats - min: -7.4324, max: 2.1913
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0345, max: 2.3425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7942, max: 2.5515
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14547/18200 [05:50<01:27, 41.85it/s, loss=1.4633]


Logits stats - min: -8.0421, max: 2.1983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9118, max: 2.4828
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7083, max: 2.4702
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14561/18200 [05:50<01:27, 41.43it/s, loss=1.1722]


Logits stats - min: -9.1048, max: 2.5655
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9842, max: 1.9814
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2682, max: 2.1618
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14571/18200 [05:51<01:27, 41.60it/s, loss=1.5206]


Logits stats - min: -6.9563, max: 1.7096
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6683, max: 1.4837
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14581/18200 [05:51<01:31, 39.61it/s, loss=1.4614]


Logits stats - min: -9.1275, max: 2.7025
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14591/18200 [05:51<01:27, 41.12it/s, loss=1.4624]


Logits stats - min: -8.7471, max: 2.5911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9214, max: 2.5983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4696, max: 2.3825
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14601/18200 [05:51<01:25, 42.01it/s, loss=1.8093]


Logits stats - min: -11.3074, max: 2.1187
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9820, max: 2.5408
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2778, max: 2.5246
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3807, max: 2.5621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8372, max: 2.5386
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14611/18200 [05:52<01:26, 41.73it/s, loss=1.5233]


Logits stats - min: -7.5644, max: 2.1713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8504, max: 1.9100
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14621/18200 [05:52<01:32, 38.82it/s, loss=2.0140]


Logits stats - min: -7.8137, max: 2.1735
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▍    | 14632/18200 [05:52<01:23, 42.63it/s, loss=1.5291]


Logits stats - min: -8.1503, max: 2.3551
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9481, max: 2.5914
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7679, max: 2.2005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0607, max: 2.2901
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8477, max: 2.4482
Target unique values: tensor([0], device='cuda:0')


Training:  80%|██████████████████▌    | 14643/18200 [05:52<01:19, 45.00it/s, loss=1.6312]


Logits stats - min: -8.8438, max: 2.4075
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8532, max: 2.5479
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3862, max: 2.0204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4385, max: 2.5852
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3882, max: 2.8475
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14653/18200 [05:52<01:21, 43.42it/s, loss=1.9583]


Logits stats - min: -9.5107, max: 2.6154
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6022, max: 2.3919
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14663/18200 [05:53<01:21, 43.33it/s, loss=1.1775]


Logits stats - min: -7.5574, max: 2.2742
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9007, max: 2.4177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8352, max: 2.5071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6072, max: 2.5121
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3346, max: 1.9968
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14678/18200 [05:53<01:26, 40.74it/s, loss=1.4844]


Logits stats - min: -7.2060, max: 2.7559
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14688/18200 [05:53<01:28, 39.91it/s, loss=1.8524]


Logits stats - min: -9.1563, max: 2.6482
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7846, max: 2.5624
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3024, max: 2.0637
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14703/18200 [05:54<01:29, 39.08it/s, loss=1.4837]


Logits stats - min: -8.6504, max: 2.5269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2602, max: 2.3757
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3892, max: 3.1135
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14711/18200 [05:54<01:30, 38.71it/s, loss=1.4672]


Logits stats - min: -7.4774, max: 2.2085
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1883, max: 2.5948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4572, max: 2.2951
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14721/18200 [05:54<01:22, 42.40it/s, loss=1.5240]


Logits stats - min: -7.7122, max: 1.9963
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9601, max: 2.7948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8059, max: 2.4228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4751, max: 2.8515
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▌    | 14736/18200 [05:54<01:21, 42.76it/s, loss=1.5545]


Logits stats - min: -6.0294, max: 1.7905
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6439, max: 2.6890
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1673, max: 2.1723
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14746/18200 [05:55<01:23, 41.50it/s, loss=1.5218]


Logits stats - min: -9.2345, max: 2.3947
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3835, max: 2.8587
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14757/18200 [05:55<01:20, 43.00it/s, loss=1.4581]


Logits stats - min: -9.9898, max: 2.7918
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0206, max: 2.9782
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6173, max: 2.2817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3397, max: 2.1920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2685, max: 3.1545
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1277, max: 2.4097
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14772/18200 [05:55<01:21, 41.95it/s, loss=1.2077]


Logits stats - min: -8.6245, max: 2.0676
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8287, max: 2.0799
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4863, max: 2.9476
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14782/18200 [05:56<01:20, 42.45it/s, loss=1.5225]


Logits stats - min: -8.5579, max: 2.4510
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2601, max: 2.6794
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9760, max: 2.2383
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14797/18200 [05:56<01:20, 42.18it/s, loss=1.4721]


Logits stats - min: -8.5848, max: 2.2044
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6443, max: 2.7002
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1355, max: 3.2237
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14807/18200 [05:56<01:23, 40.73it/s, loss=1.5196]


Logits stats - min: -8.9680, max: 2.4924
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8361, max: 2.9208
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3120, max: 2.6802
Target unique values: tensor([0], device='cuda:0')


Training:  81%|██████████████████▋    | 14817/18200 [05:56<01:18, 43.37it/s, loss=1.3389]


Logits stats - min: -7.6302, max: 2.2435
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9077, max: 2.9558
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4649, max: 3.0480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0398, max: 2.2878
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14837/18200 [05:57<01:17, 43.27it/s, loss=1.5266]


Logits stats - min: -9.0797, max: 2.7526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6292, max: 2.3624
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6068, max: 2.4204
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1240, max: 2.4736
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4144, max: 2.3336
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14847/18200 [05:57<01:18, 42.46it/s, loss=2.2707]


Logits stats - min: -9.4768, max: 2.9059
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8093, max: 2.6392
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4606, max: 2.8241
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1586, max: 2.3018
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14863/18200 [05:57<01:18, 42.66it/s, loss=2.0981]


Logits stats - min: -9.1666, max: 2.5518
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5052, max: 2.2908
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8495, max: 2.8526
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14868/18200 [05:58<01:20, 41.64it/s, loss=2.2120]


Logits stats - min: -7.6930, max: 2.2039
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2709, max: 1.6673
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14883/18200 [05:58<01:18, 42.13it/s, loss=1.4699]


Logits stats - min: -7.9579, max: 2.1068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3059, max: 2.4097
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2043, max: 2.3313
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14893/18200 [05:58<01:16, 43.46it/s, loss=1.5144]


Logits stats - min: -9.2049, max: 2.7311
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6820, max: 2.4113
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1875, max: 2.2794
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14898/18200 [05:58<01:14, 44.29it/s, loss=1.3087]


Logits stats - min: -8.5308, max: 2.3727
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0462, max: 2.3380
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5482, max: 2.4119
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14919/18200 [05:59<01:11, 46.18it/s, loss=1.6937]


Logits stats - min: -7.0683, max: 2.2705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9016, max: 1.7554
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0922, max: 2.2376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2699, max: 2.7743
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3002, max: 2.5393
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▊    | 14929/18200 [05:59<01:12, 45.28it/s, loss=1.6513]


Logits stats - min: -12.8609, max: 1.8520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4964, max: 2.9342
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5177, max: 2.7604
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8415, max: 2.5188
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5812, max: 2.4591
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14939/18200 [05:59<01:12, 44.87it/s, loss=1.1682]


Logits stats - min: -8.5551, max: 2.5128
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6982, max: 2.1892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6234, max: 2.2112
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14949/18200 [05:59<01:13, 44.42it/s, loss=1.4684]


Logits stats - min: -10.0041, max: 3.0811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4192, max: 2.1247
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3100, max: 2.3114
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14959/18200 [06:00<01:14, 43.31it/s, loss=1.4626]


Logits stats - min: -7.1705, max: 2.1129
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9511, max: 2.5104
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6599, max: 2.3846
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14969/18200 [06:00<01:15, 42.54it/s, loss=1.4649]


Logits stats - min: -7.5227, max: 2.6098
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8051, max: 3.0738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2094, max: 2.0928
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14979/18200 [06:00<01:15, 42.74it/s, loss=2.9265]


Logits stats - min: -8.4016, max: 1.6499
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0113, max: 2.1037
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3267, max: 2.2466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3317, max: 2.8884
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 14994/18200 [06:01<01:17, 41.53it/s, loss=1.2096]


Logits stats - min: -11.3809, max: 2.4519
Target unique values: tensor([0], device='cuda:0')


Training:  82%|██████████████████▉    | 15009/18200 [06:01<01:19, 40.15it/s, loss=2.2181]


Logits stats - min: -9.7792, max: 2.7717
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2383, max: 2.7680
Target unique values: tensor([0], device='cuda:0')


Training:  83%|██████████████████▉    | 15019/18200 [06:01<01:14, 42.85it/s, loss=1.5265]


Logits stats - min: -6.5418, max: 2.2737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9410, max: 2.6278
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0098, max: 2.4796
Target unique values: tensor([0], device='cuda:0')


Training:  83%|██████████████████▉    | 15029/18200 [06:01<01:16, 41.46it/s, loss=1.1858]


Logits stats - min: -7.2847, max: 2.2953
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9508, max: 3.4327
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8286, max: 2.5856
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15044/18200 [06:02<01:14, 42.36it/s, loss=1.8538]


Logits stats - min: -9.1710, max: 2.7208
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5297, max: 2.3698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0415, max: 2.9653
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15054/18200 [06:02<01:17, 40.35it/s, loss=2.0365]


Logits stats - min: -9.7933, max: 2.8798
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1635, max: 2.9212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6209, max: 1.7135
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8258, max: 1.6485
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15064/18200 [06:02<01:18, 39.92it/s, loss=1.5266]


Logits stats - min: -6.9053, max: 2.0543
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2206, max: 2.3803
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15079/18200 [06:03<01:13, 42.41it/s, loss=1.9182]


Logits stats - min: -9.6104, max: 3.1382
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1362, max: 1.7475
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8136, max: 1.6492
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4021, max: 2.8289
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15089/18200 [06:03<01:17, 39.99it/s, loss=2.0215]


Logits stats - min: -7.4973, max: 2.1586
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2051, max: 2.7048
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15102/18200 [06:03<01:19, 39.10it/s, loss=1.5074]


Logits stats - min: -7.1850, max: 2.4327
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9948, max: 2.8071
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8068, max: 3.1430
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15113/18200 [06:03<01:10, 43.68it/s, loss=1.5207]


Logits stats - min: -9.2482, max: 2.6438
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8417, max: 2.7139
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6769, max: 2.0949
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9649, max: 1.4272
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3238, max: 2.4416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6087, max: 2.6423
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████    | 15124/18200 [06:04<01:09, 44.00it/s, loss=1.5002]


Logits stats - min: -7.8652, max: 2.2055
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8365, max: 2.1957
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15134/18200 [06:04<01:09, 43.86it/s, loss=1.4654]


Logits stats - min: -7.8715, max: 2.4167
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7224, max: 2.4545
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15149/18200 [06:04<01:13, 41.50it/s, loss=1.3084]


Logits stats - min: -8.9430, max: 2.4899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2104, max: 2.3316
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0411, max: 2.2125
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15159/18200 [06:05<01:12, 42.21it/s, loss=2.8728]


Logits stats - min: -8.1905, max: 2.3903
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5275, max: 2.3526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1844, max: 2.1997
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15169/18200 [06:05<01:13, 41.10it/s, loss=1.1978]


Logits stats - min: -9.8821, max: 2.7923
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1952, max: 2.4392
Target unique values: tensor([0], device='cuda:0')


Training:  83%|███████████████████▏   | 15179/18200 [06:05<01:10, 42.79it/s, loss=1.4777]


Logits stats - min: -7.6544, max: 2.2710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8095, max: 2.7168
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1940, max: 2.6906
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15209/18200 [06:06<01:12, 41.04it/s, loss=1.4743]


Logits stats - min: -7.8741, max: 2.3360
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7961, max: 2.5379
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7740, max: 2.1931
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4352, max: 2.4899
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15219/18200 [06:06<01:09, 42.97it/s, loss=1.3384]


Logits stats - min: -8.3755, max: 1.9605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6752, max: 2.0586
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▏   | 15229/18200 [06:06<01:11, 41.35it/s, loss=1.5136]


Logits stats - min: -7.4084, max: 2.3206
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15239/18200 [06:06<01:08, 43.38it/s, loss=1.8091]


Logits stats - min: -7.9436, max: 2.1894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0596, max: 2.0285
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8913, max: 2.2324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0979, max: 1.8988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1241, max: 2.1411
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15254/18200 [06:07<01:11, 41.39it/s, loss=1.1524]


Logits stats - min: -9.4411, max: 2.6259
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15269/18200 [06:07<01:09, 42.15it/s, loss=1.4993]


Logits stats - min: -7.8214, max: 2.2078
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9233, max: 2.0652
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0257, max: 1.9269
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0971, max: 3.1860
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5435, max: 2.1822
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15285/18200 [06:08<01:04, 45.30it/s, loss=1.3679]


Logits stats - min: -9.7221, max: 2.9874
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7281, max: 2.2277
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0661, max: 2.4748
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0994, max: 2.4634
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6546, max: 1.7983
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15306/18200 [06:08<01:04, 45.19it/s, loss=1.4712]


Logits stats - min: -9.0805, max: 2.4493
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.9021, max: 2.0931
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0743, max: 2.1182
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1457, max: 2.3029
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15316/18200 [06:08<01:08, 42.31it/s, loss=1.3076]


Logits stats - min: -8.2551, max: 2.2538
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0692, max: 2.3164
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▎   | 15331/18200 [06:09<01:07, 42.64it/s, loss=2.8017]


Logits stats - min: -7.6858, max: 2.2655
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1047, max: 2.4242
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0823, max: 2.5679
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▍   | 15341/18200 [06:09<01:07, 42.15it/s, loss=1.4738]


Logits stats - min: -9.6278, max: 2.5700
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4847, max: 2.0689
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.7943, max: 2.1656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4514, max: 2.3561
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▍   | 15346/18200 [06:09<01:06, 42.61it/s, loss=2.7644]


Logits stats - min: -10.0541, max: 2.8659
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9254, max: 2.2074
Target unique values: tensor([0], device='cuda:0')


Training:  84%|███████████████████▍   | 15361/18200 [06:09<01:11, 39.44it/s, loss=1.5251]


Logits stats - min: -7.3471, max: 2.2171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5382, max: 2.6144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0570, max: 2.5514
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15388/18200 [06:10<01:11, 39.60it/s, loss=1.3564]


Logits stats - min: -8.0817, max: 2.3574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2859, max: 2.1435
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7995, max: 2.6071
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15406/18200 [06:10<01:07, 41.61it/s, loss=1.3073]


Logits stats - min: -10.5151, max: 2.3603
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6097, max: 2.2887
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4147, max: 2.7965
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15411/18200 [06:11<01:10, 39.77it/s, loss=1.5131]


Logits stats - min: -8.1108, max: 2.4658
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8946, max: 2.3614
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▍   | 15429/18200 [06:11<01:08, 40.29it/s, loss=1.4658]


Logits stats - min: -9.2763, max: 2.8066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4957, max: 2.6280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4915, max: 2.1622
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15439/18200 [06:11<01:07, 40.90it/s, loss=1.4708]


Logits stats - min: -6.9771, max: 1.6832
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3020, max: 1.7180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7889, max: 2.4948
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15450/18200 [06:12<01:01, 44.65it/s, loss=1.3848]


Logits stats - min: -10.0664, max: 2.4333
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5445, max: 1.9436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6459, max: 2.0513
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3925, max: 1.6799
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15466/18200 [06:12<01:01, 44.65it/s, loss=2.8268]


Logits stats - min: -9.0714, max: 2.5706
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4291, max: 2.5197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1580, max: 2.8047
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7359, max: 2.7026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2189, max: 2.4284
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15476/18200 [06:12<01:00, 44.67it/s, loss=1.4765]


Logits stats - min: -7.5986, max: 2.2966
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4425, max: 2.9074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1901, max: 1.7309
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5289, max: 2.6290
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15501/18200 [06:13<01:06, 40.88it/s, loss=1.5314]


Logits stats - min: -7.5015, max: 2.1699
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1704, max: 2.4267
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3915, max: 2.9040
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▌   | 15521/18200 [06:13<01:06, 40.43it/s, loss=1.3305]


Logits stats - min: -10.1392, max: 2.3509
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2816, max: 2.4997
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5529, max: 2.0524
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0164, max: 2.3469
Target unique values: tensor([0], device='cuda:0')


Training:  85%|███████████████████▋   | 15532/18200 [06:14<01:02, 42.61it/s, loss=1.1815]


Logits stats - min: -8.6485, max: 2.0897
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5839, max: 2.4863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3885, max: 1.7520
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3071, max: 2.6093
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15562/18200 [06:14<01:02, 42.39it/s, loss=1.5322]


Logits stats - min: -7.4487, max: 2.2377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4777, max: 1.4698
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1074, max: 2.4536
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15572/18200 [06:15<01:02, 42.15it/s, loss=2.1936]


Logits stats - min: -10.0948, max: 2.7537
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5434, max: 2.7471
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2604, max: 2.5193
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1739, max: 2.0529
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15582/18200 [06:15<01:01, 42.24it/s, loss=1.5138]


Logits stats - min: -7.2853, max: 2.0892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4135, max: 2.3227
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3693, max: 2.3223
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15593/18200 [06:15<00:58, 44.29it/s, loss=1.3294]


Logits stats - min: -6.9280, max: 1.8497
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1023, max: 2.2663
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6413, max: 2.3329
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8840, max: 2.4503
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15608/18200 [06:15<01:02, 41.59it/s, loss=2.2582]


Logits stats - min: -7.0792, max: 2.2170
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1329, max: 2.4173
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▋   | 15623/18200 [06:16<01:05, 39.52it/s, loss=1.3196]


Logits stats - min: -7.8577, max: 2.1874
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15633/18200 [06:16<01:02, 41.18it/s, loss=1.4163]


Logits stats - min: -7.7287, max: 1.5505
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2737, max: 2.3596
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6567, max: 2.6490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5897, max: 2.3101
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1849, max: 2.3613
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15643/18200 [06:16<01:02, 40.65it/s, loss=1.9206]


Logits stats - min: -8.4775, max: 2.2759
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15659/18200 [06:17<00:55, 45.38it/s, loss=1.3152]


Logits stats - min: -7.7640, max: 2.0464
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9357, max: 1.8892
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2863, max: 2.3958
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5757, max: 2.0860
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3987, max: 2.6274
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0001, max: 2.4712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0226, max: 2.4500
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15674/18200 [06:17<00:58, 43.15it/s, loss=2.2154]


Logits stats - min: -7.7546, max: 2.1110
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8271, max: 2.5293
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15684/18200 [06:17<01:00, 41.79it/s, loss=2.0323]


Logits stats - min: -9.3437, max: 2.4185
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0832, max: 2.3634
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15694/18200 [06:17<01:02, 40.14it/s, loss=1.1849]


Logits stats - min: -9.3166, max: 2.1961
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15704/18200 [06:18<01:01, 40.73it/s, loss=1.9598]


Logits stats - min: -7.9885, max: 2.1536
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1989, max: 2.6382
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15709/18200 [06:18<01:01, 40.41it/s, loss=1.5302]


Logits stats - min: -6.9101, max: 1.5752
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3209, max: 2.4783
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6879, max: 2.3151
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8023, max: 2.8191
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3508, max: 2.4774
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▊   | 15724/18200 [06:18<00:58, 42.21it/s, loss=1.1850]


Logits stats - min: -9.0544, max: 2.3130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4748, max: 1.6742
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5973, max: 2.2264
Target unique values: tensor([0], device='cuda:0')


Training:  86%|███████████████████▉   | 15735/18200 [06:18<00:54, 45.27it/s, loss=2.1459]


Logits stats - min: -7.4364, max: 1.7913
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9505, max: 2.4453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1299, max: 2.2386
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4171, max: 2.4323
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15745/18200 [06:19<01:00, 40.85it/s, loss=1.1602]


Logits stats - min: -8.5362, max: 2.5829
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1277, max: 1.8969
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15756/18200 [06:19<00:55, 43.99it/s, loss=1.4695]


Logits stats - min: -7.9111, max: 2.0535
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5511, max: 2.6838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6774, max: 2.1713
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2789, max: 2.4617
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15766/18200 [06:19<00:55, 44.20it/s, loss=1.5185]


Logits stats - min: -7.7575, max: 2.2583
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3560, max: 2.1716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2850, max: 2.3115
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3920, max: 2.7361
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3795, max: 2.3094
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6583, max: 2.2218
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15781/18200 [06:19<00:53, 44.89it/s, loss=1.4708]


Logits stats - min: -9.1755, max: 2.4117
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4270, max: 2.5728
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4770, max: 1.6958
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15796/18200 [06:20<00:58, 40.84it/s, loss=1.4706]


Logits stats - min: -8.0015, max: 2.0436
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4196, max: 2.4424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4108, max: 2.5917
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9915, max: 2.4705
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15807/18200 [06:20<00:55, 42.94it/s, loss=1.3757]


Logits stats - min: -10.0232, max: 2.6349
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2461, max: 2.6504
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5648, max: 2.4813
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5196, max: 2.0508
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3886, max: 2.3864
Target unique values: tensor([0], device='cuda:0')


Training:  87%|███████████████████▉   | 15818/18200 [06:20<00:57, 41.64it/s, loss=1.5232]


Logits stats - min: -10.6719, max: 2.2581
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2668, max: 1.5682
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0597, max: 2.0715
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15829/18200 [06:21<00:55, 42.86it/s, loss=2.0047]


Logits stats - min: -7.4127, max: 2.1358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0276, max: 2.0773
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7923, max: 2.0716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1914, max: 2.6547
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15844/18200 [06:21<00:56, 41.57it/s, loss=1.4739]


Logits stats - min: -7.4821, max: 2.3177
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8070, max: 2.2427
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4434, max: 2.9409
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15855/18200 [06:21<00:55, 42.02it/s, loss=1.5074]


Logits stats - min: -8.4704, max: 2.4842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4388, max: 2.2514
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3363, max: 2.9123
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9350, max: 2.5184
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15865/18200 [06:21<00:55, 42.35it/s, loss=1.3554]


Logits stats - min: -9.2496, max: 2.3894
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4048, max: 2.3761
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9480, max: 2.8508
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15875/18200 [06:22<00:55, 41.60it/s, loss=1.4696]


Logits stats - min: -7.7474, max: 2.4991
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15885/18200 [06:22<00:55, 41.99it/s, loss=1.4811]


Logits stats - min: -9.1635, max: 2.7594
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3198, max: 2.2080
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8448, max: 1.6172
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0485, max: 2.5468
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15900/18200 [06:22<00:57, 40.18it/s, loss=1.5833]


Logits stats - min: -7.5052, max: 1.7111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8286, max: 2.5282
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15910/18200 [06:22<00:52, 43.39it/s, loss=1.8368]


Logits stats - min: -9.9215, max: 2.3911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5294, max: 2.7255
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2955, max: 2.3161
Target unique values: tensor([0], device='cuda:0')


Training:  87%|████████████████████   | 15920/18200 [06:23<00:54, 41.94it/s, loss=1.4780]


Logits stats - min: -7.2347, max: 2.2302
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0071, max: 2.4752
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3502, max: 3.0153
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4635, max: 2.1943
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15936/18200 [06:23<00:51, 44.17it/s, loss=1.5349]


Logits stats - min: -8.0255, max: 2.1856
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0703, max: 2.2139
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3198, max: 2.2600
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0777, max: 2.4390
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4503, max: 2.1512
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15951/18200 [06:24<00:54, 41.42it/s, loss=1.4702]


Logits stats - min: -10.6668, max: 2.1222
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4230, max: 2.2317
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15961/18200 [06:24<00:55, 40.42it/s, loss=1.3377]


Logits stats - min: -6.5966, max: 1.5988
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5430, max: 2.1366
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15971/18200 [06:24<00:58, 38.05it/s, loss=1.4585]


Logits stats - min: -8.5174, max: 2.6242
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15981/18200 [06:24<00:52, 41.87it/s, loss=1.5203]


Logits stats - min: -7.5033, max: 2.3387
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0806, max: 1.6750
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1693, max: 2.6358
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4435, max: 2.1720
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 15991/18200 [06:25<00:53, 41.54it/s, loss=1.3362]


Logits stats - min: -9.4003, max: 2.2423
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 16006/18200 [06:25<00:52, 41.59it/s, loss=1.4694]


Logits stats - min: -7.6246, max: 2.3575
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8077, max: 2.4265
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▏  | 16016/18200 [06:25<00:53, 41.15it/s, loss=1.2857]


Logits stats - min: -9.2817, max: 2.4871
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6382, max: 2.7623
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1644, max: 3.1007
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0748, max: 2.2838
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1159, max: 2.5267
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16031/18200 [06:25<00:50, 43.16it/s, loss=1.5235]


Logits stats - min: -7.8999, max: 1.9628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3252, max: 2.7262
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0852, max: 2.1425
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4629, max: 2.2108
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7434, max: 1.6548
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3446, max: 2.5732
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16042/18200 [06:26<00:47, 45.03it/s, loss=1.5288]


Logits stats - min: -8.1294, max: 1.6260
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7289, max: 2.2798
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8169, max: 2.1928
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16053/18200 [06:26<00:46, 46.17it/s, loss=1.3205]


Logits stats - min: -8.6802, max: 1.8840
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5428, max: 1.9803
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4650, max: 2.6040
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5047, max: 2.4737
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7349, max: 3.0701
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16058/18200 [06:26<00:46, 45.66it/s, loss=1.4602]


Logits stats - min: -7.7587, max: 2.0979
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8936, max: 2.2245
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2117, max: 1.8093
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16068/18200 [06:26<00:50, 42.00it/s, loss=1.3069]


Logits stats - min: -9.3435, max: 2.5163
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9273, max: 1.7777
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16083/18200 [06:27<00:49, 42.75it/s, loss=1.1808]


Logits stats - min: -7.8168, max: 2.7777
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7501, max: 2.2212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3531, max: 2.9301
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16088/18200 [06:27<00:49, 42.51it/s, loss=1.1775]


Logits stats - min: -9.1139, max: 2.8102
Target unique values: tensor([0], device='cuda:0')


Training:  88%|████████████████████▎  | 16098/18200 [06:27<00:51, 41.12it/s, loss=1.5184]


Logits stats - min: -9.8266, max: 2.6061
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1551, max: 2.3864
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3390, max: 1.7230
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▎  | 16109/18200 [06:27<00:47, 44.07it/s, loss=1.4728]


Logits stats - min: -9.1278, max: 2.1983
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2104, max: 2.2835
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9479, max: 2.4771
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0456, max: 2.2863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5378, max: 2.4790
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16124/18200 [06:28<00:51, 40.37it/s, loss=1.5213]


Logits stats - min: -7.6918, max: 2.2228
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16139/18200 [06:28<00:49, 41.57it/s, loss=1.1726]


Logits stats - min: -7.4935, max: 2.4946
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4038, max: 2.3748
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9725, max: 1.4927
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4272, max: 1.5209
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16149/18200 [06:28<00:49, 41.21it/s, loss=1.4786]


Logits stats - min: -8.5239, max: 2.2416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7711, max: 2.9453
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16164/18200 [06:29<00:48, 42.05it/s, loss=2.1906]


Logits stats - min: -8.7343, max: 1.6239
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2244, max: 2.3745
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3665, max: 2.7467
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16184/18200 [06:29<00:49, 40.69it/s, loss=1.1640]


Logits stats - min: -7.9050, max: 2.2238
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7069, max: 2.3105
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16204/18200 [06:30<00:45, 44.25it/s, loss=1.4573]


Logits stats - min: -9.0020, max: 2.1876
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5519, max: 2.7005
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.7637, max: 2.5325
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6651, max: 2.5710
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2702, max: 1.5845
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2395, max: 1.8987
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▍  | 16214/18200 [06:30<00:45, 43.87it/s, loss=1.3186]


Logits stats - min: -10.2153, max: 3.0377
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4129, max: 2.4984
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16224/18200 [06:30<00:50, 39.38it/s, loss=1.4873]


Logits stats - min: -8.1740, max: 2.1021
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8879, max: 2.3649
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16234/18200 [06:30<00:47, 41.03it/s, loss=2.1849]


Logits stats - min: -9.6325, max: 2.6186
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9470, max: 2.3113
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6236, max: 2.2155
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16244/18200 [06:31<00:47, 41.19it/s, loss=1.3358]


Logits stats - min: -9.4715, max: 2.8763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2465, max: 2.9998
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9285, max: 2.4474
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1851, max: 2.1491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5462, max: 2.5117
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0275, max: 2.2134
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16255/18200 [06:31<00:45, 43.20it/s, loss=1.5240]


Logits stats - min: -7.5361, max: 2.1256
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1024, max: 2.1628
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9368, max: 2.1393
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9410, max: 2.0861
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16265/18200 [06:31<00:43, 44.98it/s, loss=1.4574]


Logits stats - min: -10.8158, max: 1.8342
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2206, max: 2.9780
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9696, max: 2.2411
Target unique values: tensor([0], device='cuda:0')


Training:  89%|████████████████████▌  | 16280/18200 [06:31<00:46, 41.28it/s, loss=1.4753]


Logits stats - min: -7.8881, max: 2.3811
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0892, max: 2.3656
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3894, max: 2.3663
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▌  | 16304/18200 [06:32<00:47, 40.29it/s, loss=1.4550]


Logits stats - min: -8.3527, max: 2.2605
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8327, max: 2.0382
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▌  | 16319/18200 [06:32<00:47, 39.58it/s, loss=1.8766]


Logits stats - min: -7.9523, max: 2.2740
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16335/18200 [06:33<00:52, 35.19it/s, loss=1.9433]


Logits stats - min: -10.2432, max: 3.0808
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16347/18200 [06:33<00:50, 36.36it/s, loss=2.2219]


Logits stats - min: -8.0358, max: 1.9171
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8921, max: 2.3203
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16357/18200 [06:33<00:46, 39.31it/s, loss=1.4538]


Logits stats - min: -6.9847, max: 2.3057
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9524, max: 2.6584
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6074, max: 2.1418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6798, max: 2.3956
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16365/18200 [06:34<00:48, 38.16it/s, loss=1.4630]


Logits stats - min: -9.4264, max: 2.4466
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1144, max: 2.6197
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16378/18200 [06:34<00:47, 38.06it/s, loss=2.6606]


Logits stats - min: -7.8564, max: 2.1783
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5378, max: 2.6562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5994, max: 1.8937
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16387/18200 [06:34<00:47, 38.37it/s, loss=1.3548]


Logits stats - min: -9.5162, max: 2.5837
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▋  | 16395/18200 [06:34<00:48, 37.15it/s, loss=1.5388]


Logits stats - min: -9.4823, max: 2.7162
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4631, max: 2.5886
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1578, max: 2.9165
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0470, max: 2.5515
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16424/18200 [06:35<00:42, 41.31it/s, loss=2.1791]


Logits stats - min: -7.5511, max: 2.2118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1750, max: 2.6418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1520, max: 2.2779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5760, max: 2.6681
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16435/18200 [06:35<00:39, 44.59it/s, loss=1.5205]


Logits stats - min: -8.7139, max: 2.6032
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3764, max: 2.6365
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8884, max: 2.6183
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9036, max: 2.5182
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16445/18200 [06:36<00:42, 41.65it/s, loss=1.1630]


Logits stats - min: -10.1647, max: 2.5191
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16455/18200 [06:36<00:41, 41.84it/s, loss=1.4790]


Logits stats - min: -7.4620, max: 2.0924
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4570, max: 2.4418
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2951, max: 2.6685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4381, max: 2.7219
Target unique values: tensor([0], device='cuda:0')


Training:  90%|████████████████████▊  | 16470/18200 [06:36<00:42, 40.56it/s, loss=1.4754]


Logits stats - min: -10.4249, max: 3.0590
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16480/18200 [06:36<00:42, 40.34it/s, loss=1.1827]


Logits stats - min: -9.4497, max: 1.6486
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5197, max: 2.5406
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5113, max: 2.8329
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16490/18200 [06:37<00:41, 41.11it/s, loss=1.9441]


Logits stats - min: -9.7711, max: 2.6875
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6064, max: 2.1537
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▊  | 16510/18200 [06:37<00:41, 41.09it/s, loss=1.4712]


Logits stats - min: -9.7801, max: 2.5906
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3405, max: 2.4066
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0142, max: 3.3544
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16529/18200 [06:38<00:43, 38.71it/s, loss=1.4724]


Logits stats - min: -8.9343, max: 2.4805
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5898, max: 2.1797
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16538/18200 [06:38<00:41, 40.07it/s, loss=1.1836]


Logits stats - min: -8.1429, max: 2.1733
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5234, max: 1.9526
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8298, max: 2.8390
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16548/18200 [06:38<00:38, 42.77it/s, loss=1.7982]


Logits stats - min: -8.4972, max: 1.9242
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2894, max: 2.5973
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5333, max: 2.4869
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4160, max: 2.6405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2000, max: 3.0197
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16564/18200 [06:39<00:38, 42.82it/s, loss=1.4143]


Logits stats - min: -7.2163, max: 2.2320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.4344, max: 2.3987
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8997, max: 2.5886
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9879, max: 3.1219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9969, max: 2.2923
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16579/18200 [06:39<00:41, 39.39it/s, loss=2.0223]


Logits stats - min: -7.8579, max: 2.4847
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6303, max: 2.3196
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7927, max: 2.5359
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16589/18200 [06:39<00:39, 40.88it/s, loss=1.4733]


Logits stats - min: -8.9874, max: 2.5228
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16602/18200 [06:39<00:42, 37.45it/s, loss=1.3215]


Logits stats - min: -9.0699, max: 2.5102
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7171, max: 2.5708
Target unique values: tensor([0], device='cuda:0')


Training:  91%|████████████████████▉  | 16612/18200 [06:40<00:38, 41.11it/s, loss=1.4698]


Logits stats - min: -8.5735, max: 1.6300
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0717, max: 2.7111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6639, max: 2.4425
Target unique values: tensor([0], device='cuda:0')


Training:  91%|█████████████████████  | 16623/18200 [06:40<00:36, 43.53it/s, loss=1.5287]


Logits stats - min: -7.0107, max: 2.2201
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.7346, max: 3.0228
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1995, max: 2.3853
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0766, max: 2.0026
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7436, max: 1.8411
Target unique values: tensor([0], device='cuda:0')


Training:  91%|█████████████████████  | 16628/18200 [06:40<00:38, 41.00it/s, loss=1.5191]


Logits stats - min: -7.8281, max: 2.5559
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3972, max: 2.9020
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0445, max: 2.1684
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16655/18200 [06:41<00:41, 36.96it/s, loss=1.1638]


Logits stats - min: -9.3523, max: 2.4465
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3234, max: 2.3195
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16665/18200 [06:41<00:39, 38.87it/s, loss=1.4681]


Logits stats - min: -7.9479, max: 2.2817
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6676, max: 2.5660
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16682/18200 [06:41<00:38, 39.38it/s, loss=1.9670]


Logits stats - min: -9.0389, max: 1.7862
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1598, max: 2.5412
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5793, max: 2.2772
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16700/18200 [06:42<00:39, 37.61it/s, loss=1.3554]


Logits stats - min: -10.1256, max: 2.7033
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7455, max: 2.5327
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████  | 16714/18200 [06:42<00:37, 39.62it/s, loss=1.9907]


Logits stats - min: -6.3574, max: 1.7210
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4279, max: 2.3547
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0088, max: 2.8687
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16719/18200 [06:43<00:36, 40.17it/s, loss=1.5054]


Logits stats - min: -7.4285, max: 2.2860
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16733/18200 [06:43<00:37, 39.52it/s, loss=1.9155]


Logits stats - min: -7.3132, max: 2.1863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9874, max: 2.6941
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4833, max: 2.6187
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16743/18200 [06:43<00:36, 39.70it/s, loss=2.0268]


Logits stats - min: -7.5673, max: 2.3669
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5232, max: 2.3045
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5866, max: 2.6116
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8452, max: 2.4898
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16757/18200 [06:43<00:37, 38.56it/s, loss=1.5337]


Logits stats - min: -7.1408, max: 2.1144
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16767/18200 [06:44<00:33, 42.71it/s, loss=1.9570]


Logits stats - min: -7.3188, max: 2.2821
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5289, max: 2.4481
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6269, max: 2.3068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4268, max: 2.0650
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.7127, max: 2.2453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4681, max: 2.7229
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16777/18200 [06:44<00:34, 41.49it/s, loss=1.7973]


Logits stats - min: -9.6541, max: 2.5680
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2083, max: 1.9511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4383, max: 2.7709
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7650, max: 2.6561
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16787/18200 [06:44<00:34, 41.10it/s, loss=1.9562]


Logits stats - min: -8.1046, max: 2.5169
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5835, max: 2.5825
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1782, max: 2.4311
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16796/18200 [06:44<00:36, 38.92it/s, loss=1.5411]


Logits stats - min: -9.6761, max: 2.4951
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.9287, max: 3.2023
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▏ | 16811/18200 [06:45<00:33, 41.97it/s, loss=2.3545]


Logits stats - min: -7.9254, max: 2.1976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3413, max: 3.2373
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.1166, max: 1.8577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3674, max: 2.6587
Target unique values: tensor([0], device='cuda:0')


Training:  92%|█████████████████████▎ | 16826/18200 [06:45<00:33, 41.00it/s, loss=1.4544]


Logits stats - min: -9.6790, max: 2.7163
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6896, max: 1.9515
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3117, max: 2.5609
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8904, max: 2.4793
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6191, max: 2.3791
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16841/18200 [06:46<00:32, 41.25it/s, loss=1.3427]


Logits stats - min: -11.2307, max: 2.7155
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2179, max: 2.8549
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0294, max: 2.3857
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16851/18200 [06:46<00:32, 41.21it/s, loss=1.4722]


Logits stats - min: -8.9043, max: 2.3624
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3311, max: 2.5417
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3756, max: 3.3219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.8496, max: 1.5912
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16871/18200 [06:46<00:33, 39.54it/s, loss=2.8765]


Logits stats - min: -10.2744, max: 2.0916
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8310, max: 2.3939
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16884/18200 [06:47<00:31, 41.28it/s, loss=1.5207]


Logits stats - min: -10.7193, max: 3.0060
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5713, max: 2.4738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2033, max: 1.6574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4039, max: 2.3694
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16894/18200 [06:47<00:32, 40.47it/s, loss=1.1607]


Logits stats - min: -9.4832, max: 2.6091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1470, max: 2.9979
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▎ | 16903/18200 [06:47<00:34, 38.08it/s, loss=1.3702]


Logits stats - min: -8.4345, max: 2.5960
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4278, max: 2.6976
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6823, max: 2.1530
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5399, max: 2.7086
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16919/18200 [06:47<00:30, 41.99it/s, loss=1.3120]


Logits stats - min: -9.4549, max: 2.7288
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8867, max: 2.5328
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9229, max: 2.4583
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16934/18200 [06:48<00:30, 42.13it/s, loss=1.5013]


Logits stats - min: -7.4909, max: 2.4156
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8825, max: 2.4747
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5762, max: 2.1833
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16944/18200 [06:48<00:31, 40.15it/s, loss=1.4666]


Logits stats - min: -8.3143, max: 1.6853
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16954/18200 [06:48<00:31, 39.61it/s, loss=1.5198]


Logits stats - min: -10.1622, max: 2.8136
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7833, max: 2.2159
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16960/18200 [06:48<00:29, 42.54it/s, loss=1.1635]


Logits stats - min: -11.5666, max: 2.1163
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6082, max: 2.2306
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0121, max: 2.5353
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.4414, max: 3.0858
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16980/18200 [06:49<00:29, 41.91it/s, loss=1.1897]


Logits stats - min: -7.1852, max: 2.2870
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6732, max: 2.2763
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8132, max: 1.6495
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 16990/18200 [06:49<00:29, 41.70it/s, loss=1.7840]


Logits stats - min: -10.0965, max: 2.7610
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8953, max: 2.7849
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1506, max: 2.3948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4013, max: 2.6095
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.1001, max: 3.5423
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 17000/18200 [06:49<00:28, 42.14it/s, loss=1.5029]


Logits stats - min: -9.7861, max: 2.7280
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1424, max: 2.2098
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.8733, max: 2.9130
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0052, max: 2.7313
Target unique values: tensor([0], device='cuda:0')


Training:  93%|█████████████████████▍ | 17010/18200 [06:50<00:27, 42.67it/s, loss=1.8117]


Logits stats - min: -7.3006, max: 2.1611
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8893, max: 2.0848
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17020/18200 [06:50<00:29, 40.40it/s, loss=1.1819]


Logits stats - min: -8.5276, max: 2.4444
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.6722, max: 2.7012
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17030/18200 [06:50<00:28, 41.16it/s, loss=2.7790]


Logits stats - min: -7.9883, max: 2.3273
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.7759, max: 1.9125
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3013, max: 2.3294
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17050/18200 [06:51<00:29, 38.53it/s, loss=2.2730]


Logits stats - min: -10.3886, max: 2.9068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7962, max: 2.5887
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17058/18200 [06:51<00:29, 38.29it/s, loss=1.1763]


Logits stats - min: -10.6591, max: 3.3192
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3496, max: 2.7851
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17072/18200 [06:51<00:28, 40.10it/s, loss=1.9781]


Logits stats - min: -7.7060, max: 2.0930
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6212, max: 1.7410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2635, max: 2.7063
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9543, max: 2.3779
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0937, max: 1.6919
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17082/18200 [06:51<00:25, 43.10it/s, loss=1.5232]


Logits stats - min: -7.4729, max: 2.2947
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6827, max: 2.5848
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7687, max: 2.2321
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17097/18200 [06:52<00:27, 40.29it/s, loss=1.1714]


Logits stats - min: -7.8596, max: 2.1738
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▌ | 17102/18200 [06:52<00:28, 38.95it/s, loss=1.4773]


Logits stats - min: -7.5053, max: 2.1299
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8019, max: 2.4230
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17117/18200 [06:52<00:25, 41.94it/s, loss=2.8132]


Logits stats - min: -9.3782, max: 2.5772
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.8807, max: 2.1391
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4153, max: 2.1719
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17127/18200 [06:53<00:26, 41.17it/s, loss=1.8765]


Logits stats - min: -7.1284, max: 2.4927
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17137/18200 [06:53<00:28, 37.02it/s, loss=1.5170]


Logits stats - min: -6.9812, max: 1.7646
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9864, max: 2.9249
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17151/18200 [06:53<00:28, 37.43it/s, loss=1.4573]


Logits stats - min: -7.3429, max: 2.6348
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1222, max: 2.5104
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17165/18200 [06:53<00:25, 39.81it/s, loss=1.9104]


Logits stats - min: -9.9278, max: 2.9423
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8273, max: 3.0478
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17176/18200 [06:54<00:22, 45.98it/s, loss=1.4887]


Logits stats - min: -10.5364, max: 3.5143
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6252, max: 2.6410
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5130, max: 2.7231
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5782, max: 2.1360
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0184, max: 2.2435
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4616, max: 2.6012
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17181/18200 [06:54<00:23, 43.64it/s, loss=2.0914]


Logits stats - min: -9.2663, max: 2.6754
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2613, max: 2.9352
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5972, max: 2.7583
Target unique values: tensor([0], device='cuda:0')


Training:  94%|█████████████████████▋ | 17196/18200 [06:54<00:22, 44.80it/s, loss=1.1694]


Logits stats - min: -7.6558, max: 2.2162
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7171, max: 3.1685
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5360, max: 2.2915
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0070, max: 2.5213
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5111, max: 3.0062
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▋ | 17206/18200 [06:54<00:22, 43.57it/s, loss=1.0675]


Logits stats - min: -7.7117, max: 2.3069
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5791, max: 2.4781
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17216/18200 [06:55<00:24, 40.72it/s, loss=1.5120]


Logits stats - min: -6.8562, max: 1.5866
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.7987, max: 3.2274
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17226/18200 [06:55<00:22, 43.42it/s, loss=1.9287]


Logits stats - min: -8.1114, max: 2.3491
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.4254, max: 3.0506
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4219, max: 2.1796
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17236/18200 [06:55<00:24, 39.38it/s, loss=1.4804]


Logits stats - min: -9.1181, max: 1.6046
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.9682, max: 2.3733
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17250/18200 [06:56<00:23, 40.73it/s, loss=1.8459]


Logits stats - min: -7.8335, max: 2.4448
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7505, max: 2.5934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.6644, max: 3.6720
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7295, max: 2.1552
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17265/18200 [06:56<00:22, 40.74it/s, loss=1.4672]


Logits stats - min: -7.4754, max: 2.4397
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1258, max: 2.8197
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0511, max: 2.4860
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17281/18200 [06:56<00:20, 43.96it/s, loss=1.5167]


Logits stats - min: -9.9392, max: 2.6889
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.7095, max: 3.6780
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9487, max: 2.7111
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8174, max: 1.9424
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17291/18200 [06:56<00:22, 40.19it/s, loss=1.4621]


Logits stats - min: -7.7813, max: 2.4842
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9055, max: 2.1158
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▊ | 17301/18200 [06:57<00:22, 40.05it/s, loss=1.1573]


Logits stats - min: -8.2711, max: 2.4049
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4266, max: 1.7537
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17316/18200 [06:57<00:22, 39.73it/s, loss=1.9240]


Logits stats - min: -7.6914, max: 2.1030
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4094, max: 2.2562
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5777, max: 2.8464
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17326/18200 [06:57<00:21, 40.25it/s, loss=1.4575]


Logits stats - min: -7.9579, max: 2.1083
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17344/18200 [06:58<00:22, 37.52it/s, loss=2.7340]


Logits stats - min: -8.0453, max: 2.2922
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6534, max: 2.5371
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17361/18200 [06:58<00:21, 39.27it/s, loss=1.1657]


Logits stats - min: -6.9520, max: 2.3490
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9273, max: 2.7454
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1720, max: 2.4840
Target unique values: tensor([0], device='cuda:0')


Training:  95%|█████████████████████▉ | 17371/18200 [06:58<00:20, 41.23it/s, loss=1.3029]


Logits stats - min: -10.1510, max: 2.7574
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2632, max: 2.4920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8083, max: 2.3545
Target unique values: tensor([0], device='cuda:0')


Training:  96%|█████████████████████▉ | 17381/18200 [06:59<00:19, 41.19it/s, loss=1.5366]


Logits stats - min: -8.0706, max: 2.0810
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4389, max: 2.6569
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6073, max: 2.7405
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -11.7164, max: 3.4577
Target unique values: tensor([0], device='cuda:0')


Training:  96%|█████████████████████▉ | 17396/18200 [06:59<00:19, 42.28it/s, loss=1.3161]


Logits stats - min: -7.0752, max: 1.8223
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2738, max: 2.5219
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3295, max: 2.6324
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6907, max: 2.6297
Target unique values: tensor([0], device='cuda:0')


Training:  96%|█████████████████████▉ | 17406/18200 [06:59<00:18, 43.57it/s, loss=2.7134]


Logits stats - min: -9.4867, max: 2.3621
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4246, max: 2.3199
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17421/18200 [07:00<00:18, 43.12it/s, loss=1.4433]


Logits stats - min: -9.3805, max: 2.5690
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9545, max: 2.6539
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4370, max: 2.3155
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8604, max: 2.3844
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17436/18200 [07:00<00:19, 39.05it/s, loss=1.5188]


Logits stats - min: -7.4024, max: 2.2391
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2180, max: 2.4573
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17446/18200 [07:00<00:18, 40.51it/s, loss=1.5191]


Logits stats - min: -7.9441, max: 2.3912
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6351, max: 2.5099
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8260, max: 2.3227
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3974, max: 2.7807
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8013, max: 2.2112
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17456/18200 [07:01<00:17, 42.52it/s, loss=1.5135]


Logits stats - min: -7.7007, max: 2.5050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8076, max: 2.4518
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17479/18200 [07:01<00:18, 39.26it/s, loss=1.1554]


Logits stats - min: -7.2967, max: 2.1575
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9573, max: 2.4804
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3286, max: 2.2234
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17487/18200 [07:01<00:18, 38.43it/s, loss=1.5090]


Logits stats - min: -7.6219, max: 2.0453
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9020, max: 2.3015
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████ | 17497/18200 [07:02<00:17, 40.26it/s, loss=1.5213]


Logits stats - min: -7.7968, max: 2.2480
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.6506, max: 1.7294
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3858, max: 2.7150
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4706, max: 2.3467
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17508/18200 [07:02<00:16, 42.54it/s, loss=1.3581]


Logits stats - min: -7.6733, max: 2.2329
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5689, max: 2.4209
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17518/18200 [07:02<00:16, 40.81it/s, loss=2.0799]


Logits stats - min: -7.2731, max: 2.0541
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0184, max: 2.5218
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17532/18200 [07:03<00:17, 39.19it/s, loss=1.4517]


Logits stats - min: -9.8060, max: 2.5948
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0809, max: 2.3315
Target unique values: tensor([0], device='cuda:0')


Training:  96%|██████████████████████▏| 17552/18200 [07:03<00:16, 38.35it/s, loss=1.5128]


Logits stats - min: -9.6795, max: 2.5149
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1217, max: 1.5989
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17570/18200 [07:04<00:15, 39.76it/s, loss=2.1841]


Logits stats - min: -10.4812, max: 3.3714
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17578/18200 [07:04<00:16, 37.78it/s, loss=1.2967]


Logits stats - min: -10.3394, max: 2.5614
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7897, max: 2.1085
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17587/18200 [07:04<00:15, 39.48it/s, loss=1.4467]


Logits stats - min: -10.0320, max: 2.8697
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8404, max: 2.3712
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3233, max: 2.3650
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▏| 17602/18200 [07:04<00:14, 40.18it/s, loss=1.3045]


Logits stats - min: -7.6382, max: 2.2816
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8739, max: 2.1056
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17611/18200 [07:05<00:15, 38.94it/s, loss=1.5125]


Logits stats - min: -8.4720, max: 1.4782
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17621/18200 [07:05<00:13, 41.56it/s, loss=2.2232]


Logits stats - min: -8.0538, max: 1.9651
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1908, max: 2.4863
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5719, max: 2.5533
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5238, max: 2.5458
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17630/18200 [07:05<00:14, 38.50it/s, loss=1.3012]


Logits stats - min: -10.6762, max: 3.0265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0090, max: 2.1961
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17646/18200 [07:05<00:14, 37.99it/s, loss=1.3348]


Logits stats - min: -7.9877, max: 2.3844
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6265, max: 2.8069
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17656/18200 [07:06<00:13, 41.74it/s, loss=1.5093]


Logits stats - min: -10.0237, max: 2.8715
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2583, max: 3.1266
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8824, max: 2.7216
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17666/18200 [07:06<00:12, 42.71it/s, loss=2.0359]


Logits stats - min: -10.6377, max: 3.1599
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.5097, max: 2.5911
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3750, max: 2.4385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0308, max: 2.6272
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17681/18200 [07:06<00:12, 40.19it/s, loss=1.5020]


Logits stats - min: -8.0581, max: 2.2060
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4789, max: 2.7315
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0288, max: 2.6852
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17695/18200 [07:07<00:12, 40.10it/s, loss=1.4891]


Logits stats - min: -9.7299, max: 2.4511
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▎| 17705/18200 [07:07<00:12, 39.50it/s, loss=1.5175]


Logits stats - min: -7.1814, max: 2.2118
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.9532, max: 2.3511
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▍| 17720/18200 [07:07<00:12, 38.69it/s, loss=2.3280]


Logits stats - min: -10.3879, max: 2.6243
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▍| 17730/18200 [07:07<00:11, 42.18it/s, loss=2.7688]


Logits stats - min: -8.2072, max: 2.4193
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4836, max: 2.4144
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7402, max: 2.4934
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3678, max: 2.7254
Target unique values: tensor([0], device='cuda:0')


Training:  97%|██████████████████████▍| 17740/18200 [07:08<00:10, 42.46it/s, loss=1.5204]


Logits stats - min: -9.8910, max: 2.6180
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5966, max: 2.1316
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17750/18200 [07:08<00:11, 38.85it/s, loss=2.2209]


Logits stats - min: -7.5074, max: 2.2085
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17766/18200 [07:08<00:12, 35.98it/s, loss=1.4698]


Logits stats - min: -8.9510, max: 2.5345
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17781/18200 [07:09<00:09, 42.12it/s, loss=1.3045]


Logits stats - min: -7.7519, max: 2.2424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6168, max: 2.6582
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0303, max: 2.7074
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4962, max: 2.6240
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.8864, max: 2.5397
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17786/18200 [07:09<00:09, 42.53it/s, loss=1.5159]


Logits stats - min: -9.5974, max: 2.8385
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3722, max: 2.6803
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▍| 17801/18200 [07:09<00:09, 43.16it/s, loss=2.1867]


Logits stats - min: -10.7082, max: 2.7880
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5959, max: 2.9679
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4990, max: 2.6822
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.5748, max: 1.6455
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1570, max: 2.5054
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17806/18200 [07:09<00:09, 43.25it/s, loss=1.6370]


Logits stats - min: -9.8608, max: 2.3416
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.4350, max: 2.8091
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2059, max: 1.7714
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17816/18200 [07:10<00:09, 39.53it/s, loss=1.4592]


Logits stats - min: -10.3223, max: 2.3899
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0819, max: 1.9077
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17826/18200 [07:10<00:08, 42.09it/s, loss=2.2712]


Logits stats - min: -8.5141, max: 2.5460
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.5668, max: 2.9493
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.8001, max: 2.8950
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0719, max: 2.6612
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4202, max: 2.5189
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17841/18200 [07:10<00:08, 43.56it/s, loss=1.1575]


Logits stats - min: -7.5860, max: 2.4380
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.0577, max: 2.2495
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.4307, max: 2.4735
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.9755, max: 1.6040
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17856/18200 [07:11<00:08, 39.67it/s, loss=1.4552]


Logits stats - min: -7.1595, max: 2.2158
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17866/18200 [07:11<00:08, 39.43it/s, loss=1.3334]


Logits stats - min: -7.8530, max: 2.2577
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.8853, max: 2.2738
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.6279, max: 2.2294
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17876/18200 [07:11<00:07, 41.20it/s, loss=1.5213]


Logits stats - min: -7.3297, max: 1.6939
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3982, max: 2.8355
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17886/18200 [07:11<00:07, 41.63it/s, loss=1.5108]


Logits stats - min: -7.8606, max: 2.2975
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.6395, max: 2.8404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0242, max: 2.3741
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▌| 17896/18200 [07:12<00:07, 41.47it/s, loss=2.8708]


Logits stats - min: -9.6000, max: 2.6114
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6605, max: 3.2705
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1731, max: 2.8649
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▋| 17906/18200 [07:12<00:07, 41.10it/s, loss=1.3160]


Logits stats - min: -9.3558, max: 2.4376
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.5566, max: 2.0909
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0786, max: 2.1267
Target unique values: tensor([0], device='cuda:0')


Training:  98%|██████████████████████▋| 17916/18200 [07:12<00:06, 42.76it/s, loss=2.8126]


Logits stats - min: -8.5036, max: 1.6424
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2513, max: 2.3346
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.3801, max: 2.1848
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17931/18200 [07:12<00:06, 39.51it/s, loss=1.5204]


Logits stats - min: -10.5593, max: 2.8118
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17940/18200 [07:13<00:06, 40.33it/s, loss=1.3268]


Logits stats - min: -9.2432, max: 2.5212
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3778, max: 2.7926
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.9737, max: 2.8375
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17950/18200 [07:13<00:06, 39.81it/s, loss=1.5174]


Logits stats - min: -10.7721, max: 3.1301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -6.3020, max: 1.8956
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17964/18200 [07:13<00:06, 36.96it/s, loss=1.6212]


Logits stats - min: -7.8293, max: 1.9900
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17973/18200 [07:14<00:05, 37.93it/s, loss=1.1431]


Logits stats - min: -10.2137, max: 2.5759
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7992, max: 1.7301
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.2445, max: 2.2227
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17987/18200 [07:14<00:05, 40.10it/s, loss=2.0279]


Logits stats - min: -10.8036, max: 3.1697
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▋| 17997/18200 [07:14<00:05, 38.56it/s, loss=2.8043]


Logits stats - min: -9.5321, max: 2.6661
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18007/18200 [07:14<00:05, 37.38it/s, loss=1.1342]


Logits stats - min: -7.5001, max: 2.2100
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18015/18200 [07:15<00:05, 36.59it/s, loss=1.5145]


Logits stats - min: -9.7183, max: 2.8563
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8571, max: 2.3452
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18024/18200 [07:15<00:04, 38.60it/s, loss=1.2923]


Logits stats - min: -10.0043, max: 2.8811
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18040/18200 [07:15<00:03, 44.76it/s, loss=1.4743]


Logits stats - min: -9.8344, max: 2.9124
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1851, max: 2.4979
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.7993, max: 3.0293
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7988, max: 2.3920
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4602, max: 2.8405
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18050/18200 [07:15<00:03, 43.09it/s, loss=1.5089]


Logits stats - min: -9.5166, max: 2.6233
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2970, max: 1.7555
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18060/18200 [07:16<00:03, 39.34it/s, loss=1.5132]


Logits stats - min: -10.1053, max: 1.7636
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.9754, max: 2.8918
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18074/18200 [07:16<00:03, 40.49it/s, loss=1.5097]


Logits stats - min: -8.9457, max: 2.3207
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.8402, max: 2.3511
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.0224, max: 2.3716
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4488, max: 2.3524
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18089/18200 [07:16<00:02, 44.21it/s, loss=1.5122]


Logits stats - min: -7.4296, max: 2.4135
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.0127, max: 2.6522
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.4600, max: 3.0946
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2275, max: 2.8623
Target unique values: tensor([0], device='cuda:0')


Training:  99%|██████████████████████▊| 18100/18200 [07:17<00:02, 45.49it/s, loss=1.1365]


Logits stats - min: -7.9459, max: 2.1058
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6809, max: 2.9050
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1339, max: 1.8693
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18110/18200 [07:17<00:02, 40.91it/s, loss=2.0056]


Logits stats - min: -9.5341, max: 2.7188
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18120/18200 [07:17<00:01, 40.58it/s, loss=1.1577]


Logits stats - min: -7.6130, max: 2.3613
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2297, max: 1.6532
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18135/18200 [07:17<00:01, 43.09it/s, loss=1.4768]


Logits stats - min: -9.7433, max: 2.3868
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.1942, max: 2.6914
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.0942, max: 2.6009
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3506, max: 2.4742
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3496, max: 2.1651
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18146/18200 [07:18<00:01, 45.69it/s, loss=1.1332]


Logits stats - min: -10.1170, max: 2.5980
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.4754, max: 2.1701
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.6089, max: 2.7019
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18156/18200 [07:18<00:00, 44.02it/s, loss=1.1453]


Logits stats - min: -9.2710, max: 1.6529
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4849, max: 2.4008
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4916, max: 2.3739
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18171/18200 [07:18<00:00, 42.64it/s, loss=1.3049]


Logits stats - min: -8.6679, max: 2.6411
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.7058, max: 2.3265
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.2075, max: 2.4593
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18177/18200 [07:19<00:00, 43.69it/s, loss=1.5154]


Logits stats - min: -8.6660, max: 1.5542
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.1930, max: 1.7456
Target unique values: tensor([0], device='cuda:0')


Training: 100%|██████████████████████▉| 18188/18200 [07:19<00:00, 46.48it/s, loss=1.4623]


Logits stats - min: -8.0247, max: 1.7707
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.2849, max: 2.1221
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.7642, max: 2.6122
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -10.3496, max: 2.5068
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.2433, max: 2.6456
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -7.4818, max: 2.2320
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.1215, max: 2.6332
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -8.3717, max: 1.5535
Target unique values: tensor([0], device='cuda:0')


                                                                                         


Logits stats - min: -7.5258, max: 2.0404
Target unique values: tensor([0], device='cuda:0')

Logits stats - min: -9.3145, max: 2.4750
Target unique values: tensor([0], device='cuda:0')


                                                                                         


Results:
  Train Loss: 1.2185 | Train Acc: 0.8215
  Val Loss  : nan | Val Acc  : 0.0480
  Val mIoU  : 0.0141
  Time      : 9.30 min
  IoU per class: [0.0000000e+00 1.8885680e-02 1.9671199e-05 0.0000000e+00 0.0000000e+00
 6.5649413e-02]

Training selesai!
Best validation mIoU: 0.0649




In [None]:
# ===========================================
# Cell 8. Plot Training History
# ===========================================
# Draws a 1x3 figure from the `history` dict (loss, pixel accuracy, validation
# mIoU), saves it as "training_history_ver3.png" and displays it.

fig, axes = plt.subplots(1, 3, figsize=(18, 4))

# One entry per panel, in left-to-right order:
#   (y-axis label, panel title, [(history key, legend label, plot kwargs), ...])
panel_specs = [
    ("Loss", "Training vs Validation Loss", [
        ("train_loss", "Train Loss", dict(marker='o')),
        ("val_loss", "Val Loss", dict(marker='s')),
    ]),
    ("Pixel Accuracy", "Training vs Validation Accuracy", [
        ("train_acc", "Train Acc", dict(marker='o')),
        ("val_acc", "Val Acc", dict(marker='s')),
    ]),
    ("Mean IoU", "Validation mIoU", [
        ("val_miou", "Val mIoU", dict(marker='d', color='green')),
    ]),
]

for panel_ax, (y_label, panel_title, curves) in zip(axes, panel_specs):
    # Curves are drawn in the listed order so the legend order is stable.
    for hist_key, legend_label, plot_kwargs in curves:
        panel_ax.plot(history[hist_key], label=legend_label, **plot_kwargs)
    panel_ax.set_xlabel("Epoch")
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(panel_title)
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()
plt.savefig("training_history_ver3.png", dpi=150)
plt.show()

In [None]:
# ===========================================
# Cell 9. Testing and Visualization
# ===========================================

# Restore the weights stored under "model_state" in the best checkpoint file
best_checkpoint = torch.load(best_model_path, map_location=device)
model.load_state_dict(best_checkpoint["model_state"])
print(f"Loaded best model from epoch {best_checkpoint['epoch']}")

# Run the validation routine on the test loader with a fresh metrics object
test_metrics = SegmentationMetrics(num_classes_actual, ignore_index=0)
test_results = validate(model, test_loader, criterion, test_metrics, device)
test_loss, test_acc, test_miou, test_iou_per_class = test_results

# Summary report; the per-class IoU printout skips index 0 (the ignored class)
separator = "="*60
print("\n" + separator)
print("TEST RESULTS")
print(separator)
print(f"Test Loss     : {test_loss:.4f}")
print(f"Test Accuracy : {test_acc:.4f}")
print(f"Test mIoU     : {test_miou:.4f}")
print(f"IoU per class : {test_iou_per_class[1:]}")

# Collect the first sample of each of the first `num_vis` test batches
model.eval()
num_vis = 3
vis_samples = []

with torch.no_grad():
    for batch_idx, (inputs, labels) in enumerate(test_loader):
        if batch_idx >= num_vis:
            break
        inputs = inputs.to(device)
        predicted = model(inputs).argmax(dim=1)  # per-pixel class index

        vis_samples.append((inputs[0], labels[0], predicted[0]))

# Render each collected sample: input tile, ground-truth mask, predicted mask
for sample_idx, (tile, mask_true, mask_pred) in enumerate(vis_samples):
    visualize_tile(
        tile,
        mask_true.cpu().numpy(),
        mask_pred.cpu().numpy(),
        json_path=label_json_path,
        idx=sample_idx,
    )

print("\nSelesai!")

In [None]:
# ===========================================
# Cell 10. Confusion Matrix
# ===========================================
# Runs the model over `test_loader`, gathers predictions and targets at every
# pixel whose target is not 0 (background), builds the confusion matrix over
# classes 1..num_classes_actual-1, and saves/shows it as a heatmap.

from sklearn.metrics import confusion_matrix
import seaborn as sns

# Per-batch NumPy arrays are collected and concatenated once at the end.
# Extending a Python list pixel by pixel would create one Python object per
# pixel, which is very slow and memory-hungry for segmentation outputs.
pred_chunks = []
target_chunks = []

model.eval()
with torch.no_grad():
    for xb, yb in tqdm(test_loader, desc="Computing CM"):
        xb = xb.to(device)
        logits = model(xb)
        preds = logits.argmax(dim=1)  # per-pixel predicted class index

        # Flatten to 1-D so predictions and targets align pixel by pixel
        preds_flat = preds.cpu().numpy().ravel()
        targets_flat = yb.numpy().ravel()

        valid = targets_flat != 0  # Exclude background
        pred_chunks.append(preds_flat[valid])
        target_chunks.append(targets_flat[valid])

# np.concatenate rejects an empty sequence, so fall back to empty arrays when
# the loader yielded no batches.
if pred_chunks:
    all_preds = np.concatenate(pred_chunks)
    all_targets = np.concatenate(target_chunks)
else:
    all_preds = np.empty(0, dtype=np.int64)
    all_targets = np.empty(0, dtype=np.int64)

# Compute confusion matrix
# NOTE(review): restricting `labels` to 1..num_classes_actual-1 also leaves out
# pixels that were *predicted* as class 0, so a row can sum to fewer pixels
# than that class has in the ground truth - confirm this is intended.
class_labels = list(range(1, num_classes_actual))
cm = confusion_matrix(all_targets, all_preds, labels=class_labels)

# Plot
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix (Test Set - Excluding Background)')
plt.tight_layout()
plt.savefig('confusion_matrix_ver3.png', dpi=150)
plt.show()

print("Confusion matrix saved!")