In [1]:
import torch

print("="*80)
print("CUDA DIAGNOSTICS")
print("="*80)

# Check CUDA availability
print(f"\nCUDA Available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"PyTorch Version: {torch.__version__}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    
    for i in range(torch.cuda.device_count()):
        print(f"\n--- GPU {i} ---")
        print(f"Name: {torch.cuda.get_device_name(i)}")
        print(f"Capability: {torch.cuda.get_device_capability(i)}")
        
        # Memory info
        props = torch.cuda.get_device_properties(i)
        print(f"Total Memory: {props.total_memory / 1024**3:.2f} GB")
        print(f"Available Memory: {torch.cuda.mem_get_info(i)[0] / 1024**3:.2f} GB")
        print(f"Allocated Memory: {torch.cuda.memory_allocated(i) / 1024**3:.2f} GB")
        
        # Test tensor creation
        try:
            test_tensor = torch.randn(100, 100).cuda(i)
            print(f"‚úì Can create tensors on GPU {i}")
        except Exception as e:
            print(f"‚úó Error creating tensor: {e}")
else:
    print("\n‚úó CUDA is NOT available!")


CUDA DIAGNOSTICS

CUDA Available: True
CUDA Version: 12.6
PyTorch Version: 2.9.1+cu126
Number of GPUs: 1

--- GPU 0 ---
Name: NVIDIA RTX A1000
Capability: (8, 6)
Total Memory: 8.00 GB
Available Memory: 7.03 GB
Allocated Memory: 0.00 GB
‚úì Can create tensors on GPU 0


In [1]:
import torch
import gc

# Kill all CUDA processes
torch.cuda.empty_cache()
gc.collect()

# Reset peak memory stats
torch.cuda.reset_peak_memory_stats()
torch.cuda.reset_accumulated_memory_stats()

print(f"Available Memory: {torch.cuda.mem_get_info(0)[0] / 1024**3:.2f} GB")

Available Memory: 7.03 GB


In [2]:
# Core dependencies
#!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

!pip install torchaudio transformers einops tqdm descript-audio-codec
#audiotools

Defaulting to user installation because normal site-packages is not writeable
Collecting protobuf<3.20,>=3.9.2 (from descript-audiotools>=0.7.2->descript-audio-codec)
  Using cached protobuf-3.19.6-py2.py3-none-any.whl.metadata (828 bytes)
Using cached protobuf-3.19.6-py2.py3-none-any.whl (162 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.28.3
    Uninstalling protobuf-5.28.3:
      Successfully uninstalled protobuf-5.28.3
Successfully installed protobuf-3.19.6


  You can safely remove it manually.

[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
!pip install --retries 10 --timeout 30 descript-audio-codec
!pip install --retries 10 --timeout 30 git+https://github.com/descriptinc/audiotools

Collecting descript-audio-codec
  Using cached descript_audio_codec-1.0.0-py3-none-any.whl.metadata (7.8 kB)
Collecting argbind>=0.3.7 (from descript-audio-codec)
  Downloading argbind-0.3.9.tar.gz (17 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting descript-audiotools>=0.7.2 (from descript-audio-codec)
  Downloading descript_audiotools-0.7.2-py2.py3-none-any.whl.metadata (3.4 kB)
Collecting docstring-parser (from argbind>=0.3.7->descript-audio-codec)
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting pyloudnorm (from descript-audiotools>=0.7.2->descript-audio-codec)
  Downloading pyloudnorm-0.1.1-py3-none-any.whl.metadata (5.6 kB)
Collecting importlib-resources

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
databricks-sdk 0.70.0 requires protobuf<7.0,>=4.21.0, but you have protobuf 3.19.6 which is incompatible.
opentelemetry-proto 1.38.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.19.6 which is incompatible.
ray 2.52.1 requires protobuf>=3.20.3, but you have protobuf 3.19.6 which is incompatible.
tensorflow 2.20.0 requires protobuf>=5.28.0, but you have protobuf 3.19.6 which is incompatible.


Collecting git+https://github.com/descriptinc/audiotools
  Cloning https://github.com/descriptinc/audiotools to c:\users\user\appdata\local\temp\pip-req-build-5_d7i_qg


  Running command git clone --filter=blob:none --quiet https://github.com/descriptinc/audiotools 'C:\Users\user\AppData\Local\Temp\pip-req-build-5_d7i_qg'
  fatal: unable to access 'https://github.com/descriptinc/audiotools/': Could not resolve host: github.com
  error: subprocess-exited-with-error
  
  git clone --filter=blob:none --quiet https://github.com/descriptinc/audiotools 'C:\Users\user\AppData\Local\Temp\pip-req-build-5_d7i_qg' did not run successfully.
  exit code: 128
  
  No available output.
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
ERROR: Failed to build 'git+https://github.com/descriptinc/audiotools' when git clone --filter=blob:none --quiet https://github.com/descriptinc/audiotools 'c:\users\user\appdata\local\temp\pip-req-build-5_d7i_qg'


In [3]:
# In notebook:
!pip install "C:/Users/user/Downloads/protobuf-5.28.3-cp310-abi3-win_amd64.whl" --force-reinstal

Defaulting to user installation because normal site-packages is not writeable
Processing c:\users\user\downloads\protobuf-5.28.3-cp310-abi3-win_amd64.whl
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.19.6
    Uninstalling protobuf-3.19.6:
      Successfully uninstalled protobuf-3.19.6
Successfully installed protobuf-5.28.3


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
descript-audiotools 0.7.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 5.28.3 which is incompatible.

[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os

dac_model_path = "C:/Users/user/Downloads/weights_44khz_16kbps.pth"

if os.path.exists(dac_model_path):
    print("‚úÖ DAC model found! You can train!")
    print(f"   Location: {dac_model_path}")
    file_size = os.path.getsize(dac_model_path) / (1024 * 1024)
    print(f"   Size: {file_size:.2f} MB")
else:
    print("‚ùå DAC model NOT found!")
    print(f"   Expected location: {dac_model_path}")
    print("\nüì• You need to download it first!")

‚úÖ DAC model found! You can train!
   Location: C:/Users/user/Downloads/weights_44khz_16kbps.pth
   Size: 245.08 MB


In [1]:
"""
FINAL OPTIMIZED Audio Effect Generator using DAC-VAE
PHASE 2: 2K/10K Dataset with Maximum Stability

KEY OPTIMIZATIONS:
- Medium dataset (2K inputs, 10K outputs) - balanced learning
- 3-second audio - faster training, less storage
- 100 epochs (conservative, proven)
- Balanced learning rate (3e-5) with 300-step warmup
- Balanced audio loss (2.0x weight)
- soundfile for audio loading (no torchaudio issues)
- MAXIMUM STABILITY: Effective batch = 16 (2x Phase 1)

GOAL: Production-ready model with strong generalization
NO AUDIOTOOLS DEPENDENCY!
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from einops import rearrange
import matplotlib.pyplot as plt
import json
import numpy as np
from tqdm import tqdm
import soundfile as sf
import gc

# DAC import
try:
    import dac
    print("‚úì DAC library imported successfully")
except ImportError:
    print("‚ùå DAC not installed. Run: pip install descript-audio-codec")
    exit(1)

print("‚úì soundfile available")

#############################################
#   OPTIMIZED CONFIG - OPTION B + WARMUP
#############################################

class CFG:
    # ==========================================
    # PATHS - UPDATE TO YOUR 2K/10K DATASET
    # ==========================================
    csv_path = "C:/zahra/EchoMind4/data/10000_datapoints_clean.csv"
    base_path = "C:/zahra/EchoMind4/data"
    checkpoint_path = f"{base_path}/result/model.pt"
    best_model_path = f"{base_path}/result/model_best.pt"
    plot_path = f"{base_path}/result/training_curves.png"
    
    os.makedirs(f"{base_path}/result", exist_ok=True)
    
    # ==========================================
    # COLUMNS
    # ==========================================
    audio_col_in = "input_audio_path"
    audio_col_out = "output_audio_path"
    text_col = "prompt"
    
    # ==========================================
    # AUDIO - 3 SECONDS
    # ==========================================
    sample_rate = 44100
    max_audio_length = 3 * 44100  # 3 seconds (matches your dataset)
    
    # ==========================================
    # TRAINING - OPTION B (MAX STABILITY)
    # ==========================================
    batch_size = 4  # Same as Phase 1
    accumulation_steps = 4  # 2x Phase 1 for MORE stability
    epochs = 100  # Conservative, proven to work
    
    # Effective batch = 4 √ó 4 = 16 (2x more stable than Phase 1's 8)
    
    # ==========================================
    # LEARNING RATES - BALANCED
    # ==========================================
    lr_unet = 3e-5  # Balanced (not too fast, not too slow)
    #lr_text = 5e-7  # Text encoder learning rate (frozen mostly)
    warmup_steps = 300  # Gradual warmup for first 300 steps
    weight_decay = 0.01
    grad_clip = 1.0
    
    # ==========================================
    # LOSS WEIGHTS - BALANCED
    # ==========================================
    audio_loss_weight = 2.0  # Balanced for quality + generalization
    latent_loss_weight = 0.15  # Helps generalization
    
    # ==========================================
    # MIXED PRECISION
    # ==========================================
    use_amp = True
    
    # ==========================================
    # LOGGING
    # ==========================================
    log_interval = 100  # Your preference
    
    # ==========================================
    # ARCHITECTURE
    # ==========================================
    unet_channels = [64, 128, 256, 512]
    text_dim = 768
    
    # ==========================================
    # DATA SPLITS
    # ==========================================
    train_ratio = 0.7
    val_ratio = 0.15
    test_ratio = 0.15
    
    # ==========================================
    # FREEZING (Proven in Phase 1)
    # ==========================================
    freeze_text_encoder = True
    freeze_dac = True
    
    # ==========================================
    # EARLY STOPPING (Optional)
    # ==========================================
    patience = 40
    
    # ==========================================
    # DEVICE
    # ==========================================
    device = "cuda" if torch.cuda.is_available() else "cpu"
    num_workers = 0

cfg = CFG()

print("="*60)
print("PHASE 2: 2K/10K TRAINING (OPTION B + WARMUP)")
print("="*60)
print(f"Device: {cfg.device}")
print(f"Audio: {cfg.max_audio_length / cfg.sample_rate:.1f} seconds")
print(f"Batch: {cfg.batch_size} √ó {cfg.accumulation_steps} = {cfg.batch_size * cfg.accumulation_steps} (effective)")
print(f"Epochs: {cfg.epochs}")
print(f"LR: {cfg.lr_unet} with {cfg.warmup_steps}-step warmup")
print(f"Loss weights: Audio={cfg.audio_loss_weight}, Latent={cfg.latent_loss_weight}")
print("="*60 + "\n")

#############################################
#          LOAD DAC MODEL
#############################################

print("Loading DAC model...")
dac_model_path = "C:/Users/user/Downloads/weights_44khz_16kbps.pth"

if not os.path.exists(dac_model_path):
    print(f"‚ùå DAC model not found at: {dac_model_path}")
    exit(1)

dac_model = dac.DAC.load(dac_model_path)
dac_model = dac_model.to(cfg.device)
dac_model.eval()

for param in dac_model.parameters():
    param.requires_grad = False

with torch.no_grad():
    dummy_audio = torch.randn(1, 1, cfg.sample_rate).to(cfg.device)
    z = dac_model.encoder(dummy_audio)
    latent_channels = z.shape[1]

print(f"‚úì DAC loaded | Latent channels: {latent_channels}\n")

#############################################
#   DATASET PREPARATION
#############################################

print("="*60)
print("LOADING DATASET")
print("="*60)

# Load or create small dataset
if not os.path.exists(cfg.csv_path):
    print("Creating 2K/10K dataset...")
    original_csv = "C:/zahra/EchoMind4/15000_datapoints_clean.csv"
    df_full = pd.read_csv(original_csv)
    
    for col in [cfg.audio_col_in, cfg.audio_col_out]:
        df_full[col] = df_full[col].apply(lambda p: os.path.join(cfg.base_path, str(p).replace('\\', '/')))
    
    df_full = df_full.drop_duplicates(
        subset=['input_audio_path', 'output_audio_path'], 
        keep='first'
    )
    
    unique_inputs = df_full['input_audio_path'].unique()
    
    if len(unique_inputs) >= 2000:
        np.random.seed(42)
        selected_inputs = np.random.choice(unique_inputs, 2000, replace=False)
        df = df_full[df_full['input_audio_path'].isin(selected_inputs)].reset_index(drop=True)
    else:
        df = df_full.reset_index(drop=True)
    
    df.to_csv(cfg.csv_path, index=False)
    print(f"‚úì Created: {len(df)} samples")
else:
    df = pd.read_csv(cfg.csv_path)
    print(f"‚úì Loaded: {len(df)} samples")

# Fix paths
for col in [cfg.audio_col_in, cfg.audio_col_out]:
    df[col] = df[col].apply(lambda p: os.path.join(cfg.base_path, str(p).replace('\\', '/')))

# Validate
print("Validating files...")
valid_indices = []
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Validating"):
    if os.path.exists(row[cfg.audio_col_in]) and os.path.exists(row[cfg.audio_col_out]):
        valid_indices.append(idx)

df = df.iloc[valid_indices].reset_index(drop=True)
print(f"‚úì Valid: {len(df)} samples")

print("\nPrompt distribution:")
print(df[cfg.text_col].value_counts())

# Split
train_df, temp_df = train_test_split(df, test_size=(cfg.val_ratio + cfg.test_ratio), random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=cfg.test_ratio/(cfg.val_ratio + cfg.test_ratio), random_state=42)

print(f"\nSplits: Train={len(train_df)}, Val={len(val_df)}, Test={len(test_df)}\n")

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
print("‚úì Tokenizer loaded\n")

#############################################
#   DATASET CLASS WITH SOUNDFILE
#############################################

class AudioEffectDataset(Dataset):
    """Dataset using soundfile (no torchaudio issues!)"""
    
    def __init__(self, df):
        self.df = df.reset_index(drop=True)
    
    def __len__(self):
        return len(self.df)
    
    def _load_and_process(self, path):
        """Load audio with soundfile"""
        # Load with soundfile
        wav, sr = sf.read(path)
        wav = torch.from_numpy(wav).float()
        
        # Ensure shape: (channels, samples)
        if wav.dim() == 1:
            wav = wav.unsqueeze(0)
        elif wav.dim() == 2 and wav.size(0) > wav.size(1):
            wav = wav.t()
        
        # Resample if needed
        if sr != cfg.sample_rate:
            wav = torchaudio.functional.resample(wav, sr, cfg.sample_rate)
        
        # Convert to mono
        if wav.size(0) > 1:
            wav = wav.mean(dim=0, keepdim=True)
        
        # Take first 3 seconds or pad
        if wav.size(1) > cfg.max_audio_length:
            wav = wav[:, :cfg.max_audio_length]
        elif wav.size(1) < cfg.max_audio_length:
            wav = F.pad(wav, (0, cfg.max_audio_length - wav.size(1)))
        
        return wav
    
    def __getitem__(self, idx):
        try:
            row = self.df.iloc[idx]
            wav_in = self._load_and_process(row[cfg.audio_col_in])
            wav_out = self._load_and_process(row[cfg.audio_col_out])
            text = row[cfg.text_col]
            return wav_in, wav_out, text
        except Exception as e:
            print(f"Error loading {idx}: {e}")
            return (
                torch.zeros(1, cfg.max_audio_length),
                torch.zeros(1, cfg.max_audio_length),
                "error"
            )

def collate_fn(batch):
    wav_in, wav_out, texts = zip(*batch)
    wav_in = torch.stack(wav_in)
    wav_out = torch.stack(wav_out)
    tokens = tokenizer(
        list(texts),
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt"
    )
    return wav_in, wav_out, tokens.input_ids, tokens.attention_mask

#############################################
#      CREATE DATALOADERS
#############################################

train_ds = AudioEffectDataset(train_df)
val_ds = AudioEffectDataset(val_df)
test_ds = AudioEffectDataset(test_df)

train_dl = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True, 
                      num_workers=cfg.num_workers, collate_fn=collate_fn, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size=cfg.batch_size, shuffle=False,
                    num_workers=cfg.num_workers, collate_fn=collate_fn, pin_memory=True)
test_dl = DataLoader(test_ds, batch_size=cfg.batch_size, shuffle=False,
                     num_workers=cfg.num_workers, collate_fn=collate_fn, pin_memory=True)

print(f"Batches: Train={len(train_dl)}, Val={len(val_dl)}, Test={len(test_dl)}")
print(f"Est. time/epoch: ~{len(train_dl) * 0.5:.0f} min\n")

#############################################
#   MODEL ARCHITECTURE (SAME AS BEFORE)
#############################################

class CrossAttention(nn.Module):
    def __init__(self, audio_dim, text_dim, n_heads=8):
        super().__init__()
        self.n_heads = n_heads
        self.scale = (audio_dim // n_heads) ** -0.5
        self.to_q = nn.Linear(audio_dim, audio_dim)
        self.to_k = nn.Linear(text_dim, audio_dim)
        self.to_v = nn.Linear(text_dim, audio_dim)
        self.to_out = nn.Linear(audio_dim, audio_dim)
        
    def forward(self, x, context):
        B, C, T = x.shape
        x_flat = rearrange(x, 'b c t -> b t c')
        q = self.to_q(x_flat)
        k = self.to_k(context)
        v = self.to_v(context)
        q = rearrange(q, 'b t (h d) -> b h t d', h=self.n_heads)
        k = rearrange(k, 'b s (h d) -> b h s d', h=self.n_heads)
        v = rearrange(v, 'b s (h d) -> b h s d', h=self.n_heads)
        attn = torch.einsum('bhqd,bhkd->bhqk', q, k) * self.scale
        attn = F.softmax(attn, dim=-1)
        out = torch.einsum('bhqk,bhvd->bhqd', attn, v)
        out = rearrange(out, 'b h t d -> b t (h d)')
        out = self.to_out(out)
        return rearrange(out, 'b t c -> b c t')

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv1d(channels, channels, 3, padding=1)
        self.conv2 = nn.Conv1d(channels, channels, 3, padding=1)
        self.norm1 = nn.GroupNorm(8, channels)
        self.norm2 = nn.GroupNorm(8, channels)
        self.act = nn.SiLU()
        
    def forward(self, x):
        residual = x
        x = self.act(self.norm1(self.conv1(x)))
        x = self.act(self.norm2(self.conv2(x)))
        return x + residual

class DownBlock(nn.Module):
    def __init__(self, in_c, out_c, text_dim=768, use_attn=False):
        super().__init__()
        self.use_attn = use_attn
        self.conv = nn.Conv1d(in_c, out_c, 3, padding=1)
        self.res1 = ResidualBlock(out_c)
        self.res2 = ResidualBlock(out_c)
        if use_attn:
            self.attn = CrossAttention(out_c, text_dim)
        self.downsample = nn.Conv1d(out_c, out_c, 4, stride=2, padding=1)
        
    def forward(self, x, text_emb=None):
        x = self.conv(x)
        x = self.res1(x)
        x = self.res2(x)
        if self.use_attn and text_emb is not None:
            x = x + self.attn(x, text_emb)
        skip = x
        x = self.downsample(x)
        return x, skip

class UpBlock(nn.Module):
    def __init__(self, in_c, out_c, skip_c, text_dim=768, use_attn=False):
        super().__init__()
        self.use_attn = use_attn
        self.upsample = nn.ConvTranspose1d(in_c, out_c, 4, stride=2, padding=1)
        self.conv = nn.Conv1d(out_c + skip_c, out_c, 3, padding=1)
        self.res1 = ResidualBlock(out_c)
        self.res2 = ResidualBlock(out_c)
        if use_attn:
            self.attn = CrossAttention(out_c, text_dim)
        
    def forward(self, x, skip, text_emb=None):
        x = self.upsample(x)
        if x.size(-1) != skip.size(-1):
            x = F.interpolate(x, size=skip.size(-1), mode='linear', align_corners=False)
        x = torch.cat([x, skip], dim=1)
        x = self.conv(x)
        x = self.res1(x)
        x = self.res2(x)
        if self.use_attn and text_emb is not None:
            x = x + self.attn(x, text_emb)
        return x

class LatentUNet(nn.Module):
    def __init__(self, latent_channels, channels, text_dim=768):
        super().__init__()
        self.input_conv = nn.Conv1d(latent_channels, channels[0], 7, padding=3)
        
        self.down_blocks = nn.ModuleList()
        for i in range(len(channels) - 1):
            use_attn = i >= 2
            self.down_blocks.append(DownBlock(channels[i], channels[i+1], text_dim, use_attn))
        
        self.mid_block1 = ResidualBlock(channels[-1])
        self.mid_attn = CrossAttention(channels[-1], text_dim)
        self.mid_block2 = ResidualBlock(channels[-1])
        
        self.up_blocks = nn.ModuleList()
        for i in range(len(channels) - 1, 0, -1):
            use_attn = i >= 2
            self.up_blocks.append(
                UpBlock(channels[i], channels[i-1], channels[i], text_dim, use_attn)
            )
        
        self.output_conv = nn.Conv1d(channels[0], latent_channels, 7, padding=3)
        
    def forward(self, z, text_emb):
        original_length = z.size(-1)
        x = self.input_conv(z)
        
        skips = []
        for down in self.down_blocks:
            x, skip = down(x, text_emb)
            skips.append(skip)
        
        x = self.mid_block1(x)
        x = x + self.mid_attn(x, text_emb)
        x = self.mid_block2(x)
        
        for up in self.up_blocks:
            skip = skips.pop()
            x = up(x, skip, text_emb)
        
        x = self.output_conv(x)
        
        if x.size(-1) != original_length:
            x = F.interpolate(x, size=original_length, mode='linear', align_corners=False)
        
        return x

class AudioEffectModel(nn.Module):
    def __init__(self, dac_model, latent_channels, unet_channels, text_dim):
        super().__init__()
        self.text_encoder = AutoModel.from_pretrained("bert-base-uncased")
        
        if cfg.freeze_text_encoder:
            for param in self.text_encoder.parameters():
                param.requires_grad = False
            print("Text encoder: FROZEN ‚ùÑÔ∏è")
        
        self.dac = dac_model
        self.unet = LatentUNet(latent_channels, unet_channels, text_dim)
        
    def forward(self, wav_in, wav_out, input_ids, attention_mask):
        if torch.isnan(wav_in).any() or torch.isnan(wav_out).any():
            return None, None, None
        
        text_output = self.text_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        text_emb = text_output.last_hidden_state
        
        with torch.no_grad():
            z_in = self.dac.encoder(wav_in)
            z_target = self.dac.encoder(wav_out)
        
        if torch.isnan(z_in).any() or torch.isnan(z_target).any():
            return None, None, None
        
        z_pred = self.unet(z_in, text_emb)
        
        if torch.isnan(z_pred).any():
            return None, None, None
        
        with torch.no_grad():
            wav_pred = self.dac.decoder(z_pred)
        
        if torch.isnan(wav_pred).any():
            return None, None, None
        
        return wav_pred, z_pred, z_target

def init_weights(m):
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        m.weight.data *= 0.1
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight, gain=0.02)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.GroupNorm):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)

#############################################
#     MODEL INITIALIZATION
#############################################

model = AudioEffectModel(
    dac_model=dac_model,
    latent_channels=latent_channels,
    unet_channels=cfg.unet_channels,
    text_dim=cfg.text_dim
).to(cfg.device)

model.unet.apply(init_weights)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"‚úì Model: {total_params:,} total, {trainable_params:,} trainable\n")

#############################################
#   OPTIMIZER WITH WARMUP + COSINE
#############################################

optimizer = torch.optim.AdamW(
    model.unet.parameters(),
    lr=cfg.lr_unet,
    weight_decay=cfg.weight_decay
)

# Cosine annealing scheduler (same as Phase 1)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=cfg.epochs * len(train_dl),
    eta_min=cfg.lr_unet * 0.1
)

criterion_audio = nn.L1Loss()
criterion_latent = nn.MSELoss()
scaler = torch.amp.GradScaler('cuda', enabled=cfg.use_amp)

print(f"‚úì Optimizer: AdamW, LR={cfg.lr_unet}")
print(f"‚úì Scheduler: CosineAnnealing with {cfg.warmup_steps}-step warmup")
print(f"‚úì Effective batch: {cfg.batch_size * cfg.accumulation_steps}\n")

#############################################
#   MEMORY MANAGEMENT FUNCTION
#############################################

def clear_memory():
    """Aggressive memory clearing for long training"""
    gc.collect()
    torch.cuda.empty_cache()
    if torch.cuda.is_available():
        torch.cuda.synchronize()

#############################################
#   TRAINING FUNCTIONS WITH WARMUP
#############################################

def train_epoch(model, dataloader, optimizer, scheduler, scaler, epoch):
    model.train()
    total_loss = total_audio_loss = total_latent_loss = nan_count = 0
    optimizer.zero_grad()
    
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{cfg.epochs}")
    
    for step, (wav_in, wav_out, ids, mask) in enumerate(pbar):
        wav_in, wav_out, ids, mask = wav_in.to(cfg.device), wav_out.to(cfg.device), ids.to(cfg.device), mask.to(cfg.device)
        
        if torch.isnan(wav_in).any() or torch.isnan(wav_out).any():
            nan_count += 1
            continue
        
        with torch.amp.autocast('cuda', enabled=cfg.use_amp):
            wav_pred, z_pred, z_target = model(wav_in, wav_out, ids, mask)
            
            if wav_pred is None:
                nan_count += 1
                continue
            
            if wav_pred.size(-1) != wav_out.size(-1):
                min_len = min(wav_pred.size(-1), wav_out.size(-1))
                wav_pred, wav_out = wav_pred[..., :min_len], wav_out[..., :min_len]
            
            if z_pred.size(-1) != z_target.size(-1):
                min_len = min(z_pred.size(-1), z_target.size(-1))
                z_pred, z_target = z_pred[..., :min_len], z_target[..., :min_len]
            
            audio_loss = criterion_audio(wav_pred, wav_out)
            latent_loss = criterion_latent(z_pred, z_target)
            loss = (cfg.audio_loss_weight * audio_loss + cfg.latent_loss_weight * latent_loss) / cfg.accumulation_steps
        
        if torch.isnan(loss) or torch.isinf(loss):
            nan_count += 1
            continue
        
        scaler.scale(loss).backward()
        
        if (step + 1) % cfg.accumulation_steps == 0:
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.grad_clip)
            
            if torch.isnan(grad_norm) or torch.isinf(grad_norm) or grad_norm > 100:
                optimizer.zero_grad()
                scaler.update()
                nan_count += 1
                continue
            
            scaler.step(optimizer)
            scaler.update()
            
            # ==========================================
            # WARMUP + SCHEDULER (NEW!)
            # ==========================================
            total_steps = epoch * len(dataloader) + step
            
            if total_steps < cfg.warmup_steps:
                # Warmup phase: gradually increase LR
                warmup_factor = total_steps / cfg.warmup_steps
                current_lr = cfg.lr_unet * warmup_factor
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr
            else:
                # Normal cosine annealing after warmup
                scheduler.step()
            
            optimizer.zero_grad()
        
        total_loss += loss.item() * cfg.accumulation_steps
        total_audio_loss += audio_loss.item()
        total_latent_loss += latent_loss.item()
        
        pbar.set_postfix({
            'loss': f'{loss.item() * cfg.accumulation_steps:.4f}',
            'audio': f'{audio_loss.item():.4f}',
            'latent': f'{latent_loss.item():.4f}',
            'nans': nan_count
        })
        
        # Clear GPU cache every 50 batches
        if step > 0 and step % 50 == 0:
            torch.cuda.empty_cache()
        
        if (step + 1) % cfg.log_interval == 0:
            print(f"\n  Step {step+1}/{len(dataloader)} | "
                  f"Loss: {total_loss/(step+1):.6f} | "
                  f"Audio: {total_audio_loss/(step+1):.6f} | "
                  f"Latent: {total_latent_loss/(step+1):.6f}")
    
    if nan_count > 0:
        print(f"\n‚ö†Ô∏è {nan_count} NaN occurrences")
    
    return total_loss / len(dataloader), total_audio_loss / len(dataloader), total_latent_loss / len(dataloader)

@torch.no_grad()
def validate_epoch(model, dataloader, epoch):
    model.eval()
    total_loss = total_audio_loss = total_latent_loss = 0
    
    pbar = tqdm(dataloader, desc=f"Validation {epoch+1}/{cfg.epochs}")
    
    for wav_in, wav_out, ids, mask in pbar:
        wav_in, wav_out, ids, mask = wav_in.to(cfg.device), wav_out.to(cfg.device), ids.to(cfg.device), mask.to(cfg.device)
        
        with torch.amp.autocast('cuda', enabled=cfg.use_amp):
            wav_pred, z_pred, z_target = model(wav_in, wav_out, ids, mask)
            
            if wav_pred is None:
                continue
            
            if wav_pred.size(-1) != wav_out.size(-1):
                min_len = min(wav_pred.size(-1), wav_out.size(-1))
                wav_pred, wav_out = wav_pred[..., :min_len], wav_out[..., :min_len]
            
            if z_pred.size(-1) != z_target.size(-1):
                min_len = min(z_pred.size(-1), z_target.size(-1))
                z_pred, z_target = z_pred[..., :min_len], z_target[..., :min_len]
            
            audio_loss = criterion_audio(wav_pred, wav_out)
            latent_loss = criterion_latent(z_pred, z_target)
            loss = cfg.audio_loss_weight * audio_loss + cfg.latent_loss_weight * latent_loss
        
        total_loss += loss.item()
        total_audio_loss += audio_loss.item()
        total_latent_loss += latent_loss.item()
        
        pbar.set_postfix({'loss': f'{loss.item():.4f}', 'audio': f'{audio_loss.item():.4f}'})
    
    return total_loss / len(dataloader), total_audio_loss / len(dataloader), total_latent_loss / len(dataloader)

#############################################
#   TRAINING LOOP WITH MEMORY MANAGEMENT
#############################################

print("="*60)
print("STARTING PHASE 2 TRAINING")
print("="*60 + "\n")

train_losses = []
val_losses = []
train_audio_losses = []
train_latent_losses = []
val_audio_losses = []
val_latent_losses = []

best_val_loss = float('inf')
best_epoch = 0
patience_counter = 0
start_epoch = 0

# ==========================================
# CHECKPOINT LOADING (ADD THIS SECTION to load checkpoints!)
# ==========================================
if os.path.exists(cfg.checkpoint_path):
    print("="*60)
    print("CHECKPOINT FOUND - RESUMING TRAINING")
    print("="*60)
    print(f"Loading checkpoint from: {cfg.checkpoint_path}\n")
    
    ckpt = torch.load(cfg.checkpoint_path, map_location=cfg.device)
    
    # Load model weights
    model.load_state_dict(ckpt['model'])
    print("‚úì Model weights loaded")
    
    # Load optimizer state
    optimizer.load_state_dict(ckpt['optimizer'])
    print("‚úì Optimizer state loaded")
    
    # Load scheduler state
    scheduler.load_state_dict(ckpt['scheduler'])
    print("‚úì Scheduler state loaded")
    
    # Load scaler state
    scaler.load_state_dict(ckpt['scaler'])
    print("‚úì Scaler state loaded")
    
    # Load training history
    start_epoch = ckpt['epoch'] + 1
    train_losses = ckpt.get('train_losses', [])
    val_losses = ckpt.get('val_losses', [])
    train_audio_losses = ckpt.get('train_audio_losses', [])
    train_latent_losses = ckpt.get('train_latent_losses', [])
    val_audio_losses = ckpt.get('val_audio_losses', [])
    val_latent_losses = ckpt.get('val_latent_losses', [])
    best_val_loss = ckpt.get('best_val_loss', float('inf'))
    best_epoch = ckpt.get('best_epoch', 0)
    
    print(f"\n‚úì Resuming from epoch {start_epoch}")
    print(f"‚úì Best validation loss so far: {best_val_loss:.6f}")
    print(f"‚úì Best epoch: {best_epoch + 1}")
    print(f"‚úì Training history: {len(train_losses)} epochs loaded")
    print("="*60 + "\n")
else:
    print("No checkpoint found - starting from scratch\n")

# Clear memory before starting
clear_memory()

CLEAR_CACHE_EVERY_N_EPOCHS = 10

# ==========================================
# TRAINING LOOP (CHANGE THIS LINE!)
# ==========================================
for epoch in range(start_epoch, cfg.epochs):  # ‚Üê CHANGE FROM range(cfg.epochs)
    print(f"\n{'='*60}\nEPOCH {epoch+1}/{cfg.epochs}\n{'='*60}\n")
    
    # Clear GPU cache every N epochs
    if epoch % CLEAR_CACHE_EVERY_N_EPOCHS == 0:
        clear_memory()
        mem_allocated = torch.cuda.memory_allocated() / 1e9
        mem_reserved = torch.cuda.memory_reserved() / 1e9
        print(f"üßπ Cleared GPU cache at epoch {epoch}")
        print(f"   Allocated: {mem_allocated:.2f}GB | Reserved: {mem_reserved:.2f}GB\n")
    
    train_loss, train_audio, train_latent = train_epoch(
        model, train_dl, optimizer, scheduler, scaler, epoch
    )
    
    # Clear after training
    clear_memory()
    
    val_loss, val_audio, val_latent = validate_epoch(model, val_dl, epoch)
    
    # Clear after validation
    clear_memory()
    
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_audio_losses.append(train_audio)
    train_latent_losses.append(train_latent)
    val_audio_losses.append(val_audio)
    val_latent_losses.append(val_latent)
    
    print(f"\n{'='*60}")
    print(f"EPOCH {epoch+1}/{cfg.epochs} SUMMARY")
    print(f"{'='*60}")
    print(f"Train Loss:  {train_loss:.6f} (Audio: {train_audio:.6f}, Latent: {train_latent:.6f})")
    print(f"Val Loss:    {val_loss:.6f} (Audio: {val_audio:.6f}, Latent: {val_latent:.6f})")
    
    # Get current LR (after warmup if applicable)
    current_lr = optimizer.param_groups[0]['lr']
    print(f"Current LR: {current_lr:.2e}")
    print(f"{'='*60}\n")
    
    checkpoint = {
        'epoch': epoch,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        'scaler': scaler.state_dict(),
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_audio_losses': train_audio_losses,
        'train_latent_losses': train_latent_losses,
        'val_audio_losses': val_audio_losses,
        'val_latent_losses': val_latent_losses,
        'best_val_loss': best_val_loss,
        'config': {
            'latent_channels': latent_channels,
            'unet_channels': cfg.unet_channels,
            'text_dim': cfg.text_dim,
            'sample_rate': cfg.sample_rate,
            'max_audio_length': cfg.max_audio_length
        }
    }
    torch.save(checkpoint, cfg.checkpoint_path)
    
    # Clear after saving checkpoint
    clear_memory()
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        patience_counter = 0
        torch.save({
            'epoch': epoch, 
            'model': model.state_dict(), 
            'val_loss': best_val_loss, 
            'config': checkpoint['config']
        }, cfg.best_model_path)
        print(f"‚úÖ NEW BEST MODEL! Val Loss: {best_val_loss:.6f}\n")
        clear_memory()
    else:
        patience_counter += 1
    
    if patience_counter >= cfg.patience:
        print(f"\n‚ö†Ô∏è Early stopping ({cfg.patience} epochs no improvement)")
        break

print(f"\n{'='*60}\nTRAINING COMPLETE!\n{'='*60}")
print(f"Best: {best_val_loss:.6f} at epoch {best_epoch+1}")
print(f"Trained: {len(train_losses)} epochs\n")

# Final memory clear
clear_memory()

#############################################
#   TEST & SAVE
#############################################

best_ckpt = torch.load(cfg.best_model_path, map_location=cfg.device)
model.load_state_dict(best_ckpt['model'])
test_loss, test_audio, test_latent = validate_epoch(model, test_dl, cfg.epochs)

print(f"TEST RESULTS:")
print(f"  Loss: {test_loss:.6f}")
print(f"  Audio Loss: {test_audio:.6f}")
print(f"  Latent Loss: {test_latent:.6f}\n")

# Plot
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
epochs_range = range(len(train_losses))

axes[0,0].plot(epochs_range, train_losses, 'b-', label='Train', linewidth=2, marker='o', markersize=4)
axes[0,0].plot(epochs_range, val_losses, 'r-', label='Val', linewidth=2, marker='s', markersize=4)
axes[0,0].set_xlabel('Epoch')
axes[0,0].set_ylabel('Total Loss')
axes[0,0].set_title('Total Loss (Audio + Latent)')
axes[0,0].legend()
axes[0,0].grid(True, alpha=0.3)

axes[0,1].plot(epochs_range, train_audio_losses, 'b-', label='Train', linewidth=2, marker='o', markersize=4)
axes[0,1].plot(epochs_range, val_audio_losses, 'r-', label='Val', linewidth=2, marker='s', markersize=4)
axes[0,1].set_xlabel('Epoch')
axes[0,1].set_ylabel('Audio Loss (L1)')
axes[0,1].set_title('Audio Reconstruction Loss')
axes[0,1].legend()
axes[0,1].grid(True, alpha=0.3)

axes[1,0].plot(epochs_range, train_latent_losses, 'b-', label='Train', linewidth=2, marker='o', markersize=4)
axes[1,0].plot(epochs_range, val_latent_losses, 'r-', label='Val', linewidth=2, marker='s', markersize=4)
axes[1,0].set_xlabel('Epoch')
axes[1,0].set_ylabel('Latent Loss (MSE)')
axes[1,0].set_title('Latent Space Loss')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)

gap = [v - t for t, v in zip(train_losses, val_losses)]
axes[1,1].plot(epochs_range, gap, 'g-', linewidth=2, marker='d', markersize=4)
axes[1,1].axhline(y=0, color='k', linestyle='--', alpha=0.3)
axes[1,1].set_xlabel('Epoch')
axes[1,1].set_ylabel('Loss Gap')
axes[1,1].set_title('Generalization Gap (Val - Train)')
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(cfg.plot_path, dpi=300, bbox_inches='tight')
print(f"‚úì Plot saved to: {cfg.plot_path}\n")

# Summary
summary = {
    'dataset': {
        'total': len(df), 
        'train': len(train_df), 
        'val': len(val_df), 
        'test': len(test_df), 
        'audio_seconds': cfg.max_audio_length / cfg.sample_rate
    },
    'training': {
        'epochs_trained': len(train_losses), 
        'best_epoch': best_epoch + 1, 
        'effective_batch_size': cfg.batch_size * cfg.accumulation_steps,
        'warmup_steps': cfg.warmup_steps
    },
    'model': {
        'total_params': total_params, 
        'trainable_params': trainable_params
    },
    'results': {
        'best_val_loss': float(best_val_loss), 
        'test_loss': float(test_loss), 
        'test_audio_loss': float(test_audio),
        'test_latent_loss': float(test_latent)
    },
    'config': {
        'sample_rate': cfg.sample_rate, 
        'lr': cfg.lr_unet, 
        'audio_weight': cfg.audio_loss_weight,
        'latent_weight': cfg.latent_loss_weight
    }
}

summary_path = f"{cfg.base_path}/result_DAC_PHASE2/training_summary.json"
with open(summary_path, 'w') as f:
    json.dump(summary, f, indent=2)

print(f"‚úì Summary saved to: {summary_path}\n")
print("="*60)
print("ALL FILES SAVED")
print("="*60)
print(f"‚úì Best model: {cfg.best_model_path}")
print(f"‚úì Checkpoint: {cfg.checkpoint_path}")
print(f"‚úì Plot: {cfg.plot_path}")
print(f"‚úì Summary: {summary_path}")
print("="*60)

print(f"\nüéâ PHASE 2 COMPLETE!")
print(f"‚úÖ Best validation loss: {best_val_loss:.6f}")
print(f"‚úÖ Test loss: {test_loss:.6f}")
print("\nüöÄ Use inference script to test on new audio!")
print("üí° Compare with Phase 1 results to see improvement!")

  from .autonotebook import tqdm as notebook_tqdm


‚úì DAC library imported successfully
‚úì soundfile available
PHASE 2: 2K/10K TRAINING (OPTION B + WARMUP)
Device: cuda
Audio: 3.0 seconds
Batch: 4 √ó 4 = 16 (effective)
Epochs: 100
LR: 3e-05 with 300-step warmup
Loss weights: Audio=2.0, Latent=0.15

Loading DAC model...


  WeightNorm.apply(module, name, dim)


‚úì DAC loaded | Latent channels: 128

LOADING DATASET
‚úì Loaded: 10000 samples
Validating files...


Validating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10000/10000 [00:01<00:00, 7537.96it/s]


‚úì Valid: 10000 samples

Prompt distribution:
prompt
add rain sounds         2000
add birds sounds        2000
add dogs sounds         2000
add cat sounds          2000
add lightning sounds    2000
Name: count, dtype: int64

Splits: Train=7000, Val=1500, Test=1500

‚úì Tokenizer loaded

Batches: Train=1750, Val=375, Test=375
Est. time/epoch: ~875 min

Text encoder: FROZEN ‚ùÑÔ∏è
‚úì Model: 188,877,378 total, 15,181,888 trainable

‚úì Optimizer: AdamW, LR=3e-05
‚úì Scheduler: CosineAnnealing with 300-step warmup
‚úì Effective batch: 16

STARTING PHASE 2 TRAINING

No checkpoint found - starting from scratch


EPOCH 1/100

üßπ Cleared GPU cache at epoch 0
   Allocated: 0.84GB | Reserved: 0.88GB



Epoch 1/100:   6%|‚ñä              | 100/1750 [01:15<20:03,  1.37it/s, loss=4.7793, audio=0.1724, latent=29.5625, nans=0]


  Step 100/1750 | Loss: 4.592318 | Audio: 0.145443 | Latent: 28.676211


Epoch 1/100:  11%|‚ñà‚ñã             | 200/1750 [02:27<18:59,  1.36it/s, loss=6.4363, audio=0.2811, latent=39.1601, nans=0]


  Step 200/1750 | Loss: 4.635977 | Audio: 0.149730 | Latent: 28.910113


Epoch 1/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:40<17:27,  1.38it/s, loss=4.7767, audio=0.1579, latent=29.7399, nans=0]


  Step 300/1750 | Loss: 4.611140 | Audio: 0.149469 | Latent: 28.748011


Epoch 1/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:54<16:22,  1.37it/s, loss=4.6994, audio=0.1583, latent=29.2180, nans=0]


  Step 400/1750 | Loss: 4.566992 | Audio: 0.148292 | Latent: 28.469381


Epoch 1/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:12<15:09,  1.37it/s, loss=5.4920, audio=0.2229, latent=33.6413, nans=0]


  Step 500/1750 | Loss: 4.532058 | Audio: 0.148365 | Latent: 28.235522


Epoch 1/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:25<13:51,  1.38it/s, loss=4.5245, audio=0.1478, latent=28.1925, nans=0]


  Step 600/1750 | Loss: 4.529338 | Audio: 0.150228 | Latent: 28.192548


Epoch 1/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:38<12:33,  1.39it/s, loss=3.6322, audio=0.1070, latent=22.7879, nans=0]


  Step 700/1750 | Loss: 4.501897 | Audio: 0.149822 | Latent: 28.015020


Epoch 1/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:49<11:36,  1.36it/s, loss=3.8718, audio=0.1120, latent=24.3192, nans=0]


  Step 800/1750 | Loss: 4.470186 | Audio: 0.149125 | Latent: 27.812911


Epoch 1/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [11:02<10:03,  1.41it/s, loss=3.7478, audio=0.1147, latent=23.4566, nans=0]


  Step 900/1750 | Loss: 4.445883 | Audio: 0.149182 | Latent: 27.650123


Epoch 1/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:14<08:58,  1.39it/s, loss=4.4226, audio=0.1633, latent=27.3063, nans=0]


  Step 1000/1750 | Loss: 4.423418 | Audio: 0.149709 | Latent: 27.493340


Epoch 1/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:26<07:51,  1.38it/s, loss=4.1045, audio=0.1466, latent=25.4084, nans=0]


  Step 1100/1750 | Loss: 4.397535 | Audio: 0.149633 | Latent: 27.321794


Epoch 1/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:39<06:44,  1.36it/s, loss=4.1729, audio=0.1566, latent=25.7312, nans=0]


  Step 1200/1750 | Loss: 4.370688 | Audio: 0.149351 | Latent: 27.146575


Epoch 1/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:52<05:27,  1.37it/s, loss=4.1444, audio=0.1593, latent=25.5057, nans=0]


  Step 1300/1750 | Loss: 4.348864 | Audio: 0.149419 | Latent: 27.000175


Epoch 1/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [17:05<04:15,  1.37it/s, loss=4.8479, audio=0.2321, latent=29.2252, nans=0]


  Step 1400/1750 | Loss: 4.330587 | Audio: 0.149721 | Latent: 26.874296


Epoch 1/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:19<03:06,  1.34it/s, loss=4.8090, audio=0.2299, latent=28.9954, nans=0]


  Step 1500/1750 | Loss: 4.316729 | Audio: 0.150198 | Latent: 26.775554


Epoch 1/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:31<01:49,  1.37it/s, loss=3.5947, audio=0.1039, latent=22.5788, nans=0]


  Step 1600/1750 | Loss: 4.301336 | Audio: 0.150495 | Latent: 26.668969


Epoch 1/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:45<00:36,  1.37it/s, loss=3.5637, audio=0.1121, latent=22.2634, nans=0]


  Step 1700/1750 | Loss: 4.284400 | Audio: 0.150563 | Latent: 26.555152


Epoch 1/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:22<00:00,  1.37it/s, loss=4.0940, audio=0.1605, latent=25.1532, nans=0]
Validation 1/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:12<00:00,  1.49it/s, loss=3.2678, audio=0.0848]



EPOCH 1/100 SUMMARY
Train Loss:  4.277732 (Audio: 0.150745, Latent: 26.508279)
Val Loss:    3.997308 (Audio: 0.151947, Latent: 24.622760)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.997308


EPOCH 2/100



Epoch 2/100:   6%|‚ñä              | 100/1750 [01:11<19:25,  1.42it/s, loss=3.5572, audio=0.1055, latent=22.3084, nans=0]


  Step 100/1750 | Loss: 3.949723 | Audio: 0.148911 | Latent: 24.346008


Epoch 2/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:19,  1.41it/s, loss=5.1215, audio=0.2821, latent=30.3821, nans=0]


  Step 200/1750 | Loss: 3.972478 | Audio: 0.151784 | Latent: 24.459398


Epoch 2/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:51,  1.35it/s, loss=3.5381, audio=0.0984, latent=22.2751, nans=0]


  Step 300/1750 | Loss: 3.995403 | Audio: 0.155597 | Latent: 24.561389


Epoch 2/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:16,  1.38it/s, loss=4.0337, audio=0.1616, latent=24.7363, nans=0]


  Step 400/1750 | Loss: 3.981454 | Audio: 0.155692 | Latent: 24.467137


Epoch 2/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<15:34,  1.34it/s, loss=5.0398, audio=0.3039, latent=29.5472, nans=0]


  Step 500/1750 | Loss: 3.966219 | Audio: 0.155306 | Latent: 24.370715


Epoch 2/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<14:07,  1.36it/s, loss=3.3547, audio=0.1059, latent=20.9534, nans=0]


  Step 600/1750 | Loss: 3.941498 | Audio: 0.153763 | Latent: 24.226476


Epoch 2/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<12:57,  1.35it/s, loss=3.4397, audio=0.0943, latent=21.6738, nans=0]


  Step 700/1750 | Loss: 3.928561 | Audio: 0.153547 | Latent: 24.143116


Epoch 2/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<12:03,  1.31it/s, loss=3.8775, audio=0.1709, latent=23.5705, nans=0]


  Step 800/1750 | Loss: 3.911630 | Audio: 0.152926 | Latent: 24.038527


Epoch 2/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:48<10:13,  1.39it/s, loss=4.1191, audio=0.2185, latent=24.5469, nans=0]


  Step 900/1750 | Loss: 3.902301 | Audio: 0.153529 | Latent: 23.968281


Epoch 2/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:01<09:10,  1.36it/s, loss=3.9862, audio=0.1758, latent=24.2307, nans=0]


  Step 1000/1750 | Loss: 3.893612 | Audio: 0.154066 | Latent: 23.903196


Epoch 2/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:14<07:37,  1.42it/s, loss=3.9670, audio=0.1684, latent=24.2017, nans=0]


  Step 1100/1750 | Loss: 3.876632 | Audio: 0.153457 | Latent: 23.798127


Epoch 2/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:26<06:31,  1.41it/s, loss=3.4814, audio=0.0975, latent=21.9089, nans=0]


  Step 1200/1750 | Loss: 3.864906 | Audio: 0.153323 | Latent: 23.721725


Epoch 2/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:36<05:23,  1.39it/s, loss=3.8343, audio=0.1586, latent=23.4480, nans=0]


  Step 1300/1750 | Loss: 3.857351 | Audio: 0.153866 | Latent: 23.664122


Epoch 2/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:49<04:15,  1.37it/s, loss=3.3065, audio=0.0921, latent=20.8151, nans=0]


  Step 1400/1750 | Loss: 3.845946 | Audio: 0.153766 | Latent: 23.589427


Epoch 2/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:01<03:03,  1.36it/s, loss=3.4514, audio=0.1090, latent=21.5556, nans=0]


  Step 1500/1750 | Loss: 3.833622 | Audio: 0.153399 | Latent: 23.512154


Epoch 2/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:15<01:50,  1.35it/s, loss=4.1421, audio=0.2196, latent=24.6864, nans=0]


  Step 1600/1750 | Loss: 3.824643 | Audio: 0.153511 | Latent: 23.450805


Epoch 2/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:28<00:37,  1.34it/s, loss=3.9342, audio=0.1837, latent=23.7792, nans=0]


  Step 1700/1750 | Loss: 3.820172 | Audio: 0.154201 | Latent: 23.411797


Epoch 2/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:04<00:00,  1.38it/s, loss=3.8480, audio=0.1531, latent=23.6116, nans=0]
Validation 2/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:11<00:00,  1.49it/s, loss=3.1094, audio=0.0833]



EPOCH 2/100 SUMMARY
Train Loss:  3.815702 (Audio: 0.154155, Latent: 23.382617)
Val Loss:    3.657107 (Audio: 0.153993, Latent: 22.327467)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.657107


EPOCH 3/100



Epoch 3/100:   6%|‚ñä              | 100/1750 [01:12<20:06,  1.37it/s, loss=4.1389, audio=0.2376, latent=24.4246, nans=0]


  Step 100/1750 | Loss: 3.681416 | Audio: 0.155094 | Latent: 22.474859


Epoch 3/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:23,  1.41it/s, loss=4.1504, audio=0.2296, latent=24.6080, nans=0]


  Step 200/1750 | Loss: 3.713932 | Audio: 0.162543 | Latent: 22.592302


Epoch 3/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:25,  1.39it/s, loss=3.3729, audio=0.1204, latent=20.8807, nans=0]


  Step 300/1750 | Loss: 3.715082 | Audio: 0.163865 | Latent: 22.582344


Epoch 3/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:35,  1.36it/s, loss=3.6306, audio=0.1653, latent=21.9998, nans=0]


  Step 400/1750 | Loss: 3.686828 | Audio: 0.160924 | Latent: 22.433195


Epoch 3/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:33,  1.43it/s, loss=3.6879, audio=0.1597, latent=22.4568, nans=0]


  Step 500/1750 | Loss: 3.671476 | Audio: 0.159483 | Latent: 22.350066


Epoch 3/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:49,  1.39it/s, loss=3.2594, audio=0.0942, latent=20.4732, nans=0]


  Step 600/1750 | Loss: 3.659679 | Audio: 0.158512 | Latent: 22.284367


Epoch 3/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<12:25,  1.41it/s, loss=3.4204, audio=0.1119, latent=21.3113, nans=0]


  Step 700/1750 | Loss: 3.650410 | Audio: 0.158182 | Latent: 22.226974


Epoch 3/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:34<11:25,  1.39it/s, loss=4.2623, audio=0.2829, latent=24.6440, nans=0]


  Step 800/1750 | Loss: 3.644406 | Audio: 0.158540 | Latent: 22.182169


Epoch 3/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<10:25,  1.36it/s, loss=3.6086, audio=0.1773, latent=21.6934, nans=0]


  Step 900/1750 | Loss: 3.631706 | Audio: 0.157655 | Latent: 22.109302


Epoch 3/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<09:07,  1.37it/s, loss=3.8690, audio=0.2323, latent=22.6956, nans=0]


  Step 1000/1750 | Loss: 3.626889 | Audio: 0.158024 | Latent: 22.072277


Epoch 3/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:09<07:58,  1.36it/s, loss=3.6622, audio=0.1663, latent=22.1974, nans=0]


  Step 1100/1750 | Loss: 3.618170 | Audio: 0.157840 | Latent: 22.016596


Epoch 3/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:30,  1.41it/s, loss=3.6854, audio=0.1717, latent=22.2806, nans=0]


  Step 1200/1750 | Loss: 3.611032 | Audio: 0.157903 | Latent: 21.968169


Epoch 3/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:27,  1.37it/s, loss=3.3202, audio=0.1054, latent=20.7299, nans=0]


  Step 1300/1750 | Loss: 3.602557 | Audio: 0.157466 | Latent: 21.917504


Epoch 3/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:45<04:13,  1.38it/s, loss=2.9985, audio=0.0855, latent=18.8506, nans=0]


  Step 1400/1750 | Loss: 3.595611 | Audio: 0.157344 | Latent: 21.872818


Epoch 3/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:00,  1.38it/s, loss=3.3941, audio=0.1179, latent=21.0557, nans=0]


  Step 1500/1750 | Loss: 3.587224 | Audio: 0.156618 | Latent: 21.826587


Epoch 3/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:09<01:48,  1.39it/s, loss=3.5831, audio=0.1758, latent=21.5432, nans=0]


  Step 1600/1750 | Loss: 3.580462 | Audio: 0.156296 | Latent: 21.785803


Epoch 3/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:22<00:36,  1.37it/s, loss=3.1759, audio=0.1054, latent=19.7672, nans=0]


  Step 1700/1750 | Loss: 3.574439 | Audio: 0.156152 | Latent: 21.747562


Epoch 3/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:58<00:00,  1.39it/s, loss=3.3002, audio=0.1652, latent=19.7984, nans=0]
Validation 3/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.51it/s, loss=3.0427, audio=0.0838]



EPOCH 3/100 SUMMARY
Train Loss:  3.571404 (Audio: 0.156223, Latent: 21.726385)
Val Loss:    3.484032 (Audio: 0.155371, Latent: 21.155270)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.484032


EPOCH 4/100



Epoch 4/100:   6%|‚ñä              | 100/1750 [01:11<19:21,  1.42it/s, loss=3.2024, audio=0.0998, latent=20.0183, nans=0]


  Step 100/1750 | Loss: 3.506856 | Audio: 0.157605 | Latent: 21.277636


Epoch 4/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:18,  1.41it/s, loss=3.5480, audio=0.1659, latent=21.4420, nans=0]


  Step 200/1750 | Loss: 3.493162 | Audio: 0.155961 | Latent: 21.208263


Epoch 4/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:37,  1.37it/s, loss=3.5791, audio=0.1712, latent=21.5772, nans=0]


  Step 300/1750 | Loss: 3.488140 | Audio: 0.155837 | Latent: 21.176436


Epoch 4/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:10,  1.39it/s, loss=3.6874, audio=0.1594, latent=22.4568, nans=0]


  Step 400/1750 | Loss: 3.483992 | Audio: 0.156708 | Latent: 21.137178


Epoch 4/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<15:13,  1.37it/s, loss=3.8181, audio=0.2269, latent=22.4289, nans=0]


  Step 500/1750 | Loss: 3.483743 | Audio: 0.157169 | Latent: 21.129360


Epoch 4/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:45,  1.39it/s, loss=3.2578, audio=0.1184, latent=20.1399, nans=0]


  Step 600/1750 | Loss: 3.478973 | Audio: 0.156780 | Latent: 21.102756


Epoch 4/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:24<12:21,  1.42it/s, loss=3.2064, audio=0.0963, latent=20.0916, nans=0]


  Step 700/1750 | Loss: 3.470091 | Audio: 0.155949 | Latent: 21.054615


Epoch 4/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:22,  1.39it/s, loss=3.3961, audio=0.1844, latent=20.1822, nans=0]


  Step 800/1750 | Loss: 3.469475 | Audio: 0.156236 | Latent: 21.046676


Epoch 4/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:46<09:59,  1.42it/s, loss=3.0963, audio=0.0829, latent=19.5371, nans=0]


  Step 900/1750 | Loss: 3.466880 | Audio: 0.156159 | Latent: 21.030414


Epoch 4/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<09:02,  1.38it/s, loss=3.5500, audio=0.1832, latent=21.2234, nans=0]


  Step 1000/1750 | Loss: 3.462364 | Audio: 0.155499 | Latent: 21.009101


Epoch 4/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:58,  1.36it/s, loss=3.0415, audio=0.1137, latent=18.7605, nans=0]


  Step 1100/1750 | Loss: 3.456080 | Audio: 0.155146 | Latent: 20.971916


Epoch 4/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:31,  1.41it/s, loss=3.1815, audio=0.1172, latent=19.6474, nans=0]


  Step 1200/1750 | Loss: 3.455844 | Audio: 0.155907 | Latent: 20.960207


Epoch 4/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:22,  1.40it/s, loss=3.2068, audio=0.0998, latent=20.0477, nans=0]


  Step 1300/1750 | Loss: 3.454017 | Audio: 0.156054 | Latent: 20.946066


Epoch 4/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:10,  1.40it/s, loss=3.4523, audio=0.1888, latent=20.4984, nans=0]


  Step 1400/1750 | Loss: 3.452557 | Audio: 0.156256 | Latent: 20.933628


Epoch 4/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:58<02:57,  1.41it/s, loss=3.5606, audio=0.2060, latent=20.9904, nans=0]


  Step 1500/1750 | Loss: 3.452809 | Audio: 0.156788 | Latent: 20.928228


Epoch 4/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:10<01:47,  1.39it/s, loss=3.6511, audio=0.2288, latent=21.2900, nans=0]


  Step 1600/1750 | Loss: 3.450233 | Audio: 0.156806 | Latent: 20.910797


Epoch 4/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:22<00:35,  1.40it/s, loss=3.5267, audio=0.1689, latent=21.2596, nans=0]


  Step 1700/1750 | Loss: 3.446352 | Audio: 0.156553 | Latent: 20.888311


Epoch 4/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:58<00:00,  1.39it/s, loss=3.1694, audio=0.0977, latent=19.8264, nans=0]
Validation 4/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=3.0039, audio=0.0841]



EPOCH 4/100 SUMMARY
Train Loss:  3.445531 (Audio: 0.156671, Latent: 20.881253)
Val Loss:    3.388754 (Audio: 0.155841, Latent: 20.513809)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.388754


EPOCH 5/100



Epoch 5/100:   6%|‚ñä              | 100/1750 [01:12<19:56,  1.38it/s, loss=3.4287, audio=0.1853, latent=20.3874, nans=0]


  Step 100/1750 | Loss: 3.376171 | Audio: 0.156440 | Latent: 20.421933


Epoch 5/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:35,  1.39it/s, loss=3.3815, audio=0.1618, latent=20.3858, nans=0]


  Step 200/1750 | Loss: 3.380872 | Audio: 0.155800 | Latent: 20.461815


Epoch 5/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:10,  1.41it/s, loss=3.7846, audio=0.2292, latent=22.1747, nans=0]


  Step 300/1750 | Loss: 3.386316 | Audio: 0.156333 | Latent: 20.491005


Epoch 5/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:20,  1.38it/s, loss=3.0059, audio=0.0987, latent=18.7226, nans=0]


  Step 400/1750 | Loss: 3.375276 | Audio: 0.154385 | Latent: 20.443374


Epoch 5/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<15:01,  1.39it/s, loss=3.0248, audio=0.0987, latent=18.8493, nans=0]


  Step 500/1750 | Loss: 3.370687 | Audio: 0.154350 | Latent: 20.413243


Epoch 5/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:49,  1.39it/s, loss=3.6208, audio=0.2348, latent=21.0082, nans=0]


  Step 600/1750 | Loss: 3.371908 | Audio: 0.154628 | Latent: 20.417676


Epoch 5/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<12:25,  1.41it/s, loss=3.1468, audio=0.1057, latent=19.5686, nans=0]


  Step 700/1750 | Loss: 3.369978 | Audio: 0.154611 | Latent: 20.405039


Epoch 5/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:20,  1.40it/s, loss=3.3410, audio=0.1047, latent=20.8771, nans=0]


  Step 800/1750 | Loss: 3.371793 | Audio: 0.155062 | Latent: 20.411132


Epoch 5/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:07,  1.40it/s, loss=3.5155, audio=0.1751, latent=21.1023, nans=0]


  Step 900/1750 | Loss: 3.376751 | Audio: 0.156145 | Latent: 20.429742


Epoch 5/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:55<08:48,  1.42it/s, loss=3.4489, audio=0.1812, latent=20.5757, nans=0]


  Step 1000/1750 | Loss: 3.374977 | Audio: 0.156110 | Latent: 20.418384


Epoch 5/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:36,  1.42it/s, loss=2.9325, audio=0.0868, latent=18.3928, nans=0]


  Step 1100/1750 | Loss: 3.372799 | Audio: 0.156115 | Latent: 20.403788


Epoch 5/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:16<06:31,  1.41it/s, loss=3.3140, audio=0.1696, latent=19.8317, nans=0]


  Step 1200/1750 | Loss: 3.370962 | Audio: 0.156242 | Latent: 20.389848


Epoch 5/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:28<05:20,  1.41it/s, loss=3.2916, audio=0.1641, latent=19.7566, nans=0]


  Step 1300/1750 | Loss: 3.365783 | Audio: 0.155671 | Latent: 20.362936


Epoch 5/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:39<04:12,  1.39it/s, loss=3.1531, audio=0.1084, latent=19.5760, nans=1]


  Step 1400/1750 | Loss: 3.362318 | Audio: 0.155973 | Latent: 20.335813


Epoch 5/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:51<03:00,  1.38it/s, loss=3.3017, audio=0.1743, latent=19.6865, nans=1]


  Step 1500/1750 | Loss: 3.359124 | Audio: 0.155456 | Latent: 20.321404


Epoch 5/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:03<01:47,  1.39it/s, loss=3.4319, audio=0.1708, latent=20.6018, nans=1]


  Step 1600/1750 | Loss: 3.357363 | Audio: 0.155712 | Latent: 20.306267


Epoch 5/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:14<00:36,  1.36it/s, loss=3.4314, audio=0.1954, latent=20.2714, nans=1]


  Step 1700/1750 | Loss: 3.357607 | Audio: 0.156107 | Latent: 20.302618


Epoch 5/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:50<00:00,  1.40it/s, loss=3.3246, audio=0.1784, latent=19.7844, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 5/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=2.9668, audio=0.0840]



EPOCH 5/100 SUMMARY
Train Loss:  3.356186 (Audio: 0.156077, Latent: 20.293538)
Val Loss:    3.313496 (Audio: 0.154966, Latent: 20.023751)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.313496


EPOCH 6/100



Epoch 6/100:   6%|‚ñä              | 100/1750 [01:10<19:31,  1.41it/s, loss=3.3224, audio=0.1813, latent=19.7315, nans=0]


  Step 100/1750 | Loss: 3.324714 | Audio: 0.159224 | Latent: 20.041779


Epoch 6/100:  11%|‚ñà‚ñã             | 200/1750 [02:21<18:32,  1.39it/s, loss=3.3167, audio=0.1131, latent=20.6030, nans=0]


  Step 200/1750 | Loss: 3.319938 | Audio: 0.157278 | Latent: 20.035874


Epoch 6/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:32<17:22,  1.39it/s, loss=3.5950, audio=0.2369, latent=20.8080, nans=0]


  Step 300/1750 | Loss: 3.316341 | Audio: 0.156406 | Latent: 20.023520


Epoch 6/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:44<16:12,  1.39it/s, loss=3.2771, audio=0.1787, latent=19.4652, nans=0]


  Step 400/1750 | Loss: 3.313522 | Audio: 0.156373 | Latent: 20.005180


Epoch 6/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:56<14:28,  1.44it/s, loss=3.3948, audio=0.1600, latent=20.4980, nans=0]


  Step 500/1750 | Loss: 3.314340 | Audio: 0.157230 | Latent: 19.999201


Epoch 6/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:07<13:42,  1.40it/s, loss=3.5858, audio=0.2237, latent=20.9226, nans=0]


  Step 600/1750 | Loss: 3.317446 | Audio: 0.157685 | Latent: 20.013846


Epoch 6/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:18<12:36,  1.39it/s, loss=3.3272, audio=0.1813, latent=19.7644, nans=0]


  Step 700/1750 | Loss: 3.315416 | Audio: 0.157485 | Latent: 20.002982


Epoch 6/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:30<11:19,  1.40it/s, loss=3.1693, audio=0.1279, latent=19.4237, nans=0]


  Step 800/1750 | Loss: 3.311965 | Audio: 0.156820 | Latent: 19.988832


Epoch 6/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:42<10:20,  1.37it/s, loss=3.4946, audio=0.2255, latent=20.2903, nans=0]


  Step 900/1750 | Loss: 3.310264 | Audio: 0.157110 | Latent: 19.973630


Epoch 6/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:54<09:05,  1.37it/s, loss=3.3825, audio=0.1654, latent=20.3443, nans=0]


  Step 1000/1750 | Loss: 3.307722 | Audio: 0.157194 | Latent: 19.955562


Epoch 6/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:43,  1.40it/s, loss=3.2141, audio=0.1908, latent=18.8830, nans=0]


  Step 1100/1750 | Loss: 3.304382 | Audio: 0.156920 | Latent: 19.936953


Epoch 6/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:17<06:31,  1.40it/s, loss=3.1349, audio=0.1205, latent=19.2932, nans=0]


  Step 1200/1750 | Loss: 3.303301 | Audio: 0.157030 | Latent: 19.928271


Epoch 6/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:29<05:26,  1.38it/s, loss=3.1171, audio=0.1203, latent=19.1767, nans=0]


  Step 1300/1750 | Loss: 3.301213 | Audio: 0.157209 | Latent: 19.911962


Epoch 6/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:41<04:12,  1.38it/s, loss=3.0400, audio=0.0936, latent=19.0190, nans=0]


  Step 1400/1750 | Loss: 3.300703 | Audio: 0.157244 | Latent: 19.908104


Epoch 6/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:53<03:00,  1.39it/s, loss=3.1731, audio=0.1755, latent=18.8142, nans=0]


  Step 1500/1750 | Loss: 3.295353 | Audio: 0.156521 | Latent: 19.882068


Epoch 6/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:04<01:49,  1.37it/s, loss=2.9385, audio=0.1154, latent=18.0513, nans=0]


  Step 1600/1750 | Loss: 3.291550 | Audio: 0.156110 | Latent: 19.862196


Epoch 6/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:16<00:35,  1.42it/s, loss=3.2845, audio=0.1724, latent=19.5988, nans=0]


  Step 1700/1750 | Loss: 3.288234 | Audio: 0.155696 | Latent: 19.845619


Epoch 6/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:52<00:00,  1.40it/s, loss=3.6218, audio=0.2515, latent=20.7916, nans=0]
Validation 6/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.51it/s, loss=2.9174, audio=0.0815]



EPOCH 6/100 SUMMARY
Train Loss:  3.285858 (Audio: 0.155385, Latent: 19.833918)
Val Loss:    3.237714 (Audio: 0.153307, Latent: 19.540670)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.237714


EPOCH 7/100



Epoch 7/100:   6%|‚ñä              | 100/1750 [01:12<19:42,  1.40it/s, loss=3.4099, audio=0.1681, latent=20.4915, nans=0]


  Step 100/1750 | Loss: 3.239930 | Audio: 0.155248 | Latent: 19.529564


Epoch 7/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:20,  1.41it/s, loss=3.2959, audio=0.1028, latent=20.6018, nans=0]


  Step 200/1750 | Loss: 3.238996 | Audio: 0.153891 | Latent: 19.541419


Epoch 7/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:07,  1.41it/s, loss=3.4308, audio=0.2082, latent=20.0961, nans=0]


  Step 300/1750 | Loss: 3.245500 | Audio: 0.155079 | Latent: 19.568945


Epoch 7/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:24,  1.37it/s, loss=3.2977, audio=0.1589, latent=19.8658, nans=0]


  Step 400/1750 | Loss: 3.254673 | Audio: 0.156609 | Latent: 19.609701


Epoch 7/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<15:28,  1.35it/s, loss=3.0305, audio=0.0913, latent=18.9861, nans=0]


  Step 500/1750 | Loss: 3.240842 | Audio: 0.154477 | Latent: 19.545912


Epoch 7/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:52,  1.38it/s, loss=3.1406, audio=0.1027, latent=19.5685, nans=0]


  Step 600/1750 | Loss: 3.245037 | Audio: 0.156371 | Latent: 19.548634


Epoch 7/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:27<14:09,  1.24it/s, loss=3.3435, audio=0.1755, latent=19.9498, nans=0]


  Step 700/1750 | Loss: 3.237315 | Audio: 0.154838 | Latent: 19.517591


Epoch 7/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:43<11:49,  1.34it/s, loss=3.0985, audio=0.1093, latent=19.1996, nans=0]


  Step 800/1750 | Loss: 3.231050 | Audio: 0.153588 | Latent: 19.492493


Epoch 7/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:57<10:51,  1.30it/s, loss=3.2459, audio=0.1714, latent=19.3542, nans=0]


  Step 900/1750 | Loss: 3.229431 | Audio: 0.154155 | Latent: 19.474142


Epoch 7/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:11<09:17,  1.34it/s, loss=3.3360, audio=0.1868, latent=19.7485, nans=0]


  Step 1000/1750 | Loss: 3.228384 | Audio: 0.154594 | Latent: 19.461299


Epoch 7/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:26<08:29,  1.27it/s, loss=3.6380, audio=0.2775, latent=20.5538, nans=0]


  Step 1100/1750 | Loss: 3.222951 | Audio: 0.154002 | Latent: 19.432978


Epoch 7/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:42<07:23,  1.24it/s, loss=3.3100, audio=0.1078, latent=20.6298, nans=0]


  Step 1200/1750 | Loss: 3.220563 | Audio: 0.153815 | Latent: 19.419554


Epoch 7/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [16:00<05:30,  1.36it/s, loss=3.3228, audio=0.1959, latent=19.5400, nans=0]


  Step 1300/1750 | Loss: 3.219622 | Audio: 0.154310 | Latent: 19.406680


Epoch 7/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [17:15<04:13,  1.38it/s, loss=3.0589, audio=0.1714, latent=18.1081, nans=0]


  Step 1400/1750 | Loss: 3.216341 | Audio: 0.154013 | Latent: 19.388760


Epoch 7/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:27<02:59,  1.39it/s, loss=2.8355, audio=0.0753, latent=17.8995, nans=0]


  Step 1500/1750 | Loss: 3.211847 | Audio: 0.153654 | Latent: 19.363588


Epoch 7/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:42<01:50,  1.36it/s, loss=3.2621, audio=0.1731, latent=19.4386, nans=0]


  Step 1600/1750 | Loss: 3.208339 | Audio: 0.153434 | Latent: 19.343132


Epoch 7/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:55<00:36,  1.36it/s, loss=3.0062, audio=0.1128, latent=18.5374, nans=0]


  Step 1700/1750 | Loss: 3.204756 | Audio: 0.153412 | Latent: 19.319544


Epoch 7/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:32<00:00,  1.35it/s, loss=3.2544, audio=0.1599, latent=19.5646, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 7/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:11<00:00,  1.49it/s, loss=2.8743, audio=0.0798]



EPOCH 7/100 SUMMARY
Train Loss:  3.205424 (Audio: 0.153573, Latent: 19.321849)
Val Loss:    3.193153 (Audio: 0.150055, Latent: 19.286952)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.193153


EPOCH 8/100



Epoch 8/100:   6%|‚ñä              | 100/1750 [01:12<19:32,  1.41it/s, loss=3.0206, audio=0.0844, latent=19.0125, nans=0]


  Step 100/1750 | Loss: 3.216192 | Audio: 0.156196 | Latent: 19.358665


Epoch 8/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:12,  1.42it/s, loss=3.1947, audio=0.1107, latent=19.8220, nans=0]


  Step 200/1750 | Loss: 3.201195 | Audio: 0.155790 | Latent: 19.264101


Epoch 8/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:26,  1.39it/s, loss=2.8505, audio=0.0872, latent=17.8409, nans=0]


  Step 300/1750 | Loss: 3.176771 | Audio: 0.152301 | Latent: 19.147790


Epoch 8/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:05,  1.40it/s, loss=3.1397, audio=0.1025, latent=19.5652, nans=0]


  Step 400/1750 | Loss: 3.167791 | Audio: 0.152523 | Latent: 19.084967


Epoch 8/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<15:14,  1.37it/s, loss=3.0782, audio=0.1158, latent=18.9770, nans=0]


  Step 500/1750 | Loss: 3.160639 | Audio: 0.151502 | Latent: 19.050900


Epoch 8/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:31,  1.42it/s, loss=2.7074, audio=0.0909, latent=16.8376, nans=0]


  Step 600/1750 | Loss: 3.150771 | Audio: 0.150945 | Latent: 18.992533


Epoch 8/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:24<12:32,  1.39it/s, loss=3.3000, audio=0.1590, latent=19.8796, nans=0]


  Step 700/1750 | Loss: 3.145814 | Audio: 0.150306 | Latent: 18.968014


Epoch 8/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:36<11:17,  1.40it/s, loss=3.2887, audio=0.2239, latent=18.9392, nans=0]


  Step 800/1750 | Loss: 3.143038 | Audio: 0.150676 | Latent: 18.944571


Epoch 8/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:05,  1.40it/s, loss=2.8183, audio=0.0984, latent=17.4770, nans=0]


  Step 900/1750 | Loss: 3.140453 | Audio: 0.150680 | Latent: 18.927284


Epoch 8/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<08:57,  1.40it/s, loss=3.1739, audio=0.2339, latent=18.0405, nans=0]


  Step 1000/1750 | Loss: 3.144729 | Audio: 0.152069 | Latent: 18.937275


Epoch 8/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<08:00,  1.35it/s, loss=2.8072, audio=0.1134, latent=17.2021, nans=0]


  Step 1100/1750 | Loss: 3.142209 | Audio: 0.151832 | Latent: 18.923631


Epoch 8/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:31,  1.40it/s, loss=3.2337, audio=0.1776, latent=19.1907, nans=0]


  Step 1200/1750 | Loss: 3.137637 | Audio: 0.151518 | Latent: 18.897346


Epoch 8/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:35<05:28,  1.37it/s, loss=3.3918, audio=0.2252, latent=19.6095, nans=0]


  Step 1300/1750 | Loss: 3.134978 | Audio: 0.151510 | Latent: 18.879725


Epoch 8/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:47<04:05,  1.42it/s, loss=3.1095, audio=0.1593, latent=18.6061, nans=0]


  Step 1400/1750 | Loss: 3.133465 | Audio: 0.151631 | Latent: 18.868012


Epoch 8/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<03:02,  1.37it/s, loss=3.4804, audio=0.2446, latent=19.9419, nans=0]


  Step 1500/1750 | Loss: 3.133117 | Audio: 0.152176 | Latent: 18.858436


Epoch 8/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:10<01:49,  1.37it/s, loss=3.1919, audio=0.1497, latent=19.2831, nans=0]


  Step 1600/1750 | Loss: 3.129511 | Audio: 0.151750 | Latent: 18.840071


Epoch 8/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:35,  1.40it/s, loss=2.9175, audio=0.0766, latent=18.4296, nans=0]


  Step 1700/1750 | Loss: 3.126775 | Audio: 0.151566 | Latent: 18.824277


Epoch 8/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:59<00:00,  1.39it/s, loss=3.2900, audio=0.1564, latent=19.8482, nans=0]
Validation 8/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=2.7964, audio=0.0774]



EPOCH 8/100 SUMMARY
Train Loss:  3.126115 (Audio: 0.151635, Latent: 18.818974)
Val Loss:    3.077680 (Audio: 0.150069, Latent: 18.516949)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 3.077680


EPOCH 9/100



Epoch 9/100:   6%|‚ñä              | 100/1750 [01:11<20:04,  1.37it/s, loss=3.0316, audio=0.1512, latent=18.1942, nans=0]


  Step 100/1750 | Loss: 3.066307 | Audio: 0.151832 | Latent: 18.417614


Epoch 9/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:12,  1.42it/s, loss=3.0218, audio=0.0919, latent=18.9198, nans=0]


  Step 200/1750 | Loss: 3.065789 | Audio: 0.147779 | Latent: 18.468213


Epoch 9/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:33,  1.38it/s, loss=3.0676, audio=0.1404, latent=18.5780, nans=0]


  Step 300/1750 | Loss: 3.085626 | Audio: 0.153180 | Latent: 18.528441


Epoch 9/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<15:55,  1.41it/s, loss=3.0070, audio=0.1786, latent=17.6646, nans=0]


  Step 400/1750 | Loss: 3.082074 | Audio: 0.152718 | Latent: 18.510926


Epoch 9/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<15:16,  1.36it/s, loss=3.0754, audio=0.0807, latent=19.4264, nans=0]


  Step 500/1750 | Loss: 3.077014 | Audio: 0.151137 | Latent: 18.498261


Epoch 9/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:32,  1.42it/s, loss=3.2151, audio=0.1677, latent=19.1983, nans=0]


  Step 600/1750 | Loss: 3.071960 | Audio: 0.151294 | Latent: 18.462478


Epoch 9/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:30,  1.40it/s, loss=2.9782, audio=0.0984, latent=18.5434, nans=0]


  Step 700/1750 | Loss: 3.069639 | Audio: 0.151457 | Latent: 18.444830


Epoch 9/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:32,  1.37it/s, loss=2.8120, audio=0.0926, latent=17.5120, nans=0]


  Step 800/1750 | Loss: 3.063294 | Audio: 0.150581 | Latent: 18.414209


Epoch 9/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:17,  1.38it/s, loss=2.9532, audio=0.0807, latent=18.6119, nans=0]


  Step 900/1750 | Loss: 3.058154 | Audio: 0.149811 | Latent: 18.390213


Epoch 9/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:56<08:53,  1.41it/s, loss=3.1470, audio=0.1482, latent=19.0046, nans=0]


  Step 1000/1750 | Loss: 3.058087 | Audio: 0.149558 | Latent: 18.393135


Epoch 9/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:08<07:43,  1.40it/s, loss=3.1700, audio=0.1085, latent=19.6867, nans=0]


  Step 1100/1750 | Loss: 3.058389 | Audio: 0.148888 | Latent: 18.404086


Epoch 9/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:38,  1.38it/s, loss=3.2892, audio=0.1815, latent=19.5077, nans=0]


  Step 1200/1750 | Loss: 3.056197 | Audio: 0.149052 | Latent: 18.387284


Epoch 9/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:21,  1.40it/s, loss=3.4510, audio=0.2191, latent=20.0860, nans=0]


  Step 1300/1750 | Loss: 3.056918 | Audio: 0.149602 | Latent: 18.384755


Epoch 9/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:44<04:10,  1.40it/s, loss=3.2628, audio=0.2189, latent=18.8336, nans=0]


  Step 1400/1750 | Loss: 3.053742 | Audio: 0.149348 | Latent: 18.366974


Epoch 9/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:56<03:02,  1.37it/s, loss=3.3274, audio=0.2254, latent=19.1772, nans=0]


  Step 1500/1750 | Loss: 3.051872 | Audio: 0.149547 | Latent: 18.351856


Epoch 9/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:47,  1.40it/s, loss=3.2183, audio=0.2200, latent=18.5225, nans=0]


  Step 1600/1750 | Loss: 3.048950 | Audio: 0.149854 | Latent: 18.328284


Epoch 9/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:36,  1.37it/s, loss=2.9515, audio=0.1417, latent=17.7874, nans=0]


  Step 1700/1750 | Loss: 3.046526 | Audio: 0.149934 | Latent: 18.311055


Epoch 9/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=2.8503, audio=0.1655, latent=16.7950, nans=0]
Validation 9/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.52it/s, loss=2.7331, audio=0.0763]



EPOCH 9/100 SUMMARY
Train Loss:  3.045887 (Audio: 0.149755, Latent: 18.309184)
Val Loss:    2.993278 (Audio: 0.148442, Latent: 17.975968)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.993278


EPOCH 10/100



Epoch 10/100:   6%|‚ñä             | 100/1750 [01:12<20:03,  1.37it/s, loss=2.9234, audio=0.1743, latent=17.1650, nans=0]


  Step 100/1750 | Loss: 3.004404 | Audio: 0.151462 | Latent: 18.009869


Epoch 10/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:19,  1.41it/s, loss=2.9990, audio=0.1496, latent=17.9978, nans=0]


  Step 200/1750 | Loss: 3.012650 | Audio: 0.155608 | Latent: 18.009565


Epoch 10/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:39,  1.37it/s, loss=2.9491, audio=0.0957, latent=18.3846, nans=0]


  Step 300/1750 | Loss: 3.002866 | Audio: 0.151601 | Latent: 17.997759


Epoch 10/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:46<16:26,  1.37it/s, loss=2.7151, audio=0.0897, latent=16.9053, nans=0]


  Step 400/1750 | Loss: 2.998367 | Audio: 0.151994 | Latent: 17.962527


Epoch 10/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:58<14:40,  1.42it/s, loss=2.6368, audio=0.1210, latent=15.9648, nans=0]


  Step 500/1750 | Loss: 2.999827 | Audio: 0.152677 | Latent: 17.963153


Epoch 10/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:09<13:54,  1.38it/s, loss=2.6975, audio=0.0823, latent=16.8856, nans=0]


  Step 600/1750 | Loss: 2.985458 | Audio: 0.149764 | Latent: 17.906205


Epoch 10/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:21<12:49,  1.36it/s, loss=2.9297, audio=0.1619, latent=17.3728, nans=0]


  Step 700/1750 | Loss: 2.980717 | Audio: 0.149919 | Latent: 17.872526


Epoch 10/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:33<11:16,  1.40it/s, loss=2.8375, audio=0.0855, latent=17.7767, nans=0]


  Step 800/1750 | Loss: 2.977963 | Audio: 0.149438 | Latent: 17.860572


Epoch 10/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:44<10:01,  1.41it/s, loss=2.6465, audio=0.0651, latent=16.7751, nans=0]


  Step 900/1750 | Loss: 2.977438 | Audio: 0.149772 | Latent: 17.852631


Epoch 10/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:55<09:01,  1.38it/s, loss=2.9448, audio=0.1617, latent=17.4754, nans=0]


  Step 1000/1750 | Loss: 2.971102 | Audio: 0.148796 | Latent: 17.823398


Epoch 10/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:06<07:32,  1.44it/s, loss=2.6438, audio=0.1171, latent=16.0641, nans=0]


  Step 1100/1750 | Loss: 2.968563 | Audio: 0.148730 | Latent: 17.807353


Epoch 10/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:17<06:35,  1.39it/s, loss=2.8545, audio=0.1712, latent=16.7472, nans=0]


  Step 1200/1750 | Loss: 2.963041 | Audio: 0.148039 | Latent: 17.779746


Epoch 10/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:29<05:23,  1.39it/s, loss=2.9199, audio=0.1675, latent=17.2336, nans=0]


  Step 1300/1750 | Loss: 2.959396 | Audio: 0.148152 | Latent: 17.753946


Epoch 10/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:40<04:10,  1.40it/s, loss=2.7553, audio=0.0958, latent=17.0913, nans=0]


  Step 1400/1750 | Loss: 2.956281 | Audio: 0.148330 | Latent: 17.730804


Epoch 10/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:52<02:58,  1.40it/s, loss=3.0130, audio=0.1448, latent=18.1563, nans=0]


  Step 1500/1750 | Loss: 2.951095 | Audio: 0.147919 | Latent: 17.701710


Epoch 10/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:04<01:47,  1.40it/s, loss=3.3019, audio=0.2161, latent=19.1304, nans=0]


  Step 1600/1750 | Loss: 2.951052 | Audio: 0.148297 | Latent: 17.696383


Epoch 10/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:15<00:36,  1.38it/s, loss=2.8743, audio=0.1184, latent=17.5834, nans=0]


  Step 1700/1750 | Loss: 2.946683 | Audio: 0.147945 | Latent: 17.671950


Epoch 10/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:51<00:00,  1.40it/s, loss=3.0132, audio=0.1667, latent=17.8650, nans=0]
Validation 10/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.51it/s, loss=2.6433, audio=0.0731]



EPOCH 10/100 SUMMARY
Train Loss:  2.945312 (Audio: 0.147785, Latent: 17.664947)
Val Loss:    2.889885 (Audio: 0.145785, Latent: 17.322092)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.889885


EPOCH 11/100

üßπ Cleared GPU cache at epoch 10
   Allocated: 1.23GB | Reserved: 1.56GB



Epoch 11/100:   6%|‚ñä             | 100/1750 [01:11<20:00,  1.37it/s, loss=2.9731, audio=0.1533, latent=17.7764, nans=0]


  Step 100/1750 | Loss: 2.935757 | Audio: 0.155887 | Latent: 17.493228


Epoch 11/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:32,  1.39it/s, loss=3.2676, audio=0.2791, latent=18.0628, nans=0]


  Step 200/1750 | Loss: 2.923742 | Audio: 0.155472 | Latent: 17.418657


Epoch 11/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:34<17:49,  1.36it/s, loss=3.1868, audio=0.2341, latent=18.1242, nans=0]


  Step 300/1750 | Loss: 2.905606 | Audio: 0.151479 | Latent: 17.350988


Epoch 11/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:46<15:57,  1.41it/s, loss=2.9462, audio=0.1069, latent=18.2163, nans=0]


  Step 400/1750 | Loss: 2.889409 | Audio: 0.148160 | Latent: 17.287265


Epoch 11/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:58<14:51,  1.40it/s, loss=2.5839, audio=0.1093, latent=15.7688, nans=0]


  Step 500/1750 | Loss: 2.883852 | Audio: 0.147016 | Latent: 17.265470


Epoch 11/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:10<13:54,  1.38it/s, loss=2.8183, audio=0.0843, latent=17.6640, nans=0]


  Step 600/1750 | Loss: 2.874656 | Audio: 0.145303 | Latent: 17.226996


Epoch 11/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:21<12:45,  1.37it/s, loss=2.8310, audio=0.1646, latent=16.6793, nans=0]


  Step 700/1750 | Loss: 2.871342 | Audio: 0.145783 | Latent: 17.198504


Epoch 11/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:33<11:26,  1.38it/s, loss=2.9523, audio=0.1415, latent=17.7951, nans=0]


  Step 800/1750 | Loss: 2.867142 | Audio: 0.145251 | Latent: 17.177594


Epoch 11/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:45<10:15,  1.38it/s, loss=2.8617, audio=0.0833, latent=17.9676, nans=0]


  Step 900/1750 | Loss: 2.868472 | Audio: 0.146168 | Latent: 17.174243


Epoch 11/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:57<09:06,  1.37it/s, loss=2.9825, audio=0.1602, latent=17.7469, nans=0]


  Step 1000/1750 | Loss: 2.870205 | Audio: 0.147092 | Latent: 17.173479


Epoch 11/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:09<07:39,  1.42it/s, loss=2.4975, audio=0.1085, latent=15.2030, nans=0]


  Step 1100/1750 | Loss: 2.867904 | Audio: 0.147064 | Latent: 17.158506


Epoch 11/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:21<06:36,  1.39it/s, loss=2.9169, audio=0.1518, latent=17.4217, nans=0]


  Step 1200/1750 | Loss: 2.867220 | Audio: 0.147518 | Latent: 17.147892


Epoch 11/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:32<05:23,  1.39it/s, loss=2.9247, audio=0.1276, latent=17.7965, nans=0]


  Step 1300/1750 | Loss: 2.862442 | Audio: 0.146793 | Latent: 17.125704


Epoch 11/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:44<04:16,  1.37it/s, loss=2.7379, audio=0.0833, latent=17.1412, nans=0]


  Step 1400/1750 | Loss: 2.860269 | Audio: 0.146915 | Latent: 17.109592


Epoch 11/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:56<03:00,  1.38it/s, loss=2.6706, audio=0.1006, latent=16.4628, nans=0]


  Step 1500/1750 | Loss: 2.856416 | Audio: 0.146483 | Latent: 17.089666


Epoch 11/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:07<01:48,  1.39it/s, loss=2.5284, audio=0.1036, latent=15.4753, nans=0]


  Step 1600/1750 | Loss: 2.853097 | Audio: 0.146153 | Latent: 17.071943


Epoch 11/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:20<00:35,  1.40it/s, loss=2.7429, audio=0.1363, latent=16.4679, nans=0]


  Step 1700/1750 | Loss: 2.850487 | Audio: 0.145683 | Latent: 17.060811


Epoch 11/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:55<00:00,  1.39it/s, loss=2.5358, audio=0.0555, latent=16.1652, nans=0]
Validation 11/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:06<00:00,  1.52it/s, loss=2.5617, audio=0.0706]



EPOCH 11/100 SUMMARY
Train Loss:  2.849543 (Audio: 0.145743, Latent: 17.053707)
Val Loss:    2.796454 (Audio: 0.142839, Latent: 16.738501)
Current LR: 2.99e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.796454


EPOCH 12/100



Epoch 12/100:   6%|‚ñä             | 100/1750 [01:11<19:40,  1.40it/s, loss=3.0729, audio=0.1625, latent=18.3195, nans=0]


  Step 100/1750 | Loss: 2.808671 | Audio: 0.143197 | Latent: 16.815176


Epoch 12/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:31,  1.39it/s, loss=2.7393, audio=0.1366, latent=16.4403, nans=0]


  Step 200/1750 | Loss: 2.817154 | Audio: 0.144842 | Latent: 16.849803


Epoch 12/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:29,  1.38it/s, loss=2.6332, audio=0.1589, latent=15.4366, nans=0]


  Step 300/1750 | Loss: 2.804973 | Audio: 0.144170 | Latent: 16.777552


Epoch 12/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:47<16:00,  1.41it/s, loss=3.0222, audio=0.2033, latent=17.4374, nans=0]


  Step 400/1750 | Loss: 2.799530 | Audio: 0.143093 | Latent: 16.755629


Epoch 12/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:59<15:18,  1.36it/s, loss=2.7546, audio=0.1534, latent=16.3183, nans=0]


  Step 500/1750 | Loss: 2.796460 | Audio: 0.142733 | Latent: 16.739958


Epoch 12/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:11<13:53,  1.38it/s, loss=2.3604, audio=0.1039, latent=14.3504, nans=0]


  Step 600/1750 | Loss: 2.794636 | Audio: 0.143697 | Latent: 16.714950


Epoch 12/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:23<12:37,  1.39it/s, loss=2.8522, audio=0.1443, latent=17.0901, nans=0]


  Step 700/1750 | Loss: 2.787622 | Audio: 0.142909 | Latent: 16.678692


Epoch 12/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:36<11:32,  1.37it/s, loss=2.6996, audio=0.1700, latent=15.7300, nans=0]


  Step 800/1750 | Loss: 2.784895 | Audio: 0.143088 | Latent: 16.658123


Epoch 12/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:48<10:12,  1.39it/s, loss=2.8994, audio=0.1665, latent=17.1097, nans=0]


  Step 900/1750 | Loss: 2.782448 | Audio: 0.142782 | Latent: 16.645888


Epoch 12/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [12:00<09:13,  1.36it/s, loss=2.7507, audio=0.0893, latent=17.1471, nans=0]


  Step 1000/1750 | Loss: 2.777481 | Audio: 0.142395 | Latent: 16.617937


Epoch 12/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:13<07:49,  1.38it/s, loss=2.4977, audio=0.0793, latent=15.5943, nans=0]


  Step 1100/1750 | Loss: 2.773914 | Audio: 0.142559 | Latent: 16.591974


Epoch 12/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:24<06:30,  1.41it/s, loss=2.9677, audio=0.2219, latent=16.8254, nans=0]


  Step 1200/1750 | Loss: 2.772458 | Audio: 0.143320 | Latent: 16.572119


Epoch 12/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:37<05:25,  1.38it/s, loss=2.6581, audio=0.1096, latent=16.2601, nans=0]


  Step 1300/1750 | Loss: 2.770838 | Audio: 0.143535 | Latent: 16.558447


Epoch 12/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:49<04:16,  1.36it/s, loss=2.8149, audio=0.1696, latent=16.5042, nans=0]


  Step 1400/1750 | Loss: 2.765897 | Audio: 0.143131 | Latent: 16.530898


Epoch 12/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [18:02<03:01,  1.38it/s, loss=2.4600, audio=0.0892, latent=15.2104, nans=0]


  Step 1500/1750 | Loss: 2.763586 | Audio: 0.142987 | Latent: 16.517420


Epoch 12/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:14<01:50,  1.36it/s, loss=2.5777, audio=0.0942, latent=15.9281, nans=0]


  Step 1600/1750 | Loss: 2.761781 | Audio: 0.143273 | Latent: 16.501566


Epoch 12/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:26<00:36,  1.36it/s, loss=2.7117, audio=0.1327, latent=16.3087, nans=0]


  Step 1700/1750 | Loss: 2.758355 | Audio: 0.143136 | Latent: 16.480553


Epoch 12/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:03<00:00,  1.39it/s, loss=2.6805, audio=0.0773, latent=16.8396, nans=0]
Validation 12/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=2.4800, audio=0.0685]



EPOCH 12/100 SUMMARY
Train Loss:  2.757670 (Audio: 0.143386, Latent: 16.472646)
Val Loss:    2.704073 (Audio: 0.140327, Latent: 16.156128)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.704073


EPOCH 13/100



Epoch 13/100:   6%|‚ñä             | 100/1750 [01:12<19:48,  1.39it/s, loss=2.4564, audio=0.0957, latent=15.0992, nans=0]


  Step 100/1750 | Loss: 2.678249 | Audio: 0.134164 | Latent: 16.066137


Epoch 13/100:  11%|‚ñà‚ñå            | 200/1750 [02:24<18:33,  1.39it/s, loss=2.9223, audio=0.1628, latent=17.3111, nans=0]


  Step 200/1750 | Loss: 2.696176 | Audio: 0.140317 | Latent: 16.103609


Epoch 13/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:36<17:37,  1.37it/s, loss=2.7274, audio=0.1534, latent=16.1369, nans=0]


  Step 300/1750 | Loss: 2.696794 | Audio: 0.142066 | Latent: 16.084406


Epoch 13/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:48<16:40,  1.35it/s, loss=2.6077, audio=0.1369, latent=15.5602, nans=0]


  Step 400/1750 | Loss: 2.691217 | Audio: 0.141339 | Latent: 16.056918


Epoch 13/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [06:00<15:00,  1.39it/s, loss=2.5154, audio=0.0951, latent=15.5016, nans=0]


  Step 500/1750 | Loss: 2.690031 | Audio: 0.142015 | Latent: 16.040012


Epoch 13/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:12<13:50,  1.38it/s, loss=2.7007, audio=0.1496, latent=16.0097, nans=0]


  Step 600/1750 | Loss: 2.687558 | Audio: 0.143160 | Latent: 16.008245


Epoch 13/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:24<12:36,  1.39it/s, loss=2.5034, audio=0.0817, latent=15.5997, nans=0]


  Step 700/1750 | Loss: 2.683325 | Audio: 0.142290 | Latent: 15.991628


Epoch 13/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:36<11:47,  1.34it/s, loss=2.6524, audio=0.0976, latent=16.3811, nans=0]


  Step 800/1750 | Loss: 2.680782 | Audio: 0.141592 | Latent: 15.983986


Epoch 13/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:48<09:52,  1.43it/s, loss=2.5117, audio=0.0636, latent=15.8965, nans=0]


  Step 900/1750 | Loss: 2.681758 | Audio: 0.141203 | Latent: 15.995677


Epoch 13/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [12:01<09:08,  1.37it/s, loss=2.9417, audio=0.1669, latent=17.3864, nans=0]


  Step 1000/1750 | Loss: 2.679509 | Audio: 0.140640 | Latent: 15.988186


Epoch 13/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:13<07:57,  1.36it/s, loss=3.5390, audio=0.1686, latent=21.3458, nans=1]


  Step 1100/1750 | Loss: 2.683072 | Audio: 0.141034 | Latent: 16.006693


Epoch 13/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:26<07:06,  1.29it/s, loss=3.1069, audio=0.2902, latent=16.8436, nans=1]


  Step 1200/1750 | Loss: 2.682211 | Audio: 0.140753 | Latent: 16.004696


Epoch 13/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:37<05:25,  1.38it/s, loss=3.0423, audio=0.2700, latent=16.6823, nans=1]


  Step 1300/1750 | Loss: 2.675577 | Audio: 0.140085 | Latent: 15.969380


Epoch 13/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:49<04:14,  1.38it/s, loss=2.5340, audio=0.0990, latent=15.5729, nans=1]


  Step 1400/1750 | Loss: 2.675209 | Audio: 0.140647 | Latent: 15.959430


Epoch 13/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [18:02<03:01,  1.38it/s, loss=2.6870, audio=0.1591, latent=15.7924, nans=1]


  Step 1500/1750 | Loss: 2.673747 | Audio: 0.140711 | Latent: 15.948829


Epoch 13/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:13<01:47,  1.40it/s, loss=2.4203, audio=0.0923, latent=14.9050, nans=1]


  Step 1600/1750 | Loss: 2.671834 | Audio: 0.140603 | Latent: 15.937515


Epoch 13/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:25<00:36,  1.38it/s, loss=2.2861, audio=0.0721, latent=14.2795, nans=1]


  Step 1700/1750 | Loss: 2.670914 | Audio: 0.140590 | Latent: 15.931554


Epoch 13/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:01<00:00,  1.39it/s, loss=2.5522, audio=0.1388, latent=15.1640, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 13/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=2.3904, audio=0.0636]



EPOCH 13/100 SUMMARY
Train Loss:  2.668944 (Audio: 0.140428, Latent: 15.920584)
Val Loss:    2.613227 (Audio: 0.136308, Latent: 15.604069)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.613227


EPOCH 14/100



Epoch 14/100:   6%|‚ñä             | 100/1750 [01:12<20:04,  1.37it/s, loss=2.6674, audio=0.1356, latent=15.9746, nans=0]


  Step 100/1750 | Loss: 2.613965 | Audio: 0.133530 | Latent: 15.646030


Epoch 14/100:  11%|‚ñà‚ñå            | 200/1750 [02:24<18:30,  1.40it/s, loss=2.5893, audio=0.1428, latent=15.3577, nans=0]


  Step 200/1750 | Loss: 2.619011 | Audio: 0.138010 | Latent: 15.619948


Epoch 14/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:36<17:10,  1.41it/s, loss=2.3106, audio=0.0666, latent=14.5156, nans=0]


  Step 300/1750 | Loss: 2.613120 | Audio: 0.136657 | Latent: 15.598705


Epoch 14/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:48<16:31,  1.36it/s, loss=2.3095, audio=0.0649, latent=14.5319, nans=0]


  Step 400/1750 | Loss: 2.617010 | Audio: 0.137456 | Latent: 15.613993


Epoch 14/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [06:00<15:05,  1.38it/s, loss=2.7374, audio=0.2149, latent=15.3832, nans=0]


  Step 500/1750 | Loss: 2.617651 | Audio: 0.139458 | Latent: 15.591562


Epoch 14/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:12<13:37,  1.41it/s, loss=2.2527, audio=0.0800, latent=13.9513, nans=0]


  Step 600/1750 | Loss: 2.613917 | Audio: 0.140204 | Latent: 15.556727


Epoch 14/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:24<12:26,  1.41it/s, loss=2.3511, audio=0.0933, latent=14.4301, nans=0]


  Step 700/1750 | Loss: 2.606012 | Audio: 0.138779 | Latent: 15.523032


Epoch 14/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:36<11:27,  1.38it/s, loss=2.7899, audio=0.1450, latent=16.6665, nans=0]


  Step 800/1750 | Loss: 2.602093 | Audio: 0.138731 | Latent: 15.497541


Epoch 14/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:48<10:19,  1.37it/s, loss=2.6642, audio=0.1509, latent=15.7495, nans=0]


  Step 900/1750 | Loss: 2.600782 | Audio: 0.139229 | Latent: 15.482163


Epoch 14/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [12:00<09:04,  1.38it/s, loss=2.5405, audio=0.0980, latent=15.6296, nans=0]


  Step 1000/1750 | Loss: 2.599928 | Audio: 0.139362 | Latent: 15.474692


Epoch 14/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:12<07:41,  1.41it/s, loss=2.4248, audio=0.0733, latent=15.1878, nans=0]


  Step 1100/1750 | Loss: 2.595723 | Audio: 0.138916 | Latent: 15.452604


Epoch 14/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:24<06:43,  1.36it/s, loss=2.6514, audio=0.1481, latent=15.7018, nans=0]


  Step 1200/1750 | Loss: 2.591619 | Audio: 0.138670 | Latent: 15.428525


Epoch 14/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:35<05:25,  1.38it/s, loss=2.3969, audio=0.1123, latent=14.4815, nans=0]


  Step 1300/1750 | Loss: 2.588411 | Audio: 0.138370 | Latent: 15.411139


Epoch 14/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:48<04:10,  1.40it/s, loss=2.4862, audio=0.0854, latent=15.4363, nans=0]


  Step 1400/1750 | Loss: 2.584101 | Audio: 0.137901 | Latent: 15.388658


Epoch 14/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [18:00<03:01,  1.38it/s, loss=2.5388, audio=0.1458, latent=14.9809, nans=0]


  Step 1500/1750 | Loss: 2.579147 | Audio: 0.137322 | Latent: 15.363350


Epoch 14/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:12<01:48,  1.38it/s, loss=2.3952, audio=0.0693, latent=15.0443, nans=0]


  Step 1600/1750 | Loss: 2.574266 | Audio: 0.137061 | Latent: 15.334291


Epoch 14/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:24<00:36,  1.39it/s, loss=2.4335, audio=0.1507, latent=14.2143, nans=0]


  Step 1700/1750 | Loss: 2.571044 | Audio: 0.136744 | Latent: 15.317038


Epoch 14/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=2.5601, audio=0.1307, latent=15.3237, nans=0]
Validation 14/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=2.2947, audio=0.0616]



EPOCH 14/100 SUMMARY
Train Loss:  2.569391 (Audio: 0.136887, Latent: 15.304111)
Val Loss:    2.511078 (Audio: 0.132962, Latent: 14.967696)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.511078


EPOCH 15/100



Epoch 15/100:   6%|‚ñä             | 100/1750 [01:11<19:41,  1.40it/s, loss=2.7251, audio=0.2152, latent=15.2979, nans=0]


  Step 100/1750 | Loss: 2.499172 | Audio: 0.129962 | Latent: 14.928318


Epoch 15/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:35,  1.39it/s, loss=2.2976, audio=0.0794, latent=14.2579, nans=0]


  Step 200/1750 | Loss: 2.522514 | Audio: 0.136795 | Latent: 14.992824


Epoch 15/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:09,  1.41it/s, loss=2.4650, audio=0.1370, latent=14.6062, nans=0]


  Step 300/1750 | Loss: 2.517191 | Audio: 0.136505 | Latent: 14.961201


Epoch 15/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:46<16:28,  1.37it/s, loss=2.5972, audio=0.1469, latent=15.3558, nans=0]


  Step 400/1750 | Loss: 2.516153 | Audio: 0.136679 | Latent: 14.951958


Epoch 15/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:58<14:54,  1.40it/s, loss=2.4213, audio=0.0858, latent=14.9977, nans=0]


  Step 500/1750 | Loss: 2.506227 | Audio: 0.134171 | Latent: 14.919232


Epoch 15/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:10<13:56,  1.37it/s, loss=2.5354, audio=0.1643, latent=14.7120, nans=0]


  Step 600/1750 | Loss: 2.501404 | Audio: 0.133940 | Latent: 14.890164


Epoch 15/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:22<12:40,  1.38it/s, loss=2.6858, audio=0.1967, latent=15.2832, nans=0]


  Step 700/1750 | Loss: 2.496750 | Audio: 0.133622 | Latent: 14.863370


Epoch 15/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:34<11:14,  1.41it/s, loss=2.2879, audio=0.0927, latent=14.0166, nans=0]


  Step 800/1750 | Loss: 2.495793 | Audio: 0.133589 | Latent: 14.857431


Epoch 15/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:46<10:09,  1.39it/s, loss=2.4843, audio=0.0801, latent=15.4940, nans=0]


  Step 900/1750 | Loss: 2.494994 | Audio: 0.134259 | Latent: 14.843176


Epoch 15/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:58<09:01,  1.39it/s, loss=2.3999, audio=0.0670, latent=15.1055, nans=0]


  Step 1000/1750 | Loss: 2.496612 | Audio: 0.134533 | Latent: 14.850301


Epoch 15/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:10<07:47,  1.39it/s, loss=2.1349, audio=0.0860, latent=13.0862, nans=0]


  Step 1100/1750 | Loss: 2.493734 | Audio: 0.133778 | Latent: 14.841177


Epoch 15/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:22<06:23,  1.43it/s, loss=2.6845, audio=0.1909, latent=15.3506, nans=0]


  Step 1200/1750 | Loss: 2.492721 | Audio: 0.134414 | Latent: 14.825949


Epoch 15/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:33<05:18,  1.41it/s, loss=2.2880, audio=0.0532, latent=14.5438, nans=0]


  Step 1300/1750 | Loss: 2.491525 | Audio: 0.134606 | Latent: 14.815420


Epoch 15/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:45<04:18,  1.35it/s, loss=2.6623, audio=0.2292, latent=14.6930, nans=0]


  Step 1400/1750 | Loss: 2.487575 | Audio: 0.133918 | Latent: 14.798257


Epoch 15/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:57<03:01,  1.38it/s, loss=2.4049, audio=0.1455, latent=14.0924, nans=0]


  Step 1500/1750 | Loss: 2.482992 | Audio: 0.133350 | Latent: 14.775281


Epoch 15/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:09<01:45,  1.42it/s, loss=2.3316, audio=0.0753, latent=14.5405, nans=0]


  Step 1600/1750 | Loss: 2.478398 | Audio: 0.133031 | Latent: 14.748906


Epoch 15/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:21<00:36,  1.38it/s, loss=2.3254, audio=0.0885, latent=14.3223, nans=0]


  Step 1700/1750 | Loss: 2.475860 | Audio: 0.133020 | Latent: 14.732132


Epoch 15/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:57<00:00,  1.39it/s, loss=1.9732, audio=0.0890, latent=11.9686, nans=0]
Validation 15/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=2.2057, audio=0.0580]



EPOCH 15/100 SUMMARY
Train Loss:  2.474035 (Audio: 0.132790, Latent: 14.723034)
Val Loss:    2.419227 (Audio: 0.129442, Latent: 14.402291)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.419227


EPOCH 16/100



Epoch 16/100:   6%|‚ñä             | 100/1750 [01:11<19:39,  1.40it/s, loss=2.6300, audio=0.1348, latent=15.7364, nans=0]


  Step 100/1750 | Loss: 2.431441 | Audio: 0.129973 | Latent: 14.476634


Epoch 16/100:  11%|‚ñà‚ñå            | 200/1750 [02:24<18:58,  1.36it/s, loss=2.1607, audio=0.0766, latent=13.3836, nans=0]


  Step 200/1750 | Loss: 2.428523 | Audio: 0.132437 | Latent: 14.424323


Epoch 16/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:05,  1.41it/s, loss=2.4843, audio=0.1292, latent=14.8391, nans=0]


  Step 300/1750 | Loss: 2.415185 | Audio: 0.129618 | Latent: 14.372990


Epoch 16/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:48<16:00,  1.41it/s, loss=2.1092, audio=0.0778, latent=13.0239, nans=0]


  Step 400/1750 | Loss: 2.419458 | Audio: 0.130783 | Latent: 14.385942


Epoch 16/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [06:00<14:53,  1.40it/s, loss=2.2614, audio=0.0783, latent=14.0321, nans=0]


  Step 500/1750 | Loss: 2.415309 | Audio: 0.131070 | Latent: 14.354455


Epoch 16/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:12<13:36,  1.41it/s, loss=2.4202, audio=0.1419, latent=14.2427, nans=0]


  Step 600/1750 | Loss: 2.412382 | Audio: 0.131319 | Latent: 14.331630


Epoch 16/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:24<13:31,  1.29it/s, loss=2.3225, audio=0.1411, latent=13.6024, nans=0]


  Step 700/1750 | Loss: 2.406506 | Audio: 0.129928 | Latent: 14.311005


Epoch 16/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:32<10:34,  1.50it/s, loss=2.5669, audio=0.2078, latent=14.3423, nans=0]


  Step 800/1750 | Loss: 2.400836 | Audio: 0.129132 | Latent: 14.283819


Epoch 16/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:43<10:46,  1.31it/s, loss=2.3285, audio=0.1339, latent=13.7384, nans=0]


  Step 900/1750 | Loss: 2.395646 | Audio: 0.128704 | Latent: 14.254913


Epoch 16/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:57<09:08,  1.37it/s, loss=2.5225, audio=0.1386, latent=14.9690, nans=0]


  Step 1000/1750 | Loss: 2.391019 | Audio: 0.128705 | Latent: 14.224059


Epoch 16/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:09<07:44,  1.40it/s, loss=2.0279, audio=0.0758, latent=12.5089, nans=0]


  Step 1100/1750 | Loss: 2.389906 | Audio: 0.129174 | Latent: 14.210378


Epoch 16/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:21<06:42,  1.37it/s, loss=2.6645, audio=0.2032, latent=15.0548, nans=0]


  Step 1200/1750 | Loss: 2.388487 | Audio: 0.129559 | Latent: 14.195788


Epoch 16/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:35<05:57,  1.26it/s, loss=2.5109, audio=0.1839, latent=14.2882, nans=0]


  Step 1300/1750 | Loss: 2.384119 | Audio: 0.129037 | Latent: 14.173627


Epoch 16/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:49<04:08,  1.41it/s, loss=2.1259, audio=0.0972, latent=12.8764, nans=0]


  Step 1400/1750 | Loss: 2.382277 | Audio: 0.129278 | Latent: 14.158142


Epoch 16/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [18:01<03:11,  1.31it/s, loss=2.3356, audio=0.1403, latent=13.7009, nans=0]


  Step 1500/1750 | Loss: 2.377097 | Audio: 0.128362 | Latent: 14.135812


Epoch 16/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:19<01:56,  1.29it/s, loss=2.3818, audio=0.1370, latent=14.0519, nans=0]


  Step 1600/1750 | Loss: 2.375381 | Audio: 0.128769 | Latent: 14.118950


Epoch 16/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:33<00:36,  1.38it/s, loss=1.9781, audio=0.0882, latent=12.0110, nans=0]


  Step 1700/1750 | Loss: 2.374505 | Audio: 0.129503 | Latent: 14.103328


Epoch 16/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:09<00:00,  1.38it/s, loss=2.0894, audio=0.0872, latent=12.7667, nans=0]
Validation 16/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=2.1128, audio=0.0568]



EPOCH 16/100 SUMMARY
Train Loss:  2.372151 (Audio: 0.129143, Latent: 14.092432)
Val Loss:    2.316457 (Audio: 0.125246, Latent: 13.773108)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.316457


EPOCH 17/100



Epoch 17/100:   6%|‚ñä             | 100/1750 [01:12<19:37,  1.40it/s, loss=2.2693, audio=0.0695, latent=14.2022, nans=0]


  Step 100/1750 | Loss: 2.299743 | Audio: 0.123703 | Latent: 13.682250


Epoch 17/100:  11%|‚ñà‚ñå            | 200/1750 [02:27<20:57,  1.23it/s, loss=2.4644, audio=0.1663, latent=14.2124, nans=0]


  Step 200/1750 | Loss: 2.315855 | Audio: 0.127869 | Latent: 13.734109


Epoch 17/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:43<17:15,  1.40it/s, loss=2.1699, audio=0.1395, latent=12.6053, nans=0]


  Step 300/1750 | Loss: 2.315689 | Audio: 0.128261 | Latent: 13.727777


Epoch 17/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [05:03<16:31,  1.36it/s, loss=2.3038, audio=0.0679, latent=14.4531, nans=0]


  Step 400/1750 | Loss: 2.314552 | Audio: 0.126500 | Latent: 13.743683


Epoch 17/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [06:28<15:27,  1.35it/s, loss=2.4210, audio=0.1964, latent=13.5205, nans=0]


  Step 500/1750 | Loss: 2.322348 | Audio: 0.130052 | Latent: 13.748299


Epoch 17/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:51<13:42,  1.40it/s, loss=2.0862, audio=0.0639, latent=13.0564, nans=0]


  Step 600/1750 | Loss: 2.311309 | Audio: 0.128128 | Latent: 13.700345


Epoch 17/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [09:03<13:28,  1.30it/s, loss=2.5272, audio=0.1972, latent=14.2186, nans=0]


  Step 700/1750 | Loss: 2.306102 | Audio: 0.127419 | Latent: 13.675098


Epoch 17/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [10:17<11:19,  1.40it/s, loss=2.3588, audio=0.1456, latent=13.7844, nans=0]


  Step 800/1750 | Loss: 2.303875 | Audio: 0.126496 | Latent: 13.672550


Epoch 17/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [11:36<10:56,  1.29it/s, loss=2.4356, audio=0.1512, latent=14.2214, nans=0]


  Step 900/1750 | Loss: 2.300469 | Audio: 0.125915 | Latent: 13.657586


Epoch 17/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [12:55<08:36,  1.45it/s, loss=2.3177, audio=0.1406, latent=13.5767, nans=0]


  Step 1000/1750 | Loss: 2.296692 | Audio: 0.125765 | Latent: 13.634417


Epoch 17/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [14:18<08:13,  1.32it/s, loss=2.0133, audio=0.0647, latent=12.5590, nans=0]


  Step 1100/1750 | Loss: 2.293984 | Audio: 0.126048 | Latent: 13.612586


Epoch 17/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [15:42<06:28,  1.41it/s, loss=2.5338, audio=0.2028, latent=14.1877, nans=0]


  Step 1200/1750 | Loss: 2.290357 | Audio: 0.125741 | Latent: 13.592495


Epoch 17/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [16:54<05:15,  1.43it/s, loss=2.4240, audio=0.1994, latent=13.5017, nans=0]


  Step 1300/1750 | Loss: 2.290012 | Audio: 0.126133 | Latent: 13.584973


Epoch 17/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [18:08<04:18,  1.35it/s, loss=2.0431, audio=0.0598, latent=12.8241, nans=0]


  Step 1400/1750 | Loss: 2.286289 | Audio: 0.125981 | Latent: 13.562180


Epoch 17/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [19:19<02:58,  1.40it/s, loss=1.9349, audio=0.0648, latent=12.0350, nans=0]


  Step 1500/1750 | Loss: 2.285353 | Audio: 0.126310 | Latent: 13.551552


Epoch 17/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [20:31<01:49,  1.37it/s, loss=1.9627, audio=0.0777, latent=12.0487, nans=0]


  Step 1600/1750 | Loss: 2.283156 | Audio: 0.126310 | Latent: 13.536911


Epoch 17/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [21:43<00:35,  1.39it/s, loss=2.3786, audio=0.1362, latent=14.0408, nans=0]


  Step 1700/1750 | Loss: 2.280256 | Audio: 0.125858 | Latent: 13.523595


Epoch 17/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [22:18<00:00,  1.31it/s, loss=2.5393, audio=0.2014, latent=14.2437, nans=0]
Validation 17/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:06<00:00,  1.52it/s, loss=2.0249, audio=0.0532]



EPOCH 17/100 SUMMARY
Train Loss:  2.278896 (Audio: 0.125775, Latent: 13.515634)
Val Loss:    2.226440 (Audio: 0.121668, Latent: 13.220688)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.226440


EPOCH 18/100



Epoch 18/100:   6%|‚ñä             | 100/1750 [01:11<19:35,  1.40it/s, loss=2.0542, audio=0.0743, latent=12.7038, nans=0]


  Step 100/1750 | Loss: 2.215688 | Audio: 0.122015 | Latent: 13.144390


Epoch 18/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:19,  1.41it/s, loss=2.4208, audio=0.2014, latent=13.4534, nans=0]


  Step 200/1750 | Loss: 2.204066 | Audio: 0.121773 | Latent: 13.070141


Epoch 18/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:34<17:05,  1.41it/s, loss=2.2649, audio=0.1335, latent=13.3192, nans=0]


  Step 300/1750 | Loss: 2.209991 | Audio: 0.122944 | Latent: 13.094018


Epoch 18/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:45<16:07,  1.40it/s, loss=2.5057, audio=0.2060, latent=13.9582, nans=0]


  Step 400/1750 | Loss: 2.215518 | Audio: 0.123671 | Latent: 13.121174


Epoch 18/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:57<15:14,  1.37it/s, loss=2.1860, audio=0.1363, latent=12.7564, nans=0]


  Step 500/1750 | Loss: 2.215756 | Audio: 0.124907 | Latent: 13.106281


Epoch 18/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:09<13:46,  1.39it/s, loss=2.2699, audio=0.1474, latent=13.1671, nans=0]


  Step 600/1750 | Loss: 2.216344 | Audio: 0.125839 | Latent: 13.097777


Epoch 18/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:20<12:11,  1.44it/s, loss=2.0331, audio=0.0697, latent=12.6238, nans=0]


  Step 700/1750 | Loss: 2.211297 | Audio: 0.124492 | Latent: 13.082085


Epoch 18/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:31<11:15,  1.41it/s, loss=2.0955, audio=0.0850, latent=12.8368, nans=0]


  Step 800/1750 | Loss: 2.209321 | Audio: 0.124272 | Latent: 13.071848


Epoch 18/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:43<10:06,  1.40it/s, loss=2.2453, audio=0.1265, latent=13.2821, nans=0]


  Step 900/1750 | Loss: 2.206601 | Audio: 0.124111 | Latent: 13.055863


Epoch 18/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:54<08:50,  1.41it/s, loss=2.2766, audio=0.1879, latent=12.6721, nans=0]


  Step 1000/1750 | Loss: 2.200280 | Audio: 0.122802 | Latent: 13.031172


Epoch 18/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:05<07:51,  1.38it/s, loss=2.2155, audio=0.1323, latent=13.0057, nans=0]


  Step 1100/1750 | Loss: 2.201150 | Audio: 0.123668 | Latent: 13.025428


Epoch 18/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:19<06:45,  1.36it/s, loss=2.0890, audio=0.1559, latent=11.8475, nans=0]


  Step 1200/1750 | Loss: 2.198537 | Audio: 0.123094 | Latent: 13.015656


Epoch 18/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:35<05:45,  1.30it/s, loss=2.0637, audio=0.1427, latent=11.8546, nans=0]


  Step 1300/1750 | Loss: 2.196200 | Audio: 0.122981 | Latent: 13.001579


Epoch 18/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:46<04:03,  1.44it/s, loss=2.3819, audio=0.1847, latent=13.4165, nans=0]


  Step 1400/1750 | Loss: 2.192892 | Audio: 0.122616 | Latent: 12.984402


Epoch 18/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:56<03:07,  1.34it/s, loss=1.9282, audio=0.0806, latent=11.7800, nans=0]


  Step 1500/1750 | Loss: 2.190086 | Audio: 0.122235 | Latent: 12.970771


Epoch 18/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:07<01:47,  1.40it/s, loss=2.3045, audio=0.1496, latent=13.3685, nans=0]


  Step 1600/1750 | Loss: 2.187057 | Audio: 0.122099 | Latent: 12.952396


Epoch 18/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:17<00:34,  1.45it/s, loss=1.9805, audio=0.0828, latent=12.0992, nans=0]


  Step 1700/1750 | Loss: 2.186350 | Audio: 0.122532 | Latent: 12.941910


Epoch 18/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:53<00:00,  1.40it/s, loss=2.1144, audio=0.1230, latent=12.4557, nans=0]
Validation 18/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.9316, audio=0.0517]



EPOCH 18/100 SUMMARY
Train Loss:  2.184473 (Audio: 0.122584, Latent: 12.928699)
Val Loss:    2.132085 (Audio: 0.119391, Latent: 12.622014)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.132085


EPOCH 19/100



Epoch 19/100:   6%|‚ñä             | 100/1750 [01:11<19:47,  1.39it/s, loss=2.3994, audio=0.1820, latent=13.5691, nans=0]


  Step 100/1750 | Loss: 2.136248 | Audio: 0.126123 | Latent: 12.560008


Epoch 19/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:17,  1.41it/s, loss=1.8527, audio=0.0745, latent=11.3581, nans=0]


  Step 200/1750 | Loss: 2.133539 | Audio: 0.124554 | Latent: 12.562878


Epoch 19/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:33<17:09,  1.41it/s, loss=2.0973, audio=0.0627, latent=13.1459, nans=0]


  Step 300/1750 | Loss: 2.133415 | Audio: 0.122727 | Latent: 12.586407


Epoch 19/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:45<16:12,  1.39it/s, loss=2.3075, audio=0.1880, latent=12.8769, nans=0]


  Step 400/1750 | Loss: 2.135681 | Audio: 0.123295 | Latent: 12.593941


Epoch 19/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:57<14:55,  1.40it/s, loss=2.1528, audio=0.1243, latent=12.6944, nans=0]


  Step 500/1750 | Loss: 2.133768 | Audio: 0.123973 | Latent: 12.572138


Epoch 19/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:09<13:53,  1.38it/s, loss=2.0600, audio=0.0539, latent=13.0147, nans=0]


  Step 600/1750 | Loss: 2.126461 | Audio: 0.122711 | Latent: 12.540254


Epoch 19/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:21<13:07,  1.33it/s, loss=2.2389, audio=0.1350, latent=13.1265, nans=0]


  Step 700/1750 | Loss: 2.123078 | Audio: 0.122034 | Latent: 12.526728


Epoch 19/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:32<11:25,  1.39it/s, loss=2.0139, audio=0.0719, latent=12.4677, nans=0]


  Step 800/1750 | Loss: 2.121697 | Audio: 0.122230 | Latent: 12.514907


Epoch 19/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:44<10:14,  1.38it/s, loss=2.1302, audio=0.1536, latent=12.1525, nans=0]


  Step 900/1750 | Loss: 2.119724 | Audio: 0.122420 | Latent: 12.499226


Epoch 19/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:56<08:47,  1.42it/s, loss=1.8117, audio=0.0818, latent=10.9876, nans=0]


  Step 1000/1750 | Loss: 2.113990 | Audio: 0.121473 | Latent: 12.473625


Epoch 19/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:08<07:33,  1.43it/s, loss=1.9141, audio=0.0771, latent=11.7329, nans=0]


  Step 1100/1750 | Loss: 2.110879 | Audio: 0.121110 | Latent: 12.457733


Epoch 19/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:19<06:38,  1.38it/s, loss=1.8841, audio=0.0605, latent=11.7531, nans=0]


  Step 1200/1750 | Loss: 2.108939 | Audio: 0.121127 | Latent: 12.444571


Epoch 19/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:31<05:24,  1.39it/s, loss=2.1567, audio=0.1314, latent=12.6255, nans=0]


  Step 1300/1750 | Loss: 2.105300 | Audio: 0.120469 | Latent: 12.429087


Epoch 19/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:42<04:13,  1.38it/s, loss=1.8856, audio=0.0640, latent=11.7182, nans=0]


  Step 1400/1750 | Loss: 2.102566 | Audio: 0.119988 | Latent: 12.417264


Epoch 19/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:51<03:00,  1.39it/s, loss=1.8467, audio=0.0825, latent=11.2117, nans=0]


  Step 1500/1750 | Loss: 2.098992 | Audio: 0.119780 | Latent: 12.396221


Epoch 19/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:02<01:45,  1.42it/s, loss=2.1565, audio=0.1834, latent=11.9319, nans=0]


  Step 1600/1750 | Loss: 2.096168 | Audio: 0.119558 | Latent: 12.380342


Epoch 19/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:14<00:37,  1.34it/s, loss=1.9899, audio=0.0734, latent=12.2880, nans=0]


  Step 1700/1750 | Loss: 2.094282 | Audio: 0.119388 | Latent: 12.370042


Epoch 19/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:49<00:00,  1.40it/s, loss=1.8884, audio=0.0606, latent=11.7820, nans=0]
Validation 19/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=1.8471, audio=0.0489]



EPOCH 19/100 SUMMARY
Train Loss:  2.094456 (Audio: 0.119637, Latent: 12.367882)
Val Loss:    2.048506 (Audio: 0.117451, Latent: 12.090699)
Current LR: 2.98e-05

‚úÖ NEW BEST MODEL! Val Loss: 2.048506


EPOCH 20/100



Epoch 20/100:   6%|‚ñä             | 100/1750 [01:11<20:19,  1.35it/s, loss=2.0009, audio=0.1309, latent=11.5940, nans=0]


  Step 100/1750 | Loss: 2.059265 | Audio: 0.121543 | Latent: 12.107852


Epoch 20/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:34,  1.39it/s, loss=1.9247, audio=0.0652, latent=11.9621, nans=0]


  Step 200/1750 | Loss: 2.038304 | Audio: 0.116595 | Latent: 12.034090


Epoch 20/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:12,  1.40it/s, loss=1.7834, audio=0.0658, latent=11.0118, nans=0]


  Step 300/1750 | Loss: 2.033096 | Audio: 0.115902 | Latent: 12.008618


Epoch 20/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:45<15:40,  1.43it/s, loss=1.9321, audio=0.0622, latent=12.0508, nans=0]


  Step 400/1750 | Loss: 2.030327 | Audio: 0.115516 | Latent: 11.995295


Epoch 20/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:56<14:36,  1.43it/s, loss=2.4846, audio=0.2431, latent=13.3227, nans=0]


  Step 500/1750 | Loss: 2.031464 | Audio: 0.115483 | Latent: 12.003317


Epoch 20/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:07<13:42,  1.40it/s, loss=2.1367, audio=0.1971, latent=11.6165, nans=0]


  Step 600/1750 | Loss: 2.033730 | Audio: 0.116345 | Latent: 12.006936


Epoch 20/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:17<12:29,  1.40it/s, loss=1.9083, audio=0.0513, latent=12.0384, nans=0]


  Step 700/1750 | Loss: 2.030471 | Audio: 0.116274 | Latent: 11.986153


Epoch 20/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:29<11:23,  1.39it/s, loss=1.9309, audio=0.1430, latent=10.9659, nans=0]


  Step 800/1750 | Loss: 2.027414 | Audio: 0.116431 | Latent: 11.963686


Epoch 20/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:41<10:04,  1.41it/s, loss=1.8261, audio=0.0700, latent=11.2405, nans=0]


  Step 900/1750 | Loss: 2.028323 | Audio: 0.117060 | Latent: 11.961348


Epoch 20/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:52<08:38,  1.45it/s, loss=2.2081, audio=0.1995, latent=12.0610, nans=0]


  Step 1000/1750 | Loss: 2.026979 | Audio: 0.117243 | Latent: 11.949950


Epoch 20/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:03<07:49,  1.38it/s, loss=1.9371, audio=0.0636, latent=12.0661, nans=0]


  Step 1100/1750 | Loss: 2.025852 | Audio: 0.117532 | Latent: 11.938584


Epoch 20/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:14<06:18,  1.45it/s, loss=1.8989, audio=0.0683, latent=11.7490, nans=0]


  Step 1200/1750 | Loss: 2.024401 | Audio: 0.117677 | Latent: 11.926981


Epoch 20/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:25<05:23,  1.39it/s, loss=1.8285, audio=0.0547, latent=11.4604, nans=0]


  Step 1300/1750 | Loss: 2.021071 | Audio: 0.117190 | Latent: 11.911266


Epoch 20/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:37<04:19,  1.35it/s, loss=1.9858, audio=0.1407, latent=11.3617, nans=0]


  Step 1400/1750 | Loss: 2.017999 | Audio: 0.117314 | Latent: 11.889139


Epoch 20/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:48<02:59,  1.39it/s, loss=1.8431, audio=0.0670, latent=11.3934, nans=0]


  Step 1500/1750 | Loss: 2.015850 | Audio: 0.117411 | Latent: 11.873524


Epoch 20/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:00<01:48,  1.39it/s, loss=2.1828, audio=0.1895, latent=12.0250, nans=0]


  Step 1600/1750 | Loss: 2.012713 | Audio: 0.117031 | Latent: 11.857670


Epoch 20/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:11<00:35,  1.40it/s, loss=1.8519, audio=0.1266, latent=10.6572, nans=0]


  Step 1700/1750 | Loss: 2.008188 | Audio: 0.116494 | Latent: 11.834665


Epoch 20/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:47<00:00,  1.40it/s, loss=1.7050, audio=0.0751, latent=10.3656, nans=0]
Validation 20/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.7624, audio=0.0464]



EPOCH 20/100 SUMMARY
Train Loss:  2.007736 (Audio: 0.116840, Latent: 11.827039)
Val Loss:    1.962752 (Audio: 0.113660, Latent: 11.569546)
Current LR: 2.97e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.962752


EPOCH 21/100

üßπ Cleared GPU cache at epoch 20
   Allocated: 1.23GB | Reserved: 1.55GB



Epoch 21/100:   6%|‚ñä             | 100/1750 [01:11<19:19,  1.42it/s, loss=1.9803, audio=0.1316, latent=11.4475, nans=0]


  Step 100/1750 | Loss: 1.958620 | Audio: 0.114954 | Latent: 11.524742


Epoch 21/100:  11%|‚ñà‚ñå            | 200/1750 [02:22<18:19,  1.41it/s, loss=1.6901, audio=0.0667, latent=10.3778, nans=0]


  Step 200/1750 | Loss: 1.954338 | Audio: 0.113898 | Latent: 11.510277


Epoch 21/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:33<17:10,  1.41it/s, loss=2.1733, audio=0.1781, latent=12.1136, nans=0]


  Step 300/1750 | Loss: 1.950157 | Audio: 0.112006 | Latent: 11.507630


Epoch 21/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:44<16:06,  1.40it/s, loss=2.0644, audio=0.1987, latent=11.1136, nans=0]


  Step 400/1750 | Loss: 1.954427 | Audio: 0.114774 | Latent: 11.499196


Epoch 21/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:55<14:59,  1.39it/s, loss=2.0049, audio=0.1195, latent=11.7731, nans=0]


  Step 500/1750 | Loss: 1.956697 | Audio: 0.115810 | Latent: 11.500514


Epoch 21/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:07<13:50,  1.39it/s, loss=1.8968, audio=0.1355, latent=10.8382, nans=0]


  Step 600/1750 | Loss: 1.954172 | Audio: 0.115375 | Latent: 11.489475


Epoch 21/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:17<12:38,  1.38it/s, loss=1.7220, audio=0.0576, latent=10.7121, nans=0]


  Step 700/1750 | Loss: 1.950536 | Audio: 0.114958 | Latent: 11.470796


Epoch 21/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:28<11:17,  1.40it/s, loss=2.0452, audio=0.1202, latent=12.0318, nans=0]


  Step 800/1750 | Loss: 1.949516 | Audio: 0.115107 | Latent: 11.462019


Epoch 21/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:39<10:14,  1.38it/s, loss=2.1447, audio=0.1869, latent=11.8060, nans=0]


  Step 900/1750 | Loss: 1.947402 | Audio: 0.115385 | Latent: 11.444220


Epoch 21/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [11:51<08:59,  1.39it/s, loss=1.7461, audio=0.0762, latent=10.6241, nans=0]


  Step 1000/1750 | Loss: 1.945776 | Audio: 0.115475 | Latent: 11.432169


Epoch 21/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:02<07:50,  1.38it/s, loss=1.9126, audio=0.0622, latent=11.9218, nans=0]


  Step 1100/1750 | Loss: 1.945364 | Audio: 0.115729 | Latent: 11.426038


Epoch 21/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:13<06:27,  1.42it/s, loss=1.9908, audio=0.1322, latent=11.5093, nans=0]


  Step 1200/1750 | Loss: 1.940073 | Audio: 0.114907 | Latent: 11.401733


Epoch 21/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:25<05:28,  1.37it/s, loss=1.8312, audio=0.0825, latent=11.1085, nans=0]


  Step 1300/1750 | Loss: 1.937159 | Audio: 0.114599 | Latent: 11.386413


Epoch 21/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:36<04:04,  1.43it/s, loss=1.9592, audio=0.1397, latent=11.1982, nans=0]


  Step 1400/1750 | Loss: 1.937049 | Audio: 0.114936 | Latent: 11.381180


Epoch 21/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:46<02:57,  1.41it/s, loss=1.7502, audio=0.0616, latent=10.8466, nans=0]


  Step 1500/1750 | Loss: 1.933829 | Audio: 0.114951 | Latent: 11.359509


Epoch 21/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [18:57<01:48,  1.38it/s, loss=1.7747, audio=0.0609, latent=11.0193, nans=0]


  Step 1600/1750 | Loss: 1.931631 | Audio: 0.115154 | Latent: 11.342155


Epoch 21/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:09<00:36,  1.37it/s, loss=1.8326, audio=0.0588, latent=11.4328, nans=0]


  Step 1700/1750 | Loss: 1.928371 | Audio: 0.114600 | Latent: 11.327801


Epoch 21/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:45<00:00,  1.41it/s, loss=1.8264, audio=0.1311, latent=10.4287, nans=0]
Validation 21/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.6914, audio=0.0450]



EPOCH 21/100 SUMMARY
Train Loss:  1.925896 (Audio: 0.114258, Latent: 11.315867)
Val Loss:    1.887986 (Audio: 0.111811, Latent: 11.095753)
Current LR: 2.97e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.887986


EPOCH 22/100



Epoch 22/100:   6%|‚ñä             | 100/1750 [01:12<20:16,  1.36it/s, loss=2.2698, audio=0.2403, latent=11.9285, nans=0]


  Step 100/1750 | Loss: 1.892339 | Audio: 0.113642 | Latent: 11.100363


Epoch 22/100:  11%|‚ñà‚ñå            | 200/1750 [02:23<18:39,  1.38it/s, loss=1.9139, audio=0.1246, latent=11.0979, nans=0]


  Step 200/1750 | Loss: 1.875355 | Audio: 0.110843 | Latent: 11.024464


Epoch 22/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:08,  1.41it/s, loss=1.9787, audio=0.1119, latent=11.6989, nans=0]


  Step 300/1750 | Loss: 1.875970 | Audio: 0.110840 | Latent: 11.028602


Epoch 22/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:47<16:28,  1.37it/s, loss=2.3191, audio=0.2410, latent=12.2469, nans=0]


  Step 400/1750 | Loss: 1.872543 | Audio: 0.109626 | Latent: 11.021933


Epoch 22/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:59<14:45,  1.41it/s, loss=1.8776, audio=0.1274, latent=10.8190, nans=0]


  Step 500/1750 | Loss: 1.867599 | Audio: 0.109588 | Latent: 10.989484


Epoch 22/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:11<13:54,  1.38it/s, loss=1.9913, audio=0.1185, latent=11.6957, nans=0]


  Step 600/1750 | Loss: 1.863441 | Audio: 0.108732 | Latent: 10.973183


Epoch 22/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:23<12:37,  1.39it/s, loss=1.7276, audio=0.0782, latent=10.4740, nans=0]


  Step 700/1750 | Loss: 1.863241 | Audio: 0.109557 | Latent: 10.960845


Epoch 22/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:35<11:36,  1.36it/s, loss=2.0974, audio=0.1768, latent=11.6246, nans=0]


  Step 800/1750 | Loss: 1.862646 | Audio: 0.109622 | Latent: 10.956016


Epoch 22/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:47<10:20,  1.37it/s, loss=1.7299, audio=0.0798, latent=10.4690, nans=0]


  Step 900/1750 | Loss: 1.857985 | Audio: 0.109136 | Latent: 10.931425


Epoch 22/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç     | 1000/1750 [12:00<08:57,  1.40it/s, loss=1.8091, audio=0.1347, latent=10.2648, nans=0]


  Step 1000/1750 | Loss: 1.860928 | Audio: 0.110271 | Latent: 10.935909


Epoch 22/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 1100/1750 [13:12<07:54,  1.37it/s, loss=1.7150, audio=0.0626, latent=10.5986, nans=0]


  Step 1100/1750 | Loss: 1.858795 | Audio: 0.110371 | Latent: 10.920349


Epoch 22/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:23<06:42,  1.37it/s, loss=1.9027, audio=0.1141, latent=11.1633, nans=0]


  Step 1200/1750 | Loss: 1.856265 | Audio: 0.110328 | Latent: 10.904062


Epoch 22/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:35<05:21,  1.40it/s, loss=1.7057, audio=0.0696, latent=10.4434, nans=0]


  Step 1300/1750 | Loss: 1.854394 | Audio: 0.110531 | Latent: 10.888884


Epoch 22/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:47<04:12,  1.38it/s, loss=1.7323, audio=0.0768, latent=10.5249, nans=0]


  Step 1400/1750 | Loss: 1.853628 | Audio: 0.111089 | Latent: 10.876333


Epoch 22/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:00<02:58,  1.40it/s, loss=1.5671, audio=0.0848, latent=9.3161, nans=0]


  Step 1500/1750 | Loss: 1.851791 | Audio: 0.111086 | Latent: 10.864122


Epoch 22/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 1600/1750 [19:12<01:48,  1.39it/s, loss=1.8393, audio=0.1024, latent=10.8963, nans=0]


  Step 1600/1750 | Loss: 1.850539 | Audio: 0.111221 | Latent: 10.853982


Epoch 22/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 1700/1750 [20:23<00:35,  1.39it/s, loss=1.6542, audio=0.0556, latent=10.2864, nans=0]


  Step 1700/1750 | Loss: 1.848610 | Audio: 0.111146 | Latent: 10.842117


Epoch 22/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=1.8295, audio=0.1330, latent=10.4232, nans=0]
Validation 22/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.6217, audio=0.0437]



EPOCH 22/100 SUMMARY
Train Loss:  1.849695 (Audio: 0.111911, Latent: 10.839149)
Val Loss:    1.813260 (Audio: 0.110233, Latent: 10.618627)
Current LR: 2.97e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.813260


EPOCH 23/100



Epoch 23/100:   6%|‚ñä             | 100/1750 [01:11<19:39,  1.40it/s, loss=1.8325, audio=0.1098, latent=10.7529, nans=0]


  Step 100/1750 | Loss: 1.803863 | Audio: 0.111664 | Latent: 10.536906


Epoch 23/100:  11%|‚ñà‚ñå            | 200/1750 [02:22<18:31,  1.40it/s, loss=1.8928, audio=0.1163, latent=11.0676, nans=0]


  Step 200/1750 | Loss: 1.796149 | Audio: 0.108974 | Latent: 10.521342


Epoch 23/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:34<17:30,  1.38it/s, loss=2.1910, audio=0.1952, latent=12.0049, nans=0]


  Step 300/1750 | Loss: 1.796990 | Audio: 0.109533 | Latent: 10.519489


Epoch 23/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:45<16:19,  1.38it/s, loss=1.9610, audio=0.1186, latent=11.4919, nans=0]


  Step 400/1750 | Loss: 1.804343 | Audio: 0.110929 | Latent: 10.549896


Epoch 23/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:57<14:57,  1.39it/s, loss=1.6076, audio=0.0445, latent=10.1240, nans=0]


  Step 500/1750 | Loss: 1.800217 | Audio: 0.109873 | Latent: 10.536478


Epoch 23/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñä         | 600/1750 [07:09<13:41,  1.40it/s, loss=1.8529, audio=0.1018, latent=10.9958, nans=0]


  Step 600/1750 | Loss: 1.798667 | Audio: 0.110126 | Latent: 10.522761


Epoch 23/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:21<12:32,  1.39it/s, loss=2.0075, audio=0.1751, latent=11.0489, nans=0]


  Step 700/1750 | Loss: 1.796632 | Audio: 0.110569 | Latent: 10.503288


Epoch 23/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç       | 800/1750 [09:32<11:30,  1.38it/s, loss=1.6721, audio=0.0593, latent=10.3557, nans=0]


  Step 800/1750 | Loss: 1.794970 | Audio: 0.110417 | Latent: 10.494243


Epoch 23/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè      | 900/1750 [10:44<10:11,  1.39it/s, loss=1.8245, audio=0.1359, latent=10.3517, nans=0]


  Step 900/1750 | Loss: 1.791830 | Audio: 0.109746 | Latent: 10.482259


Epoch 23/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:56<08:58,  1.39it/s, loss=1.6372, audio=0.0750, latent=9.9145, nans=0]


  Step 1000/1750 | Loss: 1.789153 | Audio: 0.109356 | Latent: 10.469605


Epoch 23/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:07<07:40,  1.41it/s, loss=1.6305, audio=0.0670, latent=9.9774, nans=1]


  Step 1100/1750 | Loss: 1.786221 | Audio: 0.109292 | Latent: 10.450912


Epoch 23/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:19<06:34,  1.39it/s, loss=1.6016, audio=0.0695, latent=9.7510, nans=1]


  Step 1200/1750 | Loss: 1.783890 | Audio: 0.109056 | Latent: 10.438521


Epoch 23/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1300/1750 [15:30<05:21,  1.40it/s, loss=1.9611, audio=0.1830, latent=10.6335, nans=1]


  Step 1300/1750 | Loss: 1.784139 | Audio: 0.109926 | Latent: 10.428580


Epoch 23/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:42<04:13,  1.38it/s, loss=2.1152, audio=0.2312, latent=11.0189, nans=1]


  Step 1400/1750 | Loss: 1.783894 | Audio: 0.110293 | Latent: 10.422046


Epoch 23/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:53<03:02,  1.37it/s, loss=1.7986, audio=0.1166, latent=10.4359, nans=1]


  Step 1500/1750 | Loss: 1.781779 | Audio: 0.110051 | Latent: 10.411177


Epoch 23/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:05<01:48,  1.39it/s, loss=1.5308, audio=0.0593, latent=9.4149, nans=1]


  Step 1600/1750 | Loss: 1.780975 | Audio: 0.110241 | Latent: 10.403286


Epoch 23/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:17<00:36,  1.36it/s, loss=1.5104, audio=0.0665, latent=9.1819, nans=1]


  Step 1700/1750 | Loss: 1.777508 | Audio: 0.109737 | Latent: 10.386899


Epoch 23/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:53<00:00,  1.40it/s, loss=1.9792, audio=0.1783, latent=10.8173, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 23/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.5547, audio=0.0405]



EPOCH 23/100 SUMMARY
Train Loss:  1.776091 (Audio: 0.109676, Latent: 10.378254)
Val Loss:    1.742802 (Audio: 0.105776, Latent: 10.208339)
Current LR: 2.97e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.742802


EPOCH 24/100



Epoch 24/100:   6%|‚ñä             | 100/1750 [01:12<19:48,  1.39it/s, loss=1.6704, audio=0.0761, latent=10.1209, nans=0]


  Step 100/1750 | Loss: 1.745695 | Audio: 0.110204 | Latent: 10.168585


Epoch 24/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:29,  1.40it/s, loss=1.5419, audio=0.0780, latent=9.2398, nans=0]


  Step 200/1750 | Loss: 1.738020 | Audio: 0.108632 | Latent: 10.138370


Epoch 24/100:  17%|‚ñà‚ñà‚ñç           | 300/1750 [03:35<17:00,  1.42it/s, loss=1.6533, audio=0.0652, latent=10.1525, nans=0]


  Step 300/1750 | Loss: 1.735103 | Audio: 0.109949 | Latent: 10.101368


Epoch 24/100:  23%|‚ñà‚ñà‚ñà‚ñè          | 400/1750 [04:46<15:57,  1.41it/s, loss=1.6110, audio=0.0494, latent=10.0811, nans=0]


  Step 400/1750 | Loss: 1.732207 | Audio: 0.108828 | Latent: 10.097007


Epoch 24/100:  29%|‚ñà‚ñà‚ñà‚ñà          | 500/1750 [05:57<14:39,  1.42it/s, loss=1.6606, audio=0.0678, latent=10.1664, nans=0]


  Step 500/1750 | Loss: 1.726012 | Audio: 0.107590 | Latent: 10.072218


Epoch 24/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:59,  1.37it/s, loss=1.6609, audio=0.1151, latent=9.5383, nans=0]


  Step 600/1750 | Loss: 1.728095 | Audio: 0.108742 | Latent: 10.070740


Epoch 24/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå        | 700/1750 [08:22<12:50,  1.36it/s, loss=1.9502, audio=0.1752, latent=10.6654, nans=0]


  Step 700/1750 | Loss: 1.725082 | Audio: 0.108404 | Latent: 10.055155


Epoch 24/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:08,  1.42it/s, loss=1.5783, audio=0.0675, latent=9.6213, nans=0]


  Step 800/1750 | Loss: 1.722460 | Audio: 0.108093 | Latent: 10.041829


Epoch 24/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:06,  1.40it/s, loss=1.5443, audio=0.0591, latent=9.5080, nans=0]


  Step 900/1750 | Loss: 1.721336 | Audio: 0.108176 | Latent: 10.033227


Epoch 24/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:56<08:54,  1.40it/s, loss=1.8701, audio=0.1911, latent=9.9197, nans=0]


  Step 1000/1750 | Loss: 1.720406 | Audio: 0.108342 | Latent: 10.024816


Epoch 24/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:08<08:14,  1.31it/s, loss=1.7308, audio=0.1222, latent=9.9098, nans=0]


  Step 1100/1750 | Loss: 1.718881 | Audio: 0.108185 | Latent: 10.016733


Epoch 24/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:31,  1.40it/s, loss=1.5455, audio=0.0529, latent=9.5984, nans=0]


  Step 1200/1750 | Loss: 1.716094 | Audio: 0.107713 | Latent: 10.004448


Epoch 24/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:24,  1.39it/s, loss=1.4883, audio=0.0743, latent=8.9310, nans=0]


  Step 1300/1750 | Loss: 1.713980 | Audio: 0.107592 | Latent: 9.991981


Epoch 24/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:45<04:17,  1.36it/s, loss=1.8835, audio=0.1635, latent=10.3768, nans=0]


  Step 1400/1750 | Loss: 1.713451 | Audio: 0.107780 | Latent: 9.985935


Epoch 24/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:02,  1.37it/s, loss=1.8432, audio=0.1768, latent=9.9302, nans=0]


  Step 1500/1750 | Loss: 1.712778 | Audio: 0.107900 | Latent: 9.979857


Epoch 24/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:09<01:49,  1.37it/s, loss=1.7015, audio=0.1290, latent=9.6231, nans=0]


  Step 1600/1750 | Loss: 1.710584 | Audio: 0.107531 | Latent: 9.970141


Epoch 24/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:35,  1.39it/s, loss=1.5692, audio=0.0673, latent=9.5645, nans=0]


  Step 1700/1750 | Loss: 1.710220 | Audio: 0.107914 | Latent: 9.962613


Epoch 24/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=1.8769, audio=0.1651, latent=10.3114, nans=0]
Validation 24/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.4975, audio=0.0402]



EPOCH 24/100 SUMMARY
Train Loss:  1.709417 (Audio: 0.107877, Latent: 9.957746)
Val Loss:    1.677022 (Audio: 0.105644, Latent: 9.771564)
Current LR: 2.97e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.677022


EPOCH 25/100



Epoch 25/100:   6%|‚ñä              | 100/1750 [01:11<19:41,  1.40it/s, loss=1.7114, audio=0.1275, latent=9.7093, nans=0]


  Step 100/1750 | Loss: 1.655296 | Audio: 0.105959 | Latent: 9.622515


Epoch 25/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:27,  1.40it/s, loss=1.4517, audio=0.0563, latent=8.9268, nans=0]


  Step 200/1750 | Loss: 1.665802 | Audio: 0.107316 | Latent: 9.674465


Epoch 25/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:32,  1.38it/s, loss=1.5102, audio=0.0562, latent=9.3190, nans=0]


  Step 300/1750 | Loss: 1.670172 | Audio: 0.107962 | Latent: 9.694983


Epoch 25/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:27,  1.37it/s, loss=1.5431, audio=0.0490, latent=9.6348, nans=0]


  Step 400/1750 | Loss: 1.669676 | Audio: 0.108394 | Latent: 9.685921


Epoch 25/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<14:45,  1.41it/s, loss=1.4964, audio=0.0899, latent=8.7777, nans=0]


  Step 500/1750 | Loss: 1.668657 | Audio: 0.108755 | Latent: 9.674313


Epoch 25/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:31,  1.42it/s, loss=1.5292, audio=0.0712, latent=9.2452, nans=0]


  Step 600/1750 | Loss: 1.667130 | Audio: 0.108015 | Latent: 9.673993


Epoch 25/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:34,  1.39it/s, loss=1.6676, audio=0.1263, latent=9.4339, nans=0]


  Step 700/1750 | Loss: 1.666229 | Audio: 0.107951 | Latent: 9.668843


Epoch 25/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:34<11:32,  1.37it/s, loss=1.4454, audio=0.0792, latent=8.5797, nans=0]


  Step 800/1750 | Loss: 1.663269 | Audio: 0.107115 | Latent: 9.660268


Epoch 25/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:46<10:27,  1.36it/s, loss=1.6849, audio=0.1305, latent=9.4929, nans=0]


  Step 900/1750 | Loss: 1.663220 | Audio: 0.107278 | Latent: 9.657758


Epoch 25/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<08:47,  1.42it/s, loss=1.3824, audio=0.0681, latent=8.3089, nans=0]


  Step 1000/1750 | Loss: 1.660720 | Audio: 0.106958 | Latent: 9.645355


Epoch 25/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:09<08:00,  1.35it/s, loss=1.7009, audio=0.1141, latent=9.8184, nans=0]


  Step 1100/1750 | Loss: 1.659359 | Audio: 0.107190 | Latent: 9.633189


Epoch 25/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 1200/1750 [14:21<06:32,  1.40it/s, loss=1.9193, audio=0.1730, latent=10.4892, nans=0]


  Step 1200/1750 | Loss: 1.657021 | Audio: 0.107003 | Latent: 9.620100


Epoch 25/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:24,  1.38it/s, loss=1.6891, audio=0.1221, latent=9.6325, nans=0]


  Step 1300/1750 | Loss: 1.654042 | Audio: 0.106510 | Latent: 9.606816


Epoch 25/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:44<04:17,  1.36it/s, loss=1.4634, audio=0.0525, latent=9.0561, nans=0]


  Step 1400/1750 | Loss: 1.653197 | Audio: 0.106478 | Latent: 9.601597


Epoch 25/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:56<03:01,  1.38it/s, loss=1.8411, audio=0.1777, latent=9.9048, nans=0]


  Step 1500/1750 | Loss: 1.652003 | Audio: 0.106737 | Latent: 9.590189


Epoch 25/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:47,  1.40it/s, loss=1.4421, audio=0.0566, latent=8.8594, nans=0]


  Step 1600/1750 | Loss: 1.649823 | Audio: 0.106252 | Latent: 9.582132


Epoch 25/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:35,  1.42it/s, loss=1.4825, audio=0.0830, latent=8.7765, nans=0]


  Step 1700/1750 | Loss: 1.649124 | Audio: 0.106491 | Latent: 9.574282


Epoch 25/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=1.4015, audio=0.0602, latent=8.5404, nans=0]
Validation 25/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.4454, audio=0.0394]



EPOCH 25/100 SUMMARY
Train Loss:  1.647560 (Audio: 0.106267, Latent: 9.566833)
Val Loss:    1.618802 (Audio: 0.103800, Latent: 9.408007)
Current LR: 2.96e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.618802


EPOCH 26/100



Epoch 26/100:   6%|‚ñä              | 100/1750 [01:11<19:41,  1.40it/s, loss=1.4580, audio=0.0575, latent=8.9529, nans=0]


  Step 100/1750 | Loss: 1.614659 | Audio: 0.103807 | Latent: 9.380303


Epoch 26/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:16,  1.41it/s, loss=1.6091, audio=0.1153, latent=9.1908, nans=0]


  Step 200/1750 | Loss: 1.605900 | Audio: 0.102441 | Latent: 9.340115


Epoch 26/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:16,  1.40it/s, loss=1.6936, audio=0.1395, latent=9.4301, nans=0]


  Step 300/1750 | Loss: 1.609907 | Audio: 0.104140 | Latent: 9.344185


Epoch 26/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:31,  1.36it/s, loss=1.3761, audio=0.0575, latent=8.4073, nans=0]


  Step 400/1750 | Loss: 1.608260 | Audio: 0.104276 | Latent: 9.331389


Epoch 26/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<15:04,  1.38it/s, loss=1.7342, audio=0.1608, latent=9.4175, nans=0]


  Step 500/1750 | Loss: 1.606037 | Audio: 0.104019 | Latent: 9.319996


Epoch 26/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:33,  1.41it/s, loss=1.6078, audio=0.1145, latent=9.1921, nans=0]


  Step 600/1750 | Loss: 1.602431 | Audio: 0.103985 | Latent: 9.296404


Epoch 26/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<12:36,  1.39it/s, loss=1.5823, audio=0.1073, latent=9.1180, nans=0]


  Step 700/1750 | Loss: 1.602564 | Audio: 0.104526 | Latent: 9.290079


Epoch 26/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:19,  1.40it/s, loss=1.4649, audio=0.0558, latent=9.0215, nans=0]


  Step 800/1750 | Loss: 1.600483 | Audio: 0.104397 | Latent: 9.277928


Epoch 26/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<10:13,  1.39it/s, loss=1.6612, audio=0.1182, latent=9.4983, nans=0]


  Step 900/1750 | Loss: 1.599596 | Audio: 0.105032 | Latent: 9.263543


Epoch 26/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:57<09:02,  1.38it/s, loss=1.5879, audio=0.1165, latent=9.0334, nans=0]


  Step 1000/1750 | Loss: 1.599692 | Audio: 0.105582 | Latent: 9.256857


Epoch 26/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:09<07:39,  1.42it/s, loss=1.5910, audio=0.1313, latent=8.8561, nans=0]


  Step 1100/1750 | Loss: 1.596889 | Audio: 0.105394 | Latent: 9.240678


Epoch 26/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:32,  1.40it/s, loss=1.4692, audio=0.0425, latent=9.2280, nans=0]


  Step 1200/1750 | Loss: 1.596585 | Audio: 0.105360 | Latent: 9.239100


Epoch 26/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:20,  1.41it/s, loss=1.8153, audio=0.1690, latent=9.8488, nans=0]


  Step 1300/1750 | Loss: 1.595323 | Audio: 0.105281 | Latent: 9.231739


Epoch 26/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:45<04:17,  1.36it/s, loss=1.9679, audio=0.2335, latent=10.0051, nans=0]


  Step 1400/1750 | Loss: 1.592457 | Audio: 0.104809 | Latent: 9.218925


Epoch 26/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:01,  1.37it/s, loss=1.5867, audio=0.1150, latent=9.0440, nans=0]


  Step 1500/1750 | Loss: 1.591667 | Audio: 0.104968 | Latent: 9.211538


Epoch 26/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:10<01:46,  1.41it/s, loss=1.5733, audio=0.1129, latent=8.9836, nans=0]


  Step 1600/1750 | Loss: 1.590072 | Audio: 0.104837 | Latent: 9.202655


Epoch 26/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:21<00:35,  1.40it/s, loss=1.6462, audio=0.1100, latent=9.5086, nans=0]


  Step 1700/1750 | Loss: 1.588705 | Audio: 0.104824 | Latent: 9.193711


Epoch 26/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:57<00:00,  1.39it/s, loss=1.7878, audio=0.1740, latent=9.5989, nans=0]
Validation 26/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=1.3974, audio=0.0393]



EPOCH 26/100 SUMMARY
Train Loss:  1.587817 (Audio: 0.104803, Latent: 9.188073)
Val Loss:    1.565342 (Audio: 0.103713, Latent: 9.052770)
Current LR: 2.96e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.565342


EPOCH 27/100



Epoch 27/100:   6%|‚ñä              | 100/1750 [01:12<20:19,  1.35it/s, loss=1.4433, audio=0.0580, latent=8.8482, nans=0]


  Step 100/1750 | Loss: 1.569063 | Audio: 0.107316 | Latent: 9.029544


Epoch 27/100:  11%|‚ñà‚ñã             | 200/1750 [02:25<18:26,  1.40it/s, loss=1.3792, audio=0.0423, latent=8.6307, nans=0]


  Step 200/1750 | Loss: 1.567351 | Audio: 0.106485 | Latent: 9.029204


Epoch 27/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:37<17:49,  1.36it/s, loss=1.3142, audio=0.0748, latent=7.7639, nans=0]


  Step 300/1750 | Loss: 1.560916 | Audio: 0.105839 | Latent: 8.994920


Epoch 27/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:50<16:16,  1.38it/s, loss=1.5691, audio=0.1145, latent=8.9332, nans=0]


  Step 400/1750 | Loss: 1.557817 | Audio: 0.105130 | Latent: 8.983717


Epoch 27/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:02<14:53,  1.40it/s, loss=1.5447, audio=0.1061, latent=8.8831, nans=0]


  Step 500/1750 | Loss: 1.552931 | Audio: 0.103921 | Latent: 8.967254


Epoch 27/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:13<14:01,  1.37it/s, loss=1.8040, audio=0.1699, latent=9.7614, nans=0]


  Step 600/1750 | Loss: 1.559358 | Audio: 0.106072 | Latent: 8.981429


Epoch 27/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:26<12:47,  1.37it/s, loss=1.4981, audio=0.1148, latent=8.4573, nans=0]


  Step 700/1750 | Loss: 1.556250 | Audio: 0.105194 | Latent: 8.972407


Epoch 27/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:38<11:22,  1.39it/s, loss=1.4245, audio=0.0467, latent=8.8744, nans=0]


  Step 800/1750 | Loss: 1.554493 | Audio: 0.105046 | Latent: 8.962672


Epoch 27/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:49<10:14,  1.38it/s, loss=1.5051, audio=0.1162, latent=8.4853, nans=0]


  Step 900/1750 | Loss: 1.551011 | Audio: 0.104374 | Latent: 8.948422


Epoch 27/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:01<08:46,  1.42it/s, loss=1.3602, audio=0.0485, latent=8.4209, nans=0]


  Step 1000/1750 | Loss: 1.550312 | Audio: 0.104285 | Latent: 8.944951


Epoch 27/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:13<07:55,  1.37it/s, loss=1.3759, audio=0.0481, latent=8.5316, nans=0]


  Step 1100/1750 | Loss: 1.546122 | Audio: 0.103748 | Latent: 8.924167


Epoch 27/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:24<06:39,  1.38it/s, loss=1.5064, audio=0.1297, latent=8.3135, nans=0]


  Step 1200/1750 | Loss: 1.546151 | Audio: 0.104514 | Latent: 8.914154


Epoch 27/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:36<05:27,  1.37it/s, loss=1.5107, audio=0.1285, latent=8.3576, nans=0]


  Step 1300/1750 | Loss: 1.543947 | Audio: 0.104511 | Latent: 8.899505


Epoch 27/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:48<04:13,  1.38it/s, loss=1.3262, audio=0.0535, latent=8.1285, nans=0]


  Step 1400/1750 | Loss: 1.541590 | Audio: 0.104413 | Latent: 8.885084


Epoch 27/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:00<03:00,  1.39it/s, loss=1.7208, audio=0.1675, latent=9.2383, nans=0]


  Step 1500/1750 | Loss: 1.539474 | Audio: 0.104268 | Latent: 8.872920


Epoch 27/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:12<01:49,  1.37it/s, loss=1.3773, audio=0.0628, latent=8.3446, nans=0]


  Step 1600/1750 | Loss: 1.536390 | Audio: 0.103823 | Latent: 8.858298


Epoch 27/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:24<00:36,  1.38it/s, loss=1.5360, audio=0.0986, latent=8.9250, nans=0]


  Step 1700/1750 | Loss: 1.533651 | Audio: 0.103386 | Latent: 8.845866


Epoch 27/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:01<00:00,  1.39it/s, loss=1.3358, audio=0.0449, latent=8.3073, nans=0]
Validation 27/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.52it/s, loss=1.3521, audio=0.0389]



EPOCH 27/100 SUMMARY
Train Loss:  1.533733 (Audio: 0.103533, Latent: 8.844444)
Val Loss:    1.511719 (Audio: 0.102084, Latent: 8.717001)
Current LR: 2.96e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.511719


EPOCH 28/100



Epoch 28/100:   6%|‚ñä              | 100/1750 [01:11<19:16,  1.43it/s, loss=1.6115, audio=0.1152, latent=9.2068, nans=0]


  Step 100/1750 | Loss: 1.497831 | Audio: 0.099899 | Latent: 8.653551


Epoch 28/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:30,  1.40it/s, loss=1.3365, audio=0.0552, latent=8.1747, nans=0]


  Step 200/1750 | Loss: 1.503883 | Audio: 0.101163 | Latent: 8.677052


Epoch 28/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:33<17:37,  1.37it/s, loss=1.3610, audio=0.0605, latent=8.2664, nans=0]


  Step 300/1750 | Loss: 1.496960 | Audio: 0.099443 | Latent: 8.653833


Epoch 28/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:44<16:19,  1.38it/s, loss=1.7702, audio=0.1698, latent=9.5374, nans=0]


  Step 400/1750 | Loss: 1.499602 | Audio: 0.101190 | Latent: 8.648144


Epoch 28/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:56<14:57,  1.39it/s, loss=1.6124, audio=0.1163, latent=9.1990, nans=1]


  Step 500/1750 | Loss: 1.497016 | Audio: 0.101467 | Latent: 8.627220


Epoch 28/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:08<13:41,  1.40it/s, loss=1.4444, audio=0.1325, latent=7.8624, nans=1]


  Step 600/1750 | Loss: 1.494655 | Audio: 0.101649 | Latent: 8.609046


Epoch 28/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:20<12:34,  1.39it/s, loss=1.7756, audio=0.1935, latent=9.2567, nans=1]


  Step 700/1750 | Loss: 1.493664 | Audio: 0.101781 | Latent: 8.600674


Epoch 28/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:33,  1.37it/s, loss=1.6321, audio=0.1601, latent=8.7466, nans=1]


  Step 800/1750 | Loss: 1.494230 | Audio: 0.102088 | Latent: 8.600358


Epoch 28/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:20,  1.37it/s, loss=1.5175, audio=0.1007, latent=8.7737, nans=1]


  Step 900/1750 | Loss: 1.493317 | Audio: 0.102497 | Latent: 8.588828


Epoch 28/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:56<08:55,  1.40it/s, loss=1.4812, audio=0.1077, latent=8.4388, nans=1]


  Step 1000/1750 | Loss: 1.491639 | Audio: 0.102157 | Latent: 8.582166


Epoch 28/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:07<07:52,  1.38it/s, loss=1.5326, audio=0.1151, latent=8.6819, nans=1]


  Step 1100/1750 | Loss: 1.491861 | Audio: 0.102206 | Latent: 8.583000


Epoch 28/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:40,  1.37it/s, loss=1.3549, audio=0.0614, latent=8.2145, nans=1]


  Step 1200/1750 | Loss: 1.496027 | Audio: 0.102601 | Latent: 8.605497


Epoch 28/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:31<05:25,  1.38it/s, loss=1.2428, audio=0.0537, latent=7.5690, nans=2]


  Step 1300/1750 | Loss: 1.493211 | Audio: 0.102115 | Latent: 8.593205


Epoch 28/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 1400/1750 [16:43<04:12,  1.39it/s, loss=1.9637, audio=0.1214, latent=11.4730, nans=2]


  Step 1400/1750 | Loss: 1.492737 | Audio: 0.102442 | Latent: 8.585686


Epoch 28/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 1500/1750 [17:55<03:00,  1.38it/s, loss=1.9858, audio=0.1258, latent=11.5620, nans=2]


  Step 1500/1750 | Loss: 1.495255 | Audio: 0.102366 | Latent: 8.603485


Epoch 28/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:06<01:46,  1.40it/s, loss=1.6100, audio=0.1683, latent=8.4897, nans=2]


  Step 1600/1750 | Loss: 1.496634 | Audio: 0.102544 | Latent: 8.610300


Epoch 28/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:18<00:35,  1.39it/s, loss=1.5044, audio=0.1106, latent=8.5547, nans=2]


  Step 1700/1750 | Loss: 1.495687 | Audio: 0.102452 | Latent: 8.605220


Epoch 28/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:54<00:00,  1.39it/s, loss=1.3515, audio=0.0582, latent=8.2345, nans=2]



‚ö†Ô∏è 2 NaN occurrences


Validation 28/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.3109, audio=0.0373]



EPOCH 28/100 SUMMARY
Train Loss:  1.494993 (Audio: 0.102472, Latent: 8.600325)
Val Loss:    1.467883 (Audio: 0.100450, Latent: 8.446555)
Current LR: 2.96e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.467883


EPOCH 29/100



Epoch 29/100:   6%|‚ñä              | 100/1750 [01:12<19:35,  1.40it/s, loss=1.3114, audio=0.0450, latent=8.1421, nans=0]


  Step 100/1750 | Loss: 1.454927 | Audio: 0.096231 | Latent: 8.416428


Epoch 29/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:41,  1.38it/s, loss=1.6010, audio=0.1699, latent=8.4074, nans=0]


  Step 200/1750 | Loss: 1.446520 | Audio: 0.096487 | Latent: 8.356970


Epoch 29/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:37,  1.37it/s, loss=1.6951, audio=0.1738, latent=8.9827, nans=0]


  Step 300/1750 | Loss: 1.453721 | Audio: 0.100449 | Latent: 8.352154


Epoch 29/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:05,  1.40it/s, loss=1.6740, audio=0.1706, latent=8.8854, nans=0]


  Step 400/1750 | Loss: 1.451542 | Audio: 0.100132 | Latent: 8.341847


Epoch 29/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:55,  1.40it/s, loss=1.7607, audio=0.1733, latent=9.4268, nans=0]


  Step 500/1750 | Loss: 1.453320 | Audio: 0.100684 | Latent: 8.346343


Epoch 29/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<14:14,  1.35it/s, loss=1.7939, audio=0.1995, latent=9.2992, nans=0]


  Step 600/1750 | Loss: 1.456387 | Audio: 0.101867 | Latent: 8.351016


Epoch 29/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<11:54,  1.47it/s, loss=1.4846, audio=0.1243, latent=8.2402, nans=0]


  Step 700/1750 | Loss: 1.456729 | Audio: 0.102302 | Latent: 8.347498


Epoch 29/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:32<11:28,  1.38it/s, loss=1.2293, audio=0.0595, latent=7.4015, nans=0]


  Step 800/1750 | Loss: 1.455932 | Audio: 0.102322 | Latent: 8.341921


Epoch 29/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:13,  1.38it/s, loss=1.4469, audio=0.1063, latent=8.2294, nans=0]


  Step 900/1750 | Loss: 1.451870 | Audio: 0.101445 | Latent: 8.326526


Epoch 29/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:55<08:51,  1.41it/s, loss=1.4203, audio=0.1105, latent=7.9952, nans=0]


  Step 1000/1750 | Loss: 1.450530 | Audio: 0.101356 | Latent: 8.318786


Epoch 29/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:42,  1.41it/s, loss=1.2956, audio=0.0625, latent=7.8036, nans=0]


  Step 1100/1750 | Loss: 1.448403 | Audio: 0.101059 | Latent: 8.308563


Epoch 29/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:18<06:38,  1.38it/s, loss=1.4228, audio=0.0336, latent=9.0368, nans=0]


  Step 1200/1750 | Loss: 1.446509 | Audio: 0.100948 | Latent: 8.297420


Epoch 29/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:30<05:38,  1.33it/s, loss=1.2329, audio=0.0443, latent=7.6286, nans=0]


  Step 1300/1750 | Loss: 1.445438 | Audio: 0.101286 | Latent: 8.285776


Epoch 29/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:41<04:15,  1.37it/s, loss=1.2133, audio=0.0529, latent=7.3832, nans=0]


  Step 1400/1750 | Loss: 1.445170 | Audio: 0.101955 | Latent: 8.275065


Epoch 29/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:54<02:59,  1.39it/s, loss=1.4009, audio=0.1136, latent=7.8247, nans=0]


  Step 1500/1750 | Loss: 1.444478 | Audio: 0.102101 | Latent: 8.268502


Epoch 29/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:06<01:48,  1.38it/s, loss=1.4880, audio=0.0973, latent=8.6235, nans=0]


  Step 1600/1750 | Loss: 1.442447 | Audio: 0.101703 | Latent: 8.260267


Epoch 29/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:17<00:35,  1.39it/s, loss=1.2193, audio=0.0689, latent=7.2105, nans=0]


  Step 1700/1750 | Loss: 1.440833 | Audio: 0.101467 | Latent: 8.252666


Epoch 29/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:53<00:00,  1.40it/s, loss=1.3973, audio=0.1254, latent=7.6426, nans=0]
Validation 29/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.2689, audio=0.0374]



EPOCH 29/100 SUMMARY
Train Loss:  1.439639 (Audio: 0.101409, Latent: 8.245474)
Val Loss:    1.421429 (Audio: 0.100237, Latent: 8.139698)
Current LR: 2.96e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.421429


EPOCH 30/100



Epoch 30/100:   6%|‚ñä              | 100/1750 [01:11<19:48,  1.39it/s, loss=1.2445, audio=0.0663, latent=7.4131, nans=0]


  Step 100/1750 | Loss: 1.434940 | Audio: 0.107489 | Latent: 8.133081


Epoch 30/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<19:10,  1.35it/s, loss=1.4693, audio=0.1028, latent=8.4252, nans=0]


  Step 200/1750 | Loss: 1.416250 | Audio: 0.102554 | Latent: 8.074275


Epoch 30/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:12,  1.40it/s, loss=1.2441, audio=0.0583, latent=7.5165, nans=0]


  Step 300/1750 | Loss: 1.407518 | Audio: 0.100276 | Latent: 8.046436


Epoch 30/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:45<15:46,  1.43it/s, loss=1.3914, audio=0.1014, latent=7.9234, nans=0]


  Step 400/1750 | Loss: 1.407680 | Audio: 0.100201 | Latent: 8.048527


Epoch 30/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<15:15,  1.37it/s, loss=1.4551, audio=0.1107, latent=8.2250, nans=0]


  Step 500/1750 | Loss: 1.403310 | Audio: 0.099195 | Latent: 8.032804


Epoch 30/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:39,  1.40it/s, loss=1.4404, audio=0.0982, latent=8.2935, nans=0]


  Step 600/1750 | Loss: 1.403455 | Audio: 0.099633 | Latent: 8.027930


Epoch 30/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:33,  1.39it/s, loss=1.5646, audio=0.1583, latent=8.3202, nans=0]


  Step 700/1750 | Loss: 1.402430 | Audio: 0.099646 | Latent: 8.020925


Epoch 30/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:27,  1.38it/s, loss=1.3087, audio=0.0613, latent=7.9078, nans=0]


  Step 800/1750 | Loss: 1.402847 | Audio: 0.099692 | Latent: 8.023085


Epoch 30/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<09:59,  1.42it/s, loss=1.3702, audio=0.1172, latent=7.5720, nans=0]


  Step 900/1750 | Loss: 1.402435 | Audio: 0.100021 | Latent: 8.015959


Epoch 30/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:57<09:04,  1.38it/s, loss=1.2212, audio=0.0392, latent=7.6192, nans=0]


  Step 1000/1750 | Loss: 1.401668 | Audio: 0.099844 | Latent: 8.013208


Epoch 30/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:09<07:51,  1.38it/s, loss=1.4462, audio=0.1111, latent=8.1601, nans=0]


  Step 1100/1750 | Loss: 1.403210 | Audio: 0.100624 | Latent: 8.013087


Epoch 30/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:21<06:33,  1.40it/s, loss=1.4935, audio=0.1180, latent=8.3837, nans=0]


  Step 1200/1750 | Loss: 1.401410 | Audio: 0.100390 | Latent: 8.004193


Epoch 30/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:33<05:28,  1.37it/s, loss=1.5279, audio=0.1608, latent=8.0426, nans=0]


  Step 1300/1750 | Loss: 1.399795 | Audio: 0.100340 | Latent: 7.994099


Epoch 30/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:44<04:14,  1.37it/s, loss=1.4604, audio=0.1157, latent=8.1932, nans=0]


  Step 1400/1750 | Loss: 1.399352 | Audio: 0.100561 | Latent: 7.988195


Epoch 30/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:56<03:02,  1.37it/s, loss=1.4962, audio=0.1155, latent=8.4345, nans=0]


  Step 1500/1750 | Loss: 1.398106 | Audio: 0.100449 | Latent: 7.981394


Epoch 30/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:07<01:47,  1.40it/s, loss=1.3336, audio=0.1092, latent=7.4354, nans=0]


  Step 1600/1750 | Loss: 1.396365 | Audio: 0.100173 | Latent: 7.973463


Epoch 30/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:18<00:36,  1.36it/s, loss=1.3701, audio=0.1120, latent=7.6412, nans=0]


  Step 1700/1750 | Loss: 1.394977 | Audio: 0.100076 | Latent: 7.965504


Epoch 30/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:55<00:00,  1.39it/s, loss=1.6442, audio=0.2245, latent=7.9683, nans=0]
Validation 30/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.52it/s, loss=1.2314, audio=0.0368]



EPOCH 30/100 SUMMARY
Train Loss:  1.395039 (Audio: 0.100447, Latent: 7.960973)
Val Loss:    1.377938 (Audio: 0.098721, Latent: 7.869971)
Current LR: 2.95e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.377938


EPOCH 31/100

üßπ Cleared GPU cache at epoch 30
   Allocated: 1.23GB | Reserved: 1.49GB



Epoch 31/100:   6%|‚ñä              | 100/1750 [01:12<19:49,  1.39it/s, loss=1.4408, audio=0.1137, latent=8.0897, nans=0]


  Step 100/1750 | Loss: 1.384410 | Audio: 0.106606 | Latent: 7.807984


Epoch 31/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<19:12,  1.34it/s, loss=1.3749, audio=0.1178, latent=7.5958, nans=0]


  Step 200/1750 | Loss: 1.380499 | Audio: 0.104448 | Latent: 7.810690


Epoch 31/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:37<17:46,  1.36it/s, loss=1.3724, audio=0.1125, latent=7.6491, nans=0]


  Step 300/1750 | Loss: 1.370864 | Audio: 0.100625 | Latent: 7.797432


Epoch 31/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:49<16:15,  1.38it/s, loss=1.3377, audio=0.1175, latent=7.3514, nans=0]


  Step 400/1750 | Loss: 1.369907 | Audio: 0.100298 | Latent: 7.795400


Epoch 31/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:01<15:00,  1.39it/s, loss=1.5084, audio=0.1666, latent=7.8350, nans=0]


  Step 500/1750 | Loss: 1.368862 | Audio: 0.100002 | Latent: 7.792380


Epoch 31/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:13<13:46,  1.39it/s, loss=1.2266, audio=0.0307, latent=7.7680, nans=0]


  Step 600/1750 | Loss: 1.366366 | Audio: 0.099315 | Latent: 7.784910


Epoch 31/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:24<12:27,  1.41it/s, loss=1.3351, audio=0.1005, latent=7.5600, nans=0]


  Step 700/1750 | Loss: 1.367012 | Audio: 0.099502 | Latent: 7.786716


Epoch 31/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:30,  1.38it/s, loss=1.5458, audio=0.1554, latent=8.2334, nans=0]


  Step 800/1750 | Loss: 1.365506 | Audio: 0.099386 | Latent: 7.778217


Epoch 31/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:46<10:19,  1.37it/s, loss=1.1861, audio=0.0624, latent=7.0751, nans=0]


  Step 900/1750 | Loss: 1.361120 | Audio: 0.098318 | Latent: 7.763222


Epoch 31/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<08:58,  1.39it/s, loss=1.4145, audio=0.1055, latent=8.0235, nans=0]


  Step 1000/1750 | Loss: 1.361295 | Audio: 0.098648 | Latent: 7.759989


Epoch 31/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:44,  1.40it/s, loss=1.4967, audio=0.1726, latent=7.6765, nans=0]


  Step 1100/1750 | Loss: 1.362473 | Audio: 0.099310 | Latent: 7.759023


Epoch 31/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:37,  1.38it/s, loss=1.2944, audio=0.1108, latent=7.1523, nans=0]


  Step 1200/1750 | Loss: 1.362919 | Audio: 0.099817 | Latent: 7.755236


Epoch 31/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:24,  1.39it/s, loss=1.3116, audio=0.1070, latent=7.3173, nans=0]


  Step 1300/1750 | Loss: 1.361863 | Audio: 0.100083 | Latent: 7.744651


Epoch 31/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:15,  1.37it/s, loss=1.4204, audio=0.1107, latent=7.9937, nans=0]


  Step 1400/1750 | Loss: 1.361158 | Audio: 0.100100 | Latent: 7.739722


Epoch 31/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:58<02:58,  1.40it/s, loss=1.2378, audio=0.0620, latent=7.4245, nans=0]


  Step 1500/1750 | Loss: 1.358497 | Audio: 0.099735 | Latent: 7.726838


Epoch 31/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:11<01:49,  1.37it/s, loss=1.3541, audio=0.1147, latent=7.4976, nans=0]


  Step 1600/1750 | Loss: 1.357196 | Audio: 0.099922 | Latent: 7.715680


Epoch 31/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:35,  1.40it/s, loss=1.4500, audio=0.1159, latent=8.1209, nans=0]


  Step 1700/1750 | Loss: 1.355496 | Audio: 0.099580 | Latent: 7.708899


Epoch 31/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:59<00:00,  1.39it/s, loss=1.1404, audio=0.0599, latent=6.8040, nans=0]
Validation 31/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=1.1968, audio=0.0360]



EPOCH 31/100 SUMMARY
Train Loss:  1.355067 (Audio: 0.099625, Latent: 7.705450)
Val Loss:    1.342515 (Audio: 0.097949, Latent: 7.644115)
Current LR: 2.95e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.342515


EPOCH 32/100



Epoch 32/100:   6%|‚ñä              | 100/1750 [01:11<19:20,  1.42it/s, loss=1.4113, audio=0.1054, latent=8.0029, nans=0]


  Step 100/1750 | Loss: 1.339404 | Audio: 0.099683 | Latent: 7.600251


Epoch 32/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:17,  1.41it/s, loss=1.1458, audio=0.0611, latent=6.8237, nans=0]


  Step 200/1750 | Loss: 1.341365 | Audio: 0.104182 | Latent: 7.553338


Epoch 32/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:09,  1.41it/s, loss=1.2010, audio=0.0484, latent=7.3614, nans=0]


  Step 300/1750 | Loss: 1.340341 | Audio: 0.104218 | Latent: 7.546032


Epoch 32/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:10,  1.39it/s, loss=1.3370, audio=0.1121, latent=7.4190, nans=0]


  Step 400/1750 | Loss: 1.334622 | Audio: 0.101974 | Latent: 7.537824


Epoch 32/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<14:41,  1.42it/s, loss=1.3624, audio=0.1051, latent=7.6804, nans=0]


  Step 500/1750 | Loss: 1.336260 | Audio: 0.102090 | Latent: 7.547200


Epoch 32/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:38,  1.41it/s, loss=1.6058, audio=0.2199, latent=7.7743, nans=0]


  Step 600/1750 | Loss: 1.333387 | Audio: 0.100909 | Latent: 7.543797


Epoch 32/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:38,  1.38it/s, loss=1.5091, audio=0.1612, latent=7.9107, nans=0]


  Step 700/1750 | Loss: 1.330931 | Audio: 0.100170 | Latent: 7.537270


Epoch 32/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:25,  1.39it/s, loss=1.1668, audio=0.0566, latent=7.0233, nans=0]


  Step 800/1750 | Loss: 1.330415 | Audio: 0.100502 | Latent: 7.529408


Epoch 32/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<10:10,  1.39it/s, loss=1.5088, audio=0.1646, latent=7.8642, nans=0]


  Step 900/1750 | Loss: 1.326779 | Audio: 0.099766 | Latent: 7.514979


Epoch 32/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:57<09:12,  1.36it/s, loss=1.4836, audio=0.1604, latent=7.7522, nans=0]


  Step 1000/1750 | Loss: 1.328491 | Audio: 0.100259 | Latent: 7.519817


Epoch 32/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:08<07:39,  1.41it/s, loss=1.5231, audio=0.1721, latent=7.8595, nans=0]


  Step 1100/1750 | Loss: 1.328894 | Audio: 0.099608 | Latent: 7.531183


Epoch 32/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:46,  1.35it/s, loss=1.2405, audio=0.1013, latent=6.9193, nans=0]


  Step 1200/1750 | Loss: 1.328371 | Audio: 0.099671 | Latent: 7.526855


Epoch 32/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:24,  1.39it/s, loss=1.5255, audio=0.1744, latent=7.8452, nans=0]


  Step 1300/1750 | Loss: 1.327260 | Audio: 0.099570 | Latent: 7.520794


Epoch 32/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:44<04:14,  1.38it/s, loss=1.4047, audio=0.0948, latent=8.1010, nans=0]


  Step 1400/1750 | Loss: 1.328416 | Audio: 0.099546 | Latent: 7.528828


Epoch 32/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:56<02:57,  1.41it/s, loss=1.5291, audio=0.1779, latent=7.8219, nans=0]


  Step 1500/1750 | Loss: 1.325859 | Audio: 0.099011 | Latent: 7.518915


Epoch 32/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:47,  1.39it/s, loss=1.3932, audio=0.0996, latent=7.9595, nans=0]


  Step 1600/1750 | Loss: 1.324324 | Audio: 0.098776 | Latent: 7.511809


Epoch 32/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:35,  1.40it/s, loss=1.1879, audio=0.0590, latent=7.1328, nans=0]


  Step 1700/1750 | Loss: 1.322752 | Audio: 0.098647 | Latent: 7.503059


Epoch 32/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=1.5009, audio=0.1714, latent=7.7208, nans=0]
Validation 32/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.1704, audio=0.0352]



EPOCH 32/100 SUMMARY
Train Loss:  1.322973 (Audio: 0.098937, Latent: 7.500662)
Val Loss:    1.313230 (Audio: 0.097188, Latent: 7.459032)
Current LR: 2.95e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.313230


EPOCH 33/100



Epoch 33/100:   6%|‚ñä              | 100/1750 [01:11<19:36,  1.40it/s, loss=1.2143, audio=0.0453, latent=7.4908, nans=0]


  Step 100/1750 | Loss: 1.301374 | Audio: 0.100841 | Latent: 7.331275


Epoch 33/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:24,  1.40it/s, loss=1.3226, audio=0.1021, latent=7.4555, nans=0]


  Step 200/1750 | Loss: 1.299191 | Audio: 0.098559 | Latent: 7.347157


Epoch 33/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:35,  1.37it/s, loss=1.1828, audio=0.0638, latent=7.0347, nans=0]


  Step 300/1750 | Loss: 1.291575 | Audio: 0.096556 | Latent: 7.323086


Epoch 33/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<17:22,  1.29it/s, loss=1.3537, audio=0.1050, latent=7.6255, nans=0]


  Step 400/1750 | Loss: 1.290155 | Audio: 0.096492 | Latent: 7.314469


Epoch 33/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:54,  1.40it/s, loss=1.3055, audio=0.1048, latent=7.3054, nans=0]


  Step 500/1750 | Loss: 1.292764 | Audio: 0.097613 | Latent: 7.316915


Epoch 33/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:25,  1.43it/s, loss=1.2689, audio=0.0651, latent=7.5921, nans=0]


  Step 600/1750 | Loss: 1.290541 | Audio: 0.097258 | Latent: 7.306832


Epoch 33/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:34,  1.39it/s, loss=1.1829, audio=0.0688, latent=6.9682, nans=0]


  Step 700/1750 | Loss: 1.291884 | Audio: 0.097988 | Latent: 7.306050


Epoch 33/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:32<11:14,  1.41it/s, loss=1.1626, audio=0.0534, latent=7.0390, nans=0]


  Step 800/1750 | Loss: 1.291307 | Audio: 0.098388 | Latent: 7.296875


Epoch 33/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:43<10:08,  1.40it/s, loss=1.1476, audio=0.0426, latent=7.0829, nans=0]


  Step 900/1750 | Loss: 1.291951 | Audio: 0.099100 | Latent: 7.291674


Epoch 33/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:55<08:53,  1.40it/s, loss=1.2344, audio=0.0522, latent=7.5335, nans=0]


  Step 1000/1750 | Loss: 1.290056 | Audio: 0.098420 | Latent: 7.288112


Epoch 33/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:38,  1.42it/s, loss=1.2314, audio=0.0943, latent=6.9514, nans=0]


  Step 1100/1750 | Loss: 1.289334 | Audio: 0.098435 | Latent: 7.283093


Epoch 33/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:18<06:30,  1.41it/s, loss=1.1087, audio=0.0499, latent=6.7259, nans=0]


  Step 1200/1750 | Loss: 1.288077 | Audio: 0.098167 | Latent: 7.278295


Epoch 33/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:28<05:17,  1.42it/s, loss=1.2542, audio=0.1041, latent=6.9736, nans=0]


  Step 1300/1750 | Loss: 1.286748 | Audio: 0.097787 | Latent: 7.274489


Epoch 33/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:40<04:09,  1.40it/s, loss=1.3753, audio=0.1094, latent=7.7100, nans=0]


  Step 1400/1750 | Loss: 1.287183 | Audio: 0.098060 | Latent: 7.273751


Epoch 33/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:51<03:01,  1.38it/s, loss=1.1549, audio=0.0438, latent=7.1155, nans=0]


  Step 1500/1750 | Loss: 1.286342 | Audio: 0.098111 | Latent: 7.267473


Epoch 33/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:03<01:47,  1.39it/s, loss=1.0876, audio=0.0511, latent=6.5703, nans=0]


  Step 1600/1750 | Loss: 1.285717 | Audio: 0.097966 | Latent: 7.265228


Epoch 33/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:15<00:36,  1.37it/s, loss=1.3857, audio=0.1626, latent=7.0694, nans=0]


  Step 1700/1750 | Loss: 1.286412 | Audio: 0.098572 | Latent: 7.261785


Epoch 33/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:51<00:00,  1.40it/s, loss=1.1560, audio=0.0599, latent=6.9085, nans=0]
Validation 33/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.51it/s, loss=1.1427, audio=0.0360]



EPOCH 33/100 SUMMARY
Train Loss:  1.285086 (Audio: 0.098209, Latent: 7.257789)
Val Loss:    1.273141 (Audio: 0.095831, Latent: 7.209854)
Current LR: 2.95e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.273141


EPOCH 34/100



Epoch 34/100:   6%|‚ñä              | 100/1750 [01:12<20:09,  1.36it/s, loss=1.1876, audio=0.0498, latent=7.2527, nans=0]


  Step 100/1750 | Loss: 1.262782 | Audio: 0.095810 | Latent: 7.141080


Epoch 34/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:15,  1.41it/s, loss=1.3133, audio=0.1211, latent=7.1402, nans=0]


  Step 200/1750 | Loss: 1.268054 | Audio: 0.097717 | Latent: 7.150795


Epoch 34/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:33<17:06,  1.41it/s, loss=1.2482, audio=0.0416, latent=7.7659, nans=0]


  Step 300/1750 | Loss: 1.269552 | Audio: 0.099116 | Latent: 7.142129


Epoch 34/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:43<16:14,  1.39it/s, loss=1.2863, audio=0.0968, latent=7.2854, nans=0]


  Step 400/1750 | Loss: 1.269632 | Audio: 0.099146 | Latent: 7.142260


Epoch 34/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:55<14:47,  1.41it/s, loss=1.2996, audio=0.1198, latent=7.0671, nans=1]


  Step 500/1750 | Loss: 1.265852 | Audio: 0.098190 | Latent: 7.129813


Epoch 34/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:07<13:45,  1.39it/s, loss=1.2358, audio=0.1118, latent=6.7485, nans=1]


  Step 600/1750 | Loss: 1.264329 | Audio: 0.098308 | Latent: 7.118086


Epoch 34/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:18<12:32,  1.40it/s, loss=1.3054, audio=0.1027, latent=7.3335, nans=1]


  Step 700/1750 | Loss: 1.264659 | Audio: 0.098677 | Latent: 7.115370


Epoch 34/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:30<11:36,  1.36it/s, loss=1.5602, audio=0.2096, latent=7.6062, nans=1]


  Step 800/1750 | Loss: 1.263224 | Audio: 0.098654 | Latent: 7.106107


Epoch 34/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:42<10:27,  1.35it/s, loss=1.1678, audio=0.0519, latent=7.0934, nans=1]


  Step 900/1750 | Loss: 1.260608 | Audio: 0.097968 | Latent: 7.097810


Epoch 34/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:54<08:52,  1.41it/s, loss=1.3018, audio=0.1036, latent=7.2966, nans=1]


  Step 1000/1750 | Loss: 1.259302 | Audio: 0.098099 | Latent: 7.087358


Epoch 34/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:54,  1.37it/s, loss=1.4574, audio=0.1676, latent=7.4809, nans=1]


  Step 1100/1750 | Loss: 1.259538 | Audio: 0.098078 | Latent: 7.089206


Epoch 34/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:18<06:34,  1.39it/s, loss=1.2268, audio=0.1009, latent=6.8330, nans=1]


  Step 1200/1750 | Loss: 1.260375 | Audio: 0.098508 | Latent: 7.089068


Epoch 34/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:30<05:24,  1.39it/s, loss=1.2889, audio=0.0927, latent=7.3566, nans=1]


  Step 1300/1750 | Loss: 1.261457 | Audio: 0.099366 | Latent: 7.084838


Epoch 34/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:40<04:15,  1.37it/s, loss=1.2661, audio=0.1174, latent=6.8751, nans=1]


  Step 1400/1750 | Loss: 1.259256 | Audio: 0.098781 | Latent: 7.077963


Epoch 34/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:52<02:57,  1.41it/s, loss=1.3748, audio=0.1570, latent=7.0717, nans=1]


  Step 1500/1750 | Loss: 1.257631 | Audio: 0.098405 | Latent: 7.072141


Epoch 34/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:03<01:48,  1.39it/s, loss=1.2459, audio=0.1009, latent=6.9612, nans=1]


  Step 1600/1750 | Loss: 1.256678 | Audio: 0.098381 | Latent: 7.066113


Epoch 34/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:15<00:36,  1.37it/s, loss=1.3030, audio=0.1077, latent=7.2506, nans=1]


  Step 1700/1750 | Loss: 1.255889 | Audio: 0.097981 | Latent: 7.066180


Epoch 34/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:51<00:00,  1.40it/s, loss=1.1152, audio=0.0675, latent=6.5354, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 34/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=1.1156, audio=0.0348]



EPOCH 34/100 SUMMARY
Train Loss:  1.254904 (Audio: 0.097584, Latent: 7.064902)
Val Loss:    1.252612 (Audio: 0.096309, Latent: 7.066631)
Current LR: 2.94e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.252612


EPOCH 35/100



Epoch 35/100:   6%|‚ñä              | 100/1750 [01:12<20:10,  1.36it/s, loss=1.1971, audio=0.1058, latent=6.5697, nans=0]


  Step 100/1750 | Loss: 1.230281 | Audio: 0.090095 | Latent: 7.000608


Epoch 35/100:  11%|‚ñà‚ñã             | 200/1750 [02:25<18:17,  1.41it/s, loss=1.3432, audio=0.1066, latent=7.5339, nans=0]


  Step 200/1750 | Loss: 1.233845 | Audio: 0.093326 | Latent: 6.981283


Epoch 35/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:37<17:19,  1.39it/s, loss=1.4235, audio=0.1597, latent=7.3604, nans=0]


  Step 300/1750 | Loss: 1.228570 | Audio: 0.092056 | Latent: 6.963049


Epoch 35/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:49<16:12,  1.39it/s, loss=1.3615, audio=0.1750, latent=6.7435, nans=0]


  Step 400/1750 | Loss: 1.231976 | Audio: 0.094029 | Latent: 6.959460


Epoch 35/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:02<15:29,  1.35it/s, loss=1.0514, audio=0.0449, latent=6.4105, nans=0]


  Step 500/1750 | Loss: 1.230662 | Audio: 0.094375 | Latent: 6.946082


Epoch 35/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:14<13:51,  1.38it/s, loss=1.3582, audio=0.1688, latent=6.8037, nans=0]


  Step 600/1750 | Loss: 1.230553 | Audio: 0.094981 | Latent: 6.937271


Epoch 35/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:26<12:45,  1.37it/s, loss=1.3752, audio=0.1648, latent=6.9710, nans=0]


  Step 700/1750 | Loss: 1.230520 | Audio: 0.095083 | Latent: 6.935689


Epoch 35/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:38<11:34,  1.37it/s, loss=1.3614, audio=0.1625, latent=6.9095, nans=0]


  Step 800/1750 | Loss: 1.230987 | Audio: 0.096317 | Latent: 6.922357


Epoch 35/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:50<10:15,  1.38it/s, loss=1.1400, audio=0.0562, latent=6.8502, nans=0]


  Step 900/1750 | Loss: 1.229523 | Audio: 0.096283 | Latent: 6.913037


Epoch 35/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:03<08:52,  1.41it/s, loss=1.1887, audio=0.0580, latent=7.1507, nans=0]


  Step 1000/1750 | Loss: 1.227621 | Audio: 0.095731 | Latent: 6.907729


Epoch 35/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:14<07:44,  1.40it/s, loss=1.4533, audio=0.1601, latent=7.5536, nans=0]


  Step 1100/1750 | Loss: 1.227513 | Audio: 0.096073 | Latent: 6.902447


Epoch 35/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:26<06:34,  1.39it/s, loss=1.3626, audio=0.1605, latent=6.9441, nans=0]


  Step 1200/1750 | Loss: 1.227822 | Audio: 0.096326 | Latent: 6.901131


Epoch 35/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:39<05:30,  1.36it/s, loss=1.1628, audio=0.0403, latent=7.2146, nans=0]


  Step 1300/1750 | Loss: 1.228251 | Audio: 0.096890 | Latent: 6.896473


Epoch 35/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:50<04:08,  1.41it/s, loss=1.3087, audio=0.1647, latent=6.5284, nans=0]


  Step 1400/1750 | Loss: 1.228420 | Audio: 0.097558 | Latent: 6.888694


Epoch 35/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:02<03:02,  1.37it/s, loss=1.1843, audio=0.0551, latent=7.1614, nans=0]


  Step 1500/1750 | Loss: 1.227307 | Audio: 0.097266 | Latent: 6.885162


Epoch 35/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:14<01:52,  1.33it/s, loss=1.2976, audio=0.0946, latent=7.3898, nans=0]


  Step 1600/1750 | Loss: 1.225580 | Audio: 0.096947 | Latent: 6.877914


Epoch 35/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:26<00:35,  1.41it/s, loss=1.0364, audio=0.0511, latent=6.2272, nans=0]


  Step 1700/1750 | Loss: 1.224698 | Audio: 0.096969 | Latent: 6.871733


Epoch 35/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:02<00:00,  1.39it/s, loss=1.3712, audio=0.1718, latent=6.8512, nans=0]
Validation 35/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.0882, audio=0.0345]



EPOCH 35/100 SUMMARY
Train Loss:  1.224572 (Audio: 0.097031, Latent: 6.870068)
Val Loss:    1.215957 (Audio: 0.095415, Latent: 6.834173)
Current LR: 2.94e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.215957


EPOCH 36/100



Epoch 36/100:   6%|‚ñä              | 100/1750 [01:11<19:14,  1.43it/s, loss=1.3021, audio=0.1236, latent=7.0323, nans=0]


  Step 100/1750 | Loss: 1.214666 | Audio: 0.100181 | Latent: 6.762024


Epoch 36/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:14,  1.42it/s, loss=1.0705, audio=0.0488, latent=6.4863, nans=0]


  Step 200/1750 | Loss: 1.199556 | Audio: 0.095582 | Latent: 6.722610


Epoch 36/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:32<17:08,  1.41it/s, loss=1.0612, audio=0.0434, latent=6.4960, nans=0]


  Step 300/1750 | Loss: 1.203374 | Audio: 0.097418 | Latent: 6.723578


Epoch 36/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:42<16:04,  1.40it/s, loss=1.1616, audio=0.0724, latent=6.7784, nans=0]


  Step 400/1750 | Loss: 1.207491 | Audio: 0.097745 | Latent: 6.746674


Epoch 36/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:54<15:01,  1.39it/s, loss=1.1551, audio=0.1107, latent=6.2239, nans=0]


  Step 500/1750 | Loss: 1.206756 | Audio: 0.098157 | Latent: 6.736279


Epoch 36/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:06<13:40,  1.40it/s, loss=1.1895, audio=0.0999, latent=6.5982, nans=0]


  Step 600/1750 | Loss: 1.204830 | Audio: 0.097168 | Latent: 6.736629


Epoch 36/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:18<12:35,  1.39it/s, loss=1.0701, audio=0.0515, latent=6.4477, nans=0]


  Step 700/1750 | Loss: 1.205400 | Audio: 0.097771 | Latent: 6.732386


Epoch 36/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:19,  1.40it/s, loss=1.3449, audio=0.1175, latent=7.3998, nans=0]


  Step 800/1750 | Loss: 1.203746 | Audio: 0.097037 | Latent: 6.731139


Epoch 36/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:43<10:16,  1.38it/s, loss=1.2217, audio=0.0968, latent=6.8532, nans=0]


  Step 900/1750 | Loss: 1.209678 | Audio: 0.097038 | Latent: 6.770675


Epoch 36/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:55<09:12,  1.36it/s, loss=1.3934, audio=0.1634, latent=7.1106, nans=0]


  Step 1000/1750 | Loss: 1.209787 | Audio: 0.097347 | Latent: 6.767291


Epoch 36/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:07<07:55,  1.37it/s, loss=1.4253, audio=0.1614, latent=7.3504, nans=0]


  Step 1100/1750 | Loss: 1.209528 | Audio: 0.097259 | Latent: 6.766731


Epoch 36/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:19<06:33,  1.40it/s, loss=1.1534, audio=0.0958, latent=6.4127, nans=0]


  Step 1200/1750 | Loss: 1.208371 | Audio: 0.097152 | Latent: 6.760442


Epoch 36/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:31<05:24,  1.39it/s, loss=1.3067, audio=0.1253, latent=7.0406, nans=0]


  Step 1300/1750 | Loss: 1.208640 | Audio: 0.097770 | Latent: 6.754004


Epoch 36/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:43<04:18,  1.35it/s, loss=1.3851, audio=0.1718, latent=6.9434, nans=0]


  Step 1400/1750 | Loss: 1.206676 | Audio: 0.097394 | Latent: 6.745918


Epoch 36/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:55<03:00,  1.39it/s, loss=1.0111, audio=0.0677, latent=5.8385, nans=0]


  Step 1500/1750 | Loss: 1.204805 | Audio: 0.097166 | Latent: 6.736484


Epoch 36/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:07<01:47,  1.39it/s, loss=1.0878, audio=0.0560, latent=6.5055, nans=0]


  Step 1600/1750 | Loss: 1.203003 | Audio: 0.096785 | Latent: 6.729549


Epoch 36/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:19<00:35,  1.40it/s, loss=1.0614, audio=0.0408, latent=6.5318, nans=0]


  Step 1700/1750 | Loss: 1.202298 | Audio: 0.096720 | Latent: 6.725713


Epoch 36/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:55<00:00,  1.39it/s, loss=1.0929, audio=0.0393, latent=6.7616, nans=0]
Validation 36/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.51it/s, loss=1.0677, audio=0.0340]



EPOCH 36/100 SUMMARY
Train Loss:  1.201602 (Audio: 0.096572, Latent: 6.723050)
Val Loss:    1.192080 (Audio: 0.094769, Latent: 6.683620)
Current LR: 2.94e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.192080


EPOCH 37/100



Epoch 37/100:   6%|‚ñä              | 100/1750 [01:12<19:59,  1.38it/s, loss=1.0884, audio=0.0481, latent=6.6150, nans=0]


  Step 100/1750 | Loss: 1.201946 | Audio: 0.101687 | Latent: 6.657152


Epoch 37/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:31,  1.39it/s, loss=1.0850, audio=0.0598, latent=6.4367, nans=0]


  Step 200/1750 | Loss: 1.184082 | Audio: 0.095260 | Latent: 6.623741


Epoch 37/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:48,  1.36it/s, loss=1.3696, audio=0.1583, latent=7.0196, nans=0]


  Step 300/1750 | Loss: 1.177397 | Audio: 0.093288 | Latent: 6.605477


Epoch 37/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:03,  1.40it/s, loss=1.2987, audio=0.1646, latent=6.4637, nans=0]


  Step 400/1750 | Loss: 1.178175 | Audio: 0.093665 | Latent: 6.605640


Epoch 37/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<15:14,  1.37it/s, loss=1.0728, audio=0.0377, latent=6.6486, nans=0]


  Step 500/1750 | Loss: 1.179958 | Audio: 0.095726 | Latent: 6.590035


Epoch 37/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<14:04,  1.36it/s, loss=1.0318, audio=0.0478, latent=6.2418, nans=0]


  Step 600/1750 | Loss: 1.176012 | Audio: 0.095157 | Latent: 6.571318


Epoch 37/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:23,  1.41it/s, loss=1.2698, audio=0.1245, latent=6.8048, nans=0]


  Step 700/1750 | Loss: 1.177339 | Audio: 0.095801 | Latent: 6.571577


Epoch 37/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:35,  1.37it/s, loss=1.0244, audio=0.0573, latent=6.0652, nans=0]


  Step 800/1750 | Loss: 1.177780 | Audio: 0.096412 | Latent: 6.566376


Epoch 37/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:25,  1.36it/s, loss=1.1885, audio=0.1122, latent=6.4274, nans=0]


  Step 900/1750 | Loss: 1.176015 | Audio: 0.095640 | Latent: 6.564900


Epoch 37/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<09:12,  1.36it/s, loss=1.2782, audio=0.1075, latent=7.0880, nans=0]


  Step 1000/1750 | Loss: 1.174785 | Audio: 0.095261 | Latent: 6.561751


Epoch 37/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:51,  1.38it/s, loss=1.1529, audio=0.1031, latent=6.3115, nans=0]


  Step 1100/1750 | Loss: 1.173814 | Audio: 0.095144 | Latent: 6.556846


Epoch 37/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:35,  1.39it/s, loss=1.2206, audio=0.1014, latent=6.7856, nans=0]


  Step 1200/1750 | Loss: 1.174043 | Audio: 0.095341 | Latent: 6.555739


Epoch 37/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:24,  1.39it/s, loss=1.2114, audio=0.1245, latent=6.4153, nans=0]


  Step 1300/1750 | Loss: 1.175617 | Audio: 0.096200 | Latent: 6.554781


Epoch 37/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:10,  1.40it/s, loss=1.0214, audio=0.0517, latent=6.1193, nans=0]


  Step 1400/1750 | Loss: 1.175747 | Audio: 0.096384 | Latent: 6.553186


Epoch 37/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:58<03:01,  1.38it/s, loss=1.2822, audio=0.1534, latent=6.5031, nans=0]


  Step 1500/1750 | Loss: 1.174282 | Audio: 0.096124 | Latent: 6.546896


Epoch 37/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:10<01:49,  1.37it/s, loss=1.0873, audio=0.0473, latent=6.6175, nans=0]


  Step 1600/1750 | Loss: 1.172991 | Audio: 0.095742 | Latent: 6.543382


Epoch 37/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:35,  1.42it/s, loss=1.0161, audio=0.0414, latent=6.2226, nans=0]


  Step 1700/1750 | Loss: 1.172127 | Audio: 0.095768 | Latent: 6.537267


Epoch 37/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:59<00:00,  1.39it/s, loss=1.1978, audio=0.1627, latent=5.8162, nans=0]
Validation 37/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.0466, audio=0.0342]



EPOCH 37/100 SUMMARY
Train Loss:  1.172322 (Audio: 0.096050, Latent: 6.534813)
Val Loss:    1.166005 (Audio: 0.094924, Latent: 6.507715)
Current LR: 2.93e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.166005


EPOCH 38/100



Epoch 38/100:   6%|‚ñä              | 100/1750 [01:11<19:27,  1.41it/s, loss=1.0112, audio=0.0423, latent=6.1781, nans=0]


  Step 100/1750 | Loss: 1.155125 | Audio: 0.093890 | Latent: 6.448965


Epoch 38/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:53,  1.37it/s, loss=1.1043, audio=0.1053, latent=5.9575, nans=0]


  Step 200/1750 | Loss: 1.166410 | Audio: 0.096684 | Latent: 6.486950


Epoch 38/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:21,  1.39it/s, loss=1.2603, audio=0.1559, latent=6.3232, nans=0]


  Step 300/1750 | Loss: 1.160794 | Audio: 0.095506 | Latent: 6.465216


Epoch 38/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:45<15:57,  1.41it/s, loss=1.1211, audio=0.1128, latent=5.9693, nans=0]


  Step 400/1750 | Loss: 1.157958 | Audio: 0.093965 | Latent: 6.466854


Epoch 38/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<15:06,  1.38it/s, loss=1.0200, audio=0.0417, latent=6.2440, nans=0]


  Step 500/1750 | Loss: 1.155982 | Audio: 0.094189 | Latent: 6.450701


Epoch 38/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:43,  1.40it/s, loss=1.0600, audio=0.0955, latent=5.7928, nans=0]


  Step 600/1750 | Loss: 1.154729 | Audio: 0.094542 | Latent: 6.437641


Epoch 38/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<13:08,  1.33it/s, loss=1.2704, audio=0.0504, latent=7.7970, nans=0]


  Step 700/1750 | Loss: 1.155615 | Audio: 0.095330 | Latent: 6.433029


Epoch 38/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:30,  1.38it/s, loss=1.0772, audio=0.1030, latent=5.8075, nans=0]


  Step 800/1750 | Loss: 1.153753 | Audio: 0.095001 | Latent: 6.425004


Epoch 38/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:13,  1.39it/s, loss=1.0312, audio=0.0465, latent=6.2549, nans=0]


  Step 900/1750 | Loss: 1.153848 | Audio: 0.095261 | Latent: 6.422172


Epoch 38/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<08:51,  1.41it/s, loss=1.1917, audio=0.1042, latent=6.5549, nans=0]


  Step 1000/1750 | Loss: 1.152916 | Audio: 0.095071 | Latent: 6.418488


Epoch 38/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:52,  1.37it/s, loss=1.2236, audio=0.1160, latent=6.6103, nans=0]


  Step 1100/1750 | Loss: 1.151017 | Audio: 0.095073 | Latent: 6.405798


Epoch 38/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:31,  1.41it/s, loss=1.0406, audio=0.0541, latent=6.2156, nans=0]


  Step 1200/1750 | Loss: 1.149262 | Audio: 0.094446 | Latent: 6.402468


Epoch 38/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:14,  1.43it/s, loss=1.5764, audio=0.2739, latent=6.8571, nans=0]


  Step 1300/1750 | Loss: 1.149556 | Audio: 0.094927 | Latent: 6.398006


Epoch 38/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:45<04:11,  1.39it/s, loss=1.2094, audio=0.1040, latent=6.6757, nans=0]


  Step 1400/1750 | Loss: 1.150019 | Audio: 0.095167 | Latent: 6.397892


Epoch 38/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<02:58,  1.40it/s, loss=1.1499, audio=0.0993, latent=6.3415, nans=0]


  Step 1500/1750 | Loss: 1.150879 | Audio: 0.095930 | Latent: 6.393460


Epoch 38/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:46,  1.41it/s, loss=1.2165, audio=0.1601, latent=5.9756, nans=0]


  Step 1600/1750 | Loss: 1.149582 | Audio: 0.095772 | Latent: 6.386925


Epoch 38/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:35,  1.41it/s, loss=1.3093, audio=0.1589, latent=6.6097, nans=0]


  Step 1700/1750 | Loss: 1.149445 | Audio: 0.095847 | Latent: 6.384998


Epoch 38/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=1.0690, audio=0.0536, latent=6.4126, nans=0]
Validation 38/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=1.0230, audio=0.0337]



EPOCH 38/100 SUMMARY
Train Loss:  1.148425 (Audio: 0.095634, Latent: 6.381047)
Val Loss:    1.143775 (Audio: 0.094221, Latent: 6.368892)
Current LR: 2.93e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.143775


EPOCH 39/100



Epoch 39/100:   6%|‚ñä              | 100/1750 [01:12<19:53,  1.38it/s, loss=1.1104, audio=0.0422, latent=6.8404, nans=0]


  Step 100/1750 | Loss: 1.137989 | Audio: 0.092987 | Latent: 6.346767


Epoch 39/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:38,  1.39it/s, loss=1.3800, audio=0.1459, latent=7.2541, nans=0]


  Step 200/1750 | Loss: 1.143317 | Audio: 0.098877 | Latent: 6.303760


Epoch 39/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:44,  1.36it/s, loss=1.0771, audio=0.0549, latent=6.4493, nans=0]


  Step 300/1750 | Loss: 1.137186 | Audio: 0.096471 | Latent: 6.294957


Epoch 39/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:14,  1.38it/s, loss=0.9976, audio=0.0594, latent=5.8584, nans=0]


  Step 400/1750 | Loss: 1.134345 | Audio: 0.095366 | Latent: 6.290749


Epoch 39/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:41,  1.42it/s, loss=1.0929, audio=0.0614, latent=6.4674, nans=0]


  Step 500/1750 | Loss: 1.131321 | Audio: 0.093984 | Latent: 6.289012


Epoch 39/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:59,  1.37it/s, loss=1.2078, audio=0.1491, latent=6.0636, nans=0]


  Step 600/1750 | Loss: 1.132491 | Audio: 0.094740 | Latent: 6.286742


Epoch 39/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:37,  1.39it/s, loss=1.2242, audio=0.1267, latent=6.4715, nans=0]


  Step 700/1750 | Loss: 1.130523 | Audio: 0.094325 | Latent: 6.279154


Epoch 39/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:22,  1.39it/s, loss=1.1451, audio=0.1029, latent=6.2619, nans=0]


  Step 800/1750 | Loss: 1.132080 | Audio: 0.095181 | Latent: 6.278116


Epoch 39/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:02,  1.41it/s, loss=1.1472, audio=0.1290, latent=5.9280, nans=0]


  Step 900/1750 | Loss: 1.132360 | Audio: 0.095575 | Latent: 6.274732


Epoch 39/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<08:53,  1.41it/s, loss=0.9923, audio=0.0542, latent=5.8924, nans=0]


  Step 1000/1750 | Loss: 1.130195 | Audio: 0.095329 | Latent: 6.263580


Epoch 39/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:46,  1.39it/s, loss=1.2531, audio=0.1544, latent=6.2957, nans=0]


  Step 1100/1750 | Loss: 1.128559 | Audio: 0.094753 | Latent: 6.260354


Epoch 39/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:39,  1.38it/s, loss=1.0912, audio=0.1091, latent=5.8199, nans=0]


  Step 1200/1750 | Loss: 1.127449 | Audio: 0.094653 | Latent: 6.254286


Epoch 39/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:33<05:20,  1.41it/s, loss=0.9642, audio=0.0571, latent=5.6665, nans=0]


  Step 1300/1750 | Loss: 1.127718 | Audio: 0.094856 | Latent: 6.253378


Epoch 39/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:45<04:13,  1.38it/s, loss=1.0779, audio=0.0928, latent=5.9487, nans=0]


  Step 1400/1750 | Loss: 1.126445 | Audio: 0.094646 | Latent: 6.247687


Epoch 39/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<02:58,  1.40it/s, loss=0.9687, audio=0.0484, latent=5.8124, nans=0]


  Step 1500/1750 | Loss: 1.126391 | Audio: 0.094923 | Latent: 6.243625


Epoch 39/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:47,  1.39it/s, loss=1.2997, audio=0.1724, latent=6.3656, nans=0]


  Step 1600/1750 | Loss: 1.126927 | Audio: 0.095125 | Latent: 6.244519


Epoch 39/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:21<00:37,  1.35it/s, loss=1.0171, audio=0.0540, latent=6.0612, nans=0]


  Step 1700/1750 | Loss: 1.126167 | Audio: 0.095219 | Latent: 6.238196


Epoch 39/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=0.9636, audio=0.0593, latent=5.6334, nans=0]
Validation 39/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=1.0072, audio=0.0342]



EPOCH 39/100 SUMMARY
Train Loss:  1.126542 (Audio: 0.095269, Latent: 6.240020)
Val Loss:    1.123737 (Audio: 0.094895, Latent: 6.226306)
Current LR: 2.93e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.123737


EPOCH 40/100



Epoch 40/100:   6%|‚ñä              | 100/1750 [01:12<19:59,  1.38it/s, loss=1.1234, audio=0.1045, latent=6.0964, nans=0]


  Step 100/1750 | Loss: 1.119708 | Audio: 0.099124 | Latent: 6.143063


Epoch 40/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:41,  1.38it/s, loss=1.0092, audio=0.0541, latent=6.0072, nans=0]


  Step 200/1750 | Loss: 1.119391 | Audio: 0.096829 | Latent: 6.171554


Epoch 40/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:13,  1.40it/s, loss=1.0392, audio=0.0377, latent=6.4250, nans=0]


  Step 300/1750 | Loss: 1.108442 | Audio: 0.092330 | Latent: 6.158549


Epoch 40/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:03,  1.40it/s, loss=1.3127, audio=0.1579, latent=6.6460, nans=0]


  Step 400/1750 | Loss: 1.106802 | Audio: 0.092180 | Latent: 6.149621


Epoch 40/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:43,  1.42it/s, loss=0.9982, audio=0.0476, latent=6.0202, nans=0]


  Step 500/1750 | Loss: 1.108899 | Audio: 0.093927 | Latent: 6.140301


Epoch 40/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:51,  1.38it/s, loss=1.0914, audio=0.0963, latent=5.9917, nans=0]


  Step 600/1750 | Loss: 1.107270 | Audio: 0.093277 | Latent: 6.138106


Epoch 40/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:34,  1.39it/s, loss=0.9803, audio=0.0518, latent=5.8444, nans=0]


  Step 700/1750 | Loss: 1.104616 | Audio: 0.092639 | Latent: 6.128923


Epoch 40/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:34<11:43,  1.35it/s, loss=1.0576, audio=0.1000, latent=5.7172, nans=0]


  Step 800/1750 | Loss: 1.105441 | Audio: 0.092750 | Latent: 6.132938


Epoch 40/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<10:00,  1.42it/s, loss=1.2943, audio=0.1600, latent=6.4950, nans=0]


  Step 900/1750 | Loss: 1.109507 | Audio: 0.094282 | Latent: 6.139623


Epoch 40/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:56<09:08,  1.37it/s, loss=1.0093, audio=0.0549, latent=5.9967, nans=0]


  Step 1000/1750 | Loss: 1.107498 | Audio: 0.093486 | Latent: 6.136837


Epoch 40/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:08<07:33,  1.43it/s, loss=1.0936, audio=0.1171, latent=5.7283, nans=0]


  Step 1100/1750 | Loss: 1.107518 | Audio: 0.093812 | Latent: 6.132627


Epoch 40/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:26,  1.42it/s, loss=1.0016, audio=0.0445, latent=6.0842, nans=0]


  Step 1200/1750 | Loss: 1.107794 | Audio: 0.094149 | Latent: 6.129969


Epoch 40/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:26,  1.38it/s, loss=1.2128, audio=0.1575, latent=5.9858, nans=0]


  Step 1300/1750 | Loss: 1.107390 | Audio: 0.094162 | Latent: 6.127110


Epoch 40/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:44<04:14,  1.38it/s, loss=1.0719, audio=0.1061, latent=5.7313, nans=0]


  Step 1400/1750 | Loss: 1.107918 | Audio: 0.094666 | Latent: 6.123907


Epoch 40/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:56<03:02,  1.37it/s, loss=1.0569, audio=0.0396, latent=6.5183, nans=0]


  Step 1500/1750 | Loss: 1.107119 | Audio: 0.094611 | Latent: 6.119312


Epoch 40/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:47,  1.39it/s, loss=1.1782, audio=0.1303, latent=6.1175, nans=0]


  Step 1600/1750 | Loss: 1.107397 | Audio: 0.094786 | Latent: 6.118833


Epoch 40/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:36,  1.38it/s, loss=1.1398, audio=0.0916, latent=6.3778, nans=0]


  Step 1700/1750 | Loss: 1.106308 | Audio: 0.094752 | Latent: 6.112021


Epoch 40/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=0.9778, audio=0.0320, latent=6.0915, nans=0]
Validation 40/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.9894, audio=0.0339]



EPOCH 40/100 SUMMARY
Train Loss:  1.106356 (Audio: 0.094921, Latent: 6.110093)
Val Loss:    1.103451 (Audio: 0.092833, Latent: 6.118563)
Current LR: 2.92e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.103451


EPOCH 41/100

üßπ Cleared GPU cache at epoch 40
   Allocated: 1.23GB | Reserved: 1.49GB



Epoch 41/100:   6%|‚ñä              | 100/1750 [01:12<19:49,  1.39it/s, loss=1.1591, audio=0.1611, latent=5.5785, nans=0]


  Step 100/1750 | Loss: 1.072263 | Audio: 0.085762 | Latent: 6.004933


Epoch 41/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:54,  1.37it/s, loss=0.9630, audio=0.0391, latent=5.8993, nans=0]


  Step 200/1750 | Loss: 1.086624 | Audio: 0.092979 | Latent: 6.004436


Epoch 41/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:44,  1.36it/s, loss=0.9698, audio=0.0348, latent=6.0014, nans=0]


  Step 300/1750 | Loss: 1.089090 | Audio: 0.094546 | Latent: 5.999985


Epoch 41/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:23,  1.37it/s, loss=1.0327, audio=0.0437, latent=6.3024, nans=0]


  Step 400/1750 | Loss: 1.091943 | Audio: 0.095567 | Latent: 6.005394


Epoch 41/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<15:08,  1.38it/s, loss=0.9298, audio=0.0476, latent=5.5639, nans=0]


  Step 500/1750 | Loss: 1.094748 | Audio: 0.096198 | Latent: 6.015676


Epoch 41/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:41,  1.40it/s, loss=1.1482, audio=0.0978, latent=6.3500, nans=0]


  Step 600/1750 | Loss: 1.094114 | Audio: 0.096033 | Latent: 6.013654


Epoch 41/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:30,  1.40it/s, loss=1.0296, audio=0.0407, latent=6.3220, nans=0]


  Step 700/1750 | Loss: 1.096771 | Audio: 0.097349 | Latent: 6.013822


Epoch 41/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:18,  1.40it/s, loss=1.0639, audio=0.0397, latent=6.5636, nans=0]


  Step 800/1750 | Loss: 1.095294 | Audio: 0.096921 | Latent: 6.009683


Epoch 41/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:21,  1.37it/s, loss=1.0579, audio=0.1043, latent=5.6625, nans=0]


  Step 900/1750 | Loss: 1.093717 | Audio: 0.096257 | Latent: 6.008018


Epoch 41/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<08:52,  1.41it/s, loss=1.0978, audio=0.1059, latent=5.9065, nans=0]


  Step 1000/1750 | Loss: 1.093197 | Audio: 0.096330 | Latent: 6.003577


Epoch 41/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:47,  1.39it/s, loss=0.9978, audio=0.0455, latent=6.0453, nans=0]


  Step 1100/1750 | Loss: 1.091999 | Audio: 0.096117 | Latent: 5.998425


Epoch 41/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:38,  1.38it/s, loss=1.1812, audio=0.1054, latent=6.4694, nans=0]


  Step 1200/1750 | Loss: 1.091644 | Audio: 0.095991 | Latent: 5.997738


Epoch 41/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:35<05:19,  1.41it/s, loss=0.9610, audio=0.0316, latent=5.9846, nans=0]


  Step 1300/1750 | Loss: 1.091409 | Audio: 0.095759 | Latent: 5.999276


Epoch 41/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:48<04:17,  1.36it/s, loss=1.2711, audio=0.1621, latent=6.3126, nans=0]


  Step 1400/1750 | Loss: 1.090950 | Audio: 0.095515 | Latent: 5.999467


Epoch 41/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<03:00,  1.39it/s, loss=0.9879, audio=0.0468, latent=5.9623, nans=0]


  Step 1500/1750 | Loss: 1.089296 | Audio: 0.095009 | Latent: 5.995186


Epoch 41/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:12<01:49,  1.37it/s, loss=1.0758, audio=0.1174, latent=5.6066, nans=0]


  Step 1600/1750 | Loss: 1.087485 | Audio: 0.094530 | Latent: 5.989500


Epoch 41/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:24<00:37,  1.35it/s, loss=0.9447, audio=0.0492, latent=5.6422, nans=0]


  Step 1700/1750 | Loss: 1.087180 | Audio: 0.094445 | Latent: 5.988604


Epoch 41/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=0.9676, audio=0.0255, latent=6.1107, nans=0]
Validation 41/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.51it/s, loss=0.9710, audio=0.0330]



EPOCH 41/100 SUMMARY
Train Loss:  1.087265 (Audio: 0.094595, Latent: 5.987164)
Val Loss:    1.087968 (Audio: 0.092671, Latent: 6.017515)
Current LR: 2.92e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.087968


EPOCH 42/100



Epoch 42/100:   6%|‚ñä              | 100/1750 [01:12<19:46,  1.39it/s, loss=1.0313, audio=0.0499, latent=6.2102, nans=0]


  Step 100/1750 | Loss: 1.081954 | Audio: 0.099944 | Latent: 5.880446


Epoch 42/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:53,  1.37it/s, loss=1.2162, audio=0.1706, latent=5.8330, nans=0]


  Step 200/1750 | Loss: 1.079197 | Audio: 0.097287 | Latent: 5.897480


Epoch 42/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:37<17:11,  1.41it/s, loss=1.2500, audio=0.1819, latent=5.9084, nans=0]


  Step 300/1750 | Loss: 1.076570 | Audio: 0.096716 | Latent: 5.887592


Epoch 42/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:49<16:05,  1.40it/s, loss=1.0547, audio=0.0489, latent=6.3794, nans=0]


  Step 400/1750 | Loss: 1.075695 | Audio: 0.096682 | Latent: 5.882200


Epoch 42/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:01<14:55,  1.40it/s, loss=1.0517, audio=0.1064, latent=5.5934, nans=0]


  Step 500/1750 | Loss: 1.077221 | Audio: 0.095785 | Latent: 5.904343


Epoch 42/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:14<13:54,  1.38it/s, loss=0.8903, audio=0.0473, latent=5.3050, nans=1]


  Step 600/1750 | Loss: 1.075765 | Audio: 0.095224 | Latent: 5.902118


Epoch 42/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:27<12:38,  1.38it/s, loss=1.0272, audio=0.0663, latent=5.9636, nans=1]


  Step 700/1750 | Loss: 1.080130 | Audio: 0.096462 | Latent: 5.914707


Epoch 42/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:39<11:15,  1.41it/s, loss=1.1122, audio=0.1066, latent=5.9937, nans=1]


  Step 800/1750 | Loss: 1.077131 | Audio: 0.095240 | Latent: 5.911009


Epoch 42/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:50<10:08,  1.40it/s, loss=0.9880, audio=0.0416, latent=6.0324, nans=1]


  Step 900/1750 | Loss: 1.075856 | Audio: 0.094786 | Latent: 5.908563


Epoch 42/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:02<08:54,  1.40it/s, loss=1.0538, audio=0.0570, latent=6.2660, nans=1]


  Step 1000/1750 | Loss: 1.075574 | Audio: 0.095005 | Latent: 5.903760


Epoch 42/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:13<07:47,  1.39it/s, loss=0.9716, audio=0.0597, latent=5.6816, nans=1]


  Step 1100/1750 | Loss: 1.073738 | Audio: 0.094296 | Latent: 5.900963


Epoch 42/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:25<06:30,  1.41it/s, loss=1.3939, audio=0.2184, latent=6.3804, nans=1]


  Step 1200/1750 | Loss: 1.072925 | Audio: 0.094071 | Latent: 5.898549


Epoch 42/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:36<05:25,  1.38it/s, loss=1.2407, audio=0.1651, latent=6.0701, nans=1]


  Step 1300/1750 | Loss: 1.072056 | Audio: 0.094014 | Latent: 5.893524


Epoch 42/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:49<04:14,  1.38it/s, loss=1.0472, audio=0.0939, latent=5.7292, nans=1]


  Step 1400/1750 | Loss: 1.076153 | Audio: 0.094433 | Latent: 5.915253


Epoch 42/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:01<03:03,  1.36it/s, loss=0.9731, audio=0.0687, latent=5.5714, nans=1]


  Step 1500/1750 | Loss: 1.079902 | Audio: 0.094646 | Latent: 5.937403


Epoch 42/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:14<01:46,  1.41it/s, loss=1.0105, audio=0.0618, latent=5.9130, nans=1]


  Step 1600/1750 | Loss: 1.082668 | Audio: 0.094454 | Latent: 5.958391


Epoch 42/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:25<00:35,  1.40it/s, loss=1.0136, audio=0.0555, latent=6.0174, nans=1]


  Step 1700/1750 | Loss: 1.082035 | Audio: 0.094481 | Latent: 5.953816


Epoch 42/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=0.8605, audio=0.0449, latent=5.1372, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 42/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.9624, audio=0.0336]



EPOCH 42/100 SUMMARY
Train Loss:  1.081787 (Audio: 0.094483, Latent: 5.952144)
Val Loss:    1.074011 (Audio: 0.093669, Latent: 5.911152)
Current LR: 2.92e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.074011


EPOCH 43/100



Epoch 43/100:   6%|‚ñä              | 100/1750 [01:11<19:29,  1.41it/s, loss=1.1585, audio=0.1502, latent=5.7205, nans=0]


  Step 100/1750 | Loss: 1.079910 | Audio: 0.097959 | Latent: 5.893280


Epoch 43/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<19:03,  1.35it/s, loss=0.8924, audio=0.0443, latent=5.3587, nans=0]


  Step 200/1750 | Loss: 1.070649 | Audio: 0.094710 | Latent: 5.874858


Epoch 43/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:31,  1.38it/s, loss=1.0196, audio=0.0519, latent=6.1055, nans=0]


  Step 300/1750 | Loss: 1.060985 | Audio: 0.091859 | Latent: 5.848443


Epoch 43/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:14,  1.39it/s, loss=0.9750, audio=0.0716, latent=5.5447, nans=0]


  Step 400/1750 | Loss: 1.063654 | Audio: 0.093852 | Latent: 5.839665


Epoch 43/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<14:59,  1.39it/s, loss=1.0900, audio=0.1146, latent=5.7389, nans=0]


  Step 500/1750 | Loss: 1.059216 | Audio: 0.092847 | Latent: 5.823479


Epoch 43/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:57,  1.37it/s, loss=1.2043, audio=0.1570, latent=5.9356, nans=0]


  Step 600/1750 | Loss: 1.058482 | Audio: 0.093201 | Latent: 5.813867


Epoch 43/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:45,  1.37it/s, loss=0.9470, audio=0.0435, latent=5.7332, nans=0]


  Step 700/1750 | Loss: 1.056571 | Audio: 0.092803 | Latent: 5.806431


Epoch 43/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:34<11:15,  1.41it/s, loss=0.9708, audio=0.0377, latent=5.9693, nans=0]


  Step 800/1750 | Loss: 1.057677 | Audio: 0.093429 | Latent: 5.805453


Epoch 43/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:07,  1.40it/s, loss=1.1124, audio=0.1184, latent=5.8371, nans=0]


  Step 900/1750 | Loss: 1.058704 | Audio: 0.093932 | Latent: 5.805609


Epoch 43/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<08:53,  1.41it/s, loss=0.9809, audio=0.0367, latent=6.0503, nans=0]


  Step 1000/1750 | Loss: 1.058887 | Audio: 0.094320 | Latent: 5.801651


Epoch 43/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:44,  1.40it/s, loss=1.0361, audio=0.1049, latent=5.5093, nans=0]


  Step 1100/1750 | Loss: 1.057851 | Audio: 0.093879 | Latent: 5.800621


Epoch 43/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:34,  1.40it/s, loss=1.1394, audio=0.1492, latent=5.6072, nans=0]


  Step 1200/1750 | Loss: 1.057102 | Audio: 0.093759 | Latent: 5.797230


Epoch 43/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:35<05:23,  1.39it/s, loss=1.0185, audio=0.0805, latent=5.7162, nans=0]


  Step 1300/1750 | Loss: 1.056322 | Audio: 0.093366 | Latent: 5.797264


Epoch 43/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:47<04:14,  1.38it/s, loss=0.9748, audio=0.0376, latent=5.9970, nans=0]


  Step 1400/1750 | Loss: 1.055564 | Audio: 0.093361 | Latent: 5.792281


Epoch 43/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<03:03,  1.36it/s, loss=1.0770, audio=0.1118, latent=5.6895, nans=0]


  Step 1500/1750 | Loss: 1.054608 | Audio: 0.093386 | Latent: 5.785569


Epoch 43/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:11<01:49,  1.38it/s, loss=1.0551, audio=0.1043, latent=5.6440, nans=0]


  Step 1600/1750 | Loss: 1.054713 | Audio: 0.093728 | Latent: 5.781717


Epoch 43/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:36,  1.38it/s, loss=0.9764, audio=0.1045, latent=5.1155, nans=0]


  Step 1700/1750 | Loss: 1.054724 | Audio: 0.094104 | Latent: 5.776775


Epoch 43/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:59<00:00,  1.39it/s, loss=1.0526, audio=0.1003, latent=5.6797, nans=0]
Validation 43/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.9457, audio=0.0329]



EPOCH 43/100 SUMMARY
Train Loss:  1.054444 (Audio: 0.094077, Latent: 5.775270)
Val Loss:    1.053018 (Audio: 0.092783, Latent: 5.783020)
Current LR: 2.91e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.053018


EPOCH 44/100



Epoch 44/100:   6%|‚ñä              | 100/1750 [01:12<19:59,  1.38it/s, loss=1.0541, audio=0.0435, latent=6.4477, nans=0]


  Step 100/1750 | Loss: 1.044381 | Audio: 0.095945 | Latent: 5.683280


Epoch 44/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:09,  1.42it/s, loss=1.0267, audio=0.0967, latent=5.5551, nans=0]


  Step 200/1750 | Loss: 1.044927 | Audio: 0.096408 | Latent: 5.680741


Epoch 44/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:18,  1.40it/s, loss=1.1956, audio=0.1197, latent=6.3748, nans=0]


  Step 300/1750 | Loss: 1.044521 | Audio: 0.095793 | Latent: 5.686236


Epoch 44/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:28,  1.37it/s, loss=0.9824, audio=0.0941, latent=5.2950, nans=0]


  Step 400/1750 | Loss: 1.050527 | Audio: 0.097693 | Latent: 5.700933


Epoch 44/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<14:55,  1.40it/s, loss=0.8997, audio=0.0269, latent=5.6396, nans=0]


  Step 500/1750 | Loss: 1.047665 | Audio: 0.096237 | Latent: 5.701273


Epoch 44/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:51,  1.38it/s, loss=1.0205, audio=0.1081, latent=5.3628, nans=0]


  Step 600/1750 | Loss: 1.045322 | Audio: 0.095294 | Latent: 5.698229


Epoch 44/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:20<12:42,  1.38it/s, loss=1.1905, audio=0.1139, latent=6.4184, nans=0]


  Step 700/1750 | Loss: 1.046151 | Audio: 0.096066 | Latent: 5.693456


Epoch 44/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:32<11:14,  1.41it/s, loss=0.9884, audio=0.1063, latent=5.1720, nans=0]


  Step 800/1750 | Loss: 1.042726 | Audio: 0.094837 | Latent: 5.687010


Epoch 44/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:21,  1.37it/s, loss=1.1122, audio=0.1157, latent=5.8717, nans=0]


  Step 900/1750 | Loss: 1.042730 | Audio: 0.094668 | Latent: 5.689291


Epoch 44/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:55<08:59,  1.39it/s, loss=1.0986, audio=0.1570, latent=5.2302, nans=0]


  Step 1000/1750 | Loss: 1.042271 | Audio: 0.094781 | Latent: 5.684723


Epoch 44/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:08<07:49,  1.39it/s, loss=1.0579, audio=0.1062, latent=5.6367, nans=0]


  Step 1100/1750 | Loss: 1.042016 | Audio: 0.094739 | Latent: 5.683587


Epoch 44/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:39,  1.38it/s, loss=1.0762, audio=0.1033, latent=5.7973, nans=0]


  Step 1200/1750 | Loss: 1.040910 | Audio: 0.094183 | Latent: 5.683622


Epoch 44/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:32<05:25,  1.38it/s, loss=1.0037, audio=0.0566, latent=5.9373, nans=0]


  Step 1300/1750 | Loss: 1.039826 | Audio: 0.093963 | Latent: 5.679337


Epoch 44/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:45<04:08,  1.41it/s, loss=1.0799, audio=0.1092, latent=5.7434, nans=0]


  Step 1400/1750 | Loss: 1.039771 | Audio: 0.094003 | Latent: 5.678427


Epoch 44/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:02,  1.37it/s, loss=0.9198, audio=0.0468, latent=5.5076, nans=0]


  Step 1500/1750 | Loss: 1.039251 | Audio: 0.093948 | Latent: 5.675699


Epoch 44/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:09<01:47,  1.39it/s, loss=1.0697, audio=0.1097, latent=5.6687, nans=0]


  Step 1600/1750 | Loss: 1.039272 | Audio: 0.093923 | Latent: 5.676180


Epoch 44/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:21<00:36,  1.37it/s, loss=0.9997, audio=0.0648, latent=5.8010, nans=0]


  Step 1700/1750 | Loss: 1.038899 | Audio: 0.094116 | Latent: 5.671117


Epoch 44/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:57<00:00,  1.39it/s, loss=1.1184, audio=0.1092, latent=6.0006, nans=0]
Validation 44/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.9296, audio=0.0335]



EPOCH 44/100 SUMMARY
Train Loss:  1.038232 (Audio: 0.093840, Latent: 5.670337)
Val Loss:    1.039913 (Audio: 0.093169, Latent: 5.690498)
Current LR: 2.91e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.039913


EPOCH 45/100



Epoch 45/100:   6%|‚ñä              | 100/1750 [01:11<19:24,  1.42it/s, loss=1.0749, audio=0.1012, latent=5.8170, nans=0]


  Step 100/1750 | Loss: 1.047773 | Audio: 0.101076 | Latent: 5.637473


Epoch 45/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:28,  1.40it/s, loss=0.8926, audio=0.0565, latent=5.1978, nans=0]


  Step 200/1750 | Loss: 1.038270 | Audio: 0.096116 | Latent: 5.640249


Epoch 45/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:28,  1.38it/s, loss=0.9771, audio=0.1070, latent=5.0871, nans=0]


  Step 300/1750 | Loss: 1.035268 | Audio: 0.094598 | Latent: 5.640482


Epoch 45/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<15:47,  1.42it/s, loss=0.9294, audio=0.0651, latent=5.3278, nans=0]


  Step 400/1750 | Loss: 1.035678 | Audio: 0.095726 | Latent: 5.628175


Epoch 45/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<15:00,  1.39it/s, loss=1.0269, audio=0.1113, latent=5.3628, nans=0]


  Step 500/1750 | Loss: 1.034621 | Audio: 0.095324 | Latent: 5.626489


Epoch 45/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:52,  1.38it/s, loss=1.1090, audio=0.1025, latent=6.0266, nans=0]


  Step 600/1750 | Loss: 1.034808 | Audio: 0.095623 | Latent: 5.623749


Epoch 45/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:45,  1.37it/s, loss=0.8562, audio=0.0357, latent=5.2319, nans=0]


  Step 700/1750 | Loss: 1.034832 | Audio: 0.095758 | Latent: 5.622104


Epoch 45/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:24,  1.39it/s, loss=1.2043, audio=0.1606, latent=5.8875, nans=0]


  Step 800/1750 | Loss: 1.033673 | Audio: 0.095767 | Latent: 5.614265


Epoch 45/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:48<10:19,  1.37it/s, loss=1.1387, audio=0.1680, latent=5.3513, nans=0]


  Step 900/1750 | Loss: 1.032351 | Audio: 0.095803 | Latent: 5.604965


Epoch 45/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:00<08:53,  1.41it/s, loss=0.8748, audio=0.0495, latent=5.1728, nans=0]


  Step 1000/1750 | Loss: 1.029732 | Audio: 0.095041 | Latent: 5.597667


Epoch 45/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:12<07:43,  1.40it/s, loss=0.8393, audio=0.0468, latent=4.9713, nans=0]


  Step 1100/1750 | Loss: 1.028231 | Audio: 0.094392 | Latent: 5.596315


Epoch 45/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:25<06:36,  1.39it/s, loss=0.9026, audio=0.0377, latent=5.5140, nans=0]


  Step 1200/1750 | Loss: 1.025429 | Audio: 0.093370 | Latent: 5.591255


Epoch 45/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:37<05:30,  1.36it/s, loss=0.8988, audio=0.0472, latent=5.3627, nans=0]


  Step 1300/1750 | Loss: 1.025260 | Audio: 0.093282 | Latent: 5.591301


Epoch 45/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:49<04:14,  1.38it/s, loss=0.8824, audio=0.0485, latent=5.2358, nans=0]


  Step 1400/1750 | Loss: 1.023906 | Audio: 0.093217 | Latent: 5.583151


Epoch 45/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:01<03:00,  1.38it/s, loss=0.9452, audio=0.0439, latent=5.7166, nans=0]


  Step 1500/1750 | Loss: 1.025134 | Audio: 0.093873 | Latent: 5.582582


Epoch 45/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:13<01:46,  1.40it/s, loss=1.0488, audio=0.1018, latent=5.6342, nans=0]


  Step 1600/1750 | Loss: 1.024062 | Audio: 0.093594 | Latent: 5.579164


Epoch 45/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:24<00:36,  1.39it/s, loss=0.9530, audio=0.0468, latent=5.7293, nans=0]


  Step 1700/1750 | Loss: 1.024293 | Audio: 0.093885 | Latent: 5.576822


Epoch 45/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=1.0174, audio=0.1038, latent=5.3980, nans=0]
Validation 45/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=0.9132, audio=0.0333]



EPOCH 45/100 SUMMARY
Train Loss:  1.023414 (Audio: 0.093614, Latent: 5.574572)
Val Loss:    1.024578 (Audio: 0.092999, Latent: 5.590530)
Current LR: 2.91e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.024578


EPOCH 46/100



Epoch 46/100:   6%|‚ñä              | 100/1750 [01:12<20:02,  1.37it/s, loss=0.9602, audio=0.0428, latent=5.8301, nans=0]


  Step 100/1750 | Loss: 1.025073 | Audio: 0.095907 | Latent: 5.555057


Epoch 46/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:26,  1.40it/s, loss=1.0095, audio=0.1045, latent=5.3371, nans=0]


  Step 200/1750 | Loss: 1.025575 | Audio: 0.097117 | Latent: 5.542274


Epoch 46/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:23,  1.39it/s, loss=0.9089, audio=0.0465, latent=5.4392, nans=0]


  Step 300/1750 | Loss: 1.022341 | Audio: 0.096601 | Latent: 5.527587


Epoch 46/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<15:58,  1.41it/s, loss=1.0121, audio=0.0930, latent=5.5073, nans=0]


  Step 400/1750 | Loss: 1.024042 | Audio: 0.097594 | Latent: 5.525694


Epoch 46/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:58,  1.39it/s, loss=1.2188, audio=0.1601, latent=5.9908, nans=0]


  Step 500/1750 | Loss: 1.022555 | Audio: 0.097102 | Latent: 5.522340


Epoch 46/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:44,  1.40it/s, loss=1.0880, audio=0.1146, latent=5.7257, nans=0]


  Step 600/1750 | Loss: 1.017626 | Audio: 0.095179 | Latent: 5.515111


Epoch 46/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:40,  1.38it/s, loss=0.8889, audio=0.0478, latent=5.2896, nans=0]


  Step 700/1750 | Loss: 1.014429 | Audio: 0.094137 | Latent: 5.507705


Epoch 46/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:22,  1.39it/s, loss=0.8890, audio=0.0445, latent=5.3332, nans=0]


  Step 800/1750 | Loss: 1.012618 | Audio: 0.093019 | Latent: 5.510536


Epoch 46/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:17,  1.38it/s, loss=0.9877, audio=0.0621, latent=5.7573, nans=0]


  Step 900/1750 | Loss: 1.010646 | Audio: 0.092018 | Latent: 5.510732


Epoch 46/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<08:55,  1.40it/s, loss=1.0102, audio=0.0944, latent=5.4764, nans=0]


  Step 1000/1750 | Loss: 1.011238 | Audio: 0.092040 | Latent: 5.514382


Epoch 46/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:47,  1.39it/s, loss=0.9844, audio=0.1008, latent=5.2182, nans=0]


  Step 1100/1750 | Loss: 1.011984 | Audio: 0.092989 | Latent: 5.506703


Epoch 46/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:42,  1.37it/s, loss=0.8799, audio=0.0514, latent=5.1814, nans=0]


  Step 1200/1750 | Loss: 1.010808 | Audio: 0.092642 | Latent: 5.503500


Epoch 46/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:21,  1.40it/s, loss=1.0517, audio=0.0974, latent=5.7135, nans=0]


  Step 1300/1750 | Loss: 1.010250 | Audio: 0.092408 | Latent: 5.502893


Epoch 46/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:10,  1.39it/s, loss=1.0787, audio=0.1552, latent=5.1222, nans=0]


  Step 1400/1750 | Loss: 1.009733 | Audio: 0.092780 | Latent: 5.494491


Epoch 46/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<02:59,  1.40it/s, loss=1.1650, audio=0.1084, latent=6.3216, nans=0]


  Step 1500/1750 | Loss: 1.010113 | Audio: 0.093190 | Latent: 5.491550


Epoch 46/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:11<01:47,  1.40it/s, loss=1.1041, audio=0.1626, latent=5.1930, nans=0]


  Step 1600/1750 | Loss: 1.009683 | Audio: 0.093185 | Latent: 5.488752


Epoch 46/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:24<00:35,  1.39it/s, loss=0.9339, audio=0.0589, latent=5.4407, nans=0]


  Step 1700/1750 | Loss: 1.009499 | Audio: 0.093244 | Latent: 5.486741


Epoch 46/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:59<00:00,  1.39it/s, loss=0.9643, audio=0.0985, latent=5.1163, nans=0]
Validation 46/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.9079, audio=0.0325]



EPOCH 46/100 SUMMARY
Train Loss:  1.009717 (Audio: 0.093428, Latent: 5.485740)
Val Loss:    1.014961 (Audio: 0.091967, Latent: 5.540186)
Current LR: 2.90e-05

‚úÖ NEW BEST MODEL! Val Loss: 1.014961


EPOCH 47/100



Epoch 47/100:   6%|‚ñä              | 100/1750 [01:11<19:45,  1.39it/s, loss=1.0699, audio=0.1543, latent=5.0753, nans=0]


  Step 100/1750 | Loss: 1.006305 | Audio: 0.094611 | Latent: 5.447218


Epoch 47/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:43,  1.38it/s, loss=0.9351, audio=0.0493, latent=5.5765, nans=1]


  Step 200/1750 | Loss: 1.013226 | Audio: 0.090866 | Latent: 5.543296


Epoch 47/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:23,  1.39it/s, loss=1.0504, audio=0.1598, latent=4.8714, nans=1]


  Step 300/1750 | Loss: 1.013805 | Audio: 0.094083 | Latent: 5.504253


Epoch 47/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:25,  1.37it/s, loss=1.2553, audio=0.2166, latent=5.4805, nans=1]


  Step 400/1750 | Loss: 1.010032 | Audio: 0.094003 | Latent: 5.480177


Epoch 47/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<15:03,  1.38it/s, loss=0.9010, audio=0.0593, latent=5.2157, nans=1]


  Step 500/1750 | Loss: 1.008055 | Audio: 0.092587 | Latent: 5.485879


Epoch 47/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:13<13:40,  1.40it/s, loss=0.9539, audio=0.0667, latent=5.4699, nans=1]


  Step 600/1750 | Loss: 1.007446 | Audio: 0.093434 | Latent: 5.470517


Epoch 47/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:26<12:35,  1.39it/s, loss=0.8621, audio=0.0436, latent=5.1661, nans=1]


  Step 700/1750 | Loss: 1.005037 | Audio: 0.092884 | Latent: 5.461790


Epoch 47/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:38<11:31,  1.37it/s, loss=0.9439, audio=0.0955, latent=5.0195, nans=1]


  Step 800/1750 | Loss: 1.003857 | Audio: 0.092482 | Latent: 5.459283


Epoch 47/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:51<10:28,  1.35it/s, loss=1.0709, audio=0.1550, latent=5.0728, nans=1]


  Step 900/1750 | Loss: 1.005181 | Audio: 0.092946 | Latent: 5.461918


Epoch 47/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:03<08:59,  1.39it/s, loss=0.9856, audio=0.0484, latent=5.9250, nans=1]


  Step 1000/1750 | Loss: 1.003345 | Audio: 0.092535 | Latent: 5.455172


Epoch 47/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:15<07:45,  1.40it/s, loss=0.9648, audio=0.0395, latent=5.9055, nans=1]


  Step 1100/1750 | Loss: 1.001624 | Audio: 0.092347 | Latent: 5.446198


Epoch 47/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:27<06:26,  1.42it/s, loss=0.8406, audio=0.0401, latent=5.0693, nans=1]


  Step 1200/1750 | Loss: 1.000822 | Audio: 0.092193 | Latent: 5.442903


Epoch 47/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:38<05:25,  1.38it/s, loss=1.0267, audio=0.1269, latent=5.1525, nans=1]


  Step 1300/1750 | Loss: 0.999696 | Audio: 0.092181 | Latent: 5.435566


Epoch 47/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:49<04:15,  1.37it/s, loss=1.0816, audio=0.1534, latent=5.1652, nans=1]


  Step 1400/1750 | Loss: 1.000431 | Audio: 0.092709 | Latent: 5.433416


Epoch 47/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:01<03:02,  1.37it/s, loss=0.9846, audio=0.1070, latent=5.1374, nans=1]


  Step 1500/1750 | Loss: 0.999828 | Audio: 0.092564 | Latent: 5.431330


Epoch 47/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:13<01:46,  1.41it/s, loss=0.9319, audio=0.0569, latent=5.4536, nans=1]


  Step 1600/1750 | Loss: 0.999488 | Audio: 0.092659 | Latent: 5.427798


Epoch 47/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:25<00:35,  1.40it/s, loss=0.9775, audio=0.1126, latent=5.0144, nans=1]


  Step 1700/1750 | Loss: 1.000030 | Audio: 0.093111 | Latent: 5.425385


Epoch 47/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=1.0256, audio=0.0958, latent=5.5600, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 47/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=0.8968, audio=0.0330]



EPOCH 47/100 SUMMARY
Train Loss:  1.000442 (Audio: 0.093235, Latent: 5.426477)
Val Loss:    0.998971 (Audio: 0.091863, Latent: 5.434968)
Current LR: 2.90e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.998971


EPOCH 48/100



Epoch 48/100:   6%|‚ñä              | 100/1750 [01:12<19:33,  1.41it/s, loss=0.9481, audio=0.0512, latent=5.6384, nans=0]


  Step 100/1750 | Loss: 0.998595 | Audio: 0.095460 | Latent: 5.384492


Epoch 48/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:06,  1.43it/s, loss=0.9978, audio=0.1232, latent=5.0095, nans=0]


  Step 200/1750 | Loss: 0.990034 | Audio: 0.093930 | Latent: 5.347827


Epoch 48/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:20,  1.39it/s, loss=1.0572, audio=0.1600, latent=4.9145, nans=0]


  Step 300/1750 | Loss: 0.983497 | Audio: 0.091180 | Latent: 5.340917


Epoch 48/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:38,  1.35it/s, loss=1.0156, audio=0.0555, latent=6.0303, nans=0]


  Step 400/1750 | Loss: 0.981598 | Audio: 0.090760 | Latent: 5.333851


Epoch 48/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:01<15:10,  1.37it/s, loss=0.9654, audio=0.1097, latent=4.9740, nans=0]


  Step 500/1750 | Loss: 0.984838 | Audio: 0.092369 | Latent: 5.334004


Epoch 48/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:41,  1.40it/s, loss=0.9867, audio=0.0953, latent=5.3074, nans=0]


  Step 600/1750 | Loss: 0.984620 | Audio: 0.092357 | Latent: 5.332708


Epoch 48/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:24<12:31,  1.40it/s, loss=0.9984, audio=0.1170, latent=5.0965, nans=0]


  Step 700/1750 | Loss: 0.985449 | Audio: 0.093000 | Latent: 5.329662


Epoch 48/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:36<11:32,  1.37it/s, loss=0.9727, audio=0.1025, latent=5.1181, nans=0]


  Step 800/1750 | Loss: 0.986928 | Audio: 0.093228 | Latent: 5.336474


Epoch 48/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:48<10:20,  1.37it/s, loss=1.0887, audio=0.0985, latent=5.9449, nans=0]


  Step 900/1750 | Loss: 0.988374 | Audio: 0.093883 | Latent: 5.337387


Epoch 48/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:00<09:01,  1.39it/s, loss=0.8708, audio=0.0322, latent=5.3762, nans=0]


  Step 1000/1750 | Loss: 0.989573 | Audio: 0.094615 | Latent: 5.335623


Epoch 48/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:12<07:36,  1.42it/s, loss=0.8455, audio=0.0275, latent=5.2696, nans=0]


  Step 1100/1750 | Loss: 0.988830 | Audio: 0.094328 | Latent: 5.334494


Epoch 48/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:24<06:42,  1.37it/s, loss=0.8981, audio=0.0487, latent=5.3385, nans=0]


  Step 1200/1750 | Loss: 0.987150 | Audio: 0.093725 | Latent: 5.331328


Epoch 48/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:36<05:24,  1.39it/s, loss=0.8982, audio=0.0606, latent=5.1799, nans=0]


  Step 1300/1750 | Loss: 0.986989 | Audio: 0.093655 | Latent: 5.331194


Epoch 48/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:48<04:11,  1.39it/s, loss=1.0181, audio=0.1040, latent=5.4005, nans=0]


  Step 1400/1750 | Loss: 0.987262 | Audio: 0.093830 | Latent: 5.330680


Epoch 48/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:01<03:02,  1.37it/s, loss=0.9094, audio=0.0544, latent=5.3373, nans=0]


  Step 1500/1750 | Loss: 0.985450 | Audio: 0.093455 | Latent: 5.323598


Epoch 48/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:13<01:47,  1.39it/s, loss=1.2072, audio=0.2140, latent=5.1942, nans=0]


  Step 1600/1750 | Loss: 0.985105 | Audio: 0.093238 | Latent: 5.324190


Epoch 48/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:24<00:35,  1.41it/s, loss=0.9817, audio=0.1035, latent=5.1652, nans=0]


  Step 1700/1750 | Loss: 0.986552 | Audio: 0.093352 | Latent: 5.332321


Epoch 48/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=0.8991, audio=0.0461, latent=5.3796, nans=0]
Validation 48/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.8870, audio=0.0337]



EPOCH 48/100 SUMMARY
Train Loss:  0.986417 (Audio: 0.093130, Latent: 5.334385)
Val Loss:    0.994498 (Audio: 0.093726, Latent: 5.380300)
Current LR: 2.90e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.994498


EPOCH 49/100



Epoch 49/100:   6%|‚ñä              | 100/1750 [01:11<19:58,  1.38it/s, loss=0.9188, audio=0.0442, latent=5.5353, nans=0]


  Step 100/1750 | Loss: 0.987966 | Audio: 0.095942 | Latent: 5.307213


Epoch 49/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:29,  1.40it/s, loss=0.9815, audio=0.1226, latent=4.9085, nans=0]


  Step 200/1750 | Loss: 0.983966 | Audio: 0.095144 | Latent: 5.291190


Epoch 49/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:27,  1.38it/s, loss=0.9122, audio=0.0421, latent=5.5202, nans=0]


  Step 300/1750 | Loss: 0.980802 | Audio: 0.094032 | Latent: 5.284921


Epoch 49/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:33,  1.36it/s, loss=1.0249, audio=0.1060, latent=5.4194, nans=0]


  Step 400/1750 | Loss: 0.981020 | Audio: 0.094311 | Latent: 5.282655


Epoch 49/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<14:45,  1.41it/s, loss=0.9860, audio=0.0918, latent=5.3497, nans=0]


  Step 500/1750 | Loss: 0.981506 | Audio: 0.094667 | Latent: 5.281145


Epoch 49/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:51,  1.38it/s, loss=0.9150, audio=0.0341, latent=5.6454, nans=0]


  Step 600/1750 | Loss: 0.979535 | Audio: 0.093902 | Latent: 5.278204


Epoch 49/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:56,  1.35it/s, loss=1.0117, audio=0.1008, latent=5.4005, nans=0]


  Step 700/1750 | Loss: 0.976788 | Audio: 0.092704 | Latent: 5.275867


Epoch 49/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:11,  1.42it/s, loss=0.9713, audio=0.1025, latent=5.1085, nans=0]


  Step 800/1750 | Loss: 0.977909 | Audio: 0.093504 | Latent: 5.272674


Epoch 49/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:12,  1.39it/s, loss=0.9517, audio=0.0432, latent=5.7686, nans=0]


  Step 900/1750 | Loss: 0.977052 | Audio: 0.092900 | Latent: 5.275008


Epoch 49/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<08:42,  1.43it/s, loss=1.2954, audio=0.1655, latent=6.4289, nans=0]


  Step 1000/1750 | Loss: 0.976280 | Audio: 0.092655 | Latent: 5.273138


Epoch 49/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:45,  1.40it/s, loss=1.0327, audio=0.1112, latent=5.4029, nans=0]


  Step 1100/1750 | Loss: 0.976328 | Audio: 0.093015 | Latent: 5.268659


Epoch 49/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:30,  1.41it/s, loss=0.9484, audio=0.1037, latent=4.9405, nans=0]


  Step 1200/1750 | Loss: 0.974549 | Audio: 0.092329 | Latent: 5.265941


Epoch 49/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:33<05:20,  1.40it/s, loss=0.9151, audio=0.0915, latent=4.8815, nans=0]


  Step 1300/1750 | Loss: 0.973760 | Audio: 0.091951 | Latent: 5.265716


Epoch 49/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:44<04:10,  1.40it/s, loss=0.7597, audio=0.0582, latent=4.2890, nans=0]


  Step 1400/1750 | Loss: 0.973678 | Audio: 0.092315 | Latent: 5.260318


Epoch 49/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:56<02:58,  1.40it/s, loss=1.1235, audio=0.1602, latent=5.3533, nans=0]


  Step 1500/1750 | Loss: 0.977741 | Audio: 0.092701 | Latent: 5.282265


Epoch 49/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:47,  1.39it/s, loss=0.9687, audio=0.0432, latent=5.8817, nans=0]


  Step 1600/1750 | Loss: 0.979431 | Audio: 0.092971 | Latent: 5.289928


Epoch 49/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:36,  1.38it/s, loss=0.9783, audio=0.1007, latent=5.1788, nans=0]


  Step 1700/1750 | Loss: 0.979053 | Audio: 0.093148 | Latent: 5.285052


Epoch 49/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=0.8640, audio=0.0464, latent=5.1413, nans=0]
Validation 49/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.8783, audio=0.0327]



EPOCH 49/100 SUMMARY
Train Loss:  0.979009 (Audio: 0.093032, Latent: 5.286305)
Val Loss:    0.982292 (Audio: 0.091697, Latent: 5.325981)
Current LR: 2.89e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.982292


EPOCH 50/100



Epoch 50/100:   6%|‚ñä              | 100/1750 [01:11<19:41,  1.40it/s, loss=0.8560, audio=0.0539, latent=4.9885, nans=0]


  Step 100/1750 | Loss: 0.965699 | Audio: 0.087121 | Latent: 5.276376


Epoch 50/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:48,  1.37it/s, loss=0.9262, audio=0.0393, latent=5.6497, nans=0]


  Step 200/1750 | Loss: 0.965533 | Audio: 0.087210 | Latent: 5.274084


Epoch 50/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:39,  1.37it/s, loss=0.9884, audio=0.0910, latent=5.3755, nans=0]


  Step 300/1750 | Loss: 0.967112 | Audio: 0.089950 | Latent: 5.248078


Epoch 50/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:13,  1.39it/s, loss=0.8432, audio=0.0409, latent=5.0765, nans=0]


  Step 400/1750 | Loss: 0.968838 | Audio: 0.091677 | Latent: 5.236558


Epoch 50/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<14:56,  1.39it/s, loss=0.9309, audio=0.0537, latent=5.4895, nans=0]


  Step 500/1750 | Loss: 0.970124 | Audio: 0.092687 | Latent: 5.231672


Epoch 50/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:59,  1.37it/s, loss=0.9628, audio=0.0972, latent=5.1229, nans=0]


  Step 600/1750 | Loss: 0.970227 | Audio: 0.093825 | Latent: 5.217182


Epoch 50/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:41,  1.38it/s, loss=1.0030, audio=0.0902, latent=5.4841, nans=0]


  Step 700/1750 | Loss: 0.970140 | Audio: 0.094408 | Latent: 5.208825


Epoch 50/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:24,  1.39it/s, loss=0.8340, audio=0.0496, latent=4.8985, nans=0]


  Step 800/1750 | Loss: 0.970135 | Audio: 0.094357 | Latent: 5.209470


Epoch 50/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<10:03,  1.41it/s, loss=0.8334, audio=0.0444, latent=4.9644, nans=0]


  Step 900/1750 | Loss: 0.969101 | Audio: 0.093710 | Latent: 5.211204


Epoch 50/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:56<09:00,  1.39it/s, loss=0.7759, audio=0.0510, latent=4.4932, nans=0]


  Step 1000/1750 | Loss: 0.966671 | Audio: 0.093025 | Latent: 5.204140


Epoch 50/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:07<07:45,  1.40it/s, loss=0.9667, audio=0.0780, latent=5.4039, nans=0]


  Step 1100/1750 | Loss: 0.966553 | Audio: 0.093246 | Latent: 5.200397


Epoch 50/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:18<06:27,  1.42it/s, loss=0.9868, audio=0.0925, latent=5.3449, nans=0]


  Step 1200/1750 | Loss: 0.967095 | Audio: 0.093441 | Latent: 5.201414


Epoch 50/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:30<05:26,  1.38it/s, loss=0.9117, audio=0.1036, latent=4.6969, nans=0]


  Step 1300/1750 | Loss: 0.965532 | Audio: 0.093066 | Latent: 5.195998


Epoch 50/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:42<04:11,  1.39it/s, loss=0.8343, audio=0.0480, latent=4.9214, nans=0]


  Step 1400/1750 | Loss: 0.966284 | Audio: 0.093030 | Latent: 5.201486


Epoch 50/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:54<03:00,  1.39it/s, loss=1.1410, audio=0.1562, latent=5.5238, nans=0]


  Step 1500/1750 | Loss: 0.965823 | Audio: 0.092805 | Latent: 5.201422


Epoch 50/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:06<01:46,  1.41it/s, loss=0.9715, audio=0.1108, latent=4.9998, nans=0]


  Step 1600/1750 | Loss: 0.966022 | Audio: 0.093139 | Latent: 5.198291


Epoch 50/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:18<00:35,  1.40it/s, loss=0.8308, audio=0.0292, latent=5.1499, nans=0]


  Step 1700/1750 | Loss: 0.965944 | Audio: 0.092979 | Latent: 5.199903


Epoch 50/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:53<00:00,  1.40it/s, loss=0.8557, audio=0.0371, latent=5.2099, nans=0]
Validation 50/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.8642, audio=0.0327]



EPOCH 50/100 SUMMARY
Train Loss:  0.965581 (Audio: 0.092826, Latent: 5.199526)
Val Loss:    0.969771 (Audio: 0.092866, Latent: 5.226928)
Current LR: 2.89e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.969771


EPOCH 51/100

üßπ Cleared GPU cache at epoch 50
   Allocated: 1.23GB | Reserved: 1.55GB



Epoch 51/100:   6%|‚ñä              | 100/1750 [01:11<19:49,  1.39it/s, loss=0.9253, audio=0.0995, latent=4.8427, nans=0]


  Step 100/1750 | Loss: 0.958680 | Audio: 0.089659 | Latent: 5.195742


Epoch 51/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:31,  1.39it/s, loss=0.9834, audio=0.1071, latent=5.1277, nans=0]


  Step 200/1750 | Loss: 0.957904 | Audio: 0.092390 | Latent: 5.154156


Epoch 51/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<18:01,  1.34it/s, loss=1.0837, audio=0.1697, latent=4.9617, nans=0]


  Step 300/1750 | Loss: 0.959053 | Audio: 0.093207 | Latent: 5.150932


Epoch 51/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:26,  1.37it/s, loss=1.0102, audio=0.0870, latent=5.5740, nans=0]


  Step 400/1750 | Loss: 0.957296 | Audio: 0.092311 | Latent: 5.151161


Epoch 51/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:47,  1.41it/s, loss=0.8230, audio=0.0464, latent=4.8682, nans=0]


  Step 500/1750 | Loss: 0.958399 | Audio: 0.093088 | Latent: 5.148153


Epoch 51/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:52,  1.38it/s, loss=1.0061, audio=0.1633, latent=4.5297, nans=0]


  Step 600/1750 | Loss: 0.959988 | Audio: 0.094615 | Latent: 5.138390


Epoch 51/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:40,  1.38it/s, loss=0.8947, audio=0.0522, latent=5.2691, nans=0]


  Step 700/1750 | Loss: 0.960054 | Audio: 0.095088 | Latent: 5.132512


Epoch 51/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:36<11:33,  1.37it/s, loss=1.0359, audio=0.1619, latent=4.7474, nans=0]


  Step 800/1750 | Loss: 0.960623 | Audio: 0.095089 | Latent: 5.136304


Epoch 51/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<09:59,  1.42it/s, loss=1.0044, audio=0.1068, latent=5.2722, nans=0]


  Step 900/1750 | Loss: 0.959414 | Audio: 0.094326 | Latent: 5.138418


Epoch 51/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<09:15,  1.35it/s, loss=0.9202, audio=0.0973, latent=4.8375, nans=0]


  Step 1000/1750 | Loss: 0.957734 | Audio: 0.093619 | Latent: 5.136632


Epoch 51/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:40,  1.41it/s, loss=0.8073, audio=0.0331, latent=4.9401, nans=0]


  Step 1100/1750 | Loss: 0.956389 | Audio: 0.093508 | Latent: 5.129156


Epoch 51/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:35,  1.39it/s, loss=0.8318, audio=0.0350, latent=5.0781, nans=0]


  Step 1200/1750 | Loss: 0.956020 | Audio: 0.093476 | Latent: 5.127121


Epoch 51/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:35<05:24,  1.39it/s, loss=0.9759, audio=0.1516, latent=4.4851, nans=0]


  Step 1300/1750 | Loss: 0.954685 | Audio: 0.093569 | Latent: 5.116971


Epoch 51/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:47<04:18,  1.35it/s, loss=1.0564, audio=0.1025, latent=5.6754, nans=0]


  Step 1400/1750 | Loss: 0.955523 | Audio: 0.093943 | Latent: 5.117586


Epoch 51/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:58<02:55,  1.42it/s, loss=0.9474, audio=0.1023, latent=4.9515, nans=0]


  Step 1500/1750 | Loss: 0.955694 | Audio: 0.093842 | Latent: 5.120060


Epoch 51/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:10<01:47,  1.40it/s, loss=1.0341, audio=0.1218, latent=5.2702, nans=0]


  Step 1600/1750 | Loss: 0.954547 | Audio: 0.093502 | Latent: 5.116948


Epoch 51/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:22<00:36,  1.38it/s, loss=0.9714, audio=0.1000, latent=5.1433, nans=0]


  Step 1700/1750 | Loss: 0.952987 | Audio: 0.092885 | Latent: 5.114783


Epoch 51/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:58<00:00,  1.39it/s, loss=0.8886, audio=0.0398, latent=5.3934, nans=0]
Validation 51/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.8534, audio=0.0327]



EPOCH 51/100 SUMMARY
Train Loss:  0.952540 (Audio: 0.092676, Latent: 5.114589)
Val Loss:    0.958362 (Audio: 0.092943, Latent: 5.149838)
Current LR: 2.88e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.958362


EPOCH 52/100



Epoch 52/100:   6%|‚ñä              | 100/1750 [01:11<19:42,  1.40it/s, loss=0.9840, audio=0.1022, latent=5.1975, nans=0]


  Step 100/1750 | Loss: 0.934469 | Audio: 0.085358 | Latent: 5.091687


Epoch 52/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:09,  1.42it/s, loss=1.1250, audio=0.2141, latent=4.6456, nans=0]


  Step 200/1750 | Loss: 0.939873 | Audio: 0.087623 | Latent: 5.097512


Epoch 52/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:33<17:31,  1.38it/s, loss=0.8331, audio=0.0414, latent=5.0025, nans=0]


  Step 300/1750 | Loss: 0.941612 | Audio: 0.089220 | Latent: 5.087815


Epoch 52/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:44<15:53,  1.42it/s, loss=0.9996, audio=0.1132, latent=5.1546, nans=0]


  Step 400/1750 | Loss: 0.943354 | Audio: 0.090532 | Latent: 5.081928


Epoch 52/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:56<14:55,  1.40it/s, loss=0.9356, audio=0.1235, latent=4.5906, nans=0]


  Step 500/1750 | Loss: 0.946273 | Audio: 0.092551 | Latent: 5.074469


Epoch 52/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:51,  1.38it/s, loss=0.8026, audio=0.0464, latent=4.7319, nans=0]


  Step 600/1750 | Loss: 0.946994 | Audio: 0.092719 | Latent: 5.077039


Epoch 52/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:20<12:23,  1.41it/s, loss=0.8567, audio=0.0609, latent=4.8997, nans=0]


  Step 700/1750 | Loss: 0.943468 | Audio: 0.091447 | Latent: 5.070497


Epoch 52/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:07,  1.42it/s, loss=1.0779, audio=0.1533, latent=5.1417, nans=0]


  Step 800/1750 | Loss: 0.943016 | Audio: 0.091472 | Latent: 5.067143


Epoch 52/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:42<10:10,  1.39it/s, loss=0.8938, audio=0.0948, latent=4.6947, nans=0]


  Step 900/1750 | Loss: 0.943280 | Audio: 0.091663 | Latent: 5.066362


Epoch 52/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:54<09:05,  1.38it/s, loss=0.9155, audio=0.0508, latent=5.4258, nans=0]


  Step 1000/1750 | Loss: 0.942812 | Audio: 0.091526 | Latent: 5.065067


Epoch 52/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:42,  1.41it/s, loss=1.0159, audio=0.1165, latent=5.2201, nans=0]


  Step 1100/1750 | Loss: 0.942282 | Audio: 0.091720 | Latent: 5.058948


Epoch 52/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:17<06:29,  1.41it/s, loss=0.9825, audio=0.1034, latent=5.1717, nans=0]


  Step 1200/1750 | Loss: 0.943836 | Audio: 0.092243 | Latent: 5.062336


Epoch 52/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:28<05:18,  1.41it/s, loss=1.0203, audio=0.1623, latent=4.6376, nans=0]


  Step 1300/1750 | Loss: 0.944340 | Audio: 0.092489 | Latent: 5.062410


Epoch 52/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:40<04:11,  1.39it/s, loss=0.9309, audio=0.1015, latent=4.8521, nans=0]


  Step 1400/1750 | Loss: 0.943647 | Audio: 0.092314 | Latent: 5.060129


Epoch 52/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:52<03:00,  1.39it/s, loss=1.0271, audio=0.0903, latent=5.6442, nans=0]


  Step 1500/1750 | Loss: 0.943075 | Audio: 0.092163 | Latent: 5.058330


Epoch 52/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:04<01:48,  1.39it/s, loss=0.8576, audio=0.0432, latent=5.1417, nans=0]


  Step 1600/1750 | Loss: 0.942453 | Audio: 0.091926 | Latent: 5.057347


Epoch 52/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:16<00:35,  1.39it/s, loss=0.7560, audio=0.0526, latent=4.3390, nans=0]


  Step 1700/1750 | Loss: 0.942356 | Audio: 0.091993 | Latent: 5.055804


Epoch 52/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:51<00:00,  1.40it/s, loss=1.0376, audio=0.1148, latent=5.3867, nans=0]
Validation 52/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=0.8450, audio=0.0322]



EPOCH 52/100 SUMMARY
Train Loss:  0.943000 (Audio: 0.092545, Latent: 5.052727)
Val Loss:    0.945874 (Audio: 0.090848, Latent: 5.094514)
Current LR: 2.88e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.945874


EPOCH 53/100



Epoch 53/100:   6%|‚ñä              | 100/1750 [01:11<19:06,  1.44it/s, loss=0.9037, audio=0.0533, latent=5.3137, nans=0]


  Step 100/1750 | Loss: 0.935559 | Audio: 0.089782 | Latent: 5.039966


Epoch 53/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:52,  1.37it/s, loss=0.8351, audio=0.0436, latent=4.9863, nans=0]


  Step 200/1750 | Loss: 0.939043 | Audio: 0.091474 | Latent: 5.040637


Epoch 53/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:39,  1.37it/s, loss=0.9606, audio=0.0933, latent=5.1593, nans=0]


  Step 300/1750 | Loss: 0.938202 | Audio: 0.091539 | Latent: 5.034158


Epoch 53/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:18,  1.38it/s, loss=0.8916, audio=0.0478, latent=5.3066, nans=0]


  Step 400/1750 | Loss: 0.939592 | Audio: 0.092729 | Latent: 5.027557


Epoch 53/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:55,  1.40it/s, loss=0.9336, audio=0.0978, latent=4.9199, nans=0]


  Step 500/1750 | Loss: 0.939508 | Audio: 0.092701 | Latent: 5.027370


Epoch 53/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:40,  1.40it/s, loss=0.7802, audio=0.0518, latent=4.5109, nans=0]


  Step 600/1750 | Loss: 0.939005 | Audio: 0.092413 | Latent: 5.027865


Epoch 53/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:36,  1.39it/s, loss=0.8396, audio=0.0423, latent=5.0325, nans=0]


  Step 700/1750 | Loss: 0.938921 | Audio: 0.092606 | Latent: 5.024727


Epoch 53/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:07,  1.42it/s, loss=0.8475, audio=0.0597, latent=4.8541, nans=0]


  Step 800/1750 | Loss: 0.939541 | Audio: 0.092695 | Latent: 5.027677


Epoch 53/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:10,  1.39it/s, loss=0.8265, audio=0.0477, latent=4.8738, nans=0]


  Step 900/1750 | Loss: 0.938574 | Audio: 0.092847 | Latent: 5.019192


Epoch 53/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<09:05,  1.37it/s, loss=1.0030, audio=0.1128, latent=5.1827, nans=0]


  Step 1000/1750 | Loss: 0.938265 | Audio: 0.092764 | Latent: 5.018248


Epoch 53/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:39,  1.42it/s, loss=0.9319, audio=0.1153, latent=4.6752, nans=0]


  Step 1100/1750 | Loss: 0.936747 | Audio: 0.092439 | Latent: 5.012465


Epoch 53/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:42,  1.37it/s, loss=0.9373, audio=0.1149, latent=4.7173, nans=0]


  Step 1200/1750 | Loss: 0.935430 | Audio: 0.092086 | Latent: 5.008385


Epoch 53/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:35<05:25,  1.38it/s, loss=0.9124, audio=0.1012, latent=4.7334, nans=0]


  Step 1300/1750 | Loss: 0.935945 | Audio: 0.092682 | Latent: 5.003872


Epoch 53/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:47<04:13,  1.38it/s, loss=0.9244, audio=0.0408, latent=5.6180, nans=0]


  Step 1400/1750 | Loss: 0.936092 | Audio: 0.092839 | Latent: 5.002767


Epoch 53/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<02:59,  1.39it/s, loss=0.9232, audio=0.0549, latent=5.4222, nans=0]


  Step 1500/1750 | Loss: 0.934471 | Audio: 0.092334 | Latent: 4.998692


Epoch 53/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:11<01:48,  1.38it/s, loss=0.9541, audio=0.1039, latent=4.9754, nans=0]


  Step 1600/1750 | Loss: 0.934445 | Audio: 0.092281 | Latent: 4.999223


Epoch 53/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:35,  1.40it/s, loss=1.0585, audio=0.1518, latent=5.0328, nans=0]


  Step 1700/1750 | Loss: 0.934051 | Audio: 0.092254 | Latent: 4.996958


Epoch 53/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:58<00:00,  1.39it/s, loss=0.8296, audio=0.0479, latent=4.8918, nans=0]
Validation 53/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.8385, audio=0.0313]



EPOCH 53/100 SUMMARY
Train Loss:  0.934494 (Audio: 0.092444, Latent: 4.997372)
Val Loss:    0.941260 (Audio: 0.090917, Latent: 5.062837)
Current LR: 2.88e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.941260


EPOCH 54/100



Epoch 54/100:   6%|‚ñä              | 100/1750 [01:11<19:51,  1.38it/s, loss=1.0178, audio=0.1215, latent=5.1652, nans=0]


  Step 100/1750 | Loss: 0.915413 | Audio: 0.082336 | Latent: 5.004948


Epoch 54/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:59,  1.36it/s, loss=1.0852, audio=0.1013, latent=5.8836, nans=0]


  Step 200/1750 | Loss: 0.922133 | Audio: 0.087844 | Latent: 4.976295


Epoch 54/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:43,  1.36it/s, loss=1.1690, audio=0.2202, latent=4.8567, nans=0]


  Step 300/1750 | Loss: 0.921922 | Audio: 0.087449 | Latent: 4.980160


Epoch 54/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:20,  1.38it/s, loss=0.9973, audio=0.1624, latent=4.4836, nans=0]


  Step 400/1750 | Loss: 0.925858 | Audio: 0.088960 | Latent: 4.986256


Epoch 54/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<15:10,  1.37it/s, loss=1.0092, audio=0.1604, latent=4.5900, nans=0]


  Step 500/1750 | Loss: 0.925459 | Audio: 0.089656 | Latent: 4.974316


Epoch 54/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:48,  1.39it/s, loss=0.7797, audio=0.0532, latent=4.4881, nans=0]


  Step 600/1750 | Loss: 0.928513 | Audio: 0.091376 | Latent: 4.971741


Epoch 54/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:30,  1.40it/s, loss=1.0668, audio=0.1546, latent=5.0509, nans=0]


  Step 700/1750 | Loss: 0.928778 | Audio: 0.091229 | Latent: 4.975461


Epoch 54/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:16,  1.40it/s, loss=0.8701, audio=0.0552, latent=5.0650, nans=0]


  Step 800/1750 | Loss: 0.928637 | Audio: 0.091184 | Latent: 4.975127


Epoch 54/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:17,  1.38it/s, loss=0.9988, audio=0.1547, latent=4.5962, nans=0]


  Step 900/1750 | Loss: 0.926187 | Audio: 0.090581 | Latent: 4.966830


Epoch 54/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:58<08:49,  1.42it/s, loss=0.9348, audio=0.0973, latent=4.9347, nans=1]


  Step 1000/1750 | Loss: 0.928316 | Audio: 0.091002 | Latent: 4.975409


Epoch 54/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:11<07:46,  1.39it/s, loss=0.8657, audio=0.0292, latent=5.3815, nans=1]


  Step 1100/1750 | Loss: 0.926880 | Audio: 0.090970 | Latent: 4.966267


Epoch 54/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:29,  1.41it/s, loss=1.1326, audio=0.1642, latent=5.3613, nans=1]


  Step 1200/1750 | Loss: 0.926188 | Audio: 0.090884 | Latent: 4.962800


Epoch 54/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:19,  1.41it/s, loss=0.8849, audio=0.0453, latent=5.2947, nans=1]


  Step 1300/1750 | Loss: 0.926388 | Audio: 0.091261 | Latent: 4.959098


Epoch 54/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:15,  1.37it/s, loss=1.0618, audio=0.1613, latent=4.9283, nans=1]


  Step 1400/1750 | Loss: 0.926212 | Audio: 0.091467 | Latent: 4.955185


Epoch 54/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:58<02:59,  1.39it/s, loss=1.0751, audio=0.1103, latent=5.6960, nans=1]


  Step 1500/1750 | Loss: 0.925269 | Audio: 0.091228 | Latent: 4.952093


Epoch 54/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:08<01:49,  1.37it/s, loss=0.9200, audio=0.0961, latent=4.8520, nans=1]


  Step 1600/1750 | Loss: 0.925253 | Audio: 0.091637 | Latent: 4.946522


Epoch 54/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:35,  1.43it/s, loss=0.9378, audio=0.1064, latent=4.8332, nans=1]


  Step 1700/1750 | Loss: 0.925739 | Audio: 0.092173 | Latent: 4.942613


Epoch 54/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:55<00:00,  1.39it/s, loss=1.0719, audio=0.1653, latent=4.9424, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 54/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [03:57<00:00,  1.58it/s, loss=0.8262, audio=0.0318]



EPOCH 54/100 SUMMARY
Train Loss:  0.925966 (Audio: 0.092215, Latent: 4.943573)
Val Loss:    0.928990 (Audio: 0.091071, Latent: 4.978984)
Current LR: 2.87e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.928990


EPOCH 55/100



Epoch 55/100:   6%|‚ñä              | 100/1750 [01:10<19:29,  1.41it/s, loss=0.9630, audio=0.1002, latent=5.0842, nans=0]


  Step 100/1750 | Loss: 0.917506 | Audio: 0.092581 | Latent: 4.882298


Epoch 55/100:  11%|‚ñà‚ñã             | 200/1750 [02:21<18:41,  1.38it/s, loss=1.1029, audio=0.1643, latent=5.1616, nans=0]


  Step 200/1750 | Loss: 0.921014 | Audio: 0.092590 | Latent: 4.905567


Epoch 55/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:33<16:40,  1.45it/s, loss=0.8144, audio=0.0411, latent=4.8811, nans=0]


  Step 300/1750 | Loss: 0.918383 | Audio: 0.091699 | Latent: 4.899899


Epoch 55/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:44<16:11,  1.39it/s, loss=0.9017, audio=0.1042, latent=4.6221, nans=0]


  Step 400/1750 | Loss: 0.920375 | Audio: 0.093217 | Latent: 4.892939


Epoch 55/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:56<15:05,  1.38it/s, loss=0.9451, audio=0.1153, latent=4.7634, nans=0]


  Step 500/1750 | Loss: 0.922495 | Audio: 0.094227 | Latent: 4.893607


Epoch 55/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:08<13:36,  1.41it/s, loss=0.8881, audio=0.0542, latent=5.1976, nans=0]


  Step 600/1750 | Loss: 0.918883 | Audio: 0.092232 | Latent: 4.896128


Epoch 55/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:19<12:49,  1.36it/s, loss=1.1446, audio=0.2177, latent=4.7283, nans=0]


  Step 700/1750 | Loss: 0.918925 | Audio: 0.092496 | Latent: 4.892892


Epoch 55/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:23,  1.39it/s, loss=1.0262, audio=0.1217, latent=5.2181, nans=0]


  Step 800/1750 | Loss: 0.917802 | Audio: 0.091738 | Latent: 4.895513


Epoch 55/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:42<10:17,  1.38it/s, loss=0.7996, audio=0.0375, latent=4.8302, nans=0]


  Step 900/1750 | Loss: 0.918371 | Audio: 0.091902 | Latent: 4.897110


Epoch 55/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:54<08:57,  1.40it/s, loss=0.8493, audio=0.0536, latent=4.9468, nans=0]


  Step 1000/1750 | Loss: 0.916937 | Audio: 0.091261 | Latent: 4.896098


Epoch 55/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:48,  1.39it/s, loss=0.9798, audio=0.1057, latent=5.1235, nans=0]


  Step 1100/1750 | Loss: 0.916551 | Audio: 0.091376 | Latent: 4.891997


Epoch 55/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:18<06:37,  1.38it/s, loss=1.0207, audio=0.1744, latent=4.4787, nans=0]


  Step 1200/1750 | Loss: 0.918606 | Audio: 0.091972 | Latent: 4.897749


Epoch 55/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:30<05:28,  1.37it/s, loss=0.8588, audio=0.0553, latent=4.9877, nans=0]


  Step 1300/1750 | Loss: 0.919092 | Audio: 0.092114 | Latent: 4.899101


Epoch 55/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:42<04:17,  1.36it/s, loss=0.8104, audio=0.0332, latent=4.9610, nans=0]


  Step 1400/1750 | Loss: 0.918016 | Audio: 0.091892 | Latent: 4.894882


Epoch 55/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:54<02:55,  1.42it/s, loss=0.9676, audio=0.1024, latent=5.0849, nans=0]


  Step 1500/1750 | Loss: 0.917405 | Audio: 0.091600 | Latent: 4.894699


Epoch 55/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:06<01:45,  1.43it/s, loss=1.0212, audio=0.1576, latent=4.7066, nans=0]


  Step 1600/1750 | Loss: 0.917839 | Audio: 0.091884 | Latent: 4.893797


Epoch 55/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:17<00:36,  1.38it/s, loss=0.8997, audio=0.0992, latent=4.6756, nans=0]


  Step 1700/1750 | Loss: 0.918041 | Audio: 0.092267 | Latent: 4.890043


Epoch 55/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:52<00:00,  1.40it/s, loss=0.8157, audio=0.0501, latent=4.7698, nans=0]
Validation 55/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=0.8211, audio=0.0314]



EPOCH 55/100 SUMMARY
Train Loss:  0.917444 (Audio: 0.092233, Latent: 4.886518)
Val Loss:    0.920961 (Audio: 0.090575, Latent: 4.932064)
Current LR: 2.87e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.920961


EPOCH 56/100



Epoch 56/100:   6%|‚ñä              | 100/1750 [01:11<19:22,  1.42it/s, loss=0.9202, audio=0.1093, latent=4.6769, nans=0]


  Step 100/1750 | Loss: 0.910568 | Audio: 0.094221 | Latent: 4.814172


Epoch 56/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:21,  1.41it/s, loss=0.8944, audio=0.0415, latent=5.4095, nans=0]


  Step 200/1750 | Loss: 0.910422 | Audio: 0.092611 | Latent: 4.834661


Epoch 56/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:26,  1.39it/s, loss=0.7777, audio=0.0646, latent=4.3228, nans=0]


  Step 300/1750 | Loss: 0.910771 | Audio: 0.092370 | Latent: 4.840201


Epoch 56/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<15:56,  1.41it/s, loss=0.9482, audio=0.0978, latent=5.0177, nans=0]


  Step 400/1750 | Loss: 0.906585 | Audio: 0.090052 | Latent: 4.843204


Epoch 56/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:58,  1.39it/s, loss=0.9228, audio=0.0917, latent=4.9286, nans=0]


  Step 500/1750 | Loss: 0.910171 | Audio: 0.091387 | Latent: 4.849312


Epoch 56/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:44,  1.39it/s, loss=0.9096, audio=0.0964, latent=4.7784, nans=0]


  Step 600/1750 | Loss: 0.908705 | Audio: 0.090946 | Latent: 4.845425


Epoch 56/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:23<12:32,  1.40it/s, loss=0.9214, audio=0.0997, latent=4.8130, nans=0]


  Step 700/1750 | Loss: 0.907683 | Audio: 0.090645 | Latent: 4.842613


Epoch 56/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:18,  1.40it/s, loss=0.9236, audio=0.1052, latent=4.7542, nans=0]


  Step 800/1750 | Loss: 0.905178 | Audio: 0.089538 | Latent: 4.840683


Epoch 56/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:47<10:19,  1.37it/s, loss=0.8715, audio=0.0966, latent=4.5216, nans=0]


  Step 900/1750 | Loss: 0.906341 | Audio: 0.089984 | Latent: 4.842492


Epoch 56/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<08:56,  1.40it/s, loss=1.0273, audio=0.1533, latent=4.8039, nans=0]


  Step 1000/1750 | Loss: 0.906697 | Audio: 0.089984 | Latent: 4.844860


Epoch 56/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:53,  1.37it/s, loss=0.8933, audio=0.0731, latent=4.9805, nans=0]


  Step 1100/1750 | Loss: 0.906818 | Audio: 0.090439 | Latent: 4.839597


Epoch 56/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:23<06:55,  1.32it/s, loss=0.8213, audio=0.0472, latent=4.8461, nans=0]


  Step 1200/1750 | Loss: 0.908374 | Audio: 0.091214 | Latent: 4.839635


Epoch 56/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:35<05:19,  1.41it/s, loss=0.9471, audio=0.0986, latent=4.9995, nans=0]


  Step 1300/1750 | Loss: 0.907934 | Audio: 0.091278 | Latent: 4.835860


Epoch 56/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:08,  1.41it/s, loss=1.0596, audio=0.1707, latent=4.7880, nans=0]


  Step 1400/1750 | Loss: 0.907792 | Audio: 0.091592 | Latent: 4.830710


Epoch 56/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:01,  1.37it/s, loss=0.9330, audio=0.0970, latent=4.9267, nans=0]


  Step 1500/1750 | Loss: 0.908084 | Audio: 0.091680 | Latent: 4.831491


Epoch 56/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:10<01:50,  1.36it/s, loss=1.1395, audio=0.2155, latent=4.7230, nans=0]


  Step 1600/1750 | Loss: 0.907936 | Audio: 0.091745 | Latent: 4.829635


Epoch 56/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:22<00:36,  1.38it/s, loss=0.8504, audio=0.0344, latent=5.2103, nans=0]


  Step 1700/1750 | Loss: 0.908815 | Audio: 0.092003 | Latent: 4.832061


Epoch 56/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:58<00:00,  1.39it/s, loss=0.8450, audio=0.0340, latent=5.1802, nans=0]
Validation 56/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.8097, audio=0.0315]



EPOCH 56/100 SUMMARY
Train Loss:  0.908878 (Audio: 0.092111, Latent: 4.831036)
Val Loss:    0.911975 (Audio: 0.090791, Latent: 4.869290)
Current LR: 2.86e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.911975


EPOCH 57/100



Epoch 57/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:13,  1.42it/s, loss=0.8720, audio=0.0451, latent=5.2119, nans=1]


  Step 200/1750 | Loss: 0.988147 | Audio: 0.089352 | Latent: 5.396283


Epoch 57/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:33<17:16,  1.40it/s, loss=0.8723, audio=0.0985, latent=4.5023, nans=1]


  Step 300/1750 | Loss: 0.957867 | Audio: 0.089837 | Latent: 5.187951


Epoch 57/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:45<16:03,  1.40it/s, loss=0.9318, audio=0.1000, latent=4.8789, nans=1]


  Step 400/1750 | Loss: 0.950330 | Audio: 0.092861 | Latent: 5.097383


Epoch 57/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<15:07,  1.38it/s, loss=1.1852, audio=0.2723, latent=4.2711, nans=1]


  Step 500/1750 | Loss: 0.941017 | Audio: 0.092730 | Latent: 5.037045


Epoch 57/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<13:32,  1.42it/s, loss=0.9622, audio=0.0932, latent=5.1714, nans=1]


  Step 600/1750 | Loss: 0.932704 | Audio: 0.092031 | Latent: 4.990945


Epoch 57/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:53,  1.36it/s, loss=1.0056, audio=0.1556, latent=4.6286, nans=1]


  Step 700/1750 | Loss: 0.930369 | Audio: 0.093235 | Latent: 4.959327


Epoch 57/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:32<10:42,  1.48it/s, loss=0.9029, audio=0.1002, latent=4.6834, nans=1]


  Step 800/1750 | Loss: 0.927166 | Audio: 0.093030 | Latent: 4.940706


Epoch 57/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:44<10:12,  1.39it/s, loss=0.8918, audio=0.0580, latent=5.1729, nans=1]


  Step 900/1750 | Loss: 0.924770 | Audio: 0.092999 | Latent: 4.925147


Epoch 57/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:55<09:02,  1.38it/s, loss=0.9033, audio=0.0846, latent=4.8938, nans=1]


  Step 1000/1750 | Loss: 0.921081 | Audio: 0.092225 | Latent: 4.910876


Epoch 57/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:08<07:50,  1.38it/s, loss=0.8869, audio=0.1063, latent=4.4953, nans=1]


  Step 1100/1750 | Loss: 0.919749 | Audio: 0.092316 | Latent: 4.900783


Epoch 57/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:20<06:44,  1.36it/s, loss=0.9695, audio=0.1615, latent=4.3108, nans=1]


  Step 1200/1750 | Loss: 0.918840 | Audio: 0.092598 | Latent: 4.890961


Epoch 57/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:33<05:33,  1.35it/s, loss=0.7399, audio=0.0318, latent=4.5089, nans=1]


  Step 1300/1750 | Loss: 0.916837 | Audio: 0.092337 | Latent: 4.881083


Epoch 57/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:45<04:07,  1.42it/s, loss=0.8191, audio=0.0503, latent=4.7898, nans=1]


  Step 1400/1750 | Loss: 0.915182 | Audio: 0.092320 | Latent: 4.870275


Epoch 57/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:00,  1.38it/s, loss=0.8646, audio=0.0521, latent=5.0690, nans=1]


  Step 1500/1750 | Loss: 0.914676 | Audio: 0.092442 | Latent: 4.865280


Epoch 57/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:09<01:46,  1.41it/s, loss=0.8791, audio=0.0586, latent=5.0800, nans=1]


  Step 1600/1750 | Loss: 0.913386 | Audio: 0.092256 | Latent: 4.859158


Epoch 57/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:20<00:35,  1.39it/s, loss=0.9056, audio=0.0862, latent=4.8885, nans=1]


  Step 1700/1750 | Loss: 0.912182 | Audio: 0.092284 | Latent: 4.850752


Epoch 57/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:56<00:00,  1.39it/s, loss=0.7869, audio=0.0463, latent=4.6291, nans=1]



‚ö†Ô∏è 1 NaN occurrences


Validation 57/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:06<00:00,  1.52it/s, loss=0.8080, audio=0.0324]



EPOCH 57/100 SUMMARY
Train Loss:  0.911774 (Audio: 0.092137, Latent: 4.850001)
Val Loss:    0.905893 (Audio: 0.091439, Latent: 4.820091)
Current LR: 2.86e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.905893


EPOCH 58/100



Epoch 58/100:   6%|‚ñä              | 100/1750 [01:11<20:02,  1.37it/s, loss=0.8469, audio=0.0954, latent=4.3749, nans=0]


  Step 100/1750 | Loss: 0.898248 | Audio: 0.091635 | Latent: 4.766523


Epoch 58/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:47,  1.38it/s, loss=0.8481, audio=0.1026, latent=4.2867, nans=0]


  Step 200/1750 | Loss: 0.893583 | Audio: 0.091869 | Latent: 4.732295


Epoch 58/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:29,  1.38it/s, loss=0.8566, audio=0.0481, latent=5.0695, nans=0]


  Step 300/1750 | Loss: 0.897668 | Audio: 0.091731 | Latent: 4.761378


Epoch 58/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<15:58,  1.41it/s, loss=0.8609, audio=0.0924, latent=4.5076, nans=0]


  Step 400/1750 | Loss: 0.908332 | Audio: 0.094128 | Latent: 4.800509


Epoch 58/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:44,  1.41it/s, loss=0.7521, audio=0.0464, latent=4.3947, nans=0]


  Step 500/1750 | Loss: 0.901319 | Audio: 0.091867 | Latent: 4.783894


Epoch 58/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:55,  1.38it/s, loss=0.9682, audio=0.0424, latent=5.8900, nans=0]


  Step 600/1750 | Loss: 0.902506 | Audio: 0.093019 | Latent: 4.776450


Epoch 58/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<12:39,  1.38it/s, loss=0.8614, audio=0.0672, latent=4.8468, nans=0]


  Step 700/1750 | Loss: 0.899572 | Audio: 0.092586 | Latent: 4.762663


Epoch 58/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:34<11:22,  1.39it/s, loss=0.8323, audio=0.0328, latent=5.1106, nans=0]


  Step 800/1750 | Loss: 0.899453 | Audio: 0.092587 | Latent: 4.761860


Epoch 58/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<09:54,  1.43it/s, loss=0.9418, audio=0.1113, latent=4.7942, nans=0]


  Step 900/1750 | Loss: 0.899144 | Audio: 0.092907 | Latent: 4.755538


Epoch 58/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:57<09:00,  1.39it/s, loss=0.8670, audio=0.1023, latent=4.4166, nans=0]


  Step 1000/1750 | Loss: 0.898412 | Audio: 0.092862 | Latent: 4.751258


Epoch 58/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:09<07:51,  1.38it/s, loss=0.8870, audio=0.1009, latent=4.5676, nans=0]


  Step 1100/1750 | Loss: 0.897462 | Audio: 0.092826 | Latent: 4.745394


Epoch 58/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:21<06:45,  1.36it/s, loss=0.7238, audio=0.0414, latent=4.2728, nans=0]


  Step 1200/1750 | Loss: 0.897061 | Audio: 0.092778 | Latent: 4.743374


Epoch 58/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:33<05:29,  1.37it/s, loss=0.7155, audio=0.0321, latent=4.3425, nans=0]


  Step 1300/1750 | Loss: 0.896695 | Audio: 0.092657 | Latent: 4.742533


Epoch 58/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:13,  1.38it/s, loss=0.9301, audio=0.1514, latent=4.1820, nans=0]


  Step 1400/1750 | Loss: 0.897256 | Audio: 0.092878 | Latent: 4.743329


Epoch 58/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<03:05,  1.35it/s, loss=0.7519, audio=0.0370, latent=4.5186, nans=0]


  Step 1500/1750 | Loss: 0.896143 | Audio: 0.092531 | Latent: 4.740542


Epoch 58/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:11<01:49,  1.37it/s, loss=0.7512, audio=0.0406, latent=4.4667, nans=0]


  Step 1600/1750 | Loss: 0.895466 | Audio: 0.092188 | Latent: 4.740605


Epoch 58/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:35,  1.41it/s, loss=1.0345, audio=0.1691, latent=4.6421, nans=0]


  Step 1700/1750 | Loss: 0.895260 | Audio: 0.091937 | Latent: 4.742572


Epoch 58/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:00<00:00,  1.39it/s, loss=1.0195, audio=0.1506, latent=4.7896, nans=0]
Validation 58/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=0.8025, audio=0.0323]



EPOCH 58/100 SUMMARY
Train Loss:  0.895034 (Audio: 0.091936, Latent: 4.741083)
Val Loss:    0.900044 (Audio: 0.090743, Latent: 4.790386)
Current LR: 2.85e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.900044


EPOCH 59/100



Epoch 59/100:   6%|‚ñä              | 100/1750 [01:12<19:56,  1.38it/s, loss=0.8736, audio=0.1023, latent=4.4599, nans=0]


  Step 100/1750 | Loss: 0.888022 | Audio: 0.092541 | Latent: 4.686268


Epoch 59/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:30,  1.40it/s, loss=0.8993, audio=0.1144, latent=4.4698, nans=0]


  Step 200/1750 | Loss: 0.890473 | Audio: 0.091970 | Latent: 4.710226


Epoch 59/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:35,  1.37it/s, loss=0.9064, audio=0.1113, latent=4.5588, nans=0]


  Step 300/1750 | Loss: 0.894143 | Audio: 0.093559 | Latent: 4.713498


Epoch 59/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:39,  1.35it/s, loss=0.8663, audio=0.0937, latent=4.5252, nans=0]


  Step 400/1750 | Loss: 0.892239 | Audio: 0.092617 | Latent: 4.713371


Epoch 59/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:52,  1.40it/s, loss=0.9663, audio=0.1105, latent=4.9678, nans=0]


  Step 500/1750 | Loss: 0.891069 | Audio: 0.092287 | Latent: 4.709971


Epoch 59/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:11<13:51,  1.38it/s, loss=0.8656, audio=0.0514, latent=5.0849, nans=0]


  Step 600/1750 | Loss: 0.891723 | Audio: 0.093246 | Latent: 4.701533


Epoch 59/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:22<12:37,  1.39it/s, loss=0.8468, audio=0.0887, latent=4.4635, nans=0]


  Step 700/1750 | Loss: 0.891846 | Audio: 0.093995 | Latent: 4.692380


Epoch 59/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:35<11:13,  1.41it/s, loss=1.0461, audio=0.1598, latent=4.8427, nans=0]


  Step 800/1750 | Loss: 0.890519 | Audio: 0.093347 | Latent: 4.692168


Epoch 59/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:48<10:20,  1.37it/s, loss=0.8659, audio=0.0656, latent=4.8978, nans=0]


  Step 900/1750 | Loss: 0.890416 | Audio: 0.093636 | Latent: 4.687626


Epoch 59/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:59<08:49,  1.42it/s, loss=0.9811, audio=0.1591, latent=4.4195, nans=0]


  Step 1000/1750 | Loss: 0.890953 | Audio: 0.093522 | Latent: 4.692729


Epoch 59/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:10<07:41,  1.41it/s, loss=0.9209, audio=0.1443, latent=4.2145, nans=0]


  Step 1100/1750 | Loss: 0.889015 | Audio: 0.092622 | Latent: 4.691809


Epoch 59/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:22<06:17,  1.46it/s, loss=0.9982, audio=0.1116, latent=5.1665, nans=0]


  Step 1200/1750 | Loss: 0.889945 | Audio: 0.092788 | Latent: 4.695790


Epoch 59/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:33<05:18,  1.41it/s, loss=0.8441, audio=0.0377, latent=5.1250, nans=0]


  Step 1300/1750 | Loss: 0.890428 | Audio: 0.092768 | Latent: 4.699283


Epoch 59/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:45<04:11,  1.39it/s, loss=0.9858, audio=0.1551, latent=4.5035, nans=0]


  Step 1400/1750 | Loss: 0.890409 | Audio: 0.092670 | Latent: 4.700454


Epoch 59/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:57<03:04,  1.36it/s, loss=0.8943, audio=0.1022, latent=4.5996, nans=0]


  Step 1500/1750 | Loss: 0.889620 | Audio: 0.092413 | Latent: 4.698633


Epoch 59/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:09<01:49,  1.37it/s, loss=0.8480, audio=0.1002, latent=4.3177, nans=0]


  Step 1600/1750 | Loss: 0.888587 | Audio: 0.092178 | Latent: 4.694870


Epoch 59/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:21<00:35,  1.42it/s, loss=0.7981, audio=0.0328, latent=4.8832, nans=0]


  Step 1700/1750 | Loss: 0.887889 | Audio: 0.092049 | Latent: 4.691938


Epoch 59/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:57<00:00,  1.39it/s, loss=0.7658, audio=0.0511, latent=4.4241, nans=0]
Validation 59/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=0.7911, audio=0.0315]



EPOCH 59/100 SUMMARY
Train Loss:  0.887375 (Audio: 0.091838, Latent: 4.691324)
Val Loss:    0.895571 (Audio: 0.091729, Latent: 4.747419)
Current LR: 2.85e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.895571


EPOCH 60/100



Epoch 60/100:   6%|‚ñä              | 100/1750 [01:15<19:41,  1.40it/s, loss=0.7990, audio=0.0592, latent=4.5369, nans=0]


  Step 100/1750 | Loss: 0.894990 | Audio: 0.093859 | Latent: 4.715150


Epoch 60/100:  11%|‚ñà‚ñã             | 200/1750 [02:33<20:22,  1.27it/s, loss=0.9347, audio=0.1159, latent=4.6859, nans=0]


  Step 200/1750 | Loss: 0.888773 | Audio: 0.092746 | Latent: 4.688542


Epoch 60/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:50<19:16,  1.25it/s, loss=0.7788, audio=0.0425, latent=4.6251, nans=0]


  Step 300/1750 | Loss: 0.885710 | Audio: 0.091460 | Latent: 4.685269


Epoch 60/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [05:05<16:58,  1.33it/s, loss=0.8782, audio=0.0626, latent=5.0205, nans=0]


  Step 400/1750 | Loss: 0.884474 | Audio: 0.090587 | Latent: 4.688672


Epoch 60/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:21<15:27,  1.35it/s, loss=0.8532, audio=0.0516, latent=5.0002, nans=0]


  Step 500/1750 | Loss: 0.883028 | Audio: 0.090327 | Latent: 4.682500


Epoch 60/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:36<14:34,  1.31it/s, loss=0.8809, audio=0.1008, latent=4.5292, nans=0]


  Step 600/1750 | Loss: 0.881520 | Audio: 0.090142 | Latent: 4.674902


Epoch 60/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:49<12:22,  1.41it/s, loss=0.8653, audio=0.0877, latent=4.5986, nans=0]


  Step 700/1750 | Loss: 0.882733 | Audio: 0.091012 | Latent: 4.671398


Epoch 60/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [10:00<11:26,  1.38it/s, loss=0.7543, audio=0.0472, latent=4.3990, nans=0]


  Step 800/1750 | Loss: 0.879004 | Audio: 0.089683 | Latent: 4.664252


Epoch 60/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [11:12<10:26,  1.36it/s, loss=0.9533, audio=0.1118, latent=4.8641, nans=0]


  Step 900/1750 | Loss: 0.879345 | Audio: 0.090331 | Latent: 4.657890


Epoch 60/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:24<08:56,  1.40it/s, loss=0.7467, audio=0.0441, latent=4.3910, nans=0]


  Step 1000/1750 | Loss: 0.878492 | Audio: 0.089917 | Latent: 4.657720


Epoch 60/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:35<07:35,  1.43it/s, loss=0.7375, audio=0.0481, latent=4.2748, nans=0]


  Step 1100/1750 | Loss: 0.880765 | Audio: 0.090891 | Latent: 4.659890


Epoch 60/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:47<06:31,  1.41it/s, loss=0.8648, audio=0.1019, latent=4.4068, nans=0]


  Step 1200/1750 | Loss: 0.880105 | Audio: 0.090850 | Latent: 4.656031


Epoch 60/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:59<05:25,  1.38it/s, loss=0.8521, audio=0.1025, latent=4.3137, nans=0]


  Step 1300/1750 | Loss: 0.880416 | Audio: 0.091379 | Latent: 4.651051


Epoch 60/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [17:10<04:10,  1.40it/s, loss=0.7733, audio=0.0479, latent=4.5162, nans=0]


  Step 1400/1750 | Loss: 0.880429 | Audio: 0.091512 | Latent: 4.649373


Epoch 60/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:21<03:00,  1.39it/s, loss=0.9089, audio=0.0577, latent=5.2900, nans=0]


  Step 1500/1750 | Loss: 0.880564 | Audio: 0.091560 | Latent: 4.649624


Epoch 60/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:33<01:45,  1.42it/s, loss=0.8829, audio=0.0958, latent=4.6082, nans=0]


  Step 1600/1750 | Loss: 0.879767 | Audio: 0.091564 | Latent: 4.644265


Epoch 60/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:45<00:35,  1.41it/s, loss=0.9993, audio=0.1525, latent=4.6287, nans=0]


  Step 1700/1750 | Loss: 0.879936 | Audio: 0.091877 | Latent: 4.641204


Epoch 60/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:21<00:00,  1.37it/s, loss=0.6953, audio=0.0363, latent=4.1518, nans=0]
Validation 60/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7865, audio=0.0314]



EPOCH 60/100 SUMMARY
Train Loss:  0.879956 (Audio: 0.091742, Latent: 4.643151)
Val Loss:    0.884127 (Audio: 0.089923, Latent: 4.695206)
Current LR: 2.84e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.884127


EPOCH 61/100

üßπ Cleared GPU cache at epoch 60
   Allocated: 1.23GB | Reserved: 1.48GB



Epoch 61/100:   6%|‚ñä              | 100/1750 [01:11<20:00,  1.37it/s, loss=0.7908, audio=0.0463, latent=4.6542, nans=0]


  Step 100/1750 | Loss: 0.875815 | Audio: 0.094262 | Latent: 4.581946


Epoch 61/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:38,  1.39it/s, loss=1.0207, audio=0.1047, latent=5.4081, nans=0]


  Step 200/1750 | Loss: 0.885519 | Audio: 0.098533 | Latent: 4.589688


Epoch 61/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:56,  1.35it/s, loss=0.8116, audio=0.0372, latent=4.9151, nans=0]


  Step 300/1750 | Loss: 0.883863 | Audio: 0.095035 | Latent: 4.625291


Epoch 61/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:30,  1.36it/s, loss=0.9140, audio=0.1050, latent=4.6931, nans=0]


  Step 400/1750 | Loss: 0.882321 | Audio: 0.094384 | Latent: 4.623684


Epoch 61/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:54,  1.40it/s, loss=1.0139, audio=0.1563, latent=4.6752, nans=0]


  Step 500/1750 | Loss: 0.880330 | Audio: 0.093743 | Latent: 4.618964


Epoch 61/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:55,  1.38it/s, loss=0.8533, audio=0.0372, latent=5.1925, nans=0]


  Step 600/1750 | Loss: 0.879939 | Audio: 0.093752 | Latent: 4.616236


Epoch 61/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:25<12:51,  1.36it/s, loss=0.8510, audio=0.1126, latent=4.1721, nans=0]


  Step 700/1750 | Loss: 0.877018 | Audio: 0.093000 | Latent: 4.606793


Epoch 61/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:37<11:30,  1.38it/s, loss=0.7941, audio=0.0493, latent=4.6359, nans=0]


  Step 800/1750 | Loss: 0.877414 | Audio: 0.093166 | Latent: 4.607218


Epoch 61/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:50<10:19,  1.37it/s, loss=0.7166, audio=0.0533, latent=4.0666, nans=0]


  Step 900/1750 | Loss: 0.876664 | Audio: 0.092949 | Latent: 4.605112


Epoch 61/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:03<09:11,  1.36it/s, loss=0.8898, audio=0.1147, latent=4.4020, nans=0]


  Step 1000/1750 | Loss: 0.873793 | Audio: 0.091766 | Latent: 4.601747


Epoch 61/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:15<07:50,  1.38it/s, loss=0.8946, audio=0.1135, latent=4.4506, nans=0]


  Step 1100/1750 | Loss: 0.873403 | Audio: 0.091723 | Latent: 4.599705


Epoch 61/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:27<06:47,  1.35it/s, loss=0.9181, audio=0.1004, latent=4.7821, nans=0]


  Step 1200/1750 | Loss: 0.873346 | Audio: 0.091443 | Latent: 4.603062


Epoch 61/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:40<05:29,  1.37it/s, loss=0.9783, audio=0.1669, latent=4.2967, nans=0]


  Step 1300/1750 | Loss: 0.873216 | Audio: 0.091652 | Latent: 4.599406


Epoch 61/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:52<04:12,  1.38it/s, loss=1.0623, audio=0.2066, latent=4.3281, nans=0]


  Step 1400/1750 | Loss: 0.873701 | Audio: 0.091668 | Latent: 4.602441


Epoch 61/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:04<03:01,  1.38it/s, loss=0.7569, audio=0.0407, latent=4.5033, nans=0]


  Step 1500/1750 | Loss: 0.873620 | Audio: 0.091505 | Latent: 4.604072


Epoch 61/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:16<01:47,  1.40it/s, loss=0.9251, audio=0.1085, latent=4.7205, nans=0]


  Step 1600/1750 | Loss: 0.873122 | Audio: 0.091473 | Latent: 4.601179


Epoch 61/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:27<00:35,  1.39it/s, loss=0.8853, audio=0.0969, latent=4.6100, nans=0]


  Step 1700/1750 | Loss: 0.873591 | Audio: 0.091512 | Latent: 4.603771


Epoch 61/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:03<00:00,  1.38it/s, loss=0.7599, audio=0.0390, latent=4.5459, nans=0]
Validation 61/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7787, audio=0.0311]



EPOCH 61/100 SUMMARY
Train Loss:  0.873645 (Audio: 0.091670, Latent: 4.602029)
Val Loss:    0.877052 (Audio: 0.090225, Latent: 4.644012)
Current LR: 2.84e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.877052


EPOCH 62/100



Epoch 62/100:   6%|‚ñä              | 100/1750 [01:12<20:19,  1.35it/s, loss=0.9751, audio=0.1623, latent=4.3362, nans=0]


  Step 100/1750 | Loss: 0.869896 | Audio: 0.093013 | Latent: 4.559129


Epoch 62/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:51,  1.37it/s, loss=0.8945, audio=0.1066, latent=4.5416, nans=0]


  Step 200/1750 | Loss: 0.871045 | Audio: 0.093197 | Latent: 4.564346


Epoch 62/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:25,  1.39it/s, loss=0.8442, audio=0.0899, latent=4.4302, nans=0]


  Step 300/1750 | Loss: 0.871988 | Audio: 0.093117 | Latent: 4.571700


Epoch 62/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:49<16:14,  1.38it/s, loss=0.7746, audio=0.0422, latent=4.6020, nans=0]


  Step 400/1750 | Loss: 0.871512 | Audio: 0.093219 | Latent: 4.567163


Epoch 62/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:01<15:13,  1.37it/s, loss=0.7799, audio=0.0545, latent=4.4726, nans=0]


  Step 500/1750 | Loss: 0.870194 | Audio: 0.092458 | Latent: 4.568515


Epoch 62/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:13<14:03,  1.36it/s, loss=0.8108, audio=0.0950, latent=4.1389, nans=0]


  Step 600/1750 | Loss: 0.870986 | Audio: 0.092457 | Latent: 4.573805


Epoch 62/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:25<12:57,  1.35it/s, loss=0.8303, audio=0.1016, latent=4.1804, nans=0]


  Step 700/1750 | Loss: 0.867953 | Audio: 0.091002 | Latent: 4.572997


Epoch 62/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:37<11:35,  1.37it/s, loss=0.7899, audio=0.0262, latent=4.9160, nans=0]


  Step 800/1750 | Loss: 0.868076 | Audio: 0.090877 | Latent: 4.575481


Epoch 62/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:50<10:06,  1.40it/s, loss=0.7676, audio=0.0485, latent=4.4713, nans=0]


  Step 900/1750 | Loss: 0.869062 | Audio: 0.091398 | Latent: 4.575106


Epoch 62/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:01<08:59,  1.39it/s, loss=0.7764, audio=0.0413, latent=4.6255, nans=0]


  Step 1000/1750 | Loss: 0.868166 | Audio: 0.091422 | Latent: 4.568806


Epoch 62/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:12<07:41,  1.41it/s, loss=0.6974, audio=0.0335, latent=4.2026, nans=0]


  Step 1100/1750 | Loss: 0.868766 | Audio: 0.091707 | Latent: 4.569017


Epoch 62/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:24<06:35,  1.39it/s, loss=0.9948, audio=0.1483, latent=4.6549, nans=0]


  Step 1200/1750 | Loss: 0.868895 | Audio: 0.091993 | Latent: 4.566058


Epoch 62/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:36<05:21,  1.40it/s, loss=0.8352, audio=0.0489, latent=4.9157, nans=0]


  Step 1300/1750 | Loss: 0.869705 | Audio: 0.092238 | Latent: 4.568198


Epoch 62/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:48<04:12,  1.39it/s, loss=0.8303, audio=0.1097, latent=4.0721, nans=0]


  Step 1400/1750 | Loss: 0.869147 | Audio: 0.091914 | Latent: 4.568796


Epoch 62/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<02:55,  1.42it/s, loss=0.9074, audio=0.0968, latent=4.7584, nans=0]


  Step 1500/1750 | Loss: 0.867317 | Audio: 0.091158 | Latent: 4.566668


Epoch 62/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:11<01:47,  1.39it/s, loss=0.8474, audio=0.0965, latent=4.3625, nans=0]


  Step 1600/1750 | Loss: 0.867500 | Audio: 0.091325 | Latent: 4.565663


Epoch 62/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:23<00:36,  1.38it/s, loss=0.8031, audio=0.1148, latent=3.8233, nans=0]


  Step 1700/1750 | Loss: 0.867475 | Audio: 0.091518 | Latent: 4.562926


Epoch 62/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:59<00:00,  1.39it/s, loss=0.8289, audio=0.0996, latent=4.1978, nans=0]
Validation 62/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7711, audio=0.0311]



EPOCH 62/100 SUMMARY
Train Loss:  0.867526 (Audio: 0.091604, Latent: 4.562121)
Val Loss:    0.871406 (Audio: 0.090029, Latent: 4.608992)
Current LR: 2.83e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.871406


EPOCH 63/100



Epoch 63/100:   6%|‚ñä              | 100/1750 [01:11<19:17,  1.43it/s, loss=0.7302, audio=0.0499, latent=4.2030, nans=0]


  Step 100/1750 | Loss: 0.871577 | Audio: 0.093399 | Latent: 4.565196


Epoch 63/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:25,  1.40it/s, loss=0.8771, audio=0.1084, latent=4.4015, nans=0]


  Step 200/1750 | Loss: 0.866461 | Audio: 0.092817 | Latent: 4.538852


Epoch 63/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:28,  1.38it/s, loss=0.8868, audio=0.1100, latent=4.4457, nans=0]


  Step 300/1750 | Loss: 0.869794 | Audio: 0.094900 | Latent: 4.533290


Epoch 63/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:21,  1.38it/s, loss=0.9556, audio=0.0961, latent=5.0892, nans=0]


  Step 400/1750 | Loss: 0.873931 | Audio: 0.096716 | Latent: 4.536664


Epoch 63/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<15:22,  1.35it/s, loss=0.6771, audio=0.0367, latent=4.0248, nans=0]


  Step 500/1750 | Loss: 0.870978 | Audio: 0.095764 | Latent: 4.529671


Epoch 63/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:13<14:02,  1.37it/s, loss=0.8062, audio=0.0949, latent=4.1096, nans=0]


  Step 600/1750 | Loss: 0.871066 | Audio: 0.095668 | Latent: 4.531540


Epoch 63/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:25<13:02,  1.34it/s, loss=0.7263, audio=0.0984, latent=3.5299, nans=0]


  Step 700/1750 | Loss: 0.867011 | Audio: 0.093430 | Latent: 4.534342


Epoch 63/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:38<11:28,  1.38it/s, loss=0.9375, audio=0.1154, latent=4.7113, nans=0]


  Step 800/1750 | Loss: 0.866117 | Audio: 0.093175 | Latent: 4.531779


Epoch 63/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:50<10:18,  1.37it/s, loss=0.9656, audio=0.1623, latent=4.2724, nans=0]


  Step 900/1750 | Loss: 0.864342 | Audio: 0.092558 | Latent: 4.528178


Epoch 63/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:01<08:50,  1.41it/s, loss=0.9417, audio=0.1601, latent=4.1428, nans=0]


  Step 1000/1750 | Loss: 0.863043 | Audio: 0.092198 | Latent: 4.524314


Epoch 63/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:13<07:44,  1.40it/s, loss=0.9203, audio=0.1606, latent=3.9941, nans=0]


  Step 1100/1750 | Loss: 0.862711 | Audio: 0.092059 | Latent: 4.523950


Epoch 63/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:25<06:43,  1.36it/s, loss=0.7981, audio=0.0394, latent=4.7954, nans=0]


  Step 1200/1750 | Loss: 0.861482 | Audio: 0.091629 | Latent: 4.521492


Epoch 63/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:37<05:21,  1.40it/s, loss=0.9093, audio=0.0962, latent=4.7791, nans=0]


  Step 1300/1750 | Loss: 0.861470 | Audio: 0.091421 | Latent: 4.524193


Epoch 63/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:49<04:12,  1.38it/s, loss=0.9313, audio=0.1026, latent=4.8403, nans=0]


  Step 1400/1750 | Loss: 0.861236 | Audio: 0.091376 | Latent: 4.523223


Epoch 63/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:00<02:59,  1.39it/s, loss=0.7062, audio=0.0435, latent=4.1281, nans=0]


  Step 1500/1750 | Loss: 0.861465 | Audio: 0.091530 | Latent: 4.522698


Epoch 63/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:13<01:48,  1.38it/s, loss=0.7729, audio=0.0398, latent=4.6219, nans=0]


  Step 1600/1750 | Loss: 0.861974 | Audio: 0.091442 | Latent: 4.527266


Epoch 63/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:25<00:35,  1.40it/s, loss=0.7633, audio=0.0428, latent=4.5184, nans=0]


  Step 1700/1750 | Loss: 0.861819 | Audio: 0.091553 | Latent: 4.524751


Epoch 63/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:01<00:00,  1.39it/s, loss=0.9958, audio=0.1556, latent=4.5637, nans=0]
Validation 63/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7681, audio=0.0316]



EPOCH 63/100 SUMMARY
Train Loss:  0.861470 (Audio: 0.091523, Latent: 4.522826)
Val Loss:    0.866876 (Audio: 0.090517, Latent: 4.572276)
Current LR: 2.83e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.866876


EPOCH 64/100



Epoch 64/100:   6%|‚ñä              | 100/1750 [01:11<19:34,  1.41it/s, loss=0.8397, audio=0.1107, latent=4.1222, nans=0]


  Step 100/1750 | Loss: 0.853315 | Audio: 0.088468 | Latent: 4.509192


Epoch 64/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:14,  1.42it/s, loss=0.8446, audio=0.1054, latent=4.2251, nans=0]


  Step 200/1750 | Loss: 0.857806 | Audio: 0.091540 | Latent: 4.498169


Epoch 64/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:27,  1.38it/s, loss=0.9262, audio=0.1085, latent=4.7274, nans=0]


  Step 300/1750 | Loss: 0.858504 | Audio: 0.092451 | Latent: 4.490682


Epoch 64/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<15:49,  1.42it/s, loss=0.7823, audio=0.0413, latent=4.6646, nans=0]


  Step 400/1750 | Loss: 0.860359 | Audio: 0.093311 | Latent: 4.491582


Epoch 64/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<14:50,  1.40it/s, loss=0.9467, audio=0.1071, latent=4.8831, nans=0]


  Step 500/1750 | Loss: 0.859026 | Audio: 0.092890 | Latent: 4.488302


Epoch 64/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:08<14:13,  1.35it/s, loss=0.7514, audio=0.0434, latent=4.4305, nans=0]


  Step 600/1750 | Loss: 0.858337 | Audio: 0.092321 | Latent: 4.491303


Epoch 64/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:19<12:28,  1.40it/s, loss=0.7161, audio=0.0395, latent=4.2475, nans=0]


  Step 700/1750 | Loss: 0.856331 | Audio: 0.091467 | Latent: 4.489317


Epoch 64/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:29,  1.38it/s, loss=0.9048, audio=0.1006, latent=4.6907, nans=0]


  Step 800/1750 | Loss: 0.856978 | Audio: 0.091916 | Latent: 4.487640


Epoch 64/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:42<10:02,  1.41it/s, loss=0.8236, audio=0.1098, latent=4.0269, nans=0]


  Step 900/1750 | Loss: 0.854662 | Audio: 0.090970 | Latent: 4.484811


Epoch 64/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:54<08:57,  1.40it/s, loss=0.9411, audio=0.1045, latent=4.8808, nans=0]


  Step 1000/1750 | Loss: 0.856037 | Audio: 0.091506 | Latent: 4.486837


Epoch 64/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:06<07:49,  1.39it/s, loss=0.7902, audio=0.0425, latent=4.7009, nans=0]


  Step 1100/1750 | Loss: 0.855936 | Audio: 0.091483 | Latent: 4.486462


Epoch 64/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:18<06:39,  1.38it/s, loss=0.8400, audio=0.0945, latent=4.3406, nans=0]


  Step 1200/1750 | Loss: 0.856386 | Audio: 0.091661 | Latent: 4.487090


Epoch 64/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:30<05:23,  1.39it/s, loss=0.9917, audio=0.1577, latent=4.5086, nans=0]


  Step 1300/1750 | Loss: 0.856640 | Audio: 0.091818 | Latent: 4.486696


Epoch 64/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:41<04:11,  1.39it/s, loss=0.7226, audio=0.0419, latent=4.2581, nans=0]


  Step 1400/1750 | Loss: 0.857383 | Audio: 0.091395 | Latent: 4.497286


Epoch 64/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:53<02:58,  1.40it/s, loss=0.9646, audio=0.1632, latent=4.2544, nans=0]


  Step 1500/1750 | Loss: 0.858223 | Audio: 0.091810 | Latent: 4.497349


Epoch 64/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:04<01:48,  1.38it/s, loss=0.7554, audio=0.0477, latent=4.3992, nans=0]


  Step 1600/1750 | Loss: 0.857156 | Audio: 0.091429 | Latent: 4.495312


Epoch 64/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:15<00:36,  1.38it/s, loss=0.8308, audio=0.1011, latent=4.1908, nans=0]


  Step 1700/1750 | Loss: 0.857115 | Audio: 0.091654 | Latent: 4.492045


Epoch 64/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:51<00:00,  1.40it/s, loss=0.7946, audio=0.0467, latent=4.6745, nans=0]
Validation 64/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.51it/s, loss=0.7601, audio=0.0313]



EPOCH 64/100 SUMMARY
Train Loss:  0.856881 (Audio: 0.091459, Latent: 4.493089)
Val Loss:    0.861679 (Audio: 0.090223, Latent: 4.541553)
Current LR: 2.83e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.861679


EPOCH 65/100



Epoch 65/100:   6%|‚ñä              | 100/1750 [01:12<19:52,  1.38it/s, loss=0.7579, audio=0.0503, latent=4.3817, nans=0]


  Step 100/1750 | Loss: 0.855821 | Audio: 0.088197 | Latent: 4.529517


Epoch 65/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:23,  1.40it/s, loss=0.8278, audio=0.0412, latent=4.9692, nans=0]


  Step 200/1750 | Loss: 0.859584 | Audio: 0.091211 | Latent: 4.514419


Epoch 65/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:33,  1.38it/s, loss=1.0029, audio=0.0914, latent=5.4673, nans=0]


  Step 300/1750 | Loss: 0.861596 | Audio: 0.092041 | Latent: 4.516757


Epoch 65/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:22,  1.37it/s, loss=1.0166, audio=0.1669, latent=4.5524, nans=0]


  Step 400/1750 | Loss: 0.861295 | Audio: 0.093257 | Latent: 4.498546


Epoch 65/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<15:12,  1.37it/s, loss=0.8849, audio=0.0396, latent=5.3722, nans=0]


  Step 500/1750 | Loss: 0.857848 | Audio: 0.092500 | Latent: 4.485652


Epoch 65/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:37,  1.41it/s, loss=0.8654, audio=0.1133, latent=4.2581, nans=0]


  Step 600/1750 | Loss: 0.862887 | Audio: 0.092998 | Latent: 4.512603


Epoch 65/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:24<12:35,  1.39it/s, loss=0.8036, audio=0.0508, latent=4.6805, nans=0]


  Step 700/1750 | Loss: 0.862753 | Audio: 0.092745 | Latent: 4.515091


Epoch 65/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:36<11:21,  1.39it/s, loss=0.8165, audio=0.0516, latent=4.7561, nans=0]


  Step 800/1750 | Loss: 0.863703 | Audio: 0.092419 | Latent: 4.525762


Epoch 65/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:49<10:21,  1.37it/s, loss=0.7813, audio=0.0316, latent=4.7880, nans=0]


  Step 900/1750 | Loss: 0.861685 | Audio: 0.092199 | Latent: 4.515246


Epoch 65/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:00<08:54,  1.40it/s, loss=0.9858, audio=0.1635, latent=4.3926, nans=0]


  Step 1000/1750 | Loss: 0.861320 | Audio: 0.092103 | Latent: 4.514089


Epoch 65/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:12<07:55,  1.37it/s, loss=0.7748, audio=0.0367, latent=4.6756, nans=0]


  Step 1100/1750 | Loss: 0.860459 | Audio: 0.092013 | Latent: 4.509549


Epoch 65/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:25<06:33,  1.40it/s, loss=0.8059, audio=0.0349, latent=4.9070, nans=0]


  Step 1200/1750 | Loss: 0.859210 | Audio: 0.091701 | Latent: 4.505383


Epoch 65/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:37<05:17,  1.42it/s, loss=0.8159, audio=0.1098, latent=3.9748, nans=0]


  Step 1300/1750 | Loss: 0.858423 | Audio: 0.091888 | Latent: 4.497643


Epoch 65/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:50<04:20,  1.34it/s, loss=0.7037, audio=0.0434, latent=4.1129, nans=0]


  Step 1400/1750 | Loss: 0.857301 | Audio: 0.091639 | Latent: 4.493480


Epoch 65/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:03<03:05,  1.35it/s, loss=0.9148, audio=0.1134, latent=4.5868, nans=0]


  Step 1500/1750 | Loss: 0.855993 | Audio: 0.091462 | Latent: 4.487123


Epoch 65/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:15<01:48,  1.38it/s, loss=0.8409, audio=0.1209, latent=3.9944, nans=0]


  Step 1600/1750 | Loss: 0.854430 | Audio: 0.091003 | Latent: 4.482826


Epoch 65/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:28<00:36,  1.36it/s, loss=0.8306, audio=0.0602, latent=4.7347, nans=0]


  Step 1700/1750 | Loss: 0.854499 | Audio: 0.091304 | Latent: 4.479275


Epoch 65/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:04<00:00,  1.38it/s, loss=0.6944, audio=0.0624, latent=3.7974, nans=0]
Validation 65/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7591, audio=0.0310]



EPOCH 65/100 SUMMARY
Train Loss:  0.854490 (Audio: 0.091452, Latent: 4.477235)
Val Loss:    0.855615 (Audio: 0.089569, Latent: 4.509839)
Current LR: 2.82e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.855615


EPOCH 66/100



Epoch 66/100:   6%|‚ñä              | 100/1750 [01:12<19:50,  1.39it/s, loss=0.9475, audio=0.1140, latent=4.7963, nans=0]


  Step 100/1750 | Loss: 0.854710 | Audio: 0.095378 | Latent: 4.426363


Epoch 66/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<19:01,  1.36it/s, loss=0.8807, audio=0.1069, latent=4.4466, nans=0]


  Step 200/1750 | Loss: 0.853076 | Audio: 0.092444 | Latent: 4.454586


Epoch 66/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:14,  1.40it/s, loss=0.7560, audio=0.0375, latent=4.5395, nans=0]


  Step 300/1750 | Loss: 0.849825 | Audio: 0.092563 | Latent: 4.431321


Epoch 66/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:14,  1.39it/s, loss=0.7665, audio=0.0460, latent=4.4969, nans=0]


  Step 400/1750 | Loss: 0.847011 | Audio: 0.092656 | Latent: 4.411323


Epoch 66/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:58<15:14,  1.37it/s, loss=0.9516, audio=0.1618, latent=4.1858, nans=0]


  Step 500/1750 | Loss: 0.846683 | Audio: 0.092357 | Latent: 4.413130


Epoch 66/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:10<13:24,  1.43it/s, loss=0.8981, audio=0.0914, latent=4.7691, nans=0]


  Step 600/1750 | Loss: 0.845335 | Audio: 0.091303 | Latent: 4.418198


Epoch 66/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:21<12:36,  1.39it/s, loss=0.8457, audio=0.1042, latent=4.2488, nans=0]


  Step 700/1750 | Loss: 0.843837 | Audio: 0.091179 | Latent: 4.409868


Epoch 66/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:33<11:28,  1.38it/s, loss=0.9261, audio=0.1680, latent=3.9338, nans=0]


  Step 800/1750 | Loss: 0.844833 | Audio: 0.091480 | Latent: 4.412492


Epoch 66/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:45<10:09,  1.39it/s, loss=0.7641, audio=0.0433, latent=4.5164, nans=0]


  Step 900/1750 | Loss: 0.842892 | Audio: 0.090261 | Latent: 4.415799


Epoch 66/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:57<08:50,  1.41it/s, loss=0.7813, audio=0.0428, latent=4.6375, nans=0]


  Step 1000/1750 | Loss: 0.843429 | Audio: 0.090497 | Latent: 4.416236


Epoch 66/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:09<07:42,  1.41it/s, loss=0.9306, audio=0.1556, latent=4.1300, nans=0]


  Step 1100/1750 | Loss: 0.843805 | Audio: 0.090672 | Latent: 4.416411


Epoch 66/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:21<06:36,  1.39it/s, loss=0.7303, audio=0.0415, latent=4.3154, nans=0]


  Step 1200/1750 | Loss: 0.843555 | Audio: 0.090583 | Latent: 4.415923


Epoch 66/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:34<05:24,  1.38it/s, loss=0.9598, audio=0.1514, latent=4.3797, nans=0]


  Step 1300/1750 | Loss: 0.843839 | Audio: 0.090564 | Latent: 4.418073


Epoch 66/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:46<04:11,  1.39it/s, loss=0.9135, audio=0.1070, latent=4.6627, nans=0]


  Step 1400/1750 | Loss: 0.844772 | Audio: 0.091063 | Latent: 4.417639


Epoch 66/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:59<03:05,  1.35it/s, loss=0.8378, audio=0.0980, latent=4.2786, nans=0]


  Step 1500/1750 | Loss: 0.844960 | Audio: 0.091196 | Latent: 4.417118


Epoch 66/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:12<01:51,  1.35it/s, loss=0.7769, audio=0.0538, latent=4.4613, nans=0]


  Step 1600/1750 | Loss: 0.844714 | Audio: 0.091103 | Latent: 4.416719


Epoch 66/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:25<00:36,  1.37it/s, loss=0.7636, audio=0.0345, latent=4.6303, nans=0]


  Step 1700/1750 | Loss: 0.844651 | Audio: 0.091102 | Latent: 4.416311


Epoch 66/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:02<00:00,  1.39it/s, loss=0.8472, audio=0.0931, latent=4.4061, nans=0]
Validation 66/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=0.7527, audio=0.0313]



EPOCH 66/100 SUMMARY
Train Loss:  0.845177 (Audio: 0.091315, Latent: 4.416975)
Val Loss:    0.849804 (Audio: 0.089866, Latent: 4.467142)
Current LR: 2.81e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.849804


EPOCH 67/100



Epoch 67/100:   6%|‚ñä              | 100/1750 [01:12<19:37,  1.40it/s, loss=0.8954, audio=0.1155, latent=4.4290, nans=0]


  Step 100/1750 | Loss: 0.834888 | Audio: 0.086901 | Latent: 4.407233


Epoch 67/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:38,  1.39it/s, loss=1.1017, audio=0.1659, latent=5.1325, nans=0]


  Step 200/1750 | Loss: 0.844253 | Audio: 0.091230 | Latent: 4.411955


Epoch 67/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:54,  1.35it/s, loss=0.8305, audio=0.0911, latent=4.3222, nans=0]


  Step 300/1750 | Loss: 0.845716 | Audio: 0.091959 | Latent: 4.411986


Epoch 67/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:49<15:54,  1.41it/s, loss=0.7598, audio=0.0302, latent=4.6634, nans=0]


  Step 400/1750 | Loss: 0.841807 | Audio: 0.090378 | Latent: 4.407007


Epoch 67/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:01<15:07,  1.38it/s, loss=0.8324, audio=0.1084, latent=4.1039, nans=0]


  Step 500/1750 | Loss: 0.841006 | Audio: 0.090336 | Latent: 4.402228


Epoch 67/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:13<13:51,  1.38it/s, loss=0.7336, audio=0.0499, latent=4.2251, nans=0]


  Step 600/1750 | Loss: 0.843461 | Audio: 0.091505 | Latent: 4.403010


Epoch 67/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:25<12:39,  1.38it/s, loss=0.6943, audio=0.0397, latent=4.0996, nans=0]


  Step 700/1750 | Loss: 0.843212 | Audio: 0.091478 | Latent: 4.401714


Epoch 67/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:37<11:26,  1.38it/s, loss=0.9080, audio=0.0538, latent=5.3358, nans=0]


  Step 800/1750 | Loss: 0.842672 | Audio: 0.091474 | Latent: 4.398159


Epoch 67/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:49<10:15,  1.38it/s, loss=0.7768, audio=0.0405, latent=4.6387, nans=0]


  Step 900/1750 | Loss: 0.844969 | Audio: 0.092661 | Latent: 4.397640


Epoch 67/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:01<09:07,  1.37it/s, loss=0.8589, audio=0.0948, latent=4.4618, nans=0]


  Step 1000/1750 | Loss: 0.844840 | Audio: 0.092502 | Latent: 4.398904


Epoch 67/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:12<07:39,  1.41it/s, loss=0.8221, audio=0.1005, latent=4.1409, nans=0]


  Step 1100/1750 | Loss: 0.842754 | Audio: 0.091835 | Latent: 4.393894


Epoch 67/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:25<06:42,  1.37it/s, loss=0.9014, audio=0.1112, latent=4.5264, nans=0]


  Step 1200/1750 | Loss: 0.842484 | Audio: 0.091447 | Latent: 4.397259


Epoch 67/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:38<05:27,  1.38it/s, loss=0.7990, audio=0.0352, latent=4.8582, nans=0]


  Step 1300/1750 | Loss: 0.841931 | Audio: 0.091348 | Latent: 4.394898


Epoch 67/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:49<04:09,  1.40it/s, loss=0.7444, audio=0.0529, latent=4.2581, nans=0]


  Step 1400/1750 | Loss: 0.841198 | Audio: 0.091150 | Latent: 4.392652


Epoch 67/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:02<03:01,  1.38it/s, loss=0.9391, audio=0.1705, latent=3.9870, nans=0]


  Step 1500/1750 | Loss: 0.839806 | Audio: 0.090698 | Latent: 4.389398


Epoch 67/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:14<01:50,  1.35it/s, loss=0.8590, audio=0.0562, latent=4.9776, nans=0]


  Step 1600/1750 | Loss: 0.840739 | Audio: 0.091234 | Latent: 4.388475


Epoch 67/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:26<00:35,  1.41it/s, loss=0.8082, audio=0.1023, latent=4.0244, nans=0]


  Step 1700/1750 | Loss: 0.840121 | Audio: 0.091250 | Latent: 4.384138


Epoch 67/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:02<00:00,  1.39it/s, loss=0.8423, audio=0.0507, latent=4.9388, nans=0]
Validation 67/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.51it/s, loss=0.7480, audio=0.0310]



EPOCH 67/100 SUMMARY
Train Loss:  0.840147 (Audio: 0.091268, Latent: 4.384067)
Val Loss:    0.845702 (Audio: 0.089481, Latent: 4.444932)
Current LR: 2.81e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.845702


EPOCH 68/100



Epoch 68/100:   6%|‚ñä              | 100/1750 [01:11<19:27,  1.41it/s, loss=0.7700, audio=0.0440, latent=4.5462, nans=0]


  Step 100/1750 | Loss: 0.833498 | Audio: 0.090665 | Latent: 4.347782


Epoch 68/100:  11%|‚ñà‚ñã             | 200/1750 [02:22<18:28,  1.40it/s, loss=0.8295, audio=0.0511, latent=4.8487, nans=0]


  Step 200/1750 | Loss: 0.829337 | Audio: 0.088352 | Latent: 4.350887


Epoch 68/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:09,  1.41it/s, loss=0.7019, audio=0.0491, latent=4.0246, nans=0]


  Step 300/1750 | Loss: 0.831772 | Audio: 0.088907 | Latent: 4.359723


Epoch 68/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:16,  1.38it/s, loss=0.8569, audio=0.0516, latent=5.0244, nans=0]


  Step 400/1750 | Loss: 0.831111 | Audio: 0.089152 | Latent: 4.352043


Epoch 68/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<15:06,  1.38it/s, loss=0.8273, audio=0.0649, latent=4.6498, nans=0]


  Step 500/1750 | Loss: 0.836455 | Audio: 0.091266 | Latent: 4.359488


Epoch 68/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:08<13:33,  1.41it/s, loss=0.9436, audio=0.1149, latent=4.7585, nans=0]


  Step 600/1750 | Loss: 0.836395 | Audio: 0.092064 | Latent: 4.348448


Epoch 68/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:20<12:34,  1.39it/s, loss=0.7720, audio=0.0512, latent=4.4642, nans=0]


  Step 700/1750 | Loss: 0.834534 | Audio: 0.090887 | Latent: 4.351736


Epoch 68/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:12,  1.41it/s, loss=0.7475, audio=0.0396, latent=4.4560, nans=0]


  Step 800/1750 | Loss: 0.833772 | Audio: 0.090473 | Latent: 4.352175


Epoch 68/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:43<10:02,  1.41it/s, loss=0.8156, audio=0.0948, latent=4.1733, nans=0]


  Step 900/1750 | Loss: 0.834808 | Audio: 0.091071 | Latent: 4.351110


Epoch 68/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:53<08:55,  1.40it/s, loss=0.8631, audio=0.0490, latent=5.1015, nans=0]


  Step 1000/1750 | Loss: 0.835099 | Audio: 0.091274 | Latent: 4.350335


Epoch 68/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:17<06:33,  1.40it/s, loss=1.0186, audio=0.1617, latent=4.6343, nans=0]


  Step 1200/1750 | Loss: 0.836206 | Audio: 0.091535 | Latent: 4.354237


Epoch 68/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:28<05:19,  1.41it/s, loss=0.9727, audio=0.1614, latent=4.3328, nans=0]


  Step 1300/1750 | Loss: 0.836355 | Audio: 0.091309 | Latent: 4.358245


Epoch 68/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:40<04:11,  1.39it/s, loss=0.8364, audio=0.1108, latent=4.0991, nans=0]


  Step 1400/1750 | Loss: 0.836011 | Audio: 0.091335 | Latent: 4.355605


Epoch 68/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:52<03:03,  1.36it/s, loss=0.7744, audio=0.0428, latent=4.5921, nans=0]


  Step 1500/1750 | Loss: 0.835422 | Audio: 0.091225 | Latent: 4.353141


Epoch 68/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:03<01:42,  1.46it/s, loss=0.8189, audio=0.0937, latent=4.2101, nans=0]


  Step 1600/1750 | Loss: 0.834664 | Audio: 0.090805 | Latent: 4.353692


Epoch 68/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:15<00:35,  1.40it/s, loss=0.7713, audio=0.0396, latent=4.6136, nans=0]


  Step 1700/1750 | Loss: 0.835660 | Audio: 0.091386 | Latent: 4.352594


Epoch 68/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:50<00:00,  1.40it/s, loss=0.8124, audio=0.1008, latent=4.0726, nans=0]
Validation 68/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:06<00:00,  1.52it/s, loss=0.7412, audio=0.0304]



EPOCH 68/100 SUMMARY
Train Loss:  0.835172 (Audio: 0.091210, Latent: 4.351682)
Val Loss:    0.840023 (Audio: 0.089731, Latent: 4.403736)
Current LR: 2.80e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.840023


EPOCH 69/100



Epoch 69/100:   6%|‚ñä              | 100/1750 [01:12<20:12,  1.36it/s, loss=0.7821, audio=0.0918, latent=3.9894, nans=0]


  Step 100/1750 | Loss: 0.843424 | Audio: 0.095284 | Latent: 4.352376


Epoch 69/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:29,  1.40it/s, loss=0.7793, audio=0.0522, latent=4.5001, nans=0]


  Step 200/1750 | Loss: 0.837338 | Audio: 0.091668 | Latent: 4.360007


Epoch 69/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:36<17:25,  1.39it/s, loss=0.9424, audio=0.1526, latent=4.2487, nans=0]


  Step 300/1750 | Loss: 0.833749 | Audio: 0.091179 | Latent: 4.342600


Epoch 69/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:47<16:18,  1.38it/s, loss=0.8401, audio=0.1073, latent=4.1701, nans=0]


  Step 400/1750 | Loss: 0.834995 | Audio: 0.092079 | Latent: 4.338909


Epoch 69/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:59<14:53,  1.40it/s, loss=0.8953, audio=0.0994, latent=4.6433, nans=0]


  Step 500/1750 | Loss: 0.828665 | Audio: 0.089368 | Latent: 4.332856


Epoch 69/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 508/1750 [06:04<14:53,  1.39it/s, loss=0.7919, audio=0.0913, latent=4.0620, nans=0]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Validation 70/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:10<00:00,  1.50it/s, loss=0.7327, audio=0.0304]



EPOCH 70/100 SUMMARY
Train Loss:  0.825777 (Audio: 0.091088, Latent: 4.290669)
Val Loss:    0.833399 (Audio: 0.089892, Latent: 4.357431)
Current LR: 2.79e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.833399


EPOCH 71/100

üßπ Cleared GPU cache at epoch 70
   Allocated: 1.23GB | Reserved: 1.55GB



Epoch 71/100:   6%|‚ñä              | 100/1750 [01:13<20:34,  1.34it/s, loss=0.9467, audio=0.1633, latent=4.1344, nans=0]


  Step 100/1750 | Loss: 0.832327 | Audio: 0.094359 | Latent: 4.290730


Epoch 71/100:  11%|‚ñà‚ñã             | 200/1750 [02:25<17:44,  1.46it/s, loss=0.7687, audio=0.0497, latent=4.4619, nans=0]


  Step 200/1750 | Loss: 0.830538 | Audio: 0.091927 | Latent: 4.311220


Epoch 71/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:37<17:27,  1.38it/s, loss=0.8990, audio=0.1695, latent=3.7324, nans=0]


  Step 300/1750 | Loss: 0.828859 | Audio: 0.091900 | Latent: 4.300392


Epoch 71/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<15:58,  1.41it/s, loss=0.8229, audio=0.1060, latent=4.0725, nans=0]


  Step 400/1750 | Loss: 0.825669 | Audio: 0.091693 | Latent: 4.281886


Epoch 71/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:58,  1.39it/s, loss=0.8073, audio=0.0905, latent=4.1750, nans=0]


  Step 500/1750 | Loss: 0.825942 | Audio: 0.092533 | Latent: 4.272501


Epoch 71/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:15<13:14,  1.45it/s, loss=0.7753, audio=0.0929, latent=3.9308, nans=0]


  Step 600/1750 | Loss: 0.824495 | Audio: 0.091369 | Latent: 4.278376


Epoch 71/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:26<12:14,  1.43it/s, loss=0.7250, audio=0.0449, latent=4.2351, nans=0]


  Step 700/1750 | Loss: 0.826046 | Audio: 0.092153 | Latent: 4.278259


Epoch 71/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:40<12:53,  1.23it/s, loss=0.9007, audio=0.1024, latent=4.6389, nans=0]


  Step 800/1750 | Loss: 0.824901 | Audio: 0.091817 | Latent: 4.275114


Epoch 71/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:52<10:21,  1.37it/s, loss=0.8144, audio=0.1009, latent=4.0838, nans=0]


  Step 900/1750 | Loss: 0.824227 | Audio: 0.091695 | Latent: 4.272245


Epoch 71/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:06<11:10,  1.12it/s, loss=0.9174, audio=0.1089, latent=4.6640, nans=0]


  Step 1000/1750 | Loss: 0.824199 | Audio: 0.092041 | Latent: 4.267450


Epoch 71/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [14:13<08:43,  1.24it/s, loss=0.8213, audio=0.0982, latent=4.1658, nans=0]


  Step 1100/1750 | Loss: 0.824049 | Audio: 0.091486 | Latent: 4.273849


Epoch 71/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [16:14<23:36,  2.58s/it, loss=0.7902, audio=0.1044, latent=3.8762, nans=0]


  Step 1200/1750 | Loss: 0.823797 | Audio: 0.091659 | Latent: 4.269859


Epoch 71/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [18:14<05:51,  1.28it/s, loss=0.9921, audio=0.1553, latent=4.5430, nans=0]


  Step 1300/1750 | Loss: 0.822238 | Audio: 0.091201 | Latent: 4.265575


Epoch 71/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [20:07<04:06,  1.42it/s, loss=0.9885, audio=0.1546, latent=4.5283, nans=0]


  Step 1400/1750 | Loss: 0.822233 | Audio: 0.091078 | Latent: 4.267184


Epoch 71/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [21:51<02:59,  1.39it/s, loss=0.9160, audio=0.0942, latent=4.8506, nans=0]


  Step 1500/1750 | Loss: 0.821972 | Audio: 0.090893 | Latent: 4.267910


Epoch 71/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [23:33<01:46,  1.41it/s, loss=0.6640, audio=0.0478, latent=3.7894, nans=0]


  Step 1600/1750 | Loss: 0.820714 | Audio: 0.090467 | Latent: 4.265208


Epoch 71/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [25:16<01:53,  2.26s/it, loss=0.8813, audio=0.0951, latent=4.6072, nans=0]


  Step 1700/1750 | Loss: 0.821166 | Audio: 0.090914 | Latent: 4.262252


Epoch 71/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [25:51<00:00,  1.13it/s, loss=0.8000, audio=0.1118, latent=3.8427, nans=0]
Validation 71/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7290, audio=0.0307]



EPOCH 71/100 SUMMARY
Train Loss:  0.821363 (Audio: 0.091036, Latent: 4.261941)
Val Loss:    0.825713 (Audio: 0.089731, Latent: 4.308346)
Current LR: 2.79e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.825713


EPOCH 72/100



Epoch 72/100:   6%|‚ñä              | 100/1750 [01:12<19:46,  1.39it/s, loss=0.8251, audio=0.1087, latent=4.0520, nans=0]


  Step 100/1750 | Loss: 0.836244 | Audio: 0.093371 | Latent: 4.330010


Epoch 72/100:  11%|‚ñà‚ñã             | 200/1750 [02:24<18:44,  1.38it/s, loss=0.8694, audio=0.0943, latent=4.5392, nans=0]


  Step 200/1750 | Loss: 0.825896 | Audio: 0.091923 | Latent: 4.280334


Epoch 72/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:02,  1.42it/s, loss=0.9538, audio=0.1575, latent=4.2585, nans=0]


  Step 300/1750 | Loss: 0.824825 | Audio: 0.091814 | Latent: 4.274646


Epoch 72/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:01,  1.40it/s, loss=0.8709, audio=0.1491, latent=3.8178, nans=0]


  Step 400/1750 | Loss: 0.818729 | Audio: 0.090071 | Latent: 4.257255


Epoch 72/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<14:42,  1.42it/s, loss=0.9001, audio=0.0959, latent=4.7221, nans=0]


  Step 500/1750 | Loss: 0.814696 | Audio: 0.089019 | Latent: 4.244388


Epoch 72/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:09<14:22,  1.33it/s, loss=0.9283, audio=0.1554, latent=4.1165, nans=0]


  Step 600/1750 | Loss: 0.815898 | Audio: 0.089761 | Latent: 4.242500


Epoch 72/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:52<12:23,  1.41it/s, loss=0.8477, audio=0.1069, latent=4.2263, nans=0]


  Step 700/1750 | Loss: 0.814611 | Audio: 0.089157 | Latent: 4.241980


Epoch 72/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [10:03<11:11,  1.41it/s, loss=0.8853, audio=0.1568, latent=3.8111, nans=0]


  Step 800/1750 | Loss: 0.814589 | Audio: 0.089414 | Latent: 4.238403


Epoch 72/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [11:46<10:22,  1.37it/s, loss=0.6798, audio=0.0337, latent=4.0826, nans=0]


  Step 900/1750 | Loss: 0.814777 | Audio: 0.089362 | Latent: 4.240354


Epoch 72/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [13:00<08:57,  1.39it/s, loss=0.6706, audio=0.0442, latent=3.8812, nans=0]


  Step 1000/1750 | Loss: 0.814883 | Audio: 0.089547 | Latent: 4.238598


Epoch 72/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [14:13<07:48,  1.39it/s, loss=0.9836, audio=0.1595, latent=4.4309, nans=0]


  Step 1100/1750 | Loss: 0.815976 | Audio: 0.090071 | Latent: 4.238899


Epoch 72/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [16:11<07:29,  1.22it/s, loss=0.8220, audio=0.0898, latent=4.2829, nans=0]


  Step 1200/1750 | Loss: 0.817636 | Audio: 0.090629 | Latent: 4.242528


Epoch 72/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [17:32<06:08,  1.22it/s, loss=0.7367, audio=0.0964, latent=3.6258, nans=0]


  Step 1300/1750 | Loss: 0.817542 | Audio: 0.090467 | Latent: 4.244056


Epoch 72/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [19:29<05:07,  1.14it/s, loss=0.7419, audio=0.0377, latent=4.4431, nans=0]


  Step 1400/1750 | Loss: 0.815887 | Audio: 0.089900 | Latent: 4.240573


Epoch 72/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [20:52<03:35,  1.16it/s, loss=0.6619, audio=0.0491, latent=3.7589, nans=0]


  Step 1500/1750 | Loss: 0.816627 | Audio: 0.090549 | Latent: 4.236865


Epoch 72/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [22:12<02:00,  1.25it/s, loss=1.0758, audio=0.1658, latent=4.9607, nans=0]


  Step 1600/1750 | Loss: 0.817119 | Audio: 0.090760 | Latent: 4.237328


Epoch 72/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [23:33<00:40,  1.24it/s, loss=0.7327, audio=0.0489, latent=4.2321, nans=0]


  Step 1700/1750 | Loss: 0.817488 | Audio: 0.091048 | Latent: 4.235951


Epoch 72/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [24:14<00:00,  1.20it/s, loss=0.6842, audio=0.0375, latent=4.0608, nans=0]
Validation 72/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:47<00:00,  1.30it/s, loss=0.7239, audio=0.0307]



EPOCH 72/100 SUMMARY
Train Loss:  0.817014 (Audio: 0.090970, Latent: 4.233824)
Val Loss:    0.823679 (Audio: 0.089815, Latent: 4.293656)
Current LR: 2.78e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.823679


EPOCH 73/100



Epoch 73/100:   6%|‚ñä              | 100/1750 [01:20<22:32,  1.22it/s, loss=0.7575, audio=0.0381, latent=4.5426, nans=0]


  Step 100/1750 | Loss: 0.813455 | Audio: 0.091602 | Latent: 4.201672


Epoch 73/100:  11%|‚ñà‚ñã             | 200/1750 [02:40<20:41,  1.25it/s, loss=0.8033, audio=0.0903, latent=4.1511, nans=0]


  Step 200/1750 | Loss: 0.813137 | Audio: 0.090379 | Latent: 4.215856


Epoch 73/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:51<17:04,  1.42it/s, loss=1.0056, audio=0.1660, latent=4.4914, nans=0]


  Step 300/1750 | Loss: 0.822467 | Audio: 0.095248 | Latent: 4.213138


Epoch 73/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [05:07<17:03,  1.32it/s, loss=0.7966, audio=0.0897, latent=4.1145, nans=0]


  Step 400/1750 | Loss: 0.818556 | Audio: 0.092979 | Latent: 4.217320


Epoch 73/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:19<14:38,  1.42it/s, loss=0.7199, audio=0.0500, latent=4.1325, nans=0]


  Step 500/1750 | Loss: 0.820083 | Audio: 0.094097 | Latent: 4.212596


Epoch 73/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:32<14:40,  1.31it/s, loss=0.9992, audio=0.1151, latent=5.1270, nans=0]


  Step 600/1750 | Loss: 0.817383 | Audio: 0.093330 | Latent: 4.204816


Epoch 73/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:44<12:24,  1.41it/s, loss=0.9638, audio=0.1494, latent=4.4331, nans=0]


  Step 700/1750 | Loss: 0.818485 | Audio: 0.093630 | Latent: 4.208163


Epoch 73/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:55<11:15,  1.41it/s, loss=0.8272, audio=0.0990, latent=4.1943, nans=0]


  Step 800/1750 | Loss: 0.818330 | Audio: 0.093483 | Latent: 4.209088


Epoch 73/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [11:06<10:11,  1.39it/s, loss=0.7273, audio=0.0450, latent=4.2486, nans=0]


  Step 900/1750 | Loss: 0.818046 | Audio: 0.093212 | Latent: 4.210810


Epoch 73/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:21<09:26,  1.32it/s, loss=0.9309, audio=0.1599, latent=4.0747, nans=0]


  Step 1000/1750 | Loss: 0.814943 | Audio: 0.091963 | Latent: 4.206781


Epoch 73/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:33<07:39,  1.41it/s, loss=0.7006, audio=0.0495, latent=4.0114, nans=0]


  Step 1100/1750 | Loss: 0.813219 | Audio: 0.091165 | Latent: 4.205929


Epoch 73/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:46<06:30,  1.41it/s, loss=1.1008, audio=0.2147, latent=4.4754, nans=0]


  Step 1200/1750 | Loss: 0.813403 | Audio: 0.091038 | Latent: 4.208842


Epoch 73/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:58<05:20,  1.40it/s, loss=0.8999, audio=0.1116, latent=4.5110, nans=0]


  Step 1300/1750 | Loss: 0.813481 | Audio: 0.091248 | Latent: 4.206575


Epoch 73/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [17:10<04:11,  1.39it/s, loss=0.6661, audio=0.0384, latent=3.9277, nans=0]


  Step 1400/1750 | Loss: 0.813767 | Audio: 0.091335 | Latent: 4.207311


Epoch 73/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:21<02:58,  1.40it/s, loss=0.9480, audio=0.1098, latent=4.8561, nans=0]


  Step 1500/1750 | Loss: 0.814093 | Audio: 0.091511 | Latent: 4.207134


Epoch 73/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:32<01:46,  1.41it/s, loss=0.9052, audio=0.1539, latent=3.9828, nans=0]


  Step 1600/1750 | Loss: 0.814623 | Audio: 0.091528 | Latent: 4.210449


Epoch 73/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:43<00:36,  1.39it/s, loss=0.9441, audio=0.1539, latent=4.2416, nans=0]


  Step 1700/1750 | Loss: 0.813155 | Audio: 0.091013 | Latent: 4.207524


Epoch 73/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:18<00:00,  1.37it/s, loss=0.8760, audio=0.1083, latent=4.3965, nans=0]
Validation 73/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:07<00:00,  1.51it/s, loss=0.7189, audio=0.0302]



EPOCH 73/100 SUMMARY
Train Loss:  0.813139 (Audio: 0.090947, Latent: 4.208301)
Val Loss:    0.819740 (Audio: 0.090067, Latent: 4.264034)
Current LR: 2.78e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.819740


EPOCH 74/100



Epoch 74/100:   6%|‚ñä              | 100/1750 [01:11<19:43,  1.39it/s, loss=0.6638, audio=0.0388, latent=3.9077, nans=0]


  Step 100/1750 | Loss: 0.801109 | Audio: 0.088513 | Latent: 4.160551


Epoch 74/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:45,  1.38it/s, loss=0.7171, audio=0.0377, latent=4.2781, nans=0]


  Step 200/1750 | Loss: 0.802284 | Audio: 0.086935 | Latent: 4.189425


Epoch 74/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:34<17:16,  1.40it/s, loss=0.7022, audio=0.0392, latent=4.1591, nans=0]


  Step 300/1750 | Loss: 0.799927 | Audio: 0.086794 | Latent: 4.175589


Epoch 74/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:46<16:00,  1.41it/s, loss=0.7908, audio=0.0980, latent=3.9648, nans=0]


  Step 400/1750 | Loss: 0.806464 | Audio: 0.089967 | Latent: 4.176867


Epoch 74/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [05:57<15:07,  1.38it/s, loss=0.7681, audio=0.0468, latent=4.4965, nans=0]


  Step 500/1750 | Loss: 0.807809 | Audio: 0.090680 | Latent: 4.176324


Epoch 74/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:08<13:18,  1.44it/s, loss=0.7611, audio=0.0394, latent=4.5489, nans=0]


  Step 600/1750 | Loss: 0.806742 | Audio: 0.090104 | Latent: 4.176897


Epoch 74/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:19<12:52,  1.36it/s, loss=0.8797, audio=0.1499, latent=3.8663, nans=0]


  Step 700/1750 | Loss: 0.808290 | Audio: 0.090655 | Latent: 4.179872


Epoch 74/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:31<11:12,  1.41it/s, loss=0.8767, audio=0.1047, latent=4.4484, nans=0]


  Step 800/1750 | Loss: 0.809271 | Audio: 0.091094 | Latent: 4.180554


Epoch 74/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:42<10:04,  1.41it/s, loss=0.6922, audio=0.0430, latent=4.0412, nans=0]


  Step 900/1750 | Loss: 0.809606 | Audio: 0.090948 | Latent: 4.184728


Epoch 74/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [11:52<08:53,  1.41it/s, loss=0.7980, audio=0.0941, latent=4.0649, nans=0]


  Step 1000/1750 | Loss: 0.810109 | Audio: 0.091196 | Latent: 4.184775


Epoch 74/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:04<07:43,  1.40it/s, loss=0.9139, audio=0.1599, latent=3.9607, nans=0]


  Step 1100/1750 | Loss: 0.810545 | Audio: 0.091196 | Latent: 4.187686


Epoch 74/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:15<06:23,  1.43it/s, loss=0.8294, audio=0.1007, latent=4.1864, nans=0]


  Step 1200/1750 | Loss: 0.810331 | Audio: 0.090872 | Latent: 4.190578


Epoch 74/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:27<05:29,  1.36it/s, loss=0.7181, audio=0.0521, latent=4.0920, nans=0]


  Step 1300/1750 | Loss: 0.809864 | Audio: 0.090956 | Latent: 4.186351


Epoch 74/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:39<04:09,  1.41it/s, loss=0.7893, audio=0.1074, latent=3.8297, nans=0]


  Step 1400/1750 | Loss: 0.810119 | Audio: 0.091108 | Latent: 4.186019


Epoch 74/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [17:51<02:57,  1.41it/s, loss=0.6517, audio=0.0334, latent=3.8990, nans=0]


  Step 1500/1750 | Loss: 0.809549 | Audio: 0.091004 | Latent: 4.183613


Epoch 74/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:03<01:49,  1.38it/s, loss=0.6588, audio=0.0508, latent=3.7149, nans=0]


  Step 1600/1750 | Loss: 0.809955 | Audio: 0.091171 | Latent: 4.184086


Epoch 74/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:15<00:36,  1.39it/s, loss=0.7889, audio=0.1021, latent=3.8984, nans=0]


  Step 1700/1750 | Loss: 0.809347 | Audio: 0.090980 | Latent: 4.182585


Epoch 74/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [20:51<00:00,  1.40it/s, loss=0.8608, audio=0.0962, latent=4.4566, nans=0]
Validation 74/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:09<00:00,  1.50it/s, loss=0.7171, audio=0.0309]



EPOCH 74/100 SUMMARY
Train Loss:  0.808975 (Audio: 0.090875, Latent: 4.181506)
Val Loss:    0.815729 (Audio: 0.089884, Latent: 4.239731)
Current LR: 2.77e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.815729


EPOCH 75/100



Epoch 75/100:   6%|‚ñä              | 100/1750 [01:13<19:46,  1.39it/s, loss=0.8922, audio=0.1084, latent=4.5032, nans=0]


  Step 100/1750 | Loss: 0.818861 | Audio: 0.094945 | Latent: 4.193139


Epoch 75/100:  11%|‚ñà‚ñã             | 200/1750 [02:23<18:44,  1.38it/s, loss=0.7020, audio=0.0427, latent=4.1099, nans=0]


  Step 200/1750 | Loss: 0.810048 | Audio: 0.090857 | Latent: 4.188897


Epoch 75/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:35<17:09,  1.41it/s, loss=0.8073, audio=0.0968, latent=4.0909, nans=0]


  Step 300/1750 | Loss: 0.803730 | Audio: 0.088465 | Latent: 4.178664


Epoch 75/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:48<16:32,  1.36it/s, loss=0.9199, audio=0.1518, latent=4.1090, nans=0]


  Step 400/1750 | Loss: 0.807312 | Audio: 0.090743 | Latent: 4.172171


Epoch 75/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:00<14:48,  1.41it/s, loss=0.7622, audio=0.0454, latent=4.4755, nans=0]


  Step 500/1750 | Loss: 0.807149 | Audio: 0.091147 | Latent: 4.165707


Epoch 75/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:12<13:45,  1.39it/s, loss=0.7831, audio=0.0961, latent=3.9387, nans=0]


  Step 600/1750 | Loss: 0.806654 | Audio: 0.090665 | Latent: 4.168826


Epoch 75/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:24<12:40,  1.38it/s, loss=0.8396, audio=0.0937, latent=4.3482, nans=0]


  Step 700/1750 | Loss: 0.805398 | Audio: 0.089603 | Latent: 4.174605


Epoch 75/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:37<11:12,  1.41it/s, loss=0.7501, audio=0.0528, latent=4.2970, nans=0]


  Step 800/1750 | Loss: 0.804681 | Audio: 0.089688 | Latent: 4.168692


Epoch 75/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:48<10:11,  1.39it/s, loss=0.6673, audio=0.0296, latent=4.0539, nans=0]


  Step 900/1750 | Loss: 0.806424 | Audio: 0.090355 | Latent: 4.171433


Epoch 75/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:01<08:57,  1.40it/s, loss=0.8588, audio=0.0941, latent=4.4705, nans=0]


  Step 1000/1750 | Loss: 0.805133 | Audio: 0.089715 | Latent: 4.171346


Epoch 75/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:13<07:58,  1.36it/s, loss=0.8160, audio=0.1074, latent=4.0080, nans=0]


  Step 1100/1750 | Loss: 0.805220 | Audio: 0.090199 | Latent: 4.165487


Epoch 75/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:26<06:25,  1.43it/s, loss=0.7646, audio=0.0873, latent=3.9327, nans=0]


  Step 1200/1750 | Loss: 0.805720 | Audio: 0.090511 | Latent: 4.164650


Epoch 75/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:38<05:31,  1.36it/s, loss=0.8979, audio=0.1503, latent=3.9824, nans=0]


  Step 1300/1750 | Loss: 0.805284 | Audio: 0.090201 | Latent: 4.165883


Epoch 75/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:50<04:16,  1.37it/s, loss=0.7506, audio=0.0414, latent=4.4519, nans=0]


  Step 1400/1750 | Loss: 0.805986 | Audio: 0.090716 | Latent: 4.163693


Epoch 75/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:02<02:59,  1.40it/s, loss=0.8238, audio=0.1041, latent=4.1041, nans=0]


  Step 1500/1750 | Loss: 0.805732 | Audio: 0.090846 | Latent: 4.160268


Epoch 75/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:13<01:46,  1.41it/s, loss=0.8363, audio=0.1129, latent=4.0706, nans=0]


  Step 1600/1750 | Loss: 0.805359 | Audio: 0.090775 | Latent: 4.158721


Epoch 75/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:25<00:35,  1.41it/s, loss=0.8501, audio=0.1570, latent=3.5742, nans=0]


  Step 1700/1750 | Loss: 0.805118 | Audio: 0.090845 | Latent: 4.156187


Epoch 75/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:01<00:00,  1.39it/s, loss=0.7740, audio=0.0862, latent=4.0100, nans=0]
Validation 75/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:11<00:00,  1.49it/s, loss=0.7141, audio=0.0302]



EPOCH 75/100 SUMMARY
Train Loss:  0.804858 (Audio: 0.090842, Latent: 4.154496)
Val Loss:    0.811827 (Audio: 0.089777, Latent: 4.215144)
Current LR: 2.76e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.811827


EPOCH 76/100



Epoch 76/100:   6%|‚ñä              | 100/1750 [01:12<20:06,  1.37it/s, loss=0.6904, audio=0.0407, latent=4.0606, nans=0]


  Step 100/1750 | Loss: 0.785920 | Audio: 0.082266 | Latent: 4.142585


Epoch 76/100:  11%|‚ñà‚ñã             | 200/1750 [02:25<19:18,  1.34it/s, loss=0.9156, audio=0.1625, latent=3.9378, nans=0]


  Step 200/1750 | Loss: 0.796590 | Audio: 0.087452 | Latent: 4.144572


Epoch 76/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:38<17:39,  1.37it/s, loss=0.6846, audio=0.0284, latent=4.1854, nans=0]


  Step 300/1750 | Loss: 0.801608 | Audio: 0.090399 | Latent: 4.138728


Epoch 76/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:50<15:56,  1.41it/s, loss=0.7758, audio=0.1038, latent=3.7878, nans=0]


  Step 400/1750 | Loss: 0.800175 | Audio: 0.089653 | Latent: 4.139123


Epoch 76/100:  29%|‚ñà‚ñà‚ñà‚ñà‚ñé          | 500/1750 [06:05<16:14,  1.28it/s, loss=0.8573, audio=0.1164, latent=4.1631, nans=0]


  Step 500/1750 | Loss: 0.801684 | Audio: 0.090339 | Latent: 4.140041


Epoch 76/100:  34%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè         | 600/1750 [07:20<13:40,  1.40it/s, loss=1.0070, audio=0.1601, latent=4.5783, nans=0]


  Step 600/1750 | Loss: 0.801101 | Audio: 0.090433 | Latent: 4.134901


Epoch 76/100:  40%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà         | 700/1750 [08:31<12:23,  1.41it/s, loss=0.7790, audio=0.0948, latent=3.9296, nans=0]


  Step 700/1750 | Loss: 0.802526 | Audio: 0.090606 | Latent: 4.142090


Epoch 76/100:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä        | 800/1750 [09:41<10:58,  1.44it/s, loss=0.8832, audio=0.1003, latent=4.5506, nans=0]


  Step 800/1750 | Loss: 0.803342 | Audio: 0.091017 | Latent: 4.142049


Epoch 76/100:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã       | 900/1750 [10:52<10:16,  1.38it/s, loss=0.8333, audio=0.0980, latent=4.2485, nans=0]


  Step 900/1750 | Loss: 0.801846 | Audio: 0.090237 | Latent: 4.142480


Epoch 76/100:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà      | 1000/1750 [12:04<08:58,  1.39it/s, loss=0.7175, audio=0.0474, latent=4.1517, nans=0]


  Step 1000/1750 | Loss: 0.801398 | Audio: 0.089859 | Latent: 4.144531


Epoch 76/100:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä     | 1100/1750 [13:17<07:47,  1.39it/s, loss=0.8590, audio=0.1065, latent=4.3074, nans=0]


  Step 1100/1750 | Loss: 0.800745 | Audio: 0.089825 | Latent: 4.140626


Epoch 76/100:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 1200/1750 [14:29<06:33,  1.40it/s, loss=0.7458, audio=0.0456, latent=4.3645, nans=0]


  Step 1200/1750 | Loss: 0.800117 | Audio: 0.090004 | Latent: 4.134060


Epoch 76/100:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 1300/1750 [15:41<05:25,  1.38it/s, loss=0.8826, audio=0.0997, latent=4.5549, nans=0]


  Step 1300/1750 | Loss: 0.800276 | Audio: 0.090085 | Latent: 4.134044


Epoch 76/100:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 1400/1750 [16:53<04:14,  1.38it/s, loss=0.7148, audio=0.0541, latent=4.0436, nans=0]


  Step 1400/1750 | Loss: 0.801247 | Audio: 0.090389 | Latent: 4.136456


Epoch 76/100:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 1500/1750 [18:04<03:00,  1.38it/s, loss=0.8013, audio=0.0936, latent=4.0946, nans=0]


  Step 1500/1750 | Loss: 0.801256 | Audio: 0.090412 | Latent: 4.136213


Epoch 76/100:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 1600/1750 [19:18<01:46,  1.41it/s, loss=0.8783, audio=0.1429, latent=3.9506, nans=0]


  Step 1600/1750 | Loss: 0.802006 | Audio: 0.090716 | Latent: 4.137154


Epoch 76/100:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 1700/1750 [20:30<00:36,  1.38it/s, loss=0.7711, audio=0.0955, latent=3.8667, nans=0]


  Step 1700/1750 | Loss: 0.801346 | Audio: 0.090621 | Latent: 4.134036


Epoch 76/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1750/1750 [21:06<00:00,  1.38it/s, loss=0.8090, audio=0.0961, latent=4.1119, nans=0]
Validation 76/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 375/375 [04:08<00:00,  1.51it/s, loss=0.7102, audio=0.0308]



EPOCH 76/100 SUMMARY
Train Loss:  0.801535 (Audio: 0.090808, Latent: 4.132786)
Val Loss:    0.806472 (Audio: 0.089389, Latent: 4.184624)
Current LR: 2.76e-05

‚úÖ NEW BEST MODEL! Val Loss: 0.806472


EPOCH 77/100



Epoch 77/100:   6%|‚ñä              | 100/1750 [01:12<19:55,  1.38it/s, loss=0.8381, audio=0.1040, latent=4.2014, nans=0]


  Step 100/1750 | Loss: 0.785317 | Audio: 0.088220 | Latent: 4.059172


Epoch 77/100:  11%|‚ñà‚ñã             | 200/1750 [02:25<18:52,  1.37it/s, loss=0.7870, audio=0.0367, latent=4.7575, nans=0]


  Step 200/1750 | Loss: 0.797554 | Audio: 0.090130 | Latent: 4.115301


Epoch 77/100:  17%|‚ñà‚ñà‚ñå            | 300/1750 [03:37<17:37,  1.37it/s, loss=0.8635, audio=0.0997, latent=4.4270, nans=0]


  Step 300/1750 | Loss: 0.796332 | Audio: 0.089255 | Latent: 4.118809


Epoch 77/100:  23%|‚ñà‚ñà‚ñà‚ñç           | 400/1750 [04:50<16:36,  1.35it/s, loss=0.7604, audio=0.0922, latent=3.8406, nans=0]


  Step 400/1750 | Loss: 0.793606 | Audio: 0.088121 | Latent: 4.115756


Epoch 77/100:  28%|‚ñà‚ñà‚ñà‚ñà‚ñè          | 486/1750 [05:54<15:21,  1.37it/s, loss=0.9134, audio=0.1523, latent=4.0594, nans=0]


KeyboardInterrupt: 

# Inference

In [4]:
"""
FINAL OPTIMIZED Inference Script for DAC-VAE Audio Effect Generator
Compatible with soundfile and optimized training code

FEATURES:
- Works with 3-second or 5-second trained models
- soundfile for audio loading (no torchaudio issues)
- Handles any audio length with chunking
- Auto-detects training length from checkpoint
- No audiotools dependency
- Correct decoder API
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import soundfile as sf
import os
from transformers import AutoTokenizer, AutoModel
from einops import rearrange
import argparse
import numpy as np

# DAC import
try:
    import dac
    print("‚úì DAC library imported successfully")
except ImportError:
    print("‚ùå DAC not installed. Run: pip install descript-audio-codec")
    exit(1)

print("‚úì soundfile available")

#############################################
#     MODEL ARCHITECTURE
#############################################

class CrossAttention(nn.Module):
    def __init__(self, audio_dim, text_dim, n_heads=8):
        super().__init__()
        self.n_heads = n_heads
        self.scale = (audio_dim // n_heads) ** -0.5
        self.to_q = nn.Linear(audio_dim, audio_dim)
        self.to_k = nn.Linear(text_dim, audio_dim)
        self.to_v = nn.Linear(text_dim, audio_dim)
        self.to_out = nn.Linear(audio_dim, audio_dim)
        
    def forward(self, x, context):
        B, C, T = x.shape
        x_flat = rearrange(x, 'b c t -> b t c')
        q = self.to_q(x_flat)
        k = self.to_k(context)
        v = self.to_v(context)
        q = rearrange(q, 'b t (h d) -> b h t d', h=self.n_heads)
        k = rearrange(k, 'b s (h d) -> b h s d', h=self.n_heads)
        v = rearrange(v, 'b s (h d) -> b h s d', h=self.n_heads)
        attn = torch.einsum('bhqd,bhkd->bhqk', q, k) * self.scale
        attn = F.softmax(attn, dim=-1)
        out = torch.einsum('bhqk,bhvd->bhqd', attn, v)
        out = rearrange(out, 'b h t d -> b t (h d)')
        out = self.to_out(out)
        return rearrange(out, 'b t c -> b c t')

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv1d(channels, channels, 3, padding=1)
        self.conv2 = nn.Conv1d(channels, channels, 3, padding=1)
        self.norm1 = nn.GroupNorm(8, channels)
        self.norm2 = nn.GroupNorm(8, channels)
        self.act = nn.SiLU()
        
    def forward(self, x):
        residual = x
        x = self.act(self.norm1(self.conv1(x)))
        x = self.act(self.norm2(self.conv2(x)))
        return x + residual

class DownBlock(nn.Module):
    def __init__(self, in_c, out_c, text_dim=768, use_attn=False):
        super().__init__()
        self.use_attn = use_attn
        self.conv = nn.Conv1d(in_c, out_c, 3, padding=1)
        self.res1 = ResidualBlock(out_c)
        self.res2 = ResidualBlock(out_c)
        if use_attn:
            self.attn = CrossAttention(out_c, text_dim)
        self.downsample = nn.Conv1d(out_c, out_c, 4, stride=2, padding=1)
        
    def forward(self, x, text_emb=None):
        x = self.conv(x)
        x = self.res1(x)
        x = self.res2(x)
        if self.use_attn and text_emb is not None:
            x = x + self.attn(x, text_emb)
        skip = x
        x = self.downsample(x)
        return x, skip

class UpBlock(nn.Module):
    def __init__(self, in_c, out_c, skip_c, text_dim=768, use_attn=False):
        super().__init__()
        self.use_attn = use_attn
        self.upsample = nn.ConvTranspose1d(in_c, out_c, 4, stride=2, padding=1)
        self.conv = nn.Conv1d(out_c + skip_c, out_c, 3, padding=1)
        self.res1 = ResidualBlock(out_c)
        self.res2 = ResidualBlock(out_c)
        if use_attn:
            self.attn = CrossAttention(out_c, text_dim)
        
    def forward(self, x, skip, text_emb=None):
        x = self.upsample(x)
        if x.size(-1) != skip.size(-1):
            x = F.interpolate(x, size=skip.size(-1), mode='linear', align_corners=False)
        x = torch.cat([x, skip], dim=1)
        x = self.conv(x)
        x = self.res1(x)
        x = self.res2(x)
        if self.use_attn and text_emb is not None:
            x = x + self.attn(x, text_emb)
        return x

class LatentUNet(nn.Module):
    def __init__(self, latent_channels, channels, text_dim=768):
        super().__init__()
        self.input_conv = nn.Conv1d(latent_channels, channels[0], 7, padding=3)
        
        self.down_blocks = nn.ModuleList()
        for i in range(len(channels) - 1):
            use_attn = i >= 2
            self.down_blocks.append(DownBlock(channels[i], channels[i+1], text_dim, use_attn))
        
        self.mid_block1 = ResidualBlock(channels[-1])
        self.mid_attn = CrossAttention(channels[-1], text_dim)
        self.mid_block2 = ResidualBlock(channels[-1])
        
        self.up_blocks = nn.ModuleList()
        for i in range(len(channels) - 1, 0, -1):
            use_attn = i >= 2
            self.up_blocks.append(
                UpBlock(channels[i], channels[i-1], channels[i], text_dim, use_attn)
            )
        
        self.output_conv = nn.Conv1d(channels[0], latent_channels, 7, padding=3)
        
    def forward(self, z, text_emb):
        original_length = z.size(-1)
        x = self.input_conv(z)
        
        skips = []
        for down in self.down_blocks:
            x, skip = down(x, text_emb)
            skips.append(skip)
        
        x = self.mid_block1(x)
        x = x + self.mid_attn(x, text_emb)
        x = self.mid_block2(x)
        
        for up in self.up_blocks:
            skip = skips.pop()
            x = up(x, skip, text_emb)
        
        x = self.output_conv(x)
        
        if x.size(-1) != original_length:
            x = F.interpolate(x, size=original_length, mode='linear', align_corners=False)
        
        return x

class AudioEffectModel(nn.Module):
    def __init__(self, dac_model, latent_channels, unet_channels, text_dim):
        super().__init__()
        self.text_encoder = AutoModel.from_pretrained("bert-base-uncased")
        self.dac = dac_model
        self.unet = LatentUNet(latent_channels, unet_channels, text_dim)
        
    @torch.no_grad()
    def generate(self, wav_in, prompt):
        """Generate audio with effect (handles any length!)"""
        self.eval()
        
        if wav_in.dim() == 2:
            wav_in = wav_in.unsqueeze(1)
        
        original_length = wav_in.size(-1)
        
        tokens = tokenizer(
            [prompt],
            padding=True,
            truncation=True,
            max_length=128,
            return_tensors="pt"
        ).to(wav_in.device)
        
        text_output = self.text_encoder(
            input_ids=tokens.input_ids,
            attention_mask=tokens.attention_mask
        )
        text_emb = text_output.last_hidden_state
        
        z_in = self.dac.encoder(wav_in)
        z_out = self.unet(z_in, text_emb)
        wav_out = self.dac.decoder(z_out)
        
        # Match length
        if wav_out.size(-1) != original_length:
            if wav_out.size(-1) > original_length:
                wav_out = wav_out[..., :original_length]
            else:
                wav_out = F.pad(wav_out, (0, original_length - wav_out.size(-1)))
        
        return wav_out

#############################################
#     INFERENCE CLASS
#############################################

class AudioEffectInference:
    def __init__(self, model_path, dac_model_path=None, device='cuda'):
        """Initialize inference pipeline"""
        self.device = device if torch.cuda.is_available() else 'cpu'
        
        print("="*60)
        print("LOADING MODEL FOR INFERENCE")
        print("="*60)
        
        # Load checkpoint
        print(f"Loading: {model_path}")
        ckpt = torch.load(model_path, map_location=self.device)
        
        config = ckpt['config']
        self.sample_rate = config['sample_rate']
        latent_channels = config['latent_channels']
        unet_channels = config['unet_channels']
        text_dim = config['text_dim']
        
        self.training_length = config.get('max_audio_length', None)
        if self.training_length:
            print(f"‚úì Trained on {self.training_length / self.sample_rate:.1f}s audio")
        
        print(f"‚úì Sample rate: {self.sample_rate} Hz")
        print(f"‚úì Latent channels: {latent_channels}")
        
        # Load DAC
        if dac_model_path is None:
            dac_model_path = "C:/Users/user/Downloads/weights_44khz_16kbps.pth"
        
        if not os.path.exists(dac_model_path):
            print(f"‚ùå DAC not found at: {dac_model_path}")
            exit(1)
        
        self.dac_model = dac.DAC.load(dac_model_path)
        self.dac_model = self.dac_model.to(self.device)
        self.dac_model.eval()
        print("‚úì DAC loaded")
        
        # Create model
        self.model = AudioEffectModel(
            dac_model=self.dac_model,
            latent_channels=latent_channels,
            unet_channels=unet_channels,
            text_dim=text_dim
        ).to(self.device)
        
        self.model.load_state_dict(ckpt['model'])
        self.model.eval()
        print("‚úì Model loaded")
        
        global tokenizer
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        print("‚úì Tokenizer loaded")
        print("="*60 + "\n")
    
    def _load_audio(self, path):
        """Load audio using soundfile"""
        wav, sr = sf.read(path)
        wav = torch.from_numpy(wav).float()
        
        # Shape: (channels, samples)
        if wav.dim() == 1:
            wav = wav.unsqueeze(0)
        elif wav.dim() == 2 and wav.size(0) > wav.size(1):
            wav = wav.t()
        
        # Resample if needed
        if sr != self.sample_rate:
            print(f"  Resampling {sr}Hz ‚Üí {self.sample_rate}Hz")
            wav = torchaudio.functional.resample(wav, sr, self.sample_rate)
        
        # Convert to mono
        if wav.size(0) > 1:
            print("  Converting to mono")
            wav = wav.mean(dim=0, keepdim=True)
        
        return wav, self.sample_rate
    
    def process_audio(self, input_path, output_path, prompt, chunk_size=None):
        """Process audio file with effect"""
        print(f"Processing: {input_path}")
        print(f"Effect: '{prompt}'")
        
        # Load with soundfile
        wav, sr = self._load_audio(input_path)
        duration = wav.size(-1) / sr
        print(f"Duration: {duration:.2f}s")
        
        # Decide chunking
        if chunk_size is None:
            if self.training_length and wav.size(-1) > self.training_length * 2:
                chunk_size = self.training_length
                print(f"Using chunks: {chunk_size / sr:.1f}s")
            else:
                chunk_size = wav.size(-1)
        
        # Process
        if wav.size(-1) <= chunk_size:
            # Process all at once
            print("Processing...")
            wav = wav.unsqueeze(0).to(self.device)
            with torch.no_grad():
                wav_out = self.model.generate(wav, prompt)
            wav_out = wav_out.squeeze(0).cpu()
        else:
            # Process in chunks
            print(f"Processing in chunks...")
            overlap = chunk_size // 4
            hop = chunk_size - overlap
            
            chunks_out = []
            num_chunks = (wav.size(-1) - overlap) // hop + 1
            
            for i in range(num_chunks):
                start = i * hop
                end = min(start + chunk_size, wav.size(-1))
                
                chunk = wav[:, start:end]
                
                if chunk.size(-1) < chunk_size:
                    chunk = F.pad(chunk, (0, chunk_size - chunk.size(-1)))
                
                chunk = chunk.unsqueeze(0).to(self.device)
                with torch.no_grad():
                    chunk_out = self.model.generate(chunk, prompt)
                chunk_out = chunk_out.squeeze(0).cpu()
                
                if end - start < chunk_size:
                    chunk_out = chunk_out[:, :end - start]
                
                # Crossfade
                if i > 0:
                    fade_len = overlap
                    fade_in = torch.linspace(0, 1, fade_len)
                    fade_out = torch.linspace(1, 0, fade_len)
                    
                    prev_end = chunks_out[-1][:, -fade_len:]
                    curr_start = chunk_out[:, :fade_len]
                    crossfaded = prev_end * fade_out + curr_start * fade_in
                    
                    chunks_out[-1] = chunks_out[-1][:, :-fade_len]
                    chunk_out = torch.cat([crossfaded, chunk_out[:, fade_len:]], dim=-1)
                
                chunks_out.append(chunk_out)
                print(f"  Chunk {i+1}/{num_chunks}")
            
            wav_out = torch.cat(chunks_out, dim=-1)
        
        # Match length
        if wav_out.size(-1) != wav.size(-1):
            if wav_out.size(-1) > wav.size(-1):
                wav_out = wav_out[:, :wav.size(-1)]
            else:
                wav_out = F.pad(wav_out, (0, wav.size(-1) - wav_out.size(-1)))
        
        print(f"Output: {wav_out.size(-1) / sr:.2f}s")
        
        # Save with soundfile
        wav_out_np = wav_out.squeeze(0).numpy()
        sf.write(output_path, wav_out_np, sr)
        print(f"‚úì Saved: {output_path}\n")
    
    def batch_process(self, input_dir, output_dir, prompt, chunk_size=None):
        """Process all audio files in directory"""
        os.makedirs(output_dir, exist_ok=True)
        
        audio_exts = ['*.wav', '*.mp3', '*.flac', '*.ogg', '*.m4a']
        files = [f for f in os.listdir(input_dir) if os.path.splitext(f)[1].lower() in audio_exts]
        
        if not files:
            print(f"‚ùå No audio files in {input_dir}")
            return
        
        print(f"Found {len(files)} files")
        print(f"Effect: '{prompt}'\n")
        
        for i, filename in enumerate(files, 1):
            print(f"[{i}/{len(files)}] {filename}")
            
            input_path = os.path.join(input_dir, filename)
            name, ext = os.path.splitext(filename)
            output_path = os.path.join(output_dir, f"{name}_processed{ext}")
            
            try:
                self.process_audio(input_path, output_path, prompt, chunk_size)
            except Exception as e:
                print(f"‚ùå Error: {e}\n")
        
        print(f"\n‚úÖ Batch complete! {len(files)} files ‚Üí {output_dir}")

#############################################
#     MAIN
#############################################

def main():
    parser = argparse.ArgumentParser(
        description='Audio Effect Generator Inference',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Single file
  python inference.py --model model_best.pt --input audio.wav --output result.wav --prompt "add rain sounds"
  
  # Batch process
  python inference.py --model model_best.pt --input input_folder/ --output output_folder/ --prompt "add birds chirping"
  
  # With chunking (for long audio)
  python inference.py --model model_best.pt --input long.wav --output result.wav --prompt "add rain sounds" --chunk-size 132300
        """
    )
    
    parser.add_argument('--model', required=True, help='Model checkpoint (.pt)')
    parser.add_argument('--input', required=True, help='Input file or directory')
    parser.add_argument('--output', required=True, help='Output file or directory')
    parser.add_argument('--prompt', required=True, help='Effect description')
    parser.add_argument('--dac-model', default=None, help='DAC weights path')
    parser.add_argument('--device', default='cuda', help='cuda or cpu')
    parser.add_argument('--chunk-size', type=int, default=None, help='Chunk size (samples)')
    
    args = parser.parse_args()
    
    try:
        inference = AudioEffectInference(args.model, args.dac_model, args.device)
    except Exception as e:
        print(f"‚ùå Failed to load: {e}")
        return
    
    if os.path.isfile(args.input):
        inference.process_audio(args.input, args.output, args.prompt, args.chunk_size)
    elif os.path.isdir(args.input):
        inference.batch_process(args.input, args.output, args.prompt, args.chunk_size)
    else:
        print(f"‚ùå Invalid input: {args.input}")


‚úì DAC library imported successfully
‚úì soundfile available


In [20]:
#from dac_vae_inference_OPTIMIZED import AudioEffectInference

# Add at the end of inference.py:
if __name__ == '__main__':
    # For direct usage (no command line):
    model_path = "C:/zahra/EchoMind4/data/result/model_best.pt"
    
    inference = AudioEffectInference(model_path)
    
    # Single file
    
    inference.process_audio(
        "C:/zahra/EchoMind4/data/result/inference/arabic_XBmfzfHL.wav",
        "C:/zahra/EchoMind4/data/result/inference/arabic_XBmfzfHL_birds.wav",
        "add birds sounds"
    )
    # OR batch process
    main()

LOADING MODEL FOR INFERENCE
Loading: C:/zahra/EchoMind4/data/result/model_best.pt
‚úì Trained on 3.0s audio
‚úì Sample rate: 44100 Hz
‚úì Latent channels: 128
‚úì DAC loaded
‚úì Model loaded
‚úì Tokenizer loaded

Processing: C:/zahra/EchoMind4/data/result/inference/arabic_XBmfzfHL.wav
Effect: 'add birds sounds'
  Resampling 48000Hz ‚Üí 44100Hz
  Converting to mono
Duration: 4.98s
Processing...
Output: 4.98s
‚úì Saved: C:/zahra/EchoMind4/data/result/inference/arabic_XBmfzfHL_birds.wav



usage: ipykernel_launcher.py [-h] --model MODEL --input INPUT --output OUTPUT --prompt PROMPT [--dac-model DAC_MODEL]
                             [--device DEVICE] [--chunk-size CHUNK_SIZE]
ipykernel_launcher.py: error: the following arguments are required: --model, --input, --output, --prompt


SystemExit: 2

In [None]:
#C:/zahra/EchoMind4/data/result/inference
 """
    inference.batch_process(
        "C:/zahra/EchoMind4/data/result/inference/",
        "C:/zahra/EchoMind4/data/result/inference/dogs/",
        "add dogs sounds"
    )
    inference.batch_process(
        "C:/zahra/EchoMind4/data/result/inference/",
        "C:/zahra/EchoMind4/data/result/inference/birds/",
        "add birds sounds"
    )
    inference.batch_process(
        "C:/zahra/EchoMind4/data/result/inference/",
        "C:/zahra/EchoMind4/data/result/inference/cats/",
        "add cats sounds"
    )
    inference.batch_process(
        "C:/zahra/EchoMind4/data/result/inference/",
        "C:/zahra/EchoMind4/data/result/inference/rain/",
        "add rain sounds"
    )
    inference.batch_process(
        "C:/zahra/EchoMind4/data/result/inference/",
        "C:/zahra/EchoMind4/data/result/inference/thunder/",
        "add thunder sounds"
    )"""