In [1]:
import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import whisper
from transformers import BertTokenizer, BertModel
import numpy as np
from tqdm import tqdm
import wandb
from collections import Counter
from sklearn.metrics import mean_squared_error

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# --- Load and Unfreeze Whisper‑medium ---
whisper_model = whisper.load_model("base.en").to(device)
# Unfreeze all layers in Whisper
for param in whisper_model.parameters():
    param.requires_grad = True
whisper_model.train()  # Set to train mode so gradients are computed

# Load BERT tokenizer and model (BERT remains frozen here).
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased").to(device).eval()

# --- Initialize wandb ---
wandb.init(project="somos-ensemble", name="finetune-whisper-ensemble")
!wandb online

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: rtfiof (rtfiof-hse-university). Use `wandb login --relogin` to force relogin


W&B online. Running your script from this directory will now sync to the cloud.


Exception in thread IntMsgThr:
Traceback (most recent call last):
  File "D:\ProgramData\anaconda\envs\project\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "D:\ProgramData\anaconda\envs\project\Lib\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "D:\ProgramData\anaconda\envs\project\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "D:\ProgramData\anaconda\envs\project\Lib\site-packages\wandb\sdk\wandb_run.py", line 325, in check_internal_messages
    self._loop_check_status(
  File "D:\ProgramData\anaconda\envs\project\Lib\site-packages\wandb\sdk\wandb_run.py", line 235, in _loop_check_status
    local_handle = request()
                   ^^^^^^^^^
  File "D:\ProgramData\anaconda\envs\project\Lib\site-packages\wandb\sdk\interface\interface.py", line 914, in deliver_internal_messages
    return self._deliver_internal_messages(internal_message)
           ^^^^^^^^^^^

In [3]:
device


device(type='cuda', index=0)

In [4]:
# --- Utility Functions ---

# Function to compute class weights
# Compute class weights for imbalanced dataset
def compute_class_weights(labels, num_classes):
    class_counts = Counter(labels)
    total_samples = sum(class_counts.values())
    weights = {cls: total_samples / (num_classes * count) for cls, count in class_counts.items()}
    return torch.tensor([weights[i] for i in range(num_classes)], dtype=torch.float).to(device)


# Function to compute sample weights for oversampling
def get_sample_weights(dataset, class_weights):
    sample_weights = []

    for _, _, label in dataset:
        sample_weights.append(class_weights[label].item())

    return torch.tensor(sample_weights, dtype=torch.float)

def load_json(filepath):
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)

def process_audio_path(clean_path, base_dir="data/somos/audios"):
    return os.path.join(base_dir, clean_path.replace("\\", "/"))

# Earth Mover’s Distance (EMD) Loss for ordinal MOS prediction.
def emd_loss(y_pred, y_true, num_classes):
    y_pred = F.softmax(y_pred, dim=-1)  # Convert logits to probability distribution
    y_true = F.one_hot(y_true, num_classes).float()  # Convert labels to one-hot

    cdf_pred = torch.cumsum(y_pred, dim=-1)  # Compute cumulative sum for predicted distribution
    cdf_true = torch.cumsum(y_true, dim=-1)  # Compute cumulative sum for true distribution

    loss = torch.mean((cdf_pred - cdf_true) ** 2)  # Use squared difference for smoother gradients
    return loss

def entropy_regularization(gate_weights, lambda_reg=0.01):
    # Compute entropy loss to encourage diverse gating weights
    eps = 1e-8
    entropy = -torch.sum(gate_weights * torch.log(gate_weights + eps), dim=1)
    return lambda_reg * torch.mean(entropy)

def save_model(model, epoch, best_acc, save_path="models"):
    os.makedirs(save_path, exist_ok=True)
    model_path = os.path.join(save_path, f"model_epoch_{epoch}.pth")
    torch.save(model.state_dict(), model_path)
    best_model_path = os.path.join(save_path, "best_model.pth")
    if best_acc:
        torch.save(model.state_dict(), best_model_path)

In [5]:
# --- Dataset Class ---
class SOMOSDataset(Dataset):
    def __init__(self, json_file, base_dir="data/somos/audios"):
        self.samples = load_json(json_file)
        self.base_dir = base_dir
        self.labels = [round(float(sample["mos"])) - 1 for sample in self.samples]  # Round and convert 1-5 to 0-4

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        text = sample["text"]
        label = torch.tensor(round(float(sample["mos"])) - 1, dtype=torch.long)  # Round for classification
        audio_path = process_audio_path(sample["clean path"], self.base_dir)
        return audio_path, text, label


def collate_fn(batch):
    audio_paths, texts, labels = zip(*batch)
    audios = [whisper.load_audio(path) for path in audio_paths]
    audios = [whisper.pad_or_trim(audio) for audio in audios]
    mel_spectrograms = [whisper.log_mel_spectrogram(audio).to(device) for audio in audios]
    mel_spectrograms = torch.stack(mel_spectrograms)

    # Compute audio embeddings with gradients enabled
    audio_embeddings = whisper_model.encoder(mel_spectrograms).mean(dim=1)

    # Process texts using BERT (BERT remains frozen)
    inputs = tokenizer(list(texts), return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    with torch.no_grad():
        text_embeddings = bert_model(**inputs).last_hidden_state[:, 0, :]

    labels = torch.stack(labels).to(device)
    return audio_embeddings, text_embeddings, labels


In [6]:
# --- Model Definitions ---
class ComplexFusionSubModel(nn.Module):
    def __init__(self, audio_dim, text_dim, hidden_dim, num_classes, dropout_rate=0.05):
        super(ComplexFusionSubModel, self).__init__()
        self.audio_fc = nn.Sequential(
            nn.Linear(audio_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )
        self.text_fc = nn.Sequential(
            nn.Linear(text_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        )
        self.attention = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.Tanh(),
            nn.Linear(hidden_dim // 2, 1),
            nn.Softmax(dim=1)
        )
        self.fusion_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim // 2, num_classes)
        )

    def forward(self, audio_emb, text_emb):
        audio_feat = self.audio_fc(audio_emb)
        text_feat = self.text_fc(text_emb)
        fusion = torch.cat([audio_feat, text_feat], dim=1)
        attn_weights = self.attention(fusion)
        fusion = fusion * attn_weights
        return self.fusion_fc(fusion)

class EnsembleFusionClassifier(nn.Module):
    def __init__(self, audio_dim, text_dim, hidden_dim, num_classes, dropout_rate=0.05, num_models=3):
        super(EnsembleFusionClassifier, self).__init__()
        self.num_models = num_models
        self.sub_models = nn.ModuleList([
            ComplexFusionSubModel(audio_dim, text_dim, hidden_dim, num_classes, dropout_rate)
            for _ in range(num_models)
        ])
        self.gate = nn.Sequential(
            nn.Linear(audio_dim + text_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_models),
            nn.Softmax(dim=1)
        )
        self.residual = nn.Sequential(
            nn.Linear(num_classes, num_classes),
            nn.BatchNorm1d(num_classes),
            nn.ReLU()
        )

    def forward(self, audio_emb, text_emb, return_gate=False):
        gate_input = torch.cat([audio_emb, text_emb], dim=1)
        gate_weights = self.gate(gate_input)  # (batch_size, num_models)
        outputs = [model(audio_emb, text_emb) for model in self.sub_models]
        outputs = torch.stack(outputs, dim=1)  # (batch_size, num_models, num_classes)
        gate_weights_unsq = gate_weights.unsqueeze(2)  # (batch_size, num_models, 1)
        ensemble_output = (gate_weights_unsq * outputs).sum(dim=1)
        final_output = ensemble_output + self.residual(ensemble_output)
        if return_gate:
            return final_output, gate_weights
        return final_output

In [7]:
def main():
    train_json = "data/somos/audios/train_new.json"
    test_json = "data/somos/audios/test_new.json"

    train_dataset = SOMOSDataset(train_json)
    test_dataset = SOMOSDataset(test_json)

    # Compute class weights and create weighted sampler
    class_weights = compute_class_weights(train_dataset.labels, num_classes=5)
    sample_weights = [class_weights[label].item() for label in train_dataset.labels]
    sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)
    
    train_loader = DataLoader(train_dataset, batch_size=4, sampler=sampler, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)
    
    dummy_audio, dummy_text, _ = next(iter(train_loader))
    audio_dim, text_dim = dummy_audio.shape[1], dummy_text.shape[1]
    num_classes = 5

    # Instantiate the ensemble classifier
    model = EnsembleFusionClassifier(audio_dim, text_dim, hidden_dim=256, num_classes=num_classes, dropout_rate=0.05, num_models=3).to(device)
    
    wandb.watch(model, log="all", log_freq=100)
    
    scaler = torch.cuda.amp.GradScaler()
    criterion = lambda y_pred, y_true: emd_loss(y_pred, y_true, num_classes=5)  # EMD loss
    optimizer = optim.Adam(model.parameters(), lr=1e-6)

    num_epochs = 100
    best_mse = float("inf")

    for epoch in range(num_epochs):
        model.train()
        running_loss, total_samples = 0.0, 0
        true_labels, predicted_labels = [], []

        train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1} Training", leave=False)
        for audio_emb, text_emb, labels in train_pbar:
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                outputs, gate_weights = model(audio_emb, text_emb, return_gate=True)
                loss = criterion(outputs, labels)
                loss += entropy_regularization(gate_weights, lambda_reg=0.01)

            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * audio_emb.size(0)
            total_samples += labels.size(0)

            preds = torch.argmax(outputs, dim=1)
            true_labels.extend(labels.cpu().tolist())
            predicted_labels.extend(preds.cpu().tolist())

            wandb.log({"train_loss": loss.item()})
            train_pbar.set_postfix(loss=loss.item())

        train_mse = mean_squared_error([t + 1 for t in true_labels], [p + 1 for p in predicted_labels])
        wandb.log({"train_mse": train_mse})
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {running_loss/total_samples:.4f} | Train MSE: {train_mse:.4f}")

        # Evaluation Phase
        model.eval()
        test_loss, total_samples = 0.0, 0
        true_labels, predicted_labels = [], []

        with torch.no_grad():
            test_pbar = tqdm(test_loader, desc=f"Epoch {epoch+1} Validation", leave=False)
            for audio_emb, text_emb, labels in test_pbar:
                audio_emb = audio_emb.to(device)
                text_emb = text_emb.to(device)
                labels = labels.to(device)

                outputs = model(audio_emb, text_emb)
                loss = criterion(outputs, labels)

                test_loss += loss.item() * audio_emb.size(0)
                total_samples += labels.size(0)

                preds = torch.argmax(outputs, dim=1)
                true_labels.extend(labels.cpu().tolist())
                predicted_labels.extend(preds.cpu().tolist())

                test_pbar.set_postfix(loss=loss.item())

        test_mse = mean_squared_error([t + 1 for t in true_labels], [p + 1 for p in predicted_labels])
        avg_test_loss = test_loss / total_samples
        wandb.log({"val_loss": avg_test_loss, "val_mse": test_mse})
        print(f"Epoch {epoch+1}/{num_epochs} - Val Loss: {avg_test_loss:.4f} | Val MSE: {test_mse:.4f}")

        # Print sample predictions
        print("\nSample Predictions (Real MOS vs Predicted MOS):")
        for i, (real_mos, pred_mos) in enumerate(zip(true_labels[:5], predicted_labels[:5])):
            print(f"Example {i+1}: Real MOS = {real_mos + 1}, Predicted MOS = {pred_mos + 1}")
            wandb.log({f"sample_{i}_real_vs_pred": f"{real_mos+1} vs {pred_mos+1}"})

        # Save model if it improves
        save_model(model, epoch + 1, test_mse < best_mse)

        if test_mse < best_mse:
            best_mse = test_mse

    print("Training complete! Best validation MSE:", best_mse)

In [8]:
main()

  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 1/100 - Train Loss: 0.1757 | Train MSE: 3.8111


                                                                                                                       

Epoch 1/100 - Val Loss: 0.0975 | Val MSE: 2.1070

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 3
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 3
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 2/100 - Train Loss: 0.1692 | Train MSE: 3.2772


                                                                                                                       

Epoch 2/100 - Val Loss: 0.0944 | Val MSE: 1.6410

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 3
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 2
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 3/100 - Train Loss: 0.1628 | Train MSE: 2.9557


                                                                                                                       

Epoch 3/100 - Val Loss: 0.0955 | Val MSE: 1.6977

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 3
Example 2: Real MOS = 4, Predicted MOS = 3
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 2
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 4/100 - Train Loss: 0.1591 | Train MSE: 2.7748


                                                                                                                       

Epoch 4/100 - Val Loss: 0.0946 | Val MSE: 1.6790

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 3
Example 2: Real MOS = 4, Predicted MOS = 3
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 5/100 - Train Loss: 0.1549 | Train MSE: 2.6394


                                                                                                                       

Epoch 5/100 - Val Loss: 0.0957 | Val MSE: 1.5643

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 3
Example 3: Real MOS = 4, Predicted MOS = 1
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 6/100 - Train Loss: 0.1502 | Train MSE: 2.5564


                                                                                                                       

Epoch 6/100 - Val Loss: 0.0920 | Val MSE: 1.3307

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 1
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 7/100 - Train Loss: 0.1465 | Train MSE: 2.4165


                                                                                                                       

Epoch 7/100 - Val Loss: 0.0922 | Val MSE: 1.4557

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 8/100 - Train Loss: 0.1434 | Train MSE: 2.3624


                                                                                                                       

Epoch 8/100 - Val Loss: 0.0919 | Val MSE: 1.4513

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 3
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 9/100 - Train Loss: 0.1409 | Train MSE: 2.3267


                                                                                                                       

Epoch 9/100 - Val Loss: 0.0921 | Val MSE: 1.5393

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 10/100 - Train Loss: 0.1385 | Train MSE: 2.2379


                                                                                                                       

Epoch 10/100 - Val Loss: 0.0923 | Val MSE: 1.3123

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 11/100 - Train Loss: 0.1351 | Train MSE: 2.1934


                                                                                                                       

Epoch 11/100 - Val Loss: 0.0956 | Val MSE: 1.6737

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 12/100 - Train Loss: 0.1320 | Train MSE: 2.0660


                                                                                                                       

Epoch 12/100 - Val Loss: 0.0904 | Val MSE: 1.4070

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 13/100 - Train Loss: 0.1313 | Train MSE: 2.0439


                                                                                                                       

Epoch 13/100 - Val Loss: 0.0939 | Val MSE: 1.6380

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 14/100 - Train Loss: 0.1275 | Train MSE: 1.9744


                                                                                                                       

Epoch 14/100 - Val Loss: 0.0904 | Val MSE: 1.5663

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 15/100 - Train Loss: 0.1253 | Train MSE: 1.9318


                                                                                                                       

Epoch 15/100 - Val Loss: 0.0916 | Val MSE: 1.6870

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 1
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 16/100 - Train Loss: 0.1252 | Train MSE: 1.9604


                                                                                                                       

Epoch 16/100 - Val Loss: 0.0902 | Val MSE: 1.5520

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 17/100 - Train Loss: 0.1206 | Train MSE: 1.8390


                                                                                                                       

Epoch 17/100 - Val Loss: 0.0889 | Val MSE: 1.6450

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 18/100 - Train Loss: 0.1166 | Train MSE: 1.7897


                                                                                                                       

Epoch 18/100 - Val Loss: 0.0894 | Val MSE: 1.5860

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 19/100 - Train Loss: 0.1157 | Train MSE: 1.8020


                                                                                                                       

Epoch 19/100 - Val Loss: 0.0896 | Val MSE: 1.6760

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 20/100 - Train Loss: 0.1128 | Train MSE: 1.7559


                                                                                                                       

Epoch 20/100 - Val Loss: 0.0890 | Val MSE: 1.6253

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 21/100 - Train Loss: 0.1116 | Train MSE: 1.7901


                                                                                                                       

Epoch 21/100 - Val Loss: 0.0912 | Val MSE: 1.7093

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 22/100 - Train Loss: 0.1088 | Train MSE: 1.7230


                                                                                                                       

Epoch 22/100 - Val Loss: 0.0892 | Val MSE: 1.7393

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 23/100 - Train Loss: 0.1058 | Train MSE: 1.6957


                                                                                                                       

Epoch 23/100 - Val Loss: 0.0885 | Val MSE: 1.6217

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 24/100 - Train Loss: 0.1033 | Train MSE: 1.6851


                                                                                                                       

Epoch 24/100 - Val Loss: 0.0879 | Val MSE: 1.6333

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 25/100 - Train Loss: 0.1037 | Train MSE: 1.7137


                                                                                                                       

Epoch 25/100 - Val Loss: 0.0876 | Val MSE: 1.6160

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 26/100 - Train Loss: 0.1013 | Train MSE: 1.6465


                                                                                                                       

Epoch 26/100 - Val Loss: 0.0898 | Val MSE: 1.6173

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 27/100 - Train Loss: 0.0992 | Train MSE: 1.6601


                                                                                                                       

Epoch 27/100 - Val Loss: 0.0870 | Val MSE: 1.5720

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 28/100 - Train Loss: 0.0977 | Train MSE: 1.6106


                                                                                                                       

Epoch 28/100 - Val Loss: 0.0884 | Val MSE: 1.6820

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 29/100 - Train Loss: 0.0985 | Train MSE: 1.6733


                                                                                                                       

Epoch 29/100 - Val Loss: 0.0871 | Val MSE: 1.4730

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 30/100 - Train Loss: 0.0980 | Train MSE: 1.6479


                                                                                                                       

Epoch 30/100 - Val Loss: 0.0883 | Val MSE: 1.6010

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 31/100 - Train Loss: 0.0966 | Train MSE: 1.5977


                                                                                                                       

Epoch 31/100 - Val Loss: 0.0882 | Val MSE: 1.6807

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 32/100 - Train Loss: 0.0953 | Train MSE: 1.5915


                                                                                                                       

Epoch 32/100 - Val Loss: 0.0862 | Val MSE: 1.4927

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 33/100 - Train Loss: 0.0946 | Train MSE: 1.5717


                                                                                                                       

Epoch 33/100 - Val Loss: 0.0839 | Val MSE: 1.3700

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 34/100 - Train Loss: 0.0936 | Train MSE: 1.5367


                                                                                                                       

Epoch 34/100 - Val Loss: 0.0881 | Val MSE: 1.6027

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 35/100 - Train Loss: 0.0924 | Train MSE: 1.5053


                                                                                                                       

Epoch 35/100 - Val Loss: 0.0868 | Val MSE: 1.4833

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 4
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 36/100 - Train Loss: 0.0934 | Train MSE: 1.5111


                                                                                                                       

Epoch 36/100 - Val Loss: 0.0867 | Val MSE: 1.3263

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 37/100 - Train Loss: 0.0927 | Train MSE: 1.4970


                                                                                                                       

Epoch 37/100 - Val Loss: 0.0825 | Val MSE: 1.1817

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 38/100 - Train Loss: 0.0918 | Train MSE: 1.4975


                                                                                                                       

Epoch 38/100 - Val Loss: 0.0829 | Val MSE: 1.2747

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 39/100 - Train Loss: 0.0908 | Train MSE: 1.4304


                                                                                                                       

Epoch 39/100 - Val Loss: 0.0831 | Val MSE: 1.3257

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 5
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 5


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 40/100 - Train Loss: 0.0900 | Train MSE: 1.4534


                                                                                                                       

Epoch 40/100 - Val Loss: 0.0827 | Val MSE: 1.2590

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 5
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 41/100 - Train Loss: 0.0899 | Train MSE: 1.4230


                                                                                                                       

Epoch 41/100 - Val Loss: 0.0832 | Val MSE: 1.3103

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 42/100 - Train Loss: 0.0891 | Train MSE: 1.3957


                                                                                                                       

Epoch 42/100 - Val Loss: 0.0795 | Val MSE: 1.0580

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 43/100 - Train Loss: 0.0889 | Train MSE: 1.4160


                                                                                                                       

Epoch 43/100 - Val Loss: 0.0781 | Val MSE: 1.0553

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 44/100 - Train Loss: 0.0865 | Train MSE: 1.3069


                                                                                                                       

Epoch 44/100 - Val Loss: 0.0835 | Val MSE: 1.1353

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 2
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 45/100 - Train Loss: 0.0864 | Train MSE: 1.3301


                                                                                                                       

Epoch 45/100 - Val Loss: 0.0779 | Val MSE: 1.0447

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 46/100 - Train Loss: 0.0860 | Train MSE: 1.3055


                                                                                                                       

Epoch 46/100 - Val Loss: 0.0778 | Val MSE: 1.0337

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 2
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 47/100 - Train Loss: 0.0854 | Train MSE: 1.3001


                                                                                                                       

Epoch 47/100 - Val Loss: 0.0795 | Val MSE: 0.9857

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 2
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 48/100 - Train Loss: 0.0847 | Train MSE: 1.2564


                                                                                                                       

Epoch 48/100 - Val Loss: 0.0769 | Val MSE: 0.9833

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 49/100 - Train Loss: 0.0846 | Train MSE: 1.2489


                                                                                                                       

Epoch 49/100 - Val Loss: 0.0787 | Val MSE: 1.0140

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 50/100 - Train Loss: 0.0845 | Train MSE: 1.2536


                                                                                                                       

Epoch 50/100 - Val Loss: 0.0768 | Val MSE: 0.9783

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 51/100 - Train Loss: 0.0846 | Train MSE: 1.2520


                                                                                                                       

Epoch 51/100 - Val Loss: 0.0773 | Val MSE: 0.9667

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 3
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 52/100 - Train Loss: 0.0849 | Train MSE: 1.2263


                                                                                                                       

Epoch 52/100 - Val Loss: 0.0799 | Val MSE: 1.0670

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 53/100 - Train Loss: 0.0838 | Train MSE: 1.2250


                                                                                                                       

Epoch 53/100 - Val Loss: 0.0743 | Val MSE: 0.8717

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 54/100 - Train Loss: 0.0823 | Train MSE: 1.2097


                                                                                                                       

Epoch 54/100 - Val Loss: 0.0760 | Val MSE: 0.9813

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 55/100 - Train Loss: 0.0819 | Train MSE: 1.1597


                                                                                                                       

Epoch 55/100 - Val Loss: 0.0772 | Val MSE: 1.0130

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 56/100 - Train Loss: 0.0815 | Train MSE: 1.1763


                                                                                                                       

Epoch 56/100 - Val Loss: 0.0740 | Val MSE: 0.8893

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 57/100 - Train Loss: 0.0815 | Train MSE: 1.1760


                                                                                                                       

Epoch 57/100 - Val Loss: 0.0779 | Val MSE: 1.0193

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 58/100 - Train Loss: 0.0806 | Train MSE: 1.1374


                                                                                                                       

Epoch 58/100 - Val Loss: 0.0806 | Val MSE: 1.0783

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 3
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 59/100 - Train Loss: 0.0803 | Train MSE: 1.1389


                                                                                                                       

Epoch 59/100 - Val Loss: 0.0735 | Val MSE: 0.7627

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 60/100 - Train Loss: 0.0821 | Train MSE: 1.1809


                                                                                                                       

Epoch 60/100 - Val Loss: 0.0736 | Val MSE: 0.8783

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 61/100 - Train Loss: 0.0802 | Train MSE: 1.1397


                                                                                                                       

Epoch 61/100 - Val Loss: 0.0766 | Val MSE: 0.9453

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 3
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 62/100 - Train Loss: 0.0803 | Train MSE: 1.1126


                                                                                                                       

Epoch 62/100 - Val Loss: 0.0757 | Val MSE: 0.9403

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 3


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 63/100 - Train Loss: 0.0790 | Train MSE: 1.0848


                                                                                                                       

Epoch 63/100 - Val Loss: 0.0766 | Val MSE: 0.9597

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 4
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 5
Example 4: Real MOS = 3, Predicted MOS = 5
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

Epoch 64/100 - Train Loss: 0.0784 | Train MSE: 1.0797


                                                                                                                       

Epoch 64/100 - Val Loss: 0.0732 | Val MSE: 0.8190

Sample Predictions (Real MOS vs Predicted MOS):
Example 1: Real MOS = 4, Predicted MOS = 3
Example 2: Real MOS = 4, Predicted MOS = 4
Example 3: Real MOS = 4, Predicted MOS = 3
Example 4: Real MOS = 3, Predicted MOS = 4
Example 5: Real MOS = 3, Predicted MOS = 2


  with torch.cuda.amp.autocast():
                                                                                                                       

RuntimeError: Failed to load audio: 

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x00000206208EBC20>> (for post_run_cell), with arguments args (<ExecutionResult object at 206c414b440, execution_count=8 error_before_exec=None error_in_exec=Failed to load audio:  info=<ExecutionInfo object at 206c414a600, raw_cell="main()" store_history=True silent=False shell_futures=True cell_id=e6da0ed7-7510-44f7-a47f-e2c76c940059> result=None>,),kwargs {}:


ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host