In [17]:
"""
DimABSA - Subtask 1: Dimensional Aspect Sentiment Regression (DimASR)
======================================================================
Task: Given Text + Aspect ‚Üí Predict Valence and Arousal (1.00-9.00)
"""

import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from tqdm import tqdm
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

print("‚úì All imports loaded!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

‚úì All imports loaded!
PyTorch version: 2.9.1
CUDA available: False


In [18]:
# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    # Paths - UPDATE THESE TO YOUR PATHS
    TRAIN_PATH = "../data/trackA/subtask1/eng_laptop_train_alltasks.jsonl"
    DEV_PATH = "../data/trackA/subtask1/eng_laptop_dev_task1.jsonl"
    
    # Model Selection
    MODEL_NAME = "microsoft/deberta-v3-base"
    # Alternatives (comment/uncomment to try):
    # MODEL_NAME = "microsoft/deberta-v3-large"  # Better performance, slower
    # MODEL_NAME = "roberta-large"
    
    # Model Parameters
    MAX_LEN = 256  # Increased from 128
    DROPOUT = 0.2
    HIDDEN_DIM = 256  # For intermediate layers
    
    # Training Parameters
    BATCH_SIZE = 8  # Reduce to 4 if OOM (Out of Memory)
    LR = 2e-5
    EPOCHS = 5
    WARMUP_RATIO = 0.1
    MAX_GRAD_NORM = 1.0
    WEIGHT_DECAY = 0.01
    
    # Ensemble Settings
    NUM_SEEDS = 3  # Train 3 models with different seeds
    SEEDS = [42, 123, 456]
    
    # Hardware
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Outputs
    OUTPUT_FILE = "submission_task1_improved.jsonl"
    MODEL_SAVE_DIR = "./saved_models"

config = Config()

# Create save directory
import os
os.makedirs(config.MODEL_SAVE_DIR, exist_ok=True)

print("=" * 60)
print("CONFIGURATION")
print("=" * 60)
print(f"Model: {config.MODEL_NAME}")
print(f"Device: {config.DEVICE}")
print(f"Batch Size: {config.BATCH_SIZE}")
print(f"Epochs: {config.EPOCHS}")
print(f"Max Length: {config.MAX_LEN}")
print(f"Learning Rate: {config.LR}")
print(f"Ensemble Seeds: {config.SEEDS}")
print("=" * 60)

CONFIGURATION
Model: microsoft/deberta-v3-base
Device: cpu
Batch Size: 8
Epochs: 5
Max Length: 256
Learning Rate: 2e-05
Ensemble Seeds: [42, 123, 456]


In [19]:
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def set_seed(seed):
    """Set random seeds for reproducibility"""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    print(f"‚úì Seed set to {seed}")

def load_jsonl(path):
    """Load JSONL file"""
    data = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            data.append(json.loads(line.strip()))
    return data

def parse_va(va_str):
    """Parse 'V#A' format to (valence, arousal) floats"""
    v, a = va_str.split("#")
    return float(v), float(a)

def format_va(v, a):
    """Format valence and arousal to 'V#A' string"""
    return f"{v:.2f}#{a:.2f}"

# Set initial seed
set_seed(config.SEEDS[0])

print("\n‚úì Utility functions loaded!")

‚úì Seed set to 42

‚úì Utility functions loaded!


In [20]:
# ============================================================================
# DATA LOADING
# ============================================================================

print("Loading data...")
train_raw = load_jsonl(config.TRAIN_PATH)
dev_raw = load_jsonl(config.DEV_PATH)

print(f"‚úì Loaded {len(train_raw)} training instances")
print(f"‚úì Loaded {len(dev_raw)} dev instances")

# Show example training data
print("\n" + "=" * 60)
print("TRAINING DATA SAMPLE")
print("=" * 60)
print(json.dumps(train_raw[0], indent=2, ensure_ascii=False))

# Show example dev data
print("\n" + "=" * 60)
print("DEV DATA SAMPLE")
print("=" * 60)
print(json.dumps(dev_raw[0], indent=2, ensure_ascii=False))

Loading data...
‚úì Loaded 4076 training instances
‚úì Loaded 200 dev instances

TRAINING DATA SAMPLE
{
  "ID": "laptop_quad_dev_1",
  "Text": "this unit is ` ` pretty ` ` and stylish , so my high school daughter was attracted to it for that reason .",
  "Quadruplet": [
    {
      "Aspect": "unit",
      "Category": "LAPTOP#DESIGN_FEATURES",
      "Opinion": "pretty",
      "VA": "7.12#7.12"
    },
    {
      "Aspect": "unit",
      "Category": "LAPTOP#DESIGN_FEATURES",
      "Opinion": "stylish",
      "VA": "7.12#7.12"
    }
  ]
}

DEV DATA SAMPLE
{
  "ID": "lap26_aspect_va_dev_1",
  "Text": "The touchscreen works very well",
  "Aspect": [
    "touchscreen"
  ]
}


In [21]:
# ============================================================================
# DATA PREPROCESSING
# ============================================================================

def create_task1_samples(raw_data, is_test=False):
    """
    Convert raw JSONL data to samples for Task 1.
    
    Training format: {ID, Text, Quadruplet: [{Aspect, Category, Opinion, VA}]}
    Test format: {ID, Text, Aspect: [list of aspects]}
    
    Output: List of {id, text, aspect, valence, arousal}
    """
    samples = []
    
    for item in raw_data:
        text = item["Text"]
        item_id = item["ID"]
        
        if is_test:
            # Dev/Test: Extract from Aspect list
            for aspect in item["Aspect"]:
                samples.append({
                    "id": item_id,
                    "text": text,
                    "aspect": aspect,
                    "valence": 0.0,  # Placeholder
                    "arousal": 0.0   # Placeholder
                })
        else:
            # Training: Extract from Quadruplet list
            for quad in item["Quadruplet"]:
                aspect = quad["Aspect"]
                v, a = parse_va(quad["VA"])
                samples.append({
                    "id": item_id,
                    "text": text,
                    "aspect": aspect,
                    "valence": v,
                    "arousal": a
                })
    
    return samples

# Create samples
print("Creating samples...")
train_samples = create_task1_samples(train_raw, is_test=False)
dev_samples = create_task1_samples(dev_raw, is_test=True)

print(f"‚úì Created {len(train_samples)} training samples")
print(f"‚úì Created {len(dev_samples)} dev samples")

# Convert to DataFrames
train_df = pd.DataFrame(train_samples)
dev_df = pd.DataFrame(dev_samples)

print("\n" + "=" * 60)
print("TRAINING DATA STATISTICS")
print("=" * 60)
print(train_df.describe())

print("\nüìä Sample distribution:")
print(train_df.head(10))

Creating samples...
‚úì Created 5773 training samples
‚úì Created 275 dev samples

TRAINING DATA STATISTICS
           valence     arousal
count  5773.000000  5773.00000
mean      5.936842     6.66797
std       1.763164     1.03192
min       1.000000     3.83000
25%       4.380000     5.83000
50%       6.620000     6.88000
75%       7.380000     7.50000
max       8.830000     8.83000

üìä Sample distribution:
                  id                                               text  \
0  laptop_quad_dev_1  this unit is ` ` pretty ` ` and stylish , so m...   
1  laptop_quad_dev_1  this unit is ` ` pretty ` ` and stylish , so m...   
2  laptop_quad_dev_2  for now i ' m okay with upping the experience ...   
3  laptop_quad_dev_3  seems unlikely but whatever , i ' ll go with it .   
4  laptop_quad_dev_4  this version has been my least favorite versio...   
5  laptop_quad_dev_5        - biggest disappointment is the track pad .   
6  laptop_quad_dev_6             should not of bought this ch

In [22]:
# ============================================================================
# TRAIN-VALIDATION SPLIT
# ============================================================================

# Split training data into train and validation (90/10)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.1,
    random_state=42,
    shuffle=True
)

print("=" * 60)
print("DATA SPLIT")
print("=" * 60)
print(f"Training samples:   {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Dev/Test samples:   {len(dev_df)}")
print("=" * 60)

# Show valence/arousal statistics
print("\nüìä Valence Statistics (Training):")
print(train_df['valence'].describe())

print("\nüìä Arousal Statistics (Training):")
print(train_df['arousal'].describe())

DATA SPLIT
Training samples:   5195
Validation samples: 578
Dev/Test samples:   275

üìä Valence Statistics (Training):
count    5195.000000
mean        5.932689
std         1.768976
min         1.000000
25%         4.380000
50%         6.620000
75%         7.390000
max         8.830000
Name: valence, dtype: float64

üìä Arousal Statistics (Training):
count    5195.000000
mean        6.670192
std         1.034896
min         3.830000
25%         5.800000
50%         6.880000
75%         7.500000
max         8.830000
Name: arousal, dtype: float64


In [23]:
# ============================================================================
# DATASET CLASS
# ============================================================================

class DimASRDataset(Dataset):
    """
    Dataset for Dimensional Aspect Sentiment Regression.
    
    Improvements:
    - Two-sentence encoding: [CLS] text [SEP] aspect [SEP]
    - This helps the model better attend to aspect-text relationships
    """
    
    def __init__(self, df, tokenizer, max_len=256):
        self.texts = df["text"].tolist()
        self.aspects = df["aspect"].tolist()
        self.valence = df["valence"].tolist()
        self.arousal = df["arousal"].tolist()
        self.ids = df["id"].tolist()
        
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        aspect = self.aspects[idx]
        valence = self.valence[idx]
        arousal = self.arousal[idx]
        
        # Encode as two sentences: [CLS] text [SEP] aspect [SEP]
        # This is better than concatenation "aspect: text"
        encoding = self.tokenizer(
            text,
            aspect,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor([valence, arousal], dtype=torch.float),
            "id": self.ids[idx],
            "aspect": aspect
        }

print("‚úì Dataset class created!")

‚úì Dataset class created!


In [24]:
# ============================================================================
# TOKENIZER & DATALOADERS
# ============================================================================

print(f"Loading tokenizer: {config.MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

# Create datasets
train_dataset = DimASRDataset(train_df, tokenizer, config.MAX_LEN)
val_dataset = DimASRDataset(val_df, tokenizer, config.MAX_LEN)
dev_dataset = DimASRDataset(dev_df, tokenizer, config.MAX_LEN)

# Create dataloaders
# NOTE: num_workers=0 for Jupyter notebook compatibility
train_loader = DataLoader(
    train_dataset,
    batch_size=config.BATCH_SIZE,
    shuffle=True,
    num_workers=0,  # Changed from 2 to 0 for Jupyter
    pin_memory=True if config.DEVICE == "cuda" else False
)

val_loader = DataLoader(
    val_dataset,
    batch_size=config.BATCH_SIZE,
    shuffle=False,
    num_workers=0,  # Changed from 2 to 0 for Jupyter
    pin_memory=True if config.DEVICE == "cuda" else False
)

dev_loader = DataLoader(
    dev_dataset,
    batch_size=config.BATCH_SIZE,
    shuffle=False,
    num_workers=0,  # Changed from 2 to 0 for Jupyter
    pin_memory=True if config.DEVICE == "cuda" else False
)

print("=" * 60)
print("DATALOADERS CREATED")
print("=" * 60)
print(f"Train batches: {len(train_loader)}")
print(f"Val batches:   {len(val_loader)}")
print(f"Dev batches:   {len(dev_loader)}")
print("=" * 60)

# Test one batch
batch = next(iter(train_loader))
print("\nüì¶ Sample batch shapes:")
print(f"  input_ids: {batch['input_ids'].shape}")
print(f"  attention_mask: {batch['attention_mask'].shape}")
print(f"  labels: {batch['labels'].shape}")

# Decode one example to verify encoding
sample_idx = 0
decoded = tokenizer.decode(batch['input_ids'][sample_idx], skip_special_tokens=False)
print(f"\nüìù Encoded example:\n{decoded}")
print(f"\nüéØ Target VA: {batch['labels'][sample_idx].numpy()}")

Loading tokenizer: microsoft/deberta-v3-base
DATALOADERS CREATED
Train batches: 650
Val batches:   73
Dev batches:   35

üì¶ Sample batch shapes:
  input_ids: torch.Size([8, 256])
  attention_mask: torch.Size([8, 256])
  labels: torch.Size([8, 2])

üìù Encoded example:
[CLS] no backlit keyboard[SEP] backlit keyboard[SEP][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][

In [25]:
# ============================================================================
# MODEL ARCHITECTURE
# ============================================================================

class ImprovedVARegressor(nn.Module):
    """
    Improved Valence-Arousal Regressor
    
    Improvements:
    1. Mean pooling instead of CLS (more stable)
    2. Separate heads for Valence and Arousal
    3. Additional hidden layer with LayerNorm
    4. GELU activation (better than ReLU)
    """
    
    def __init__(self, model_name, dropout=0.2, hidden_dim=256):
        super().__init__()
        
        # Load pretrained encoder
        self.encoder = AutoModel.from_pretrained(model_name)
        encoder_dim = self.encoder.config.hidden_size
        
        # Shared projection layer
        self.shared_projection = nn.Sequential(
            nn.Linear(encoder_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )
        
        # Separate heads for Valence and Arousal
        self.valence_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, 1)
        )
        
        self.arousal_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, 1)
        )
    
    def mean_pool(self, hidden_states, attention_mask):
        """
        Mean pooling over all tokens (masked).
        More stable than CLS token alone.
        """
        # Expand mask to match hidden dimensions
        mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
        
        # Mask out padding tokens
        masked_hidden = hidden_states * mask
        
        # Sum and normalize
        summed = masked_hidden.sum(dim=1)
        counted = mask.sum(dim=1).clamp(min=1e-9)
        
        return summed / counted
    
    def forward(self, input_ids, attention_mask):
        # Get encoder outputs
        outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        
        # Mean pooling
        pooled = self.mean_pool(outputs.last_hidden_state, attention_mask)
        
        # Shared projection
        shared = self.shared_projection(pooled)
        
        # Separate predictions
        valence = self.valence_head(shared)
        arousal = self.arousal_head(shared)
        
        # Concatenate (batch_size, 2)
        output = torch.cat([valence, arousal], dim=1)
        
        return output

print("‚úì Model architecture defined!")

# Show model summary
model = ImprovedVARegressor(
    config.MODEL_NAME, 
    dropout=config.DROPOUT,
    hidden_dim=config.HIDDEN_DIM
).to(config.DEVICE)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("\n" + "=" * 60)
print("MODEL SUMMARY")
print("=" * 60)
print(f"Architecture: {config.MODEL_NAME}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Device: {config.DEVICE}")
print("=" * 60)

‚úì Model architecture defined!

MODEL SUMMARY
Architecture: microsoft/deberta-v3-base
Total parameters: 184,095,490
Trainable parameters: 184,095,490
Device: cpu


In [26]:
# ============================================================================
# LOSS FUNCTION
# ============================================================================

class RobustVALoss(nn.Module):
    """
    Robust loss combining MSE and Huber loss.
    
    - MSE: Good for normal predictions
    - Huber: Robust to outliers
    - Combination provides stability
    """
    
    def __init__(self, alpha=0.5, huber_delta=1.0):
        super().__init__()
        self.alpha = alpha
        self.mse = nn.MSELoss()
        self.huber = nn.HuberLoss(delta=huber_delta)
    
    def forward(self, pred, target):
        mse_loss = self.mse(pred, target)
        huber_loss = self.huber(pred, target)
        return self.alpha * mse_loss + (1 - self.alpha) * huber_loss

# Test the loss function
criterion = RobustVALoss(alpha=0.5)

# Test with dummy data
dummy_pred = torch.tensor([[7.5, 6.8], [5.2, 5.5]])
dummy_target = torch.tensor([[7.0, 7.0], [5.0, 5.0]])
test_loss = criterion(dummy_pred, dummy_target)

print("‚úì Loss function created!")
print(f"Test loss value: {test_loss.item():.4f}")

‚úì Loss function created!
Test loss value: 0.1087


In [27]:
# ============================================================================
# TRAINING FUNCTIONS
# ============================================================================

def train_one_epoch(model, dataloader, optimizer, scheduler, criterion, device):
    """Train for one epoch"""
    model.train()
    total_loss = 0
    
    progress_bar = tqdm(dataloader, desc="Training", leave=False)
    
    for batch in progress_bar:
        # Move to device
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        
        # Backward pass
        loss.backward()
        
        # Gradient clipping (prevent exploding gradients)
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.MAX_GRAD_NORM)
        
        # Update weights
        optimizer.step()
        scheduler.step()
        
        # Track loss
        total_loss += loss.item()
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
    
    return total_loss / len(dataloader)


def evaluate(model, dataloader, criterion, device):
    """Evaluate the model"""
    model.eval()
    total_loss = 0
    
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating", leave=False):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            # Forward pass
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            
            # Store predictions and labels
            all_preds.append(outputs.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
    
    # Concatenate all batches
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    
    # Calculate metrics
    avg_loss = total_loss / len(dataloader)
    
    # RMSE (official metric)
    rmse = np.sqrt(np.mean((all_preds - all_labels) ** 2))
    
    # Pearson correlation
    pcc_valence = pearsonr(all_preds[:, 0], all_labels[:, 0])[0]
    pcc_arousal = pearsonr(all_preds[:, 1], all_labels[:, 1])[0]
    
    return {
        "loss": avg_loss,
        "rmse": rmse,
        "pcc_valence": pcc_valence,
        "pcc_arousal": pcc_arousal,
        "predictions": all_preds,
        "labels": all_labels
    }

print("‚úì Training and evaluation functions ready!")

‚úì Training and evaluation functions ready!


In [28]:
# ============================================================================
# TRAINING LOOP
# ============================================================================

def train_model(train_loader, val_loader, config, seed=42):
    """Complete training loop for one model"""
    
    # Set seed for reproducibility
    set_seed(seed)
    
    # Initialize model
    model = ImprovedVARegressor(
        config.MODEL_NAME,
        dropout=config.DROPOUT,
        hidden_dim=config.HIDDEN_DIM
    ).to(config.DEVICE)
    
    # Loss function
    criterion = RobustVALoss(alpha=0.5)
    
    # Optimizer with weight decay
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config.LR,
        weight_decay=config.WEIGHT_DECAY
    )
    
    # Learning rate scheduler with warmup
    num_training_steps = len(train_loader) * config.EPOCHS
    num_warmup_steps = int(config.WARMUP_RATIO * num_training_steps)
    
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    # Training tracking
    best_val_rmse = float("inf")
    history = []
    
    print("\n" + "=" * 60)
    print(f"üöÄ TRAINING MODEL (Seed: {seed})")
    print("=" * 60)
    
    for epoch in range(config.EPOCHS):
        print(f"\nEpoch {epoch + 1}/{config.EPOCHS}")
        print("-" * 60)
        
        # Train
        train_loss = train_one_epoch(
            model, train_loader, optimizer, scheduler, criterion, config.DEVICE
        )
        
        # Evaluate
        val_metrics = evaluate(model, val_loader, criterion, config.DEVICE)
        
        # Log metrics
        history.append({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_metrics["loss"],
            "val_rmse": val_metrics["rmse"],
            "val_pcc_v": val_metrics["pcc_valence"],
            "val_pcc_a": val_metrics["pcc_arousal"]
        })
        
        print(f"Train Loss: {train_loss:.4f}")
        print(f"Val Loss:   {val_metrics['loss']:.4f}")
        print(f"Val RMSE:   {val_metrics['rmse']:.4f} ‚≠ê")
        print(f"Val PCC-V:  {val_metrics['pcc_valence']:.4f}")
        print(f"Val PCC-A:  {val_metrics['pcc_arousal']:.4f}")
        
        # Save best model
        if val_metrics["rmse"] < best_val_rmse:
            best_val_rmse = val_metrics["rmse"]
            model_path = f"{config.MODEL_SAVE_DIR}/best_model_seed{seed}.pt"
            torch.save(model.state_dict(), model_path)
            print(f"üíæ Best model saved! (RMSE: {best_val_rmse:.4f})")
    
    print("\n" + "=" * 60)
    print(f"‚úÖ TRAINING COMPLETE (Seed: {seed})")
    print(f"Best Validation RMSE: {best_val_rmse:.4f}")
    print("=" * 60)
    
    return model, best_val_rmse, history

print("‚úì Training loop ready!")

‚úì Training loop ready!


In [29]:
# ============================================================================
# TRAIN FIRST MODEL
# ============================================================================

# Train with first seed
model, best_rmse, history = train_model(
    train_loader,
    val_loader,
    config,
    seed=config.SEEDS[0]
)

# Show training history
history_df = pd.DataFrame(history)
print("\nüìä Training History:")
print(history_df)

‚úì Seed set to 42

üöÄ TRAINING MODEL (Seed: 42)

Epoch 1/5
------------------------------------------------------------


                                                                         

Train Loss: 7.5640
Val Loss:   2.7165
Val RMSE:   2.0304 ‚≠ê
Val PCC-V:  0.0803
Val PCC-A:  -0.0360
üíæ Best model saved! (RMSE: 2.0304)

Epoch 2/5
------------------------------------------------------------


                                                                        

Train Loss: 2.5757
Val Loss:   1.6684
Val RMSE:   1.5612 ‚≠ê
Val PCC-V:  0.0150
Val PCC-A:  0.0082
üíæ Best model saved! (RMSE: 1.5612)

Epoch 3/5
------------------------------------------------------------


                                                                        

Train Loss: 1.8375
Val Loss:   1.4089
Val RMSE:   1.4306 ‚≠ê
Val PCC-V:  0.0050
Val PCC-A:  -0.0213
üíæ Best model saved! (RMSE: 1.4306)

Epoch 4/5
------------------------------------------------------------


                                                                        

Train Loss: 1.6561
Val Loss:   1.3592
Val RMSE:   1.4071 ‚≠ê
Val PCC-V:  0.0045
Val PCC-A:  -0.0191
üíæ Best model saved! (RMSE: 1.4071)

Epoch 5/5
------------------------------------------------------------


                                                                        

Train Loss: 1.6129
Val Loss:   1.3515
Val RMSE:   1.4040 ‚≠ê
Val PCC-V:  0.0053
Val PCC-A:  -0.0206
üíæ Best model saved! (RMSE: 1.4040)

‚úÖ TRAINING COMPLETE (Seed: 42)
Best Validation RMSE: 1.4040

üìä Training History:
   epoch  train_loss  val_loss  val_rmse  val_pcc_v  val_pcc_a
0      1    7.563998  2.716471  2.030386   0.080266  -0.035967
1      2    2.575711  1.668426  1.561201   0.015034   0.008206
2      3    1.837546  1.408887  1.430553   0.005023  -0.021291
3      4    1.656081  1.359227  1.407104   0.004497  -0.019103
4      5    1.612878  1.351523  1.404049   0.005339  -0.020636


In [30]:
# ============================================================================
# DIAGNOSTIC: Check Predictions vs Ground Truth
# ============================================================================

# Load best model
model.load_state_dict(torch.load(f"{config.MODEL_SAVE_DIR}/best_model_seed42.pt"))
model.eval()

# Get predictions on validation set
val_metrics = evaluate(model, val_loader, criterion, config.DEVICE)

preds = val_metrics["predictions"]
labels = val_metrics["labels"]

print("=" * 60)
print("üîç DIAGNOSTIC ANALYSIS")
print("=" * 60)

# Check prediction statistics
print("\nüìä Prediction Statistics:")
print(f"Valence - Pred Mean: {preds[:, 0].mean():.2f}, Std: {preds[:, 0].std():.2f}")
print(f"Valence - True Mean: {labels[:, 0].mean():.2f}, Std: {labels[:, 0].std():.2f}")
print(f"Arousal - Pred Mean: {preds[:, 1].mean():.2f}, Std: {preds[:, 1].std():.2f}")
print(f"Arousal - True Mean: {labels[:, 1].mean():.2f}, Std: {labels[:, 1].std():.2f}")

# Show some examples
print("\nüìã Sample Predictions (First 20):")
print("Pred Valence | True Valence | Pred Arousal | True Arousal")
print("-" * 60)
for i in range(20):
    print(f"   {preds[i, 0]:6.2f}    |    {labels[i, 0]:6.2f}    |    {preds[i, 1]:6.2f}    |    {labels[i, 1]:6.2f}")

# Check if predictions are stuck
print("\n‚ö†Ô∏è  Issues Detected:")
if preds[:, 0].std() < 0.5:
    print("‚ùå Valence predictions have very low variance (model might be stuck)")
if preds[:, 1].std() < 0.5:
    print("‚ùå Arousal predictions have very low variance (model might be stuck)")

# Check actual distribution
print("\nüìà Prediction Range:")
print(f"Valence: [{preds[:, 0].min():.2f}, {preds[:, 0].max():.2f}]")
print(f"Arousal: [{preds[:, 1].min():.2f}, {preds[:, 1].max():.2f}]")
print(f"Expected: [1.00, 9.00]")

                                                           

üîç DIAGNOSTIC ANALYSIS

üìä Prediction Statistics:
Valence - Pred Mean: 5.89, Std: 0.00
Valence - True Mean: 5.97, Std: 1.71
Arousal - Pred Mean: 6.58, Std: 0.00
Arousal - True Mean: 6.65, Std: 1.00

üìã Sample Predictions (First 20):
Pred Valence | True Valence | Pred Arousal | True Arousal
------------------------------------------------------------
     5.89    |      7.00    |      6.58    |      7.00
     5.89    |      7.17    |      6.58    |      6.83
     5.89    |      8.12    |      6.58    |      8.25
     5.89    |      7.25    |      6.58    |      7.50
     5.89    |      5.00    |      6.58    |      5.00
     5.89    |      6.75    |      6.58    |      6.50
     5.89    |      3.30    |      6.58    |      6.30
     5.89    |      1.83    |      6.58    |      8.00
     5.89    |      4.25    |      6.58    |      4.50
     5.89    |      7.50    |      6.58    |      7.67
     5.89    |      7.50    |      6.58    |      7.83
     5.89    |      7.62    |      6.



In [31]:
# ============================================================================
# CHECK: Are outputs in wrong range?
# ============================================================================

# Get raw model outputs (before any clipping)
model.eval()
sample_batch = next(iter(val_loader))
input_ids = sample_batch["input_ids"].to(config.DEVICE)
attention_mask = sample_batch["attention_mask"].to(config.DEVICE)
labels = sample_batch["labels"]

with torch.no_grad():
    raw_outputs = model(input_ids, attention_mask).cpu()

print("=" * 60)
print("üîß RAW MODEL OUTPUTS (Before scaling)")
print("=" * 60)
print(f"Raw output range: [{raw_outputs.min():.2f}, {raw_outputs.max():.2f}]")
print(f"Expected range: [1.00, 9.00]")
print(f"\nFirst 10 raw predictions:")
print(raw_outputs[:10])
print(f"\nFirst 10 true labels:")
print(labels[:10])

üîß RAW MODEL OUTPUTS (Before scaling)
Raw output range: [5.89, 6.58]
Expected range: [1.00, 9.00]

First 10 raw predictions:
tensor([[5.8943, 6.5813],
        [5.8943, 6.5813],
        [5.8943, 6.5813],
        [5.8943, 6.5813],
        [5.8943, 6.5813],
        [5.8943, 6.5813],
        [5.8943, 6.5813],
        [5.8943, 6.5813]])

First 10 true labels:
tensor([[7.0000, 7.0000],
        [7.1700, 6.8300],
        [8.1200, 8.2500],
        [7.2500, 7.5000],
        [5.0000, 5.0000],
        [6.7500, 6.5000],
        [3.3000, 6.3000],
        [1.8300, 8.0000]])


In [32]:
# ============================================================================
# IMPROVED MODEL V2 - WITH OUTPUT SCALING
# ============================================================================

class ImprovedVARegressorV2(nn.Module):
    """
    V2: Fixed version with proper output scaling to [1, 9] range
    
    Key fix: sigmoid(output) * 8 + 1 ‚Üí maps to [1, 9]
    """
    
    def __init__(self, model_name, dropout=0.2, hidden_dim=256):
        super().__init__()
        
        # Load pretrained encoder
        self.encoder = AutoModel.from_pretrained(model_name)
        encoder_dim = self.encoder.config.hidden_size
        
        # Shared projection layer
        self.shared_projection = nn.Sequential(
            nn.Linear(encoder_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )
        
        # Separate heads for Valence and Arousal
        self.valence_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, 1)
        )
        
        self.arousal_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, 1)
        )
    
    def mean_pool(self, hidden_states, attention_mask):
        """Mean pooling over all tokens (masked)"""
        mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
        masked_hidden = hidden_states * mask
        summed = masked_hidden.sum(dim=1)
        counted = mask.sum(dim=1).clamp(min=1e-9)
        return summed / counted
    
    def forward(self, input_ids, attention_mask):
        # Get encoder outputs
        outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        
        # Mean pooling
        pooled = self.mean_pool(outputs.last_hidden_state, attention_mask)
        
        # Shared projection
        shared = self.shared_projection(pooled)
        
        # Separate predictions
        valence = self.valence_head(shared)
        arousal = self.arousal_head(shared)
        
        # Concatenate
        output = torch.cat([valence, arousal], dim=1)
        
        # üî• KEY FIX: Scale outputs to [1, 9] range
        # sigmoid maps to [0, 1], then scale to [1, 9]
        output = torch.sigmoid(output) * 8.0 + 1.0
        
        return output

print("‚úì Improved Model V2 with output scaling created!")

# Test the new model
model_v2_test = ImprovedVARegressorV2(
    config.MODEL_NAME,
    dropout=config.DROPOUT,
    hidden_dim=config.HIDDEN_DIM
).to(config.DEVICE)

# Test output range
with torch.no_grad():
    test_batch = next(iter(train_loader))
    test_output = model_v2_test(
        test_batch["input_ids"].to(config.DEVICE),
        test_batch["attention_mask"].to(config.DEVICE)
    )
    print(f"\n‚úì V2 Model output range: [{test_output.min():.2f}, {test_output.max():.2f}]")
    print(f"‚úì Expected range: [1.00, 9.00]")

‚úì Improved Model V2 with output scaling created!

‚úì V2 Model output range: [3.98, 6.24]
‚úì Expected range: [1.00, 9.00]


In [33]:
# ============================================================================
# RETRAIN WITH V2 MODEL
# ============================================================================

def train_model_v2(train_loader, val_loader, config, seed=42):
    """Training loop with V2 model (includes output scaling)"""
    
    set_seed(seed)
    
    # Use V2 model with sigmoid scaling
    model = ImprovedVARegressorV2(
        config.MODEL_NAME,
        dropout=config.DROPOUT,
        hidden_dim=config.HIDDEN_DIM
    ).to(config.DEVICE)
    
    criterion = RobustVALoss(alpha=0.5)
    
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config.LR,
        weight_decay=config.WEIGHT_DECAY
    )
    
    num_training_steps = len(train_loader) * config.EPOCHS
    num_warmup_steps = int(config.WARMUP_RATIO * num_training_steps)
    
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    best_val_rmse = float("inf")
    history = []
    
    print("\n" + "=" * 60)
    print(f"üöÄ RETRAINING WITH V2 MODEL (Seed: {seed})")
    print("=" * 60)
    
    for epoch in range(config.EPOCHS):
        print(f"\nEpoch {epoch + 1}/{config.EPOCHS}")
        print("-" * 60)
        
        train_loss = train_one_epoch(
            model, train_loader, optimizer, scheduler, criterion, config.DEVICE
        )
        
        val_metrics = evaluate(model, val_loader, criterion, config.DEVICE)
        
        history.append({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_metrics["loss"],
            "val_rmse": val_metrics["rmse"],
            "val_pcc_v": val_metrics["pcc_valence"],
            "val_pcc_a": val_metrics["pcc_arousal"]
        })
        
        print(f"Train Loss: {train_loss:.4f}")
        print(f"Val Loss:   {val_metrics['loss']:.4f}")
        print(f"Val RMSE:   {val_metrics['rmse']:.4f} ‚≠ê")
        print(f"Val PCC-V:  {val_metrics['pcc_valence']:.4f}")
        print(f"Val PCC-A:  {val_metrics['pcc_arousal']:.4f}")
        
        if val_metrics["rmse"] < best_val_rmse:
            best_val_rmse = val_metrics["rmse"]
            model_path = f"{config.MODEL_SAVE_DIR}/best_model_v2_seed{seed}.pt"
            torch.save(model.state_dict(), model_path)
            print(f"üíæ Best model saved! (RMSE: {best_val_rmse:.4f})")
    
    print("\n" + "=" * 60)
    print(f"‚úÖ RETRAINING COMPLETE")
    print(f"Best Validation RMSE: {best_val_rmse:.4f}")
    print("=" * 60)
    
    return model, best_val_rmse, history

# Train V2 model
model_v2, best_rmse_v2, history_v2 = train_model_v2(
    train_loader,
    val_loader,
    config,
    seed=config.SEEDS[0]
)

# Show history
print("\nüìä Training History V2:")
print(pd.DataFrame(history_v2))

‚úì Seed set to 42

üöÄ RETRAINING WITH V2 MODEL (Seed: 42)

Epoch 1/5
------------------------------------------------------------


                                                                        

Train Loss: 0.8171
Val Loss:   0.4893
Val RMSE:   0.8364 ‚≠ê
Val PCC-V:  0.8687
Val PCC-A:  0.7008
üíæ Best model saved! (RMSE: 0.8364)

Epoch 2/5
------------------------------------------------------------


                                                                        

Train Loss: 0.4392
Val Loss:   0.4675
Val RMSE:   0.8189 ‚≠ê
Val PCC-V:  0.8805
Val PCC-A:  0.6992
üíæ Best model saved! (RMSE: 0.8189)

Epoch 3/5
------------------------------------------------------------


                                                                        

Train Loss: 0.3253
Val Loss:   0.4338
Val RMSE:   0.7898 ‚≠ê
Val PCC-V:  0.8868
Val PCC-A:  0.7143
üíæ Best model saved! (RMSE: 0.7898)

Epoch 4/5
------------------------------------------------------------


                                                                        

Train Loss: 0.2552
Val Loss:   0.4278
Val RMSE:   0.7846 ‚≠ê
Val PCC-V:  0.8834
Val PCC-A:  0.7249
üíæ Best model saved! (RMSE: 0.7846)

Epoch 5/5
------------------------------------------------------------


                                                                        

Train Loss: 0.2101
Val Loss:   0.4467
Val RMSE:   0.8020 ‚≠ê
Val PCC-V:  0.8835
Val PCC-A:  0.7175

‚úÖ RETRAINING COMPLETE
Best Validation RMSE: 0.7846

üìä Training History V2:
   epoch  train_loss  val_loss  val_rmse  val_pcc_v  val_pcc_a
0      1    0.817129  0.489270  0.836363   0.868731   0.700768
1      2    0.439178  0.467470  0.818895   0.880543   0.699232
2      3    0.325253  0.433773  0.789757   0.886777   0.714288
3      4    0.255246  0.427774  0.784615   0.883364   0.724859
4      5    0.210080  0.446668  0.801987   0.883485   0.717456




In [34]:
# ============================================================================
# PREDICT ON DEV/TEST SET
# ============================================================================

print("=" * 60)
print("üîÆ GENERATING PREDICTIONS ON DEV SET")
print("=" * 60)

# Load best V2 model
model_v2.load_state_dict(torch.load(f"{config.MODEL_SAVE_DIR}/best_model_v2_seed42.pt"))
model_v2.eval()

# Predict function
def predict_on_loader(model, loader, device):
    """Generate predictions for a dataloader"""
    model.eval()
    all_preds = []
    
    with torch.no_grad():
        for batch in tqdm(loader, desc="Predicting"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            
            outputs = model(input_ids, attention_mask)
            all_preds.append(outputs.cpu().numpy())
    
    return np.vstack(all_preds)

# Generate predictions
dev_preds = predict_on_loader(model_v2, dev_loader, config.DEVICE)

# Clip to valid range [1.0, 9.0] (safety check)
dev_preds = np.clip(dev_preds, 1.0, 9.0)

# Add predictions to dataframe
dev_df["pred_valence"] = dev_preds[:, 0]
dev_df["pred_arousal"] = dev_preds[:, 1]

print("\n‚úÖ Predictions generated!")
print(f"Total predictions: {len(dev_preds)}")

# Show prediction statistics
print("\nüìä Prediction Statistics:")
print(f"Valence - Mean: {dev_preds[:, 0].mean():.2f}, Std: {dev_preds[:, 0].std():.2f}")
print(f"Valence - Range: [{dev_preds[:, 0].min():.2f}, {dev_preds[:, 0].max():.2f}]")
print(f"Arousal - Mean: {dev_preds[:, 1].mean():.2f}, Std: {dev_preds[:, 1].std():.2f}")
print(f"Arousal - Range: [{dev_preds[:, 1].min():.2f}, {dev_preds[:, 1].max():.2f}]")

# Show sample predictions
print("\nüìã Sample Predictions:")
print(dev_df[["id", "aspect", "pred_valence", "pred_arousal"]].head(15))

üîÆ GENERATING PREDICTIONS ON DEV SET


Predicting: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 35/35 [00:32<00:00,  1.09it/s]


‚úÖ Predictions generated!
Total predictions: 275

üìä Prediction Statistics:
Valence - Mean: 6.69, Std: 1.51
Valence - Range: [2.26, 8.12]
Arousal - Mean: 7.08, Std: 0.78
Arousal - Range: [4.97, 8.17]

üìã Sample Predictions:
                        id                         aspect  pred_valence  \
0    lap26_aspect_va_dev_1                    touchscreen      6.945566   
1    lap26_aspect_va_dev_2                             HP      2.403815   
2    lap26_aspect_va_dev_3                       keyboard      7.005983   
3    lap26_aspect_va_dev_4                    screen size      6.907866   
4    lap26_aspect_va_dev_5                         Lenovo      7.790746   
5    lap26_aspect_va_dev_6                          sound      7.626075   
6    lap26_aspect_va_dev_7                        quality      8.026880   
7    lap26_aspect_va_dev_8             on screen keyboard      7.182424   
8    lap26_aspect_va_dev_9                         laptop      7.908240   
9   lap26_aspect_va_




In [35]:
# ============================================================================
# GENERATE SUBMISSION FILE
# ============================================================================

def save_submission(df, output_path):
    """
    Save predictions in the required submission format.
    
    Format:
    {
      "ID": "lap26_aspect_va_dev_1",
      "Aspect_VA": [
        {"Aspect": "touchscreen", "VA": "7.95#7.61"},
        {"Aspect": "keyboard", "VA": "6.50#6.80"}
      ]
    }
    """
    # Sort by ID to maintain order
    df_sorted = df.sort_values(by="id")
    
    with open(output_path, "w", encoding="utf-8") as f:
        # Group by ID (since each ID can have multiple aspects)
        for gid, group in df_sorted.groupby("id"):
            record = {
                "ID": gid,
                "Aspect_VA": []
            }
            
            # Add each aspect-VA pair
            for _, row in group.iterrows():
                record["Aspect_VA"].append({
                    "Aspect": row["aspect"],
                    "VA": f"{row['pred_valence']:.2f}#{row['pred_arousal']:.2f}"
                })
            
            # Write as JSON line
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    
    print(f"‚úÖ Submission file saved: {output_path}")
    print(f"üìù Total records: {df_sorted['id'].nunique()}")
    print(f"üìù Total predictions: {len(df_sorted)}")

print("=" * 60)
print("üíæ CREATING SUBMISSION FILE")
print("=" * 60)

# Generate submission
save_submission(dev_df, config.OUTPUT_FILE)

print("\n" + "=" * 60)
print("üìÑ SAMPLE SUBMISSION LINES")
print("=" * 60)

# Show first 5 submission lines
with open(config.OUTPUT_FILE, "r", encoding="utf-8") as f:
    for i, line in enumerate(f):
        if i < 5:
            data = json.loads(line)
            print(f"\n{i+1}. ID: {data['ID']}")
            for aspect_va in data['Aspect_VA']:
                print(f"   - {aspect_va['Aspect']}: {aspect_va['VA']}")

print("\n" + "=" * 60)
print("‚úÖ SUBTASK 1 COMPLETE!")
print("=" * 60)
print(f"üìÅ Submission file: {config.OUTPUT_FILE}")
print(f"üéØ Expected RMSE: ~0.78-0.82")
print(f"üèÜ Ready to upload to Codabench!")
print("=" * 60)

üíæ CREATING SUBMISSION FILE
‚úÖ Submission file saved: submission_task1_improved.jsonl
üìù Total records: 200
üìù Total predictions: 275

üìÑ SAMPLE SUBMISSION LINES

1. ID: lap26_aspect_va_dev_1
   - touchscreen: 6.95#6.87

2. ID: lap26_aspect_va_dev_10
   - functionality of the trackpad: 7.35#7.37

3. ID: lap26_aspect_va_dev_100
   - convertible aspect: 6.56#6.48
   - touchscreen: 6.72#6.67

4. ID: lap26_aspect_va_dev_101
   - quality of every app: 7.38#7.36
   - double screen: 7.30#7.26

5. ID: lap26_aspect_va_dev_102
   - Battery: 7.57#7.55

‚úÖ SUBTASK 1 COMPLETE!
üìÅ Submission file: submission_task1_improved.jsonl
üéØ Expected RMSE: ~0.78-0.82
üèÜ Ready to upload to Codabench!


In [36]:
# ============================================================================
# VERIFY SUBMISSION FORMAT (OPTIONAL)
# ============================================================================

print("üîç VERIFYING SUBMISSION FORMAT\n")

# Load submission file
with open(config.OUTPUT_FILE, "r", encoding="utf-8") as f:
    submission_data = [json.loads(line) for line in f]

print(f"‚úì Total records in submission: {len(submission_data)}")

# Check format
errors = []
for i, record in enumerate(submission_data):
    # Check required fields
    if "ID" not in record:
        errors.append(f"Line {i+1}: Missing 'ID' field")
    if "Aspect_VA" not in record:
        errors.append(f"Line {i+1}: Missing 'Aspect_VA' field")
    else:
        # Check Aspect_VA format
        for j, aspect_va in enumerate(record["Aspect_VA"]):
            if "Aspect" not in aspect_va:
                errors.append(f"Line {i+1}, Aspect {j+1}: Missing 'Aspect' field")
            if "VA" not in aspect_va:
                errors.append(f"Line {i+1}, Aspect {j+1}: Missing 'VA' field")
            else:
                # Check VA format (should be "V#A")
                va_str = aspect_va["VA"]
                if "#" not in va_str:
                    errors.append(f"Line {i+1}, Aspect {j+1}: Invalid VA format (missing #)")
                else:
                    try:
                        v, a = va_str.split("#")
                        v_float = float(v)
                        a_float = float(a)
                        
                        # Check range [1.0, 9.0]
                        if not (1.0 <= v_float <= 9.0):
                            errors.append(f"Line {i+1}: Valence {v_float} out of range [1.0, 9.0]")
                        if not (1.0 <= a_float <= 9.0):
                            errors.append(f"Line {i+1}: Arousal {a_float} out of range [1.0, 9.0]")
                    except ValueError:
                        errors.append(f"Line {i+1}: Invalid VA values (not numeric)")

# Report results
if errors:
    print("‚ùå ERRORS FOUND:")
    for error in errors[:10]:  # Show first 10 errors
        print(f"  - {error}")
    if len(errors) > 10:
        print(f"  ... and {len(errors) - 10} more errors")
else:
    print("‚úÖ SUBMISSION FORMAT IS VALID!")
    print("\nüìä Submission Statistics:")
    
    total_aspects = sum(len(record["Aspect_VA"]) for record in submission_data)
    print(f"  - Total IDs: {len(submission_data)}")
    print(f"  - Total Aspects: {total_aspects}")
    print(f"  - Avg aspects per ID: {total_aspects / len(submission_data):.2f}")
    
    # Get VA statistics
    all_v = []
    all_a = []
    for record in submission_data:
        for aspect_va in record["Aspect_VA"]:
            v, a = aspect_va["VA"].split("#")
            all_v.append(float(v))
            all_a.append(float(a))
    
    print(f"\n  Valence Stats:")
    print(f"    Mean: {np.mean(all_v):.2f}, Std: {np.std(all_v):.2f}")
    print(f"    Range: [{np.min(all_v):.2f}, {np.max(all_v):.2f}]")
    print(f"\n  Arousal Stats:")
    print(f"    Mean: {np.mean(all_a):.2f}, Std: {np.std(all_a):.2f}")
    print(f"    Range: [{np.min(all_a):.2f}, {np.max(all_a):.2f}]")
    
    print("\nüéâ Ready to submit to Codabench!")

üîç VERIFYING SUBMISSION FORMAT

‚úì Total records in submission: 200
‚úÖ SUBMISSION FORMAT IS VALID!

üìä Submission Statistics:
  - Total IDs: 200
  - Total Aspects: 275
  - Avg aspects per ID: 1.38

  Valence Stats:
    Mean: 6.69, Std: 1.51
    Range: [2.26, 8.12]

  Arousal Stats:
    Mean: 7.08, Std: 0.78
    Range: [4.97, 8.17]

üéâ Ready to submit to Codabench!
