In [1]:
!ls

sample_data  test_split.csv  train_split.csv  val_split.csv


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import zipfile
import os

# Set the path
zip_path = '/content/drive/MyDrive/iemocap_audio.zip'
extract_to = '/content/iemocap/'

# Create target folder
os.makedirs(extract_to, exist_ok=True)

# Extract the zip
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f"✅ Unzipped to {extract_to}")


✅ Unzipped to /content/iemocap/


In [1]:
!pip install numpy==1.26.4
!pip install pandas==2.2.2
!pip install torch==2.0.0 torchvision==0.15.1
!pip install transformers==4.31.0
!pip install torchaudio==2.0.1
!pip install tqdm==4.66.2
!pip install nlpaug==1.1.11

Collecting torch==2.0.0
  Using cached torch-2.0.0-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchvision==0.15.1
  Using cached torchvision-0.15.1-cp311-cp311-manylinux1_x86_64.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.0)
  Using cached nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.0)
  Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.0)
  Using cached nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.0)
  Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.0)
  Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (

In [2]:
import pandas as pd
import librosa
from IPython.display import Audio

# Load CSV
csv_path = '/content/val_split.csv'
df = pd.read_csv(csv_path)

# Get the first audio file path
audio_path = df['filepath'].iloc[0]

# Load audio
waveform, sr = librosa.load(audio_path, sr=None)

# Play audio
Audio(waveform, rate=sr)


In [4]:
import nltk
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [5]:
!pip install peft==0.4.0

Collecting peft==0.4.0
  Downloading peft-0.4.0-py3-none-any.whl.metadata (21 kB)
Downloading peft-0.4.0-py3-none-any.whl (72 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.16.0
    Uninstalling peft-0.16.0:
      Successfully uninstalled peft-0.16.0
Successfully installed peft-0.4.0


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from transformers import HubertModel, Wav2Vec2FeatureExtractor, BertModel, BertTokenizer
import torchaudio
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import nlpaug.augmenter.word as naw
import random
from peft import LoraConfig, get_peft_model
from scipy.stats import pearsonr

# Dataset with fixed augmentation (unchanged)
class MultimodalIEMOCAPDataset(Dataset):
    def __init__(self, csv_file, audio_feature_extractor, text_tokenizer, augment=True, max_audio_samples=128000):
        self.df = pd.read_csv(csv_file)
        self.audio_feature_extractor = audio_feature_extractor
        self.text_tokenizer = text_tokenizer
        self.augment = augment
        self.text_augmenter = naw.SynonymAug(aug_p=0.3) if augment else None
        self.max_audio_samples = max_audio_samples

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        audio_path = self.df.iloc[idx]['filepath']
        transcript = self.df.iloc[idx]['transcription']
        vad_label = self.df.iloc[idx]['EmoVal'].astype(np.float32)

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file missing: {audio_path}")

        try:
            audio, sr = torchaudio.load(audio_path)
            if audio.abs().mean() < 1e-5:
                raise ValueError(f"Silent audio: {audio_path}")
        except Exception as e:
            raise RuntimeError(f"Error loading audio {audio_path}: {str(e)}")

        if sr != 16000:
            audio = torchaudio.transforms.Resample(sr, 16000)(audio)

        audio = audio.squeeze(0)  # Remove channel dim if mono
        if audio.dim() > 1:
            audio = audio[0]  # Take first channel if stereo

        # Pad or truncate to max_audio_samples
        if audio.size(0) > self.max_audio_samples:
            audio = audio[:self.max_audio_samples]
        elif audio.size(0) < self.max_audio_samples:
            audio = torch.nn.functional.pad(audio, (0, self.max_audio_samples - audio.size(0)))

        if self.augment and random.random() < 0.5:
            # Add noise
            noise = torch.randn_like(audio) * 0.005
            audio = audio + noise

            # Speed augmentation
            try:
                speed_factor = random.uniform(0.9, 1.1)
                effect = torch.tensor(audio).unsqueeze(0)  # [1, samples]
                augmented_audio, new_sr = torchaudio.sox_effects.apply_effects_tensor(
                    effect,
                    sample_rate=16000,
                    effects=[["speed", str(speed_factor)], ["rate", "16000"]]
                )
                audio = augmented_audio.squeeze(0)
                # Re-pad or truncate to max_audio_samples
                if audio.size(0) > self.max_audio_samples:
                    audio = audio[:self.max_audio_samples]
                elif audio.size(0) < self.max_audio_samples:
                    audio = torch.nn.functional.pad(audio, (0, self.max_audio_samples - audio.size(0)))
            except Exception as e:
                print(f"Speed augmentation failed for {audio_path}: {e}")
                # Keep original audio

        audio = audio.numpy()

        if self.augment and self.text_augmenter and random.random() < 0.3:
            try:
                transcript = self.text_augmenter.augment(transcript)[0]
            except Exception as e:
                print(f"Text augmentation failed for {audio_path}: {e}")
                # Keep original transcript

        audio_inputs = self.audio_feature_extractor(
            audio,
            sampling_rate=16000,
            return_tensors="pt",
            padding=False,
            truncation=False
        )

        text_inputs = self.text_tokenizer(
            transcript,
            padding=False,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )

        return {
            'audio_values': audio_inputs['input_values'].squeeze(0),
            'input_ids': text_inputs['input_ids'].squeeze(0),
            'attention_mask': text_inputs['attention_mask'].squeeze(0)
        }, torch.tensor(vad_label)

# Dynamic collation (unchanged)
def dynamic_collate_fn(batch):
    inputs, labels = zip(*batch)
    audio_values = [item['audio_values'] for item in inputs]
    input_ids = [item['input_ids'] for item in inputs]
    attention_masks = [item['attention_mask'] for item in inputs]

    audio_values_padded = pad_sequence(audio_values, batch_first=True, padding_value=0.0)
    input_ids_padded = pad_sequence(input_ids, batch_first=True, padding_value=0)
    attention_masks_padded = pad_sequence(attention_masks, batch_first=True, padding_value=0)

    labels_stacked = torch.stack(labels)

    return {
        'audio_values': audio_values_padded,
        'input_ids': input_ids_padded,
        'attention_mask': attention_masks_padded
    }, labels_stacked

# Label smoothing loss (unchanged)
class SmoothMSELoss(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing
        self.mse = nn.MSELoss()

    def forward(self, pred, target):
        smooth_target = target * (1 - self.smoothing) + 3.0 * self.smoothing
        return self.mse(pred, smooth_target)

# Updated Transformer model with LoRA for both Hubert and BERT
class ValenceRegressor(nn.Module):
    def __init__(self, audio_dim=768, text_dim=768, hidden_dim=192, num_heads=6, num_layers=2, dropout=0.5):
        super().__init__()

        # Audio encoder (Hubert) with LoRA
        self.hubert = HubertModel.from_pretrained("facebook/hubert-base-ls960")
        hubert_lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=dropout,
            bias="none"
        )
        self.hubert = get_peft_model(self.hubert, hubert_lora_config)

        # Text encoder (BERT) with LoRA
        self.text_encoder = BertModel.from_pretrained("bert-base-uncased")
        bert_lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["query", "value"],
            lora_dropout=dropout,
            bias="none"
        )
        self.text_encoder = get_peft_model(self.text_encoder, bert_lora_config)

        # Audio transformer layers
        self.audio_transformer = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=audio_dim,
                nhead=num_heads,
                dim_feedforward=hidden_dim * 4,
                dropout=dropout,
                batch_first=True
            ) for _ in range(num_layers)
        ])
        self.audio_layer_norm = nn.LayerNorm(audio_dim)

        # Audio attention pooling
        self.audio_attention_pool = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, 1)
        )

        # Projection layers
        self.audio_projection = nn.Linear(audio_dim, hidden_dim)
        self.text_projection = nn.Linear(text_dim, hidden_dim)

        # Cross-attention mechanisms
        self.audio_to_text_attention = nn.MultiheadAttention(
            embed_dim=hidden_dim,
            num_heads=num_heads // 2,
            dropout=dropout,
            batch_first=True
        )
        self.text_to_audio_attention = nn.MultiheadAttention(
            embed_dim=hidden_dim,
            num_heads=num_heads // 2,
            dropout=dropout,
            batch_first=True
        )

        # Gating layers
        self.audio_gate = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.Sigmoid()
        )
        self.text_gate = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.Sigmoid()
        )

        # Fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim * 2),
            nn.LayerNorm(hidden_dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Shared fully connected layer
        self.shared_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )

        # Output branch
        self.output_branch = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout * 0.5),
            nn.Linear(hidden_dim // 2, 1)
        )

    def audio_attention_pooling(self, x, audio_mask=None):
        weights = self.audio_attention_pool(x)
        if audio_mask is not None:
            weights = weights.masked_fill(~audio_mask.bool().unsqueeze(-1), float('-inf'))
        weights = torch.softmax(weights, dim=1)
        output = torch.bmm(weights.transpose(1, 2), x)
        return output.squeeze(1)

    def forward(self, audio_values, input_ids, attention_mask):
        # Process audio with Hubert
        audio_features = self.hubert(audio_values).last_hidden_state
        audio_mask = (audio_features.abs().sum(dim=-1) > 1e-6)

        # Process audio with transformer layers
        audio_repr = audio_features
        for layer in self.audio_transformer:
            audio_key_padding_mask = (~audio_mask).float()
            audio_repr = layer(audio_repr, src_key_padding_mask=audio_key_padding_mask)

        audio_repr = self.audio_layer_norm(audio_repr)

        # Process text with BERT
        text_outputs = self.text_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        text_repr = text_outputs.last_hidden_state

        # Projection
        audio_proj = self.audio_projection(audio_repr)
        text_proj = self.text_projection(text_repr)

        # Cross-attention
        audio_attended_text, _ = self.audio_to_text_attention(
            query=audio_proj,
            key=text_proj,
            value=text_proj,
            key_padding_mask=(1 - attention_mask).bool()
        )
        text_attended_audio, _ = self.text_to_audio_attention(
            query=text_proj,
            key=audio_proj,
            value=audio_proj,
            key_padding_mask=(~audio_mask).bool()
        )

        # Gating and fusion
        audio_concat = torch.cat([audio_proj, audio_attended_text], dim=-1)
        text_concat = torch.cat([text_proj, text_attended_audio], dim=-1)

        audio_gate_value = self.audio_gate(audio_concat)
        text_gate_value = self.text_gate(text_concat)

        gated_audio = audio_proj * audio_gate_value
        gated_text = text_proj * text_gate_value

        # Pooling
        pooled_audio = self.audio_attention_pooling(gated_audio, audio_mask)
        text_sum = torch.sum(gated_text * attention_mask.unsqueeze(-1), dim=1)
        text_count = torch.sum(attention_mask, dim=1, keepdim=True).clamp(min=1)
        pooled_text = text_sum / text_count

        # Fusion and output
        fused = torch.cat([pooled_audio, pooled_text], dim=-1)
        joint_repr = self.fusion_layer(fused)
        shared = self.shared_fc(joint_repr)
        output = self.output_branch(shared)
        scaled_output = 1.0 + 4.0 * torch.sigmoid(output)

        return scaled_output

# NEW: Functions to save and load LoRA weights only
def save_lora_weights(model, save_dir):
    """Save only LoRA weights and custom layers"""
    os.makedirs(save_dir, exist_ok=True)

    # Save HuBERT LoRA weights
    hubert_lora_dir = os.path.join(save_dir, "hubert_lora")
    model.hubert.save_pretrained(hubert_lora_dir)
    print(f"Saved HuBERT LoRA weights to {hubert_lora_dir}")

    # Save BERT LoRA weights
    bert_lora_dir = os.path.join(save_dir, "bert_lora")
    model.text_encoder.save_pretrained(bert_lora_dir)
    print(f"Saved BERT LoRA weights to {bert_lora_dir}")

    # Save custom layers (everything except the base models)
    custom_layers_state = {}
    for name, param in model.named_parameters():
        if not (name.startswith('hubert.') or name.startswith('text_encoder.')):
            custom_layers_state[name] = param

    custom_layers_path = os.path.join(save_dir, "custom_layers.pth")
    torch.save(custom_layers_state, custom_layers_path)
    print(f"Saved custom layers to {custom_layers_path}")

    # Save model configuration
    config = {
        'audio_dim': 768,
        'text_dim': 768,
        'hidden_dim': 192,
        'num_heads': 6,
        'num_layers': 2,
        'dropout': 0.5,
        'hubert_lora_config': {
            'r': 16,
            'lora_alpha': 32,
            'target_modules': ["q_proj", "v_proj"],
            'lora_dropout': 0.5,
            'bias': "none"
        },
        'bert_lora_config': {
            'r': 16,
            'lora_alpha': 32,
            'target_modules': ["query", "value"],
            'lora_dropout': 0.5,
            'bias': "none"
        }
    }

    config_path = os.path.join(save_dir, "config.json")
    import json
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2)
    print(f"Saved model configuration to {config_path}")

    # Calculate and print size information
    total_size = 0
    for root, dirs, files in os.walk(save_dir):
        for file in files:
            total_size += os.path.getsize(os.path.join(root, file))

    print(f"Total LoRA checkpoint size: {total_size / (1024*1024):.2f} MB")
    return total_size

def load_lora_model(save_dir, device=None):
    """Load model with LoRA weights"""
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load configuration
    config_path = os.path.join(save_dir, "config.json")
    import json
    with open(config_path, 'r') as f:
        config = json.load(f)

    # Create model instance
    model = ValenceRegressor(
        audio_dim=config['audio_dim'],
        text_dim=config['text_dim'],
        hidden_dim=config['hidden_dim'],
        num_heads=config['num_heads'],
        num_layers=config['num_layers'],
        dropout=config['dropout']
    )

    # Load HuBERT LoRA weights
    hubert_lora_dir = os.path.join(save_dir, "hubert_lora")
    from peft import PeftModel
    model.hubert = PeftModel.from_pretrained(
        model.hubert.get_base_model(),
        hubert_lora_dir
    )

    # Load BERT LoRA weights
    bert_lora_dir = os.path.join(save_dir, "bert_lora")
    model.text_encoder = PeftModel.from_pretrained(
        model.text_encoder.get_base_model(),
        bert_lora_dir
    )

    # Load custom layers
    custom_layers_path = os.path.join(save_dir, "custom_layers.pth")
    custom_layers_state = torch.load(custom_layers_path, map_location=device)

    # Load custom layer weights
    model_state = model.state_dict()
    model_state.update(custom_layers_state)
    model.load_state_dict(model_state)

    model = model.to(device)
    print(f"Loaded LoRA model from {save_dir}")

    return model

# Training function (updated to save LoRA weights only)
def train_valence_model(model, train_loader, val_loader,
                        num_epochs=10, lr=5e-5, max_norm=0.5, device=torch.device("cpu")):
    model = model.to(device)

    # Optimize only LoRA parameters and custom layers
    optimizer = optim.AdamW(
        model.parameters(),  # All parameters, but only LoRA and custom layers are trainable
        lr=lr,
        weight_decay=1e-2
    )
    criterion = SmoothMSELoss(smoothing=0.1)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=3, T_mult=2, eta_min=1e-6
    )

    best_val_loss = float('inf')
    patience_counter = 0
    max_patience = 7

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        num_train_batches = 0

        train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train Valence]")

        for batch_inputs, labels in train_pbar:
            try:
                audio_values = batch_inputs['audio_values'].to(device)
                input_ids = batch_inputs['input_ids'].to(device)
                attention_mask = batch_inputs['attention_mask'].to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                outputs = model(audio_values, input_ids, attention_mask)
                loss = criterion(outputs.squeeze(), labels)

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
                optimizer.step()

                train_loss += loss.item()
                num_train_batches += 1

                train_pbar.set_postfix({'loss': f"{loss.item():.4f}"})

            except Exception as e:
                print(f"Error in training batch: {e}")
                continue

        scheduler.step()

        if num_train_batches > 0:
            train_loss /= num_train_batches

        model.eval()
        val_loss = 0.0
        num_val_batches = 0

        val_pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val Valence]")

        with torch.no_grad():
            for batch_inputs, labels in val_pbar:
                try:
                    audio_values = batch_inputs['audio_values'].to(device)
                    input_ids = batch_inputs['input_ids'].to(device)
                    attention_mask = batch_inputs['attention_mask'].to(device)
                    labels = labels.to(device)

                    outputs = model(audio_values, input_ids, attention_mask)
                    loss = criterion(outputs.squeeze(), labels)

                    val_loss += loss.item()
                    num_val_batches += 1

                    val_pbar.set_postfix({'loss': f"{loss.item():.4f}"})

                except Exception as e:
                    print(f"Error in validation batch: {e}")
                    continue

        if num_val_batches > 0:
            val_loss /= num_val_batches

        print(f"Epoch {epoch+1}/{num_epochs} [Valence] Results:")
        print(f"  Train Loss: {train_loss:.4f}")
        print(f"  Val Loss: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Save LoRA weights only instead of full model
            save_lora_weights(model, '/content/best_valence_regressor_lora')
            print(f"  Saved best LoRA model (val_loss: {val_loss:.4f})")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= max_patience:
                print(f"Early stopping after {epoch+1} epochs without improvement")
                break

    return best_val_loss

# Main execution (training only)
def main():
    # Check for GPU availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Configuration - Updated CSV file paths
    train_csv = "/content/train_split.csv"
    val_csv = "/content/val_split.csv"

    audio_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
    text_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    # Dataset and loaders
    train_dataset = MultimodalIEMOCAPDataset(
        csv_file=train_csv,
        audio_feature_extractor=audio_feature_extractor,
        text_tokenizer=text_tokenizer,
        augment=True,
        max_audio_samples=128000
    )
    val_dataset = MultimodalIEMOCAPDataset(
        csv_file=val_csv,
        audio_feature_extractor=audio_feature_extractor,
        text_tokenizer=text_tokenizer,
        augment=False,
        max_audio_samples=128000
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=0,
        pin_memory=True if torch.cuda.is_available() else False,
        collate_fn=dynamic_collate_fn
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=0,
        pin_memory=True if torch.cuda.is_available() else False,
        collate_fn=dynamic_collate_fn
    )

    # Model
    model = ValenceRegressor(
        audio_dim=768,
        text_dim=768,
        hidden_dim=192,
        num_heads=6,
        num_layers=2,
        dropout=0.5
    ).to(device)

    # Print trainable parameters info
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Trainable percentage: {100 * trainable_params / total_params:.2f}%")

    # Training
    print("Training Valence model...")
    best_val_loss = train_valence_model(
        model,
        train_loader,
        val_loader,
        num_epochs=10,
        lr=5e-5,
        max_norm=0.5,
        device=device
    )

    print(f"Best validation loss: {best_val_loss:.4f}")

    # Example of loading the saved model
    print("\nTesting model loading...")
    loaded_model = load_lora_model('/content/best_valence_regressor_lora', device)
    print("Model loaded successfully!")

if __name__ == "__main__":
    main()

Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/213 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/378M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Total parameters: 213,221,250
Trainable parameters: 9,367,298
Trainable percentage: 4.39%
Training Valence model...


  effect = torch.tensor(audio).unsqueeze(0)  # [1, samples]
Epoch 1/10 [Train Valence]: 100%|██████████| 1004/1004 [15:28<00:00,  1.08it/s, loss=0.4587]
  return torch._transformer_encoder_layer_fwd(
Epoch 1/10 [Val Valence]: 100%|██████████| 126/126 [00:47<00:00,  2.67it/s, loss=0.0810]


Epoch 1/10 [Valence] Results:
  Train Loss: 0.6674
  Val Loss: 0.4591
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.4591)


Epoch 2/10 [Train Valence]: 100%|██████████| 1004/1004 [15:38<00:00,  1.07it/s, loss=0.8201]
Epoch 2/10 [Val Valence]: 100%|██████████| 126/126 [00:45<00:00,  2.75it/s, loss=0.1476]


Epoch 2/10 [Valence] Results:
  Train Loss: 0.4721
  Val Loss: 0.3626
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.3626)


Epoch 3/10 [Train Valence]: 100%|██████████| 1004/1004 [15:25<00:00,  1.09it/s, loss=0.5448]
Epoch 3/10 [Val Valence]: 100%|██████████| 126/126 [00:45<00:00,  2.80it/s, loss=0.0444]


Epoch 3/10 [Valence] Results:
  Train Loss: 0.4253
  Val Loss: 0.3586
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.3586)


Epoch 4/10 [Train Valence]: 100%|██████████| 1004/1004 [15:18<00:00,  1.09it/s, loss=0.2729]
Epoch 4/10 [Val Valence]: 100%|██████████| 126/126 [00:44<00:00,  2.81it/s, loss=0.0808]


Epoch 4/10 [Valence] Results:
  Train Loss: 0.4127
  Val Loss: 0.3807


Epoch 5/10 [Train Valence]: 100%|██████████| 1004/1004 [15:21<00:00,  1.09it/s, loss=0.4873]
Epoch 5/10 [Val Valence]: 100%|██████████| 126/126 [00:45<00:00,  2.80it/s, loss=0.1030]


Epoch 5/10 [Valence] Results:
  Train Loss: 0.3797
  Val Loss: 0.3060
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.3060)


Epoch 6/10 [Train Valence]: 100%|██████████| 1004/1004 [15:21<00:00,  1.09it/s, loss=0.3677]
Epoch 6/10 [Val Valence]: 100%|██████████| 126/126 [00:46<00:00,  2.71it/s, loss=0.1096]


Epoch 6/10 [Valence] Results:
  Train Loss: 0.3433
  Val Loss: 0.2914
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.2914)


Epoch 7/10 [Train Valence]: 100%|██████████| 1004/1004 [15:40<00:00,  1.07it/s, loss=0.5327]
Epoch 7/10 [Val Valence]: 100%|██████████| 126/126 [00:46<00:00,  2.70it/s, loss=0.1933]


Epoch 7/10 [Valence] Results:
  Train Loss: 0.3159
  Val Loss: 0.2848
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.2848)


Epoch 8/10 [Train Valence]: 100%|██████████| 1004/1004 [15:42<00:00,  1.07it/s, loss=0.2873]
Epoch 8/10 [Val Valence]: 100%|██████████| 126/126 [00:46<00:00,  2.70it/s, loss=0.1067]


Epoch 8/10 [Valence] Results:
  Train Loss: 0.3020
  Val Loss: 0.2851


Epoch 9/10 [Train Valence]: 100%|██████████| 1004/1004 [15:44<00:00,  1.06it/s, loss=0.0864]
Epoch 9/10 [Val Valence]: 100%|██████████| 126/126 [00:46<00:00,  2.71it/s, loss=0.1247]


Epoch 9/10 [Valence] Results:
  Train Loss: 0.2839
  Val Loss: 0.2830
Saved HuBERT LoRA weights to /content/best_valence_regressor_lora/hubert_lora
Saved BERT LoRA weights to /content/best_valence_regressor_lora/bert_lora
Saved custom layers to /content/best_valence_regressor_lora/custom_layers.pth
Saved model configuration to /content/best_valence_regressor_lora/config.json
Total LoRA checkpoint size: 35.79 MB
  Saved best LoRA model (val_loss: 0.2830)


Epoch 10/10 [Train Valence]: 100%|██████████| 1004/1004 [15:36<00:00,  1.07it/s, loss=0.3724]
Epoch 10/10 [Val Valence]: 100%|██████████| 126/126 [00:46<00:00,  2.70it/s, loss=0.1594]


Epoch 10/10 [Valence] Results:
  Train Loss: 0.3050
  Val Loss: 0.2946
Best validation loss: 0.2830

Testing model loading...
Loaded LoRA model from /content/best_valence_regressor_lora
Model loaded successfully!


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import HubertModel, Wav2Vec2FeatureExtractor, BertModel, BertTokenizer
import pandas as pd
import numpy as np
from tqdm import tqdm
from peft import PeftModel
import os
from scipy.stats import pearsonr

# Reuse the dataset class (unchanged from your code)
class MultimodalIEMOCAPDataset(Dataset):
    def __init__(self, csv_file, audio_feature_extractor, text_tokenizer, augment=True, max_audio_samples=128000):
        self.df = pd.read_csv(csv_file)
        self.audio_feature_extractor = audio_feature_extractor
        self.text_tokenizer = text_tokenizer
        self.augment = augment
        self.text_augmenter = naw.SynonymAug(aug_p=0.3) if augment else None
        self.max_audio_samples = max_audio_samples

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        audio_path = self.df.iloc[idx]['filepath']
        transcript = self.df.iloc[idx]['transcription']
        vad_label = self.df.iloc[idx]['EmoVal'].astype(np.float32)

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file missing: {audio_path}")

        try:
            audio, sr = torchaudio.load(audio_path)
            if audio.abs().mean() < 1e-5:
                raise ValueError(f"Silent audio: {audio_path}")
        except Exception as e:
            raise RuntimeError(f"Error loading audio {audio_path}: {str(e)}")

        if sr != 16000:
            audio = torchaudio.transforms.Resample(sr, 16000)(audio)

        audio = audio.squeeze(0)  # Remove channel dim if mono
        if audio.dim() > 1:
            audio = audio[0]  # Take first channel if stereo

        # Pad or truncate to max_audio_samples
        if audio.size(0) > self.max_audio_samples:
            audio = audio[:self.max_audio_samples]
        elif audio.size(0) < self.max_audio_samples:
            audio = torch.nn.functional.pad(audio, (0, self.max_audio_samples - audio.size(0)))

        if self.augment and random.random() < 0.5:
            # Add noise
            noise = torch.randn_like(audio) * 0.005
            audio = audio + noise

            # Speed augmentation
            try:
                speed_factor = random.uniform(0.9, 1.1)
                effect = torch.tensor(audio).unsqueeze(0)  # [1, samples]
                augmented_audio, new_sr = torchaudio.sox_effects.apply_effects_tensor(
                    effect,
                    sample_rate=16000,
                    effects=[["speed", str(speed_factor)], ["rate", "16000"]]
                )
                audio = augmented_audio.squeeze(0)
                # Re-pad or truncate to max_audio_samples
                if audio.size(0) > self.max_audio_samples:
                    audio = audio[:self.max_audio_samples]
                elif audio.size(0) < self.max_audio_samples:
                    audio = torch.nn.functional.pad(audio, (0, self.max_audio_samples - audio.size(0)))
            except Exception as e:
                print(f"Speed augmentation failed for {audio_path}: {e}")
                # Keep original audio

        audio = audio.numpy()

        if self.augment and self.text_augmenter and random.random() < 0.3:
            try:
                transcript = self.text_augmenter.augment(transcript)[0]
            except Exception as e:
                print(f"Text augmentation failed for {audio_path}: {e}")
                # Keep original transcript

        audio_inputs = self.audio_feature_extractor(
            audio,
            sampling_rate=16000,
            return_tensors="pt",
            padding=False,
            truncation=False
        )

        text_inputs = self.text_tokenizer(
            transcript,
            padding=False,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )

        return {
            'audio_values': audio_inputs['input_values'].squeeze(0),
            'input_ids': text_inputs['input_ids'].squeeze(0),
            'attention_mask': text_inputs['attention_mask'].squeeze(0)
        }, torch.tensor(vad_label)

# Reuse the dynamic collation function (unchanged)
def dynamic_collate_fn(batch):
    inputs, labels = zip(*batch)
    audio_values = [item['audio_values'] for item in inputs]
    input_ids = [item['input_ids'] for item in inputs]
    attention_masks = [item['attention_mask'] for item in inputs]

    audio_values_padded = pad_sequence(audio_values, batch_first=True, padding_value=0.0)
    input_ids_padded = pad_sequence(input_ids, batch_first=True, padding_value=0)
    attention_masks_padded = pad_sequence(attention_masks, batch_first=True, padding_value=0)

    labels_stacked = torch.stack(labels)

    return {
        'audio_values': audio_values_padded,
        'input_ids': input_ids_padded,
        'attention_mask': attention_masks_padded
    }, labels_stacked

# Reuse the SmoothMSELoss (unchanged)
class SmoothMSELoss(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing
        self.mse = nn.MSELoss()

    def forward(self, pred, target):
        smooth_target = target * (1 - self.smoothing) + 3.0 * self.smoothing
        return self.mse(pred, smooth_target)

# Reuse the ValenceRegressor model (unchanged)
class ValenceRegressor(nn.Module):
    def __init__(self, audio_dim=768, text_dim=768, hidden_dim=192, num_heads=6, num_layers=2, dropout=0.5):
        super().__init__()

        # Audio encoder (Hubert) with LoRA
        self.hubert = HubertModel.from_pretrained("facebook/hubert-base-ls960")
        hubert_lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=dropout,
            bias="none"
        )
        self.hubert = get_peft_model(self.hubert, hubert_lora_config)

        # Text encoder (BERT) with LoRA
        self.text_encoder = BertModel.from_pretrained("bert-base-uncased")
        bert_lora_config = LoraConfig(
            r=16,
            lora_alpha=32,
            target_modules=["query", "value"],
            lora_dropout=dropout,
            bias="none"
        )
        self.text_encoder = get_peft_model(self.text_encoder, bert_lora_config)

        # Audio transformer layers
        self.audio_transformer = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=audio_dim,
                nhead=num_heads,
                dim_feedforward=hidden_dim * 4,
                dropout=dropout,
                batch_first=True
            ) for _ in range(num_layers)
        ])
        self.audio_layer_norm = nn.LayerNorm(audio_dim)

        # Audio attention pooling
        self.audio_attention_pool = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, 1)
        )

        # Projection layers
        self.audio_projection = nn.Linear(audio_dim, hidden_dim)
        self.text_projection = nn.Linear(text_dim, hidden_dim)

        # Cross-attention mechanisms
        self.audio_to_text_attention = nn.MultiheadAttention(
            embed_dim=hidden_dim,
            num_heads=num_heads // 2,
            dropout=dropout,
            batch_first=True
        )
        self.text_to_audio_attention = nn.MultiheadAttention(
            embed_dim=hidden_dim,
            num_heads=num_heads // 2,
            dropout=dropout,
            batch_first=True
        )

        # Gating layers
        self.audio_gate = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.Sigmoid()
        )
        self.text_gate = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.Sigmoid()
        )

        # Fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim * 2),
            nn.LayerNorm(hidden_dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Shared fully connected layer
        self.shared_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )

        # Output branch
        self.output_branch = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout * 0.5),
            nn.Linear(hidden_dim // 2, 1)
        )

    def audio_attention_pooling(self, x, audio_mask=None):
        weights = self.audio_attention_pool(x)
        if audio_mask is not None:
            weights = weights.masked_fill(~audio_mask.bool().unsqueeze(-1), float('-inf'))
        weights = torch.softmax(weights, dim=1)
        output = torch.bmm(weights.transpose(1, 2), x)
        return output.squeeze(1)

    def forward(self, audio_values, input_ids, attention_mask):
        # Process audio with Hubert
        audio_features = self.hubert(audio_values).last_hidden_state
        audio_mask = (audio_features.abs().sum(dim=-1) > 1e-6)

        # Process audio with transformer layers
        audio_repr = audio_features
        for layer in self.audio_transformer:
            audio_key_padding_mask = (~audio_mask).float()
            audio_repr = layer(audio_repr, src_key_padding_mask=audio_key_padding_mask)

        audio_repr = self.audio_layer_norm(audio_repr)

        # Process text with BERT
        text_outputs = self.text_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        text_repr = text_outputs.last_hidden_state

        # Projection
        audio_proj = self.audio_projection(audio_repr)
        text_proj = self.text_projection(text_repr)

        # Cross-attention
        audio_attended_text, _ = self.audio_to_text_attention(
            query=audio_proj,
            key=text_proj,
            value=text_proj,
            key_padding_mask=(1 - attention_mask).bool()
        )
        text_attended_audio, _ = self.text_to_audio_attention(
            query=text_proj,
            key=audio_proj,
            value=audio_proj,
            key_padding_mask=(~audio_mask).bool()
        )

        # Gating and fusion
        audio_concat = torch.cat([audio_proj, audio_attended_text], dim=-1)
        text_concat = torch.cat([text_proj, text_attended_audio], dim=-1)

        audio_gate_value = self.audio_gate(audio_concat)
        text_gate_value = self.text_gate(text_concat)

        gated_audio = audio_proj * audio_gate_value
        gated_text = text_proj * text_gate_value

        # Pooling
        pooled_audio = self.audio_attention_pooling(gated_audio, audio_mask)
        text_sum = torch.sum(gated_text * attention_mask.unsqueeze(-1), dim=1)
        text_count = torch.sum(attention_mask, dim=1, keepdim=True).clamp(min=1)
        pooled_text = text_sum / text_count

        # Fusion and output
        fused = torch.cat([pooled_audio, pooled_text], dim=-1)
        joint_repr = self.fusion_layer(fused)
        shared = self.shared_fc(joint_repr)
        output = self.output_branch(shared)
        scaled_output = 1.0 + 4.0 * torch.sigmoid(output)

        return scaled_output

# Reuse the load_lora_model function (unchanged)
def load_lora_model(save_dir, device=None):
    """Load model with LoRA weights"""
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load configuration
    config_path = os.path.join(save_dir, "config.json")
    import json
    with open(config_path, 'r') as f:
        config = json.load(f)

    # Create model instance
    model = ValenceRegressor(
        audio_dim=config['audio_dim'],
        text_dim=config['text_dim'],
        hidden_dim=config['hidden_dim'],
        num_heads=config['num_heads'],
        num_layers=config['num_layers'],
        dropout=config['dropout']
    )

    # Load HuBERT LoRA weights
    hubert_lora_dir = os.path.join(save_dir, "hubert_lora")
    model.hubert = PeftModel.from_pretrained(
        model.hubert.get_base_model(),
        hubert_lora_dir
    )

    # Load BERT LoRA weights
    bert_lora_dir = os.path.join(save_dir, "bert_lora")
    model.text_encoder = PeftModel.from_pretrained(
        model.text_encoder.get_base_model(),
        bert_lora_dir
    )

    # Load custom layers
    custom_layers_path = os.path.join(save_dir, "custom_layers.pth")
    custom_layers_state = torch.load(custom_layers_path, map_location=device)

    # Load custom layer weights
    model_state = model.state_dict()
    model_state.update(custom_layers_stat e)
    model.load_state_dict(model_state)

    model = model.to(device)
    print(f"Loaded LoRA model from {save_dir}")

    return model

# New function to test the model on the test set
def test_valence_model(model, test_loader, device=torch.device("cpu")):
    """Test the ValenceRegressor model on the test set."""
    model = model.to(device)
    model.eval()  # Set model to evaluation mode
    criterion = SmoothMSELoss(smoothing=0.1)

    test_loss = 0.0
    num_test_batches = 0
    all_preds = []
    all_labels = []

    test_pbar = tqdm(test_loader, desc="Testing Valence Model")

    with torch.no_grad():
        for batch_inputs, labels in test_pbar:
            try:
                audio_values = batch_inputs['audio_values'].to(device)
                input_ids = batch_inputs['input_ids'].to(device)
                attention_mask = batch_inputs['attention_mask'].to(device)
                labels = labels.to(device)

                # Forward pass
                outputs = model(audio_values, input_ids, attention_mask)
                loss = criterion(outputs.squeeze(), labels)

                test_loss += loss.item()
                num_test_batches += 1

                # Store predictions and labels for correlation
                all_preds.extend(outputs.squeeze().cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                test_pbar.set_postfix({'loss': f"{loss.item():.4f}"})

            except Exception as e:
                print(f"Error in test batch: {e}")
                continue

    if num_test_batches > 0:
        test_loss /= num_test_batches

    # Compute Pearson correlation coefficient
    pearson_corr, _ = pearsonr(all_preds, all_labels) if len(all_preds) > 1 else (0.0, 0.0)

    print(f"Test Results:")
    print(f"  Test Loss (Smooth MSE): {test_loss:.4f}")
    print(f"  Pearson Correlation: {pearson_corr:.4f}")

    return test_loss, pearson_corr

# Main execution for testing
def main():
    # Check for GPU availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Configuration
    test_csv = "/content/test_split.csv"  # Path to test CSV
    save_dir = "/content/best_valence_regressor_lora"  # Path to saved model

    # Initialize feature extractor and tokenizer
    audio_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
    text_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    # Create test dataset (no augmentation)
    test_dataset = MultimodalIEMOCAPDataset(
        csv_file=test_csv,
        audio_feature_extractor=audio_feature_extractor,
        text_tokenizer=text_tokenizer,
        augment=False,  # No augmentation for testing
        max_audio_samples=128000
    )

    # Create test DataLoader
    test_loader = DataLoader(
        test_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=0,
        pin_memory=True if torch.cuda.is_available() else False,
        collate_fn=dynamic_collate_fn
    )

    # Load the trained model
    print("Loading trained model...")
    model = load_lora_model(save_dir, device)

    # Test the model
    print("\nTesting model on test set...")
    test_loss, pearson_corr = test_valence_model(model, test_loader, device)

    print(f"\nFinal Test Results:")
    print(f"  Test Loss (Smooth MSE): {test_loss:.4f}")
    print(f"  Pearson Correlation: {pearson_corr:.4f}")

if __name__ == "__main__":
    main()

Using device: cuda
Loading trained model...




Loaded LoRA model from /content/best_valence_regressor_lora

Testing model on test set...


  return torch._transformer_encoder_layer_fwd(
Testing Valence Model: 100%|██████████| 126/126 [00:49<00:00,  2.54it/s, loss=0.5192]


Test Results:
  Test Loss (Smooth MSE): 0.2815
  Pearson Correlation: 0.7622

Final Test Results:
  Test Loss (Smooth MSE): 0.2815
  Pearson Correlation: 0.7622


In [None]:
import torch
import gc

# Delete all models, tensors, and optimizers
model = None
audio_model = None
optimizer = None
del model, audio_model, optimizer
gc.collect()  # Collect garbage to free CPU memory
torch.cuda.empty_cache()  # Clear GPU memory cache

# Verify memory usage
print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
print(f"GPU Memory Cached: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

GPU Memory Allocated: 834.82 MB
GPU Memory Cached: 3670.00 MB
