### Params

In [1]:
sample_rate = 16000
length = 15
embedding_size= 768

### Load dataset

In [2]:
from birdset.datamodule.beans_datamodule import BEANSDataModule
from birdset.datamodule.base_datamodule import DatasetConfig

datasetconfig = DatasetConfig(dataset_name='beans_watkins', hf_path='DBD-research-group/beans_watkins', hf_name='default',sample_rate=sample_rate)

datamodule = BEANSDataModule(dataset=datasetconfig)
dataset = datamodule._load_data()
#datamodule = datamodule.prepare_data() 
dataset['train'][0]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Map:   0%|          | 0/339 [00:00<?, ? examples/s]

Map:   0%|          | 0/339 [00:00<?, ? examples/s]

Map:   0%|          | 0/203 [00:00<?, ? examples/s]

{'audio': {'path': '83006002.wav',
  'array': array([0.01681028, 0.0108546 , 0.01608891, ..., 0.00757095, 0.00183893,
         0.00651151]),
  'sample_rate': 16000},
 'labels': 4}

### Cut to length

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

class FixedLengthDataset(Dataset):
    def __init__(self, dataset, sample_rate=sample_rate, length=length ):
        """
        dataset: The original dataset (list of tuples: (data, label))
        sample_rate: The desired sampling rate (e.g., 16 kHz)
        target_length: The desired length in terms of samples (e.g., 16000 for 1 second of audio at 16 kHz)
        """
        self.dataset = dataset
        self.sample_rate = sample_rate
        self.target_length = length * sample_rate

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        data = self.dataset[idx]['audio']['array']
        label = self.dataset[idx]['labels']
        data = torch.from_numpy(data).float()
        # Trim the data if it's too long
        if data.shape[0] > self.target_length:
            data = data[:self.target_length]

        # Pad the data if it's too short
        if data.shape[0] < self.target_length:
            padding = self.target_length - data.shape[0]
            data = F.pad(data, (0, padding))

        return data, label
    
train_dataset = FixedLengthDataset(dataset['train'])  
valid_dataset = FixedLengthDataset(dataset['valid'])
test_dataset = FixedLengthDataset(dataset['test'])

datasets = {'train': train_dataset, 'valid': valid_dataset, 'test': test_dataset}  

In [4]:
from birdset.datamodule.embedding_datamodule import EmbeddingModuleConfig
from birdset.modules.models.hubert import HubertSequenceClassifier
from birdset.configs.module_configs import NetworkConfig

embedding_module= NetworkConfig(model_type="waveform",sample_rate=sample_rate,normalize_spectrogram= False,
normalize_waveform= False,model=HubertSequenceClassifier(checkpoint= "facebook/hubert-base-ls960",num_classes=31))

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from birdset.modules.models.linear_classifier import LinearClassifier

classifier = LinearClassifier(num_classes=31, in_features=embedding_size).to(embedding_datamodule.device)
network_config = NetworkConfig(model_type="waveform",sample_rate=sample_rate,normalize_spectrogram= False,normalize_waveform= False,model=classifier)

import torch.nn.functional as F

output = F.softmax(classifier.forward(embedding), dim=1)
output

NameError: name 'embedding_datamodule' is not defined

### Training loop

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from torchmetrics.classification import Accuracy
from torch.utils.data import DataLoader, random_split, TensorDataset

class MyClassifier(pl.LightningModule):
    def __init__(self, embedding_model, datasets,num_classes=31, embedding_size=embedding_size,freeze_epochs=15, fine_tune_lr=1e-2,batch_size=32):
        super(MyClassifier, self).__init__()
        self.embedding_model = embedding_model  # Embedding model
        self.classifier = nn.Linear(embedding_size, num_classes)  # Final classification layer
        self.loss_fn = nn.CrossEntropyLoss()

        self.freeze_epochs = freeze_epochs  # Number of epochs before unfreezing
        self.fine_tune_lr = fine_tune_lr  # Learning rate for fine-tuning
        self.test_acc = Accuracy(task="multiclass",num_classes=num_classes)
        self.val_acc = Accuracy(task="multiclass",num_classes=num_classes)
        print(embedding_size)
        self.datasets = datasets
        self.batch_size = batch_size
        # Freeze the embedding model initially
        self.unfreeze_embedding_model()

    def forward(self, x):
        # Forward pass through the embedding model and the classifier
        #with torch.no_grad():  # No need to compute gradients for frozen embedding model
        features = self.embedding_model.get_embeddings(x)[0]
        output = self.classifier(features)
        
        return output

    def model_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.loss_fn(y_hat, y)
        preds = torch.softmax(y_hat,dim=1)
        return loss, preds, y

    def training_step(self, batch, batch_idx):
        train_loss, preds, targets = self.model_step(batch, batch_idx)
        self.log('train_loss', train_loss, on_step=False, on_epoch=True)
        return {"loss": train_loss}

    def validation_step(self, batch, batch_idx):
        val_loss, preds, targets = self.model_step(batch, batch_idx)     
        acc = self.val_acc(preds, targets)
        self.log('val_loss', val_loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True)
        return {"loss": val_loss, "preds": preds, "targets": targets}

    def test_step(self, batch, batch_idx):
        test_loss, preds, targets = self.model_step(batch, batch_idx)
        acc = self.test_acc(preds, targets)
        self.log('test_loss', test_loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('test_acc', acc, on_step=False, on_epoch=True, prog_bar=True)
        return {"loss": test_loss, "preds": preds, "targets": targets}

    def configure_optimizers(self):
        # Optimizer with learning rate scheduler
        self.optimizer = optim.AdamW(list(self.embedding_model.parameters()) + list(self.classifier.parameters()), lr=1e-4)

        # Learning rate schedule: we reduce the learning rate after freeze_epochs
        #scheduler = optim.lr_scheduler.LambdaLR(
            #optimizer, 
            #lr_lambda=lambda epoch: 1.0 if epoch < self.freeze_epochs else self.fine_tune_lr
        #)
        return [self.optimizer]#, [scheduler]

    def on_train_epoch_end(self):
        for param_group in self.optimizers().param_groups:
            print(param_group['lr'])
        # Unfreeze embedding model after freeze_epochs
        if self.current_epoch == self.freeze_epochs:
            print(f"Unfreezing embedding model at epoch {self.current_epoch}")
            #self.unfreeze_embedding_model()
            # Set fine-tune learning rate for all layers
            for param_group in self.optimizers().param_groups:
                param_group['lr'] = self.fine_tune_lr


    def train_dataloader(self):
        return DataLoader(self.datasets['train'], batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.datasets['valid'], batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.datasets['test'], batch_size=self.batch_size)


    def freeze_embedding_model(self):
        """Freeze the embedding model's parameters"""
        for param in self.embedding_model.parameters():
            param.requires_grad = False

    def unfreeze_embedding_model(self):
        """Unfreeze the embedding model's parameters"""
        for param in self.embedding_model.parameters():
            param.requires_grad = True


In [8]:
hybrid = MyClassifier(embedding_model=embedding_module.model, datasets=datasets, freeze_epochs=5, fine_tune_lr=1e-4)

trainer = pl.Trainer(max_epochs=10, accelerator="gpu", devices=1)
trainer.fit(hybrid)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name            | Type                     | Params | Mode 
---------------------------------------------------------------------
0 | embedding_model | HubertSequenceClassifier | 94.6 M | train
1 | classifier      | Linear                   | 23.8 K | train
2 | loss_fn         | CrossEntropyLoss         | 0      | train
3 | test_acc        | MulticlassAccuracy       | 0      | train
4 | val_acc         | MulticlassAccuracy       | 0      | train
---------------------------------------------------------------------
94.6 M    Trainable params
0         Non-trainable params
94.6 M    Total params
378.402   Total estimated model params size (MB)


768


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001
Unfreezing embedding model at epoch 5


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


Validation: |          | 0/? [00:00<?, ?it/s]

0.0001


`Trainer.fit` stopped: `max_epochs=10` reached.
