In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import soundfile as sf
from tqdm.notebook import tqdm
from glob import glob
from scipy.signal import butter, lfilter
import ast  # Import ast module to use literal_eval
from urllib.parse import unquote
import torch
import evaluate
import accelerate
from torch.utils.data import Dataset, DataLoader
from torch import nn
from sklearn.metrics import roc_auc_score
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
import noisereduce as nr
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from transformers import AutoModelForAudioClassification, AutoConfig, ASTFeatureExtractor
from typing import Optional, List
from sklearn.model_selection import StratifiedKFold, KFold
from collections import defaultdict
from cycler import cycler

In [4]:
print("PyTorch version:", torch.__version__)
print("Is CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Number of GPUs:", torch.cuda.device_count())
print("GPU Name:", torch.cuda.get_device_name(0))

PyTorch version: 2.2.0+cu121
Is CUDA available: True
CUDA version: 12.1
Number of GPUs: 1
GPU Name: NVIDIA RTX A6000


In [8]:
os.getcwd()

'/workspace'

In [5]:
DRIVE_FOLDER = "." #"/content/drive/MyDrive/Colab Notebooks"
KEEP_COLS = ['category_number', 'common_name', 'audio_length', 'type', 'remarks', 'quality', 'scientific_name', 'mp3_link', 'region']

class Config:
    dataset_dir = f"{DRIVE_FOLDER}/Audio_XenoCanto"
    labels_list = f"{DRIVE_FOLDER}/xeno_labels.csv"
    model_name = "ast_baseline"
    backbone_name = "MIT/ast-finetuned-audioset-10-10-0.4593"
    n_classes = 800 # number of classes in the dataset
    audio_sr = 16000 #Hz
    segment_length = 10  #s
    fft_window = 0.025 #s
    hop_window_length = 0.01 #s
    n_mels = 128
    low_cut = 1000 #Hz
    high_cut = 8000 #Hz
    top_db = 100
    batch_size = 4 
    num_workers = 0
    n_splits = 5
    log_dir = f"{DRIVE_FOLDER}/training_logs"
    max_lr = 1e-5
    epochs = 5
    weight_decay = 0.01
    lr_final_div = 1000
    amp = True
    grad_accum_steps = 1
    max_grad_norm = 1e7
    print_epoch_freq = 1
    print_freq = 200
    random_seed = 2046
    
    @classmethod
    def copy(cls):
        new_class = type('CustomConfig', (cls,), {k: v for k, v in cls.__dict__.items() if not k.startswith('__') and not callable(v)})
        return new_class
    
config = Config.copy()

## Bird Dataset

In [6]:
df_audio_meta = pd.read_csv(f"metadata.csv")
print(df_audio_meta.shape)
df_audio_meta = df_audio_meta.dropna().reset_index(drop=True)

# Filter out files that do not exist
df_audio_meta['file_exists'] = df_audio_meta['file_name'].apply(lambda x: os.path.exists(x))
df_audio_meta = df_audio_meta[df_audio_meta['file_exists']].reset_index(drop=True)

# parse scientific names
df_audio_meta['scientific_name'] = df_audio_meta['scientific_name'].apply(lambda x: "_".join(x.split(" ")))

# drop species with less than 2 samples
class_counts = df_audio_meta['scientific_name'].value_counts()
print(f"Number of classes with less than 2 samples: {len(class_counts[class_counts < 2])}")

df_audio_meta = df_audio_meta[df_audio_meta['scientific_name'].isin(class_counts[class_counts > 1].index)].copy().reset_index(drop=True)

# encode scientific names to label ids
label_ids_list = df_audio_meta['scientific_name'].unique().tolist()
label_ids_list.sort()
label_to_id = {label: i for i, label in enumerate(label_ids_list)}
df_audio_meta['species_id'] = df_audio_meta['scientific_name'].map(label_to_id)

# save the label mapping
label_mapping = pd.DataFrame(label_to_id.items(), columns=['scientific_name', 'species_id'])
label_mapping.to_csv(f"new_label_map.csv", index=False)

# drop samples with no labels
df_audio_meta.dropna(subset=['species_id'], inplace=True)
df_audio_meta.reset_index(drop=True, inplace=True)
df_audio_meta['species_id'] = df_audio_meta['species_id'].astype(int)

print(f"Number of classes in dataset: {df_audio_meta['species_id'].nunique()}")
print(f'Number of samples:', len(df_audio_meta))

# save the number of classes in the config
config.n_classes = df_audio_meta['species_id'].nunique()

# encode mp3 links to group ids for 5-folds
group_ids = df_audio_meta['mp3_link'].unique().tolist()
group_ids_map = {group_id: i for i, group_id in enumerate(group_ids)}
df_audio_meta['group_id'] = df_audio_meta['mp3_link'].map(group_ids_map)

df_audio_meta.head(5)

(13523, 10)
Number of classes with less than 2 samples: 72
Number of classes in dataset: 728
Number of samples: 11171


Unnamed: 0,file_name,category_number,common_name,audio_length,type,remarks,quality,mp3_link,scientific_name,region,file_exists,species_id,group_id
0,data/XC228210-Blue-crowned_Manakin_B_9369_0.wav,XC228210,Blue-crowned Manakin,0:20,call,ID certainty 80%. (Archiv. tape 393 side A tra...,B,//xeno-canto.org/sounds/uploaded/OOECIWCSWV/XC...,Lepidothrix_coronata,amazonas,True,329,0
1,data/XC228210-Blue-crowned_Manakin_B_9369_1.wav,XC228210,Blue-crowned Manakin,0:20,call,ID certainty 80%. (Archiv. tape 393 side A tra...,B,//xeno-canto.org/sounds/uploaded/OOECIWCSWV/XC...,Lepidothrix_coronata,amazonas,True,329,0
2,data/XC200163-PIPCOR03_0.wav,XC200163,Blue-crowned Manakin,0:42,"call, song","left bank of rio Negro - terra firme forest, w...",C,//xeno-canto.org/sounds/uploaded/DGVLLRYDXS/XC...,Lepidothrix_coronata,amazonas,True,329,1
3,data/XC200163-PIPCOR03_1.wav,XC200163,Blue-crowned Manakin,0:42,"call, song","left bank of rio Negro - terra firme forest, w...",C,//xeno-canto.org/sounds/uploaded/DGVLLRYDXS/XC...,Lepidothrix_coronata,amazonas,True,329,1
4,data/XC200163-PIPCOR03_2.wav,XC200163,Blue-crowned Manakin,0:42,"call, song","left bank of rio Negro - terra firme forest, w...",C,//xeno-canto.org/sounds/uploaded/DGVLLRYDXS/XC...,Lepidothrix_coronata,amazonas,True,329,1


In [7]:
def load_and_denoise(file_name):
    # Load audio file
    audio_data, sample_rate = librosa.load(file_name, sr=None)  # sr=None to preserve original sample rate
    # Reduce noise
    reduced_noise = nr.reduce_noise(y=audio_data, sr=sample_rate)
    # Calculate noise
    noise = audio_data - reduced_noise
    # Return a pandas Series containing the three audio data arrays
    return pd.Series([audio_data, reduced_noise, noise])

In [8]:
# Create a new DataFrame with columns for each type of audio data
# Apply the function to each row in the DataFrame and expand the results into separate columns
df_audio_meta[['original_audio', 'denoised_audio', 'noise']] = df_audio_meta['file_name'].apply(load_and_denoise)
df_audio_meta.head()

  sig_mult_above_thresh = (abs_sig_stft - sig_stft_smooth) / sig_stft_smooth


Unnamed: 0,file_name,category_number,common_name,audio_length,type,remarks,quality,mp3_link,scientific_name,region,file_exists,species_id,group_id,original_audio,denoised_audio,noise
0,data/XC228210-Blue-crowned_Manakin_B_9369_0.wav,XC228210,Blue-crowned Manakin,0:20,call,ID certainty 80%. (Archiv. tape 393 side A tra...,B,//xeno-canto.org/sounds/uploaded/OOECIWCSWV/XC...,Lepidothrix_coronata,amazonas,True,329,0,"[0.004058838, -0.008453369, 0.020111084, -0.00...","[-0.00055981963, 0.001044453, -0.0008553348, 0...","[0.0046186578, -0.009497822, 0.020966418, -0.0..."
1,data/XC228210-Blue-crowned_Manakin_B_9369_1.wav,XC228210,Blue-crowned Manakin,0:20,call,ID certainty 80%. (Archiv. tape 393 side A tra...,B,//xeno-canto.org/sounds/uploaded/OOECIWCSWV/XC...,Lepidothrix_coronata,amazonas,True,329,0,"[-0.0049438477, 0.0087890625, 0.004058838, 0.0...","[0.0003904775, 0.000830834, 0.00082218484, 0.0...","[-0.005334325, 0.007958229, 0.003236653, 0.002..."
2,data/XC200163-PIPCOR03_0.wav,XC200163,Blue-crowned Manakin,0:42,"call, song","left bank of rio Negro - terra firme forest, w...",C,//xeno-canto.org/sounds/uploaded/DGVLLRYDXS/XC...,Lepidothrix_coronata,amazonas,True,329,1,"[-3.0517578e-05, 0.0, -3.0517578e-05, 0.0, -3....","[-2.400732e-06, -8.714055e-07, -3.2778255e-06,...","[-2.8116847e-05, 8.714055e-07, -2.7239752e-05,..."
3,data/XC200163-PIPCOR03_1.wav,XC200163,Blue-crowned Manakin,0:42,"call, song","left bank of rio Negro - terra firme forest, w...",C,//xeno-canto.org/sounds/uploaded/DGVLLRYDXS/XC...,Lepidothrix_coronata,amazonas,True,329,1,"[0.0115356445, -0.007843018, 0.0052490234, 0.0...","[0.0035293808, 0.00043889682, 0.0012755911, 0....","[0.0080062635, -0.0082819145, 0.0039734324, 0...."
4,data/XC200163-PIPCOR03_2.wav,XC200163,Blue-crowned Manakin,0:42,"call, song","left bank of rio Negro - terra firme forest, w...",C,//xeno-canto.org/sounds/uploaded/DGVLLRYDXS/XC...,Lepidothrix_coronata,amazonas,True,329,1,"[-0.005493164, -0.021057129, -0.010803223, 0.0...","[0.003562114, -0.0011988133, -0.0022794271, -0...","[-0.009055278, -0.019858316, -0.008523796, 0.0..."


In [9]:
data_df = df_audio_meta[['original_audio', 'denoised_audio', 'noise', 'species_id', 'group_id']]
data_df.to_csv("birdset.csv")

In [10]:
data_df.head()

Unnamed: 0,original_audio,denoised_audio,noise,species_id,group_id
0,"[0.004058838, -0.008453369, 0.020111084, -0.00...","[-0.00055981963, 0.001044453, -0.0008553348, 0...","[0.0046186578, -0.009497822, 0.020966418, -0.0...",329,0
1,"[-0.0049438477, 0.0087890625, 0.004058838, 0.0...","[0.0003904775, 0.000830834, 0.00082218484, 0.0...","[-0.005334325, 0.007958229, 0.003236653, 0.002...",329,0
2,"[-3.0517578e-05, 0.0, -3.0517578e-05, 0.0, -3....","[-2.400732e-06, -8.714055e-07, -3.2778255e-06,...","[-2.8116847e-05, 8.714055e-07, -2.7239752e-05,...",329,1
3,"[0.0115356445, -0.007843018, 0.0052490234, 0.0...","[0.0035293808, 0.00043889682, 0.0012755911, 0....","[0.0080062635, -0.0082819145, 0.0039734324, 0....",329,1
4,"[-0.005493164, -0.021057129, -0.010803223, 0.0...","[0.003562114, -0.0011988133, -0.0022794271, -0...","[-0.009055278, -0.019858316, -0.008523796, 0.0...",329,1


In [11]:

npres1 = [np.isnan(data_df.iloc[i]['original_audio']).any() for i in range(len(data_df))]
npres2 = [np.isnan(data_df.iloc[i]['denoised_audio']).any() for i in range(len(data_df))]
npres3 = [np.isnan(data_df.iloc[i]['noise']).any() for i in range(len(data_df))]
print(any(npres1),np.where(npres2)[0], np.where(npres3)[0]) 

False [2093 2094 2095] [2093 2094 2095]


In [12]:
data_df = data_df.drop(index=[2093, 2094, 2095])

In [14]:
import torch
import torch.nn.functional as F

class CrossEntropyLoss:
    def __init__(self, w):
        self.class_weights = w

    def weighted_cross_entropy_with_logits(self, logits, targets):
        """
        This function applies a weighted cross-entropy loss.

        Args:
        logits (torch.Tensor): Logits output from the model (before softmax).
        targets (torch.Tensor): Ground truth labels.
        class_weights (torch.Tensor): Tensor of weights for each class.

        Returns:
        torch.Tensor: Computed weighted cross-entropy loss.
        """
        return F.cross_entropy(logits.float(), targets, weight=self.class_weights)


In [79]:
class ROCAUCScore:
    def __init__(self, average='macro', multi_class='ovo'):
        self.num_classes = 728
        self.average = average
        self.multi_class = multi_class  # 'ovo' (one-vs-one) or 'ovr' (one-vs-rest)
        self.label_ids = np.arange(self.num_classes)

    def roc_auc_loss(self, logits, targets):
        """
        This function computes the ROC-AUC loss for multi-class classification.

        Args:
        logits (torch.Tensor): Logits output from the model (before softmax).
        targets (torch.Tensor): Ground truth labels.

        Returns:
        torch.Tensor: Computed ROC-AUC loss.
        """
        # Apply softmax to get probabilities
        # Apply softmax to convert logits to probabilities
        probas = torch.exp(F.log_softmax(logits, dim=1))

        # Detach and move to CPU for sklearn compatibility
        #probas = logits
        probas = probas.detach().cpu().numpy()
        targets = targets.detach().cpu().numpy()
        
        df_scores = pd.DataFrame(probas, columns=self.label_ids)
        df_scores['target'] = targets
        
        # remove samples with classes which is predeicted as 0 in all samples
        unscored_cols = df_scores.columns[df_scores.sum(axis=0) == 0]
        rows_to_remove = df_scores['target'].isin(unscored_cols)
        df_scores = df_scores[~rows_to_remove]
        
        eval_score = roc_auc_score(
            y_true=df_scores['target'].values,
            y_score=df_scores[self.label_ids].values,
            average=self.average, 
            multi_class=self.multi_class,
            labels=self.label_ids
        )
        
        # Convert the evaluation score back to a PyTorch tensor and move it to the same device as logits
        #eval_score_tensor = torch.tensor(eval_score, dtype=torch.float32, device=logits.device)

        return eval_score

In [None]:
from sklearn.utils.class_weight import compute_class_weight
classes = np.unique(data_df[['species_id']])

# Calculate class weights using the 'balanced' option, which automatically adjusts for class imbalance.
weights = compute_class_weight(class_weight='balanced', classes=classes, y=data_df['species_id'])

# Create a dictionary mapping each class to its respective class weight.
class_weights = dict(zip(classes, weights))

# Print the computed class weights to the console.
print(class_weights)

In [81]:
# Specify the pre-trained model you want to use.
model_str = "dima806/bird_sounds_classification" #"facebook/wav2vec2-base-960h"
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification, pipeline, TrainingArguments, Trainer

# Create an instance of the feature extractor for audio.
feature_extractor = AutoFeatureExtractor.from_pretrained(model_str)



94.755672


In [82]:
RATE_HZ = 16000

# Define the maximum audio interval length to consider in seconds
MAX_SECONDS = 10

# Calculate the maximum audio interval length in samples by multiplying the rate and seconds
MAX_LENGTH = RATE_HZ * MAX_SECONDS

In [83]:
class BirdSongDataset(torch.utils.data.Dataset):
    def __init__(self, data_df):
        self.df = data_df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        audio = np.array(row['denoised_audio'], dtype=np.float32)
        if np.random.rand() < 0.5:
            noise = np.array(self.df.sample(n=1).iloc[0]['noise'], dtype=np.float32)
            audio += noise
        processed = feature_extractor(audio, sampling_rate=RATE_HZ, max_length=MAX_LENGTH, truncation=True, return_tensors="pt")
        return processed.input_values.squeeze(0), torch.tensor(row['species_id'], dtype=torch.long)

In [84]:
# PyTorch Lightning Module
class BirdSongClassifier(pl.LightningModule):
    def __init__(self, class_weights):
        super().__init__()
        config = AutoConfig.from_pretrained("dima806/bird_sounds_classification")
        config.num_labels = 728
        self.model = AutoModelForAudioClassification.from_config(config)
        # Convert class weights from dict to tensor and register as model parameter
        # self.register_buffer('class_weights', )
        weights = torch.tensor([class_weights[i] for i in sorted(class_weights.keys())]).float()
        weights = weights.to('cuda')
        self.loss_fn = CrossEntropyLoss(weights)
        self.eval_fn  = ROCAUCScore()
        self.training_losses = []
        self.validation_losses = []
        self.validation_step_outputs = []
        self.training_step_outputs = []

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=3e-6, weight_decay=0.02)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x).logits  # Get logits from model
        loss = self.loss_fn.weighted_cross_entropy_with_logits(logits, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.model(x).logits
        loss = self.eval_fn.roc_auc_loss(logits, y)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.validation_step_outputs.append(torch.tensor(loss, dtype=torch.float32))
        return loss

    def on_train_epoch_end(self):
        epoch_average = torch.stack(self.training_step_outputs).mean()
        self.log("training_epoch_average", epoch_average)
        self.training_losses.append(epoch_average)
        self.training_step_outputs.clear()  # free memory
        
    def on_validation_epoch_end(self):
        epoch_average = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_epoch_average", epoch_average)
        self.validation_losses.append(epoch_average)
        self.validation_step_outputs.clear()  # free memory

In [85]:
from torch.utils.data import DataLoader, random_split
from sklearn.model_selection import GroupShuffleSplit

In [86]:
shuffled_df = data_df.sample(frac=1, random_state=42).reset_index(drop=True)

(11168, 5)
(11168, 5)


In [87]:
from sklearn.model_selection import GroupKFold
group_kfold = GroupKFold(n_splits=5)

# Get the group ids
groups = data_df['group_id'].values

# Split the data using GroupKFold
folds = list(group_kfold.split(shuffled_df, groups=groups))

In [88]:
class BirdSongDataModule(pl.LightningDataModule):
    def __init__(self, data_df, train_idx, valid_idx, batch_size=4):
        super().__init__()
        self.batch_size = batch_size
        self.data_df = data_df #BirdSongDataset(data_df)
        #self.fold = fold #  from 0 to 4
        self.tot_len = len(data_df)
        self.fold_size = int(len(data_df)*0.2)
        self.train_idx = train_idx
        self.valid_idx = valid_idx

    def setup(self, stage=None):
        # train_size = int(0.8 * len(self.data_df))
        # val_size = len(self.data_df) - train_size
        # self.train_dataset, self.val_dataset = random_split(self.dataset, [train_size, val_size])
        # use GroupShuffleSplit to ensure that samples from the same group are not split between train and val
        # start = self.fold * self.fold_size
        # end =   (self.fold + 1)*self.fold_size if self.fold < 4 else self.tot_len
        # valid_idx = np.arange(start, end)
        # train_idx = [el for el in np.arange(0, self.tot_len) if el not in np.arange(start, end)]  #next(GroupShuffleSplit(test_size=0.2, n_splits=1, random_state=42).split(self.data_df, groups=self.data_df['group_id']))
        self.train_dataset = BirdSongDataset(self.data_df.loc[self.train_idx])
        self.valid_dataset = BirdSongDataset(self.data_df.loc[self.valid_idx])

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=10, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.valid_dataset, batch_size=self.batch_size, num_workers=10, shuffle = False)

In [56]:
data_df.index, shuffled_df.index

(RangeIndex(start=0, stop=11168, step=1),
 RangeIndex(start=0, stop=11168, step=1))

In [25]:
data_df = data_df.reset_index(drop=True)

In [89]:
# TensorBoard Logger
logger = [ pl.loggers.TensorBoardLogger(save_dir=f'tb_logs{i}/', name='birdsong_classifier') for i in range(5)]

# Model Checkpoint based on validation loss improvement
checkpoint_callbacks = [pl.callbacks.ModelCheckpoint(
    dirpath=f'model_checkpoints{i}',
    filename='best-birdsong',
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='max'
) for i in range(5)]

In [None]:
for i, (train_idx, test_idx) in enumerate(folds):
    model = BirdSongClassifier(class_weights) # BirdSongClassifier.load_from_checkpoint('./model_checkpoints/best-birdsong.ckpt', class_weights = class_weights) 
    data_module = BirdSongDataModule(shuffled_df, train_idx, test_idx)
    trainer = pl.Trainer(max_epochs=20, callbacks=[checkpoint_callbacks[i]], logger=logger[i])
    trainer.fit(model, data_module)
    
    with open(f'tr_loss{i}.txt', 'w') as file:
        for item in model.training_losses:
            file.write(f"{item} ")
            
    with open(f'val_loss{i}.txt', 'w') as file:
        for item in model.validation_losses:
            file.write(f"{item} ")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /workspace/model_checkpoints0 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                              | Params
------------------------------------------------------------
0 | model | Wav2Vec2ForSequenceClassification | 94.8 M
------------------------------------------------------------
94.8 M    Trainable params
0         Non-trainable params
94.8 M    Total params
379.023   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 2234: 'val_loss' reached 0.55118 (best 0.55118), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 4468: 'val_loss' reached 0.58371 (best 0.58371), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fa0757cd900>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    
if w.is_alive():  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fa0757cd900>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3, global step 8936: 'val_loss' reached 0.65712 (best 0.65712), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4, global step 11170: 'val_loss' reached 0.67875 (best 0.67875), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5, global step 13404: 'val_loss' reached 0.70397 (best 0.70397), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 6, global step 15638: 'val_loss' was not in top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 7, global step 17872: 'val_loss' reached 0.71964 (best 0.71964), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 8, global step 20106: 'val_loss' reached 0.73694 (best 0.73694), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 9, global step 22340: 'val_loss' reached 0.73754 (best 0.73754), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 10, global step 24574: 'val_loss' reached 0.75142 (best 0.75142), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 11, global step 26808: 'val_loss' reached 0.76186 (best 0.76186), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 12, global step 29042: 'val_loss' reached 0.77022 (best 0.77022), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 13, global step 31276: 'val_loss' reached 0.77246 (best 0.77246), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 14, global step 33510: 'val_loss' reached 0.78648 (best 0.78648), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 15, global step 35744: 'val_loss' was not in top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 16, global step 37978: 'val_loss' reached 0.78708 (best 0.78708), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 17, global step 40212: 'val_loss' reached 0.79260 (best 0.79260), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 18, global step 42446: 'val_loss' reached 0.80021 (best 0.80021), saving model to '/workspace/model_checkpoints0/best-birdsong-v2.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 19, global step 44680: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=20` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: tb_logs1/birdsong_classifier
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /workspace/model_checkpoints1 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                              | Params
------------------------------------------------------------
0 | model | Wav2Vec2ForSequenceClassification | 94.8 M
------------------------------------------------------------
94.8 M    Trainable params
0         Non-trainable params
94.8 M    Total params
379.023   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 2234: 'val_loss' reached 0.56267 (best 0.56267), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 4468: 'val_loss' reached 0.59460 (best 0.59460), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2, global step 6702: 'val_loss' reached 0.63906 (best 0.63906), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3, global step 8936: 'val_loss' reached 0.66846 (best 0.66846), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4, global step 11170: 'val_loss' reached 0.69546 (best 0.69546), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5, global step 13404: 'val_loss' reached 0.70322 (best 0.70322), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 6, global step 15638: 'val_loss' reached 0.72187 (best 0.72187), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 7, global step 17872: 'val_loss' reached 0.73426 (best 0.73426), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 8, global step 20106: 'val_loss' reached 0.74694 (best 0.74694), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 9, global step 22340: 'val_loss' was not in top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 10, global step 24574: 'val_loss' reached 0.75918 (best 0.75918), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 11, global step 26808: 'val_loss' reached 0.77499 (best 0.77499), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 12, global step 29042: 'val_loss' reached 0.77932 (best 0.77932), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 13, global step 31276: 'val_loss' reached 0.78290 (best 0.78290), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 14, global step 33510: 'val_loss' reached 0.78812 (best 0.78812), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 15, global step 35744: 'val_loss' reached 0.79141 (best 0.79141), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 16, global step 37978: 'val_loss' reached 0.79857 (best 0.79857), saving model to '/workspace/model_checkpoints1/best-birdsong-v1.ckpt' as top 1
  self.training_step_outputs.append(torch.tensor(loss, dtype=torch.float32))
