In [18]:
!pip install -q torchtoolbox timm
!pip install -q pytorch_metric_learning

import os
import time
import random
import math
import copy
import cv2
import sklearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.cuda import amp

import torch.multiprocessing as mp
import warnings

from torchtoolbox.tools import mixup_data, mixup_criterion
from torch.nn.functional import cross_entropy
import torchmetrics
import timm

import pytorch_lightning as pl
from torch.utils.data import DataLoader
from sklearn import model_selection
import torchvision.transforms as transforms
import torchvision.io 
import librosa
from PIL import Image
import albumentations as alb
from albumentations.pytorch import ToTensorV2

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score
from pytorch_lightning.callbacks import ModelCheckpoint, BackboneFinetuning, EarlyStopping
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau, OneCycleLR

from tqdm.notebook import tqdm
from collections import defaultdict

import timm
from pytorch_metric_learning import losses

warnings.filterwarnings('ignore')

[0m

In [19]:
class Config:
    num_classes = 264
    batch_size = 64
    epochs = 10  
    seed = 2023
    model = "skresnet18"
    pretrained = True            
    weight_decay = 1e-3
    use_mixup = True
    mixup_alpha = 0.2   
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')    
    data_root = "/kaggle/input/birdclef-2023/"
    train_images = "/kaggle/input/split-creating-melspecs-stage-1/specs/train/"
    valid_images = "/kaggle/input/split-creating-melspecs-stage-1/specs/valid/"
    train_path = "/kaggle/input/bc2023-train-val-df/train.csv"
    valid_path = "/kaggle/input/bc2023-train-val-df/valid.csv"
    sampling_rate = 32000
    signal_duration = 5
    learning_rate = 5e-4
    
pl.seed_everything(Config.seed, workers=True)

2023

In [20]:
df_train = pd.read_csv(Config.train_path)
df_valid = pd.read_csv(Config.valid_path)
df_train = pd.concat([df_train, pd.get_dummies(df_train['primary_label'])], axis=1)
df_valid = pd.concat([df_valid, pd.get_dummies(df_valid['primary_label'])], axis=1)
birds = list(df_train.primary_label.unique())
missing_birds = list(set(list(df_train.primary_label.unique())).difference(list(df_valid.primary_label.unique())))
non_missing_birds = list(set(list(df_train.primary_label.unique())).difference(missing_birds))
df_valid[missing_birds] = 0
df_valid = df_valid[df_train.columns] 

In [21]:
def get_train_transform():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.OneOf([
                A.Cutout(max_h_size=5, max_w_size=16),
                A.CoarseDropout(max_holes=4),
            ], p=0.5),
    ])

In [22]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, df, sr = Config.sampling_rate, duration = Config.signal_duration, augmentations = None, train = True):

        self.df = df
        self.sr = sr 
        self.train = train
        self.duration = duration
        self.augmentations = augmentations
        if train:
            self.img_dir = Config.train_images
        else:
            self.img_dir = Config.valid_images

    def __len__(self):
        return len(self.df)

    @staticmethod
    def normalize(image):
        image = image / 255.0
        return image

    def __getitem__(self, idx):

        row = self.df.iloc[idx]
        impath = self.img_dir + f"{row.filename}.npy"
        image = np.load(str(impath))[:5]        
        if self.train:
            image = image[np.random.choice(len(image))]
        else:
            image = image[0]
            
        image = torch.tensor(image).float()
        if self.augmentations:
            image = self.augmentations(image.unsqueeze(0)).squeeze()
        
        image = torch.stack([image, image, image])
        image = self.normalize(image)
        return image, torch.tensor(row[17:]).float()


In [23]:
def load_dataset(df_train, df_valid):

    ds_train = Dataset(
        df_train, 
        sr = Config.sampling_rate,
        duration = Config.signal_duration,
        augmentations = None,
        train = True
    )
    
    ds_val = Dataset(
        df_valid, 
        sr = Config.sampling_rate,
        duration = Config.signal_duration,
        augmentations = None,
        train = False
    )
    dl_train = DataLoader(ds_train, batch_size=Config.batch_size , shuffle=True, num_workers = 2)    
    dl_val = DataLoader(ds_val, batch_size=Config.batch_size, num_workers = 2)
    return dl_train, dl_val, ds_train, ds_val

In [24]:
def get_optimizer(lr, params):
    model_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, params), 
            lr=lr,
            weight_decay=Config.weight_decay
        )
    interval = "epoch"
    
    lr_scheduler = CosineAnnealingWarmRestarts(
                            model_optimizer, 
                            T_0=Config.epochs, 
                            T_mult=1, 
                            eta_min=1e-6, 
                            last_epoch=-1
                        )

    return {
        "optimizer": model_optimizer, 
        "lr_scheduler": {
            "scheduler": lr_scheduler,
            "interval": interval,
            "monitor": "val_loss",
            "frequency": 1
        }
    }

In [25]:
def padded_cmap(solution, submission, padding_factor=5):
    solution = solution
    submission = submission
    new_rows = []
    for i in range(padding_factor):
        new_rows.append([1 for i in range(len(solution.columns))])
    new_rows = pd.DataFrame(new_rows)
    new_rows.columns = solution.columns
    padded_solution = pd.concat([solution, new_rows]).reset_index(drop=True).copy()
    padded_submission = pd.concat([submission, new_rows]).reset_index(drop=True).copy()
    score = sklearn.metrics.average_precision_score(
        padded_solution.values,
        padded_submission.values,
        average='macro',
    )
    return score

def map_score(solution, submission):
    solution = solution
    submission = submission
    score = sklearn.metrics.average_precision_score(
        solution.values,
        submission.values,
        average='micro',
    )
    return score

In [26]:
class Model(pl.LightningModule):
    def __init__(self, model_name=Config.model, num_classes = Config.num_classes, pretrained = Config.pretrained):
        super().__init__()
        self.num_classes = num_classes
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        self.in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(self.in_features, num_classes)
        checkpoint = torch.load('/kaggle/input/model-weights/sk_with_contrastive_loss.pth')
        self.backbone.load_state_dict(checkpoint['model_state_dict'])
        self.loss_function = nn.BCEWithLogitsLoss() 

    def forward(self,images):
        logits = self.backbone(images)
        return logits
        
    def configure_optimizers(self):
        return get_optimizer(lr=Config.learning_rate, params=self.parameters())

    def train_with_mixup(self, X, y):
        X, y_a, y_b, lam = mixup_data(X, y, alpha=Config.mixup_alpha)
        y_pred = self(X)
        loss_mixup = mixup_criterion(cross_entropy, y_pred, y_a, y_b, lam)
        return loss_mixup

    def training_step(self, batch, batch_idx):
        image, target = batch        
        if Config.use_mixup:
            loss = self.train_with_mixup(image, target)
        else:
            y_pred = self(image)
            loss = self.loss_function(y_pred,target)

        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss        

    def validation_step(self, batch, batch_idx):
        image, target = batch     
        y_pred = self(image)
        val_loss = self.loss_function(y_pred, target)
        self.log("val_loss", val_loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)    
        return {"val_loss": val_loss, "logits": y_pred, "targets": target}
    
    def train_dataloader(self):
        return self._train_dataloader 
    
    def validation_dataloader(self):
        return self._validation_dataloader
    
    def validation_epoch_end(self,outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        output_val = torch.cat([x['logits'] for x in outputs],dim=0).sigmoid().cpu().detach().numpy()
        target_val = torch.cat([x['targets'] for x in outputs],dim=0).cpu().detach().numpy()
        
        val_df = pd.DataFrame(target_val, columns = birds)
        pred_df = pd.DataFrame(output_val, columns = birds)        
        avg_score = padded_cmap(val_df, pred_df, padding_factor = 5)
               
        print(f'epoch {self.current_epoch} validation loss {avg_loss}')
        print(f'epoch {self.current_epoch} validation C-MAP score pad 5 {avg_score}')
        
        return {'val_loss': avg_loss,'val_cmap':avg_score}

In [27]:
def run_training():
    print(f"Running training...")    
    dl_train, dl_val, ds_train, ds_val = load_dataset(df_train, df_valid)
    
    audio_model = Model()
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=8, verbose= True, mode="min")
    checkpoint_callback = ModelCheckpoint(monitor='val_loss',
                                          dirpath= "/kaggle/working/exp1/",
                                      save_top_k=1,
                                      save_last= True,
                                      save_weights_only=True,
                                      filename= f'./{Config.model}_loss',
                                      verbose= True,
                                      mode='min')
    
    callbacks_to_use = [checkpoint_callback,early_stop_callback]


    trainer = pl.Trainer(
        gpus=1,
        val_check_interval=0.5,
        deterministic=True,
        max_epochs=10,
        auto_lr_find=False,    
        callbacks=callbacks_to_use,
        precision=16, accelerator="gpu" 
    )

    print("Running trainer.fit")
    trainer.fit(audio_model, train_dataloaders = dl_train, val_dataloaders = dl_val)                

    gc.collect()
    torch.cuda.empty_cache()
    return audio_model

In [28]:
model = run_training()

Running training...
Running trainer.fit


Sanity Checking: 0it [00:00, ?it/s]

epoch 0 validation loss 0.7384665012359619
epoch 0 validation C-MAP score pad 5 0.9408054506012066


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

epoch 0 validation loss 0.2426360845565796
epoch 0 validation C-MAP score pad 5 0.6018534500288357


Validation: 0it [00:00, ?it/s]

epoch 0 validation loss 0.16977612674236298
epoch 0 validation C-MAP score pad 5 0.6668877056731761


In [None]:
torch.save({'model_state_dict': model.backbone.state_dict() }, 'skresnet18_with_contrastive loss.pth')