In [None]:
!pip install -q timm pytorch-metric-learning
import os
import time
import random
import math
import copy
import cv2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.cuda import amp

import torch.multiprocessing as mp
import warnings

import pytorch_lightning as pl
from torch.utils.data import DataLoader
from sklearn import model_selection
import torchvision.transforms as transforms
import torchvision.io 
import librosa
from PIL import Image
import albumentations as alb
from albumentations.pytorch import ToTensorV2

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score

from tqdm.notebook import tqdm
from collections import defaultdict

import timm
from pytorch_metric_learning import losses

warnings.filterwarnings('ignore')

In [None]:
class CFG:
    """Notebook-wide settings, read everywhere as plain class attributes (``CFG.<name>``)."""

    # ---- reproducibility / hardware ----
    seed = 42
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # ---- model ----
    model_name = "skresnet18"
    img_size = 512
    num_classes = 264
    embedding_size = 512
    temperature = 0.1

    # ---- optimisation ----
    lr = 1e-5
    min_lr = 1e-6
    weight_decay = 1e-6
    batch_size = 16
    num_epochs = 10
    n_accumulate = 4
    scheduler = 'CosineAnnealingLR'
    T_max = 10
    use_mixup = True
    mixup_alpha = 0.2

    # ---- input locations ----
    data_root = "/kaggle/input/birdclef-2023/"
    train_images = "/kaggle/input/split-creating-melspecs-stage-1/specs/train/"
    valid_images = "/kaggle/input/split-creating-melspecs-stage-1/specs/valid/"
    train_path = "/kaggle/input/bc2023-train-val-df/train.csv"
    valid_path = "/kaggle/input/bc2023-train-val-df/valid.csv"

    # ---- audio / sample reading ----
    # NOTE(review): SR and DURATION look like sample rate (Hz) and clip length (s) — confirm.
    SR = 32000
    DURATION = 5
    # Upper bound on how many leading entries of a stored array the dataset reads.
    MAX_READ_SAMPLES = 5
    # NOTE(review): distinct from `lr` above; keep both until callers are checked.
    LR = 5e-4

In [None]:
def set_seed(seed = 42):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels.

    Args:
        seed: integer seed applied to every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not only the current one (safe no-op without CUDA).
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different convolution
    # algorithms between runs, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

    # Only affects interpreters started after this point (e.g. subprocesses);
    # hash randomisation of the running process was fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Seed every RNG once, up front, with the configured seed.
set_seed(CFG.seed)

In [None]:
# Read the pre-split train / validation metadata tables.
df_train = pd.read_csv(CFG.train_path)
df_valid = pd.read_csv(CFG.valid_path)

# The encoder is fitted on the training labels only; the validation labels are
# mapped with that same vocabulary (sklearn raises on a label it never saw).
train_labels = df_train['primary_label']
label_encoder = LabelEncoder()

df_train['encoded_labels'] = label_encoder.fit_transform(train_labels)
df_valid['encoded_labels'] = label_encoder.transform(df_valid['primary_label'])

In [None]:
class BirdDataset(torch.utils.data.Dataset):
    """Map-style dataset serving pre-computed ``.npy`` arrays listed in a dataframe.

    Each row of ``df`` supplies a ``filename`` (the array is read from
    ``<img_dir><filename>.npy``) and an ``encoded_labels`` value. Arrays come
    from ``CFG.train_images`` when ``train`` is true, otherwise from
    ``CFG.valid_images``.
    """

    def __init__(self, df, sr = CFG.SR, duration = CFG.DURATION, transforms = None,  train = False):
        self.df = df
        self.sr = sr
        self.duration = duration
        self.transforms = transforms
        self.train = train
        # Training and validation arrays live in separate directories.
        self.img_dir = CFG.train_images if self.train else CFG.valid_images

    def __len__(self):
        return len(self.df)

    @staticmethod
    def normalize(image):
        """Return ``image`` divided by 255.0."""
        return image / 255.0

    def __getitem__(self, idx):
        """Return ``(image, label)``: a 3-plane float tensor and a float scalar label tensor."""
        record = self.df.iloc[idx]
        # Only the first CFG.MAX_READ_SAMPLES entries along axis 0 are considered.
        candidates = np.load(str(self.img_dir + f"{record.filename}.npy"))[:CFG.MAX_READ_SAMPLES]
        # Training draws a random candidate; evaluation always takes the first one.
        chosen = np.random.choice(len(candidates)) if self.train else 0
        image = candidates[chosen]

        if self.transforms:
            image = self.transforms(image=image)["image"]

        plane = torch.tensor(image).float()
        # Replicate the single plane three times along a new leading axis.
        image = self.normalize(torch.stack([plane, plane, plane]))
        label = torch.tensor(record.encoded_labels).float()
        return image, label


In [None]:
import albumentations as A

# Albumentations pipelines keyed by phase; each is called as
# `pipeline(image=array)["image"]`.
data_transforms = {

    # Random augmentation is applied to training samples only.
    "train": A.Compose([
        A.HorizontalFlip(p=0.5),
        A.OneOf([
                A.Cutout(max_h_size=5, max_w_size=16),
                A.CoarseDropout(max_holes=4),
            ], p=0.5),
    ]),
    # Validation must be deterministic: random flips / dropout here would make
    # the validation loss noisy and the best-checkpoint selection unreliable.
    # An empty pipeline returns the image unchanged.
    "valid": A.Compose([]),
}

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs, dataloaders, dataset_sizes, device, fold):
    """Train ``model`` with mixed precision and gradient accumulation, keeping the best weights.

    Every epoch runs a 'train' pass followed by a 'valid' pass. Whenever the
    validation loss is less than or equal to the best seen so far, the weights
    are snapshotted in memory and written to
    ``Fold{fold}_{best_loss}_epoch_{epoch}.bin``.

    Args:
        model: network to optimise; called as ``model(inputs)``.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: optimiser, stepped once per ``CFG.n_accumulate`` training batches.
        scheduler: LR scheduler, stepped together with the optimiser.
        num_epochs: number of epochs to run.
        dataloaders: dict with 'train' and 'valid' iterables yielding ``(inputs, labels)``.
        dataset_sizes: dict with the sample count per phase, used to average the loss.
        device: device the batches are moved to.
        fold: identifier embedded in the checkpoint file name.

    Returns:
        ``model`` with the best validation weights loaded.
    """
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    scaler = amp.GradScaler()

    def apply_accumulated_gradients():
        # One optimiser update from the gradients accumulated so far.
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        optimizer.zero_grad()

    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            if is_train:
                model.train()
                # Start every training pass from clean gradients.
                optimizer.zero_grad()
            else:
                model.eval()

            running_loss = 0.0
            pending_grads = False  # True while gradients wait for an optimiser step
            # `step` must count batches: the accumulation schedule is per batch,
            # not per epoch.
            for step, (inputs, labels) in enumerate(tqdm(dataloaders[phase])):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.set_grad_enabled(is_train):
                    with amp.autocast(enabled=True):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    if is_train:
                        # Scale only the back-propagated loss so the accumulated
                        # gradient averages over n_accumulate batches; the
                        # reported loss below stays unscaled.
                        scaler.scale(loss / CFG.n_accumulate).backward()
                        pending_grads = True
                        if (step + 1) % CFG.n_accumulate == 0:
                            apply_accumulated_gradients()
                            pending_grads = False
                running_loss += loss.item() * inputs.size(0)

            # Flush a trailing partial accumulation window so its gradients do
            # not leak into the next epoch.
            if is_train and pending_grads:
                apply_accumulated_gradients()

            epoch_loss = running_loss / dataset_sizes[phase]

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))

            # Keep a deep copy of the best weights and checkpoint them to disk.
            if phase == 'valid' and epoch_loss <= best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                PATH = f"Fold{fold}_{best_loss}_epoch_{epoch}.bin"
                torch.save(model.state_dict(), PATH)
        print()
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best Loss ",best_loss)

    model.load_state_dict(best_model_wts)
    return model

In [None]:
def run_fold(model, criterion, optimizer, scheduler, device, fold, num_epochs=10):
    """Build the train / valid datasets and loaders, then delegate to ``train_model``.

    The datasets wrap the notebook-level ``df_train`` and ``df_valid`` frames
    with their matching entries in ``data_transforms``. Only the training
    loader shuffles.

    Returns:
        Whatever ``train_model`` returns for this fold.
    """
    datasets = {
        'train': BirdDataset(
            df_train,
            sr = CFG.SR,
            duration = CFG.DURATION,
            transforms = data_transforms["train"],
            train = True
        ),
        'valid': BirdDataset(
            df_valid,
            sr = CFG.SR,
            duration = CFG.DURATION,
            transforms = data_transforms["valid"],
        ),
    }

    dataset_sizes = {phase: len(data) for phase, data in datasets.items()}

    dataloaders = {
        phase: DataLoader(
            dataset=data,
            batch_size=CFG.batch_size,
            num_workers=4,
            pin_memory=True,
            shuffle=(phase == 'train'),
        )
        for phase, data in datasets.items()
    }

    return train_model(model, criterion, optimizer, scheduler, num_epochs, dataloaders, dataset_sizes, device, fold)

In [None]:
# Pretrained timm backbone whose final `fc` layer is replaced by a linear
# projection to CFG.embedding_size.
model = timm.create_model(CFG.model_name, pretrained=True)
in_features = model.fc.in_features  # input width of the layer being replaced
model.fc = nn.Linear(in_features=in_features, out_features=CFG.embedding_size)
model = model.to(CFG.device)
print("Model loaded")

In [None]:
class SupervisedContrastiveLoss(nn.Module):
    """Label-aware NT-Xent loss over L2-normalised embeddings.

    Wraps ``pytorch_metric_learning.losses.NTXentLoss``: samples sharing a
    label form positive pairs, all others are negatives.

    Args:
        temperature: softmax temperature handed to ``NTXentLoss``.
    """

    def __init__(self, temperature=0.1):
        super(SupervisedContrastiveLoss, self).__init__()
        self.temperature = temperature
        # Built once, with the configured temperature, instead of on every
        # forward call with a hard-coded value.
        self.loss_fn = losses.NTXentLoss(temperature=temperature)

    def forward(self, feature_vectors, labels):
        """Return the scalar loss for a ``(batch, dim)`` batch of embeddings.

        Args:
            feature_vectors: 2-D tensor with one embedding per row.
            labels: one label per row; any shape that flattens to ``(batch,)``.
        """
        feature_vectors_normalized = F.normalize(feature_vectors, p=2, dim=1)
        # NTXentLoss computes pairwise similarities itself, so it must receive
        # the embeddings, not a precomputed similarity matrix.
        # reshape(-1) keeps labels 1-D even for a batch of one, where squeeze()
        # would collapse them to a 0-d tensor.
        return self.loss_fn(feature_vectors_normalized, labels.reshape(-1))

In [None]:
# Wire up the loss, optimiser and LR schedule from CFG, then train fold 0.
criterion = SupervisedContrastiveLoss(temperature=CFG.temperature).to(CFG.device)
optimizer = optim.Adam(
    model.parameters(),
    lr=CFG.lr,
    weight_decay=CFG.weight_decay,
)
scheduler = lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=CFG.T_max,
    eta_min=CFG.min_lr,
)
# `model` is rebound to the network returned by run_fold.
model = run_fold(
    model,
    criterion,
    optimizer,
    scheduler,
    device=CFG.device,
    fold=0,
    num_epochs=50,
)

In [None]:
# Replace the embedding projection with a freshly initialised classification
# head and save the resulting weights. The input width is read from the head
# being replaced rather than hard-coded to 512, so this cell keeps working when
# CFG.model_name selects a backbone with a different feature size.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=CFG.num_classes, bias=True)
torch.save({ 'model_state_dict': model.state_dict(),}, 'sk_with_contrastive_loss.pth')