In [1]:
#imports
import os
import timm
import torch
import numpy as np 
import pandas as pd
import librosa as lb
import torch.nn as nn
import soundfile as sf

from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import average_precision_score
import torchvision.transforms as transforms
from  soundfile import SoundFile 
from sklearn.model_selection import train_test_split

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Hyper-parameters: batch size, size of the classifier's output layer,
# mel-band setting, number of training epochs and optimiser learning rate.
bs, classnum, n_mels = 32, 264, 224
epochs, lr = 10, 3e-3


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#audio conversion
def compute_melspec(y, sr, n_mels, fmin, fmax):
    """Return the mel spectrogram of ``y`` on a dB scale as ``float32``.

    ``y``, ``sr``, ``n_mels``, ``fmin`` and ``fmax`` are forwarded unchanged to
    ``librosa.feature.melspectrogram``; the result is passed through
    ``librosa.power_to_db`` and cast to ``float32``.
    """
    mel_kwargs = dict(y=y, sr=sr, n_mels=n_mels, fmin=fmin, fmax=fmax)
    power_spec = lb.feature.melspectrogram(**mel_kwargs)
    return lb.power_to_db(power_spec).astype(np.float32)

def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Standardise ``X`` and rescale it to an 8-bit image in ``[0, 255]``.

    Args:
        X: numeric array (e.g. a dB-scaled mel spectrogram).
        eps: added to ``std`` before dividing, and also the minimum value
            range required before the min-max rescaling is attempted.
        mean: value(s) subtracted from ``X``; ``X.mean()`` when ``None``.
        std: value(s) ``X`` is divided by; ``X.std()`` when ``None``.

    Returns:
        ``uint8`` array shaped like the standardised ``X``. It is all zeros
        when the standardised value range does not exceed ``eps``.
    """
    # Compare against None explicitly: `mean or X.mean()` silently discarded
    # an explicit 0 and raised "truth value is ambiguous" for array statistics.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        # Min-max scale to [0, 255]; the uint8 cast truncates the fraction.
        V = (255 * (X - _min) / (_max - _min)).astype(np.uint8)
    else:
        # (Near-)constant input: nothing to stretch, return a black image.
        V = np.zeros_like(X, dtype=np.uint8)

    return V


def padded_cmap(solution, submission, padding_factor=5):
    """Macro-averaged average precision computed on padded frames.

    Any ``row_id`` column is dropped from both frames, then
    ``padding_factor`` rows of ones are appended to each of them before
    ``average_precision_score(..., average='macro')`` is evaluated.

    Args:
        solution: DataFrame of ground-truth labels, one column per class.
        submission: DataFrame of predicted scores with the same columns.
        padding_factor: number of all-ones rows appended to both frames.

    Returns:
        The score returned by ``average_precision_score``.
    """
    truth = solution.drop(['row_id'], axis=1, errors='ignore')
    scores = submission.drop(['row_id'], axis=1, errors='ignore')

    n_classes = len(truth.columns)
    padding = pd.DataFrame([[1] * n_classes for _ in range(padding_factor)])
    padding.columns = truth.columns

    padded_truth = pd.concat([truth, padding]).reset_index(drop=True).copy()
    padded_scores = pd.concat([scores, padding]).reset_index(drop=True).copy()

    return average_precision_score(
        padded_truth.values,
        padded_scores.values,
        average='macro',
    )

In [3]:
#model
class Model(nn.Module):
    """timm ``tf_efficientnet_b2_ns`` backbone with a ``classnum``-way linear head.

    The backbone is created with ``pretrained=False``; its stock classifier
    is replaced by a one-layer ``nn.Sequential`` whose output size is the
    module-level ``classnum``.
    """

    def __init__(self):
        super().__init__()
        backbone = timm.create_model('tf_efficientnet_b2_ns', pretrained=False)
        # Remember the feature width feeding the classifier before replacing it.
        self.in_features = backbone.classifier.in_features
        backbone.classifier = nn.Sequential(nn.Linear(self.in_features, classnum))
        self.model = backbone

    def forward(self, images):
        """Return the backbone's output (no activation applied) for ``images``."""
        return self.model(images)

In [4]:
#dataset for work
class BirdDataset(Dataset):
    """Dataset that turns the audio files listed in a DataFrame into mel images.

    Every row of ``data`` must provide a ``path`` entry; all entries from the
    fourth position onward are read as the label vector.

    With ``valid=False`` an item is a single ``(3, H, 256)`` float32 image
    built from the whole file, and ``path`` is taken relative to the Kaggle
    training-audio directory. With ``valid=True`` the file is cut into
    ``duration``-second windows and an item is a ``(n_windows, 3, H, 256)``
    stack; ``path`` is then used as given. ``H`` is the spectrogram height
    capped at ``IMAGE_HEIGHT``.

    Args:
        data: DataFrame describing the files (see above).
        sr: sample rate the audio is brought to.
        n_mels: number of mel bands of the spectrogram.
        fmin: lowest mel frequency.
        fmax: highest mel frequency; ``sr // 2`` when falsy.
        duration: window length in seconds (validation mode).
        step: hop between windows in samples; one full window when falsy.
        res_type: resampling method forwarded to ``librosa.resample``.
        resample: whether files with a different sample rate are resampled.
        valid: selects windowed (``True``) or whole-file (``False``) items.
        transform: stored on the instance; not applied anywhere in this class.
    """

    # Fixed spatial size the spectrogram images are cropped / padded to.
    IMAGE_HEIGHT = 128
    IMAGE_WIDTH = 256

    def __init__(self, data, sr=32000, n_mels=128, fmin=0, fmax=None, duration=5,
                 step=None, res_type="kaiser_fast", resample=True, valid=False, transform=None):
        self.data = data
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax or self.sr // 2

        self.transform = transform

        self.duration = duration
        self.audio_length = self.duration*self.sr
        self.step = step or self.audio_length

        self.valid = valid
        self.path = '' if valid else '/kaggle/input/birdclef-2023/train_audio/'
        self.res_type = res_type
        self.resample = resample

    def __len__(self):
        """Number of rows (files) in ``data``."""
        return len(self.data)

    def normalize(self, image):
        """Scale a 2-D uint8 image to ``[0, 1]`` and fit it to a fixed size.

        The image is right-padded with zeros up to ``IMAGE_WIDTH`` columns,
        cropped to at most ``IMAGE_HEIGHT`` x ``IMAGE_WIDTH`` and repeated on
        a new leading axis to give three identical channels.
        """
        image = image.astype("float32", copy=False) / 255.0
        rows, cols = image.shape[0], image.shape[1]
        if cols < self.IMAGE_WIDTH:
            # Pad with the image's own row count; a hard-coded 128 here broke
            # every spectrogram whose height was not exactly 128.
            padding = np.zeros((rows, self.IMAGE_WIDTH - cols), dtype=np.float32)
            image = np.concatenate([image, padding], axis=1)
        image = image[:self.IMAGE_HEIGHT, :self.IMAGE_WIDTH]
        return np.stack([image, image, image], axis=0)

    def audio_to_image(self, audio):
        """Convert a waveform to a normalised three-channel mel image."""
        melspec = compute_melspec(audio, self.sr, self.n_mels, self.fmin, self.fmax)
        image = mono_to_color(melspec)
        return self.normalize(image)

    def _split_windows(self, audio):
        """Cut ``audio`` into full windows of ``audio_length`` samples.

        Window ends are spaced ``step`` samples apart and an incomplete
        trailing window is dropped. A clip shorter than one window is
        zero-padded to exactly one window so that at least one is returned.
        """
        size = self.audio_length
        if len(audio) < size:
            audio = np.pad(audio, (0, size - len(audio)))
        ends = range(size, len(audio) + self.step, self.step)
        windows = [audio[end - size:end] for end in ends]
        return [window for window in windows if len(window) == size]

    def read_file(self, row):
        """Load the file described by ``row`` and return ``(images, labels)``."""
        filepath = self.path + str(row['path'])
        audio, orig_sr = sf.read(filepath, dtype="float32")

        if audio.ndim > 1:
            # soundfile yields (frames, channels) for multi-channel files;
            # average the channels so the rest of the pipeline sees mono.
            audio = audio.mean(axis=1)

        if self.resample and orig_sr != self.sr:
            # Sample rates are keyword-only in librosa >= 0.10.
            audio = lb.resample(audio, orig_sr=orig_sr, target_sr=self.sr,
                                res_type=self.res_type)

        if self.valid:
            images = np.stack(
                [self.audio_to_image(window) for window in self._split_windows(audio)]
            )
        else:
            images = self.audio_to_image(audio)

        # Rows of a mixed-type frame are object-dtype; convert the label part
        # to float32 explicitly (empty when the row carries no label columns).
        labels = torch.tensor(row.iloc[3:].to_numpy(dtype=np.float32))
        return (images, labels)

    def __getitem__(self, idx):
        """Return the item for the ``idx``-th row of ``data``."""
        # Positional lookup: samplers hand out 0..len-1 regardless of the
        # frame's index labels.
        return self.read_file(self.data.iloc[idx])

In [5]:
# Network, optimiser, learning-rate schedule, loss and image transform
# used by the training and evaluation cells.
birds_classifier = Model().to(device)
optim = AdamW(params=birds_classifier.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer=optim,
    T_max=10,
    eta_min=1e-5,
)
loss_fn = nn.CrossEntropyLoss()
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((120, 224)),
])


In [6]:
# Load the training metadata and keep only the three leading columns the
# dataset expects, exposing `filename` under the name `path`.
metadata = pd.read_csv('/kaggle/input/birdclef-2023/train_metadata.csv')
data = metadata[['primary_label', 'type', 'filename']].rename(columns={'filename': 'path'})

# One-hot encode the species once; the dummy columns double as the class list.
one_hot = pd.get_dummies(data['primary_label'])
birds = list(one_hot.columns)
data = pd.concat([data, one_hot], axis=1)

# 80/20 split with a fixed seed so the hold-out set is identical on every run.
train_data, test_data = train_test_split(data, train_size=0.8, random_state=42)
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/birdclef-2023/train_metadata.csv'

In [7]:
# Training split, served in shuffled mini-batches of `bs` items.
train_dataset = BirdDataset(data=train_data, transform=transform)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)

# Hold-out split, batched the same way.
test_dataset = BirdDataset(data=test_data)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=bs, shuffle=True)

NameError: name 'train_data' is not defined

In [8]:
def train(model, data_loader, optimizer, scheduler):
    """Run one training epoch of ``model`` over ``data_loader``.

    Uses the module-level ``device`` and ``loss_fn``. The loss of every
    tenth batch (starting with the first) is printed.

    Args:
        model: network to optimise; put into training mode.
        data_loader: iterable yielding ``(mels, labels)`` batches.
        optimizer: optimiser stepped once per batch.
        scheduler: learning-rate scheduler stepped once when the epoch is
            finished, or ``None`` to leave the learning rate untouched.
    """
    model.train()

    for batch_idx, (mels, labels) in enumerate(data_loader):
        optimizer.zero_grad()

        mels = mels.to(device)
        labels = labels.to(device)

        loss = loss_fn(model(mels), labels)
        if batch_idx % 10 == 0:
            print(loss.item())

        loss.backward()
        optimizer.step()

    # Step once per epoch, not once per batch: the notebook's cosine schedule
    # spans T_max=10 steps, i.e. the 10 training epochs. Stepping it inside
    # the batch loop made the learning rate swing through a whole cosine
    # cycle every 20 batches.
    if scheduler is not None:
        scheduler.step()

In [9]:
#test
def test(model, data_loader):
    """Score ``model`` on ``data_loader`` with ``padded_cmap``.

    Uses the module-level ``device``. The model is put into evaluation mode
    and its outputs are passed through a sigmoid before scoring.

    Args:
        model: network to evaluate.
        data_loader: iterable yielding ``(mels, labels)`` batches.

    Returns:
        The value returned by ``padded_cmap`` for all batches combined.
    """
    model.eval()

    all_labels = []
    all_probs = []

    # Evaluation only: skip autograd bookkeeping for the forward passes.
    with torch.no_grad():
        for mels, labels in data_loader:
            outputs = model(mels.to(device))
            all_labels.append(labels.cpu())
            all_probs.append(outputs.sigmoid().cpu())

    # Convert to NumPy before building the frames so that they hold plain
    # numeric columns rather than tensor objects.
    label_df = pd.DataFrame(torch.cat(all_labels, dim=0).numpy())
    pred_df = pd.DataFrame(torch.cat(all_probs, dim=0).numpy())

    return padded_cmap(label_df, pred_df)

In [10]:
# Train for `epochs` rounds; after each one, evaluate and overwrite the saved
# checkpoint whenever the score beats the best one seen so far.
temp_score = 0
for i in range(epochs):
    print(i)
    train(
        model=birds_classifier,
        data_loader=train_dataloader,
        optimizer=optim,
        scheduler=scheduler,
    )
    cur_score = test(model=birds_classifier, data_loader=test_dataloader)
    print(cur_score)
    if not cur_score > temp_score:
        continue
    temp_score = cur_score
    torch.save(birds_classifier.state_dict(), '/kaggle/working/best.pth')

0


NameError: name 'train_dataloader' is not defined

In [11]:
#birds_classifier.load_state_dict(torch.load('/kaggle/input/weights/best1.pth', map_location='cpu'), strict=False)

<All keys matched successfully>

In [12]:
# List the test soundscapes and describe them in the three-column layout
# BirdDataset expects; `primary_label` and `type` are just running numbers.
input_file_path = '/kaggle/input/birdclef-2023/test_soundscapes/'
input_file_name = os.listdir('/kaggle/input/birdclef-2023/test_soundscapes')

data = {
    'primary_label': list(range(len(input_file_name))),
    'type': list(range(len(input_file_name))),
    'path': [input_file_path + name for name in input_file_name],
}

# valid=True: paths are used as given and each file is split into windows.
test_data = BirdDataset(pd.DataFrame(data=data), valid=True)

In [13]:
# Predict per-window class probabilities for every test soundscape.
predictions = []
birds_classifier.eval()
# Inference only: no gradients needed.
with torch.no_grad():
    for en in range(len(test_data)):
        # Move the windows to the model's device; the model was placed on
        # `device`, so CPU inputs would fail whenever that is a GPU.
        images = torch.from_numpy(test_data[en][0]).to(device)
        # Bring the result back to the CPU before converting to NumPy.
        outputs = birds_classifier(images).sigmoid().cpu().numpy()
        predictions.append(outputs)

In [14]:
# Build one frame per soundscape and concatenate once at the end; growing
# `submission` with pd.concat inside the loop re-copied all earlier rows on
# every iteration.
frames = []
for file_idx, file_name in enumerate(input_file_name):
    pred = predictions[file_idx]
    # Strip the extension whatever its length (the old `[:-4]` assumed 4 chars).
    stem = os.path.splitext(file_name)[0]
    df = pd.DataFrame(pred, columns=birds)
    # One row per window, numbered 5, 10, 15, ... after the file stem.
    df.insert(0, 'row_id', [f'{stem}_{(k + 1) * 5}' for k in range(len(pred))])
    frames.append(df)

if frames:
    submission = pd.concat(frames, ignore_index=True)
else:
    # No test files: still emit a header-only submission.
    submission = pd.DataFrame(columns=['row_id'] + birds)
submission.to_csv('submission.csv', index=False)