## Installing ResNest

Pip install of resnest is currently borked, need to download pretrained model manually
according to the github, model is stored as ../Data/resnest*.pth on github and loaded below


In [1]:
# !pip install resnest

## Imports and Definition of Constants

In [2]:
import torch
from torch import nn
from torchvision import transforms
from PIL import Image
import numpy as np
import pandas as pd
import soundfile as sf
import librosa
import torch
import random
import os
from torch.utils.data import Dataset, DataLoader, RandomSampler
from resnest.torch import resnest50
from sklearn.model_selection import StratifiedKFold
from skimage.transform import resize

rng_seed = 1234
random.seed(rng_seed)
np.random.seed(rng_seed)
os.environ['PYTHONHASHSEED'] = str(rng_seed)
torch.manual_seed(rng_seed)
torch.cuda.manual_seed(rng_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

num_species = 24
batch_size = 8

fft = 2048
hop = 512 
# According to research, standard sampling bitrate is 48khz. Seen in discussion of kaggle competition as well. 
sr = 48000
length = 10*sr

# ResNeSt50 input layer is 224 x 224 x 3, specifying dimensions here  
mel_spec_dimensions = (224,224)

data_path = '../Data/'

# should change this according to nvidia-smi output e.g. "3"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
device = torch.device('cuda')


## Loading in Preproccessed Mel Spectrograms 

In [3]:
def to2DArray(x): 
    # casts object representation of specs stored in csv to a 2D numpy array 
    x=x.replace("[", '')
    x=x.replace("]", '')
    x=x.replace("...", '')
    x=x.replace("\n", '')
    y=np.array(x.split(" "))
    y = y[y != ""]
    y = np.asfarray(y, 'float64')
    y = np.reshape(y,(1, y.size))
    return y

In [4]:
df = pd.read_csv(data_path + 'csv/train_tp_data.csv')
df['mspec_db'] = df['mspec_db'].apply(lambda x: to2DArray(x))
df.head()

Unnamed: 0.1,Unnamed: 0,recording_id,species_id,songtype_id,t_min,f_min,t_max,f_max,mspec_db,chroma_db,stft_db
0,0,003bec244,14,1,44.544,2531.25,45.1307,5531.25,"[[-29.944086, -29.54014, -31.48041, -22.044724...",[[-24.453102 -23.385326 -19.578325 ... -1....,[[-31.428196 -32.94088 -34.383667 ... -26.787...
1,1,006ab765f,23,1,39.9615,7235.16,46.0452,11283.4,"[[-14.063939, -13.551413, -19.256004, -35.1699...",[[-3.9856358 -3.3799872 -2.2499917 ... -3.6...,[[-16.067139 -16.579441 -21.948366 ... -36.201...
2,2,007f87ba2,12,1,39.136,562.5,42.272,3281.25,"[[-35.684315, -36.580364, -36.661636, -37.0789...",[[ -9.486548 -8.426069 -11.021532 ... -8....,[[-38.457386 -39.250862 -39.292217 ... -37.594...
3,3,0099c367b,17,4,51.4206,1464.26,55.1996,4565.04,"[[-18.213322, -15.251486, -15.522239, -11.4986...",[[ -4.0246634 -4.580097 -4.9695506 ... -10....,[[-19.732197 -19.913256 -19.73383 ... -19.330...
4,4,009b760e6,10,1,50.0854,947.461,52.5293,10852.7,"[[-19.20329, -17.409256, -19.761925, -24.59859...",[[-2.5808494 -2.7056158 -2.1449702 ... -1.1806...,[[-24.51978 -23.799303 -26.561966 ... -30.522...


## Dataset Definition and Additional Preprocessing

In [5]:
class RainforestDataset(Dataset):
    def __init__(self, df):
        
        self.data = []
        self.labels = []
        
        # additional preprocessing required for ResNeST, normalization outlined in paper
        self.preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
                
        labels = df['species_id'].to_list()
        for label in labels:
            label_arr = np.zeros(24, dtype=np.single)
            label_arr[label] = 1
            self.labels.append(label_arr)
             
        mspecs = df['mspec_db']
        
        for i in range(len(mspecs)):
            current_mspec = (Image.fromarray(mspecs[i])).convert('RGB')
            current_mspec = self.preprocess(current_mspec)
            self.data.append(current_mspec)
            
            
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return (self.data[idx], self.labels[idx])

## Model Definition and Loading to GPU

In [6]:
# Model class definition 
model = resnest50(pretrained=False)

# ResNeST pretrained model should be uploaded to this path with the notebook
model.load_state_dict(torch.load(data_path + 'resnest50-528c19ca.pth'))
model.eval()
model.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, num_species)
)

pos_weight = (torch.ones(num_species) * num_species)

# load model into GPU
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)
loss_function = nn.BCEWithLogitsLoss(pos_weight)

loss_function.to(device)

BCEWithLogitsLoss()

## Definition of Training Loop

In [7]:
def training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function):
    best_corrects = 0


    for e in range(0, 20):
        train_loss = []

        model.train()
        for batch, (data, target) in enumerate(train_loader):
            data = data.float()
            if torch.cuda.is_available():
                data, target = data.to('cuda'), target.to('cuda')

            optimizer.zero_grad()
            output = model(data)
            output = output.cuda()
            
            loss = loss_function(output, target)
        
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        for g in optimizer.param_groups:
            lr = g['lr']

        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Training Loss: ", str(sum(train_loss) / len(train_loss)))

        # Validation
        with torch.no_grad():
            val_loss = []
            val_corr = []

            model.eval()
            for batch, (data, target) in enumerate(val_loader):
                data = data.float()
                if torch.cuda.is_available():
                    data, target = data.cuda(), target.cuda()
                
                output = model(data)
                loss = loss_function(output, target)

                val_loss.append(loss.item())

                vals, answers = torch.max(output, 1)
                vals, targets = torch.max(target, 1)
                corrects = 0
                for i in range(0, len(answers)):
                    if answers[i] == targets[i]:
                        corrects = corrects + 1
                val_corr.append(corrects)


        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Validation Loss: ", str(sum(val_loss) / len(val_loss)))


        if sum(val_corr) > best_corrects:
            print('Saving new best model at epoch ' + str(e) + ' (' + str(sum(val_corr)) + '/' + str(val_dataset.__len__()) + ')')
            torch.save(model, data_path + 'best_model_resnest.pt')
            best_corrects = sum(val_corr)

        scheduler.step()

    del model
    
    return 1

In [8]:
train_df = None
val_df = None

X = df.drop('species_id', axis=1)
y = df['species_id']

strat = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng_seed)

for fold, (train_index, val_index) in enumerate(strat.split(X,y)):
    if fold != 0:
        continue
    train_df = df.iloc[train_index]
    val_df = df.iloc[val_index]

    train_df = train_df.reset_index(drop=True)

    val_df = val_df.reset_index(drop=True)
    train_dataset = RainforestDataset(train_df)
    val_dataset = RainforestDataset(val_df)
    
    train_loader = DataLoader(train_dataset, batch_size = batch_size, sampler = RandomSampler(train_dataset))
    val_loader = DataLoader(val_dataset, batch_size = batch_size, sampler = RandomSampler(val_dataset))
    training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function)

Epoch:  0
Learning Rate:  0.001
Training Loss:  5.731409481314362
Epoch:  0
Learning Rate:  0.001
Validation Loss:  11.657833376238424
Saving new best model at epoch 0 (10/244)
Epoch:  1
Learning Rate:  0.001
Training Loss:  4.223901527826904
Epoch:  1
Learning Rate:  0.001
Validation Loss:  11.042784537038495
Saving new best model at epoch 1 (20/244)
Epoch:  2
Learning Rate:  0.001
Training Loss:  4.216528730314286
Epoch:  2
Learning Rate:  0.001
Validation Loss:  11.420835802631993
Epoch:  3
Learning Rate:  0.001
Training Loss:  4.209338702139307
Epoch:  3
Learning Rate:  0.001
Validation Loss:  10.262843901111234
Epoch:  4
Learning Rate:  0.001
Training Loss:  4.202783918771588
Epoch:  4
Learning Rate:  0.001
Validation Loss:  11.20800833548269
Epoch:  5
Learning Rate:  0.001
Training Loss:  4.195293719651269
Epoch:  5
Learning Rate:  0.001
Validation Loss:  9.47043191232989
Epoch:  6
Learning Rate:  0.001
Training Loss:  4.194914978058612
Epoch:  6
Learning Rate:  0.001
Validation 