In [41]:
import pandas as pd
import numpy as np 
import soundfile as sf 
import librosa
from PIL import Image
import os
import torch
import random 
from torch import nn 
from torch.utils.data import DataLoader 
import torch.utils.data as td
import torchvision
from torchvision import models
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
import torch.utils.data as td 
import os
from skimage.transform import resize 
from tqdm import tqdm 

# Setting seeds for reproducible results 
rng_seed = 1234
random.seed(rng_seed)
np.random.seed(rng_seed)
os.environ['PYTHONHASHSEED'] = str(rng_seed)
torch.manual_seed(rng_seed)
torch.cuda.manual_seed(rng_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

num_species = 24
batch_size = 8

fft = 2048
hop = 512 
# According to research, standard sampling bitrate is 48khz. Seen in discussion of kaggle competition as well. 
sr = 48000
length = 10*sr
# ResNet50 input layer is 224 x 224 x 3, so I'm resizing the image to fit the first input dimension. 
mel_spec_dimensions = (224,224)

data_path = '../../Data/'

df = pd.read_csv(data_path + 'train_tp.csv')

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


### Cuda Device Selection

Use cuda:{device_num} to select cuda device that is not being used already

Make sure that this device is selected by exporting CUDA_VISIBLE_DEVICES={device_num} on the shell that's running the notebook server

In [3]:
os.system('nvidia-smi')

Wed Mar  9 10:23:49 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.39.01    Driver Version: 510.39.01    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   46C    P0   242W / 300W |   1016MiB / 16384MiB |    100%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   34C    P0    56W / 300W |   9422MiB / 16384MiB |      0%      Default |
|       

0

In [4]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "6"
device = torch.device('cuda')
print(device)

cuda


### Preprocessing 

Just using a quick technique for now, worried only about getting model results back. Will use different preprocessing steps as we move on. 

In [5]:
def create_mel_spectograms(df):
    df['spec'] = np.nan
    df['spec'] = df['spec'].astype(object)
    
    for idx,row in df.iterrows():
        
        rid = row['recording_id']

        wav, sr = librosa.load(data_path + 'train/' + rid + '.flac', sr=None)

        # Slicing and centering spectograms 
        m = np.round((row['t_min'] + row['t_max']) / 2)
        l = m - length / 2
        if l < 0: l = 0
        r = m + length
        if r > len(wav):
            r = len(wav)
            l = r - m

        mspec = librosa.feature.melspectrogram(y=wav[int(l):int(r)], n_fft=fft, hop_length=hop, sr=sr)
        mspec = resize(mspec, mel_spec_dimensions)
        mspec = (mspec - np.min(mspec))/np.max(mspec)
            
        df.at[idx, 'spec'] = mspec
        
    return df

### Optional: Rerun Mel Spectogram Pipeline 

Note: should not be necessary if up to date with main branch
train_spectograms.csv should already be saved at Data/train_spectograms.csv


In [12]:
df = create_mel_spectograms(df)
# df.to_csv(data_path + 'train_spectograms.csv')
print(df.dtypes)
df.head()


recording_id     object
species_id        int64
songtype_id       int64
t_min           float64
f_min           float64
t_max           float64
f_max           float64
spec             object
dtype: object


Unnamed: 0,recording_id,species_id,songtype_id,t_min,f_min,t_max,f_max,spec
0,003bec244,14,1,44.544,2531.25,45.1307,5531.25,"[[0.015969152, 0.0068952353, 0.004454193, 0.00..."
1,006ab765f,23,1,39.9615,7235.16,46.0452,11283.4,"[[0.00960646, 0.027989434, 0.066221, 0.0223827..."
2,007f87ba2,12,1,39.136,562.5,42.272,3281.25,"[[0.09593078, 0.24144638, 0.07836443, 0.084347..."
3,0099c367b,17,4,51.4206,1464.26,55.1996,4565.04,"[[0.10142235, 0.08636865, 0.07434975, 0.119185..."
4,009b760e6,10,1,50.0854,947.461,52.5293,10852.7,"[[0.1255407, 0.032630917, 0.01827762, 0.018702..."


In [13]:
type(df.at[1, 'spec'])

numpy.ndarray

### Creating PyTorch Dataset Class

Note: Have to stack the spectrograms so that they're (224 x 224 x 3) to fit the input dimensions of ResNet50

In [14]:
class RFCXDatasetFromArr(td.Dataset):
    def __init__(self, df):
        
        self.data = []
        self.labels = []
         # need this to transform data to tensors    
        self.transform = transforms.ToTensor()
                
        labels = df['species_id'].to_list()
        for label in labels:
            label_arr = np.zeros(24, dtype=np.single)
            label_arr[label] = 1.
            self.labels.append(label_arr)
             
        specs = df['spec']
            
        for i in range(len(specs)):
            current_spec = np.array(specs[i])
            stack = np.stack([current_spec, current_spec, current_spec])
            self.data.append(stack)
            
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return (torch.tensor(self.data[idx]), torch.tensor(self.labels[idx]))        

### Creating Training and Validation Sets

In [16]:
train_df = None
val_df = None

X = df.drop('species_id', axis=1)
y = df['species_id']

strat = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng_seed)

for fold, (train_index, val_index) in enumerate(strat.split(X,y)):
    if fold==0:
        train_df = df.iloc[train_index]
        val_df = df.iloc[val_index]

train_df = train_df.reset_index(drop=True)

val_df = val_df.reset_index(drop=True)

In [17]:
train_dataset = RFCXDatasetFromArr(train_df)
val_dataset = RFCXDatasetFromArr(val_df)

### Configuring Model

ResNet50 Research Reference: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Classification/ConvNets/resnet50v1.5#data-augmentation

After reading up on ResNet at the above link, SGD was recommended as an optimizer. Went with a recommended learning rate scheduler from a related notebook in Kaggle. The above link recommends a different scheduler. We chose to use BCE w/ Logits Loss also based on recommendations from related work. We plan on trying out multiple different loss functions to see what works best for our problem. 

In [42]:
train_loader = DataLoader(train_dataset, batch_size = batch_size, sampler = td.RandomSampler(train_dataset))
val_loader = DataLoader(val_dataset, batch_size = batch_size, sampler = td.RandomSampler(val_dataset))

# Model definition 
model = models.resnet50(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, num_species)
)

pos_weight = (torch.ones(num_species) * num_species)


optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)
loss_function = nn.BCEWithLogitsLoss(pos_weight)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/tuj61127/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

Below, we can see the shape of our model. Note that ResNet50 has an output dimension of 2048, which we pass through a fully connected layer. The output of our fc layer is in agreement with competition standards. We designed the FC layer based on related work, and will optimize it in later phases.

In [16]:
# load model into GPU
model = model.to(device)

### Training Loop

Training loop based on the work of another Kaggle notebook: https://www.kaggle.com/fffrrt/all-in-one-rfcx-baseline-for-beginners

Maintains a validation accuracy statistic (Does the most probable class match the ground-truth label?) as the model trains, and saves the model with the highest validation accuracy to the project directory.

In [17]:
def training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function):
    best_corrects = 0


    for e in range(0, 8):
        train_loss = []


        model.train()
        for batch, (data, target) in enumerate(train_loader):

            print(data.shape)
            data = data.float()
            if torch.cuda.is_available():
                data, target = data.to('cuda'), target.to('cuda')

            optimizer.zero_grad()
            output = model(data)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        for g in optimizer.param_groups:
            lr = g['lr']

        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Training Loss: ", str(sum(train_loss) / len(train_loss)))

        # Validation
        with torch.no_grad():
            val_loss = []
            val_corr = []

            model.eval()
            for batch, (data, target) in enumerate(val_loader):
                data = data.float()
                if torch.cuda.is_available():
                    data, target = data.to(, target.cuda()
                
        
                
                output = model(data)
                loss = loss_function(output, target)

                val_loss.append(loss.item())

                vals, answers = torch.max(output, 1)
                vals, targets = torch.max(target, 1)
                corrects = 0
                for i in range(0, len(answers)):
                    if answers[i] == targets[i]:
                        corrects = corrects + 1
                val_corr.append(corrects)


        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Validation Loss: ", str(sum(val_loss) / len(val_loss)))


        if sum(val_corr) > best_corrects:
            print('Saving new best model at epoch ' + str(e) + ' (' + str(sum(val_corr)) + '/' + str(val_dataset.__len__()) + ')')
            torch.save(model, 'best_model.pt')
            best_corrects = sum(val_corr)

        scheduler.step()

    del model
    
    return 1

In [18]:
training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function)

torch.Size([8, 3, 224, 224])


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!