In [2]:
import pandas as pd
import numpy as np 
import soundfile as sf 
import librosa
from skimage.transform import resize 
from PIL import Image
import os
import torch
import random 
from torch import nn 
from torch.utils.data import DataLoader 
import torch.utils.data as td
import torchvision
from torchvision import models
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
import torch.utils.data as td
import pywt
# Setting seeds for reproducible results 
rng_seed = 1234
random.seed(rng_seed)
np.random.seed(rng_seed)
os.environ['PYTHONHASHSEED'] = str(rng_seed)
torch.manual_seed(rng_seed)
torch.cuda.manual_seed(rng_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

num_species = 24
batch_size = 8

data_path = '../Data/'

df = pd.read_csv(data_path+'csv/'+'train_tp'+'_data.csv')

In [3]:
df.head()

Unnamed: 0,recording_id,species_id,songtype_id,t_min,f_min,t_max,f_max
0,003bec244,14,1,44.544,2531.25,45.1307,5531.25
1,006ab765f,23,1,39.9615,7235.16,46.0452,11283.4
2,007f87ba2,12,1,39.136,562.5,42.272,3281.25
3,0099c367b,17,4,51.4206,1464.26,55.1996,4565.04
4,009b760e6,10,1,50.0854,947.461,52.5293,10852.7


### Cuda Device Selection

Use cuda:{device_num} to select cuda device that is not being used already

Make sure that this device is selected by exporting CUDA_VISIBLE_DEVICES={device_num} on the shell that's running the notebook server

In [3]:
os.system('nvidia-smi')

Thu Mar 10 16:17:08 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.39.01    Driver Version: 510.39.01    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:05:00.0 Off |                    0 |
| N/A   52C    P0   291W / 300W |  10042MiB / 16384MiB |    100%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   60C    P0   285W / 300W |  11206MiB / 16384MiB |    100%      Default |
|       

0

In [4]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device('cuda')
print(device)

cuda


In [5]:
print(data_path)

../Data/


### Preprocessing 

In [11]:
def to2DArray(x): 
    x=x.replace("[", '')
    x=x.replace("]", '')
    x=x.replace("...", '')
    x=x.replace("\n", '')
    y=np.array(x.split(" "))
    y = y[y != ""]
    y = np.asfarray(y, 'float64')
    y = np.reshape(y,(1, y.size))
    return y

In [4]:
df['mspec_db'] = df['mspec_db'].apply(lambda x: to2DArray(x))
df['chroma_db'] = df['chroma_db'].apply(lambda x: to2DArray(x))
df['stft_db'] = df['stft_db'].apply(lambda x: to2DArray(x))

KeyError: 'spec'

### Creating PyTorch Dataset Class

In [13]:
class RFCXDatasetFromArr(td.Dataset):
    def __init__(self, df):
        
        self.data = []
        self.labels = []
         # need this to transform data to tensors    
        self.transform = transforms.ToTensor()
                
        labels = df['species_id'].to_list()
        for label in labels:
            label_arr = np.zeros(24, dtype=np.single)
            label_arr[label] = 1.
            self.labels.append(label_arr)
             
        specs = df['spec']
            
        for i in range(len(specs)):
            current_spec = np.array(specs[i])
            dwt_decomp = pywt.dwt2(current_spec, 'bior1.3')
            LL, (LH, HL, HH) = dwt_decomp
            stack = np.stack([LH, HL, HH])
            self.data.append((LH, HL, HH))
            
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return (torch.tensor(self.data[idx]), torch.tensor(self.labels[idx]))        

### Creating Training and Validation Sets

In [14]:
train_df = None
val_df = None

# df = pd.read_csv(data_path + 'train_spectograms.csv')
X = df.drop('species_id', axis=1)
y = df['species_id']

strat = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng_seed)

for fold, (train_index, val_index) in enumerate(strat.split(X,y)):
    if fold==0:
        train_df = df.iloc[train_index]
        val_df = df.iloc[val_index]

train_df = train_df.reset_index(drop=True)

val_df = val_df.reset_index(drop=True)

In [15]:
train_dataset = RFCXDatasetFromArr(train_df)
val_dataset = RFCXDatasetFromArr(val_df)

### Configuring Model

ResNet50 Research Reference: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Classification/ConvNets/resnet50v1.5#data-augmentation

After reading up on ResNet at the above link, SGD was recommended as an optimizer. Went with a recommended learning rate scheduler from a related notebook in Kaggle. The above link recommends a different scheduler. We chose to use BCE w/ Logits Loss also based on recommendations from related work. We plan on trying out multiple different loss functions to see what works best for our problem. 

In [16]:
train_loader = DataLoader(train_dataset, batch_size = batch_size, sampler = td.RandomSampler(train_dataset))
val_loader = DataLoader(val_dataset, batch_size = batch_size, sampler = td.RandomSampler(val_dataset))

# Model definition 
model = models.resnet50(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, num_species)
)

pos_weight = (torch.ones(num_species) * num_species)

# load model into GPU
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)
loss_function = nn.BCEWithLogitsLoss(pos_weight)

loss_function.to('cuda')

BCEWithLogitsLoss()

Below, we can see the shape of our model. Note that ResNet50 has an output dimension of 2048, which we pass through a fully connected layer. The output of our fc layer is in agreement with competition standards. We designed the FC layer based on related work, and will optimize it in later phases.

### Training Loop

Training loop based on the work of another Kaggle notebook: https://www.kaggle.com/fffrrt/all-in-one-rfcx-baseline-for-beginners

Maintains a validation accuracy statistic (Does the most probable class match the ground-truth label?) as the model trains, and saves the model with the highest validation accuracy to the project directory.

In [17]:
def training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function):
    best_corrects = 0


    for e in range(0, 20):
        train_loss = []


        model.train()
        for batch, (data, target) in enumerate(train_loader):

#             print(data.shape)
            data = data.float()
            if torch.cuda.is_available():
#                 print("Loading training data to device")
                data, target = data.to('cuda'), target.to('cuda')

            optimizer.zero_grad()
            output = model(data)
            output = output.cuda()
            
            loss = loss_function(output, target)
            
            
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        for g in optimizer.param_groups:
            lr = g['lr']

        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Training Loss: ", str(sum(train_loss) / len(train_loss)))

        # Validation
        with torch.no_grad():
            val_loss = []
            val_corr = []

            model.eval()
            for batch, (data, target) in enumerate(val_loader):
                data = data.float()
                if torch.cuda.is_available():
                    data, target = data.cuda(), target.cuda()
                
        
                
                output = model(data)
                loss = loss_function(output, target)

                val_loss.append(loss.item())

                vals, answers = torch.max(output, 1)
                vals, targets = torch.max(target, 1)
                corrects = 0
                for i in range(0, len(answers)):
                    if answers[i] == targets[i]:
                        corrects = corrects + 1
                val_corr.append(corrects)


        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Validation Loss: ", str(sum(val_loss) / len(val_loss)))


        if sum(val_corr) > best_corrects:
            print('Saving new best model at epoch ' + str(e) + ' (' + str(sum(val_corr)) + '/' + str(val_dataset.__len__()) + ')')
            torch.save(model, 'best_model.pt')
            best_corrects = sum(val_corr)

        scheduler.step()

    del model
    
    return 1

In [18]:
training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function)

Epoch:  0
Learning Rate:  0.01
Training Loss:  4.867156464545453
Epoch:  0
Learning Rate:  0.01
Validation Loss:  4.7197561956221055
Saving new best model at epoch 0 (15/244)
Epoch:  1
Learning Rate:  0.01
Training Loss:  4.205348202439605
Epoch:  1
Learning Rate:  0.01
Validation Loss:  7.203796763573924
Saving new best model at epoch 1 (22/244)
Epoch:  2
Learning Rate:  0.01
Training Loss:  4.1474448325204065
Epoch:  2
Learning Rate:  0.01
Validation Loss:  4.956055225864533
Epoch:  3
Learning Rate:  0.01
Training Loss:  4.171266010550202
Epoch:  3
Learning Rate:  0.01
Validation Loss:  4.309998412286082
Epoch:  4
Learning Rate:  0.01
Training Loss:  4.156220074559822
Epoch:  4
Learning Rate:  0.01
Validation Loss:  4.706816296423635
Epoch:  5
Learning Rate:  0.01
Training Loss:  4.111804878125425
Epoch:  5
Learning Rate:  0.01
Validation Loss:  4.221887780774024
Epoch:  6
Learning Rate:  0.01
Training Loss:  4.060870784227966
Epoch:  6
Learning Rate:  0.01
Validation Loss:  4.161533

1

In [19]:
pwd

'/home/tuk99233/rainforest-audio-detection/Code'