In [None]:
import pandas as pd
import numpy as np 
import soundfile as sf 
import librosa
from skimage.transform import resize 
from PIL import Image
import os
import torch
import random 
from torch import nn 
from torch.utils.data import DataLoader 
import torch.utils.data as td
import torchvision
from torchvision import models
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
import torch.utils.data as td 
import csv
# --- Reproducibility -------------------------------------------------------
# Seed every random number generator the notebook relies on with one value.
rng_seed = 1234
os.environ['PYTHONHASHSEED'] = str(rng_seed)
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(rng_seed)
# Trade cuDNN autotuning for deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# --- Problem size ----------------------------------------------------------
num_species = 24
batch_size = 8

# --- Audio / spectrogram parameters ----------------------------------------
fft = 2048
hop = 512
# The recordings are sampled at 48 kHz (also noted in the Kaggle competition discussion).
sr = 48000
# Number of samples in a 10 second window.
length = sr * 10
# The spectrogram image is resized to 224 x 224 to match the ResNet50 input size.
mel_spec_dimensions = (224, 224)

# --- Data ------------------------------------------------------------------
data_path = '../Data/'

df = pd.read_csv(os.path.join(data_path, 'train_Augmented.csv'), sep=',')

In [1]:
pwd

'/home/tuk99233/rainforest-audio-detection/Code'

In [None]:
df.head()

In [None]:
#audf = pd.read_csv('https://raw.githubusercontent.com/CraigFox0/rainforest-audio-detection/main/Data/csv/train_tp_data.csv')
#!curl 'https://raw.githubusercontent.com/CraigFox0/rainforest-audio-detection/main/Data/csv/train_tp_data.csv' > savedFile.txt
#https://github.com/CraigFox0/rainforest-audio-detection/blob/main/Data/csv/train_tp_data.csv

In [None]:
import sklearn
from sklearn import model_selection
from sklearn.model_selection import GridSearchCV

In [None]:
# Remove the stray 'Unnamed: 0' column (presumably an index saved with the CSV).
# errors='ignore' keeps the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

### Cuda Device Selection

Use `cuda:{device_num}` to select a CUDA device that is not already in use.

Make sure this device is selected by exporting `CUDA_VISIBLE_DEVICES={device_num}` in the shell that runs the notebook server.

In [None]:
# List the GPUs and their current load to help pick a free device index for the next cell.
os.system('nvidia-smi')

In [None]:
# Restrict this process to a single physical GPU. This has to run before the
# first real CUDA call so that PyTorch only ever sees the selected card.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
# Fall back to the CPU instead of failing later when no GPU is visible.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

### Preprocessing 

Just using a quick technique for now, worried only about getting model results back. Will use different preprocessing steps as we move on. 

In [None]:
#def create_mel_spectograms(df):
#     df['spec'] = np.nan
#     df['spec'] = df['spec'].astype(object)
    
#     for idx,row in df.iterrows():
        
#         rid = row['recording_id']

#         wav, sr = librosa.load(data_path + 'train/' + rid + '.flac', sr=None)

         # Slicing and centering spectograms 
#         m = np.round((row['t_min'] + row['t_max']) / 2)
#         l = m - length / 2
#         if l < 0: l = 0
#         r = m + length
#         if r > len(wav):
#             r = len(wav)
#             l = r - m

#         mspec = librosa.feature.melspectrogram(y=wav[int(l):int(r)], n_fft=fft, hop_length=hop, sr=sr)
#         mspec = resize(mspec, mel_spec_dimensions)
#         mspec = (mspec - np.min(mspec))/np.max(mspec)
            
#         df.at[idx, 'spec'] = mspec
        
#     return df

### Optional: Rerun Mel Spectrogram Pipeline

Note: this should not be necessary if you are up to date with the main branch.
`train_spectograms.csv` should already be saved at `Data/train_spectograms.csv`, though we might need to pickle the 2D spectrogram arrays so that they can be loaded back for the model.


In [None]:
#df.head()

In [None]:
# df = create_mel_spectograms(df)
# df.to_csv(data_path + 'train_spectograms.csv')
# print(df.dtypes)
# df.head()

In [None]:
#df = pd.read_csv('../../BatchNormalizedAudio.csv') #File Containing the Spectograms as String

In [None]:
df.head()

In [None]:
def to2DArray(x):
    """Parse the printed string form of a NumPy array into a ``(1, n)`` float64 array.

    Args:
        x: String such as ``"[[1. 2.]\\n [3. 4.]]"``. Brackets, line breaks and the
            ``...`` truncation marker are treated as separators.

    Returns:
        ``numpy.ndarray`` of dtype float64 and shape ``(1, n)``, where ``n`` is the
        number of numeric tokens found (``(1, 0)`` when there are none).

    Raises:
        ValueError: If a remaining token cannot be parsed as a float.
    """
    # Replace the markers with a space rather than deleting them, so two values
    # that are only separated by a line break can never fuse into one token.
    # NOTE: a "..." marker means the string was truncated when it was written;
    # the elided values cannot be recovered here and are simply absent.
    for marker in ("[", "]", "...", "\n"):
        x = x.replace(marker, " ")
    # np.asfarray was removed in NumPy 2.0; build the float64 array explicitly.
    values = np.array([float(token) for token in x.split()], dtype=np.float64)
    return values.reshape(1, values.size)

In [None]:
#df['mspec_db'] = df['mspec_db'].apply(lambda x: to2DArray(x))
#df['chroma_db'] = df['chroma_db'].apply(lambda x: to2DArray(x))
#df['stft_db'] = df['stft_db'].apply(lambda x: to2DArray(x))
# Only parse values that are still strings, so re-running this cell after the
# column has already been converted to arrays is a no-op instead of an error.
df['spec'] = df['spec'].apply(lambda x: to2DArray(x) if isinstance(x, str) else x)

In [None]:
#df['spec'] = df['spec'].apply(lambda x: to2DArray(x))

### Creating PyTorch Dataset Class

Note: each single-channel spectrogram is stacked three times along a new leading axis (channel-first, `3 x H x W`) because ResNet50 expects a 3-channel input.

In [None]:
class RFCXDatasetFromArr(td.Dataset):
    """In-memory dataset of (3-channel spectrogram, one-hot species label) pairs.

    Args:
        df: DataFrame with a ``species_id`` column of integer class ids and a
            ``spec`` column holding one array-like spectrogram per row. Any index
            is accepted; rows are consumed in positional order.
        num_classes: Length of the one-hot label vector. Defaults to 24, the
            value this class previously hard-coded.
    """

    def __init__(self, df, num_classes=24):
        # Kept so the attribute stays available; it is not applied anywhere in this class.
        self.transform = transforms.ToTensor()

        # One-hot float32 labels, as required by the BCE-with-logits loss.
        self.labels = []
        for label in df['species_id'].to_list():
            one_hot = np.zeros(num_classes, dtype=np.single)
            one_hot[label] = 1.
            self.labels.append(one_hot)

        # Iterate over the values instead of indexing with specs[i]: label-based
        # lookup raises KeyError (or picks the wrong row) whenever the frame does
        # not carry a default 0..n-1 index, e.g. a slice that was not reset.
        self.data = []
        for spec in df['spec']:
            channel = np.array(spec)
            # Repeat the single channel three times along a new leading axis.
            self.data.append(np.stack([channel, channel, channel]))

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(spectrogram, one_hot_label)`` as tensors for sample ``idx``."""
        return (torch.tensor(self.data[idx]), torch.tensor(self.labels[idx]))

### Creating Training and Validation Sets

In [None]:
# Stratified 5-fold split on the species label. The train/validation frames of
# every fold are collected so the training cell can use any (or all) of them.
# df = pd.read_csv(data_path + 'train_spectograms.csv')
y = df['species_id']
X = df.drop('species_id', axis=1)

strat = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng_seed)

train_dfs, val_dfs = [], []
train_df = val_df = None
for fold, (train_index, val_index) in enumerate(strat.split(X, y)):
    # Each fold frame gets a fresh 0..n-1 index.
    train_df = df.iloc[train_index].reset_index(drop=True)
    val_df = df.iloc[val_index].reset_index(drop=True)
    train_dfs.append(train_df)
    val_dfs.append(val_df)
        

#train_df = train_df.reset_index(drop=True)

#val_df = val_df.reset_index(drop=True)

### Configuring Model

ResNet50 Research Reference: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Classification/ConvNets/resnet50v1.5#data-augmentation

After reading up on ResNet at the above link, SGD was recommended as an optimizer. Went with a recommended learning rate scheduler from a related notebook in Kaggle. The above link recommends a different scheduler. We chose to use BCE w/ Logits Loss also based on recommendations from related work. We plan on trying out multiple different loss functions to see what works best for our problem. 

In [None]:
# Model definition 
model = models.resnet50(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, num_species)
)

pos_weight = (torch.ones(num_species) * num_species)

# load model into GPU
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)
loss_function = nn.BCEWithLogitsLoss(pos_weight)

loss_function.to('cuda')

In [None]:
#freqs = pd.DataFrame(df.species_id.value_counts())
#num_total = sum(freqs['species_id'])

In [None]:
#freqs['species_id'] = freqs['species_id'].apply(lambda x: x/num_total)

In [None]:
#Try Weighting posititons based on frequency
#torch.ones(num_species) * num_species

Below, we can see the shape of our model. Note that ResNet50 has an output dimension of 2048, which we pass through a fully connected layer. The output of our fc layer is in agreement with competition standards. We designed the FC layer based on related work, and will optimize it in later phases.

### Training Loop

Training loop based on the work of another Kaggle notebook: https://www.kaggle.com/fffrrt/all-in-one-rfcx-baseline-for-beginners

Maintains a validation accuracy statistic (Does the most probable class match the ground-truth label?) as the model trains, and saves the model with the highest validation accuracy to the project directory.

In [None]:
def training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function, best_correct, e_poch, model_num):
    """Train ``model`` for ``e_poch`` epochs, validating and checkpointing after each one.

    After every epoch the model is evaluated on ``val_loader``. A prediction is
    counted as correct when the arg-max of the logits equals the arg-max of the
    target vector. Whenever the number of correct predictions exceeds
    ``best_correct`` the whole model is saved to ``best_model{model_num}.pt``.

    Args:
        train_loader: Iterable of ``(data, target)`` training batches.
        val_loader: Iterable of ``(data, target)`` validation batches.
        model: Module to train; batches are moved to the device it lives on.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        pos_weight: Unused; kept so existing calls keep working.
        loss_function: Callable ``loss_function(output, target)``.
        best_correct: Best validation correct-count seen so far.
        e_poch: Number of epochs to run.
        model_num: Suffix used in the checkpoint file name.

    Returns:
        The best validation correct-count after training (never lower than the
        ``best_correct`` passed in).
    """
    # Follow the model's own device rather than hard-coding 'cuda', so the loop
    # also runs on CPU-only machines and never depends on notebook globals.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for e in range(0, e_poch):
        train_loss = []

        model.train()
        for data, target in train_loader:
            data = data.float().to(device)
            target = target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        # Learning rate of the last parameter group (what the original loop reported).
        lr = optimizer.param_groups[-1]['lr']

        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Training Loss: ", str(sum(train_loss) / len(train_loss)))

        # Validation
        val_loss = []
        val_correct = 0
        val_total = 0

        model.eval()
        with torch.no_grad():
            for data, target in val_loader:
                data = data.float().to(device)
                target = target.to(device)

                output = model(data)
                val_loss.append(loss_function(output, target).item())

                _, answers = torch.max(output, 1)
                _, targets = torch.max(target, 1)
                val_correct += (answers == targets).sum().item()
                val_total += target.size(0)

        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Validation Loss: ", str(sum(val_loss) / len(val_loss)))

        if val_correct > best_correct:
            # The total is counted from the batches themselves instead of reading
            # a `val_dataset` global that may not match `val_loader`.
            print('Saving new best model at epoch ' + str(e) + ' (' + str(val_correct) + '/' + str(val_total) + ')')
            torch.save(model, 'best_model{}.pt'.format(model_num))
            best_correct = val_correct

        scheduler.step()

    return best_correct

In [None]:
#train_dataset = RFCXDatasetFromArr(train_dfs[x])
#val_dataset = RFCXDatasetFromArr(val_dfs[x])

In [None]:
# Train on the selected fold(s) and keep track of the best learning rate / epoch count.
# NOTE: the same `model` instance keeps training across all configurations, so
# later configurations do not start from the initial weights.
for x in range(1):
    train_dataset = RFCXDatasetFromArr(train_dfs[x])
    val_dataset = RFCXDatasetFromArr(val_dfs[x])
    best_correct = 0
    model_num = x
    train_loader = DataLoader(train_dataset, batch_size = batch_size, sampler = td.RandomSampler(train_dataset))
    val_loader = DataLoader(val_dataset, batch_size = batch_size, sampler = td.RandomSampler(val_dataset))
    lr_vals = [.001] #Adjust
    epochs = [5] #Adjust
    # Defined up front so the summary below cannot hit an unbound name when no
    # configuration improves on `best_correct`.
    best_lr = None
    best_epoch = None
    # Dedicated loop names: the previous inner loops reused `x` (the fold index)
    # and overwrote the global `y` label Series.
    for lr_val in lr_vals:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr_val, weight_decay=0.0001, momentum=0.9)
        for n_epochs in epochs:
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)
            best_correct_new = training_loop(train_loader, val_loader, model, optimizer, scheduler, pos_weight, loss_function, best_correct, n_epochs, model_num)
            if best_correct_new > best_correct:
                best_correct = best_correct_new
                best_epoch = n_epochs
                best_lr = lr_val
    print("Best Learning Rate is {}".format(best_lr))
    print("Best Epochs is {}".format(best_epoch))

### Submission Generation

Based on the work of another Kaggle notebook: https://www.kaggle.com/fffrrt/all-in-one-rfcx-baseline-for-beginners. (Should edit before final project done)

In [None]:
def testing_loop(train_loader, model, optimizer, scheduler, pos_weight, loss_function, best_correct, e_poch):


    for e in range(0, e_poch):
        train_loss = []


        model.train()
        for batch, (data, target) in enumerate(train_loader):

#             print(data.shape)
            data = data.float()
            if torch.cuda.is_available():
#                 print("Loading training data to device")
                data, target = data.to('cuda'), target.to('cuda')

            optimizer.zero_grad()
            output = model(data)
            output = output.cuda()
            
            loss = loss_function(output, target)
            
            
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        for g in optimizer.param_groups:
            lr = g['lr']

        print("Epoch: ", str(e))
        print("Learning Rate: ", str(lr))
        print("Training Loss: ", str(sum(train_loss) / len(train_loss)))

        if sum(train_loss) > best_correct:
            print('Saving new best model at epoch ')
            torch.save(model, 'best_model.pt')
            best_correct = sum(val_corr)

        scheduler.step()

    del model
    
    return best_correct

In [None]:
#train_dataset2 = RFCXDatasetFromArr(df)
#train_loader2 = DataLoader(train_dataset2, batch_size = batch_size, sampler = td.RandomSampler(train_df))

In [None]:
#optimizer = torch.optim.SGD(model.parameters(), lr=best_lr, weight_decay=0.0001, momentum=0.9)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.4)
#testing_loop(train_loader2, model, optimizer, scheduler, pos_weight, loss_function, best_correct, best_epoch)

In [None]:
# Load the saved fold checkpoint and run one optimisation step on a single batch.
train_dataset = RFCXDatasetFromArr(train_dfs[x])
train_loader = DataLoader(train_dataset, batch_size = batch_size, sampler = td.RandomSampler(train_dataset))
dataloader_iterator = iter(train_loader)
data, target = next(dataloader_iterator)

# torch.load restores the complete module that training_loop pickled with
# torch.save(model, ...), so building a fresh pretrained ResNet50 first (as the
# previous version did) was discarded work.
model0 = torch.load('best_model0.pt')
model0.train()
#for batch, (data, target) in enumerate(train_loader):

#             print(data.shape)
data = data.float()
if torch.cuda.is_available():
#                 print("Loading training data to device")
    data, target = data.to('cuda'), target.to('cuda')

# NOTE(review): the step below is computed with `model` and its optimizer, not
# with the freshly loaded `model0` - confirm this is intended.
optimizer.zero_grad()
output = model(data)
output = output.cuda()

loss = loss_function(output, target)


loss.backward()
optimizer.step()

#train_loss.append(loss.item())

#train_loss.append(loss.item())

In [None]:
def create_mel_spectograms(df, test_dir="test/"):
    """Convert one test recording into a list of 3-channel mel-spectrogram chunks.

    The recording is cut into consecutive windows of ``length`` samples. The last
    window is taken from the end of the signal so it is as full as possible.

    Args:
        df: File name of the recording inside ``test_dir`` (despite its name,
            this is not a DataFrame).
        test_dir: Directory that contains the test recordings.

    Returns:
        List with one array per window. Each array is the resized mel spectrogram
        (``mel_spec_dimensions``), shifted and scaled by its min/max and repeated
        three times along a new leading axis.
    """
    # sr=None keeps the file's native sampling rate.
    # NOTE(review): `length` is derived from the global 48 kHz setting - confirm
    # every test file uses that rate.
    wav, sr = librosa.load(os.path.join(test_dir, df), sr=None)

    # Split for enough segments to not miss anything
    segments = int(np.ceil(len(wav) / length))

    mel_array = []

    for i in range(segments):
        if (i + 1) * length > len(wav):
            # Last segment going from the end (the whole signal if it is shorter
            # than one window).
            chunk = wav[max(len(wav) - length, 0):]
        else:
            chunk = wav[i * length:(i + 1) * length]

        # `y` is passed by keyword: newer librosa releases reject positional audio.
        mel_spec = librosa.feature.melspectrogram(y=chunk, n_fft=fft, hop_length=hop, sr=sr)
        mel_spec = resize(mel_spec, mel_spec_dimensions)

        mel_spec = mel_spec - np.min(mel_spec)
        peak = np.max(mel_spec)
        # A constant (e.g. silent) chunk has peak 0; skip the division so the
        # result stays all zeros instead of turning into NaN.
        if peak > 0:
            mel_spec = mel_spec / peak

        mel_spec = np.stack((mel_spec, mel_spec, mel_spec))

        mel_array.append(mel_spec)

    return mel_array

In [None]:
import statistics

In [None]:
# model0 = models.resnet50(pretrained=True)
# model0.fc = nn.Sequential(
#     nn.Linear(2048, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, num_species)
# )

# model0 = torch.load('best_model0.pt')
# model0.eval()

# model1 = models.resnet50(pretrained=True)
# model1.fc = nn.Sequential(
#     nn.Linear(2048, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, num_species)
# )

# model1 = torch.load('best_model1.pt')
# model1.eval()

# model2 = models.resnet50(pretrained=True)
# model2.fc = nn.Sequential(
#     nn.Linear(2048, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, num_species)
# )

# model2 = torch.load('best_model2.pt')
# model2.eval()

# model3 = models.resnet50(pretrained=True)
# model3.fc = nn.Sequential(
#     nn.Linear(2048, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, num_species)
# )

# model3 = torch.load('best_model3.pt')
# model3.eval()

# model4 = models.resnet50(pretrained=True)
# model4.fc = nn.Sequential(
#     nn.Linear(2048, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, 1024),
#     nn.ReLU(),
#     nn.Dropout(p=0.2),
#     nn.Linear(1024, num_species)
# )

# model4 = torch.load('best_model4.pt')
# model4.eval()

# # Scoring does not like many files:(
# #if save_to_disk == 0:
# #    for f in os.listdir('/kaggle/working/'):
# #        os.remove('/kaggle/working/' + f)

# if torch.cuda.is_available():
#     model.cuda()
    
# # Prediction loop
# print('Starting prediction loop')
# with open('submission.csv', 'w', newline='') as csvfile:
#     submission_writer = csv.writer(csvfile, delimiter=',')
#     submission_writer.writerow(['recording_id','s0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11',
#                                's12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23'])
    
#     test_files = os.listdir('test/') 
#     print(len(test_files))
    
#     # Every test file is split on several chunks and prediction is made for each chunk
#     for i in range(0, len(test_files)):
#         data = create_mel_spectograms(test_files[i])
#         data = torch.tensor(data)
#         data = data.float()
#         if torch.cuda.is_available():
#             data = data.cuda()

#         output4 = model4(data)
#         output2 = model2(data)
#         output3 = model3(data)
#         output0 = model0(data)
#         output1 = model1(data)
#         # Taking max prediction from all slices per bird species
#         # Usually you want Sigmoid layer here to convert output to probabilities
#         # In this competition only relative ranking matters, and not the exact value of prediction, so we can use it directly
#         maxed_output0 = torch.max(output0, dim=0)[0]
#         maxed_output0 = maxed_output0.cpu().detach()
#         maxed_output1 = torch.max(output1, dim=0)[0]
#         maxed_output1 = maxed_output1.cpu().detach()
#         maxed_output2 = torch.max(output2, dim=0)[0]
#         maxed_output3 = maxed_output2.cpu().detach()
#         maxed_output3 = torch.max(output3, dim=0)[0]
#         maxed_output3 = maxed_output3.cpu().detach()
#         maxed_output4 = torch.max(output4, dim=0)[0]
#         maxed_output4 = maxed_output4.cpu().detach()
        
#         file_id = str.split(test_files[i], '.')[0]
#         write_array0 = [file_id]
#         write_array1 = [file_id]
#         write_array2 = [file_id]
#         write_array3 = [file_id]
#         write_array4 = [file_id]
#         for out in maxed_output0:
#             write_array0.append(out.item())
#         for out in maxed_output1:
#             write_array1.append(out.item())
#         for out in maxed_output2:
#             write_array2.append(out.item())
#         for out in maxed_output3:
#             write_array3.append(out.item())
#         for out in maxed_output4:
#             write_array4.append(out.item())
            
#         write_array = [file_id]
#         for x in range(1, len(write_array0)):
#             agg = [write_array0[x], write_array1[x], write_array2[x], write_array3[x], write_array4[x]]
#             print(agg)
#             write_array.append(statistics.mean(agg))
#         submission_writer.writerow(write_array)
        
#         if i % 100 == 0 and i > 0:
#             print('Predicted for ' + str(i) + ' of ' + str(len(test_files) + 1) + ' files')

# print('Submission generated')

In [None]:
print('Starting prediction loop')
# Inference only: switch dropout / batch-norm to evaluation behaviour. An earlier
# cell leaves model0 in train mode, which would make the predictions depend on
# the random dropout mask and on each file's own batch statistics.
model0.eval()
with open('submission.csv', 'w', newline='') as csvfile:
    submission_writer = csv.writer(csvfile, delimiter=',')
    submission_writer.writerow(['recording_id','s0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11',
                               's12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23'])

    # Sorted so the row order of the submission is the same on every run.
    test_files = sorted(os.listdir('test/'))
    #print(len(test_files))

    # No gradients are needed for prediction; this avoids building the autograd graph.
    with torch.no_grad():
        # Every test file is split on several chunks and prediction is made for each chunk
        for i, test_file in enumerate(test_files):
            data = create_mel_spectograms(test_file)
            # Convert the list of chunks to one array first; torch.tensor on a
            # list of arrays is very slow.
            data = torch.tensor(np.asarray(data)).float()
            if torch.cuda.is_available():
                data = data.cuda()

            #output4 = model4(data)
            #output2 = model2(data)
            #output3 = model3(data)
            output0 = model0(data)
            #output1 = model1(data)
            # Taking max prediction from all slices per bird species
            # Usually you want Sigmoid layer here to convert output to probabilities
            # In this competition only relative ranking matters, and not the exact value of prediction, so we can use it directly
            maxed_output0 = torch.max(output0, dim=0)[0]
            maxed_output0 = maxed_output0.cpu().detach()

            file_id = str.split(test_file, '.')[0]
            write_array0 = [file_id] + maxed_output0.tolist()

            #write_array = [file_id]
            #write_array.append(write_array0)
            submission_writer.writerow(write_array0)

            if i % 100 == 0 and i > 0:
                # Report the real file count (the message used to say N + 1).
                print('Predicted for ' + str(i) + ' of ' + str(len(test_files)) + ' files')

print('Submission generated')