In [1]:
import os
import cv2
import csv
import sys
import copy
from tqdm import tqdm
import random
import librosa
import librosa.display
import numpy as np 
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import label_ranking_average_precision_score, accuracy_score
import torchvision

import matplotlib.pyplot as plt
import IPython.display as ipd 
import skimage.io
from skimage.transform import resize
import albumentations as albu
from albumentations import pytorch as AT
from PIL import Image


import pretrainedmodels
from resnest.torch import resnest50

sys.path.append('../')

import src.audio_augs as aa
from src.utils import patch_first_conv
from src.loss import lsep_loss_stable, lsep_loss
from src.batch_mixer import BatchMixer




In [2]:
train_folder_path = "../data/train/"
test_folder_path = "../data/test/"
sample_submission = "../data/sample_submission.csv"
train_tp_path = "../data/train_tp.csv"
train_fp_path = "../data/train_fp.csv"
train_tp_folds = pd.read_csv("../data/train_tp_folds_v3.csv")
train_fp_folds = pd.read_csv("train_fp_folds.csv").drop("Unnamed: 0", 1)

train_files = os.listdir(train_folder_path)
test_files = os.listdir(test_folder_path)

train_tp = pd.read_csv(train_tp_path)
train_fp = pd.read_csv(train_fp_path)
_df = pd.read_csv("missing_3classes.csv")
_df = _df.drop(columns="Unnamed: 0")


train=pd.concat([train_fp, train_tp], ignore_index=True)

In [3]:
class Config:
    SEED = 17
    NUM_BIRDS = 5
    BATCH_SIZE = 16
    NUM_WORKERS = 8
    IMG_H = 320
    IMG_W = 512
    
    #optimizer params
    LR = 0.01
    WEIGHT_DECAY = 0.0001
    MOMENTUM = 0.9
    
    #scheduler params
    FACTOR = 0.4
    PATIENCE = 3
    
    #spec params
    FFT = 2048
    HOP = 512
    MEL = 224
    SR = 48000
    Fmin = 84 
    Fmax = 15056
    LENGTH_1  = 6 * SR
    LENGTH_2 = 3*SR
    #TODO: MAKE AUGS CONF

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(Config.SEED)    

def wav2mel(wav):


    mel_spec = librosa.feature.melspectrogram(wav, n_fft=Config.FFT, 
                                              hop_length=Config.HOP, 
                                              sr=Config.SR, 
                                              fmin=Config.Fmin, 
                                              fmax=Config.Fmax, 
                                              power=2, n_mels=Config.MEL)
    mel_spec = librosa.power_to_db(mel_spec, ref=np.max)


    return mel_spec

In [5]:
class RainforestDataset(Dataset):
    def __init__(self, df, audio_transforms = None, image_transforms = None,):
        self.audio_transforms = audio_transforms
        self.img_transforms = image_transforms
        self.df = df
        
        
    def __len__(self):
        return len(self.df)
    
    
    def wav2mel(self, wav):
        
        
        mel_spec = librosa.feature.melspectrogram(wav, n_fft=Config.FFT, 
                                                  hop_length=Config.HOP, 
                                                  sr=Config.SR, 
                                                  fmin=Config.Fmin, 
                                                  fmax=Config.Fmax, 
                                                  power=2, n_mels=Config.MEL)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

        
        return mel_spec
    
        
        
    def __getitem__(self, idx):
        sample = copy.deepcopy(self.df.iloc[idx, :].values)

        bird_id = sample[1]
        #true_or_not = sample[-1]
        label_array = np.zeros(Config.NUM_BIRDS, dtype=np.single)
        labels_dict = {3 : 0, 18: 1, 12: 2, 7 : 3, 15: 4}

        label_array[labels_dict[bird_id]] = 1.
        
        wav, sr = librosa.load(train_folder_path + sample[0] + ".flac", sr=None)
        
        tmin = float(sample[3]) * sr
        tmax = float(sample[5]) * sr
        center = np.round((tmin + tmax) / 2)
        
        check_length = True
        
        if check_length: #sample[11] == 1 or sample[11] == 2:
            length = Config.LENGTH_2
        else:
            length = Config.LENGTH_1
        
        beginning = center - length / 2
        if beginning < 0:
            beginning = 0
            
        ending = beginning + length
        if ending > len(wav):
            ending = len(wav)
            beginning = ending - length 
            
        wav_slice = wav[int(beginning):int(ending)]
        
        if self.audio_transforms: # and bird_id not in (3, 7, 8, 9):
            wav_slice =  self.audio_transforms(wav_slice)
        
        mel_spec = self.wav2mel(wav_slice)
        mel_spec = np.expand_dims(mel_spec, axis=2).astype(np.float32)
        #print(np.max(mel_spec))
        
        if self.img_transforms:
            image =  self.img_transforms(image=mel_spec)
            mel_spec = image["image"]
            
            
        
        return mel_spec / 100.0, label_array

In [6]:
train_tp_folds["true"] = 1
train_fp_folds["true"] = 0

problem_ids = [3,18,12,7,15]
for i in problem_ids:
    if i == 3:
        rare_train = train_tp_folds[(train_tp_folds.species_id == i)]
    else:
        rare_train = rare_train.append(train_tp_folds[(train_tp_folds.species_id == i)], ignore_index=False )  

fold = 1
X_train = train_tp_folds[(train_tp_folds['fold'] != fold) & (train_tp_folds['fold'] != 5)].reset_index(drop=True)
X_val = train_tp_folds[train_tp_folds['fold'] == fold].reset_index(drop=True)
X_test = train_tp_folds[train_tp_folds['fold'] == 5].reset_index(drop=True)

X_train_rare = rare_train[(rare_train['fold'] != fold)].reset_index(drop=True)
X_val_rare = rare_train[rare_train['fold'] == fold].reset_index(drop=True)
_df = _df[_df.recording_id.isin(X_train_rare.recording_id)]
X_train_rare = X_train_rare[_df.columns]
X_train_rare = pd.concat([X_train_rare, _df])


print('Training on ' + str(len(X_train_rare)) + ' examples')
print('Validating on ' + str(len(X_val_rare)) + ' examples')


Training on 332 examples
Validating on 44 examples


In [7]:
audio_transform_train = aa.Compose([
  aa.OneOf([
    aa.GaussianNoiseSNR(min_snr=5),
    aa.PinkNoiseSNR(min_snr=5)
  ]),
  aa.PitchShift(max_steps=2, sr=Config.SR, p=0.5),
  #TimeStretch(max_rate=1.2, p=0.1),
  aa.TimeShift(sr=Config.SR),
  aa.VolumeControl(mode="sine", p=0.5)
])


image_transform_train = albu.Compose([
        albu.Resize(Config.IMG_H, Config.IMG_W),
        #albu.IAAAdditiveGaussianNoise(p=0.2),
        #albu.Cutout(num_holes=8, max_h_size=16, max_w_size=16, p=0.25),
        #albu.OneOf(
        #    [
        #        albu.IAASharpen(p=1),
        #        albu.Blur(blur_limit=3, p=1),
        #        albu.MotionBlur(blur_limit=3, p=1),
        #    ],
        #    p=0.3,
        #),

        #albu.Normalize((-48.01), (8.45), max_pixel_value=1),
        AT.ToTensorV2()
    ])


transform_val = albu.Compose([
        albu.Resize(Config.IMG_H, Config.IMG_W),
        #albu.Normalize((-48.01), (8.45), max_pixel_value=1),
        AT.ToTensorV2()
    ])

In [8]:
train_dataset = RainforestDataset(X_train_rare, audio_transforms=audio_transform_train, image_transforms=image_transform_train)
val_dataset = RainforestDataset(X_val_rare, audio_transforms=None, image_transforms=transform_val)

train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers = Config.NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle = False, num_workers = Config.NUM_WORKERS)

#model = pretrainedmodels.__dict__['se_resnext50_32x4d']( pretrained= "imagenet")
#model.load_state_dict(torch.load("../input/seresnext50/se_resnext50_32x4d-a260b3a4.pth"))
model = torchvision.models.resnet18(pretrained=True)
#model = resnest50(pretrained=True)
model.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
model.fc = nn.Sequential(
    nn.Linear(512, 512),
    nn.Dropout(0.4),
    nn.ReLU(),
    nn.Linear(512, Config.NUM_BIRDS)
)


patch_first_conv(model, 1)
# Picked for this notebook; pick new ones after major changes (such as adding train_fp to train data)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, )# momentum = 0.9)
#optimizer = torch.optim.SGD(model.parameters(), lr=Config.LR, weight_decay=Config.WEIGHT_DECAY, momentum=Config.MOMENTUM)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = Config.PATIENCE, gamma = Config.FACTOR)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience = Config.PATIENCE, factor = Config.FACTOR, mode = "max")
scheduler =torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)

mixer = BatchMixer(p=0.45)
# This loss function is not exactly suited for competition metric, which only cares about ranking of predictions
# Exploring different loss fuctions would be a good idea
#pos_weights = torch.ones(Config.NUM_BIRDS)
#pos_weights = pos_weights * Config.NUM_BIRDS
#loss_function = nn.BCEWithLogitsLoss(pos_weight=pos_weights)

if torch.cuda.is_available():
    model = model.cuda()
    #loss_function = loss_function.cuda()

In [9]:
def load_val_file(record_id, df):
    wav, sr = librosa.load('../data/train/' + record_id + ".flac", sr=None)

        

    # Split for enough segments to not miss anything
    segments = len(wav) / Config.LENGTH_2
    segments = int(np.ceil(segments))
    mel_array = []
    for i in range(0, segments):
        if (i + 1) * Config.LENGTH_2 > len(wav):
            slice = wav[len(wav) - Config.LENGTH_2:len(wav)]
        else:
            slice = wav[i * Config.LENGTH_1:(i + 1) * Config.LENGTH_2]

        mel_spec = librosa.feature.melspectrogram(slice, n_fft=Config.FFT, 
                                                  hop_length=Config.HOP, 
                                                  sr=Config.SR, 
                                                  fmin=Config.Fmin, 
                                                  fmax=Config.Fmax, 
                                                  power=2, n_mels=Config.MEL)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        transform_val = albu.Compose([albu.Resize(Config.IMG_H, Config.IMG_W),]) 
        mel_spec = np.expand_dims(mel_spec, axis=2).astype(np.float32) / 100.0
        augmented = transform_val(image = mel_spec)
        mel_spec = augmented[ "image"]
        mel_array.append(mel_spec)
        
    label_array = np.zeros(Config.NUM_BIRDS, dtype=np.single)
    species_ids = df[(df.recording_id==record_id)].species_id.unique()
    label_array[species_ids] = 1.
    

  
    return np.array(mel_array), label_array

def lwlrap(truth, scores):
    """Calculate the overall lwlrap using sklearn.metrics.lrap."""
    # sklearn doesn't correctly apply weighting to samples with no labels, so just skip them.
    sample_weight = np.sum(truth > 0, axis=1)
    nonzero_weight_sample_indices = np.flatnonzero(sample_weight > 0)
    overall_lwlrap = label_ranking_average_precision_score(
      truth[nonzero_weight_sample_indices, :] > 0,
      scores[nonzero_weight_sample_indices, :],
      sample_weight=sample_weight[nonzero_weight_sample_indices])
    return overall_lwlrap


In [10]:
best_corrects = 0

# Train loop
print('Starting training loop')
for e in range(0, 15):
    # Stats
    accumulation_steps  = 2
    train_loss = []
    train_corr = []
    
    # Single epoch - train
    model.train()
    for batch, (data, target) in tqdm(enumerate(train_loader)):
        data = data.float()
        data, target = mixer(data, target)
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
            
        optimizer.zero_grad()
        
        output = model(data)
        #loss = loss_function(output, target)
        label_smoothing_list = [0.001, 0.0011, 0.0012, 0.00125, 0.001, 0.0011, 0.001, 0.0012]
        label_smoothing = random.choice(label_smoothing_list) 
        targets_smooth = target * (1 - label_smoothing) + 0.5 * label_smoothing
        loss = lsep_loss(output, targets_smooth)
        
        #oss = loss / accumulation_steps                # Normalize our loss (if averaged)
        #loss.backward()                                 # Backward pass
        #if (batch+1) % accumulation_steps == 0:             # Wait for several backward steps
        #    optimizer.step()                            # Now we can do an optimizer step
            #model.zero_grad()                           # Reset gradients tensors
  
        loss.backward()
        optimizer.step()
        #scheduler.step()
        
        # Stats
        vals, answers = torch.max(output, 1)
        vals, targets = torch.max(target, 1)
        corrects = 0
        for i in range(0, len(answers)):
            if answers[i] == targets[i]:
                corrects = corrects + 1
        train_corr.append(corrects)
        
        train_loss.append(loss.item())
    
    # Stats
    for g in optimizer.param_groups:
        lr = g['lr']
    print('Epoch ' + str(e) + ' training end. LR: ' + str(lr) + ', Loss: ' + str(sum(train_loss) / len(train_loss)) +
          ', Correct answers: ' + str(sum(train_corr)) + '/' + str(train_dataset.__len__()))
    #torch.save(model.state_dict(), 'best_model.pt')
    # Single epoch - validation
    with torch.no_grad():
        # Stats
        val_loss = []
        val_corr = []
        val_metrics = []
        acc_metrics = []
        answers_list = []
        targets_list = []
        
        model.eval()
        for batch, (data, target) in enumerate(val_loader):
            data = data.float()
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            
            output = model(data)
            #loss = loss_function(output, target)
            loss = lsep_loss(output, target)

            
            # Stats
            th_output = torch.sigmoid(output)  
            th_output[th_output >= 0.5] = 1
            accuracy = (th_output == target).sum()/(16*24)
            
            val_metric =  lwlrap( target.cpu().numpy(), output.cpu().numpy())
            vals, answers = torch.max(output, 1)
            vals, targets = torch.max(target, 1)
            answers_list.append(answers)
            targets_list.append(targets)
            val_metrics.append(val_metric.item())
            acc_metrics.append(accuracy.item())
            
            corrects = 0
            for i in range(0, len(answers)):
                if answers[i] == targets[i]:
                    corrects = corrects + 1
            val_corr.append(corrects)
            val_loss.append(loss.item())

    
    valid_epoch_metric = sum(val_metrics) / len(val_loss)
    # Stats
    print('Epoch ' + str(e) + ' validation end. LR: ' + str(lr) + ', Loss: ' + str(sum(val_loss) / len(val_loss)) +
          ', Accuracy: ' + str(sum(val_corr)) + '/' + str(val_dataset.__len__()) + ", Val metric: " + str(valid_epoch_metric))
    
    # If this epoch is better than previous on validation, save model
    # Validation loss is the more common metric, but in this case our loss is misaligned with competition metric, making accuracy a better metric
    if valid_epoch_metric > best_corrects:
        print('Saving new best model at epoch ' + str(e) + ' (' + str(sum(acc_metrics)) + '/' + str(len(val_loss)) + ')')
        torch.save(model.state_dict(), 'best_model_rare.pt')
        best_corrects = valid_epoch_metric
        
    # Call every epoch
    #scheduler.step(valid_epoch_metric)
    scheduler.step()

# Free memory
del model

Starting training loop


21it [00:23,  1.13s/it]

Epoch 0 training end. LR: 0.0003, Loss: 1.5213377589271182, Correct answers: 215/332





Epoch 0 validation end. LR: 0.0003, Loss: 1.7929445107777913, Accuracy: 9/44, Val metric: 0.49085648148148153
Saving new best model at epoch 0 (0.028645833488553762/3)


21it [00:24,  1.17s/it]

Epoch 1 training end. LR: 0.0002967221401100708, Loss: 1.2958367523692904, Correct answers: 235/332





Epoch 1 validation end. LR: 0.0002967221401100708, Loss: 1.675284465154012, Accuracy: 23/44, Val metric: 0.6836805555555555
Saving new best model at epoch 1 (0.0703125/3)


21it [00:24,  1.17s/it]

Epoch 2 training end. LR: 0.0002870318186463901, Loss: 1.1456916474160694, Correct answers: 244/332





Epoch 2 validation end. LR: 0.0002870318186463901, Loss: 0.8493809898694357, Accuracy: 28/44, Val metric: 0.8027777777777777
Saving new best model at epoch 2 (0.1067708358168602/3)


21it [00:24,  1.15s/it]

Epoch 3 training end. LR: 0.0002713525491562421, Loss: 1.1468930698576427, Correct answers: 238/332





Epoch 3 validation end. LR: 0.0002713525491562421, Loss: 0.7201937834421793, Accuracy: 28/44, Val metric: 0.8159722222222222
Saving new best model at epoch 3 (0.1067708358168602/3)


21it [00:24,  1.16s/it]

Epoch 4 training end. LR: 0.00025036959095382875, Loss: 1.0497864541553317, Correct answers: 237/332





Epoch 4 validation end. LR: 0.00025036959095382875, Loss: 0.7132663801312447, Accuracy: 34/44, Val metric: 0.8746527777777778
Saving new best model at epoch 4 (0.1041666716337204/3)


21it [00:25,  1.21s/it]

Epoch 5 training end. LR: 0.00022500000000000002, Loss: 1.0033749654179527, Correct answers: 253/332





Epoch 5 validation end. LR: 0.00022500000000000002, Loss: 0.8138600389162699, Accuracy: 27/44, Val metric: 0.7993055555555556


21it [00:22,  1.06s/it]

Epoch 6 training end. LR: 0.00019635254915624213, Loss: 0.9374407927195231, Correct answers: 261/332





Epoch 6 validation end. LR: 0.00019635254915624213, Loss: 0.7075935502847036, Accuracy: 29/44, Val metric: 0.8236111111111111


21it [00:21,  1.04s/it]

Epoch 7 training end. LR: 0.00016567926949014806, Loss: 1.00148359082994, Correct answers: 258/332





Epoch 7 validation end. LR: 0.00016567926949014806, Loss: 0.6496172696352005, Accuracy: 31/44, Val metric: 0.8559027777777777


21it [00:21,  1.02s/it]

Epoch 8 training end. LR: 0.00013432073050985205, Loss: 0.988366234870184, Correct answers: 255/332





Epoch 8 validation end. LR: 0.00013432073050985205, Loss: 0.9737612158060074, Accuracy: 25/44, Val metric: 0.7680555555555556


21it [00:21,  1.02s/it]

Epoch 9 training end. LR: 0.00010364745084375793, Loss: 0.8743262404487246, Correct answers: 266/332





Epoch 9 validation end. LR: 0.00010364745084375793, Loss: 0.6451925933361053, Accuracy: 31/44, Val metric: 0.859375


21it [00:22,  1.05s/it]

Epoch 10 training end. LR: 7.500000000000006e-05, Loss: 0.7987490211214338, Correct answers: 260/332





Epoch 10 validation end. LR: 7.500000000000006e-05, Loss: 0.5083025644222895, Accuracy: 38/44, Val metric: 0.9322916666666666
Saving new best model at epoch 10 (0.109375/3)


21it [00:22,  1.07s/it]

Epoch 11 training end. LR: 4.963040904617133e-05, Loss: 0.8427585462729136, Correct answers: 264/332





Epoch 11 validation end. LR: 4.963040904617133e-05, Loss: 0.6606930096944174, Accuracy: 33/44, Val metric: 0.8697916666666666


21it [00:21,  1.03s/it]

Epoch 12 training end. LR: 2.8647450843757904e-05, Loss: 0.8309630680651892, Correct answers: 268/332





Epoch 12 validation end. LR: 2.8647450843757904e-05, Loss: 0.5474882225195566, Accuracy: 37/44, Val metric: 0.9236111111111112


21it [00:21,  1.03s/it]

Epoch 13 training end. LR: 1.2968181353609857e-05, Loss: 0.8986740594818479, Correct answers: 266/332





Epoch 13 validation end. LR: 1.2968181353609857e-05, Loss: 0.5064829091231028, Accuracy: 38/44, Val metric: 0.9340277777777778
Saving new best model at epoch 13 (0.109375/3)


21it [00:22,  1.05s/it]

Epoch 14 training end. LR: 3.2778598899291478e-06, Loss: 0.796790443715595, Correct answers: 278/332





Epoch 14 validation end. LR: 3.2778598899291478e-06, Loss: 0.48622657855351764, Accuracy: 37/44, Val metric: 0.9236111111111112


In [11]:
answers_list, targets_list

([tensor([0, 0, 0, 0, 2, 0, 0, 2, 0, 1, 0, 1, 1, 1, 1, 1], device='cuda:0'),
  tensor([1, 1, 0, 2, 1, 2, 2, 2, 0, 0, 2, 3, 3, 4, 3, 3], device='cuda:0'),
  tensor([3, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 0, 4], device='cuda:0')],
 [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'),
  tensor([1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3], device='cuda:0'),
  tensor([3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0')])

In [11]:
# Already defined above; for reference

fft = 2048
hop = 512 * 1
# Less rounding errors this way
sr = 48000
length = 3 * sr
fmin = 84
fmax = 15056

def load_test_file(f): 
    wav, sr = librosa.load('../data/test/' + f, sr=None)

    # Split for enough segments to not miss anything
    segments = len(wav) / length
    segments = int(np.ceil(segments))
    
    mel_array = []
    
    for i in range(0, segments):
        # Last segment going from the end
        if (i + 1) * length > len(wav):
            slice = wav[len(wav) - length:len(wav)]
        else:
            slice = wav[i * length:(i + 1) * length]
        
        # Same mel spectrogram as before
        mel_spec = librosa.feature.melspectrogram(slice, n_fft=fft, hop_length=hop, sr=sr, fmin=fmin, fmax=fmax, power=2, n_mels=Config.MEL)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        transform_val = albu.Compose([albu.Resize(Config.IMG_H, Config.IMG_W),]) 
        #mel_spec = resize(mel_spec, (224, 400))
        #mel_spec = mel_spec - np.min(mel_spec)
        #mel_spec = mel_spec / np.max(mel_spec)

        mel_spec = np.expand_dims(mel_spec, axis=2).astype(np.float32) / 100.
        augmented = transform_val(image = mel_spec)
        mel_spec = augmented[ "image"]
        
        #mel_spec = np.stack((mel_spec, mel_spec, mel_spec))

        mel_array.append(mel_spec)
        
    
    return np.array(mel_array)

In [12]:
model = torchvision.models.resnet18(pretrained=True)
#model = resnest50(pretrained=True)
model.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
model.fc = nn.Sequential(
    nn.Linear(512, 512),
    nn.Dropout(0.4),
    nn.ReLU(),
    nn.Linear(512, Config.NUM_BIRDS)
)

patch_first_conv(model, 1)



model.load_state_dict(torch.load(f"best_model_rare.pt"))

model.eval()


if torch.cuda.is_available():
    model.cuda()

# Prediction loop
print('Starting prediction loop')
with open(f'submission_rare_{fold}_fold.csv', 'w', newline='') as csvfile:
    submission_writer = csv.writer(csvfile, delimiter=',')
    submission_writer.writerow(['recording_id','s0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11',
                               's12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23'])

    test_files = os.listdir('../data/test/')
    print(len(test_files))

    # Every test file is split on several chunks and prediction is made for each chunk
    for i in range(0, len(test_files)):
        data = load_test_file(test_files[i]).transpose(0,3,1,2)
        data = torch.tensor(data)
        data = data.float()
        if torch.cuda.is_available():
            data = data.cuda()

        output = model(data)

        # Taking max prediction from all slices per bird species
        # Usually you want Sigmoid layer here to convert output to probabilities
        # In this competition only relative ranking matters, and not the exact value of prediction, so we can use it directly
        maxed_output = torch.max(output, dim=0)[0]
        maxed_output = maxed_output.cpu().detach()

        file_id = str.split(test_files[i], '.')[0]
        write_array = [file_id]

        for out in maxed_output:
            write_array.append(out.item())

        submission_writer.writerow(write_array)

        if i % 100 == 0 and i > 0:
            print('Predicted for ' + str(i) + ' of ' + str(len(test_files) + 1) + ' files')

print('Submission generated')

Starting prediction loop
1992
Predicted for 100 of 1993 files
Predicted for 200 of 1993 files
Predicted for 300 of 1993 files
Predicted for 400 of 1993 files
Predicted for 500 of 1993 files
Predicted for 600 of 1993 files
Predicted for 700 of 1993 files
Predicted for 800 of 1993 files
Predicted for 900 of 1993 files
Predicted for 1000 of 1993 files
Predicted for 1100 of 1993 files
Predicted for 1200 of 1993 files
Predicted for 1300 of 1993 files
Predicted for 1400 of 1993 files
Predicted for 1500 of 1993 files
Predicted for 1600 of 1993 files
Predicted for 1700 of 1993 files
Predicted for 1800 of 1993 files
Predicted for 1900 of 1993 files
Submission generated
