# Funkcje Pomocnicze do Zadania

Aby wszystkie funkcje zadziałały, notebook powinien być uruchamiany z następującym ułożeniem katalogów:

```
|- helpers.ipynb
|- sampleSubmission.csv
|- train
 |- {unzipped train files and labels}
|- test
 |- {unzipped test files}
```

Należy odkomentować u siebie linijki zapisujące pliki.

In [None]:
!conda install -c conda-forge -y librosa

In [None]:
import os

import librosa
from scipy import signal
from scipy.io import wavfile
from sklearn.metrics import roc_auc_score
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm, trange

# Wczytywanie Danych

Poniższe funkcje są przykładowymi funkcjami wczytującymi dane. Możliwe, że będą potrzebne modyfikacje reprezentacji i bardziej skomplikowana funkcja tworząca zbiór treningowy. Wczytywanie danych nie jest zaimplementowane optymalnie - między innymi kod wczytuje wielokrotnie ten sam plik. Dla bardziej złożonych reprezentacji możliwe, że trzeba będzie przepisać te funkcje.

In [None]:
def load_mel(file_name, start=0, stop=None, n_mels=60):
    '''Load a mel spectrogram for a fragment of a recording.

    The file is read at its native sample rate (``sr=None``) and the
    spectrogram is limited to the 6000-9000 Hz band.

    Args:
        file_name (str): Name of the file with the recording.
        start (float): Second at which the fragment of interest begins.
        stop (float): Second at which the fragment of interest ends. Any
            falsy value (``None`` or ``0``) means "up to the end of the file".
        n_mels (int): Number of mel bands (height of the spectrogram).

    Returns:
        ndarray: The spectrogram.
    '''
    audio, rate = librosa.core.load(file_name, sr=None)

    # Convert the time window from seconds to sample indices.
    first = int(start * rate)
    last = int(stop * rate) if stop else None
    fragment = audio[first:last]

    # Earlier experiments (Box-Cox scaling, MFCCs computed from this
    # spectrogram) were disabled; the raw mel spectrogram is returned.
    return librosa.feature.melspectrogram(
        y=fragment,
        sr=rate,
        n_mels=n_mels,
        fmin=6000,
        fmax=9000,
    )


def load_spec(file_name, start=0, stop=None):
    '''Load a standard spectrogram for a fragment of a wav file.

    Args:
        file_name (str): Name of the file with the recording.
        start (float): Second at which the fragment of interest begins.
        stop (float): Second at which the fragment of interest ends. Any
            falsy value (``None`` or ``0``) means "up to the end of the file".

    Returns:
        ndarray: The spectrogram (third value returned by
            ``scipy.signal.spectrogram``).
    '''
    rate, audio = wavfile.read(file_name)

    # Convert the time window from seconds to sample indices.
    first = int(start * rate)
    last = int(stop * rate) if stop else None

    # signal.spectrogram returns (frequencies, times, spectrogram).
    return signal.spectrogram(audio[first:last], rate)[2]

def load_test(load_repr=load_mel):
    '''Load the test data, one representation per listed second.

    Sample ids are taken from the first column of ``sampleSubmission.csv``
    (the header line is skipped) and are split on ``/`` into a recording id
    and a second. For every ``.wav`` file found in ``test`` the one-second
    fragments listed for that recording are loaded with ``load_repr``.

    Args:
        load_repr (function): Function loading the desired representation;
            called as ``load_repr(path, start=..., stop=...)``.

    Returns:
        ndarray: Array with the test data.
    '''
    with open('sampleSubmission.csv', 'r') as file:
        lines = file.read().split()[1:]
    sample_ids = [line.split(',')[0] for line in lines]
    samples = np.array([sample_id.split('/') for sample_id in sample_ids])

    X_test = []
    rec_files = [file_name for file_name in os.listdir('test') if file_name.endswith('.wav')]
    # NOTE(review): fragments are emitted in os.listdir order, which is not
    # guaranteed to match the row order of sampleSubmission.csv - confirm
    # before pairing the result with submission rows by position.
    for file_name in rec_files:
        # File names look like "rec<ID>.wav"; strip the 3-character prefix.
        recording_id = file_name.split('.')[0][3:]
        # The `np.int` alias was removed from NumPy; the builtin `int` is the
        # documented replacement and yields the same dtype.
        time_markers = samples[samples[:, 0] == recording_id, 1].astype(int)
        path = os.path.join('test', file_name)
        for t in time_markers:
            X_test.append(load_repr(path, start=t, stop=t + 1))
    return np.array(X_test)

def load_test_2(load_repr=load_mel, interval=0.3, step=0.05):
    '''Load the test data as short sliding windows within each listed second.

    Sample ids are taken from the first column of ``sampleSubmission.csv``
    (the header line is skipped) and are split on ``/`` into a recording id
    and a second ``t``. For each such second, windows of length ``interval``
    are cut with their centres placed at
    ``np.arange(t + interval / 2, t + 1 - interval / 2, step)``.

    Args:
        load_repr (function): Function loading the desired representation;
            called as ``load_repr(path, start=..., stop=...)``.
        interval (float): Length of a single window, in seconds.
        step (float): Distance between consecutive window centres, in seconds.

    Returns:
        (ndarray, list): Array with one representation per window, and a list
            of the same length holding the ``"<recording id>/<second>"`` id
            each window belongs to.
    '''
    compartment = interval / 2
    with open('sampleSubmission.csv', 'r') as file:
        lines = file.read().split()[1:]
    sample_ids = [line.split(',')[0] for line in lines]
    samples = np.array([sample_id.split('/') for sample_id in sample_ids])

    X_test = []
    recordings_ids = []
    rec_files = [file_name for file_name in os.listdir('test') if file_name.endswith('.wav')]
    print('samples:', samples)
    for file_name in tqdm(rec_files):
        # File names look like "rec<ID>.wav"; strip the 3-character prefix.
        recording_id = file_name.split('.')[0][3:]
        # The `np.int` alias was removed from NumPy; the builtin `int` is the
        # documented replacement and yields the same dtype.
        time_markers = samples[samples[:, 0] == recording_id, 1].astype(int)
        path = os.path.join('test', file_name)
        for t in time_markers:
            # NOTE: with a float step, np.arange may or may not include a
            # centre lying right at the upper bound because of rounding.
            for centre in np.arange(t + compartment, t + 1 - compartment, step):
                representation = load_repr(path, start=centre - compartment,
                                           stop=centre + compartment)
                X_test.append(representation)
                recordings_ids.append("{}/{}".format(recording_id, t))
    return np.array(X_test), recordings_ids


def read_labels():
    '''Read the time labels from ``labels.txt`` in the ``train`` folder.

    The first line of the file is treated as a header and skipped; lines of
    at most one character are ignored. Every remaining line is expected to be
    ``rec<ID>,<start>,<stop>``.

    Returns:
        ndarray: Array of time labels with the columns: recording number,
            second the sound starts, second the sound ends.
    '''
    with open(os.path.join('train', 'labels.txt'), 'r') as file:
        rows = file.read().split('\n')[1:]

    parsed = []
    for row in rows:
        if len(row) <= 1:
            continue
        rec, start, stop = row.split(',')
        # Drop the 3-character "rec" prefix to obtain the numeric id.
        parsed.append([int(rec[3:]), float(start), float(stop)])
    return np.array(parsed)


def check_voices(second, labels, tol=0.):
    '''Check whether a labelled sound falls into the frame [second, second + 1].

    Args:
        second (float): Second of the recording at which the frame starts.
        labels (sequence): A single label row whose element 1 is the start and
            element 2 the end of the sound (element 0 is not used here).
        tol (float): Tolerance at the frame edges. A sound starting inside the
            frame only counts if it starts before ``second + 1 - tol``; a sound
            ending inside the frame only counts if it ends after
            ``second + tol``.

    Returns:
        bool: Whether the sound is present in the frame.
    '''
    start, stop = labels[1], labels[2]
    frame_end = second + 1

    starts_inside = second <= start < frame_end - tol
    ends_inside = second + tol < stop < frame_end
    # `>=` (not `>`): a sound that begins before the frame and ends exactly on
    # its right boundary (e.g. one cut off at the very end of the recording)
    # fills the whole frame and must be reported as present.
    covers_frame = start < second and stop >= frame_end

    return bool(starts_inside or ends_inside or covers_frame)


def map_seconds_to_y(labels):
    '''Build a label for each consecutive second of a 10-second recording.

    Every second is checked twice with ``check_voices``: once without
    tolerance and once with a tolerance of 0.02 s. When the two checks
    disagree (a sound barely clipped at a frame edge) the label is uncertain.

    Args:
        labels (ndarray): Array of label rows whose 2nd column is the start
            and 3rd column the end of a sound.

    Returns:
        list: Ten labels, one per second: 1 (sound present), 0 (no sound) or
            -1 (uncertain).
    '''
    per_second = []
    for second in range(10):
        loose = any(check_voices(second, label) for label in labels)
        strict = any(check_voices(second, label, 0.02) for label in labels)
        if loose != strict:
            per_second.append(-1)
        else:
            per_second.append(1 if loose else 0)
    return per_second


def load_train(load_repr=load_mel):
    '''Load the training data, one representation per labelled second.

    For every ``.wav`` file in ``train`` the per-second labels are computed
    with ``map_seconds_to_y``; seconds labelled -1 (uncertain) are skipped.
    Fragments for which ``load_repr`` raises ``ValueError`` or ``TypeError``
    are reported on stdout and left out.

    Args:
        load_repr (function): Function loading the desired representation;
            called as ``load_repr(path, start=..., stop=...)``.

    Returns:
        (ndarray, ndarray): Array with the training data, array with the
            binary training labels.
    '''
    labels = read_labels()
    features, targets = [], []
    wav_names = [name for name in os.listdir('train') if name.endswith('.wav')]
    print(wav_names)
    for wav_name in wav_names:
        # File names look like "rec<ID>.wav"; strip the 3-character prefix.
        rec_id = int(wav_name.split('.')[0][3:])
        per_second = map_seconds_to_y(labels[labels[:, 0] == rec_id])
        path = os.path.join('train', wav_name)
        for second, target in enumerate(per_second):
            if target == -1:
                continue
            try:
                fragment = load_repr(path, start=second, stop=second + 1)
            except ValueError:
                print('Error reading file', wav_name)
            except TypeError:
                print('Unsupported type', wav_name)
            else:
                features.append(fragment)
                targets.append(target)
    return np.array(features), np.array(targets)

def prepare_training_set(labels, interval, max_attempts=1000):
    '''Choose positive and negative training windows for one recording.

    One positive window of length ``interval`` is centred on every labelled
    sound. Then, for every label, one negative window is drawn uniformly at
    random and accepted only if it overlaps none of the windows chosen so far
    (positive or negative). The code assumes 10-second recordings.

    Args:
        labels (ndarray): Label rows whose 2nd column is the start and 3rd
            column the end of a sound.
        interval (float): Length of every window, in seconds.
        max_attempts (int): Maximum number of consecutive rejected draws for a
            single negative window. When it is reached the search stops with a
            warning and fewer negatives are returned, instead of looping
            forever on a recording that has no free space left.

    Returns:
        (list, list, list): Window starts, window stops (both in seconds) and
            window labels: 1 for a positive window, -1 for a positive window
            reaching outside the recording, 0 for a negative window.
    '''
    import warnings

    starts, stops, y_binary = [], [], []
    compartment = interval / 2

    # Positive samples: a window centred on the middle of each labelled sound.
    for label in labels:
        middle = label[1] + (label[2] - label[1]) / 2
        start, stop = middle - compartment, middle + compartment
        starts.append(start)
        stops.append(stop)
        # Windows sticking out of the recording are marked as unusable.
        y_binary.append(-1 if start < 0 or stop > 10 else 1)

    # Negative samples: as many as there are labels, found by rejection
    # sampling of the window centre.
    remaining = len(labels)
    rejected_in_a_row = 0
    while remaining:
        # .item() yields a plain float, so negative windows have the same
        # scalar type as positive ones rather than being 1-element arrays.
        middle = torch.FloatTensor(1).uniform_(compartment, 10 - compartment).item()
        start, stop = middle - compartment, middle + compartment
        is_free = all(
            start > taken_stop or stop < taken_start
            for taken_start, taken_stop in zip(starts, stops)
        )
        if is_free:
            starts.append(start)
            stops.append(stop)
            y_binary.append(0)
            remaining -= 1
            rejected_in_a_row = 0
            continue

        rejected_in_a_row += 1
        if rejected_in_a_row >= max_attempts:
            warnings.warn(
                'prepare_training_set: no free window found after {} draws; '
                '{} negative sample(s) skipped'.format(max_attempts, remaining)
            )
            break

    return starts, stops, y_binary

def load_train_2(load_repr=load_mel, interval=0.3):
    '''Load the training data as short windows chosen by prepare_training_set.

    For every ``.wav`` file in ``train`` the windows and their labels are
    produced by ``prepare_training_set``; windows labelled -1 are skipped.
    Windows for which ``load_repr`` raises ``ValueError`` or ``TypeError`` are
    reported on stdout and left out.

    Args:
        load_repr (function): Function loading the desired representation;
            called as ``load_repr(path, start=..., stop=...)``.
        interval (float): Length of every window, in seconds. Should match the
            ``interval`` used when loading the test windows.

    Returns:
        (ndarray, ndarray): Array with the training data, array with the
            binary training labels.
    '''
    labels = read_labels()
    X_train, y_train = [], []
    rec_files = [file_name for file_name in os.listdir('train') if file_name.endswith('.wav')]
    for file_name in tqdm(rec_files):
        # File names look like "rec<ID>.wav"; strip the 3-character prefix.
        recording_id = int(file_name.split('.')[0][3:])
        recording_labels = labels[labels[:, 0] == recording_id]
        starts, stops, y_binary = prepare_training_set(recording_labels, interval)
        assert len(starts) == len(stops) == len(y_binary)
        path = os.path.join('train', file_name)
        for start, stop, y in zip(starts, stops, y_binary):
            if y == -1:
                continue
            try:
                representation = load_repr(path, start=start, stop=stop)
            except ValueError as e:
                print('Error reading file', file_name)
                print(e)
            except TypeError as e:
                print('Unsupported type', file_name)
                print(e)
            else:
                X_train.append(representation)
                y_train.append(y)
    return np.array(X_train), np.array(y_train)

In [None]:
a = []
a.append([])

In [None]:
a[-1].append(1)

In [None]:
a[-1].append(1)

In [None]:
a

In [None]:
a.append([])
a[-1].append(1)


In [None]:

a[-1].append(1)

In [None]:
a

In [None]:
np.array(a)

In [None]:
for i in np.arange(0.15,1,0.1):
    print(i)

# Zapisywanie Wczytanej Reprezentacji

Ponieważ tworzenie reprezentacji może zabierać sporo czasu (szczególnie w tak naiwnej implementacji jak powyższa), warto zapisać wczytane dane do plików.

In [None]:
print(os.getcwd())
print(os.listdir("."))

In [None]:
import librosa.core.logamplitude

In [None]:
import librosa.display
spec = load_mel(os.path.join(os.getcwd(), 'train','rec25.wav'),start=9.1, stop=9.4)
# spec = librosa.power_to_db(spec,ref=5.0)
librosa.display.specshow(spec, y_axis='mel', x_axis='time')

In [None]:
mfccs = librosa.feature.mfcc(S=spec, norm='ortho', dct_type=3)

In [None]:

import matplotlib.pyplot as plt
plt.figure(figsize=(10, 4))
librosa.display.specshow(mfccs, x_axis='time')
plt.colorbar()
plt.title('MFCC')
plt.tight_layout()

In [None]:
mfccs.shape

In [None]:
spec.shape

In [None]:
spec.shape

In [None]:
# The line below sets the root folder (uncomment when the data lives elsewhere).
# os.chdir('../input/')

# Build the test set from 0.3 s windows whose centres are 0.05 s apart;
# recordings_ids holds, for every window, the id of the sample it belongs to.
X_test, recordings_ids = load_test_2(interval=0.3, step=0.05)
# np.save(os.path.join('test', 'tmp_X_test'), X_test)

# Build the training set with the default representation.
X, y = load_train_2()
# np.save(os.path.join('train', 'tmp_X_train'), X)
# np.save(os.path.join('train', 'tmp_y_train'), y)

In [None]:
recordings_ids

In [None]:
X_test.shape

In [None]:
assert_x_64 = X_test[64]

In [None]:
assert_x = X_test[0]

In [None]:
assert_x

In [None]:
recordings_ids


In [None]:

X_test_native = load_test()

In [None]:
X_test_native.shape

In [None]:
X_test.shape

In [None]:

librosa.display.specshow(X_test[1000], y_axis='mel', x_axis='time')

In [None]:
y[11]

In [None]:
import librosa.display
spec = load_mel(os.path.join(os.getcwd(), 'test','rec8.wav'),start=0, stop=10, n_mels=60)
# spec = librosa.power_to_db(spec,ref=5.0)
librosa.display.specshow(spec, y_axis='mel', x_axis='time')

In [None]:
X_test[1][0].shape

In [None]:
X_test_native

In [None]:
X.shape

In [None]:
np.save(os.path.join('test', 'tmp_X_test'), X_test)
np.save(os.path.join('train', 'tmp_X_train'), X)
np.save(os.path.join('train', 'tmp_y_train'), y)

In [None]:
X_test = np.load(os.path.join(os.getcwd(), 'test', 'tmp_X_test.npy'))
X = np.load(os.path.join(os.getcwd(), 'train', 'tmp_X_train.npy'))
y = np.load(os.path.join(os.getcwd(), 'train', 'tmp_y_train.npy'))

In [None]:
y[4]

In [None]:
X.shape

In [None]:
import librosa.display

In [None]:
librosa.display.specshow(X[100], y_axis='mel', x_axis='time')


# Model i Trenowanie

Poniższy przykład używa poprawnych metryk i zapisuje parametry modelu.

In [None]:
# class Detector(torch.nn.Module):
    
#     def __init__(self):
#         super().__init__()
#         self.linear = torch.nn.Linear(10 * 87, 2)
    
#     def forward(self, x):
#         out = torch.flatten(x, start_dim = 1)
#         out = self.linear(out)
#         return out
    
# clf = Detector()

In [None]:
X.shape

In [None]:

# Reload the cached representations saved as .npy files under test/ and train/.
X_test = np.load(os.path.join(os.getcwd(), 'test', 'tmp_X_test.npy'))
X = np.load(os.path.join(os.getcwd(), 'train', 'tmp_X_train.npy'))
y = np.load(os.path.join(os.getcwd(), 'train', 'tmp_y_train.npy'))

import torch
# Fix the torch random seed before the models below are created.
torch.manual_seed(52)
class ConvNet(torch.nn.Module):
    '''Small convolutional classifier with two output logits.

    The network is a single convolution (8 filters, kernel (1, 20)) followed
    by batch normalisation, ReLU, max pooling over windows of (1, 60) and one
    fully connected layer.

    Args:
        block: Micro-architecture class. Accepted for interface compatibility
            but not used, because the block-based layers are disabled.
    '''

    def __init__(self, block):
        super().__init__()

        # Initial convolution, followed in forward() by batch norm + ReLU.
        self.conv = torch.nn.Conv2d(
            in_channels=1, out_channels=8, kernel_size=(1, 20), padding=1
        )
        self.relu = torch.nn.ReLU()
        self.batch_norm1 = torch.nn.BatchNorm2d(num_features=8)

        # The (micro-architecture + max pooling) x 2 stage is disabled; only
        # the final pooling remains.
        self.pool2 = torch.nn.MaxPool2d(kernel_size=(1, 60))

        # Fully connected layer applied to the flattened feature maps.
        # 496 = 8 channels * 62 rows * 1 column, which is what a 60-row input
        # produces when exactly one pooling window fits along the last axis.
        self.dense = torch.nn.Linear(in_features=496, out_features=2)

    def forward(self, x):
        '''Return the class logits for a batch of single-channel inputs.'''
        features = self.conv(x)
        features = self.batch_norm1(features)
        features = self.relu(features)
        pooled = self.pool2(features)
        # Flatten everything except the batch dimension.
        flat = pooled.view(pooled.shape[0], -1)
        return self.dense(flat)
    
class BaseBlock(torch.nn.Module):
    '''Two stacked "convolution -> batch norm -> ReLU" stages.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        hidden_channels (int): Number of channels between the two stages;
            falls back to ``out_channels`` when not given (or falsy).
    '''

    def __init__(self, in_channels, out_channels, hidden_channels=None):
        super().__init__()
        hidden = hidden_channels if hidden_channels else out_channels
        kernel = (1, 20)

        # First stage. No padding is applied, so each (1, 20) convolution
        # shortens the last axis by 19.
        self.conv1 = torch.nn.Conv2d(in_channels, hidden, kernel_size=kernel)
        # BatchNorm2d keeps its statistics per filter (channel).
        self.batch_norm1 = torch.nn.BatchNorm2d(hidden)
        self.relu1 = torch.nn.ReLU()

        # Second stage: the same layers again.
        self.conv2 = torch.nn.Conv2d(hidden, out_channels, kernel_size=kernel)
        self.batch_norm2 = torch.nn.BatchNorm2d(out_channels)
        self.relu2 = torch.nn.ReLU()

    def forward(self, x):
        '''Apply both stages to ``x`` and return the result.'''
        first = self.conv1(x)
        first = self.batch_norm1(first)
        first = self.relu1(first)

        second = self.conv2(first)
        second = self.batch_norm2(second)
        return self.relu2(second)

# class ResNetBlock(torch.nn.Module):
    
#     def __init__(self, in_channels, out_channels, hidden_channels=None):
#         super(ResNetBlock, self).__init__()
#         if not hidden_channels:
#             hidden_channels = out_channels
#         self.in_channels = in_channels
#         self.out_channels = out_channels
#         self.conv1 = torch.nn.Conv2d(in_channels, hidden_channels, kernel_size=3, padding=1)
#         self.batch_norm1 = torch.nn.BatchNorm2d(hidden_channels) 
#         self.relu1 = torch.nn.ReLU()
#         self.conv2 = torch.nn.Conv2d(hidden_channels, out_channels, kernel_size=3, padding=1)
#         self.conv3 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1)
#         self.batch_norm2 = torch.nn.BatchNorm2d(out_channels)
#         self.relu2 = torch.nn.ReLU()

    
#     def forward(self, x):
#         out = self.relu1(self.batch_norm1(self.conv1(x)))
#         out = self.conv2(out)
#         if self.in_channels < self.out_channels:
#             x = self.conv3(x)
#         out = torch.add(x, out)
#         out = self.relu2(self.batch_norm2(out))
#         return out
from torch import nn
class AlexNet(torch.nn.Module):
    '''AlexNet-style convolutional network for single-channel inputs.

    Args:
        num_classes (int): Number of output logits.
    '''

    def __init__(self, num_classes=2):
        super().__init__()

        # Convolutional feature extractor.
        feature_layers = [
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Adaptive pooling fixes the spatial size at 6 x 6 for the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected classifier head.
        classifier_layers = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        '''Return the class logits for a batch of single-channel inputs.'''
        pooled = self.avgpool(self.features(x))
        # Flatten the 256 feature maps of size 6 x 6 into one vector per sample.
        flat = pooled.view(pooled.size(0), 256 * 6 * 6)
        return self.classifier(flat)


clf = ConvNet(BaseBlock)
# clf = AlexNet()

# Split the data set into a training part (first 90%) and a validation part
# (last 10%); the data is not shuffled before splitting.
split_point = int(len(X) * 0.9)

# unsqueeze(1) inserts a channel axis of size 1 after the batch axis.
X_train = torch.Tensor(X[:split_point]).unsqueeze(1)
y_train = torch.LongTensor(y[:split_point])

X_valid = torch.Tensor(X[split_point:]).unsqueeze(1)
y_valid = torch.LongTensor(y[split_point:])

batch_size = 16

dataset = TensorDataset(X_train, y_train)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

valid_dataset = TensorDataset(X_valid, y_valid)
valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size)

# Loss and optimiser.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(clf.parameters())

# Training loop.
best_preds, best_score = None, 0.
losses, scores = [], []
epochs_number = 10
for epoch in trange(epochs_number):
    running_loss = 0
    clf.train()
    # Dedicated batch names keep the full X / y arrays loaded above intact
    # (reusing `X, y` as loop variables would overwrite them).
    for X_batch, y_batch in data_loader:
        optimizer.zero_grad()

        outputs = clf(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    losses.append(running_loss)

    clf.eval()
    preds = []
    # No gradients are needed for validation, so skip building the graph.
    with torch.no_grad():
        for X_batch, _ in valid_data_loader:
            out = clf(X_batch)
            # Probability of class 1.
            preds.append(torch.softmax(out, dim=1)[:, 1].numpy())
    preds = np.concatenate(preds, axis=0)

    # The evaluation metric is ROC AUC.
    score = roc_auc_score(y_valid.numpy(), preds)
    scores.append(score)
    print(score)
    if score > best_score:
        best_score = score
        best_preds = preds
        np.save('tmp_preds', best_preds)
        # The model giving the best score is saved.
        torch.save(clf.state_dict(), 'tmp_model.pt')

In [None]:
# Rysowanie lossu i AUC

import matplotlib.pyplot as plt

%matplotlib inline

plt.plot(scores)
plt.show()

plt.plot(losses)
plt.show()

# Zapis Predykcji

In [None]:
def save_predictions(preds):
    '''Save predictions to a file following the submission format.

    The header and the sample ids (first column) are taken from
    ``sampleSubmission.csv``; predictions are paired with the ids by position
    and the result is written to ``submission.csv``. Pairing stops at the
    shorter of the two sequences.

    Args:
        preds (list): List of predictions (probabilities).
    '''
    with open('sampleSubmission.csv', 'r') as file:
        tokens = file.read().split()
    header, rows = tokens[0], tokens[1:]

    output_lines = [header]
    output_lines.extend(
        f"{row.split(',')[0]},{pred}" for pred, row in zip(preds, rows)
    )

    with open('submission.csv', 'w') as file:
        file.writelines(line + '\n' for line in output_lines)
        
def save_predictions_2(preds: dict):
    '''Save predictions to a file following the submission format.

    The header and the sample ids (first column) are taken from
    ``sampleSubmission.csv``; each id is printed, looked up in ``preds`` and
    the result is written to ``submission.csv``.

    Args:
        preds (dict): Mapping from sample id to its prediction (probability).

    Raises:
        KeyError: If ``preds`` is a plain dict and lacks one of the sample ids.
    '''
    with open('sampleSubmission.csv', 'r') as file:
        tokens = file.read().split()
    header, rows = tokens[0], tokens[1:]

    output_lines = [header]
    for row in rows:
        sample_id = row.split(',')[0]
        print(sample_id)
        output_lines.append(f"{sample_id},{preds[sample_id]}")

    with open('submission.csv', 'w') as file:
        file.writelines(line + '\n' for line in output_lines)

In [None]:
X_test.shape

In [None]:
# Load the best model saved during training.
from collections import defaultdict
clf.load_state_dict(torch.load('tmp_model.pt'))

# Build the test data loader; unsqueeze(1) inserts a channel axis of size 1.
# shuffle=False keeps the windows in the same order as recordings_ids.
X_test_tensor = torch.Tensor(X_test).unsqueeze(1)
print(X_test_tensor.shape)
test_dataset = TensorDataset(X_test_tensor)
test_data_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

# Evaluate the model on the test data.
#TODO - create a threshold?
clf.eval()
preds = []
# Each item yielded by the loader is a one-element list, hence X[0] below.
for i, X in enumerate(test_data_loader):
    if i == 0:
        # Sanity check: the first element of the first batch must equal the
        # first test window stored earlier in assert_x.
        print(type(X[0][0]))
        assert torch.all(torch.eq(X[0][0], torch.tensor(assert_x, dtype=torch.float)))
        print(recordings_ids[0])
#     if i == 4:
#         assert torch.all(torch.eq(X[0][2], torch.tensor(assert_x_64, dtype=torch.float)))
#         print(recordings_ids[64])


    out = clf(X[0])
    print('out :',out)

    # Probability of class 1 for every window in the batch.
    preds.append(torch.softmax(out, dim = 1)[:, 1].detach().numpy())
    # NOTE(review): debugging output - prints the whole growing list on every
    # batch, so the amount of output grows quadratically with the batch count.
    print(preds)
#     for j, pred in enumerate(preds):
#         try:
#             prediction_dict[int(recordings_ids[i * batch_size + j][0])].append(preds[i][j])
#         except:
#             print('end')

#     print(preds[0].shape)
#     print(preds[0])

# Join the per-batch arrays into one array with a prediction per window.
preds = np.concatenate(preds, axis = 0)
print(preds.shape)


In [None]:
preds.shape

In [None]:
preds.shape

In [None]:
global_mean = preds.mean()

In [None]:
max_value = preds.max()

In [None]:
global_std = preds.std()

In [None]:
# Group the window predictions by the sample id they belong to; the i-th
# prediction is paired with the i-th entry of recordings_ids.
prediction_dict = defaultdict(list)
for i, pred in enumerate(preds):
    prediction_dict[str(recordings_ids[i])].append(pred)

In [None]:
# averaging
final_preds_dict= defaultdict(float) 

for k in prediction_dict.keys():
    v = prediction_dict[k] - global_mean
    final_preds_dict[k] = (np.maximum(v, 0, v  ) / mv).max()

In [None]:
# only max value
final_preds_dict= defaultdict(float) 

for k in prediction_dict.keys():
    final_preds_dict[k] = np.array(prediction_dict[k]).max() 

In [None]:
prediction_dict

In [None]:
final_preds_dict

In [None]:
final_preds_dict['39/6']

In [None]:

import librosa.display
spec = load_mel(os.path.join(os.getcwd(), 'test','rec39.wav'),start=0, stop=10, n_mels=60)
# spec = librosa.power_to_db(spec,ref=5.0)
librosa.display.specshow(spec, y_axis='mel', x_axis='time')

In [None]:

import librosa.display
spec = load_mel(os.path.join(os.getcwd(), 'test','rec76.wav'),start=4, stop=5.0, n_mels=60)
# spec = librosa.power_to_db(spec,ref=5.0)
librosa.display.specshow(spec, y_axis='mel', x_axis='time')

In [None]:


# Zapisanie predykcji do poprawnego formatu
save_predictions(preds)

In [None]:
save_predictions_2(final_preds_dict)

In [None]:
final_preds_dict['1/1']

In [None]:
final_preds_dict