# Task pipeline

In [None]:
import librosa
import pandas as pd
import numpy as np
import os
from PIL import Image
import copy

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import torchaudio
import torchvision

from sklearn.metrics import accuracy_score

from torchvision.models import resnet34

import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [None]:
# Sample rate (Hz) used to derive the STFT sizes below.
SAMPLE_RATE = 48_000

# STFT window: 64 ms worth of samples at SAMPLE_RATE, plus 4 extra samples.
N_FFT = (64 * SAMPLE_RATE) // 1000 + 4

# STFT hop: 16 ms worth of samples at SAMPLE_RATE, plus 4 extra samples.
HOP_LENGTH = (16 * SAMPLE_RATE) // 1000 + 4

In [None]:
class NoisedDataset(Dataset):
    """Audio clips served as fixed-length, two-channel STFT tensors.

    Each row of ``csv_file`` provides a file name in column 0 and a label in
    column 1.  Files whose name starts with ``'noised'`` are loaded from
    ``<root_dir>/noise``; every other file is loaded from
    ``<root_dir>/no_noise``.

    Args:
        csv_file: Table (e.g. a pandas ``DataFrame``) with the file name in
            the first column and the label in the second.
        root_dir: Directory containing the ``noise`` and ``no_noise`` folders.
        n_fft: FFT size forwarded to ``torch.stft``.
        hop_length: Hop length forwarded to ``torch.stft``.
        max_len: Number of samples every clip is padded or truncated to.
    """

    def __init__(self, csv_file, root_dir, n_fft, hop_length, max_len=165000):
        self.csv_file = csv_file
        self.root_dir = root_dir

        self.n_fft = n_fft
        self.hop_length = hop_length

        self.max_len = max_len

    def __getitem__(self, idx):
        """Return ``[spectrogram, label]`` for row ``idx``.

        The spectrogram stacks the real and imaginary parts of the STFT
        along dim 0.
        """
        # Read the two cells directly instead of materialising the whole
        # table as an array on every access.
        file_name = self.csv_file.iloc[idx, 0]
        label = self.csv_file.iloc[idx, 1]

        flag = 'noise' if file_name.startswith('noised') else 'no_noise'
        path = os.path.join(self.root_dir, flag, file_name)

        signal, _ = torchaudio.load(path)
        signal = self._prepare_sample(signal)

        # ``return_complex`` must be given explicitly for real input on
        # current PyTorch; the complex result is split into real/imag below.
        spec = torch.stft(
            input=signal,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            normalized=True,
            return_complex=True,
        )
        spec = torch.cat([spec.real, spec.imag], dim=0)

        return [spec, label]

    def __len__(self):
        """Return the number of rows in the CSV table."""
        return self.csv_file.shape[0]

    def _prepare_sample(self, waveform):
        """Left-pad with zeros or truncate ``waveform`` to ``max_len`` samples.

        Only the first row (``waveform[0]``) is used.  Clips longer than
        ``max_len`` keep their first ``max_len`` samples; shorter clips are
        right-aligned so the zero padding sits at the start.  An empty clip
        yields an all-zero tensor.

        Returns:
            A ``float32`` tensor of shape ``(1, max_len)``.
        """
        clip = waveform[0, :self.max_len]

        output = torch.zeros((1, self.max_len), dtype=torch.float32)
        # Using an explicit start index (rather than a negative one) keeps the
        # zero-length case valid: the target slice is then empty as well.
        output[0, self.max_len - clip.shape[0]:] = clip

        return output

In [None]:
def train_loop(model, train_data, test_data, EPOCH, batch_size):

    load_train = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    load_test = DataLoader(test_data, batch_size=batch_size)

    history_loss = []
    history_acc = []

    DEVICE = torch.device('cuda')
    criterion = torch.nn.MSELoss()
    optim = torch.optim.Adam(model.parameters())

    for epoch in range(EPOCH):

        ep_tr_loss = []
        ep_tr_acc = []

        ep_test_loss = []
        ep_test_acc = []

        model.cuda().train()
        for bx, by in load_train:

            bx = bx.cuda()
            by = by.float()

            optim.zero_grad()
            preds = model(bx)
            loss = criterion(preds.cpu(), by.unsqueeze(1))
            loss.backward()
            optim.step()

            ep_tr_loss.append(loss.item())
            preds = np.around(preds.detach().cpu().numpy())
            by = np.reshape(by.numpy(), (-1, 1))
            ep_tr_acc.append(accuracy_score(by, preds))
        
        model.eval()
        for bx, by in load_test:

            bx = bx.cuda()
            by = by.float()

            with torch.no_grad():
                preds = model(bx)
            
            preds = preds.cpu()
            loss = criterion(preds, by.unsqueeze(1))

            ep_test_loss.append(loss.item())
            preds = np.around(preds)
            by = np.reshape(by.numpy(), (-1, 1))
            ep_test_acc.append(accuracy_score(by, preds))
        
        loss1 = round(sum(ep_tr_loss) / len(ep_tr_loss), 4)
        acc1 = round(sum(ep_tr_acc) / len(ep_tr_acc), 4)
        loss2 = round(sum(ep_test_loss) / len(ep_test_loss), 4)
        acc2 = round(sum(ep_test_acc) / len(ep_test_acc), 4)
        print(f'epoch {epoch}: train loss {loss1}, train acc {acc1}, test loss {loss2}, test acc {acc2}')

        history_loss.extend(ep_test_loss)
        history_acc.extend(ep_test_acc)

        if EPOCH % 10 == 0:
            model_weigths = copy.deepcopy(model.state_dict())
            torch.save(model_weigths, f'/content/drive/MyDrive/weigths_{EPOCH}')
    return history_loss, history_acc

# Test models

In [None]:
! unzip /content/drive/MyDrive/background_noise_dataset.zip -d /content/background_noise_dataset

In [None]:
# Root of the extracted audio archive and the CSV files listing each split.
path = '/content/background_noise_dataset/content/background_noise_dataset'
train_csv = pd.read_csv('/content/drive/MyDrive/train.csv')
test_csv = pd.read_csv('/content/drive/MyDrive/test.csv')

# Both splits share the same audio root and STFT settings.
train_dataset, test_dataset = (
    NoisedDataset(csv_file=frame, root_dir=path, n_fft=N_FFT, hop_length=HOP_LENGTH)
    for frame in (train_csv, test_csv)
)

In [None]:
# ResNet-34 with its first convolution replaced by one taking 2 input channels
# and its final layer replaced by a single-output linear layer.
model = resnet34()
model.conv1 = torch.nn.Conv2d(
    in_channels=2,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
model.fc = torch.nn.Linear(512, 1)

In [None]:
# Train for 10 epochs with batches of 32; keep the evaluation histories.
hist, acc = train_loop(
    model,
    train_dataset,
    test_dataset,
    EPOCH=10,
    batch_size=32,
)

In [None]:
# Snapshot the model's state dict and write it to Drive.
final_weights_path = '/content/drive/MyDrive/weigths_final.pth'
model_weigths = copy.deepcopy(model.state_dict())
torch.save(model_weigths, final_weights_path)