In [3]:
#!pip3 install torch torchvision torchaudio

Collecting torch
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/3f/14/e105b8ef6d324e789c1589e95cb0ab63f3e07c2216d68b1178b7c21b7d2a/torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl.metadata
  Downloading torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl.metadata (25 kB)
Collecting torchvision
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/46/95/179dd1bf8fd6bd689f0907f4baed557d2b12d2cf3d7ed1a8ecefe0a63d83/torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata
  Downloading torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata (6.6 kB)
Collecting torchaudio
  Obtaining dependency information for torchaudio from https://files.pythonhosted.org/packages/57/c4/80cc3315dd1ca706643b78f894901d4d888ffe376a5e401f73d9db61071e/torchaudio-2.2.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata
  Downloading torchaudio-2.2.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata (6.4 kB)
Collecting filelock (from 

In [66]:
import math
from time import time
from sklearn import metrics

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split


In [2]:
	class FeedFoward(nn.Module):
    """ a simple linear layer followed by a non-linearity """

    def __init__(self, n_embd, dropout):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 2 * n_embd),
            nn.ReLU(),
            nn.Linear(2 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

        

class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd, n_head, dropout):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        head_size = n_embd // n_head
        self.sa = nn.MultiheadAttention(n_embd, n_head)
        self.ffwd = FeedFoward(n_embd, dropout)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x, q = None):
        if q is not None:
            X = (q, x, x)
        else:
            X = (x, x, x)
        y = self.sa(*X)
        y = y[0]
        
        x = self.ln1(x + y)
        y = self.ffwd(x)
        x = self.ln2(x + y)
        return x

class DownSampleBlock(nn.Module):
    def __init__(self, kernel_size = 2, n_head = 4, channels=16, drop_rate = 0.1, att_drop_rate = 0.25, n_features=25):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()

        self.dw_conv = nn.Conv1d(
            channels, 
            channels*2, 
            kernel_size, 
            stride=2, 
            #padding='same', 
            dilation=1, 
            groups=n_head,
            bias=False, 
            padding_mode='zeros')

        self.block = Block(n_embd = channels*2, n_head=n_head, dropout = att_drop_rate)
        
    def forward(self, x):
        x = self.dw_conv(x)
        x = x.permute([0,2,1])
        x = self.block(x)
        x = x.permute([0,2,1])
        return x

In [3]:
DownSampleBlock()(torch.rand([8,16,16000])).shape

torch.Size([8, 32, 8000])

In [4]:
class UpSampleBlock(nn.Module):
    def __init__(self, kernel_size = 4, n_head = 2, channels=16, drop_rate = 0.1, att_drop_rate = 0.25, n_features=25):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()

        self.dw_conv = nn.ConvTranspose1d(
            channels, 
            channels//2, 
            kernel_size, 
            stride=2, 
            padding= kernel_size//2 -1, 
            dilation=1, 
            groups=n_head,
            bias=False, 
            padding_mode='zeros')


        self.double_conv = nn.Sequential(
            nn.Conv1d(channels//2, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm1d(channels),
            nn.ReLU(inplace=True),
            nn.Conv1d(channels, channels//2, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm1d(channels//2),
            nn.ReLU(inplace=True)
        )
 
    def forward(self, x):
        x = self.dw_conv(x)
        x = self.double_conv(x)
        return x

In [5]:
UpSampleBlock()(torch.rand([8,16,16000])).shape

torch.Size([8, 8, 32000])

In [6]:
class FeatureExtractor(nn.Module):
    def __init__(self, kernel_size = 5, channels=8, drop_rate = 0.1):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()

        self.input_conv = nn.Sequential(
            nn.BatchNorm1d(1),
            nn.Conv1d(1, channels, kernel_size=kernel_size, padding='same', bias=False),
            nn.ReLU(inplace=True),
        )
 
    def forward(self, x):
        x = self.input_conv(x)
        return x

In [7]:
FeatureExtractor()(torch.rand([8,1,16000])).shape

torch.Size([8, 8, 16000])

In [136]:

# batch_size = 16
# block_size = 256
# max_iters = 5000
# learning_rate = 3e-4
# eval_iters = 100
# n_embd = 384
# n_head = 8
# n_layer = 12
# dropout = 0.2

nn_config_unet = dict(
    n_embd = 8,
     n_head = 2,
    fe_channels = 32, 
    encoder_layers = 1, 
    fe_drop_rate = 0.15,
    att_drop_rate = 0.35,
    n_features = 25,
    bottleneck_k_size = 3,
    block_kernels = [5],
    out_att_blocks = 0
)

    
class UnetLeapModel(nn.Module):
    def __init__(self, n_embd = 64, n_head = 4, encoder_layers = 3, fe_channels=16, fe_drop_rate=0.1, 
                 att_drop_rate=0.2, n_features = 25, bottleneck_k_size = 3, block_kernels = [5, 3], out_att_blocks=0):
        super().__init__()
        
        self.fe = FeatureExtractor(kernel_size = 7, channels = n_embd)
        self.linearStem = nn.Linear(n_features * n_head * 2, n_embd)
        
        self.DOWN = nn.ModuleList([DownSampleBlock(n_head = n_head, channels = stage_emb) for stage_emb in [n_embd, n_embd*2, n_embd*4]])
        self.UP = nn.ModuleList([UpSampleBlock(kernel_size = k,  n_head= n_head, channels = stage_emb) for k, stage_emb in zip([4,4,4], [n_embd*8, n_embd*4, n_embd*2])])
        
        self.head = nn.Sequential(
                                nn.ReLU(),
                                nn.Conv1d(n_embd, 1, kernel_size=3, padding='same', bias=False),
                                )
        self.noise_head = nn.Sequential(
                                nn.ReLU(),
                                nn.Conv1d(n_embd, 1, kernel_size=3, padding='same', bias=False),
                                )

    
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)



    def forward(self, inputs, targets=None):
        #B, T = inputs.shape

        x = self.fe(inputs)
        
        downs = []
        for dc in self.DOWN:
            x = dc(x)
            downs.append(x)

        x = self.UP[0](x) 
        for i, uc in enumerate(self.UP[1:]):
             x = uc(x + downs[::-1][i+1])

        #x = x.permute([0,2,1])
        
        noise = self.noise_head(x)#.permute([0,2,1])
        x = self.head(x)#.permute([0,2,1])
        
        return inputs - noise, noise



In [162]:
nn_config_unet = dict(
    n_embd = 32,
     n_head = 4,
    fe_channels = 32, 
    encoder_layers = 1, 
    fe_drop_rate = 0.15,
    att_drop_rate = 0.35,
    n_features = 25,
    bottleneck_k_size = 3,
    block_kernels = [5],
    out_att_blocks = 0
)

    
model = UnetLeapModel(**nn_config_unet)
model(torch.rand([8,1,16000]))[0].shape

torch.Size([8, 1, 16000])

In [163]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total number of parameters: {total_params}')

Total number of parameters: 1023810


# Load Audios

In [164]:
import json
from typing import Any, Mapping

import numpy as np
import pandas as pd
import yaml
from joblib import Parallel
from tqdm import tqdm


class ProgressParallel(Parallel):
    def __init__(self, use_tqdm=True, total=None, *args, **kwargs):
        self._use_tqdm = use_tqdm
        self._total = total
        super().__init__(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        with tqdm(disable=not self._use_tqdm, total=self._total) as self._pbar:
            return Parallel.__call__(self, *args, **kwargs)

    def print_progress(self):
        if self._total is None:
            self._pbar.total = self.n_dispatched_tasks
        self._pbar.n = self.n_completed_tasks
        self._pbar.refresh()

In [165]:
from copy import deepcopy

import librosa

try:
    import noisereduce as nr
except:
    print("`noisereduce` was not imported")
from joblib import delayed



def get_librosa_load(
    do_normalize,
    do_noisereduce=False,
    pos_dtype=None,
    return_au_len=False,
    **kwargs,
):
    def librosa_load(path):
        # assert kwargs["sr"] == 32_000
        try:
            au, sr = librosa.load(path, **kwargs)
            if do_noisereduce:
                try:
                    au = nr.reduce_noise(y=deepcopy(au), sr=sr)
                    if do_normalize:
                        au = librosa.util.normalize(au)
                    return au, sr
                except Exception as e:
                    print(f"{e} was catched while `reduce_noise`")
                    au, sr = librosa.load(path, **kwargs)
            if do_normalize:
                au = librosa.util.normalize(au)
            if pos_dtype is not None:
                au = au.astype(pos_dtype)
            if return_au_len:
                au = len(au)
            return au, sr
        except Exception as e:
            print("librosa_load failed with {e}")
            return None, None

    return librosa_load


def load_pp_audio(
    name,
    sr=None,
    normalize=True,
    do_noisereduce=False,
    pos_dtype=None,
    res_type="kaiser_best",
    validate_sr=None,
):
    # assert sr == 32_000
    au, sr = librosa.load(name, sr=sr)
    if validate_sr is not None:
        assert sr == validate_sr
    if do_noisereduce:
        try:
            au = nr.reduce_noise(y=deepcopy(au), sr=sr, res_type=res_type)
            if normalize:
                au = librosa.util.normalize(au)
            return au
        except Exception as e:
            print(f"{e} was catched while `reduce_noise`")
            au, sr = librosa.load(name, sr=sr)
    if normalize:
        au = librosa.util.normalize(au)
    if pos_dtype is not None:
        au = au.astype(pos_dtype)
    return au


def parallel_librosa_load(
    audio_pathes,
    n_cores=32,
    return_sr=True,
    return_audio=True,
    do_normalize=False,
    **kwargs,
):
    assert return_sr or return_audio
    complete_out = ProgressParallel(n_jobs=n_cores, total=len(audio_pathes))(
        delayed(get_librosa_load(do_normalize=do_normalize, **kwargs))(el_path)
        for el_path in audio_pathes
    )
    if return_sr and return_audio:
        return complete_out
    elif return_audio:
        return [el[0] for el in complete_out]
    elif return_sr:
        return [el[1] for el in complete_out]

`noisereduce` was not imported


In [45]:
class WaveDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        #root,
        replace_pathes=None,
        df=None,
        df_noise = None,
        add_df_paths=None,
        add_root=None,
        target_col="primary_label",
        name_col="filename",
        rating_col=None,
        sample_rate=16_000,
        segment_len=1.0,
        precompute=False,
        early_aug=None,
        late_aug=None,
        do_mixup=False,
        mixup_params={"prob": 0.5, "alpha": 1.0},
        n_cores=None,
        debug=False,
        df_filter_rule=None,
        do_noisereduce=False,
        late_normalize=False,
        use_sampler=False,
        shuffle=False,
        res_type="kaiser_best",
        pos_dtype=None,
        sampler_col=None,
        use_h5py=False,
    ):
        super().__init__()
        if use_h5py and precompute:
            raise ValueError("h5py files can not be used with `precompute`")
        if df is None and add_df_paths is None:
            raise ValueError("`df` OR/AND `add_df_paths` should be defined")
        if df is not None:
            df[f"{name_col}_with_root"] = None
        if add_df_paths is not None:
            cols_to_take = [
                target_col,
                sec_target_col,
                name_col,
                "duration_s",
            ]
            if rating_col is not None and rating_col not in cols_to_take:
                cols_to_take.append(rating_col)
            if sampler_col is not None and sampler_col not in cols_to_take:
                cols_to_take.append(sampler_col)
            # Create fake `df`
            if df is None:
                df = pd.DataFrame()
            else:
                df = df[cols_to_take]
            add_merged_df = pd.concat(
                [pd.read_csv(el)[cols_to_take] for el in add_df_paths],
                axis=0,
            ).reset_index(drop=True)
            if add_root is not None:
                add_merged_df[f"{name_col}_with_root"] = add_merged_df[
                    name_col
                ].apply(lambda x: pjoin(add_root, x))
            df = pd.concat([df, add_merged_df], axis=0).reset_index(drop=True)
        if df_filter_rule is not None:
            df = df_filter_rule(df)
        if debug:
            self.df = df.iloc[:1200]
        else:
            self.df = df
            self.df = self.df.reset_index(drop=True)
            
            self.df_noise = df_noise
            self.df_noise = self.df_noise.reset_index(drop=True)
            
        
        try:
            self.df["secondary_labels"] = self.df["secondary_labels"].apply(
                eval
            )
        except:
            print(
                "secondary_labels is not found in df. Maybe test or nocall mode"
            )
        if shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)
            self.df_noise = self.df_noise.sample(frac=1).reset_index(drop=True)
            

        # mask_col = self.df[f"{name_col}_with_root"].isna()
        # self.df.loc[mask_col, f"{name_col}_with_root"] = self.df.loc[
        #     mask_col, name_col
        # ].apply(lambda x: pjoin(root, x))
        # if replace_pathes is not None:
        #     self.df[f"{name_col}_with_root"] = self.df[
        #         f"{name_col}_with_root"
        #     ].apply(
        #         lambda x: pjoin(
        #             replace_pathes[1], relpath(x, replace_pathes[0])
        #         )
        #     )

        #self.target_col = target_col
        #self.sec_target_col = sec_target_col
        #self.name_col = f"{name_col}_with_root"
        self.name_col = name_col
        self.rating_col = rating_col
        self.late_normalize = late_normalize

        self.precompute = precompute
        self.use_h5py = use_h5py
        if self.use_h5py:
            self.df[self.name_col] = self.df[self.name_col].apply(
                lambda x: splitext(x)[0] + ".hdf5"
            )

        self.sample_rate = sample_rate
        self.do_noisereduce = do_noisereduce
        # save segment len in points (not in seconds)
        self.segment_len = int(self.sample_rate * segment_len)

        self.early_aug = early_aug
        self.late_aug = late_aug
        if mixup_params is not None and mixup_params.get("weights_path", None):
            if not use_sampler:
                raise ValueError(
                    "Mixup with weighted sampling requires `use_sampler=True`"
                )
        self.do_mixup = do_mixup
        self.mixup_params = mixup_params

        self.pos_dtype = pos_dtype
        self.res_type = res_type

        if self.precompute:
            if n_cores is not None:
                self.audio_cache = parallel_librosa_load(
                    audio_pathes=self.df[self.name_col].tolist(),
                    n_cores=n_cores,
                    return_sr=False,
                    sr=self.sample_rate,
                    do_normalize=not self.late_normalize,
                    do_noisereduce=do_noisereduce,
                    res_type=self.res_type,
                    pos_dtype=self.pos_dtype,
                )
                assert all(au is not None for au in self.audio_cache)
                self.audio_cache = {
                    i: el for i, el in enumerate(self.audio_cache)
                }
            else:
                print("NOT Parallel load")
                self.audio_cache = {
                    # Extract only audio, without sample_rate
                    i: load_pp_audio(
                        im_name,
                        sr=self.sample_rate,
                        do_noisereduce=do_noisereduce,
                        normalize=not self.late_normalize,
                        res_type=self.res_type,
                        pos_dtype=self.pos_dtype,
                    )
                    for i, im_name in tqdm(
                        enumerate(self.df[self.name_col].tolist()),
                        total=len(self.df),
                    )
                }

        if use_sampler:
            self.targets = (
                self.df[sampler_col].tolist()
                if sampler_col is not None
                else self.df[self.target_col].tolist()
            )
        if mixup_params.get("weights_path", None):
            self.weights = load_json(mixup_params["weights_path"])
            self.weights = torch.FloatTensor(
                [self.weights[el] for el in self.targets]
            )

    def turn_off_all_augs(self):
        print("All augs Turned Off")
        self.do_mixup = False
        self.early_aug = None
        self.late_aug = None

    def __len__(self):
        return min(len(self.df), len(self.df_noise))

    def _prepare_sample_piece(self, input):
        if input.shape[0] < self.segment_len:
            pad_len = self.segment_len - input.shape[0]
            return np.pad(
                np.array(input) if self.use_h5py else input, ((pad_len, 0))
            )
        elif input.shape[0] > self.segment_len:
            start = np.random.randint(0, input.shape[0] - self.segment_len)
            return (
                np.array(input[start : start + self.segment_len])
                if self.use_h5py
                else input[start : start + self.segment_len]
            )
        else:
            return np.array(input) if self.use_h5py else input


    def _prepare_sample_target_from_idx(self, idx: int):
        if self.use_h5py:
            with h5py.File(self.df[self.name_col].iloc[idx], "r") as f:
                wave = self._prepare_sample_piece(f["au"])
        else:
            if self.precompute:
                wave = self.audio_cache[idx]
            else:
                # Extract only audio, without sample_rate
                wave = load_pp_audio(
                    self.df[self.name_col].iloc[idx],
                    sr=self.sample_rate,
                    do_noisereduce=self.do_noisereduce,
                    normalize=not self.late_normalize,
                    res_type=self.res_type,
                    pos_dtype=self.pos_dtype,
                )

                wave_noise = load_pp_audio(
                    self.df_noise[self.name_col].sample(1).iloc[0],
                    sr=self.sample_rate,
                    do_noisereduce=self.do_noisereduce,
                    normalize=not self.late_normalize,
                    res_type=self.res_type,
                    pos_dtype=self.pos_dtype,
                )
            
            if self.pos_dtype is not None:
                wave = wave.astype(np.float32)
                wave_noise = wave_noise.astype(np.float32) 

            wave = self._prepare_sample_piece(wave)
            wave_noise = self._prepare_sample_piece(wave_noise)
        
        if self.early_aug is not None:
            raise RuntimeError("Not implemented")

        if self.late_normalize:
            wave = librosa.util.normalize(wave)
            wave_noise = librosa.util.normalize(wave_noise)
            
        return wave, wave_noise * np.random.random(1)#**0.5

    def _get_mixup_idx(self):
        if self.mixup_params.get("weights_path", None):
            mixup_idx = torch.multinomial(
                self.weights, 1, replacement=True
            ).item()
        else:
            mixup_idx = np.random.randint(0, self.__len__())
        return mixup_idx

    def __getitem__(self, index: int):
        wave, wave_noise = self._prepare_sample_target_from_idx(index)

        if self.late_aug is not None:
            wave = self.late_aug(wave)
            wave_noise = self.late_aug(wave_noise) 

        wave = np.expand_dims(wave, 0)
        wave_noise = np.expand_dims(wave_noise, 0)
        
        combine = wave + wave_noise
        
        return combine, wave, wave_noise


In [56]:
import os
import pandas as pd

filenames = ['clean_train/' + p for p in os.listdir('clean_train') if 'wav' in p]
df_train = pd.DataFrame(filenames, columns = ['filename'])

filenames = ['noise_train/' + p for p in os.listdir('noise_train') if 'wav' in p]
df_noise = pd.DataFrame(filenames, columns = ['filename'])


In [57]:
dataset = WaveDataset(df = df_train, df_noise = df_noise, shuffle=True)

secondary_labels is not found in df. Maybe test or nocall mode


In [58]:
au = dataset.__getitem__(9)

In [59]:
import IPython

IPython.display.Audio(au[0], rate = 16000)

In [124]:
def r2_score(y_pred:torch.Tensor, y_true:torch.Tensor) -> float:
    """
    Calculate the R^2 (coefficient of determination) regression score.
    
    Parameters
    ----------
    y_pred : torch.Tensor
        The predicted values.
    y_true : torch.Tensor
        The true values.

    Returns
    -------
    float
        The R^2 score, a float value.
    """
    
    ss_res = torch.sum((y_true - y_pred) ** 2) 
    ss_tot = torch.sum((y_true - torch.mean(y_true)) ** 2)
    
    r2 = 1 - ss_res / ss_tot
    
    return r2.item()



def train_fn(
    model: nn.Module, 
    loader: DataLoader, 
    optimizer: optim.Optimizer, 
    criterion: nn.Module,
) -> float:
    """
    Train the deep learning model for 1 epoch.
    
    Parameters
    ----------
    model : torch.nn.Module
        The PyTorch model to be trained.
    loader : torch.utils.data.DataLoader
        DataLoader for the training data.
    optimizer : torch.optim.Optimizer
        Optimizer used for the backpropagation.
    criterion : torch.nn.Module
        Loss function used to compute the loss between the predicted and true values.

    Returns
    -------
    float
        The average training loss over the epoch.
    """
    
    progress_bar = tqdm(enumerate(loader, start=1), total=len(loader)-1, ncols=20)
    progress_bar.set_description(f'Epoch {epoch}')
    model.train()
    train_loss = 0
    
    for step, batch in progress_bar:
        x, y, n = batch
        x, y, n = x.to(DEVICE), y.to(DEVICE), n.to(DEVICE)
        
        optimizer.zero_grad()
        y_pred, n_pred = model(x.to(torch.float32))
        loss = criterion(y_pred.to(torch.float32), y.to(torch.float32)).mean()
        loss_noise = criterion(n_pred.to(torch.float32), n.to(torch.float32)).mean()

        #loss += loss_noise*0.5
        
        loss.backward()
        # Clip gradients
        nn.utils.clip_grad_norm_(model.parameters(), 3.0)
        
        optimizer.step()

        train_loss += loss.item()

        progress_bar.set_postfix({
            'train_loss': train_loss / step,
        })
        
    return train_loss/step



def valid_fn(model: nn.Module, loader: DataLoader) -> float:
    """
    Validate the deep learning model for 1 epoch.
    
    Parameters
    ----------
    model : torch.nn.Module
        The PyTorch model to be validated.
    loader : torch.utils.data.DataLoader
        DataLoader for the validation data.

    Returns
    -------
    float
        The average R2 score over the validation epoch.
    """
    
    progress_bar = tqdm(enumerate(loader, start=1), total=len(loader)-1, ncols=20)
    progress_bar.set_description(f'Epoch {epoch}')
    model.eval()
    val_score = 0

    val_r2_full = 0
    val_mse = 0
    y_true = []
    y_preds = []
    with torch.no_grad():
        for step, batch in progress_bar:
            x, y, n = batch
            x, y, n = x.to(DEVICE), y.to(DEVICE), n.to(DEVICE)
            
            y_pred, n_pred = model(x.to(torch.float32))
            
            y = y.cpu().to(torch.float32)
            y_pred = y_pred.cpu().to(torch.float32)
            
            val_mse += metrics.mean_squared_error(y.squeeze(), y_pred.squeeze())

            val_score += r2_score(y_pred.squeeze(), y.squeeze())

            
            
            # progress_bar.set_postfix({
            #     'valid_score': val_score / step,
            # })
    
            y_true.append(y)
            y_preds.append(y_pred)

            progress_bar.set_postfix({
                    'val_sc': val_score / step,
                    'val_mse': val_mse / step,          
                })
            
    return val_score/step, val_mse/step

In [166]:
DEVICE = 'cpu'
LEARNING_RATE  = 1e-4
NUM_EPOCHS = 50
BATCH_SIZE = 8

model = UnetLeapModel(**nn_config_unet)

model = model.to(DEVICE)
#model.load_state_dict(torch.load("best_mse_model.pth", map_location=DEVICE))


#criterion = nn.MSELoss()
#criterion = torch.nn.HuberLoss(reduction='mean', delta=0.8)
criterion = nn.L1Loss(reduction='mean')

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = lr_scheduler.PolynomialLR(optimizer, power=2.0, total_iters=NUM_EPOCHS)

In [167]:
df_test = df_train.iloc[::5]
df_train_part = df_train.iloc[~df_train.index.isin(df_test.index)]

df_test_noise = df_noise.iloc[::5]
df_noise_part = df_noise.iloc[~df_noise.index.isin(df_test_noise.index)]

ds_train = WaveDataset(df = df_train_part, df_noise = df_noise_part, shuffle=True)
ds_valid = WaveDataset(df = df_test, df_noise = df_test_noise, shuffle=False)

train_loader = DataLoader(
    ds_train, 
    batch_size=BATCH_SIZE, 
    shuffle=True, 
    drop_last=True,
)

valid_loader = DataLoader(
    ds_valid, 
    batch_size=BATCH_SIZE, 
    shuffle=False, 
    drop_last=False,
)

secondary_labels is not found in df. Maybe test or nocall mode
secondary_labels is not found in df. Maybe test or nocall mode


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{name_col}_with_root"] = None


In [168]:
best_score = -np.inf
best_mse = np.inf

scores = []
mses = []
train_mse = []

for epoch in range(0, NUM_EPOCHS):
    train_loss = train_fn(model, train_loader, optimizer, criterion)
    val_score, val_mse  = valid_fn(model, valid_loader)

    train_mse.append(train_loss)
    scores.append(val_score)
    mses.append(val_mse)   
    
    # if val_r2_full > best_score:
    #     best_score = val_r2_full
    #     torch.save(model.state_dict(), "best_score_model.pth")

    # if val_mse < best_mse:
    #     best_mse = val_mse
    #     torch.save(model.state_dict(), "best_mse_model.pth")
        
    # torch.save(model.state_dict(), "last_model.pth")
    
    scheduler.step()

Epoch 0: : 12it [00:27,  2.31s/it, train_loss=0.304]
Epoch 0: : 4it [00:03,  1.22it/s, val_sc=0.385, val_mse=0.0129]
Epoch 1: : 12it [00:27,  2.28s/it, train_loss=0.206]
Epoch 1: : 4it [00:03,  1.27it/s, val_sc=-0.0714, val_mse=0.0209]
Epoch 2: : 12it [00:26,  2.17s/it, train_loss=0.131]
Epoch 2: : 4it [00:02,  1.46it/s, val_sc=0.272, val_mse=0.0152]
Epoch 3: : 12it [00:25,  2.12s/it, train_loss=0.0802]
Epoch 3: : 4it [00:02,  1.35it/s, val_sc=0.674, val_mse=0.00741]
Epoch 4: : 12it [00:25,  2.11s/it, train_loss=0.0611]
Epoch 4: : 4it [00:03,  1.33it/s, val_sc=0.549, val_mse=0.00787]
Epoch 5: : 12it [00:26,  2.20s/it, train_loss=0.0524]
Epoch 5: : 4it [00:02,  1.40it/s, val_sc=0.702, val_mse=0.00678]
Epoch 6: : 12it [00:25,  2.15s/it, train_loss=0.0525]
Epoch 6: : 4it [00:02,  1.36it/s, val_sc=0.728, val_mse=0.00583]
Epoch 7: : 12it [00:25,  2.12s/it, train_loss=0.0501]
Epoch 7: : 4it [00:02,  1.50it/s, val_sc=0.596, val_mse=0.00892]
Epoch 8: : 12it [00:25,  2.14s/it, train_loss=0.0478

KeyboardInterrupt: 

In [169]:
au = ds_valid.__getitem__(12)

In [170]:
out = model(torch.tensor(au[0]).view(1,1,16000).to(torch.float32))

In [171]:
import IPython

IPython.display.Audio(au[0], rate = 16000)

In [172]:
w = out[0].squeeze().cpu().detach().numpy()
n = out[1].squeeze().cpu().detach().numpy()

In [173]:
import IPython

IPython.display.Audio(w, rate = 16000)

In [174]:
import IPython

IPython.display.Audio(n, rate = 16000)

In [175]:
import IPython

IPython.display.Audio(au[2], rate = 16000)