## Connecting Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive; the next cell reads data.zip from
# /content/gdrive/MyDrive.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import zipfile
# Unpack the dataset archive from Drive into /content. The context manager
# closes the archive handle even if extraction fails part-way.
with zipfile.ZipFile('/content/gdrive/MyDrive/data.zip', 'r') as audio:
    audio.extractall('/content')

## Meta creation

In [None]:
import pandas as pd
from pathlib import Path
import os
from tqdm.notebook import tqdm

data_folder = Path('/content/val') # specify train or validation folder

# os.walk silently yields nothing for a missing directory, which would write an
# empty meta file without any warning; fail loudly instead.
if not data_folder.is_dir():
    raise FileNotFoundError(f'Data folder not found: {data_folder}')

# One record per file: [folder, type, id, file name].
records = []

for path, _, files in tqdm(os.walk(data_folder)):
    # The dataset class later rebuilds paths as <folder>/<type>/<id>/<file>,
    # so the last three components of the walked directory are recorded.
    parts = path.split('/')
    for file in files:
        # `id_` (not `id`) so the builtin id() is not shadowed in later cells.
        folder, type_, id_ = parts[-3], parts[-2], parts[-1]
        records.append([folder, type_, id_, file])

df = pd.DataFrame(records, columns = ['folder', 'type', 'id', 'path'])
# The DataFrame index is written as the first CSV column (pandas default);
# the split cell reads the meta back with index_col=0.
df.to_csv('meta_val.csv')

0it [00:00, ?it/s]

## Meta demonstration

In [None]:
import pandas as pd

# Load the training meta table and preview its first rows.
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns in the recorded output
# look like index columns written by earlier to_csv calls -- confirm; reading
# with index_col=0 would absorb one of them.
demonstration = pd.read_csv('/content/meta_train.csv')
demonstration.head()

Unnamed: 0.1,Unnamed: 0,folder,type,id,path
0,0,train,noisy,6625,6625_70995_6625-70995-0008.npy
1,1,train,noisy,6625,6625_70995_6625-70995-0004.npy
2,2,train,noisy,6625,6625_39674_6625-39674-0024.npy
3,3,train,noisy,6625,6625_39680_6625-39680-0034.npy
4,4,train,noisy,6625,6625_70995_6625-70995-0006.npy


## Sound Dataset

In [None]:
from torch.utils.data import Dataset
import numpy as np
import os
from tqdm.notebook import tqdm

class SoundDataset(Dataset):
    """Dataset of paired noisy/clean arrays described by a metadata table.

    Every distinct file name in ``meta.path`` is one sample. Its noisy and
    clean versions are loaded with ``np.load`` from
    ``<source_folder>/<folder>/<type>/<id>/<file name>``.

    Args:
        meta: DataFrame with ``folder``, ``type``, ``id`` and ``path`` columns,
            where ``type`` is ``'noisy'`` or ``'clean'``.
        source_folder: Root directory prepended to every sample path.
        transforms: Callable invoked as ``transforms(image=noisy, mask=clean)``
            that returns a mapping with ``'image'`` and ``'mask'`` tensors.
    """

    def __init__(self, meta, source_folder, transforms):

        self.source_folder = source_folder
        self.transforms = transforms
        self.meta = meta
        self.files = self.meta.path.unique()

        # Build the per-file lookups once, so __getitem__ does not have to
        # scan the whole meta table for every sample.
        self._noisy_rows = self._first_row_per_file('noisy')
        self._clean_rows = self._first_row_per_file('clean')

    def _first_row_per_file(self, type_):
        """Map each file name to its first ``meta`` row of the given type."""
        rows = self.meta.loc[self.meta['type'] == type_]
        rows = rows.drop_duplicates(subset='path', keep='first')
        return rows.set_index('path').to_dict('index')

    def _row_for(self, table, type_, sound):
        """Return the row for ``sound`` or raise a descriptive ``KeyError``."""
        try:
            return table[sound]
        except KeyError:
            raise KeyError(f"no '{type_}' entry in meta for file {sound!r}") from None

    def _load(self, row, sound):
        """Load one array as float and append a trailing channel axis."""
        full_path = os.path.join(self.source_folder, row.get('folder'), row.get('type'), str(row.get('id')), sound)
        return np.expand_dims(np.load(full_path).astype(float), 2)

    def __len__(self):
        """Number of distinct file names in the meta table."""
        return len(self.files)

    def __getitem__(self, i):
        """Return the ``(noisy, clean)`` float tensors for the i-th file.

        Raises:
            KeyError: if the file has no noisy or no clean row in ``meta``.
        """
        sound = self.files[i]

        noisy_row = self._row_for(self._noisy_rows, 'noisy', sound)
        clean_row = self._row_for(self._clean_rows, 'clean', sound)

        noisy = self._load(noisy_row, sound)
        clean = self._load(clean_row, sound)

        augmented = self.transforms(image=noisy, mask=clean)

        # The image is indexed [0] and the mask [:, :, 0]: presumably the
        # transform returns the image channel-first and leaves the mask
        # channel-last (as ToTensorV2 does by default) -- TODO confirm.
        return augmented['image'][0].unsqueeze(0).float(), augmented['mask'][:, :, 0].unsqueeze(0).float()
        

## Splitting the meta: training (80%), validation (20%)

In [None]:
import pandas as pd

# Read the training meta (first CSV column is used as the index) and order the
# rows by `id`, renumbering the index 0..n-1 so labels match positions.
df = (
    pd.read_csv('/content/meta_train.csv', index_col=0)  # Specify meta train
    .sort_values(by=['id'], ignore_index=True)
)

n_rows = df.shape[0]
train_size = int(0.8 * n_rows)
val_size = n_rows - train_size

# The `id` found at the 80% mark is the boundary: every row with that id or a
# larger one goes to validation, so a single id never ends up in both parts.
border_id = df.loc[train_size, 'id']

below_border = df['id'] < border_id
at_or_above_border = df['id'] >= border_id

train_df = df[below_border]
val_df = df[at_or_above_border]

## Train and validation steps

In [None]:
def train_step(images, labels, architecture, criterion, optimizer):
    """Run one optimisation step on a single batch and return its loss.

    Args:
        images: Input batch tensor.
        labels: Target batch tensor.
        architecture: Model to train; it is switched to training mode.
        criterion: Loss callable invoked as ``criterion(prediction, labels)``.
        optimizer: Optimizer whose gradients are reset and then stepped.

    Returns:
        The batch loss as a NumPy value on the CPU.
    """
    if torch.cuda.is_available():
        target_device = 'cuda'
    else:
        target_device = 'cpu'

    batch_inputs = images.to(target_device)
    batch_targets = labels.to(target_device)

    architecture.train()  # enforce training regime
    optimizer.zero_grad()

    batch_loss = criterion(architecture(batch_inputs), batch_targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.data.cpu().numpy()

def val_step(images, labels, architecture, criterion, optimizer):
    """Compute the loss on a single validation batch without training.

    Args:
        images: Input batch tensor.
        labels: Target batch tensor.
        architecture: Model to evaluate; it is switched to evaluation mode.
        criterion: Loss callable invoked as ``criterion(prediction, labels)``.
        optimizer: Unused; kept so the signature mirrors ``train_step``.

    Returns:
        The batch loss as a NumPy value on the CPU.
    """
    device = ('cuda' if torch.cuda.is_available() else 'cpu')
    images, labels = images.to(device), labels.to(device)
    architecture.eval() # enforce evaluation regime

    # No gradients are needed for validation; disabling autograd avoids
    # building (and keeping in memory) a graph for every batch.
    with torch.no_grad():
        pred_logits = architecture(images)
        loss = criterion(pred_logits, labels)

    return loss.detach().cpu().numpy()

## Initialize Dataset and DataLoader

In [None]:
from torch.utils.data import DataLoader, Dataset
import albumentations as albu
from albumentations.pytorch import ToTensorV2

# Pad (if needed) and randomly crop to 480x80, resize to 576x96, then convert
# to tensors.
train_transform = albu.Compose([albu.PadIfNeeded(480, 80),
            albu.RandomCrop(480, 80),
            albu.Resize(576, 96),
            ToTensorV2()])

# Use the DataLoader name imported in this cell: `torch` itself is not imported
# until the training cell, so `torch.utils.data.DataLoader` raises NameError
# when the notebook is run top to bottom on a fresh kernel.
train_dataset = SoundDataset(train_df, '/content/data', transforms = train_transform)
train_loader = DataLoader(train_dataset,
                          batch_size=32,
                          num_workers=1, shuffle = True)

# NOTE(review): validation reuses the random-crop pipeline, so validation loss
# is not computed on identical crops from epoch to epoch -- confirm intended.
val_dataset = SoundDataset(val_df, '/content/data', transforms = train_transform)
val_loader = DataLoader(val_dataset,
                        batch_size=32,
                        num_workers=1, shuffle = False)

In [None]:
!pip install segmentation-models-pytorch

## Train

In [None]:
import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import torch.optim as optim

# Train on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# U-Net with a ResNet-18 encoder and a single input channel, moved to `device`.
model = smp.Unet(encoder_name='resnet18', in_channels=1)
model = model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(50):
    # Training pass: sum of the per-batch losses returned by train_step.
    epoch_loss_train = sum(
        train_step(X_batch, y_batch, model, criterion, optimizer)
        for X_batch, y_batch in tqdm(train_loader)
    )

    # Validation pass, run after the training pass of the same epoch.
    epoch_loss_val = sum(
        val_step(X_batch, y_batch, model, criterion, optimizer)
        for X_batch, y_batch in tqdm(val_loader)
    )

    # Report the mean per-batch loss for both passes.
    print(f'Epoch {epoch+0:03}: | Train_Loss: {epoch_loss_train/len(train_loader):.5f}')
    print(f'Epoch {epoch+0:03}: | Val_Loss: {epoch_loss_val/len(val_loader):.5f}')

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 000: | Train_Loss: 0.14807
Epoch 000: | Val_Loss: 0.06397


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 001: | Train_Loss: 0.04785
Epoch 001: | Val_Loss: 0.05052


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 002: | Train_Loss: 0.04180
Epoch 002: | Val_Loss: 0.04517


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 003: | Train_Loss: 0.03853
Epoch 003: | Val_Loss: 0.04336


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 004: | Train_Loss: 0.03662
Epoch 004: | Val_Loss: 0.04214


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 005: | Train_Loss: 0.03426
Epoch 005: | Val_Loss: 0.04101


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 006: | Train_Loss: 0.03265
Epoch 006: | Val_Loss: 0.03860


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 007: | Train_Loss: 0.03107
Epoch 007: | Val_Loss: 0.04173


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 008: | Train_Loss: 0.03005
Epoch 008: | Val_Loss: 0.03945


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 009: | Train_Loss: 0.02864
Epoch 009: | Val_Loss: 0.03878


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 010: | Train_Loss: 0.02797
Epoch 010: | Val_Loss: 0.04002


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 011: | Train_Loss: 0.02681
Epoch 011: | Val_Loss: 0.03809


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 012: | Train_Loss: 0.02602
Epoch 012: | Val_Loss: 0.03698


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 013: | Train_Loss: 0.02497
Epoch 013: | Val_Loss: 0.03738


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 014: | Train_Loss: 0.02495
Epoch 014: | Val_Loss: 0.03991


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 015: | Train_Loss: 0.02437
Epoch 015: | Val_Loss: 0.04189


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 016: | Train_Loss: 0.02390
Epoch 016: | Val_Loss: 0.03515


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 017: | Train_Loss: 0.02348
Epoch 017: | Val_Loss: 0.03808


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 018: | Train_Loss: 0.02275
Epoch 018: | Val_Loss: 0.03523


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 019: | Train_Loss: 0.02245
Epoch 019: | Val_Loss: 0.03490


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 020: | Train_Loss: 0.02209
Epoch 020: | Val_Loss: 0.03423


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 021: | Train_Loss: 0.02165
Epoch 021: | Val_Loss: 0.03413


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 022: | Train_Loss: 0.02125
Epoch 022: | Val_Loss: 0.03435


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 023: | Train_Loss: 0.02069
Epoch 023: | Val_Loss: 0.03394


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 024: | Train_Loss: 0.02047
Epoch 024: | Val_Loss: 0.03400


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 025: | Train_Loss: 0.02032
Epoch 025: | Val_Loss: 0.03280


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 026: | Train_Loss: 0.02029
Epoch 026: | Val_Loss: 0.03482


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 027: | Train_Loss: 0.02203
Epoch 027: | Val_Loss: 0.03451


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 028: | Train_Loss: 0.01999
Epoch 028: | Val_Loss: 0.03537


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 029: | Train_Loss: 0.01937
Epoch 029: | Val_Loss: 0.03267


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 030: | Train_Loss: 0.01884
Epoch 030: | Val_Loss: 0.03273


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 031: | Train_Loss: 0.01861
Epoch 031: | Val_Loss: 0.03217


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 032: | Train_Loss: 0.01816
Epoch 032: | Val_Loss: 0.03196


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 033: | Train_Loss: 0.01812
Epoch 033: | Val_Loss: 0.03231


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 034: | Train_Loss: 0.01777
Epoch 034: | Val_Loss: 0.03246


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 035: | Train_Loss: 0.01758
Epoch 035: | Val_Loss: 0.03335


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 036: | Train_Loss: 0.01747
Epoch 036: | Val_Loss: 0.03271


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 037: | Train_Loss: 0.01727
Epoch 037: | Val_Loss: 0.03386


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 038: | Train_Loss: 0.01713
Epoch 038: | Val_Loss: 0.03224


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 039: | Train_Loss: 0.01688
Epoch 039: | Val_Loss: 0.03191


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 040: | Train_Loss: 0.01680
Epoch 040: | Val_Loss: 0.03188


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 041: | Train_Loss: 0.01661
Epoch 041: | Val_Loss: 0.03179


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 042: | Train_Loss: 0.01657
Epoch 042: | Val_Loss: 0.03258


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 043: | Train_Loss: 0.01647
Epoch 043: | Val_Loss: 0.03113


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 044: | Train_Loss: 0.01616
Epoch 044: | Val_Loss: 0.03255


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 045: | Train_Loss: 0.01590
Epoch 045: | Val_Loss: 0.03109


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 046: | Train_Loss: 0.01583
Epoch 046: | Val_Loss: 0.03173


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 047: | Train_Loss: 0.01594
Epoch 047: | Val_Loss: 0.03248


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 048: | Train_Loss: 0.01584
Epoch 048: | Val_Loss: 0.03132


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 049: | Train_Loss: 0.01535
Epoch 049: | Val_Loss: 0.03106


## Save weights

In [None]:
import torch

# Save only the model's state_dict (not the whole model object) to Google
# Drive; the test section loads this file back with load_state_dict.
torch.save(model.state_dict(), '/content/gdrive/MyDrive/unet_12.pth')

## Test

In [None]:
!pip install segmentation-models-pytorch
!pip install torchmetrics

In [3]:
from google.colab import drive
# Mount Google Drive at /content/gdrive; the cells below read data.zip and the
# saved weights from /content/gdrive/MyDrive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
import zipfile
# Unpack the dataset archive from Drive into /content. The context manager
# closes the archive handle even if extraction fails part-way.
with zipfile.ZipFile('/content/gdrive/MyDrive/data.zip', 'r') as audio:
    audio.extractall('/content')

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import segmentation_models_pytorch as smp
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import pandas as pd
from pathlib import Path
import os
from tqdm.notebook import tqdm
import numpy as np
from torchmetrics import MeanSquaredError

In [6]:
class SoundDataset(Dataset):
    """Dataset of paired noisy/clean arrays described by a metadata table.

    Every distinct file name in ``meta.path`` is one sample. Its noisy and
    clean versions are loaded with ``np.load`` from
    ``<source_folder>/<folder>/<type>/<id>/<file name>``.

    Args:
        meta: DataFrame with ``folder``, ``type``, ``id`` and ``path`` columns,
            where ``type`` is ``'noisy'`` or ``'clean'``.
        source_folder: Root directory prepended to every sample path.
        transforms: Callable invoked as ``transforms(image=noisy, mask=clean)``
            that returns a mapping with ``'image'`` and ``'mask'`` tensors.
    """

    def __init__(self, meta, source_folder, transforms):

        self.source_folder = source_folder
        self.transforms = transforms
        self.meta = meta
        self.files = self.meta.path.unique()

        # Build the per-file lookups once, so __getitem__ does not have to
        # scan the whole meta table for every sample.
        self._noisy_rows = self._first_row_per_file('noisy')
        self._clean_rows = self._first_row_per_file('clean')

    def _first_row_per_file(self, type_):
        """Map each file name to its first ``meta`` row of the given type."""
        rows = self.meta.loc[self.meta['type'] == type_]
        rows = rows.drop_duplicates(subset='path', keep='first')
        return rows.set_index('path').to_dict('index')

    def _row_for(self, table, type_, sound):
        """Return the row for ``sound`` or raise a descriptive ``KeyError``."""
        try:
            return table[sound]
        except KeyError:
            raise KeyError(f"no '{type_}' entry in meta for file {sound!r}") from None

    def _load(self, row, sound):
        """Load one array as float and append a trailing channel axis."""
        full_path = os.path.join(self.source_folder, row.get('folder'), row.get('type'), str(row.get('id')), sound)
        return np.expand_dims(np.load(full_path).astype(float), 2)

    def __len__(self):
        """Number of distinct file names in the meta table."""
        return len(self.files)

    def __getitem__(self, i):
        """Return the ``(noisy, clean)`` float tensors for the i-th file.

        Raises:
            KeyError: if the file has no noisy or no clean row in ``meta``.
        """
        sound = self.files[i]

        noisy_row = self._row_for(self._noisy_rows, 'noisy', sound)
        clean_row = self._row_for(self._clean_rows, 'clean', sound)

        noisy = self._load(noisy_row, sound)
        clean = self._load(clean_row, sound)

        augmented = self.transforms(image=noisy, mask=clean)

        # The image is indexed [0] and the mask [:, :, 0]: presumably the
        # transform returns the image channel-first and leaves the mask
        # channel-last (as ToTensorV2 does by default) -- TODO confirm.
        return augmented['image'][0].unsqueeze(0).float(), augmented['mask'][:, :, 0].unsqueeze(0).float()
        

In [8]:
from torchmetrics import MeanSquaredError

test_df = pd.read_csv('/content/meta_val.csv') # Specify test meta

# Same preprocessing pipeline as used for training.
# NOTE(review): RandomCrop makes the reported MSE vary from run to run --
# confirm whether a deterministic crop is wanted for evaluation.
test_transform = albu.Compose([albu.PadIfNeeded(480, 80),
            albu.RandomCrop(480, 80),
            albu.Resize(576, 96),
            ToTensorV2()])

test_dataset = SoundDataset(test_df, '/content/data', transforms = test_transform)
test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         num_workers=1)

device = ("cuda" if torch.cuda.is_available() else 'cpu')

# encoder_weights=None skips the pretrained-encoder download: every parameter
# is overwritten by the checkpoint loaded on the next line anyway.
model = smp.Unet(encoder_name='resnet18', encoder_weights=None, in_channels=1).to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
model.load_state_dict(torch.load('/content/gdrive/MyDrive/unet_12.pth', map_location=device)) # Specify weights
model.eval()

metric = MeanSquaredError().to(device)

# Per-batch MSE values; their mean is reported below.
MSE = list()

with torch.no_grad():
  for x, y in tqdm(test_loader):

      x, y = x.to(device), y.to(device)
      y_pred = model(x)
      MSE.append(metric(y_pred, y).to('cpu').detach().numpy())

print('MSE = ', np.mean(MSE))

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

MSE =  0.027801605
