##Connecting Google drive

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import zipfile
audio = zipfile.ZipFile('/content/gdrive/MyDrive/data.zip', 'r')
audio.extractall('/content')

##Meta creation

In [None]:
import pandas as pd
from pathlib import Path
import os
from tqdm.notebook import tqdm

data_folder = Path('/content/val') # specify train or validation folder
df = []

for path, _, files in tqdm(os.walk(data_folder)):
    for file in files:
      
      folder = path.split('/')[-3]
      type_ = path.split('/')[-2]
      id = path.split('/')[-1]
      df.append([folder, type_, id, file])

df = pd.DataFrame(df, columns = ['folder', 'type', 'id', 'path'])
df.to_csv('meta_val.csv')

0it [00:00, ?it/s]

##Meta Demonstration

In [None]:
import pandas as pd

demonstration = pd.read_csv('/content/meta_train.csv')
demonstration.head()

Unnamed: 0.1,Unnamed: 0,folder,type,id,path
0,0,train,noisy,6625,6625_70995_6625-70995-0008.npy
1,1,train,noisy,6625,6625_70995_6625-70995-0004.npy
2,2,train,noisy,6625,6625_39674_6625-39674-0024.npy
3,3,train,noisy,6625,6625_39680_6625-39680-0034.npy
4,4,train,noisy,6625,6625_70995_6625-70995-0006.npy


##Sound Dataset

In [None]:
from torch.utils.data import Dataset
import numpy as np
import os
from tqdm.notebook import tqdm

class SoundDataset(Dataset):

    def __init__(self, meta, source_folder, transforms):

        self.source_folder = source_folder
        self.transforms = transforms
        self.meta = meta
        self.files = self.meta.path.unique()

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):

        sound = self.files[i]
        sound_df = self.meta.loc[self.meta.path == sound]

        if np.random.rand() < 0.5:
            label = 0.0
            pic_row = sound_df.loc[sound_df.type == 'clean'].iloc[0]
        else:
            label = 1.0
            pic_row = sound_df.loc[sound_df.type == 'noisy'].iloc[0]


        pic_path = os.path.join(self.source_folder, pic_row.get('folder'), pic_row.get('type'), str(pic_row.get('id')), sound)

        pic = np.expand_dims(np.load(pic_path).astype(float), 2)

        augmented = self.transforms(image=pic)

        return augmented['image'][0].unsqueeze(0).float(), torch.Tensor([label]).float()
        

##Separation of meta: Training (80%), validation (20%)

In [None]:
import pandas as pd

df = pd.read_csv('/content/meta_train.csv', index_col=0) # Specify mata train

df = df.sort_values(by=['id'], ignore_index=True)

train_size = int(0.8 * df.shape[0])
val_size = df.shape[0] - train_size

border_id = df['id'][train_size]

train_df = df[df['id'] < border_id]
val_df = df[df['id'] >= border_id]

##Train and Validation step

In [None]:
def train_step(images, labels, architecture, criterion, optimizer):
    device = ('cuda' if torch.cuda.is_available() else 'cpu')
    images, labels = images.to(device), labels.to(device)
    architecture.train() # enforce training regime

    pred_image, pred_logits = architecture(images)
    loss = criterion(pred_logits, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss.data.cpu().numpy()

def val_step(images, labels, architecture, criterion, optimizer):
    device = ('cuda' if torch.cuda.is_available() else 'cpu')
    images, labels = images.to(device), labels.to(device)
    architecture.eval() # enforce evaluation regime

    pred_image, pred_logits = architecture(images)
    loss = criterion(pred_logits, labels)


    return loss.data.cpu().numpy()

##Initialize Dataset, Dataloader

In [None]:
from torch.utils.data import DataLoader, Dataset
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import torch

train_transform = albu.Compose([albu.PadIfNeeded(480, 80),
            albu.RandomCrop(480, 80),
            albu.Resize(576, 96),
            ToTensorV2()])

train_dataset = SoundDataset(train_df, '/content/data', transforms = train_transform)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=32,
                                             num_workers=1, shuffle = True)
val_dataset = SoundDataset(val_df, '/content/data', transforms = train_transform)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=32,
                                             num_workers=1, shuffle = False)

In [None]:
!pip install segmentation-models-pytorch

##Train

In [None]:
import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import torch.optim as optim

device = ('cuda' if torch.cuda.is_available() else 'cpu')
params=dict(
            pooling='avg',
            dropout=0.2,
            activation='sigmoid',
            classes=1)

model = smp.Unet(encoder_name='resnet18', in_channels=1, aux_params = params).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(50):
    epoch_loss_train = 0
    epoch_loss_val = 0
    for X_batch, y_batch in tqdm(train_loader):

        loss_train = train_step(X_batch, y_batch, model, criterion, optimizer)
        
        epoch_loss_train += loss_train     

    for X_batch, y_batch in tqdm(val_loader):

        loss_val = val_step(X_batch, y_batch, model, criterion, optimizer)
        
        epoch_loss_val += loss_val 


    print(f'Epoch {epoch+0:03}: | Train_Loss: {epoch_loss_train/len(train_loader):.5f}')
    print(f'Epoch {epoch+0:03}: | Val_Loss: {epoch_loss_val/len(val_loader):.5f}')

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 000: | Train_Loss: 0.52095
Epoch 000: | Val_Loss: 0.50736


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 001: | Train_Loss: 0.51683
Epoch 001: | Val_Loss: 0.51467


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 002: | Train_Loss: 0.50911
Epoch 002: | Val_Loss: 0.51078


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 003: | Train_Loss: 0.50955
Epoch 003: | Val_Loss: 0.51529


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 004: | Train_Loss: 0.50914
Epoch 004: | Val_Loss: 0.51290


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 005: | Train_Loss: 0.50956
Epoch 005: | Val_Loss: 0.51335


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 006: | Train_Loss: 0.51249
Epoch 006: | Val_Loss: 0.51370


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 007: | Train_Loss: 0.50883
Epoch 007: | Val_Loss: 0.50089


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 008: | Train_Loss: 0.50568
Epoch 008: | Val_Loss: 0.51285


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 009: | Train_Loss: 0.50891
Epoch 009: | Val_Loss: 0.51599


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 010: | Train_Loss: 0.51127
Epoch 010: | Val_Loss: 0.51398


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 011: | Train_Loss: 0.50555
Epoch 011: | Val_Loss: 0.50620


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 012: | Train_Loss: 0.50804
Epoch 012: | Val_Loss: 0.51203


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 013: | Train_Loss: 0.50502
Epoch 013: | Val_Loss: 0.50297


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 014: | Train_Loss: 0.50700
Epoch 014: | Val_Loss: 0.50666


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 015: | Train_Loss: 0.50920
Epoch 015: | Val_Loss: 0.51210


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 016: | Train_Loss: 0.50920
Epoch 016: | Val_Loss: 0.50301


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 017: | Train_Loss: 0.50563
Epoch 017: | Val_Loss: 0.50554


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 018: | Train_Loss: 0.50830
Epoch 018: | Val_Loss: 0.50186


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 019: | Train_Loss: 0.50891
Epoch 019: | Val_Loss: 0.50301


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 020: | Train_Loss: 0.50709
Epoch 020: | Val_Loss: 0.50478


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 021: | Train_Loss: 0.50620
Epoch 021: | Val_Loss: 0.50642


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 022: | Train_Loss: 0.51249
Epoch 022: | Val_Loss: 0.49965


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 023: | Train_Loss: 0.50511
Epoch 023: | Val_Loss: 0.50729


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 024: | Train_Loss: 0.50977
Epoch 024: | Val_Loss: 0.51040


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 025: | Train_Loss: 0.50667
Epoch 025: | Val_Loss: 0.50908


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 026: | Train_Loss: 0.50560
Epoch 026: | Val_Loss: 0.50900


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 027: | Train_Loss: 0.50533
Epoch 027: | Val_Loss: 0.50427


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 028: | Train_Loss: 0.50408
Epoch 028: | Val_Loss: 0.49860


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 029: | Train_Loss: 0.50826
Epoch 029: | Val_Loss: 0.51143


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 030: | Train_Loss: 0.50986
Epoch 030: | Val_Loss: 0.51056


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 031: | Train_Loss: 0.50706
Epoch 031: | Val_Loss: 0.50843


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 032: | Train_Loss: 0.50382
Epoch 032: | Val_Loss: 0.50356


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 033: | Train_Loss: 0.50708
Epoch 033: | Val_Loss: 0.51031


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 034: | Train_Loss: 0.50871
Epoch 034: | Val_Loss: 0.51035


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 035: | Train_Loss: 0.50674
Epoch 035: | Val_Loss: 0.50444


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 036: | Train_Loss: 0.50732
Epoch 036: | Val_Loss: 0.51002


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 037: | Train_Loss: 0.50490
Epoch 037: | Val_Loss: 0.50848


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 038: | Train_Loss: 0.50251
Epoch 038: | Val_Loss: 0.50805


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 039: | Train_Loss: 0.50344
Epoch 039: | Val_Loss: 0.50894


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 040: | Train_Loss: 0.50520
Epoch 040: | Val_Loss: 0.50449


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 041: | Train_Loss: 0.50510
Epoch 041: | Val_Loss: 0.50132


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 042: | Train_Loss: 0.50583
Epoch 042: | Val_Loss: 0.50636


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 043: | Train_Loss: 0.50760
Epoch 043: | Val_Loss: 0.50126


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 044: | Train_Loss: 0.50666
Epoch 044: | Val_Loss: 0.50325


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 045: | Train_Loss: 0.50442
Epoch 045: | Val_Loss: 0.49871


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 046: | Train_Loss: 0.50591
Epoch 046: | Val_Loss: 0.50556


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 047: | Train_Loss: 0.50400
Epoch 047: | Val_Loss: 0.49949


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 048: | Train_Loss: 0.50522
Epoch 048: | Val_Loss: 0.51245


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Epoch 049: | Train_Loss: 0.50577
Epoch 049: | Val_Loss: 0.50756


##Save weights

In [None]:
import torch

torch.save(model.state_dict(), '/content/gdrive/MyDrive/unet_21.pth')

##Test

In [None]:
!pip install torchmetrics
!pip install segmentation_models_pytorch

In [17]:
import pandas as pd
import albumentations as albu
import torch
import torch.nn as nn
import torch.optim as optim
from torchmetrics import MeanSquaredError
from torchmetrics import Accuracy
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
from torch.utils.data import DataLoader, Dataset
import albumentations as albu
from albumentations.pytorch import ToTensorV2
from tqdm.notebook import tqdm
import numpy as np
import os

In [6]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [7]:
import zipfile
audio = zipfile.ZipFile('/content/gdrive/MyDrive/data.zip', 'r')
audio.extractall('/content')

In [18]:
class SoundDataset(Dataset):

    def __init__(self, meta, source_folder, transforms):

        self.source_folder = source_folder
        self.transforms = transforms
        self.meta = meta
        self.files = self.meta.path.unique()

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):

        sound = self.files[i]
        sound_df = self.meta.loc[self.meta.path == sound]

        if np.random.rand() < 0.5:
            label = 0.0
            pic_row = sound_df.loc[sound_df.type == 'clean'].iloc[0]
        else:
            label = 1.0
            pic_row = sound_df.loc[sound_df.type == 'noisy'].iloc[0]


        pic_path = os.path.join(self.source_folder, pic_row.get('folder'), pic_row.get('type'), str(pic_row.get('id')), sound)

        pic = np.expand_dims(np.load(pic_path).astype(float), 2)

        augmented = self.transforms(image=pic)

        return augmented['image'][0].unsqueeze(0).float(), torch.Tensor([label]).float()
        

In [19]:
test_df = pd.read_csv('/content/meta_val.csv') # Specify test meta
test_transform = albu.Compose([albu.PadIfNeeded(480, 80),
            albu.RandomCrop(480, 80),
            albu.Resize(576, 96),
            ToTensorV2()])

test_dataset = SoundDataset(test_df, '/content/data', transforms = test_transform) # Specify data folder
test_loader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=1,
                                             num_workers=1)

device = ('cuda' if torch.cuda.is_available() else 'cpu')

params=dict(
            pooling='avg',
            dropout=0.2,
            activation='sigmoid',
            classes=1)
model = smp.Unet(encoder_name='resnet18', in_channels=1, aux_params = params).to(device)
model.load_state_dict(torch.load('/content/gdrive/MyDrive/unet_21.pth')) # Specify weights
model.eval()

metric = Accuracy(0.5).to(device)

accuracy_metric = list()

with torch.no_grad():
  for x, y in tqdm(test_loader):

      x, y = x.to(device), y.to(device)
      pr_image, y_pred = model(x)
      accuracy_metric.append(metric(y_pred, y.type(torch.int64)).to('cpu').detach().numpy())

print('Accuracy = ', np.mean(accuracy_metric))

  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy =  0.991
