In [59]:
import warnings
# Ignore every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings emitted by later cells.
warnings.filterwarnings("ignore")

In [60]:
import torch
import torch.nn as nn

In [61]:
import torch
import torchvision.models as models

In [62]:
def count_parameters(model):
    """Return the number of trainable scalar parameters of ``model``.

    Parameters whose ``requires_grad`` flag is false are not counted.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total


In [63]:
import numpy as np
import librosa

def get_melspectrogram_db(file_path, sr=None, n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=8300, top_db=80, duration=5):
    """Load an audio file and return a fixed-length mel spectrogram in dB.

    The waveform is forced to exactly ``duration`` seconds: shorter signals are
    reflect-padded on both sides, longer ones keep only their first
    ``duration`` seconds.

    :param file_path: path of the audio file handed to ``librosa.load``.
    :param sr: target sampling rate passed to ``librosa.load``.
    :param n_fft: FFT window size forwarded to ``librosa.feature.melspectrogram``.
    :param hop_length: hop size forwarded to ``librosa.feature.melspectrogram``.
    :param n_mels: number of mel bands.
    :param fmin: lowest mel filter frequency.
    :param fmax: highest mel filter frequency.
    :param top_db: dynamic-range threshold forwarded to ``librosa.power_to_db``.
    :param duration: length in seconds the waveform is padded/cropped to.
    :return: the array produced by ``librosa.power_to_db`` for the mel spectrogram.
    """
    wav, sr = librosa.load(file_path, sr=sr)
    target_len = int(duration * sr)

    if wav.shape[0] < target_len:
        # Symmetric reflect padding; with an odd deficit this overshoots by one
        # sample, which the crop below removes.
        pad = int(np.ceil((target_len - wav.shape[0]) / 2))
        wav = np.pad(wav, pad, mode='reflect')
    # Crop so that every clip has exactly ``target_len`` samples.
    wav = wav[:target_len]

    # ``y`` is passed by keyword: recent librosa releases no longer accept the
    # waveform as a positional argument.
    spec = librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft,
              hop_length=hop_length, n_mels=n_mels, fmin=fmin, fmax=fmax)
    spec_db = librosa.power_to_db(spec, top_db=top_db)
    return spec_db

def spec_to_image(spec, eps=1e-6):
    """Standardise a spectrogram and rescale it to 8-bit values in [0, 255].

    :param spec: numeric array (e.g. a dB-scaled spectrogram).
    :param eps: small constant added to the standard deviation to avoid a
        division by zero during standardisation.
    :return: ``uint8`` array with the same shape as ``spec``. A constant input
        (zero dynamic range) maps to an all-zero image.
    """
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()

    value_range = spec_max - spec_min
    if value_range == 0:
        # Constant input: min-max scaling would compute 0/0 and cast NaN to
        # uint8, so return a well-defined black image instead.
        return np.zeros(spec_norm.shape, dtype=np.uint8)

    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    return spec_scaled.astype(np.uint8)

In [64]:
import pandas as pd
from tqdm.notebook import trange
import torch
import torchaudio
from torch.utils.data import Dataset


class DCaseDataset(Dataset):
    """
    Dataset for DCase audio clips paired with precomputed model outputs.

    At construction time every selected clip is pushed through ``model`` and
    the resulting logits are cached; ``__getitem__`` then returns the raw
    waveform, the integer class index and the cached logits.

    Structure of the class is taken from:
    https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/audio_classifier_tutorial.ipynb
    """

    # Sampling rate every clip is asserted to have.
    sample_rate = 44100
    # Waveforms longer than this many samples are cropped in ``__getitem__``.
    max_samples = 441000

    labelind2name = {
        0: "airport",
        1: "bus",
        2: "metro",
        3: "metro_station",
        4: "park",
        5: "public_square",
        6: "shopping_mall",
        7: "street_pedestrian",
        8: "street_traffic",
        9: "tram",
    }
    # Inverse mapping, derived so the two tables cannot drift apart.
    name2labelind = {name: ind for ind, name in labelind2name.items()}

    def __init__(self, root_dir, csv_path, indices, model):
        """
        Read the file list and cache ``model``'s logits for every selected clip.

        :param root_dir: directory that the file names in the csv are relative to.
        :param csv_path: tab-separated csv; column 0 is read as the file name
            and column 1 as the class name.
        :param indices: iterable of row positions of the csv to include.
        :param model: network called on a ``(1, 1, H, W)`` float32 spectrogram
            image; it is switched to eval mode here.
        """

        # Open csv files
        self.root_dir = root_dir

        csvData = pd.read_csv(csv_path, sep="\t")
        csvData["label"] = csvData["filename"].str.split("-").str[0]

        # Lists of file names and labels
        rows = csvData.iloc[list(indices)]
        self.file_names = rows.iloc[:, 0].tolist()
        class_names = rows.iloc[:, 1].tolist()

        # Transform class name to index (despite the attribute name, these are
        # integer class indices, not one-hot vectors)
        self.labels_one_hot = [self.name2labelind[name] for name in class_names]

        # retrieve logits from the model
        model.eval()
        self.labels = []
        with torch.no_grad():
            for i in trange(0, len(self.file_names)):
                # Load audio file as an 8-bit spectrogram image with batch and
                # channel axes prepended
                audio_path = self._audio_path(self.file_names[i])
                image = spec_to_image(get_melspectrogram_db(audio_path))
                audio_signal = torch.tensor(image[np.newaxis, np.newaxis, ...], dtype=torch.float32)

                # Get logits
                logits = model(audio_signal)
                self.labels.append(logits)

    def _audio_path(self, file_name):
        """Join ``root_dir`` and ``file_name``, with or without a trailing slash on ``root_dir``."""
        import os

        return os.path.join(self.root_dir, file_name)

    def __getitem__(self, index):
        """
        Load one clip from disk.

        :param index: position in the selected file list.
        :return: tuple ``(sound, class_index, logits)`` where ``sound`` is the
            1-D waveform cropped to at most ``max_samples`` samples and
            ``logits`` is the model output cached in ``__init__``.
        """

        # Load data
        filepath = self._audio_path(self.file_names[index])
        sound, sfreq = torchaudio.load(filepath, normalize=True)
        assert sound.shape[0] == 1, "Expected mono channel"
        sound = torch.mean(sound, dim=0)
        assert sfreq == self.sample_rate, "Expected sampling rate of 44.1 kHz"

        # Remove last samples if longer than expected
        if sound.shape[-1] >= self.max_samples:
            sound = sound[:self.max_samples]

        return (
                sound,
                self.labels_one_hot[index],
                self.labels[index],
            )

    def __len__(self):
        """Number of selected clips."""
        return len(self.file_names)


In [65]:
import os
from sklearn.model_selection import train_test_split
import numpy as np
from torch.utils.data import DataLoader
from sp4asc.models.cnns import LogMelSpectrogram
from sp4asc.models import get_net
from sp4asc.training import TrainingManager


In [66]:
# ResNet-34 backbone with randomly initialised weights
model = models.resnet34(pretrained=False)
# Replace the classifier head with a 10-way output
model.fc = nn.Linear(in_features=512, out_features=10)
# Replace the stem so the network accepts single-channel inputs
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=(7, 7),
    stride=(2, 2),
    padding=(3, 3),
    bias=False,
)
model = model.to("cpu")

In [67]:
pathModel = "/users/eleves-b/2019/maxime.bonnin/MAP583_Project/weights/big/checkpoint.pth"

# The model was placed on the CPU above, so remap the stored tensors to the
# CPU as well; without map_location a checkpoint saved from a GPU cannot be
# loaded on a machine that has no CUDA device.
state_dict = torch.load(pathModel, map_location="cpu")
model.load_state_dict(state_dict)
model.eval()
model

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [68]:
root_dir = "/users/eleves-b/2019/maxime.bonnin/MAP583/TAU_SMALL_DATASET/test/"
csv_path = "/users/eleves-b/2019/maxime.bonnin/MAP583/TAU_SMALL_DATASET/fold1_small_train.csv"

# Fixed seed so the train/test partition is identical on every run
SPLIT_SEED = 0

# NOTE(review): N counts the entries of root_dir, while the indices below are
# used by DCaseDataset as row positions in the csv - confirm both agree.
N = len(os.listdir(root_dir))
train_indices, test_indices = train_test_split(
    np.arange(0, N), test_size=0.33, random_state=SPLIT_SEED
)

# Building a dataset runs `model` once per clip to cache its logits
train_dataset = DCaseDataset(root_dir, csv_path, train_indices, model)
test_dataset = DCaseDataset(root_dir, csv_path, test_indices, model)

  0%|          | 0/1988 [00:00<?, ?it/s]

  0%|          | 0/980 [00:00<?, ?it/s]

In [169]:
# Hyper-parameters and paths read by the data loaders, the network factory,
# the optimiser/scheduler and the training manager below.
config = dict(
    # data loading
    batchsize=32,
    num_workers=4,
    # checkpoint handling
    reload=False,
    # network
    net="Cnn6_60k",
    dropout=0.2,
    specAugment=[128, 2, 16, 2],
    # optimisation
    lr=1e-3,
    eta_min=1e-5,
    max_epoch=100,
    weight_decay=1e-5,
    mixup_alpha=0.2,
    # output directory for checkpoints and tensorboard logs
    out_dir="/users/eleves-b/2019/maxime.bonnin/MAP583_Project/weights/test2",
)

In [170]:
# Both loaders share batch size, worker count and pinned memory; only the
# training loader shuffles and drops the last incomplete batch.
loader_train, loader_test = (
    DataLoader(
        dataset,
        batch_size=config["batchsize"],
        shuffle=is_train,
        pin_memory=True,
        num_workers=config["num_workers"],
        drop_last=is_train,
    )
    for dataset, is_train in ((train_dataset, True), (test_dataset, False))
)

In [171]:
# Front-end that TrainingManager applies to the raw waveform
spectrogram = LogMelSpectrogram()

# Network selected by name from the factory table, built with the configured
# dropout and SpecAugment settings
net = get_net[config["net"]](config["dropout"], config["specAugment"])

print("\n\nNet at training time")
print(
    "Nb. of parameters at training time: ",
    count_parameters(net) / 1e3,
    "k",
)




Net at training time
Nb. of parameters at training time:  62.922 k


In [172]:
import torch
from tqdm import tqdm
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter


class CELoss:
    """Cross-entropy between network logits and a target distribution.

    The target is a per-class weight vector for each sample (one-hot or soft),
    not a class index.
    """

    def __init__(self, nb_classes):
        # Stored for reference only; the loss itself does not read it.
        self.nb_classes = nb_classes

    def __call__(self, pred, target):
        """Return the batch mean of ``-sum_c target_c * log_softmax(pred)_c``."""
        log_probs = F.log_softmax(pred, dim=1)
        per_sample = torch.sum(log_probs * target, dim=1)
        return -per_sample.mean()


class MixUp:
    """Mixup augmentation applied jointly to a batch of inputs and targets.

    A mode must be selected with ``train()`` or ``eval()`` before the object
    is called; mixing only happens in training mode and when ``alpha`` was
    not ``None``.
    """

    def __init__(self, alpha, nb_classes):
        self.nb_classes = nb_classes
        # Symmetric Beta(alpha, alpha) for the mixing coefficient, or no mixing at all
        self.beta = None if alpha is None else torch.distributions.beta.Beta(alpha, alpha)
        self.training = None

    @staticmethod
    def mix(x, mix, ind):
        """Blend ``x`` with a reordered copy of itself using weight ``mix``."""
        kept = x * mix
        swapped = x[ind] * (1 - mix)
        return kept + swapped

    def train(self):
        """Enable mixing and return ``self``."""
        self.training = True
        return self

    def eval(self):
        """Disable mixing and return ``self``."""
        self.training = False
        return self

    def __call__(self, input, target, one_hot):
        """Return ``(input, target)``, mixed when in training mode.

        When ``one_hot`` is true, ``target`` is first expanded from class
        indices to one-hot vectors.
        """
        if self.training is None:
            raise ValueError("Choose training or testing mode")

        # Transform to one hot vector
        if one_hot:
            target = F.one_hot(target, num_classes=self.nb_classes)

        # Nothing to mix outside training or when mixup is disabled
        if self.beta is None or not self.training:
            return input, target

        # Same permutation and coefficient for signals and targets
        ind = torch.randperm(input.shape[0])
        mix = self.beta.sample()
        return MixUp.mix(input, mix, ind), MixUp.mix(target, mix, ind)


class TrainingManager:
    """Train/evaluate ``net`` against per-sample target distributions.

    Each batch is expected to be ``(sound, labels, gt_class)``: the waveform,
    the integer class index (used only for accuracy) and the training target.
    With ``one_hot=False`` the target is a tensor of raw scores that is turned
    into a distribution with a softmax over the class axis; with
    ``one_hot=True`` it is a class index expanded to a one-hot vector by
    ``MixUp``.

    Note: this definition shadows the ``TrainingManager`` imported from
    ``sp4asc.training`` earlier in the notebook.
    """

    def __init__(
        self,
        net,
        spectrogram,
        loader_train,
        loader_test,
        optim,
        scheduler,
        config,
        path_to_ckpt,
        nb_classes=10,
        device="cpu",
        one_hot=True
    ):
        """
        :param net: network to train; moved to ``device``.
        :param spectrogram: module applied to the waveform before ``net``;
            moved to ``device`` and kept in eval mode.
        :param loader_train: loader iterated in training epochs.
        :param loader_test: loader iterated in evaluation epochs.
        :param optim: optimiser stepped once per training batch.
        :param scheduler: learning-rate scheduler stepped once per epoch.
        :param config: dict; reads ``max_epoch``, ``mixup_alpha`` and ``reload``
            (and ``out_dir`` when reloading).
        :param path_to_ckpt: directory for ``ckpt.pth`` and tensorboard logs.
        :param nb_classes: number of classes.
        :param device: device string for the networks and the batches.
        :param one_hot: whether batch targets are class indices (``True``) or
            raw class scores (``False``).
        """

        # Optim. methods
        self.optim = optim
        self.scheduler = scheduler

        # Dataloaders
        self.max_epoch = config["max_epoch"]
        self.loader_train = loader_train
        self.loader_test = loader_test

        # Networks
        self.dev = device
        self.net = net.to(self.dev)
        self.spectrogram = spectrogram.to(self.dev).eval()

        # Mixup and loss
        self.loss = CELoss(nb_classes=nb_classes)
        self.mixup = MixUp(alpha=config["mixup_alpha"], nb_classes=nb_classes)

        self.one_hot = one_hot

        # Checkpoints
        self.config = config
        self.path_to_ckpt = path_to_ckpt + "/ckpt.pth"
        if config["reload"]:
            # out_dir defaults to the current config's value inside load_state
            self.load_state()
        else:
            self.current_epoch = 0

        # Monitoring
        self.writer = SummaryWriter(
            path_to_ckpt + "/tensorboard/",
            purge_step=self.current_epoch + 1,
        )

    def print_log(self, running_loss, nb_it, acc, nb_instances):
        """Print the mean loss over ``nb_it + 1`` batches and the accuracy in percent."""
        log = (
            "\nEpoch: {0:d} :".format(self.current_epoch)
            + " loss = {0:.3f}".format(running_loss / (nb_it + 1))
            + " - acc1 = {0:.3f}".format(100 * acc / nb_instances)
        )
        print(log)

    def _target_distribution(self, gt_class):
        """Return the targets as a ``(batch, classes)`` float distribution.

        Raw scores (``one_hot=False``) are normalised with a softmax over the
        last (class) axis. The axis is given explicitly because the implicit
        choice for a 3-D ``(batch, 1, classes)`` tensor is axis 0, i.e. the
        batch. One-hot targets are already distributions and are only cast to
        float. Singleton middle axes are removed with a reshape that always
        keeps the batch axis, even for a batch of one.
        """
        if self.one_hot:
            probs = gt_class.float()
        else:
            probs = F.softmax(gt_class, dim=-1)
        return probs.reshape(probs.shape[0], -1)

    def one_epoch(self, training):
        """Run one pass over the training or test loader and log loss/accuracy.

        :param training: ``True`` to optimise on ``loader_train``, ``False`` to
            evaluate on ``loader_test``.
        :raises ValueError: if the selected loader yields no batch.
        """

        # Train or eval mode
        if training:
            self.net.train()
            self.mixup.train()
            loader = self.loader_train
            print("\nTraining: %d/%d epochs" % (self.current_epoch, self.max_epoch))
        else:
            self.net.eval()
            self.mixup.eval()
            loader = self.loader_test
            print("\nTest:")

        if len(loader) == 0:
            raise ValueError("Data loader is empty: nothing to iterate over")

        # Stat.
        acc = 0
        nb_instances = 0
        running_loss = 0
        # Intermediate log roughly three times per epoch; at least 1 so the
        # modulo below is defined for loaders with fewer than 3 batches
        delta = max(1, len(loader) // 3)

        # Loop over mini-batches
        bar_format = "{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}"
        for it, batch in enumerate(tqdm(loader, bar_format=bar_format)):

            # Data
            sound = batch[0].to(self.dev, non_blocking=True)
            labels = batch[1].to(self.dev, non_blocking=True)
            gt_class = batch[2].to(self.dev, non_blocking=True)
            # Get network outputs with mixup during training
            with torch.no_grad():
                sound = self.spectrogram(sound)
                sound, gt_class = self.mixup(sound, gt_class, self.one_hot)
                gt_class = self._target_distribution(gt_class)
                if not training:
                    pred_class = self.net(sound)
            if training:
                self.optim.zero_grad()
                pred_class = self.net(sound)

            # Loss & backprop
            loss_class = self.loss(pred_class, gt_class)
            if training:
                loss_class.backward()
                self.optim.step()
            # Log (accuracy is measured against the integer labels)
            acc += (pred_class.max(1)[1] == labels).sum().item()
            nb_instances += labels.shape[0]
            running_loss += loss_class.item()
            if it % delta == delta - 1:
                self.print_log(running_loss, it, acc, nb_instances)

        # Print log
        self.print_log(running_loss, it, acc, nb_instances)
        header = "Train" if training else "Test"
        self.writer.add_scalar(
            header + "/loss", running_loss / (it + 1), self.current_epoch + 1
        )
        self.writer.add_scalar(
            header + "/acc", 100 * acc / nb_instances, self.current_epoch + 1
        )

    def load_state(self, out_dir=None):
        """Restore net, optimiser, scheduler and epoch from ``self.path_to_ckpt``.

        :param out_dir: value substituted for the checkpoint's ``out_dir``
            before the configs are compared; defaults to the current config's
            ``out_dir`` when present.
        :raises AssertionError: if a saved config key is missing from, or
            differs from, the current config (``reload`` is not compared).
        """
        ckpt = torch.load(self.path_to_ckpt, map_location=torch.device(self.dev))
        if out_dir is None:
            out_dir = self.config.get("out_dir")
        if out_dir is not None:
            ckpt["config"]["out_dir"] = out_dir
        self.net.load_state_dict(ckpt["net"])
        self.optim.load_state_dict(ckpt["optim"])
        self.scheduler.load_state_dict(ckpt["scheduler"])
        self.current_epoch = ckpt["epoch"]
        # Check config is the same
        for key in ckpt["config"].keys():
            assert key in self.config.keys()
            if key == "reload":
                # The reload flag legitimately differs between the run that
                # saved the checkpoint and the run that resumes from it
                continue
            assert (
                self.config[key] == ckpt["config"][key]
            ), "Config file is not compatible with saved one. " + f"{key} {ckpt['config'][key]} {self.config[key]}"

    def save_state(self):
        """Write epoch, net, optimiser, scheduler and config to ``self.path_to_ckpt``."""
        dict_to_save = {
            "epoch": self.current_epoch,
            "net": self.net.state_dict(),
            "optim": self.optim.state_dict(),
            "scheduler": self.scheduler.state_dict(),
            "config": self.config,
        }
        torch.save(dict_to_save, self.path_to_ckpt)

    def train(self):
        """Alternate training and test epochs until ``max_epoch``, checkpointing after each."""
        for _ in range(self.current_epoch, self.max_epoch):
            self.one_epoch(training=True)
            self.scheduler.step()
            self.one_epoch(training=False)
            self.current_epoch += 1
            self.save_state()
        print("Finished Training")

    def eval(self):
        """Run a single evaluation epoch on the test loader."""
        self.one_epoch(training=False)


In [173]:
# Directory of the checkpoint that the training cell loads before it starts
# (make sure the pretrained model is there)
pathModel = "/users/eleves-b/2019/maxime.bonnin/MAP583_Project/weights/test"
# Home directory prefix; not referenced by the other visible cells
root = "/users/eleves-b/2019/maxime.bonnin/"

In [None]:
# --- Output directory
path2log = config["out_dir"]
print(path2log)
os.makedirs(path2log, exist_ok=True)

# --- Optimiser and cosine learning-rate schedule
optim = torch.optim.AdamW(
    [
        {"params": net.parameters()},
    ],
    lr=config["lr"],
    weight_decay=config["weight_decay"],
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optim,
    config["max_epoch"],
    eta_min=config["eta_min"],
)

# Use the GPU when one is visible, otherwise fall back to the CPU instead of
# failing when the networks are moved to the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Training
mng = TrainingManager(
    net,
    spectrogram,
    loader_train,
    loader_test,
    optim,
    scheduler,
    config,
    path2log,
    device=device,
    one_hot=False
)
# Warm-start from the checkpoint stored under pathModel ...
mng.path_to_ckpt = pathModel + "/ckpt.pth"
mng.config["out_dir"] = config["out_dir"]
mng.load_state(config["out_dir"])
# ... then restart the epoch counter and save new checkpoints under path2log.
# NOTE(review): load_state also restores the optimiser and scheduler state, so
# the learning-rate schedule continues from the checkpoint - confirm intended.
mng.current_epoch = 0
mng.path_to_ckpt = path2log + "/ckpt.pth"
mng.max_epoch = config["max_epoch"]

mng.train()