In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import models,transforms,datasets
import time
import torchaudio
from sp4asc.datasets.dcase import DCaseDataset
import librosa
import librosa.display
import pandas as pd

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sp4asc.models.cnns import LogMelSpectrogram
from sp4asc.training import TrainingManager

%matplotlib inline

In [None]:
# Pick the first CUDA device when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

### Large Neural Network

In [None]:
# Create a local data/ directory and move into it before fetching the dataset.
%mkdir data
%cd data
# NOTE(review): no URL is given to wget here, so nothing is downloaded — TODO restore the dataset URL.
!wget 
# NOTE(review): the archive name is just ".tgz"; presumably it should be the downloaded file's name — confirm.
!tar -zxvf .tgz

In [None]:
%ls

In [None]:
# NOTE(review): %cd has no target directory here; presumably it should enter the extracted
# dataset folder. With no argument IPython's %cd is expected to jump to the home directory,
# which would break the relative paths used later — TODO confirm and restore the target.
%cd 
%ls

#### Data Processing



In [None]:
# Step back up one directory level.
%cd ..
# NOTE(review): '/data' is an absolute path at the filesystem root, whereas later cells read
# from the relative "data/..." folder; data_dir is not referenced in the cells visible here —
# confirm whether 'data' was intended.
data_dir = '/data'

### Creating ResNet model

In [None]:
# Randomly initialised ResNet-50 (no pretrained weights). Calling the builder with its
# defaults gives the same model as `pretrained=False` while avoiding that keyword, which
# newer torchvision releases deprecate in favour of `weights=`.
model_resnet = models.resnet50()

In [None]:
# Replace the classification head with a 10-way linear layer (one output per acoustic
# scene class). The input width is read from the existing head instead of hard-coding
# 2048, so the line keeps working if the backbone is swapped.
model_resnet.fc = torch.nn.Linear(
    in_features=model_resnet.fc.in_features,
    out_features=10,
)

In [None]:
print(model_resnet)

#### Data Extraction

In [None]:
%ls

In [None]:
def get_audio(file, sr=22050):
    """Load an audio file with librosa.

    Args:
        file: Path of the audio file, forwarded to ``librosa.load``.
        sr: Target sampling rate in Hz passed to ``librosa.load``
            (defaults to 22050, the value previously hard-coded).

    Returns:
        A ``(signal, sample_rate)`` tuple as produced by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(file, sr=sr)
    # The loaded data was previously discarded; hand it back to the caller.
    return signal, sample_rate

def draw_signal(signal, sample_rate):
    """Plot a signal's waveform and the first half of its FFT magnitude spectrum.

    Two matplotlib figures are created: the time-domain waveform (drawn with
    ``librosa.display.waveshow``) and the magnitude of the FFT restricted to
    the lower half of the frequency axis.

    Args:
        signal: 1-D array of audio samples.
        sample_rate: Sampling rate of ``signal`` in Hz.
    """
    FIG_SIZE = (15, 4)

    # --- Waveform
    plt.figure(figsize=FIG_SIZE)
    # `sr` has to be given by keyword: it is keyword-only in recent librosa
    # releases, so the former positional call raises a TypeError there.
    librosa.display.waveshow(signal, sr=sample_rate, alpha=0.4)
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.title("Waveform")

    # --- Spectrum
    # Magnitude of the complex Fourier coefficients
    spectrum = np.abs(np.fft.fft(signal))

    # Frequency value associated with each FFT bin
    f = np.linspace(0, sample_rate, len(spectrum))

    # Keep only the first half of the bins
    half = len(spectrum) // 2
    left_spectrum = spectrum[:half]
    left_f = f[:half]

    plt.figure(figsize=FIG_SIZE)
    plt.plot(left_f, left_spectrum, alpha=0.4)
    plt.xlabel("Frequency")
    plt.ylabel("Magnitude")
    plt.title("Power spectrum")


In [None]:
import os

root_dir = "data/TAU-urban-acoustic-scenes-2020-mobile-development"

# Index the training clips. Only .wav entries are kept (any other entry in the
# folder would not split into the six columns below) and the list is sorted so
# that the CSV row order is reproducible across runs.
files = sorted(
    file for file in os.listdir(root_dir + '/audio_train') if file.endswith('.wav')
)
# File names are dash-separated: label, city, location id, segment id, device id.
files = [[file] + file[:-len('.wav')].split('-', 5) for file in files]
df_files = pd.DataFrame(files,columns=['file_name','label_name','city','location_id','segment_id','device_id'])
df_files.to_csv(root_dir + '/train_data.csv',index=False)

In [None]:
root_dir = "data/TAU-urban-acoustic-scenes-2020-mobile-development"

# Index the test clips. Only .wav entries are kept (any other entry in the
# folder would not split into the six columns below) and the list is sorted so
# that the CSV row order is reproducible across runs.
files = sorted(
    file for file in os.listdir(root_dir + '/audio_test') if file.endswith('.wav')
)
# File names are dash-separated: label, city, location id, segment id, device id.
files = [[file] + file[:-len('.wav')].split('-', 5) for file in files]
df_files = pd.DataFrame(files,columns=['file_name','label_name','city','location_id','segment_id','device_id'])
df_files.to_csv(root_dir + '/test_data.csv',index=False)

In [None]:
# dataloader 

class DCaseDataset(Dataset):
    """Audio clips of an acoustic-scene dataset, indexed by a per-split CSV file.

    Note: this definition shadows the ``DCaseDataset`` imported from
    ``sp4asc.datasets.dcase`` at the top of the notebook.

    The CSV ``<root_dir>/<split>_data.csv`` is read positionally: column 0 is
    the file name, column 1 the scene name and column 5 the recording device.
    Audio files are read from ``<root_dir>/audio_<split>/``.

    Each item is a tuple ``(sound, label, file_name, device)`` where ``sound``
    is a 1-D tensor truncated to at most ``max_samples`` samples.

    For the "test" split, real labels and devices are returned whenever the
    CSV provides them (six columns, known scene names), so that test metrics
    are computed against actual classes. A test CSV that only lists files
    still yields the placeholders ``255`` and ``"unknown"``.

    Args:
        root_dir: Dataset root directory.
        split: One of "train", "val" or "test".

    Raises:
        ValueError: If ``split`` is not one of the supported values.
    """

    labelind2name = {
        0: "airport",
        1: "bus",
        2: "metro",
        3: "metro_station",
        4: "park",
        5: "public_square",
        6: "shopping_mall",
        7: "street_pedestrian",
        8: "street_traffic",
        9: "tram",
    }
    # Inverse table, derived so that the two mappings cannot drift apart.
    name2labelind = {name: ind for ind, name in labelind2name.items()}

    # Expected sampling rate (Hz) and maximum clip length (samples).
    expected_sfreq = 44100
    max_samples = 441000

    # Placeholders returned when a test CSV carries no usable labels.
    unknown_label = 255
    unknown_device = "unknown"

    def __init__(self,root_dir,split):
        self.root_dir = root_dir
        self.split = split

        csv_names = {
            "train": "/train_data.csv",
            "val": "/val_data.csv",
            "test": "/test_data.csv",
        }
        if split not in csv_names:
            raise ValueError("Split not implemented")

        csvData = pd.read_csv(root_dir + csv_names[split], sep=",")
        self.file_names = csvData.iloc[:, 0].tolist()

        # Test listing without labels: keep the file list only.
        if split == "test" and not self._has_labels(csvData):
            self.labels, self.devices = None, None
            return

        # Transform class name to index
        self.labels = [self.name2labelind[name] for name in csvData.iloc[:, 1]]
        self.devices = csvData.iloc[:, 5].tolist()

    @classmethod
    def _has_labels(cls, csvData):
        """Tell whether the CSV has the label/device columns with known scene names."""
        if csvData.shape[1] < 6:
            return False
        return bool(csvData.iloc[:, 1].isin(list(cls.name2labelind)).all())

    def __getitem__(self,index):
        # Load data
        filepath = self.root_dir + '/audio_' + self.split + '/' + self.file_names[index]
        sound, sfreq = torchaudio.load(filepath)
        assert sound.shape[0] == 1, "Expected mono channel"
        sound = torch.mean(sound, dim=0)
        assert sfreq == self.expected_sfreq, "Expected sampling rate of 44.1 kHz"

        # Remove last samples if longer than expected (no-op for shorter clips)
        sound = sound[: self.max_samples]

        if self.labels is None:
            return sound, self.unknown_label, self.file_names[index], self.unknown_device
        return (
            sound,
            self.labels[index],
            self.file_names[index],
            self.devices[index],
        )

    def __len__(self):
        return len(self.file_names)


In [None]:
# Build the training split from train_data.csv under the dataset root.
train_dataset = DCaseDataset(
    root_dir="data/TAU-urban-acoustic-scenes-2020-mobile-development",
    split = 'train'
)

# Build the test split from test_data.csv under the same root.
test_dataset = DCaseDataset(
    root_dir="data/TAU-urban-acoustic-scenes-2020-mobile-development",
    split = 'test'
)

In [None]:
train_dataset[3]

In [None]:
# 44.1 kHz is the sampling rate DCaseDataset asserts when loading a clip.
sample_rate = 44100
# Element 0 of a dataset item is the waveform tensor; convert it to numpy for plotting.
draw_signal(train_dataset[3][0].numpy(), sample_rate)

In [None]:
import tarfile
import argparse

def make_tarfile(output_filename, source_dir):
    """Pack the entries of ``source_dir`` into a gzip-compressed tar archive.

    Entries named exactly "data", and entries whose last dot-separated
    component is "egg-info", are left out of the archive.

    Args:
        output_filename: Path of the archive to write.
        source_dir: Directory whose direct entries are archived.
    """

    def _is_excluded(entry):
        # The text after the last dot is the whole name when there is no dot.
        return entry == "data" or entry.rsplit(".", 1)[-1] == "egg-info"

    with tarfile.open(output_filename, "w:gz") as archive:
        kept_entries = (
            entry for entry in os.listdir(source_dir) if not _is_excluded(entry)
        )
        for entry in kept_entries:
            archive.add(os.path.join(source_dir, entry))

In [None]:
# Mini-batch size and number of worker processes used by both DataLoaders.
batchsize = 32
num_workers = 1
# When true, TrainingManager restores its state from the saved checkpoint.
reload = False
# NOTE(review): dropout and specAugment are not referenced in the cells visible here —
# confirm whether they are still needed.
dropout = 0.2
specAugment = [128, 2, 32, 2]
# AdamW learning rate; eta_min is passed to the cosine annealing scheduler.
lr = 1e-3
eta_min = 1e-5
# Number of training epochs; also given to the cosine annealing scheduler.
max_epoch = 200
# AdamW weight decay.
weight_decay = 1e-5
# Forwarded to MixUp(alpha=...) by TrainingManager.
mixup_alpha = None
# Directory that receives the checkpoint file and the tensorboard logs.
out_dir =  "./trained_models/"

In [None]:
from sp4asc.training import CELoss
from sp4asc.training import MixUp
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

class TrainingManager:
    """Train, evaluate and checkpoint a classifier fed with spectrograms.

    Note: this definition shadows the ``TrainingManager`` imported from
    ``sp4asc.training`` at the top of the notebook.

    Args:
        net: Network to train; moved to the GPU when one is available.
        spectrogram: Module applied (without gradients) to every raw batch
            before it reaches ``net``; kept in eval mode.
        loader_train: Iterable of training batches. ``batch[0]`` is the input
            tensor and ``batch[1]`` the class-index tensor.
        loader_test: Iterable of evaluation batches, same layout.
        optim: Optimizer updating the parameters of ``net``.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        max_epoch: Epoch index at which training stops.
        reload: When true, restore the state from the checkpoint file.
        mixup_alpha: Forwarded to ``MixUp(alpha=...)``.
        path_to_ckpt: Directory for ``ckpt.pth`` and the tensorboard logs.
        nb_classes: Number of classes given to the loss and to mixup.
        config: Optional mapping stored in every checkpoint and compared with
            the stored one on reload (the "reload" key is ignored). Defaults
            to an empty mapping.
    """

    def __init__(
        self,
        net,
        spectrogram,
        loader_train,
        loader_test,
        optim,
        scheduler,
        max_epoch,
        reload,
        mixup_alpha,
        path_to_ckpt,
        nb_classes=10,
        config=None,
    ):

        # Configuration saved with / checked against checkpoints. It has to
        # exist before load_state() or save_state() is called.
        self.config = dict(config) if config is not None else {}

        # Optim. methods
        self.optim = optim
        self.scheduler = scheduler

        # Dataloaders
        self.max_epoch = max_epoch
        self.loader_train = loader_train
        self.loader_test = loader_test

        # Networks
        self.dev = "cuda" if torch.cuda.is_available() else "cpu"
        self.net = net.to(self.dev)
        self.spectrogram = spectrogram.to(self.dev).eval()

        # Mixup and loss
        self.loss = CELoss(nb_classes=nb_classes)
        self.mixup = MixUp(alpha=mixup_alpha, nb_classes=nb_classes)

        # Checkpoints
        self.path_to_ckpt = path_to_ckpt + "/ckpt.pth"
        if reload:
            self.load_state()
        else:
            self.current_epoch = 0

        # Monitoring
        self.writer = SummaryWriter(
            path_to_ckpt + "/tensorboard/",
            purge_step=self.current_epoch + 1,
        )

    def print_log(self, running_loss, nb_it, acc, nb_instances):
        """Print the mean loss over ``nb_it + 1`` batches and the accuracy in percent."""
        log = (
            "\nEpoch: {0:d} :".format(self.current_epoch)
            + " loss = {0:.3f}".format(running_loss / (nb_it + 1))
            + " - acc1 = {0:.3f}".format(100 * acc / nb_instances)
        )
        print(log)

    def one_epoch(self, training):
        """Run one pass over the training loader (``training=True``) or the test loader.

        Loss and accuracy are printed about three times per pass and written
        to tensorboard at the end. Nothing is logged for an empty loader.
        """

        # Train or eval mode
        if training:
            self.net.train()
            self.mixup.train()
            loader = self.loader_train
            print("\nTraining: %d/%d epochs" % (self.current_epoch, self.max_epoch))
        else:
            self.net.eval()
            self.mixup.eval()
            loader = self.loader_test
            print("\nTest:")

        # Stat.
        acc = 0
        nb_instances = 0
        running_loss = 0
        # At least 1, so the modulo below stays defined for loaders with fewer
        # than three batches.
        delta = max(1, len(loader) // 3)

        # Loop over mini-batches
        bar_format = "{desc:<5.5}{percentage:3.0f}%|{bar:50}{r_bar}"
        for it, batch in enumerate(tqdm(loader, bar_format=bar_format)):

            # Data
            sound = batch[0].to(self.dev, non_blocking=True)
            gt_class = batch[1].to(self.dev, non_blocking=True)

            # Get network outputs with mixup during training
            with torch.no_grad():
                sound = self.spectrogram(sound)
                sound, gt_class = self.mixup(sound, gt_class)
                if not training:
                    pred_class = self.net(sound)
            if training:
                self.optim.zero_grad()
                pred_class = self.net(sound)

            # Loss & backprop
            loss_class = self.loss(pred_class, gt_class)
            if training:
                loss_class.backward()
                self.optim.step()

            # Log (accuracy is accumulated as a plain Python number)
            acc += (pred_class.max(1)[1] == gt_class.max(1)[1]).sum().item()
            nb_instances += gt_class.shape[0]
            running_loss += loss_class.item()
            if it % delta == delta - 1:
                self.print_log(running_loss, it, acc, nb_instances)

        # Nothing was processed: there is no statistic to report.
        if nb_instances == 0:
            print("\nEmpty loader: nothing to log.")
            return

        # Print log
        self.print_log(running_loss, it, acc, nb_instances)
        header = "Train" if training else "Test"
        self.writer.add_scalar(
            header + "/loss", running_loss / (it + 1), self.current_epoch + 1
        )
        self.writer.add_scalar(
            header + "/acc", 100 * acc / nb_instances, self.current_epoch + 1
        )

    def load_state(self):
        """Restore network, optimizer, scheduler and epoch counter from the checkpoint.

        Raises:
            AssertionError: If a stored config entry (other than "reload") is
                missing from, or differs from, the current config.
        """
        ckpt = torch.load(self.path_to_ckpt, map_location=torch.device(self.dev))
        self.net.load_state_dict(ckpt["net"])
        self.optim.load_state_dict(ckpt["optim"])
        self.scheduler.load_state_dict(ckpt["scheduler"])
        self.current_epoch = ckpt["epoch"]
        # Check config is the same
        for key, saved_value in ckpt["config"].items():
            # The reload flag legitimately differs between the run that saved
            # the checkpoint and the run that resumes from it.
            if key == "reload":
                continue
            assert (
                key in self.config
            ), "Config file is not compatible with saved one."
            assert (
                self.config[key] == saved_value
            ), "Config file is not compatible with saved one."

    def save_state(self):
        """Write epoch counter, model/optimizer/scheduler states and config to the checkpoint."""
        dict_to_save = {
            "epoch": self.current_epoch,
            "net": self.net.state_dict(),
            "optim": self.optim.state_dict(),
            "scheduler": self.scheduler.state_dict(),
            "config": self.config,
        }
        torch.save(dict_to_save, self.path_to_ckpt)

    def train(self):
        """Train from the current epoch up to ``max_epoch``, evaluating and checkpointing after each epoch."""
        for _ in range(self.current_epoch, self.max_epoch):
            self.one_epoch(training=True)
            self.scheduler.step()
            self.one_epoch(training=False)
            self.current_epoch += 1
            self.save_state()
        print("Finished Training")

    def eval(self):
        """Run a single evaluation pass over the test loader."""
        self.one_epoch(training=False)


In [None]:
"""
# --- Args
parser = argparse.ArgumentParser(description="Training with mixup")

parser.add_argument(
    "--config",
    type=str,
    default="configs/example.py",
    help="Path to config file describing training parameters",
)

args = parser.parse_args()

# ---
print("Training script: ", os.path.realpath(__file__))

# --- Config
name_config = args.config.replace(".py", "").replace(os.path.sep, ".")
config = __import__(name_config, fromlist=["config"]).config
print("Config parameters:")
print(config)

# --- Log dir
path2log = config["out_dir"] + name_config
os.makedirs(path2log, exist_ok=True)
make_tarfile(path2log + "/src.tgz", os.path.dirname(os.path.realpath(__file__)))

# ---
current_dir = os.path.dirname(os.path.abspath(__file__))
"""
train_dataset = DCaseDataset(
    root_dir="data/TAU-urban-acoustic-scenes-2020-mobile-development",
    split = 'train'
)

test_dataset = DCaseDataset(
    root_dir="data/TAU-urban-acoustic-scenes-2020-mobile-development",
    split = 'test'
)

loader_train = DataLoader(
    train_dataset,
    batch_size=batchsize,
    shuffle=True,
    pin_memory=True,
    num_workers=num_workers,
    drop_last=True,
)

loader_test = DataLoader(
    test_dataset,
    batch_size=batchsize,
    shuffle=False,
    pin_memory=True,
    num_workers=num_workers,
    drop_last=False,
)

# --- Get network
spectrogram = LogMelSpectrogram()
net = model_resnet
print("\n\nNet at training time")
print(net)

# ---
optim = torch.optim.AdamW(
    [
        {"params": net.parameters()},
    ],
    lr=lr,
    weight_decay=weight_decay
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optim,
    max_epoch,
    eta_min=eta_min
)

# --- Log dir
path2log = out_dir

# --- Training
mng = TrainingManager(
    net,
    spectrogram,
    loader_train,
    loader_test,
    optim,
    scheduler,
    max_epoch,
    reload,
    mixup_alpha,
    path2log
)

print(4)
mng.train()