### Making a PyTorch Dataset for the Music Classifier

In [4]:
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import librosa
import numpy as np
import random

from pathlib import Path
from torch.utils.data import Dataset

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


In [5]:
def normalized(tensor):
    """Center a waveform on zero and scale it into the interval [-1, 1].

    Follows the normalization shown in the PyTorch audio preprocessing
    tutorial: subtract the mean, then divide by the largest absolute value.
    https://pytorch.org/tutorials/beginner/audio_preprocessing_tutorial.html

    Args:
        tensor: floating-point tensor holding the audio samples.

    Returns:
        A tensor of the same shape with zero mean and a peak magnitude of 1.
        A constant input (including silence) comes back as all zeros.
    """
    centered = tensor - tensor.mean()
    peak = centered.abs().max()
    if peak == 0:
        # Constant signal: dividing by the peak would produce NaNs.
        return centered
    return centered / peak

# Shared mu-law transforms, constructed once at import time with torchaudio's
# default arguments.
# NOTE(review): presumably 256 quantization channels by default; confirm
# against the installed torchaudio version.
_mu_encoder = torchaudio.transforms.MuLawEncoding()
_mu_decoder = torchaudio.transforms.MuLawDecoding()

def mu_law_encode(waveform):
    """Normalize ``waveform`` and pass it through the shared mu-law encoder."""
    scaled = normalized(waveform)
    encoded = _mu_encoder(scaled)
    return encoded

def mu_law_decode(waveform):
    """Decode a mu-law encoded tensor back into a waveform.

    Args:
        waveform: tensor previously produced by ``mu_law_encode``.

    Returns:
        The output of the shared mu-law decoder for ``waveform``.
    """
    # The input holds the encoder's quantized values, not audio amplitudes.
    # It must reach the decoder unchanged; rescaling it first (as the previous
    # version did) would alter the encoded values and corrupt the signal.
    return _mu_decoder(waveform)

def load_audio(path):
    """Read the audio file at ``path`` and return (mu-law encoded tensor, sample rate)."""
    loaded = torchaudio.load(path)
    samples, rate = loaded
    encoded = mu_law_encode(samples)
    return encoded, rate

def save_audio(path, data, sample_rate):
    """Decode a mu-law encoded tensor and write it to an audio file.

    Args:
        path: destination file path.
        data: mu-law encoded tensor (as returned by ``load_audio``).
        sample_rate: sample rate to store in the file.
    """
    waveform = mu_law_decode(data)
    # Write the decoded waveform; the previous version saved the still-encoded
    # ``data`` and discarded the decoding result.
    torchaudio.save(path, waveform, sample_rate)

In [None]:
# Root of the classification data: ../musicnet/music_classification_data
# relative to the current working directory.
dataset = Path.cwd().parent.joinpath("musicnet", "music_classification_data")

train_input = dataset.joinpath("train_input")
test_input = dataset.joinpath("test_input")

train_output = dataset.joinpath("train_output")
test_output = dataset.joinpath("test_output")

# NOTE(review): the previous version called `.remove(".DS_Store")` on these
# Path objects (Path has no such method) and used `train_labels`,
# `test_labels`, `train` and `test` without defining them. The code below
# assumes every label is a sub-directory of the corresponding *_input
# directory -- confirm against the actual data layout.
# Keeping only directories also skips macOS ".DS_Store" files.
train_labels = sorted(entry.name for entry in train_input.iterdir() if entry.is_dir())
test_labels = sorted(entry.name for entry in test_input.iterdir() if entry.is_dir())

print("train labels:", train_labels, "\n")
print("test labels:", test_labels, "\n")

# One list of wav paths per label, in the same order as the label lists.
train_wav = [
    [wav for wav in train_input.joinpath(label).iterdir() if wav.name != ".DS_Store"]
    for label in train_labels
]
test_wav = [
    [wav for wav in test_input.joinpath(label).iterdir() if wav.name != ".DS_Store"]
    for label in test_labels
]

# Number of labels, then number of files under the first label (0 if none).
print(len(train_wav), len(train_wav[0]) if train_wav else 0)
print(len(test_wav), len(test_wav[0]) if test_wav else 0)

In [None]:
class Seq2SeqDataset(Dataset):
    """
    Dataset of fixed-length audio clips paired with integer class labels.

    Each clip is loaded with librosa at 16 kHz and limited to its first
    10 seconds (160,000 samples). Every 5th sample is then kept, giving a
    32,000-element float tensor. No low-pass filtering is applied before
    the samples are dropped.
    """

    SAMPLE_RATE = 16000      # rate passed to librosa.load, in Hz
    CLIP_SECONDS = 10        # duration passed to librosa.load
    DOWNSAMPLE_STEP = 5      # keep every N-th sample of the loaded clip

    def __init__(self, wavs, labels, transform=None):
        """
        Args:
            wavs: list of paths to our wav files
            labels: list of labels, one per entry in ``wavs``; each must be
                a key of ``self.dict``
            transform: optional callable applied to the downsampled audio
                tensor before it is returned
        """
        self.labels = labels
        self.wavs = wavs
        self.transform = transform
        # Maps a label name to its integer class index.
        self.dict = {'Beethoven_Accompanied_Violin':0, 'Bach_Solo_Piano':1, 'Bach_Solo_Cello':2, 'Beethoven_Solo_Piano':3, 'Beethoven_String_Quartet':4, 'Cambini_Wind_Quintet':5}

    def __len__(self):
        """Return the number of items, taken from the label list."""
        return len(self.labels)

    def __getitem__(self, index):
        """
        Load, validate and downsample the clip at ``index``.

        Returns:
            (audio, label): the downsampled float tensor (after ``transform``
            when one was given) and the class index as a tensor.

        Raises:
            ValueError: if the loaded clip is not exactly 10 seconds of
                one-dimensional audio.
            KeyError: if the label is not a key of ``self.dict``.
        """
        expected_samples = self.SAMPLE_RATE * self.CLIP_SECONDS
        # librosa.load is asked for a fixed rate, so the returned rate is not
        # re-checked here.
        data, _ = librosa.load(
            self.wavs[index], sr=self.SAMPLE_RATE, duration=self.CLIP_SECONDS
        )
        sample_tensor = torch.tensor(data).float()
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if sample_tensor.size() != torch.Size([expected_samples]):
            raise ValueError(
                f"{self.wavs[index]}: expected {expected_samples} samples, "
                f"got shape {tuple(sample_tensor.size())}"
            )
        downsampled_tensor = sample_tensor[::self.DOWNSAMPLE_STEP]

        if self.transform is not None:
            downsampled_tensor = self.transform(downsampled_tensor)

        return downsampled_tensor, torch.tensor(self.dict[self.labels[index]])