### Classifying the Latent Space Vectors

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import librosa
import numpy as np
import random

from pathlib import Path
from torch.utils.data import Dataset

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [3]:
dataset = Path.cwd().parent.joinpath("musicnet", "encoded")

train = dataset.joinpath("train")
test = dataset.joinpath("test")

train_labels = [p.stem for p in train.iterdir()]
test_labels = [p.stem for p in test.iterdir()]

# Uncomment the code below if needed for your machine
train_labels.remove(".DS_Store")
test_labels.remove(".DS_Store")

print("train labels:", train_labels, "\n")
print("test labels:", test_labels, "\n")

train_wav = []
test_wav = []

for label in train_labels:
    train_wav.append([wav for wav in train.joinpath(label).iterdir() if wav.name != ".DS_Store"])
    
for label in test_labels:
    test_wav.append([wav for wav in test.joinpath(label).iterdir() if wav.name != ".DS_Store"])
    
print(len(train_wav), len(train_wav[0]))
print(len(test_wav), len(test_wav[0]))

labels: ['Beethoven_Accompanied_Violin', 'Bach_Solo_Piano', 'Bach_Solo_Cello', 'Beethoven_Solo_Piano', 'Beethoven_String_Quartet', 'Cambini_Wind_Quintet'] 

6 22


In [4]:
train_y = []
test_y = []

unprocessed_train_x = []
unprocessed_test_x = []

for i in range(len(train_labels)):
    for j in range(len(train_wav[i])):
        train_y.append(train_labels[i])
        
for i in range(len(test_labels)):
    for j in range(len(test_wav[i])):
        test_y.append(test_labels[i])
        
for arr in train_wav:
    unprocessed_train_x.extend(arr)
    
for arr in test_wav:
    unprocessed_test_x.extend(arr)
        
print(len(train_y), len(unprocessed_train_x))
print(len(test_y), len(unprocessed_test_x))

203 203


In [11]:
class LatentSpaceDataset(Dataset):
    """
    Latent Space Dataset. Each latent space tensor has length 32,000.
    """

    def __init__(self, wav_tensors, labels, transform=None):
        """
        Args:
            labels: list of labels
            wavs: list of paths to our wav files
        """
        self.labels = labels
        self.wav_tensors = wav_tensors
        self.dict = {'Beethoven_Accompanied_Violin':0, 'Bach_Solo_Piano':1, 'Bach_Solo_Cello':2, 'Beethoven_Solo_Piano':3, 'Beethoven_String_Quartet':4, 'Cambini_Wind_Quintet':5}

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        sample_tensor = torch.load(self.wav_tensors[index], map_location = device)
        print(sample_tensor.size())
        assert sample_tensor.size()  == torch.Size([1, 64, 200])
        return sample_tensor, torch.tensor(self.dict[self.labels[index]])

In [12]:
train_set = LatentSpaceDataset(unprocessed_train_x, train_y)
test_set = LatentSpaceDataset(unprocessed_test_x, test_y)
print("Train set size: " + str(len(train_set)))
print("Test set size: " + str(len(test_set)))

kwargs = {'num_workers': 1, 'pin_memory': True} if device == 'cuda' else {} #needed for using datasets on gpu
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 8, shuffle = True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 8, shuffle = True, **kwargs)

Test set size: 203


In [7]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 128, 80, 4)
        self.bn1 = nn.BatchNorm1d(128)
        self.pool1 = nn.MaxPool1d(4)
        self.conv2 = nn.Conv1d(128, 128, 3)
        self.bn2 = nn.BatchNorm1d(128)
        self.pool2 = nn.MaxPool1d(4)
        self.conv3 = nn.Conv1d(128, 256, 3)
        self.bn3 = nn.BatchNorm1d(256)
        self.pool3 = nn.MaxPool1d(4)
        self.conv4 = nn.Conv1d(256, 512, 3)
        self.bn4 = nn.BatchNorm1d(512)
        self.pool4 = nn.MaxPool1d(4)
        self.avgPool = nn.AvgPool1d(30) #input should be 512x30 so this outputs a 512x1
        self.fc1 = nn.Linear(512, 6)
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = F.relu(self.bn3(x))
        x = self.pool3(x)
        x = self.conv4(x)
        x = F.relu(self.bn4(x))
        x = self.pool4(x)
        x = self.avgPool(x)
        x = x.permute(0, 2, 1) #change the 512x1 to 1x512
        x = self.fc1(x)
        return F.log_softmax(x, dim = 2)

model = CNN()
model.to(device)
print(model)

CNN(
  (conv1): Conv1d(1, 128, kernel_size=(80,), stride=(4,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,))
  (bn3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(256, 512, kernel_size=(3,), stride=(1,))
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool4): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (avgPool): AvgPool1d(kernel_size=(30,), stride=(30,), 

In [8]:
model = CNN()
model.load_state_dict(torch.load('dataset_model_james.pt'))
model.eval()

CNN(
  (conv1): Conv1d(1, 128, kernel_size=(80,), stride=(4,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,))
  (bn3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(256, 512, kernel_size=(3,), stride=(1,))
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool4): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (avgPool): AvgPool1d(kernel_size=(30,), stride=(30,), 

In [9]:
def test(model, epoch):
    model.eval()
    correct = 0
    for data, target in test_loader:
        data = torch.unsqueeze(data, 0)
        data = data.permute(1, 0, 2)
        output = model(data)
        output = output.permute(1, 0, 2)
        pred = output.max(2)[1] # get the index of the max log-probability
        correct += pred.eq(target).cpu().sum().item()
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [13]:
log_interval = 5
for epoch in range(1, 41):
    test(model, epoch)

torch.Size([1, 64, 200])
torch.Size([1, 64, 200])
torch.Size([1, 64, 200])
torch.Size([1, 64, 200])
torch.Size([1, 64, 200])
torch.Size([1, 64, 200])
torch.Size([1, 64, 200])
torch.Size([1, 64, 200])


RuntimeError: number of dims don't match in permute