### Classifying the Latent Space Vectors

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import librosa
import numpy as np
import random

from pathlib import Path
from torch.utils.data import Dataset

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [4]:
dataset = Path.cwd().parent.joinpath("musicnet", "encoded")

train = dataset.joinpath("train")
test = dataset.joinpath("test")

train_labels = [p.stem for p in train.iterdir()]
test_labels = [p.stem for p in test.iterdir()]

# Uncomment the code below if needed for your machine
train_labels.remove(".DS_Store")
test_labels.remove(".DS_Store")

print("train labels:", train_labels, "\n")
print("test labels:", test_labels, "\n")

train_wav = []
test_wav = []

for label in train_labels:
    train_wav.append([wav for wav in train.joinpath(label).iterdir() if wav.name != ".DS_Store"])
    
for label in test_labels:
    test_wav.append([wav for wav in test.joinpath(label).iterdir() if wav.name != ".DS_Store"])
    
print(len(train_wav), len(train_wav[0]))
print(len(test_wav), len(test_wav[0]))

train labels: ['Beethoven_Accompanied_Violin', 'Bach_Solo_Piano', 'Bach_Solo_Cello', 'Beethoven_Solo_Piano', 'Beethoven_String_Quartet', 'Cambini_Wind_Quintet'] 

test labels: ['Beethoven_Accompanied_Violin', 'Bach_Solo_Piano', 'Bach_Solo_Cello', 'Beethoven_Solo_Piano', 'Beethoven_String_Quartet', 'Cambini_Wind_Quintet'] 

6 16
6 6


In [5]:
train_y = []
test_y = []

unprocessed_train_x = []
unprocessed_test_x = []

for i in range(len(train_labels)):
    for j in range(len(train_wav[i])):
        train_y.append(train_labels[i])
        
for i in range(len(test_labels)):
    for j in range(len(test_wav[i])):
        test_y.append(test_labels[i])
        
for arr in train_wav:
    unprocessed_train_x.extend(arr)
    
for arr in test_wav:
    unprocessed_test_x.extend(arr)
        
print(len(train_y), len(unprocessed_train_x))
print(len(test_y), len(unprocessed_test_x))

153 153
50 50


In [47]:
class LatentSpaceDataset(Dataset):
    """
    Latent Space Dataset. Each latent space tensor has length 32,000.
    """

    def __init__(self, wav_tensors, labels, transform=None):
        """
        Args:
            labels: list of labels
            wavs: list of paths to our wav files
        """
        self.labels = labels
        self.wav_tensors = wav_tensors
        self.dict = {'Beethoven_Accompanied_Violin':0, 'Bach_Solo_Piano':1, 'Bach_Solo_Cello':2, 'Beethoven_Solo_Piano':3, 'Beethoven_String_Quartet':4, 'Cambini_Wind_Quintet':5}

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        sample_tensor = torch.load(self.wav_tensors[index], map_location = device)
        assert sample_tensor.size()  == torch.Size([1, 64, 200])
        sample_tensor = sample_tensor.reshape(12800)
        assert sample_tensor.size()  == torch.Size([12800])
        return sample_tensor, torch.tensor(self.dict[self.labels[index]])

In [48]:
train_set = LatentSpaceDataset(unprocessed_train_x, train_y)
test_set = LatentSpaceDataset(unprocessed_test_x, test_y)
print("Train set size: " + str(len(train_set)))
print("Test set size: " + str(len(test_set)))

kwargs = {'num_workers': 1, 'pin_memory': True} if device == 'cuda' else {} #needed for using datasets on gpu
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 8, shuffle = True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 8, shuffle = True, **kwargs)

Train set size: 153
Test set size: 50


In [49]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 128, 80, 4)
        self.bn1 = nn.BatchNorm1d(128)
        self.pool1 = nn.MaxPool1d(4)
        self.conv2 = nn.Conv1d(128, 128, 3)
        self.bn2 = nn.BatchNorm1d(128)
        self.pool2 = nn.MaxPool1d(4)
        self.conv3 = nn.Conv1d(128, 256, 3)
        self.bn3 = nn.BatchNorm1d(256)
        self.pool3 = nn.MaxPool1d(4)
        self.conv4 = nn.Conv1d(256, 512, 3)
        self.bn4 = nn.BatchNorm1d(512)
        self.pool4 = nn.MaxPool1d(4)
        self.avgPool = nn.AvgPool1d(11) #input should be 512x11 so this outputs a 512x1
        self.fc1 = nn.Linear(512, 6)
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = F.relu(self.bn3(x))
        x = self.pool3(x)
        x = self.conv4(x)
        x = F.relu(self.bn4(x))
        x = self.pool4(x)
        x = self.avgPool(x)
        x = x.permute(0, 2, 1) #change the 512x1 to 1x512
        x = self.fc1(x)
        return F.log_softmax(x, dim = 2)

model = CNN()
model.to(device)
print(model)

CNN(
  (conv1): Conv1d(1, 128, kernel_size=(80,), stride=(4,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,))
  (bn3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(256, 512, kernel_size=(3,), stride=(1,))
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool4): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (avgPool): AvgPool1d(kernel_size=(11,), stride=(11,), 

In [50]:
optimizer = optim.Adam(model.parameters(), lr = 0.01, weight_decay = 0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 20, gamma = 0.1)

In [51]:
def train(model, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        data = torch.unsqueeze(data, 0)
        data = data.permute(1, 0, 2)
        data = data.requires_grad_() #set requires_grad to True for training
        output = model(data)
        output = output.permute(1, 0, 2) #original output dimensions are batchSizex1x6 
#         print(output[0].shape, target.shape)
        loss = F.nll_loss(output[0], target) #the loss functions expects a batchSizex10 input
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0: #print training stats
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss))

In [52]:
def test(model, epoch):
    model.eval()
    correct = 0
    for data, target in test_loader:
        data = torch.unsqueeze(data, 0)
        data = data.permute(1, 0, 2)
        output = model(data)
        output = output.permute(1, 0, 2)
        pred = output.max(2)[1] # get the index of the max log-probability
        correct += pred.eq(target).cpu().sum().item()
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [54]:
log_interval = 5
for epoch in range(1, 41):
    if epoch == 31:
        print("First round of training complete. Setting learn rate to 0.001.")
    scheduler.step()
    train(model, epoch)
    test(model, epoch)




Test set: Accuracy: 18/50 (36%)


Test set: Accuracy: 22/50 (44%)


Test set: Accuracy: 24/50 (48%)


Test set: Accuracy: 23/50 (46%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 27/50 (54%)


Test set: Accuracy: 24/50 (48%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 23/50 (46%)


Test set: Accuracy: 24/50 (48%)


Test set: Accuracy: 28/50 (56%)


Test set: Accuracy: 23/50 (46%)


Test set: Accuracy: 27/50 (54%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 20/50 (40%)


Test set: Accuracy: 22/50 (44%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 27/50 (54%)


Test set: Accuracy: 26/50 (52%)


Test set: Accuracy: 26/50 (52%)


Test set: Accuracy: 26/50 (52%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 24/50 (48%)


Test set: Accuracy: 24/50 (48%)


Test set: Accuracy: 24/50 (48%)


Test set: Accuracy: 26/50 (52%)


Test set: Accuracy: 29/50 (58%)


Test set: Acc


Test set: Accuracy: 28/50 (56%)


Test set: Accuracy: 25/50 (50%)


Test set: Accuracy: 30/50 (60%)



In [55]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])
torch.save(model.state_dict(), 'latent_space_model_james.pt')

Model's state_dict:
conv1.weight 	 torch.Size([128, 1, 80])
conv1.bias 	 torch.Size([128])
bn1.weight 	 torch.Size([128])
bn1.bias 	 torch.Size([128])
bn1.running_mean 	 torch.Size([128])
bn1.running_var 	 torch.Size([128])
bn1.num_batches_tracked 	 torch.Size([])
conv2.weight 	 torch.Size([128, 128, 3])
conv2.bias 	 torch.Size([128])
bn2.weight 	 torch.Size([128])
bn2.bias 	 torch.Size([128])
bn2.running_mean 	 torch.Size([128])
bn2.running_var 	 torch.Size([128])
bn2.num_batches_tracked 	 torch.Size([])
conv3.weight 	 torch.Size([256, 128, 3])
conv3.bias 	 torch.Size([256])
bn3.weight 	 torch.Size([256])
bn3.bias 	 torch.Size([256])
bn3.running_mean 	 torch.Size([256])
bn3.running_var 	 torch.Size([256])
bn3.num_batches_tracked 	 torch.Size([])
conv4.weight 	 torch.Size([512, 256, 3])
conv4.bias 	 torch.Size([512])
bn4.weight 	 torch.Size([512])
bn4.bias 	 torch.Size([512])
bn4.running_mean 	 torch.Size([512])
bn4.running_var 	 torch.Size([512])
bn4.num_batches_tracked 	 torch.Size([