In [17]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch import optim
# Make newly created float tensors default to the CUDA float type when a GPU
# is usable and to the CPU float type otherwise, then report which case applies.
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
    torch.set_default_tensor_type(torch.FloatTensor)
print(torch.cuda.is_available())

True


In [36]:
class SongDataTrain(Dataset):
    """Training portion of a song dataset stored in an ``.npz`` archive.

    The archive is read for two arrays: ``X`` (indexed by song along its first
    axis) and ``T`` (one label per song). Labels are converted to integer class
    ids and the rows that fall outside the first 10% of a seeded shuffle are
    kept.
    """

    def __init__(self, path, seed=0):
        """Load ``path`` and keep the rows outside the held-out 10%.

        Args:
            path: Location of the ``.npz`` archive containing ``X`` and ``T``.
            seed: Seed of the shuffle that decides the split. A fixed seed makes
                the split reproducible; a held-out set is disjoint from this
                one only if it takes the leading 10% of the same seeded
                permutation.
        """
        with np.load(path) as f:
            data = f['X']
            labels = list(f['T'])

        # Sort the distinct labels so class ids do not depend on set iteration
        # order, which is not stable from one interpreter run to the next.
        mapping = {label: idx for idx, label in enumerate(sorted(set(labels)))}
        targets = np.array([mapping[label] for label in labels], dtype=np.int64)

        # Seeded permutation; the leading 10% is the held-out portion.
        order = np.random.RandomState(seed).permutation(len(targets))
        keep = order[int(.1 * len(targets)):]

        # Select rows and labels with the same indices so they stay aligned.
        self.targets = torch.tensor(targets[keep], dtype=torch.long)
        self.data = torch.tensor(data[keep], dtype=torch.float)

    def __getitem__(self, index):
        """Return the ``(features, class_id)`` pair stored at ``index``."""
        return (self.data[index], self.targets[index])

    def __len__(self):
        """Return the number of songs kept in this split."""
        return (len(self.targets))
class SongDataTest(Dataset):
    """Held-out portion of a song dataset stored in an ``.npz`` archive.

    The archive is read for two arrays: ``X`` (indexed by song along its first
    axis) and ``T`` (one label per song). Labels are converted to integer class
    ids and the rows that fall inside the first 10% of a seeded shuffle are
    kept.
    """

    def __init__(self, path, seed=0):
        """Load ``path`` and keep the held-out 10% of the rows.

        Args:
            path: Location of the ``.npz`` archive containing ``X`` and ``T``.
            seed: Seed of the shuffle that decides the split. A fixed seed makes
                the split reproducible; a training set is disjoint from this
                one only if it takes the remaining 90% of the same seeded
                permutation.
        """
        with np.load(path) as f:
            data = f['X']
            labels = list(f['T'])

        # Sort the distinct labels so class ids do not depend on set iteration
        # order, which is not stable from one interpreter run to the next.
        mapping = {label: idx for idx, label in enumerate(sorted(set(labels)))}
        targets = np.array([mapping[label] for label in labels], dtype=np.int64)

        # Seeded permutation; the leading 10% is the held-out portion.
        order = np.random.RandomState(seed).permutation(len(targets))
        keep = order[:int(.1 * len(targets))]

        # Select rows and labels with the same indices so they stay aligned.
        self.targets = torch.tensor(targets[keep], dtype=torch.long)
        self.data = torch.tensor(data[keep], dtype=torch.float)

    def __getitem__(self, index):
        """Return the ``(features, class_id)`` pair stored at ``index``."""
        return (self.data[index], self.targets[index])

    def __len__(self):
        """Return the number of songs kept in this split."""
        return (len(self.targets))

In [24]:
# Load the feature array X and the per-song labels T from the archive.
with np.load("./audio_sr_label.npz") as f:
    data = f['X']
    labels = list(f['T'])

# Assign class ids in sorted label order. Enumerating the bare set would make
# the ids depend on set iteration order, which can differ between kernel runs.
label_set = set(labels)
mapping = {label: idx for idx, label in enumerate(sorted(label_set))}
targets = torch.tensor([mapping[label] for label in labels], dtype=torch.long)

# Seeded shuffle so the train/test split is the same after a kernel restart.
SPLIT_SEED = 0
indices = np.random.RandomState(SPLIT_SEED).permutation(len(targets))
n_test = len(indices) // 10  # first 10% of the shuffle is held out
test_index = indices[:n_test]
train_index = indices[n_test:]

labels_train = targets[train_index]
labels_test = targets[test_index]
data_train = data[train_index]
data_test = data[test_index]

# Sanity checks on the split: the two parts are disjoint and cover everything.
assert len(labels_train) + len(labels_test) == len(targets)
assert len(data_train) == len(labels_train) and len(data_test) == len(labels_test)
assert not set(train_index.tolist()) & set(test_index.tolist())

In [7]:
class LSTMClassifier(nn.Module):
    """Multi-layer LSTM followed by a linear layer applied to the last time step.

    Inputs are batch-first: ``(batch, seq_len, input_size)``. The output has
    shape ``(batch, output_size)``.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        """Build the LSTM stack and the linear read-out layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer.
            output_size: Number of values produced by the linear layer.
            num_layers: Number of stacked LSTM layers.
        """
        super(LSTMClassifier, self).__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        # Kept so code that reads or assigns ``model.hidden`` keeps working.
        self.hidden = self.init_hidden()

    def init_hidden(self, batch_size=1, device=None, dtype=None):
        """Return a zeroed ``(h_0, c_0)`` pair for the LSTM.

        Args:
            batch_size: Size of the batch dimension of the state (default 1).
            device: Device for the state; ``None`` uses PyTorch's current default
                rather than forcing CUDA, so the model also runs on CPU.
            dtype: Dtype for the state; ``None`` uses PyTorch's current default.

        Returns:
            Tuple of two tensors of shape ``(num_layers, batch_size, hidden_size)``.
        """
        shape = (self.num_layers, batch_size, self.hidden_size)
        h_t = torch.zeros(shape, device=device, dtype=dtype)
        h_c = torch.zeros(shape, device=device, dtype=dtype)
        return (h_t, h_c)

    def forward(self, x):
        """Classify each sequence in ``x`` from the LSTM output at its last step.

        Every call starts from a zero state sized to the batch, so predictions
        for one input never depend on whatever was fed to the model before.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        state = self.init_hidden(x.size(0), device=x.device, dtype=x.dtype)
        output, (h_n, c_n) = self.lstm(x, state)
        # Store the final state detached: keeping the autograd graph alive here
        # would grow memory and break the next backward() call.
        self.hidden = (h_n.detach(), c_n.detach())
        return self.fc(output[:, -1, :])

In [8]:
# Hyperparameters
seq_len, input_size = 64, 2584   # steps per sequence, features per step
num_layers, hidden_size = 2, 128  # LSTM depth and width
output_size = 10                  # values produced by the final linear layer
batch_size = 128

# Network
model = LSTMClassifier(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [30]:
def assess(model, songs, labels, seq_len, input_size, num_samples=50):
    """Estimate accuracy on randomly drawn songs.

    Draws ``num_samples`` indices uniformly at random (with replacement), runs
    the model on each selected song and counts how often the arg-max prediction
    equals the stored label. The model is switched to eval mode and autograd is
    disabled for the duration of the call; the previous train/eval mode is
    restored afterwards.

    Args:
        model: ``nn.Module`` mapping a ``(batch, seq_len, input_size)`` tensor to
            per-class scores.
        songs: Indexable collection of songs (NumPy array or tensor); each item
            must reshape to ``(-1, seq_len, input_size)`` with a single row.
        labels: Indexable collection of integer class ids aligned with ``songs``.
        seq_len: Number of time steps per song.
        input_size: Number of features per time step.
        num_samples: How many random songs to score (default 50).

    Returns:
        Fraction of sampled songs classified correctly, in ``[0, 1]``.

    Raises:
        ValueError: If ``num_samples`` is not positive, or (from NumPy) if
            ``labels`` is empty.
    """
    if num_samples <= 0:
        raise ValueError("num_samples must be positive")

    # Put inputs on the same device as the model's weights, when it has any.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    was_training = model.training
    model.eval()
    correct = 0
    try:
        with torch.no_grad():
            for _ in range(num_samples):
                index = np.random.randint(len(labels))
                # Reset any recurrent state the model keeps so that one sample
                # cannot influence the prediction for the next one.
                if hasattr(model, "init_hidden"):
                    model.hidden = model.init_hidden()
                # Songs may be stored as NumPy arrays; the model needs a tensor.
                song = torch.as_tensor(songs[index], dtype=torch.float, device=device)
                pred = model(song.reshape(-1, seq_len, input_size))
                if torch.argmax(pred, dim=1).item() == int(labels[index]):
                    correct += 1
    finally:
        model.train(was_training)
    return correct / num_samples

In [37]:
# Accuracy estimates collected every 20 steps (see the logging branch below).
training_acc = []
validation_acc = []
model.train()
epochs = 50

for epoch in range(epochs):
    # Visit every training song exactly once per epoch, in a fresh random order.
    # Drawing an independent random index at each step would repeat some songs
    # and skip others within the same epoch.
    for i, index in enumerate(np.random.permutation(len(labels_train))):
        target = labels_train[index]
        # Use a new name here: rebinding `data` would clobber the full feature
        # array loaded earlier in the notebook.
        sample = torch.Tensor(data_train[index].reshape(-1, seq_len, input_size))
        model.hidden = model.init_hidden()
        model.zero_grad()
        prediction = model(sample)

        loss = criterion(prediction, target.unsqueeze(0))
        loss.backward()
        optimizer.step()

        if i % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, epochs, i+1, len(labels_train), loss.item()))
            train_acc = assess(model, data_train, labels_train, seq_len, input_size)
            training_acc.append(train_acc)
            val_acc = assess(model, data_test, labels_test, seq_len, input_size)
            validation_acc.append(val_acc)
            print(train_acc)

    # One checkpoint per epoch, named model00, model01, ...
    torch.save(model.state_dict(), "./model" + str(epoch).zfill(2))

(1, 64, 2584)
Epoch [1/50], Step [1/900], Loss: 2.2321
0.2
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
Epoch [1/50], Step [21/900], Loss: 0.8413
0.22
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
Epoch [1/50], Step [41/900], Loss: 2.3276
0.16
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
(1, 64, 2584)
Epoch [

KeyboardInterrupt: 