In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
# Make newly created tensors default to CUDA float tensors when a GPU is
# available, and to CPU float tensors otherwise. Code below that builds tensors
# without an explicit device (e.g. torch.tensor / torch.zeros) relies on this.
# NOTE(review): torch.set_default_tensor_type is reported as deprecated in
# recent PyTorch releases in favour of torch.set_default_dtype /
# torch.set_default_device - confirm against the installed version.
torch.set_default_tensor_type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor)

In [10]:
# --- Dataset preparation -----------------------------------------------------
# Each recording is cut along its last axis into N_CHUNKS consecutive windows
# of CHUNK_LEN columns; every window becomes its own training example.
CHUNK_LEN = 256
N_CHUNKS = 10
TEST_FRACTION_DENOM = 10  # 1/10 of the examples are held out for testing
SPLIT_SEED = 0            # fixed so the split can be reproduced when reloading checkpoints

with np.load("./audio_sr_label.npz") as f:
    data = f['X']
    labels = list(f['T'])

# Keep only the first N_CHUNKS * CHUNK_LEN columns of every row. The nested
# comprehension (rather than plain slicing) also works when the stored rows do
# not all have the same length - presumably why it was written this way.
data = np.asarray([[row[:N_CHUNKS * CHUNK_LEN] for row in song] for song in data])

# Stack the windows along axis 0: all recordings' window 0, then all recordings'
# window 1, ... Built with a single concatenate instead of growing the array
# with np.append inside a loop (which re-copies everything on each iteration).
seed = np.concatenate(
    [data[:, :, k * CHUNK_LEN:(k + 1) * CHUNK_LEN] for k in range(N_CHUNKS)],
    axis=0,
)
data = torch.tensor(seed, dtype=torch.float)

# Repeat the label list once per window so it lines up with the stacking order above.
labels = labels * N_CHUNKS
assert len(labels) == data.shape[0], "labels and chunked data are misaligned"

# Map every distinct label to an integer class id. Sorting makes the mapping
# deterministic; enumerating a bare set can assign different ids in different
# kernel sessions, which silently invalidates previously saved checkpoints.
label_set = set(labels)
mapping = {label: class_id for class_id, label in enumerate(sorted(label_set))}
targets = torch.tensor([mapping[label] for label in labels], dtype=torch.long)

# Shuffle example indices with a dedicated, seeded RNG (the global NumPy RNG is
# left untouched) and hold out the first 1/TEST_FRACTION_DENOM for testing.
# NOTE(review): the split happens after chunking, so windows of the same
# recording can land in both train and test; validation accuracy may therefore
# be optimistic. Consider splitting by recording before chunking.
indices = np.random.RandomState(SPLIT_SEED).permutation(len(targets))
n_test = len(indices) // TEST_FRACTION_DENOM
train_index = indices[n_test:]
test_index = indices[:n_test]

labels_train = targets[train_index]
labels_test = targets[test_index]
data_train = data[train_index]
data_test = data[test_index]
#double check ^^ above stuff

In [3]:
class LSTMClassifier(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values produced by the final linear layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(LSTMClassifier, self).__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.input_size = input_size
        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the last time step's output.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Allocate the zero initial states with the input's dtype and device
        # instead of the global defaults, so the model keeps working when the
        # input/model live on a different device or use a different precision.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        output, _ = self.lstm(x, (h_0, c_0))
        # Only the output at the final time step is used.
        return self.fc(output[:, -1, :])

In [4]:
# Hyperparameters shared by the model definition and the reshape calls below.
input_size, hidden_size, num_layers = 256, 128, 2
output_size = 10
seq_len = 64

# Build the classifier from the hyperparameters above.
model = LSTMClassifier(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
)

# Loss function and optimizer over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

In [12]:
def assess(model, songs, labels, seq_len, input_size, n_samples=50):
    """Estimate classification accuracy on randomly drawn examples.

    Draws ``n_samples`` indices uniformly at random (with replacement) using
    the global NumPy RNG, runs the model on each selected example and compares
    the arg-max prediction with the stored label.

    Args:
        model: ``nn.Module`` mapping a (batch, seq_len, input_size) tensor to
            per-class scores.
        songs: Indexable collection of example tensors; each example is
            reshaped to ``(-1, seq_len, input_size)`` before the forward pass.
        labels: Integer class labels aligned with ``songs``.
        seq_len: Sequence length used for the reshape.
        input_size: Feature size used for the reshape.
        n_samples: Number of random draws (default 50).

    Returns:
        Fraction of draws that were classified correctly, as a float.

    Raises:
        ValueError: If ``labels`` is empty or ``n_samples`` is not positive.
    """
    if n_samples <= 0:
        raise ValueError("n_samples must be positive")
    if len(labels) == 0:
        raise ValueError("cannot assess on an empty dataset")

    # Evaluate in eval mode and without autograd bookkeeping; the previous
    # mode is restored afterwards so a surrounding training loop is unaffected.
    was_training = model.training
    model.eval()
    correct = 0
    try:
        with torch.no_grad():
            for _ in range(n_samples):
                index = np.random.randint(len(labels))
                pred = model(songs[index].reshape(-1, seq_len, input_size))
                if int(torch.argmax(pred, dim=1)) == int(labels[index]):
                    correct += 1
    finally:
        model.train(was_training)
    return correct / n_samples

In [None]:
# --- Training ----------------------------------------------------------------
# Accuracy estimates collected every EVAL_EVERY steps (see assess()).
training_acc = []
validation_acc = []
model.train()
epochs = 50
LOG_EVERY = 100   # print the loss every LOG_EVERY steps
EVAL_EVERY = 20   # record train/validation accuracy every EVAL_EVERY steps

n_train = len(labels_train)
for epoch in range(epochs):
    for i in range(n_train):
        # One randomly drawn example per optimisation step (sampling with
        # replacement, so an "epoch" is n_train draws rather than a full pass).
        index = np.random.randint(n_train)
        target = labels_train[index]
        # Named `sample` (not `data`) so the module-level dataset tensor
        # `data` is not overwritten by a single example.
        sample = data_train[index].reshape(-1, seq_len, input_size)

        optimizer.zero_grad()
        prediction = model(sample)
        # CrossEntropyLoss expects a batch dimension on the target.
        loss = criterion(prediction, target.unsqueeze(0))
        loss.backward()
        optimizer.step()

        if i % LOG_EVERY == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, epochs, i+1, n_train, loss.item()))
        if i % EVAL_EVERY == 0:
            training_acc.append(assess(model, data_train, labels_train, seq_len, input_size))
            validation_acc.append(assess(model, data_test, labels_test, seq_len, input_size))

    # Checkpoint the weights at the end of every epoch: ./model00, ./model01, ...
    torch.save(model.state_dict(), "./model" + str(epoch).zfill(2))

Epoch [1/50], Step [1/9000], Loss: 2.3202
Epoch [1/50], Step [101/9000], Loss: 2.3794
Epoch [1/50], Step [201/9000], Loss: 2.3191
Epoch [1/50], Step [301/9000], Loss: 2.3795
Epoch [1/50], Step [401/9000], Loss: 2.3793
Epoch [1/50], Step [501/9000], Loss: 2.2287
Epoch [1/50], Step [601/9000], Loss: 2.3481
Epoch [1/50], Step [701/9000], Loss: 2.3187
