In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [2]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3,3))
        self.conv2 = nn.Conv2d(64, 64, kernel_size=(3,5))
        self.fc1 = nn.Linear(12544, 32)
        self.fc2 = nn.Linear(32, 10)
        self.dropout = nn.Dropout(.2)
    def forward(self, x):
        x = x.view(-1, 1, 64, 256)
        x = F.relu(self.conv1(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = x.view(-1, 12544)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.softmax(x, dim=1)

In [3]:
def assess(model, songs, labels):
    model.eval()
    correct = 0
    total = 0
    for i in range(50):
        index = np.random.randint(len(labels))
        pred = model(songs[index])
        if torch.argmax(pred, dim=1) == labels[index]:
            correct += 1
        total += 1
    model.train()
    return correct/total

In [4]:
with np.load("../audio_sr_label.npz") as f:
    data = f['X']
    labels = list(f['T'])
data = np.asarray([[i[:2560] for i in j] for j in data])
seed = data[:, :, :256]
for i in range(1, 10):
    seed = np.append(seed, data[:, :, i*256:(i+1)*256], axis=0)
data = torch.tensor(seed, dtype=torch.float)
labels = labels * 10 #expand dimensions accordingly
label_set = set(labels)
mapping = {}
for count, i in enumerate(label_set):
    mapping[i] = count
targets = np.zeros(len(labels))
for i in range(len(targets)):
    targets[i] = mapping[labels[i]]
targets = torch.tensor(targets, dtype=torch.long)

In [13]:
indices = np.random.choice(np.arange(len(targets)), len(targets), replace=False)
train_index = indices[int(len(indices)/10):]
test_index = indices[:int(len(indices)/10)]

labels_train = targets[train_index]
labels_test = targets[test_index]
data_train = data[train_index]
data_test = data[test_index]
#double check ^^ above stuff

In [14]:
model = Model()
optimizer = optim.Adam(list(model.parameters()), lr=1e-5)
if torch.cuda.is_available():
    model.cuda()
    data_train.cuda()
    data_test.cuda()
    labels_train.cuda()
    labels_test.cuda()

In [22]:
training_acc = []
validation_acc = []
model.train()
for epoch in range(30):
    for i in range(len(labels_train)):
        index = np.random.randint(len(labels_train))
        optimizer.zero_grad()
        target = labels_train[index]
        prediction = model(data_train[index])
        criterion = nn.CrossEntropyLoss()
        loss = criterion(prediction, target.unsqueeze(0))
        loss.backward()
        optimizer.step()
        if i % 5 == 0:
            print("loss: ", loss)
            if i % 20 == 0:
                training_acc.append(assess(model, data_train, labels_train))
                validation_acc.append(assess(model, data_test, labels_test))
                print(training_acc[-1], validation_acc[-1])
                if i % 100 == 0:
                    with open("training_acc", "ab") as f:
                        np.asarray(training_acc).tofile(f)
                    with open("validation_acc", "ab") as f:
                        np.asarray(validation_acc).tofile(f)
                    training_acc = []
                    validation_acc = []
        break
    torch.save(model.state_dict(), "./model" + str(epoch).zfill(2))

loss:  tensor(2.2845, grad_fn=<NllLossBackward>)
0.12 0.1
loss:  tensor(2.3126, grad_fn=<NllLossBackward>)
0.06 0.18
loss:  tensor(2.2606, grad_fn=<NllLossBackward>)
0.08 0.16
loss:  tensor(2.3214, grad_fn=<NllLossBackward>)
0.16 0.1
loss:  tensor(2.2854, grad_fn=<NllLossBackward>)
0.16 0.12
loss:  tensor(2.1667, grad_fn=<NllLossBackward>)
0.08 0.2
loss:  tensor(2.2946, grad_fn=<NllLossBackward>)
0.12 0.2
loss:  tensor(2.3211, grad_fn=<NllLossBackward>)
0.14 0.06
loss:  tensor(2.3170, grad_fn=<NllLossBackward>)
0.1 0.12
loss:  tensor(2.3055, grad_fn=<NllLossBackward>)
0.12 0.12
loss:  tensor(2.3074, grad_fn=<NllLossBackward>)
0.18 0.06
loss:  tensor(2.3070, grad_fn=<NllLossBackward>)
0.16 0.1
loss:  tensor(2.2733, grad_fn=<NllLossBackward>)
0.12 0.04
loss:  tensor(2.2957, grad_fn=<NllLossBackward>)
0.08 0.08
loss:  tensor(2.3026, grad_fn=<NllLossBackward>)
0.08 0.08
loss:  tensor(2.3177, grad_fn=<NllLossBackward>)
0.14 0.08
loss:  tensor(2.3108, grad_fn=<NllLossBackward>)
0.16 0.12
los

In [21]:
model = Model()
model.load_state_dict(torch.load("./model"))
model.eval()
correct = 0
total = 0
for x, y in zip(data_test, labels_test):
    y_hat = torch.argmax(model(x))
    if y_hat == y:
        correct += 1
    total += 1
print(correct / total)

0.115
