In [15]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
torch.set_default_tensor_type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor)

In [16]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3,3))
        self.conv2 = nn.Conv2d(64, 64, kernel_size=(3,5))
        self.fc1 = nn.Linear(12544, 32)
        self.fc2 = nn.Linear(32, 10)
        self.dropout = nn.Dropout(.2)
    def forward(self, x):
        x = x.view(-1, 1, 64, 256)
        x = F.relu(self.conv1(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = x.view(-1, 12544)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        return F.softmax(x, dim=1)

In [17]:
def assess(model, songs, labels):
    correct = 0
    total = 0
    for i in range(50):
        index = np.random.randint(len(labels))
        pred = model(songs[index])
        if torch.argmax(pred, dim=1) == labels[index]:
            correct += 1
        total += 1
    return correct/total

In [18]:
with np.load("../audio_sr_label.npz") as f:
    data = f['X']
    labels = list(f['T'])
data = np.asarray([[i[:2560] for i in j] for j in data])
seed = data[:, :, :256]
for i in range(1, 10):
    seed = np.append(seed, data[:, :, i*256:(i+1)*256], axis=0)
data = torch.tensor(seed, dtype=torch.float)
labels = labels * 10 #expand dimensions accordingly
label_set = set(labels)
mapping = {}
for count, i in enumerate(label_set):
    mapping[i] = count
targets = np.zeros(len(labels))
for i in range(len(targets)):
    targets[i] = mapping[labels[i]]
targets = torch.tensor(targets, dtype=torch.long)

In [19]:
indices = np.random.choice(np.arange(len(targets)), len(targets), replace=False)
train_index = indices[int(len(indices)/10):]
test_index = indices[:int(len(indices)/10)]

labels_train = targets[train_index]
labels_test = targets[test_index]
data_train = data[train_index]
data_test = data[test_index]
#double check ^^ above stuff

In [20]:
model = Model()
optimizer = optim.Adam(list(model.parameters()), lr=1e-5)
if torch.cuda.is_available():
    model.cuda()
    data_train.cuda()
    data_test.cuda()
    labels_train.cuda()
    labels_test.cuda()

In [21]:
training_acc = []
validation_acc = []
model.train()
epochs = 50
for epoch in range(epochs):
    for i in range(len(labels_train)):
        index = np.random.randint(len(labels_train))
        optimizer.zero_grad()
        target = labels_train[index]
        prediction = model(data_train[index])
        criterion = nn.CrossEntropyLoss()
        loss = criterion(prediction, target.unsqueeze(0))
        loss.backward()
        optimizer.step()
#         if i % 200 == 0:
#             training_acc.append(assess(model, data_train, labels_train))
#             validation_acc.append(assess(model, data_test, labels_test))
        if i % 1000 == 0:
            train_checkpoint = assess(model, data_train, labels_train)
            model.eval()
            test_checkpoint = assess(model, data_test, labels_test)
            model.train()
            print("loss: ", loss)
            print(train_checkpoint, test_checkpoint)
            training_acc.append(train_checkpoint)
            validation_acc.append(test_checkpoint)
#                 with open("training_acc", "ab") as f:
#                     np.asarray(training_acc).tofile(f)
#                 with open("validation_acc", "ab") as f:
#                     np.asarray(validation_acc).tofile(f)
#                 training_acc = []
#                 validation_acc = []
    torch.save(model.state_dict(), "./model" + str(epoch).zfill(2))
    with open("training.csv", "a") as f:
        f.write(",".join([str(e) for e in training_acc]) + ",")
    with open("testing.csv", "a") as f:
        f.write(",".join([str(e) for e in validation_acc]) + ",")

loss:  tensor(2.2994, grad_fn=<NllLossBackward>)
0.1 0.02
loss:  tensor(2.3238, grad_fn=<NllLossBackward>)
0.2 0.22
loss:  tensor(2.1420, grad_fn=<NllLossBackward>)
0.12 0.14
loss:  tensor(2.3046, grad_fn=<NllLossBackward>)
0.16 0.08
loss:  tensor(2.4423, grad_fn=<NllLossBackward>)
0.1 0.22
loss:  tensor(2.3062, grad_fn=<NllLossBackward>)
0.16 0.1
loss:  tensor(1.9647, grad_fn=<NllLossBackward>)
0.22 0.18
loss:  tensor(2.2185, grad_fn=<NllLossBackward>)
0.28 0.22
loss:  tensor(2.3739, grad_fn=<NllLossBackward>)
0.26 0.26
loss:  tensor(1.9721, grad_fn=<NllLossBackward>)
0.24 0.26
loss:  tensor(2.4071, grad_fn=<NllLossBackward>)
0.26 0.3
loss:  tensor(2.0522, grad_fn=<NllLossBackward>)
0.16 0.18
loss:  tensor(2.3676, grad_fn=<NllLossBackward>)
0.28 0.26
loss:  tensor(2.4401, grad_fn=<NllLossBackward>)
0.26 0.36
loss:  tensor(2.3944, grad_fn=<NllLossBackward>)
0.3 0.24
loss:  tensor(2.3877, grad_fn=<NllLossBackward>)
0.34 0.36
loss:  tensor(2.3991, grad_fn=<NllLossBackward>)
0.2 0.38
loss

loss:  tensor(2.4546, grad_fn=<NllLossBackward>)
0.6 0.56
loss:  tensor(1.4660, grad_fn=<NllLossBackward>)
0.58 0.52
loss:  tensor(2.3880, grad_fn=<NllLossBackward>)
0.44 0.52
loss:  tensor(2.4431, grad_fn=<NllLossBackward>)
0.5 0.46
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.44 0.6
loss:  tensor(1.5897, grad_fn=<NllLossBackward>)
0.44 0.42
loss:  tensor(2.4393, grad_fn=<NllLossBackward>)
0.54 0.66
loss:  tensor(1.8978, grad_fn=<NllLossBackward>)
0.64 0.48
loss:  tensor(2.4609, grad_fn=<NllLossBackward>)
0.54 0.72
loss:  tensor(1.4900, grad_fn=<NllLossBackward>)
0.48 0.56
loss:  tensor(1.9033, grad_fn=<NllLossBackward>)
0.66 0.44
loss:  tensor(2.4577, grad_fn=<NllLossBackward>)
0.52 0.46
loss:  tensor(1.4747, grad_fn=<NllLossBackward>)
0.58 0.42
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.54 0.52
loss:  tensor(1.7480, grad_fn=<NllLossBackward>)
0.4 0.48
loss:  tensor(2.3923, grad_fn=<NllLossBackward>)
0.44 0.44
loss:  tensor(2.4452, grad_fn=<NllLossBackward>)
0.48 0.5
lo

loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.68 0.56
loss:  tensor(1.4808, grad_fn=<NllLossBackward>)
0.58 0.52
loss:  tensor(1.4782, grad_fn=<NllLossBackward>)
0.66 0.5
loss:  tensor(2.3458, grad_fn=<NllLossBackward>)
0.62 0.68
loss:  tensor(2.4570, grad_fn=<NllLossBackward>)
0.66 0.6
loss:  tensor(2.4245, grad_fn=<NllLossBackward>)
0.62 0.48
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.64 0.68
loss:  tensor(1.5936, grad_fn=<NllLossBackward>)
0.66 0.62
loss:  tensor(1.4966, grad_fn=<NllLossBackward>)
0.64 0.6
loss:  tensor(1.5994, grad_fn=<NllLossBackward>)
0.56 0.54
loss:  tensor(2.2031, grad_fn=<NllLossBackward>)
0.62 0.5
loss:  tensor(2.2951, grad_fn=<NllLossBackward>)
0.52 0.56
loss:  tensor(1.5078, grad_fn=<NllLossBackward>)
0.66 0.64
loss:  tensor(1.6832, grad_fn=<NllLossBackward>)
0.66 0.62
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.58 0.48
loss:  tensor(1.5259, grad_fn=<NllLossBackward>)
0.62 0.64
loss:  tensor(1.4724, grad_fn=<NllLossBackward>)
0.54 0.56
l

loss:  tensor(1.8640, grad_fn=<NllLossBackward>)
0.8 0.62
loss:  tensor(1.4629, grad_fn=<NllLossBackward>)
0.68 0.62
loss:  tensor(2.4604, grad_fn=<NllLossBackward>)
0.62 0.78
loss:  tensor(1.4626, grad_fn=<NllLossBackward>)
0.66 0.68
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.7 0.34
loss:  tensor(2.4273, grad_fn=<NllLossBackward>)
0.74 0.6
loss:  tensor(2.4304, grad_fn=<NllLossBackward>)
0.54 0.62
loss:  tensor(1.5194, grad_fn=<NllLossBackward>)
0.62 0.64
loss:  tensor(1.4739, grad_fn=<NllLossBackward>)
0.82 0.58
loss:  tensor(1.7299, grad_fn=<NllLossBackward>)
0.74 0.64
loss:  tensor(1.4879, grad_fn=<NllLossBackward>)
0.58 0.56
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.54 0.68
loss:  tensor(2.4255, grad_fn=<NllLossBackward>)
0.74 0.58
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.62 0.58
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.66 0.7
loss:  tensor(1.9041, grad_fn=<NllLossBackward>)
0.64 0.42
loss:  tensor(2.2956, grad_fn=<NllLossBackward>)
0.62 0.58
l

In [22]:
ground_truth = []
predicted_truth = []
for epoch in range(epochs):
    model = Model()
    model.load_state_dict(torch.load("./model" + str(epoch).zfill(2)))
    #model.load_state_dict(torch.load("./model", map_location='cpu'))
    model.eval()
    correct = 0
    total = 0
    for i in range(int(len(data)/10)):
        y_hats = model(data[i])
        for j in range(1, 10):
            y_hats = torch.cat((y_hats, model(data[i + 1000*j])))
        if torch.argmax(torch.sum(y_hats, dim=0)) == targets[i]:
            correct += 1
        total += 1
        ground_truth.append(torch.argmax(torch.sum(y_hats, dim=0)).numpy())
        predicted_truth.append(targets[i].numpy())
    print(epoch, ": ", correct / total)

0 :  0.274
1 :  0.36
2 :  0.363
3 :  0.423
4 :  0.449
5 :  0.468
6 :  0.464
7 :  0.484
8 :  0.507
9 :  0.537
10 :  0.532
11 :  0.544
12 :  0.586
13 :  0.589
14 :  0.59
15 :  0.6
16 :  0.619
17 :  0.632
18 :  0.62
19 :  0.644
20 :  0.662
21 :  0.644
22 :  0.646
23 :  0.676
24 :  0.672
25 :  0.702
26 :  0.674
27 :  0.689
28 :  0.7
29 :  0.7
30 :  0.722
31 :  0.71
32 :  0.73
33 :  0.749
34 :  0.737
35 :  0.736
36 :  0.742
37 :  0.751
38 :  0.752
39 :  0.763
40 :  0.771
41 :  0.753
42 :  0.762
43 :  0.768
44 :  0.755
45 :  0.775
46 :  0.777
47 :  0.777
48 :  0.788
49 :  0.778
