In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
torch.set_default_tensor_type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor)

In [2]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3,3))
        self.conv2 = nn.Conv2d(64, 64, kernel_size=(3,5))
        self.fc1 = nn.Linear(12544, 32)
        self.fc2 = nn.Linear(32, 10)
        self.dropout = nn.Dropout(.2)
    def forward(self, x):
        x = x.view(-1, 1, 64, 256)
        x = F.relu(self.conv1(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = x.view(-1, 12544)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        return F.softmax(x, dim=1)

In [3]:
def assess(model, songs, labels):
    correct = 0
    total = 0
    for i in range(50):
        index = np.random.randint(len(labels))
        pred = model(songs[index])
        if torch.argmax(pred, dim=1) == labels[index]:
            correct += 1
        total += 1
    return correct/total

In [11]:
with np.load("./audio_sr_label.npz") as f:
    data = f['X']
    labels = list(f['T'])
data = np.asarray([[i[:2560] for i in j] for j in data])
seed = data[:, :, :256]
for i in range(1, 10):
    seed = np.append(seed, data[:, :, i*256:(i+1)*256], axis=0)
data = torch.tensor(seed, dtype=torch.float)
labels = labels * 10 #expand dimensions accordingly
label_set = set(labels)
mapping = {}
for count, i in enumerate(label_set):
    mapping[i] = count
targets = np.zeros(len(labels))
for i in range(len(targets)):
    targets[i] = mapping[labels[i]]
targets = torch.tensor(targets, dtype=torch.long)

In [12]:
indices = np.random.choice(np.arange(len(targets)), len(targets), replace=False)
train_index = indices[int(len(indices)/10):]
test_index = indices[:int(len(indices)/10)]

labels_train = targets[train_index]
labels_test = targets[test_index]
data_train = data[train_index]
data_test = data[test_index]
#double check ^^ above stuff

In [4]:
model = Model()
optimizer = optim.Adam(list(model.parameters()), lr=1e-5)
if torch.cuda.is_available():
    model.cuda()
    data_train.cuda()
    data_test.cuda()
    labels_train.cuda()
    labels_test.cuda()

In [14]:
training_acc = []
validation_acc = []
model.train()
epochs = 50
for epoch in range(epochs):
    for i in range(len(labels_train)):
        index = np.random.randint(len(labels_train))
        optimizer.zero_grad()
        target = labels_train[index]
        prediction = model(data_train[index])
        criterion = nn.CrossEntropyLoss()
        loss = criterion(prediction, target.unsqueeze(0))
        loss.backward()
        optimizer.step()
#         if i % 200 == 0:
#             training_acc.append(assess(model, data_train, labels_train))
#             validation_acc.append(assess(model, data_test, labels_test))
        if i % 1000 == 0:
            train_checkpoint = assess(model, data_train, labels_train)
            model.eval()
            test_checkpoint = assess(model, data_test, labels_test)
            model.train()
            print("loss: ", loss)
            print(train_checkpoint, test_checkpoint)
#                 with open("training_acc", "ab") as f:
#                     np.asarray(training_acc).tofile(f)
#                 with open("validation_acc", "ab") as f:
#                     np.asarray(validation_acc).tofile(f)
#                 training_acc = []
#                 validation_acc = []
    torch.save(model.state_dict(), "./model" + str(epoch).zfill(2))

loss:  tensor(2.2729, grad_fn=<NllLossBackward>)
0.1 0.06
loss:  tensor(2.3237, grad_fn=<NllLossBackward>)
0.24 0.14
loss:  tensor(2.3256, grad_fn=<NllLossBackward>)
0.22 0.16
loss:  tensor(2.1047, grad_fn=<NllLossBackward>)
0.18 0.3
loss:  tensor(1.9837, grad_fn=<NllLossBackward>)
0.26 0.24
loss:  tensor(2.4359, grad_fn=<NllLossBackward>)
0.14 0.24
loss:  tensor(2.4161, grad_fn=<NllLossBackward>)
0.24 0.28
loss:  tensor(1.7770, grad_fn=<NllLossBackward>)
0.16 0.34
loss:  tensor(2.4124, grad_fn=<NllLossBackward>)
0.28 0.32
loss:  tensor(1.7898, grad_fn=<NllLossBackward>)
0.36 0.24
loss:  tensor(2.4071, grad_fn=<NllLossBackward>)
0.32 0.24
loss:  tensor(2.2561, grad_fn=<NllLossBackward>)
0.3 0.26
loss:  tensor(2.3631, grad_fn=<NllLossBackward>)
0.26 0.28
loss:  tensor(2.4021, grad_fn=<NllLossBackward>)
0.26 0.24
loss:  tensor(1.6249, grad_fn=<NllLossBackward>)
0.3 0.36
loss:  tensor(2.4330, grad_fn=<NllLossBackward>)
0.1 0.34
loss:  tensor(2.4203, grad_fn=<NllLossBackward>)
0.28 0.32
lo

loss:  tensor(1.5388, grad_fn=<NllLossBackward>)
0.58 0.52
loss:  tensor(2.4610, grad_fn=<NllLossBackward>)
0.42 0.42
loss:  tensor(1.4621, grad_fn=<NllLossBackward>)
0.6 0.54
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.48 0.5
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.58 0.46
loss:  tensor(1.4827, grad_fn=<NllLossBackward>)
0.52 0.58
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.56 0.5
loss:  tensor(2.4490, grad_fn=<NllLossBackward>)
0.58 0.38
loss:  tensor(2.4447, grad_fn=<NllLossBackward>)
0.42 0.44
loss:  tensor(2.4068, grad_fn=<NllLossBackward>)
0.56 0.42
loss:  tensor(2.4372, grad_fn=<NllLossBackward>)
0.58 0.44
loss:  tensor(2.3456, grad_fn=<NllLossBackward>)
0.4 0.5
loss:  tensor(1.6754, grad_fn=<NllLossBackward>)
0.58 0.48
loss:  tensor(2.4391, grad_fn=<NllLossBackward>)
0.52 0.62
loss:  tensor(1.4619, grad_fn=<NllLossBackward>)
0.52 0.46
loss:  tensor(1.8791, grad_fn=<NllLossBackward>)
0.56 0.42
loss:  tensor(1.5118, grad_fn=<NllLossBackward>)
0.44 0.36
lo

loss:  tensor(1.5041, grad_fn=<NllLossBackward>)
0.62 0.5
loss:  tensor(2.4474, grad_fn=<NllLossBackward>)
0.66 0.68
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.48 0.48
loss:  tensor(1.4648, grad_fn=<NllLossBackward>)
0.48 0.6
loss:  tensor(1.4641, grad_fn=<NllLossBackward>)
0.56 0.6
loss:  tensor(2.4450, grad_fn=<NllLossBackward>)
0.68 0.46
loss:  tensor(2.4212, grad_fn=<NllLossBackward>)
0.56 0.6
loss:  tensor(2.4607, grad_fn=<NllLossBackward>)
0.76 0.66
loss:  tensor(1.6646, grad_fn=<NllLossBackward>)
0.62 0.56
loss:  tensor(1.4958, grad_fn=<NllLossBackward>)
0.62 0.6
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.52 0.58
loss:  tensor(2.0375, grad_fn=<NllLossBackward>)
0.66 0.5
loss:  tensor(1.4612, grad_fn=<NllLossBackward>)
0.64 0.48
loss:  tensor(1.9046, grad_fn=<NllLossBackward>)
0.72 0.54
loss:  tensor(2.4611, grad_fn=<NllLossBackward>)
0.7 0.54
loss:  tensor(1.4862, grad_fn=<NllLossBackward>)
0.72 0.54
loss:  tensor(1.4657, grad_fn=<NllLossBackward>)
0.7 0.5
loss: 

In [15]:
for epoch in range(epochs):
    model = Model()
    model.load_state_dict(torch.load("./model" + str(epoch).zfill(2)))
    model.eval()
    correct = 0
    total = 0
    for i in range(int(len(data)/10)):
        y_hats = model(data[i])
        for j in range(1, 10):
            y_hats = torch.cat((y_hats, model(data[i + 1000*j])))
        if torch.argmax(torch.sum(y_hats, dim=0)) == targets[i]:
            correct += 1
        total += 1
    print(epoch, ": ", correct / total)

0 :  0.327
1 :  0.366
2 :  0.394
3 :  0.419
4 :  0.41
5 :  0.453
6 :  0.491
7 :  0.517
8 :  0.522
9 :  0.535
10 :  0.549
11 :  0.56
12 :  0.566
13 :  0.57
14 :  0.583
15 :  0.595
16 :  0.601
17 :  0.591
18 :  0.611
19 :  0.628
20 :  0.626
21 :  0.63
22 :  0.635
23 :  0.666
24 :  0.677
25 :  0.677
26 :  0.685
27 :  0.702
28 :  0.71
29 :  0.718
30 :  0.706
31 :  0.688
32 :  0.717
33 :  0.728
34 :  0.719
35 :  0.736
36 :  0.726
37 :  0.717
38 :  0.741
39 :  0.743
40 :  0.746
41 :  0.761
42 :  0.763
43 :  0.756
44 :  0.747
45 :  0.757
46 :  0.761
47 :  0.773
48 :  0.777
49 :  0.771


In [6]:
model = torch.load("model49", map_location='cpu')
sample = np.load("../test")