In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from pydub import AudioSegment as AS
from pydub.playback import play
import os
import pickle
import csv

In [2]:
with open("../songs.data", "rb") as f:
    songs = np.fromfile(f)
with open("../labels.data", "rb") as f:
    labels = np.fromfile(f)
label_set = set([])
mapping = {}
for i in labels:
    label_set.add(i)
for count, i in enumerate(label_set):
    mapping[i] = count
new_labels = np.zeros((len(labels)))
for i in range(len(new_labels)):
    new_labels[i] = mapping[labels[i]]
songs = torch.tensor(songs.reshape(-1, 96000), dtype=torch.float)
songs_train = songs[:int(-1*len(songs)/10)]
songs_test = songs[int(-1*len(songs)/10):]
labels = new_labels
labels = torch.tensor(labels.reshape(labels.shape[0], -1), dtype=torch.long)
labels_train = labels[:int(-1*len(labels)/10)]
labels_test = labels[int(-1*len(labels)/10):]


In [3]:
print(labels_train.shape)

torch.Size([28855, 1])


In [4]:
class Model(nn.Module):
    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.conv1 = nn.Conv1d(1, 40, kernel_size=8000)
        self.conv2 = nn.Conv1d(40, 20, kernel_size=200)
        self.conv3 = nn.Conv1d(20, 10, kernel_size=200)
        self.fc1 = nn.Linear(660, 400)
        self.fc2 = nn.Linear(400, 200)
        self.fc3 = nn.Linear(200, 50)
        self.fc4 = nn.Linear(50, output_size)
    def forward(self, x):
        x = x.view(-1, 1, x.shape[0])
        #print(x.shape)
        x = F.relu(self.conv1(x))
        #print(x.shape)
        x = F.max_pool1d(x, 10)
        #print(x.shape)
        x = F.relu(self.conv2(x))
        #print(x.shape)
        x = F.max_pool1d(x, 10)
        #print(x.shape)
        x = F.relu(self.conv3(x))
        #print(x.shape)
        x = F.max_pool1d(x, 10)
        #print(x.shape)
        x = x.view(-1, 660)
        x = F.relu(self.fc1(x))
        #print(x.shape)
        x = F.relu(self.fc2(x))
        #print(x.shape)
        x = F.relu(self.fc3(x))
        #print(x.shape)
        x = F.relu(self.fc4(x))
        #print(x.shape)
        x = x.view(-1, self.output_size)
        #print(x.shape)
        return F.log_softmax(x, dim=1)

In [None]:
model = Model(songs.shape[1], len(label_set))
optimizer = optim.Adam(list(model.parameters()), lr=1e-3)
if torch.cuda.is_available():
    model.cuda()
    songs_train.cuda()
    songs_test.cuda()
    labels_train.cuda()
    labels_test.cuda()

In [None]:
def assess(model, songs, labels):
    model.eval()
    correct = 0
    total = 0
    for i in range(5):
        index = np.random.randint(len(labels))
        pred = model(songs[index])
        if torch.argmax(pred, dim=1) == labels[index]:
            correct += 1
        total += 1
    return correct/total

In [None]:
training_acc = []
validation_acc = []
for epoch in range(1):
    for i in range(len(labels_train)):
        model.train()
        optimizer.zero_grad()
        target = labels_train[i]
        prediction = model(songs_train[i])
        loss = F.nll_loss(prediction, target)
        loss.backward()
        optimizer.step()
        if i % 5 == 0:
            print("loss: ", loss)
            if i % 20 == 0:
                training_acc.append(assess(model, songs_train, labels_train))
                validation_acc.append(assess(model, songs_test, labels_test))
                print(training_acc[-1], validation_acc[-1])
                

loss:  tensor(3.0987, grad_fn=<NllLossBackward>)
