In [1]:
# Mount Google Drive at /content/drive. This cell only mounts the drive; the
# change of working directory to the project folder is done by a separate %cd cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Work from the project folder so the relative .npy / .pt paths used in later cells resolve.
%cd "/content/drive/MyDrive/520 Final Project"

/content/drive/.shortcut-targets-by-id/1KHBlsdIi36o_ikonJZ-usBVzGPrZlH0V/520 Final Project


# Try using sklearn to do logistic regression (runs out of RAM)

In [3]:
import numpy as np

# Maximum number of rows kept from each array for the sklearn experiment.
N_SAMPLES = 30000


def _load_head(path, n_rows=N_SAMPLES):
    """Return an in-memory copy of the first ``n_rows`` rows stored in ``path``.

    The .npy file is memory-mapped and only the requested rows are copied.
    Loading the whole array and slicing it afterwards would pull every row
    into RAM, and the slice (a view) would keep the full array alive.
    """
    mapped = np.load(path, mmap_mode="r")
    return np.array(mapped[:n_rows])


X_train = _load_head("X_train.npy")
y_train = _load_head("y_train.npy")
X_test = _load_head("X_test.npy")
y_test = _load_head("y_test.npy")

In [4]:
# Sanity check: display the shapes of the training features and labels.
X_train.shape, y_train.shape

((30000, 16384), (30000, 128))

In [5]:
# Restrict the labels to the pitches that occur often enough to learn from:
# columns LOWER_NOTE through UPPER_NOTE, both ends included.
LOWER_NOTE = 40
UPPER_NOTE = 84
y_train, y_test = (
    labels[:, LOWER_NOTE:UPPER_NOTE + 1] for labels in (y_train, y_test)
)

In [6]:
# Sanity check: display the label shape after restricting the note range.
y_train.shape

(30000, 45)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier

# Train one independent logistic regression per note column.
# max_iter is raised explicitly because the solver previously stopped with
# "TOTAL NO. of ITERATIONS REACHED LIMIT" under the default iteration budget.
# NOTE(review): scaling the features first may also be needed for convergence.
model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)
# Score on the training set.
# NOTE(review): for a multi-output classifier this is presumably the fraction of
# rows where every note is predicted correctly, not a per-note accuracy - confirm.
train_accuracy = model.score(X_train, y_train)
print(train_accuracy)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


# Use PyTorch Instead

In [3]:
# Make PyTorch dataset class for the dataset
import numpy as np
import torch
from torch.utils.data import Dataset

class MusicnetDataset(Dataset):
    """Dataset that pairs rows of a feature array with rows of a label array.

    Both arrays are loaded fully into memory when the dataset is constructed.

    Args:
        audio_dir: Path to the ``.npy`` file holding the features, one row per
            example. Despite the name this is a file path, not a directory.
        labels_dir: Path to the ``.npy`` file holding the 2-D label array, one
            row per example. Despite the name this is a file path.
        lower_note: Index of the first label column to keep (inclusive).
        upper_note: Index of the last label column to keep (inclusive).

    Raises:
        ValueError: If the feature and label arrays have different row counts.
    """

    def __init__(self, audio_dir, labels_dir, lower_note=40, upper_note=84):
        with open(audio_dir, 'rb') as f:
            self.features = np.load(f)
        with open(labels_dir, 'rb') as f:
            # Keep only the label columns lower_note..upper_note (inclusive).
            self.labels = np.load(f)[:, lower_note:upper_note + 1]
        # Fail early instead of raising IndexError (or silently ignoring extra
        # label rows) the first time a mismatched index is requested.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same number of rows, "
                f"got {len(self.features)} and {len(self.labels)}"
            )
        self.len = len(self.features)

    def __len__(self):
        """Return the number of examples."""
        return self.len

    def __getitem__(self, idx):
        """Return the ``(features, labels)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

In [4]:
# Training split: features and labels are read from the working directory.
train_set = MusicnetDataset(audio_dir="X_train.npy", labels_dir="y_train.npy")

In [5]:
# Test split: features and labels are read from the working directory.
test_set = MusicnetDataset(audio_dir="X_test.npy", labels_dir="y_test.npy")

In [6]:
# Number of examples in the training split.
print(len(train_set))

74189


In [7]:
# Mini-batch loaders over the two splits. Only the training loader shuffles:
# the evaluation loops in this notebook just accumulate per-note counts over
# all batches, so the test loader keeps a fixed, reproducible order.
BATCH_SIZE = 64
trainloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
# test trainloader
%time audios, labels = iter(trainloader).next()

CPU times: user 9.69 ms, sys: 4.73 ms, total: 14.4 ms
Wall time: 132 ms


In [9]:
%time len(train_set.__getitem__(1)[1]) # nice and fast now!

CPU times: user 3 µs, sys: 10 µs, total: 13 µs
Wall time: 15.3 µs


45

In [10]:
# Logistic regression expressed as one linear layer. The forward pass returns
# raw logits; no sigmoid is applied here because the loss function applies it.
class Logistic(torch.nn.Module):
    """Single fully connected layer mapping ``input_dims`` features to ``output_dims`` logits."""

    def __init__(self, input_dims, output_dims):
        super().__init__()
        self.fc1 = torch.nn.Linear(in_features=input_dims, out_features=output_dims)

    def forward(self, x):
        """Return the un-activated output of the linear layer for ``x``."""
        return self.fc1(x)

In [11]:
from torchsummary import summary

# Model dimensions: the input width equals the window size, and there is one
# output per note we are considering.
windowsize = 16384
input_dims, output_dims = windowsize, 45
logistic = Logistic(input_dims=input_dims, output_dims=output_dims)

In [12]:
# Train the model with mini-batch SGD on a binary-cross-entropy-with-logits loss.
epochs = 50
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(logistic.parameters(), lr=1e-1)
# One entry per epoch: the sum of the per-batch loss values for that epoch
# (these are BCE losses, despite the variable name).
train_mse = []
# Run the model in float64 to match the dtype the inputs are cast to below.
logistic = logistic.double()
for epoch in range(epochs):
    running_loss = 0
    for audio, label in trainloader:
        # Clear the gradients left over from the previous batch.
        optimizer.zero_grad()
        # Forward pass and loss for this batch.
        y_predicted = logistic(audio.double())
        loss = criterion(y_predicted, label)
        # Backward pass, then one SGD update of the weights.
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_mse.append(running_loss)
    # Report progress every third epoch.
    if not (epoch + 1) % 3:
        print(f'epoch: {epoch+1}, loss: {running_loss:.4f}')

epoch: 3, loss: 341.7093
epoch: 6, loss: 282.3941
epoch: 9, loss: 269.6598
epoch: 12, loss: 265.4461
epoch: 15, loss: 263.5780
epoch: 18, loss: 262.6169
epoch: 21, loss: 262.0645
epoch: 24, loss: 261.7144
epoch: 27, loss: 261.4021
epoch: 30, loss: 261.1738
epoch: 33, loss: 260.9586
epoch: 36, loss: 260.7807
epoch: 39, loss: 260.6777
epoch: 42, loss: 260.5057
epoch: 45, loss: 260.3389
epoch: 48, loss: 260.2477


In [14]:
# Per-note accuracy on the training set: for each of the 45 outputs, the
# fraction of examples whose thresholded prediction equals the label.
correct = torch.zeros(45)
total = 0
with torch.no_grad():
    for audio, label in trainloader:
        outputs = logistic(audio.double())
        # A positive logit is counted as predicting the note to be present.
        correct += ((outputs > 0) == label).sum(dim=0)
        total += float(len(label))
    accuracy = correct / total
    print(accuracy)

tensor([0.9780, 0.9676, 0.9854, 0.9597, 0.9731, 0.9581, 0.9593, 0.9727, 0.9416,
        0.9690, 0.9390, 0.9497, 0.9403, 0.9263, 0.9641, 0.9091, 0.9425, 0.8965,
        0.9154, 0.9292, 0.8886, 0.9269, 0.8683, 0.9203, 0.8878, 0.8911, 0.9305,
        0.8875, 0.9292, 0.8867, 0.9272, 0.9332, 0.9101, 0.9431, 0.9056, 0.9497,
        0.9356, 0.9382, 0.9622, 0.9425, 0.9690, 0.9539, 0.9714, 0.9769, 0.9686])


In [13]:
# Per-note accuracy on the test set: for each of the 45 outputs, the fraction
# of examples whose thresholded prediction equals the label.
correct = torch.zeros(45)
total = 0
with torch.no_grad():
    for audio, label in testloader:
        outputs = logistic(audio.double())
        # A positive logit is counted as predicting the note to be present.
        correct += ((outputs > 0) == label).sum(dim=0)
        total += float(len(label))
    accuracy = correct / total
    # Accuracy is clearly a poor metric here: right now the model is just
    # predicting "false" on basically everything.
    print(accuracy)

tensor([0.9864, 0.9832, 0.9872, 0.9686, 0.9596, 0.9631, 0.9545, 0.9741, 0.9553,
        0.9857, 0.9322, 0.9209, 0.9709, 0.9638, 0.9653, 0.8975, 0.9485, 0.9480,
        0.8945, 0.9613, 0.9370, 0.9560, 0.9186, 0.9040, 0.9523, 0.9093, 0.9410,
        0.9008, 0.9397, 0.9324, 0.9319, 0.9483, 0.9483, 0.9498, 0.9113, 0.9307,
        0.9488, 0.9560, 0.9651, 0.9407, 0.9724, 0.9792, 0.9819, 0.9839, 0.9814])


In [15]:
# Save the trained model to 'logistic.pt' in the current working directory.
# NOTE(review): this passes the whole module object to torch.save rather than
# logistic.state_dict(); loading it later presumably requires the Logistic
# class definition to be available - confirm before reusing the file elsewhere.
torch.save(logistic, 'logistic.pt')