In [46]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from random import shuffle
import time
is_cuda = torch.cuda.is_available()
torch.set_default_tensor_type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor)
print(is_cuda)

True


In [47]:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3,3))
        self.conv2 = nn.Conv2d(64, 64, kernel_size=(3,5))
        self.fc1 = nn.Linear(12544, 128)
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2,4))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, (2,4))
        x = x.view(-1, 12544)
        x = F.relu(self.fc1(x))
        return torch.sigmoid(x)
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(128, 12544)
        self.deconv1 = nn.ConvTranspose2d(64, 64, kernel_size=(5,9))
        self.deconv2 = nn.ConvTranspose2d(64, 1, kernel_size=(3,3), padding=1)
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = x.view(-1, 64, 14, 14)
        x = F.interpolate(x, scale_factor=(2,4))
        x = F.relu(self.deconv1(x))
        x = F.interpolate(x, scale_factor=(2,4))
        x = F.sigmoid(self.deconv2(x))
        return x
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3,3))
        self.conv2 = nn.Conv2d(64, 64, kernel_size=(3,5))
        self.fc1 = nn.Linear(12544, 32)
        self.fc2 = nn.Linear(32, 10)
        self.dropout = nn.Dropout(.2)
    def forward(self, x):
        x = x.view(-1, 1, 64, 256)
        x = F.relu(self.conv1(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = F.max_pool2d(x, (2,4))
        x = x.view(-1, 12544)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        return F.softmax(x, dim=1)

In [48]:
class SongDataTrain(Dataset):
    def __init__(self, path):
        with np.load(path) as f:
            data = f['X']
            labels = list(f['T'])
        data = np.asarray([[i[:2560] for i in j] for j in data])
        seed = data[:, :, :256]
        for i in range(1, 10):
            seed = np.append(seed, data[:, :, i*256:(i+1)*256], axis=0)
        self.normalizer = np.max(seed)
        self.data = torch.tensor(seed, dtype=torch.float) / np.max(seed)
        self.data = self.data.view(-1, 1, 64, 256)
        labels = labels * 10 #expand dimensions accordingly
        label_set = set(labels)
        mapping = {}
        for count, i in enumerate(label_set):
            mapping[i] = count
        targets = np.zeros(len(labels))
        for i in range(len(targets)):
            targets[i] = mapping[labels[i]]
        self.targets = torch.tensor(targets, dtype=torch.long)
        
        indices = np.random.choice(np.arange(len(self.targets)), len(self.targets), replace=False)
        self.targets = self.targets[indices[int(.1 * len(self.targets)):]]
        self.data = self.data[indices[int(.1 * len(self.data)):]]
    def __getitem__(self, index):
        return (self.data[index], self.targets[index])
    def __len__(self):
        return (len(self.targets))
class SongDataTest(Dataset):
    def __init__(self, path):
        with np.load(path) as f:
            data = f['X']
            labels = list(f['T'])
        data = np.asarray([[i[:2560] for i in j] for j in data])
        seed = data[:, :, :256]
        for i in range(1, 10):
            seed = np.append(seed, data[:, :, i*256:(i+1)*256], axis=0)
        self.normalizer = np.max(seed)
        self.data = torch.tensor(seed, dtype=torch.float) / np.max(seed)
        self.data = self.data.view(-1, 1, 64, 256)
        labels = labels * 10 #expand dimensions accordingly
        label_set = set(labels)
        mapping = {}
        for count, i in enumerate(label_set):
            mapping[i] = count
        targets = np.zeros(len(labels))
        for i in range(len(targets)):
            targets[i] = mapping[labels[i]]
        self.targets = torch.tensor(targets, dtype=torch.long)
        
        indices = np.random.choice(np.arange(len(self.targets)), len(self.targets), replace=False)
        self.targets = self.targets[indices[:int(.1 * len(self.targets))]]
        self.data = self.data[indices[:int(.1 * len(self.data))]]
    def __getitem__(self, index):
        return (self.data[index], self.targets[index])
    def __len__(self):
        return (len(self.targets))

In [49]:
dataset_train = SongDataTrain("../audio_sr_label.npz") #initializes our dataset
dataset_test = SongDataTest("../audio_sr_label.npz") #initializes our dataset

In [50]:
split = .1
dataloader_train = DataLoader(dataset_train, shuffle=True, batch_size=128)
dataloader_test = DataLoader(dataset_test, shuffle=True, batch_size=128)

In [51]:
encoder = Encoder()
decoder = Decoder()
#classifier = Model()
#classifier.load_state_dict(torch.load("./cnn_model", map_location='cpu'))
#classifier.eval()
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=1e-3)
epochs = 10
alpha = .95

In [52]:
train_loss = []
valid_loss = []
encoder.train()
decoder.train()
zero = torch.zeros((128,128))
for i in range(zero.shape[0]):
    for j in range(zero.shape[1]):
        if i < j:
            zero[i][j] = 1
epochs = 1
for epoch in range(epochs):
    tr = 0.0
    for i, (x, _label) in enumerate(dataloader_train):
        benchmark = time.time()
        optimizer.zero_grad()
        z = encoder(x)
        y = decoder(z)
        
        matrix = torch.pow(z.unsqueeze(0).repeat(z.shape[0], 1, 1)
                           - z.unsqueeze(1), 2).sum(2)
        matrix = matrix * zero
        partition = np.argpartition(torch.argmax(matrix, dim=0), 12).type(
                    torch.cuda.LongTensor if is_cuda else torch.LongTensor)

        in_partition = partition[-12:]
        out_partition = partition[:-12]
        like_loss = torch.zeros(1)
        unlike_loss = torch.zeros(1)
        for a in range(z.shape[0]):
            for b in range(z.shape[0]):
                z_a = z[a] / torch.norm(z[a])
                z_b = z[b] / torch.norm(z[b])
                
                if torch.tensor(a) in in_partition and torch.tensor(b) in in_partition:
                    like_loss += F.cosine_similarity(z_a.unsqueeze(0), z_b.unsqueeze(0))
                else:
                    unlike_loss += torch.abs(
                                    F.cosine_similarity(z_a.unsqueeze(0), z_b.unsqueeze(0)))
        
        criterion = nn.BCELoss()
        like_normalizer = (1 - alpha) / 12
        unlike_normalizer = 1 / (128**2 - 12)
        loss = unlike_normalizer * unlike_loss - like_normalizer * like_loss + 10 * criterion(y, x)
        loss.backward()
        tr += float(loss.data)
        optimizer.step()
        if i % 5 == 0:
            print(tr/5)
            tr = 0.0
            if i == 20:
                break
        print(time.time() - benchmark)

1.4373364448547363
24.832164764404297
25.48765778541565
24.22754216194153
25.12329864501953
24.7794029712677
1.7243234872817994
24.95043683052063
24.619219064712524
24.536940574645996
24.98870873451233
24.898827075958252
0.5477167963981628
24.889012575149536
24.91123628616333
24.834424257278442
24.683621168136597
25.131150722503662
0.5767188549041748
24.833845615386963
24.511905670166016
25.496426343917847
27.07684016227722
26.03217101097107
0.6255094885826111


In [None]:
centroids = torch.rand((10, 128))
s = None
like_normalizer, unlike_normalizer = 0.0,0.0
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=1e-3)
epochs = 50
for epoch in range(epochs):
    tr = 0.0
    for i, (x, _label) in enumerate(dataloader_train):
        optimizer.zero_grad()
        z = encoder(x)
        y = decoder(z)
        if i % 50 == 0:
            z = z / torch.norm(z, dim=1).unsqueeze(1) #normalizes z's
            z = z + torch.rand(128, 128) / 10 #regularization

            matrix = torch.pow(z.unsqueeze(0).repeat(centroids.shape[0], 1, 1) 
                               - centroids.unsqueeze(1), 2).sum(2)
            indices = torch.argmax(matrix, dim=0)
            
            s = torch.zeros((10,128)).scatter_(0, indices.unsqueeze(0), torch.ones(128).unsqueeze(0))
            centroids = torch.matmul(centroids, z) * s
            
            
            partition = np.argpartition(torch.argmax(matrix, dim=0), 12)
            partition = np.argpartition(torch.argmax(matrix, dim=0), 12).type(
                    torch.cuda.LongTensor if is_cuda else torch.LongTensor)
            
            in_partition = partition[-12:]
            out_partition = partition[:-12]
            like_loss = torch.zeros(1)
            unlike_loss = torch.zeros(1)
            for a in range(z.shape[0]):
                for b in range(z.shape[0]):
                    z_a = z[a] / torch.norm(z[a])
                    z_b = z[b] / torch.norm(z[b])
                    if torch.tensor(a) in in_partition and torch.tensor(b) in in_partition:
                        like_loss += F.cosine_similarity(z_a.unsqueeze(0), z_b.unsqueeze(0))
                    else:
                        unlike_loss += torch.abs(
                                        F.cosine_similarity(z_a.unsqueeze(0), z_b.unsqueeze(0)))
        
        like_normalizer = (1 - alpha) / 12
        unlike_normalizer = 1 / (128**2 - 12)
        
        loss = matrix.sum() - 4 * criterion(y, x)  - unlike_normalizer * unlike_loss - like_normalizer * like_loss
        loss.backward(retain_graph=True)
        tr += float(loss.data)
        optimizer.step()
        if i % 50 == 0:
            print(tr/100)
            tr = 0.0
            torch.save(encoder.state_dict(), "encoder")
            torch.save(decoder.state_dict(), "decoder")

356.53859375
31462.8138671875
3992871.04
1379614963.84
