In [8]:
import getdataset as gd
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as f
import numpy as np
import torch.optim as optim
import sys
# from pytorch_memlab import profile
    
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=(1,1))
        self.conv2 = nn.Conv2d(8, 16, kernel_size=5, stride=(1,2))
        self.conv3 = nn.Conv2d(16, 32, kernel_size=5, stride=(1,2))
        self.conv4 = nn.Conv2d(32, 32, kernel_size=7, stride=(1,2))
        self.conv5 = nn.Conv2d(32, 64, kernel_size=7, stride=(2,3))
        self.conv6 = nn.Conv2d(64, 64, kernel_size=7, stride=(2,3))
        
        self.pool = nn.MaxPool2d(2, 2)
        self.drop1 = nn.Dropout2d(p=0.25)
        self.drop2 = nn.Dropout1d(p=0.5)
        
        self.flatten = nn.Flatten()
        
        self.linear1 = nn.Linear(70400, 16384)
        self.linear2 = nn.Linear(16384, 8192)
            
    # @profile

    def forward(self, x):
        x = f.relu(self.conv1(x))
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = f.relu(self.conv4(x))
        x = f.relu(self.conv5(x))
        x = f.relu(self.conv6(x))
        print(x.shape)
        x = self.pool(x)
        print(x.shape)
        x = self.flatten(x)
        print(x.shape)
        x = self.linear1(x)
        x = self.linear2(x)
        print(x.shape)
        return x

class Decoder(nn.Module):
    def __init__(self, fret_size=21, effect_size=10, duration_size=[8, 2, 3], embedding_dim=3, hidden_dim=120):
        super(Decoder, self).__init__()
        # setting vocab size
        self.fret_size = fret_size
        self.effect_size = effect_size
        self.duration_index_size = duration_size[0]
        self.duration_isDotted_size = duration_size[1]
        self.duration_Tuplet_size = duration_size[2]
        
        # setting embedding
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.position_embeddings = nn.Embedding(self.fret_size, self.embedding_dim, padding_idx=0)
        self.effect_embeddings = nn.Embedding(self.effect_size, self.embedding_dim, padding_idx=0)
        self.duration_index_embeddings = nn.Embedding(self.duration_index_size, self.embedding_dim, padding_idx=0)
        
        # setting rnn
        self.rnn = nn.RNN(self.embedding_dim, self.hidden_dim, num_layers=384, batch_first=True)
        self.fc1 = nn.Linear(self.hidden_dim, 4096)
        self.fc2 = nn.Linear(4096, 512)
        # setting position
        self.pos1 = nn.Linear(512, self.fret_size)
        self.pos2 = nn.Linear(512, self.fret_size)
        self.pos3 = nn.Linear(512, self.fret_size)
        self.pos4 = nn.Linear(512, self.fret_size)
        self.pos5 = nn.Linear(512, self.fret_size)
        self.pos6 = nn.Linear(512, self.fret_size)
        # setting effect
        self.eff1 = nn.Linear(512 + self.fret_size, self.effect_size)
        self.eff2 = nn.Linear(512 + self.fret_size, self.effect_size)
        self.eff3 = nn.Linear(512 + self.fret_size, self.effect_size)
        self.eff4 = nn.Linear(512 + self.fret_size, self.effect_size)
        self.eff5 = nn.Linear(512 + self.fret_size, self.effect_size)
        self.eff6 = nn.Linear(512 + self.fret_size, self.effect_size)
        # setting ties
        self.tie1 = nn.Linear(512 + self.fret_size, 2)
        self.tie2 = nn.Linear(512 + self.fret_size, 2)
        self.tie3 = nn.Linear(512 + self.fret_size, 2)
        self.tie4 = nn.Linear(512 + self.fret_size, 2)
        self.tie5 = nn.Linear(512 + self.fret_size, 2)
        self.tie6 = nn.Linear(512 + self.fret_size, 2)
        # setting duration
        self.dur1 = nn.Linear(512, self.duration_index_size)
        self.dur2 = nn.Linear(512, self.duration_isDotted_size)
        self.dur3 = nn.Linear(512, self.duration_Tuplet_size)

    def forward(self, position, effects, ties, duration, x):
        # embedding
        position_embedding = self.position_embeddings(position)
        effect_embedding = self.effect_embeddings(effects)
        ties_onehot = f.one_hot(ties, num_classes=2)
        duration_index_embedding = self.duration_index_embeddings(duration[0])
        duration_isDotted_onehot = f.one_hot(duration[1], num_classes=self.duration_isDotted_size)
        duration_isTriplet_onehot = f.one_hot(duration[2], num_classes=self.duration_Tuplet_size)
        # concat
        x = torch.cat([position_embedding, effect_embedding, ties_onehot, duration_index_embedding, duration_isDotted_onehot, duration_isTriplet_onehot], dim=0)
        # rnn
        x, state = self.rnn(position_embedding, x)
        x = self.fc1(x)
        x = self.fc2(x)
        # position
        pos1 = self.pos1.unsqueeze(0)
        pos2 = self.pos2.unsqueeze(0)
        pos3 = self.pos3.unsqueeze(0)
        pos4 = self.pos4.unsqueeze(0)
        pos5 = self.pos5.unsqueeze(0)
        pos6 = self.pos6.unsqueeze(0)
        # effect
        eff1 = self.eff1(torch.cat([x, pos1], dim=0)).unsqueeze(0)
        eff2 = self.eff2(torch.cat([x, pos2], dim=0)).unsqueeze(0)
        eff3 = self.eff3(torch.cat([x, pos3], dim=0)).unsqueeze(0)
        eff4 = self.eff4(torch.cat([x, pos4], dim=0)).unsqueeze(0)
        eff5 = self.eff5(torch.cat([x, pos5], dim=0)).unsqueeze(0)
        eff6 = self.eff6(torch.cat([x, pos6], dim=0)).unsqueeze(0)
        # ties
        tie1 = self.tie1(torch.cat([x, pos1], dim=0)).unsqueeze(0)
        tie2 = self.tie2(torch.cat([x, pos2], dim=0)).unsqueeze(0)
        tie3 = self.tie3(torch.cat([x, pos3], dim=0)).unsqueeze(0)
        tie4 = self.tie4(torch.cat([x, pos4], dim=0)).unsqueeze(0)
        tie5 = self.tie5(torch.cat([x, pos5], dim=0)).unsqueeze(0)
        tie6 = self.tie6(torch.cat([x, pos6], dim=0)).unsqueeze(0)
        # duration
        dur1 = self.dur1(x)
        dur2 = self.dur2(x)
        dur3 = self.dur3(x)
        # conbine
        position = torch.cat([pos1, pos2, pos3, pos4, pos5, pos6], dim=0)
        effects = torch.cat([eff1, eff2, eff3, eff4, eff5, eff6], dim=0)
        ties = torch.cat([tie1, tie2, tie3, tie4, tie5, tie6], dim=0)
        
        return position, effects, ties, dur1, dur2, dur3, state

embedding_dim = 200
hidden_dim = 120
vocab_size = 3024

#set gpu
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
# use cpu
# device = torch.device("cpu")
print("done setting device")

encoder = Encoder().to(device)
decoder = Decoder(vocab_size, embedding_dim, hidden_dim).to(device)

print("done setting encoder and decoder")

#loss
criterion = nn.CrossEntropyLoss()

print("done setting criterion")

#optimizer
encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)

print("done setting model")

dataset = gd.Dataset()
dataloader = DataLoader(
    dataset = dataset,
    batch_size=3
    )

print("done setting dataloader")
for tempo, tab_data, audio_data in dataloader:

    encoder_optimizer.zero_grad()
    input_tensor = torch.tensor(audio_data, dtype=torch.float32, device=device)
    output_tensor = torch.tensor(tab_data, dtype=torch.int64, device=device)
    encoder_state = encoder(input_tensor)
    sys.exit()

dataloader = DataLoader(
    dataset = dataset,
    batch_size=3
    )
    
for epoch in range(3):
    epoch_loss = 0

    for data, label in dataloader:
        
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        
        input_tensor = torch.tensor(data, dtype=torch.float32, device=device)
        output_tensor = torch.tensor(label, dtype=torch.int64, device=device)
        
        encoder_state = encoder(input_tensor)
        
        source = output_tensor[:, :-1]
        target = output_tensor[:, 1:]
        
        loss = 0
        
        decoder_output, _ = decoder(source, encoder_state)
        
        for j in range(decoder_output.size()[1]):
            loss += criterion(decoder_output[:, j, :], target[:, j])
            
        epoch_loss += loss.item()
        
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()
    
    print("Epoch %d: %.2f" % (epoch, epoch_loss))

done setting device
done setting encoder and decoder
done setting criterion
done setting model
done setting dataloader
torch.Size([3, 64, 40, 111])
torch.Size([3, 64, 20, 55])
torch.Size([3, 70400])
torch.Size([3, 8192])


  input_tensor = torch.tensor(audio_data, dtype=torch.float32, device=device)


SystemExit: 

In [7]:
384*60

23040

In [4]:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 7, (3, 1))
        self.conv2 = nn.Conv2d(16, 16, 7, (3, 1))
        self.conv3 = nn.Conv2d(16, 32, 7, (3, 1))
        self.conv4 = nn.Conv2d(32, 32, 5, 2)
        self.conv5 = nn.Conv2d(32, 32, 5, 2)
        
        self.pool = nn.MaxPool2d(2, 2)

        self.drop1 = nn.Dropout(0.25)
        self.drop2 = nn.Dropout(0.5)
        
        self.fc = nn.Linear(640, 3)

class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=(1,1))
        self.conv2 = nn.Conv2d(8, 16, kernel_size=5, stride=(1,2))
        self.conv3 = nn.Conv2d(16, 32, kernel_size=5, stride=(1,2))
        self.conv4 = nn.Conv2d(32, 32, kernel_size=7, stride=(1,2))
        self.conv5 = nn.Conv2d(32, 64, kernel_size=7, stride=(2,3))
        self.conv6 = nn.Conv2d(64, 64, kernel_size=7, stride=(2,3))
        
        self.pool = nn.MaxPool2d(2, 2)
        self.drop1 = nn.Dropout2d(p=0.25)
        self.drop2 = nn.Dropout1d(p=0.5)
        
        self.flatten = nn.Flatten()
        
        self.linear1 = nn.Linear(70400, 16384)
        self.linear2 = nn.Linear(16384, 8192)
            
    # @profile

    def forward(self, x):
        x = f.relu(self.conv1(x))
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = f.relu(self.conv4(x))
        x = f.relu(self.conv5(x))
        x = f.relu(self.conv6(x))
        print(x.shape)
        x = self.pool(x)
        print(x.shape)
        x = self.flatten(x)
        print(x.shape)
        x = self.linear1(x)
        x = self.linear2(x)
        print(x.shape)
        return x

288640