In [1]:
import mido
from mido import Message, MidiFile, MidiTrack,MetaMessage
import string
import numpy as np
import pandas as pd
from midi_array import *
import os
import csv


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [3]:
# label path     
label_path = r'D:\BrownUnivercity\CS2470\final_proj\CS2470_final_project\data\label.csv'
# Define the folder path
folder_path = r'D:\BrownUnivercity\CS2470\final_proj\CS2470_final_project\data\test'
# load data
music,tag, align_length = get_music_data(folder_path, label_path)

In [4]:
print(music.shape)
print(tag.shape)

(20, 10000, 88)
(20,)


In [5]:
# help function
def one_hot_encode(labels, num_classes):
    one_hot_labels = np.zeros((len(labels), num_classes))
    for i, label in enumerate(labels):
        one_hot_labels[i, label - 1] = 1
    return one_hot_labels

# onehot_encode
tag = one_hot_encode(tag,4)

## CVAE

In [6]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size, num_classes):
        super(Encoder, self).__init__()
        self.fc1 = nn.Linear(input_size + num_classes, hidden_size)
        self.fc2_mean = nn.Linear(hidden_size, latent_size)
        self.fc2_logvar = nn.Linear(hidden_size, latent_size)
        
    def forward(self, x, y):
        # Concatenate input with class information
        x = torch.cat((x, y), dim=1)
        x = F.relu(self.fc1(x))
        mean = self.fc2_mean(x)
        logvar = self.fc2_logvar(x)
        return mean, logvar

class Decoder(nn.Module):
    def __init__(self, latent_size, hidden_size, output_size, num_classes):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(latent_size + num_classes, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        
    def forward(self, z, y):
        # Concatenate latent variable with class information
        z = torch.cat((z, y), dim=1)
        z = F.relu(self.fc1(z))
        reconstruction = torch.sigmoid(self.fc2(z))  # Use sigmoid for binary output
        return reconstruction

class CVAE(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size, output_size, num_classes):
        super(CVAE, self).__init__()
        self.encoder = Encoder(input_size, hidden_size, latent_size, num_classes)
        self.decoder = Decoder(latent_size, hidden_size, output_size, num_classes)
        
    def reparameterize(self, mean, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mean + eps * std
    
    def forward(self, x, y):
        x = x.to(torch.float32)
        y = y.to(torch.float32)
        mean, logvar = self.encoder(x, y)
        z = self.reparameterize(mean, logvar)
        reconstruction = self.decoder(z, y)
        return reconstruction, mean, logvar



In [10]:

# Define your dataset, dataloaders, loss function, optimizer, etc.
# Train your CVAE model using the defined components.
num_classes = 4
input_size = 10000 * 88
hidden_size = 256
latent_size = 32
output_size = 10000 * 88

cvae_model = CVAE(input_size, hidden_size, latent_size, output_size, num_classes)

In [11]:

# Define optimizer
learning_rate = 0.01
optimizer = optim.Adam(cvae_model.parameters(), lr=learning_rate)

# Define loss function
def loss_function(recon_x, x, mu, logvar, labels):
    
    CE = nn.CrossEntropyLoss(reduction='sum')  # Cross-entropy loss
    reconstruction_loss = CE(recon_x, x.view(-1, input_size))
    # KL divergence loss
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    # Conditional loss
    # conditional_loss = torch.mean((labels - mu).pow(2))
    
    return reconstruction_loss + KLD 

# Define your training function
def train(epoch,train_loader,log_interval):
    cvae_model.train()
    train_loss = 0
    for batch_idx, (data, labels) in enumerate(train_loader):
        data = data.view(-1, input_size).to(torch.float32)
        labels = labels.to(torch.float32)
        optimizer.zero_grad()
        recon_batch, mu, logvar = cvae_model(data, labels)
        loss = loss_function(recon_batch, data, mu, logvar,labels)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))



In [12]:
# Assuming you have defined train_loader (your data loader for training data)

tag = torch.tensor(tag)
music = torch.tensor(music)
# data
dataset = TensorDataset(music, tag)
dataloader = DataLoader(dataset, shuffle=True)

# Set number of epochs and log interval
num_epochs = 15
log_interval = 100

# Train the model
for epoch in range(num_epochs):
        train(epoch,dataloader,log_interval)

  tag = torch.tensor(tag)
  music = torch.tensor(music)


RuntimeError: [enforce fail at C:\cb\pytorch_1000000000000\work\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 901120000 bytes.