In [1]:
# !pip install torch

In [2]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import glob
import pretty_midi
import matplotlib.pyplot as plt

# device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
device = torch.device("cpu")

In [3]:
class BiDirectionalRNN(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size):
        super(BiDirectionalRNN, self).__init__()
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, output_size)  # *2 for bidirectional

    def forward(self, input_seq):
        embedded = self.embedding(input_seq)
        output, _ = self.rnn(embedded)
        output = self.fc(output)
        return output

In [4]:
sequence_length = 600

def get_data(path,  sequence_length=600):
    """
    Get the data from the MIDI files.
    """
    midi_files = glob.glob(path)
    step = 5*sequence_length
    sequences = []
    labels = []
    
    for midi_file in midi_files:
        try:
            data = pretty_midi.PrettyMIDI(midi_file).synthesize(44100)
            
            for i in range(0, len(data) - sequence_length, step):
                pre_chunk = data[i:i+sequence_length]
                after_chunk = data[i+ 2*sequence_length:i+ 3*sequence_length]
                middle_chunk = data[i+sequence_length:i+2*sequence_length]
                if np.any(pre_chunk) and np.any(after_chunk) and np.any(middle_chunk):
                    segment = np.concatenate((pre_chunk, np.ones(sequence_length)*128, after_chunk))
                    sequences.append(segment)
                    labels.append(middle_chunk)
        except:
            pass
    print("Total Valid Data:", len(sequences))
    sequences_tensor = torch.from_numpy(np.array(sequences))
    labels_tensor = torch.from_numpy(np.array(labels))
    return sequences_tensor, labels_tensor


def get_dataloader(inputs, labels, batch_size=64):
    dataset = torch.utils.data.TensorDataset(inputs, labels)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [5]:
def train_model(model, train_loader, num_epochs=10, learning_rate=0.001):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model.to(device)

    model.train()
    losses = []
    accuracies = []
    

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for i, (inputs, labels) in enumerate(train_loader, 0):
            
            optimizer.zero_grad()

            
            inputs = inputs.to(torch.long).to(device)
            labels = labels.to(torch.long).to(device)
            
        

            outputs = model(inputs)
            outputs = outputs[:, sequence_length:2*sequence_length, :]
            outputs = outputs.permute(0, 2, 1)
            loss = criterion(outputs.squeeze(), labels)

            loss.backward()
            optimizer.step()

            running_loss += float(loss)

            # Calculate accuracy
            predicted = torch.argmax(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Print every 100 mini-batches
            if i % 100 == 99:
                avg_loss = running_loss / 100
                accuracy = correct / total
                print(f"Epoch [{epoch + 1}/{num_epochs}], "
                        f"Batch [{i + 1}/{len(train_loader)}], "
                        f"Loss: {avg_loss:.10f}, Accuracy: {accuracy:.4f}")
                running_loss = 0.0
                losses.append(avg_loss)
                accuracies.append(accuracy)
    print('Finished Training')

    # Plotting loss and accuracy
    plt.figure(figsize=(10, 5))

    plt.subplot(1, 2, 1)
    plt.plot(losses, label='Training Loss')
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Training Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(accuracies, label='Training Accuracy')
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [6]:
# Generate sequences
sequences, labels = get_data('archive/101_Strings/*.mid')
# val_sequences, val_labels = sequences[4000000:], labels[4000000:]
# train_loader = get_dataloader(sequences[:4000000], labels[:4000000])
train_loader = get_dataloader(sequences, labels)

Total Valid Data: 1869


In [7]:
model_scale = 1
model = BiDirectionalRNN(128+1, model_scale*64, model_scale*64, 128).to(device)
train_model(model, train_loader, num_epochs=10, learning_rate=0.01)

IndexError: index out of range in self