In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class WaveNetModel(nn.Module):
    """WaveNet-style stack of gated, dilated, causal 1-D convolutions.

    Each layer applies a tanh "filter" convolution and a sigmoid "gate"
    convolution to the same input, multiplies them, and feeds the product to
    (a) a 1x1 skip convolution whose outputs are summed over all layers and
    (b) a 1x1 residual convolution that is added back to the layer input.

    Args:
        layers: number of dilated layers per block; dilations are 1, 2, 4, ...
        blocks: how many times the dilation schedule is repeated.
        dilation_channels: channels produced by the filter/gate convolutions.
        residual_channels: channels carried along the residual path.
        skip_channels: channels of the summed skip path.
        classes: number of output classes per time step.
        in_channels: channels of the input signal (1 for a raw scalar signal,
            the vocabulary size for one-hot encoded input).

    Shapes:
        input  -- (batch, in_channels, time)
        output -- (batch, classes, time), log-probabilities over dim 1.
    """

    def __init__(self, layers=10, blocks=3, dilation_channels=32, residual_channels=32, skip_channels=256, classes=256, in_channels=1):
        super(WaveNetModel, self).__init__()
        self.dilation_channels = dilation_channels
        self.residual_channels = residual_channels
        self.skip_channels = skip_channels
        self.classes = classes
        self.in_channels = in_channels

        self.dilations = [2 ** i for i in range(layers)] * blocks
        self.start_conv = nn.Conv1d(in_channels, residual_channels, 1)

        self.residual_blocks = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()

        for dilation in self.dilations:
            self.filter_convs.append(nn.Conv1d(residual_channels, dilation_channels, 2, dilation=dilation))
            self.gate_convs.append(nn.Conv1d(residual_channels, dilation_channels, 2, dilation=dilation))
            self.skip_convs.append(nn.Conv1d(dilation_channels, skip_channels, 1))
            self.residual_blocks.append(nn.Conv1d(dilation_channels, residual_channels, 1))

        self.end_conv_1 = nn.Conv1d(skip_channels, skip_channels, 1)
        self.end_conv_2 = nn.Conv1d(skip_channels, classes, 1)

    def forward(self, x):
        """Return per-time-step log-probabilities of shape (batch, classes, time)."""
        x = self.start_conv(x)
        skip_connections = []

        for filter_conv, gate_conv, skip_conv, residual_block in zip(self.filter_convs, self.gate_convs, self.skip_convs, self.residual_blocks):
            # An unpadded dilated convolution shortens the sequence by
            # dilation * (kernel_size - 1), which would make the residual sum
            # and the skip stack below fail on mismatched lengths. Padding on
            # the left only keeps the length constant AND keeps the layer
            # causal (output at step t never sees inputs after t).
            left_pad = filter_conv.dilation[0] * (filter_conv.kernel_size[0] - 1)
            padded = F.pad(x, (left_pad, 0))

            filtered = filter_conv(padded)
            gated = gate_conv(padded)
            # torch.tanh / torch.sigmoid replace the deprecated F.tanh / F.sigmoid.
            x_input = torch.tanh(filtered) * torch.sigmoid(gated)
            skip = skip_conv(x_input)
            skip_connections.append(skip)
            x = residual_block(x_input) + x

        x = F.relu(torch.sum(torch.stack(skip_connections), 0))
        x = F.relu(self.end_conv_1(x))
        x = self.end_conv_2(x)
        return F.log_softmax(x, dim=1)

# Instantiate the WaveNet with its default hyperparameters.
# NOTE(review): with these defaults start_conv accepts a single input channel, while
# MusicDataset.__getitem__ in a later cell yields one-hot inputs with n_vocab channels --
# confirm the channel counts agree before training this instance.
model = WaveNetModel()

In [2]:
def _match_output_to_target(output, target):
    """Reconcile model output and class-index target shapes for the loss."""
    # Datasets that wrap each label in a length-1 tensor collate to (batch, 1);
    # the loss wants plain (batch,) class indices.
    if target.dim() > 1 and target.size(-1) == 1 and not target.is_floating_point():
        target = target.squeeze(-1)
    # A sequence model emits (batch, classes, time). With one label per
    # sequence, the final time step is the next-item prediction.
    if output.dim() == 3 and target.dim() == 1:
        output = output[:, :, -1]
    return output, target


def train(model, dataloader, epochs=10):
    """Train ``model`` on ``dataloader`` with Adam and cross-entropy loss.

    Args:
        model: module mapping a batch of inputs to class scores, either
            (batch, classes) or (batch, classes, time).
        dataloader: yields ``(data, target)`` batches and exposes ``.dataset``
            (used only for the progress message).
        epochs: number of passes over ``dataloader``.

    The model is moved to CUDA when available and is updated in place;
    nothing is returned. Progress is printed every 100 batches.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # CrossEntropyLoss applies log_softmax internally. That is also safe for a
    # model that already returns log-probabilities, because log_softmax leaves
    # a normalised log-distribution unchanged.
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            output, target = _match_output_to_target(output, target)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % 100 == 0:
                print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(dataloader.dataset)} ({100. * batch_idx / len(dataloader):.0f}%)]\tLoss: {loss.item():.6f}')

In [3]:
import numpy as np
from torch.utils.data import DataLoader, Dataset

class MusicDataset(Dataset):
    """Sliding-window next-note dataset over an integer-encoded note sequence.

    Every window of ``sequence_length`` consecutive notes is an input and the
    note immediately after the window is its target.

    Args:
        notes: sequence of note/chord tokens; each is looked up in the
            module-level ``note_to_int`` mapping (defined outside this class).
        sequence_length: number of notes per input window.

    Attributes set here: ``notes`` (integer codes), ``n_vocab`` (one-hot
    width), ``sequence_length``, ``data`` (windows) and ``targets``.
    """

    def __init__(self, notes, sequence_length=100):
        # Convert notes to integer encoding using note_to_int mapping
        self.notes = [note_to_int[note] for note in notes]
        # The one-hot width must exceed the largest index in use. Counting
        # distinct values is only equivalent when every index 0..max occurs,
        # and otherwise makes F.one_hot reject the larger indices.
        self.n_vocab = max(self.notes) + 1 if self.notes else 0
        self.sequence_length = sequence_length
        self.data, self.targets = self.prepare_sequences()

    def prepare_sequences(self):
        """Return ``(windows, next_notes)`` as int64 NumPy arrays."""
        n_windows = len(self.notes) - self.sequence_length
        if n_windows <= 0:
            # Too few notes for a single window: keep well-formed, empty arrays.
            return (np.empty((0, self.sequence_length), dtype=np.int64),
                    np.empty((0,), dtype=np.int64))

        windows = [self.notes[i:i + self.sequence_length] for i in range(n_windows)]
        next_notes = [self.notes[i + self.sequence_length] for i in range(n_windows)]
        return np.array(windows, dtype=np.int64), np.array(next_notes, dtype=np.int64)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(x, y)``: one-hot window (n_vocab, sequence_length) and target index (1,)."""
        window = torch.as_tensor(self.data[idx], dtype=torch.int64)
        x = F.one_hot(window, num_classes=self.n_vocab).float()  # (sequence_length, n_vocab)
        x = x.permute(1, 0)  # -> (n_vocab, sequence_length): channels first, as Conv1d expects
        y = torch.tensor([int(self.targets[idx])], dtype=torch.long)
        return x, y

# Assuming 'notes' is a list of all notes/chords from the MIDI files
# (`notes` and the `note_to_int` mapping used inside MusicDataset are not defined in the
# visible cells -- presumably created in a cell not shown; verify before running).
# Builds next-note training pairs with the default window of 100 notes and serves them
# in shuffled batches of 64.
dataset = MusicDataset(notes)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

54


In [4]:
def generate_music(model, dataset, num_notes=500):
    """Autoregressively sample ``num_notes`` notes from ``model``.

    A randomly chosen window from ``dataset.data`` seeds the generation. On
    each step the window is one-hot encoded, the model's most likely next
    index is appended, and the oldest entry is dropped.

    Args:
        model: module taking (1, n_vocab, sequence_length) input and returning
            class scores shaped (1, classes) or (1, classes, time).
        dataset: object exposing ``data`` (2-D integer array of windows) and
            ``n_vocab`` (one-hot width).
        num_notes: number of notes to generate.

    Returns:
        List of generated notes, decoded through the module-level
        ``int_to_note`` mapping (defined outside this function).

    Raises:
        ValueError: if ``dataset.data`` holds no sequences.
    """
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    if len(dataset.data) == 0:
        raise ValueError("dataset contains no sequences to seed generation from")

    # Start with a random sequence from the dataset. np.random.randint excludes
    # its upper bound, so passing the length makes every window eligible and
    # also works when there is only one.
    start_index = np.random.randint(0, len(dataset.data))
    # Work on a private copy so the dataset's array is never modified.
    current_sequence = np.array(dataset.data[start_index], dtype=np.int64)
    generated_notes = []

    for _ in range(num_notes):
        x = F.one_hot(torch.from_numpy(current_sequence), num_classes=dataset.n_vocab).float()
        x = x.permute(1, 0).unsqueeze(0).to(device)  # Add batch dimension and permute

        with torch.no_grad():
            prediction = model(x)
        # A per-time-step output carries one distribution per position; the
        # last position is the prediction for the note after the window.
        if prediction.dim() == 3:
            prediction = prediction[:, :, -1]
        predicted_index = torch.argmax(prediction, dim=1).item()

        generated_notes.append(int_to_note[predicted_index])
        current_sequence = np.roll(current_sequence, -1)
        current_sequence[-1] = predicted_index

    return generated_notes

# Generate a piece of music
# NOTE(review): no call to train() is visible between the construction of `model` and this
# line, so this appears to sample from untrained weights -- confirm against the full notebook.
generated_music = generate_music(model, dataset)

In [5]:
import torch
import torch.nn as nn

class AdvancedMusicLSTM(nn.Module):
    """Two stacked-LSTM stages followed by a small classifier head.

    Pipeline: lstm1 -> last time step -> batch norm -> lstm2 (fed that single
    step as a length-1 sequence) -> batch norm -> fc1 -> ReLU -> dropout ->
    fc2 -> log-softmax over the class dimension.

    Args:
        input_size: features per time step of the input.
        hidden_size: hidden width of both LSTM stages.
        output_size: number of output classes.

    NOTE(review): lstm2 only ever receives a sequence of length 1, so its
    recurrence over time is not exercised -- confirm this is intended.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        half_hidden = hidden_size // 2

        # Attribute names and creation order are kept so saved state_dicts
        # and seeded initialisation remain compatible.
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers=2, dropout=0.3, batch_first=True)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size, num_layers=2, dropout=0.3, batch_first=True)
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.fc1 = nn.Linear(hidden_size, half_hidden)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(half_hidden, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map (batch, time, input_size) input to (batch, output_size) log-probabilities."""
        first_seq, _ = self.lstm1(x)
        # Only the final time step of the first stage is carried forward.
        summary = self.batch_norm1(first_seq[:, -1, :])
        second_seq, _ = self.lstm2(summary.unsqueeze(1))
        features = self.batch_norm2(second_seq[:, -1, :])
        hidden = self.dropout(self.relu(self.fc1(features)))
        return self.softmax(self.fc2(hidden))

# One input feature per time step, 512 hidden units, one output class per entry of `pitchnames`.
# NOTE: this rebinds `model`, replacing the WaveNetModel instance created in an earlier cell.
# `pitchnames` is not defined in the visible cells -- presumably built in a cell not shown.
model = AdvancedMusicLSTM(1, 512, len(pitchnames))

In [12]:
from torch.optim import lr_scheduler

def train_model_with_checkpoint(model, dataloader, validation_dataloader, epochs, validate_every=10):
    """Train ``model`` with periodic validation, LR scheduling and checkpointing.

    Args:
        model: module returning log-probabilities (the loss is ``NLLLoss``).
        dataloader: yields ``(inputs, labels)`` training batches.
        validation_dataloader: yields ``(inputs, labels)`` validation batches.
        epochs: number of training epochs.
        validate_every: run validation on every ``validate_every``-th epoch,
            starting with the first.

    Side effects:
        Moves ``model`` to CUDA when available, logs to Weights & Biases
        project "music-generation-3", prints progress, and writes the weights
        with the lowest validation loss to ``best_model.pth``.

    Raises:
        ValueError: if ``validate_every`` is smaller than 1.
    """
    if validate_every < 1:
        raise ValueError("validate_every must be at least 1")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    model.to(device)
    criterion = nn.NLLLoss()  # Using NLLLoss which is suitable for LogSoftmax
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # `verbose` is deprecated in recent PyTorch releases; learning-rate changes
    # are reported explicitly below instead.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)
    best_loss = float('inf')

    wandb.init(project="music-generation-3")

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_train_loss}')
        wandb.log({"epoch": epoch + 1, "loss": avg_train_loss})

        # Everything below needs a fresh validation loss. Stepping the
        # scheduler with a stale value on the epochs in between counts each of
        # them as "no improvement" and halves the learning rate without any
        # new evidence, so skip those epochs entirely.
        if epoch % validate_every != 0:
            continue

        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, labels in validation_dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        avg_val_loss = val_loss / len(validation_dataloader)
        print(f'Validation Phase, Loss: {avg_val_loss}')
        wandb.log({"Validation loss" : avg_val_loss})

        # Scheduler step (patience is therefore measured in validation rounds)
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(avg_val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f'Reducing learning rate from {lr_before} to {lr_after}')

        # Checkpoint model
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            print(f'Best model saved with validation loss: {best_loss}')

# Train for 200 epochs. `train_dataloader` and `validation_dataloader` are not defined in the
# visible cells (the execution counter jumps from 5 to 12) -- presumably created in cells not shown.
train_model_with_checkpoint(model, train_dataloader, validation_dataloader, 200)

cuda




0,1
epoch,▁
loss,▁

0,1
epoch,1.0
loss,3.01493


Epoch 1/200, Loss: 2.991222266227968
Validation Phase, Loss: 3.2523501457706576
Best model saved with validation loss: 3.2523501457706576
Epoch 2/200, Loss: 2.9472225096917923
Epoch 3/200, Loss: 2.910635079106977
Epoch 4/200, Loss: 2.8864362605156435
Epoch 5/200, Loss: 2.860309354720577
Epoch 6/200, Loss: 2.8316703977123385


In [None]:
import numpy as np

def generate_music(model, network_input, pitchnames, note_to_int, num_generate=500):
    """Generate music given a sequence of notes.

    A randomly chosen row of ``network_input`` seeds a sliding window. On each
    step the model's most likely class is decoded to a note, and that class
    index divided by ``len(pitchnames)`` is appended to the window while the
    oldest entry is dropped.

    Args:
        model: module mapping a (1, seq_len, ...) float batch to class scores
            of shape (1, classes).
        network_input: indexable collection of seed sequences, each shaped
            (seq_len,) or (seq_len, features).
        pitchnames: collection whose length scales the fed-back index.
        note_to_int: mapping from note name to class index.
        num_generate: number of notes to produce.

    Returns:
        List of generated note names.

    Raises:
        ValueError: if ``network_input`` is empty.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The inputs are sent to `device`, so the model has to live there too.
    model.to(device)
    model.eval()

    if len(network_input) == 0:
        raise ValueError("network_input contains no sequences to seed generation from")

    # Pick a random sequence from the input as a starting point for the generation.
    # np.random.randint excludes its upper bound, so the length itself is the
    # right bound (and a single seed sequence works).
    start = np.random.randint(0, len(network_input))
    int_to_note = {num: note for note, num in note_to_int.items()}
    pattern = torch.as_tensor(network_input[start], dtype=torch.float32)
    scale = float(len(pitchnames))
    prediction_output = []

    # Generate notes; no gradients are needed for sampling.
    with torch.no_grad():
        for _ in range(num_generate):
            prediction_input = pattern.unsqueeze(0).to(device)
            prediction = model(prediction_input)
            index = torch.argmax(prediction, dim=1).item()

            prediction_output.append(int_to_note[index])

            # Append the new step with the same per-step shape as the existing
            # rows (scalar or [value]) so the window stays rectangular, then
            # drop the oldest step.
            next_step = pattern.new_full((1, *pattern.shape[1:]), index / scale)
            pattern = torch.cat([pattern[1:], next_step], dim=0)

    return prediction_output

# Generate a piece of music
# `network_input` is not defined in the visible cells -- presumably the prepared input
# sequences built in a cell not shown; verify before running.
generated_notes = generate_music(model, network_input, pitchnames, note_to_int)

In [None]:
from music21 import stream, note, chord, midi

def create_midi(prediction_output, output_path='output.mid'):
    """Convert the output from the prediction to a MIDI file.

    Args:
        prediction_output: iterable of strings. A string containing '.' or
            consisting only of digits is read as a chord whose '.'-separated
            parts are integer pitches; any other string is passed to
            ``note.Note`` as a single note.
        output_path: destination of the MIDI file.

    Each element is placed 0.5 offset units after the previous one.
    """
    # `instrument` is used below but is not part of the notebook's music21
    # import line; import it here so the function does not hit a NameError.
    from music21 import instrument

    offset = 0
    output_notes = []

    # Create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # Pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            chord_notes = []
            for current_note in pattern.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # Pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # Increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=output_path)

# Create a MIDI file from the generated notes (written to the default path 'output.mid')
create_midi(generated_notes)