In [4]:
import os
import torch
import wandb
import numpy as np
import torch.nn as nn
from music21 import converter, instrument, note, chord
from torch.utils.data import DataLoader, TensorDataset

In [10]:
def load_midi_files(midi_folder):
    notes = []
    for file in os.listdir(midi_folder):
        midi = converter.parse(os.path.join(midi_folder, file))
        notes_to_parse = None
        try:
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except:
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes

notes = load_midi_files('indian_classical')
pitchnames = sorted(set(notes))
note_to_int = {note: num for num, note in enumerate(pitchnames)}


In [14]:

# Prepare sequences
sequence_length = 100
network_input = []
network_output = []

for i in range(len(notes) - sequence_length):
    sequence_in = notes[i:i + sequence_length]
    sequence_out = notes[i + sequence_length]
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])

network_input = np.reshape(network_input, (len(network_input), sequence_length, 1))
network_input = torch.tensor(network_input / float(len(pitchnames)), dtype=torch.float32)
network_output = torch.tensor(network_output, dtype=torch.long)

# Create DataLoader
dataset = TensorDataset(network_input, network_output)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [15]:
print(note_to_int)

{'0': 0, '0.1': 1, '0.1.3': 2, '0.2': 3, '0.3': 4, '1': 5, '1.2': 6, '1.2.3': 7, '1.3': 8, '1.4': 9, '1.5': 10, '1.6': 11, '10': 12, '10.0': 13, '10.1': 14, '10.11': 15, '10.11.1': 16, '10.2': 17, '10.3': 18, '11': 19, '11.0': 20, '11.1': 21, '11.3': 22, '2': 23, '2.3': 24, '2.4': 25, '3': 26, '3.4': 27, '3.5': 28, '3.5.6': 29, '3.6': 30, '3.6.8': 31, '3.7': 32, '3.8': 33, '4': 34, '4.5': 35, '4.6': 36, '4.6.8': 37, '5': 38, '5.10': 39, '5.6': 40, '5.6.8': 41, '5.7': 42, '5.8': 43, '6': 44, '6.10': 45, '6.11': 46, '6.7': 47, '6.8': 48, '6.8.11': 49, '6.9': 50, '7': 51, '7.10': 52, '7.11': 53, '7.8': 54, '7.9': 55, '7.9.1': 56, '8': 57, '8.0': 58, '8.1': 59, '8.10': 60, '8.10.0': 61, '8.11': 62, '8.9': 63, '8.9.10': 64, '9': 65, '9.0': 66, '9.0.1': 67, '9.1': 68, '9.10': 69, '9.11': 70, '9.2': 71, 'A3': 72, 'A4': 73, 'A5': 74, 'B-3': 75, 'B-4': 76, 'B-5': 77, 'B2': 78, 'B3': 79, 'B4': 80, 'B5': 81, 'C#3': 82, 'C#4': 83, 'C#5': 84, 'C#6': 85, 'C3': 86, 'C4': 87, 'C5': 88, 'C6': 89, 'D1':

In [16]:
class MusicLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MusicLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=3, dropout=0.1)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])  # Get the last time step output
        return x

model = MusicLSTM(1, 512, len(pitchnames))

In [18]:
def train_model(model, dataloader, epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
    model.train()
    
    wandb.init(project="music-generation")
    
    for epoch in range(epochs):
        total_loss = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss}')
        wandb.log({"epoch": epoch + 1, "loss": avg_loss})
        
train_model(model, dataloader, 200)

cuda


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrah-m[0m ([33mrebot[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/200, Loss: 3.875348180839696
Epoch 2/200, Loss: 3.8196415065490092
Epoch 3/200, Loss: 3.8186707656408094
Epoch 4/200, Loss: 3.8176115714397625
Epoch 5/200, Loss: 3.8160945902165677
Epoch 6/200, Loss: 3.8151123191892484
Epoch 7/200, Loss: 3.814529764283564
Epoch 8/200, Loss: 3.8147426487244283
Epoch 9/200, Loss: 3.81422546475204
Epoch 10/200, Loss: 3.813719758053416
Epoch 11/200, Loss: 3.813046283328656
Epoch 12/200, Loss: 3.813287932848193
Epoch 13/200, Loss: 3.8133054013104783
Epoch 14/200, Loss: 3.8125724288606153
Epoch 15/200, Loss: 3.8131229680838046
Epoch 16/200, Loss: 3.8126351354048422
Epoch 17/200, Loss: 3.812126534501302
Epoch 18/200, Loss: 3.8116091268578756
Epoch 19/200, Loss: 3.811572591054071
Epoch 20/200, Loss: 3.812893305857157
Epoch 21/200, Loss: 3.8119275545336535
Epoch 22/200, Loss: 3.8117403787435946
Epoch 23/200, Loss: 3.810615765679743
Epoch 24/200, Loss: 3.8112491123455086
Epoch 25/200, Loss: 3.8112189425635585
Epoch 26/200, Loss: 3.810680207517958
Epoch 2

KeyboardInterrupt: 

In [19]:
import numpy as np

def generate_music(model, network_input, pitchnames, note_to_int, num_generate=500):
    """ Generate music given a sequence of notes """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    
    # Pick a random sequence from the input as a starting point for the generation
    start = np.random.randint(0, len(network_input)-1)
    int_to_note = {num: note for note, num in note_to_int.items()}
    pattern = network_input[start].tolist()
    prediction_output = []

    # Generate notes
    for note_index in range(num_generate):
        prediction_input = torch.tensor([pattern], dtype=torch.float32).to(device)
        prediction = model(prediction_input)
        _, index = torch.max(prediction, 1)
        
        result = int_to_note[index.item()]
        prediction_output.append(result)
        
        pattern.append(index.item() / float(len(pitchnames)))
        pattern = pattern[1:len(pattern)]

    return prediction_output

# Generate a piece of music
generated_notes = generate_music(model, network_input, pitchnames, note_to_int)

TypeError: not a sequence

In [None]:
from music21 import stream, note, chord, midi

def create_midi(prediction_output, output_path='output.mid'):
    """ Convert the output from the prediction to MIDI file """
    offset = 0
    output_notes = []

    # Create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # Pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # Pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # Increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=output_path)

# Create a MIDI file from the generated notes
create_midi(generated_notes)