In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pickle
import glob
#handle midi files
from music21 import converter, instrument, note, chord

### Read the notes from the current MIDI file (from original source)

In [2]:
def get_notes():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    notes = []

    for file in glob.glob("midi_files_partial/*.midi"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notesAndRests

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
            elif isinstance(element, note.Rest): #ADDED
                notes.append(element.name) #ADDED

    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes

### Prepare the sequence of notes (from original source)

In [3]:
def prepare_sequences(notes, n_vocab):
    
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

    # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)

    network_output = np.eye(n_vocab)[network_output]

    return (torch.from_numpy(network_input).float(), torch.from_numpy(network_output).float())

In [4]:
def create_network(network_input, n_vocab):
    # create LSTM in the Keras way to mimic original study
    model = nn.Sequential(
        nn.LSTM(
            input_size=1,
            hidden_size=512,
            num_layers=3,
            batch_first=True,
            dropout=0.3,
            bidirectional=False,
        ),
        nn.Linear(512, 256),
        nn.Dropout(0.3),
        nn.Linear(256, n_vocab),
        nn.Softmax(dim=2),
    )

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr = 0.001)

    return (model, optimizer, loss_fn)

In [5]:
def train(model, network_input, network_output):
    """ train the neural network """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    lstm = model[0]
    optimizer = model[1]

    network_input = torch.tensor(network_input, dtype=torch.float32)
    

    lstm.to(device)

    num_epochs = 2
    batch_size = 64

    for epoch in range(num_epochs):
        # Shuffle the training data for each epoch
        permutation = torch.randperm(network_input.shape[0])
        network_input = network_input[permutation]
        network_output = network_output[permutation]

        for i in range(0, network_input.shape[0], batch_size):
            # Get batch of inputs and outputs
            
            inputs = network_input[i:i+batch_size].to(device)
            targets = network_output[i:i+batch_size].to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize
            outputs = lstm(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        # Print epoch loss
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

        # Save the model checkpoint after each epoch
        checkpoint_path = f"model-epoch-{epoch+1}.pt"
        torch.save(lstm.state_dict(), checkpoint_path)

    print("Training finished!")

In [6]:
def train_network(network_input, network_output, n_vocab):
    """ Train a Neural Network to generate music """
    #notes = get_notes()

    # get amount of pitch names
    #n_vocab = len(set(notes))

    #network_input, network_output = prepare_sequences(notes, n_vocab)

    model = create_network(network_input, n_vocab)

    train(model, network_input, network_output)

In [7]:
def prepare_notes():
    notes = get_notes()

    # get amount of pitch names
    n_vocab = len(set(notes))

    network_input, network_output = prepare_sequences(notes, n_vocab)
    return (network_input, network_output, n_vocab)

In [8]:
notes_figures = prepare_notes()

Parsing midi_files_partial\MIDI-Unprocessed_Chamber2_MID--AUDIO_09_R3_2018_wav--1.midi
Parsing midi_files_partial\MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.midi
Parsing midi_files_partial\MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--2.midi
Parsing midi_files_partial\MIDI-Unprocessed_Chamber4_MID--AUDIO_11_R3_2018_wav--1.midi
Parsing midi_files_partial\MIDI-Unprocessed_Chamber5_MID--AUDIO_18_R3_2018_wav--1.midi
Parsing midi_files_partial\MIDI-Unprocessed_Chamber6_MID--AUDIO_20_R3_2018_wav--1.midi
Parsing midi_files_partial\MIDI-Unprocessed_Chamber6_MID--AUDIO_20_R3_2018_wav--2.midi
Parsing midi_files_partial\MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--1.midi
Parsing midi_files_partial\MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--2.midi
Parsing midi_files_partial\MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--3.midi
Parsing midi_files_partial\MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--4.midi
Parsing midi_files_partial\MIDI-Unp

In [9]:
train_network(notes_figures[0], notes_figures[1], notes_figures[2])

  network_input = torch.tensor(network_input, dtype=torch.float32)


<class 'torch.Tensor'>


TypeError: linear(): argument 'input' (position 1) must be Tensor, not tuple