In [16]:
import glob
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
# handle midi files
from music21 import converter, instrument, note, chord

### Define RNN 

In [17]:
class MusicRNN(nn.Module):
    def __init__(self, n_vocab, embedding_dim=64, hidden_dim=512, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(n_vocab, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, n_vocab)
        self.softmax = nn.LogSoftmax(dim=2)

    def forward(self, x, h):
        x = self.embedding(x)
        out, h = self.rnn(x, h)
        out = self.fc(out)
        out = self.softmax(out)
        return out, h

### Read the notes in the current midi file (from original source)

In [18]:
def get_notes():
    notes = []
    for file in glob.glob("midi_files_partial/*.midi"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notesAndRests

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
            elif isinstance(element, note.Rest): #ADDED
                notes.append(element.name) #ADDED

    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes

### Prepare the sequence of notes (from original source)

In [19]:
def prepare_sequences(notes, n_vocab):
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

    # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length))

    network_output = np.array(network_output)
    network_output = torch.tensor(network_output)

    return (network_input, network_output)

### Train the RNN

In [20]:
def train(model, notes, n_vocab, sequence_length, batch_size, epochs):
    
    # create a dictionary to map pitches to integers
    note_to_int = {note: number for number, note in enumerate(sorted(set(notes)))}

    network_input = []
    network_output = []

    # create input sequences and corresponding outputs
    for i in range(len(notes) - sequence_length):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)
    
    # reshape the input into a format compatible with LSTM layers
    network_input = torch.tensor(network_input, dtype=torch.float32).view(n_patterns, sequence_length, 1)
    # normalize input
    network_input = network_input / float(n_vocab)

    network_output = torch.tensor(network_output, dtype=torch.long)

    train_dataset = TensorDataset(network_input, network_output)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters())

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.view(-1, n_vocab), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print('Epoch {}/{} Loss: {:.4f}'.format(epoch + 1, epochs, running_loss / len(train_loader)))

### Initialize and call

In [21]:
def train_network():
    notes = get_notes()

    # get amount of pitch names
    n_vocab = len(set(notes))

    network_input, network_output = prepare_sequences(notes, n_vocab)

    model = MusicRNN(n_vocab)

    train(model, notes, n_vocab, 10, 64, 2)

In [22]:
train_network()



ZeroDivisionError: float division by zero