In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
#from tqdm.notebook import tqdm, trange
from tqdm import tqdm, trange
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from skimage.io import imread
from copy import deepcopy
import matplotlib as plt
from sklearn import preprocessing
import pickle
from music21 import *
from np_utils import np_utils
from os import listdir
import os

In [None]:
# Number of consecutive notes that make up one training window.
sequence_length = 32


def to_sliding_windows(file_notes):
    """Placeholder for per-file window extraction.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    # TODO: make sure that sequences are only built within a single file
    return None


# Folder that is scanned for MIDI files.
DIR = '../midiFiles'

In [None]:
def get_notes(file_limit):
    """Parse the MIDI files found in ``DIR`` into a flat series of note ids.

    Every single note is keyed by its pitch string and every chord by its
    dot-joined normal order. Each distinct key receives the next free integer
    id the first time it is seen.

    Args:
        file_limit: maximum number of MIDI files to parse. ``None`` parses
            every MIDI file in ``DIR``.

    Returns:
        A tuple ``(notes, dicto, n_vocab)`` where ``notes`` is a ``pd.Series``
        of integer ids (all files concatenated, in parse order), ``dicto``
        maps note/chord key -> id and ``n_vocab`` is the number of distinct
        keys.

    Side effects:
        Pickles ``notes`` to ``data/notes`` (the folder is created if needed).
    """
    dicto = {}
    all_notes = []
    parsed_files = 0

    # listdir() order is arbitrary; sorting makes both the selection of files
    # under the limit and the assigned ids reproducible between runs.
    for file in tqdm(sorted(listdir(DIR))):
        # Match on the extension: a plain substring test would also accept
        # unrelated files whose name merely contains "mid".
        if not file.lower().endswith(('.mid', '.midi')):
            continue

        # ">=" so that exactly file_limit files are parsed (the previous ">"
        # test let one extra file through).
        if file_limit is not None and parsed_files >= file_limit:
            break
        parsed_files += 1

        midifile = converter.parse(os.path.join(DIR, file))

        # Only piano is present, so the flattened note stream is used as is.
        for element in midifile.flat.notes:
            if isinstance(element, note.Note):
                niu = str(element.pitch)
            elif isinstance(element, chord.Chord):
                niu = '.'.join(str(n) for n in element.normalOrder)
            else:
                # Neither a note nor a chord: there is no key to encode, and
                # an empty key would pollute the vocabulary.
                continue

            # len(dicto) is evaluated before the insertion, so a new key gets
            # the next free id and a known key returns its existing id.
            all_notes.append(dicto.setdefault(niu, len(dicto)))

    # Build the series once instead of concatenating per file (quadratic, and
    # it left duplicated index labels).
    notes = pd.Series(all_notes, dtype='int64')
    n_vocab = len(dicto)

    os.makedirs('data', exist_ok=True)
    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes, dicto, n_vocab

In [None]:
# Parse the corpus (file limit of 100) and unpack the three values returned by
# get_notes: the note-id series, the key -> id mapping and the vocabulary size.
notepd, note_to_int, n_vocab = get_notes(file_limit=100)

In [None]:

# The note ids are categorical vocabulary indices, so they are kept as plain
# integers: the network looks them up through nn.Embedding and is trained with
# CrossEntropyLoss, and both need integer class indices (standardising them
# into floats or one-hot encoding the targets would break that).
note_ids = notepd.to_numpy(dtype=np.int64)

# One pattern per position that still has a following note to predict.
n_patterns = len(note_ids) - sequence_length
if n_patterns <= 0:
    raise ValueError(
        f"need more than {sequence_length} notes to build a training window, "
        f"got {len(note_ids)}"
    )

# Sliding windows: row i holds the notes [i, i + sequence_length).
# NOTE: notepd is one flat series over all files, so windows can still
# straddle the boundary between two files.
window_index = np.arange(n_patterns)[:, None] + np.arange(sequence_length)[None, :]
input_nn = torch.from_numpy(note_ids[window_index])  # (n_patterns, sequence_length)

# The target of window i is the note that immediately follows it; the last
# possible window is not built because it has no following note.
output_nn = torch.from_numpy(note_ids[sequence_length:].copy())  # (n_patterns,)

assert len(input_nn) == len(output_nn) == n_patterns
print(input_nn.shape)

# Reverse lookup used to turn generated ids back into note/chord names.
int_to_note = dict((v, k) for k, v in note_to_int.items())


In [None]:
# No train/test split is made: the network is meant to be stateful rather than
# stateless, and only training is performed here (no test set).
batch_size = 50

# TensorDataset only accepts tensors; as_tensor is a no-op for tensors and
# converts numpy arrays without an extra copy.
train_dataset = TensorDataset(torch.as_tensor(input_nn), torch.as_tensor(output_nn))

# shuffle=False keeps the windows in their original order.
train = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

class LSTMNetwork(nn.Module):
    """Embedding -> stacked LSTM -> linear head over the note vocabulary.

    Inputs are integer note ids shaped ``(batch, seq_len)``. The output holds
    one row of unnormalised scores (logits) per time step, shaped
    ``(batch, seq_len, n_vocab)``.
    """

    def __init__(self, dataset):
        """Build the layers.

        Args:
            dataset: either the vocabulary size as an integer, or an object
                exposing ``uniq_words`` whose length is the vocabulary size.

        Raises:
            ValueError: if the resulting vocabulary size is not positive.
        """
        super(LSTMNetwork, self).__init__()
        self.lstm_size = 128
        self.embedding_dim = 128
        self.num_layers = 3

        # The notebook passes the vocabulary size directly; dataset-like
        # objects with ``uniq_words`` are still accepted.
        if hasattr(dataset, "uniq_words"):
            n_vocab = len(dataset.uniq_words)
        else:
            n_vocab = int(dataset)
        if n_vocab <= 0:
            raise ValueError(f"vocabulary size must be positive, got {n_vocab}")

        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,
            embedding_dim=self.embedding_dim,
        )
        self.lstm = nn.LSTM(
            # The LSTM consumes embedding vectors, so its input width is the
            # embedding width (not the hidden size, even if they match today).
            input_size=self.embedding_dim,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=0.2,
            # Inputs arrive as (batch, seq_len); without batch_first the LSTM
            # would treat the batch axis as the time axis.
            batch_first=True,
        )
        self.fc = nn.Linear(self.lstm_size, n_vocab)

    def forward(self, x, prev_state=None):
        """Score the next note after every position of ``x``.

        Args:
            x: integer note ids shaped ``(batch, seq_len)``. A 1-D tensor is
                treated as one sequence and a 0-D tensor as one single step;
                both get a batch axis of size 1 that is kept in the output.
            prev_state: optional ``(h, c)`` pair shaped
                ``(num_layers, batch, lstm_size)``; ``None`` starts from zeros.

        Returns:
            ``(logits, state)`` with logits shaped ``(batch, seq_len, n_vocab)``
            and ``state`` the updated ``(h, c)`` pair.
        """
        if x.dim() == 0:
            x = x.view(1, 1)
        elif x.dim() == 1:
            x = x.unsqueeze(0)

        embed = self.embedding(x)
        output, state = self.lstm(embed, prev_state)
        logits = self.fc(output)
        return logits, state

    def init_state(self, sequence_length):
        """Return a zero ``(h, c)`` state on the model's device.

        Args:
            sequence_length: size of the middle axis of the state. Because the
                LSTM is batch-first, this must equal the batch size of the
                input the state is used with (the parameter name is kept for
                compatibility).
        """
        reference = next(self.parameters())
        shape = (self.num_layers, sequence_length, self.lstm_size)
        return (torch.zeros(shape, device=reference.device, dtype=reference.dtype),
                torch.zeros(shape, device=reference.device, dtype=reference.dtype))

    def init_hidden(self, batch_size=1):
        """Alias of ``init_state`` for callers that use the ``init_hidden`` name."""
        return self.init_state(batch_size)


# Build the network from the vocabulary size, then move it to the selected
# device and show its layer summary.
model = LSTMNetwork(n_vocab)
model = model.to(device)
print(model)

In [None]:

# Loss and optimiser used by the training loop.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Usage with one-hot encoded targets (kept for reference):
#_, targets = y1.max(dim=0)
#nn.CrossEntropyLoss()(out, Variable(targets))



In [None]:
def trainf(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and return its mean batch loss.

    For every batch the model scores the whole window and only the logits of
    the last time step are compared with the target, i.e. the note that
    follows the window.

    Args:
        dataloader: yields ``(X, Y)`` batches; ``X`` holds integer note ids
            shaped ``(batch, seq_len)``, ``Y`` the target class indices
            ``(batch,)`` (one-hot rows are converted to indices).
        model: called as ``model(X, None)`` and expected to return
            ``(logits, state)`` with logits shaped ``(batch, seq_len, n_vocab)``.
        loss_fn: called as ``loss_fn(logits, target_indices)``.
        optimizer: optimiser bound to ``model``'s parameters.

    Returns:
        The mean of the per-batch losses as a float (``nan`` if the
        dataloader yields no batch).

    Side effects:
        Sets the module-level ``best_model`` to a deep copy of the model taken
        at the batch with the lowest loss seen during this call.
    """
    global best_model
    # Follow the model's own device instead of relying on a global.
    model_device = next(model.parameters()).device
    size = len(dataloader.dataset)
    batch_losses = []
    best_loss = float("inf")

    model.train()
    for batch, (X, Y) in enumerate(dataloader):
        X, Y = X.to(model_device), Y.to(model_device)
        if Y.dim() > 1:
            # One-hot encoded targets: recover the class index of each row.
            Y = Y.argmax(dim=1)
        Y = Y.long()

        optimizer.zero_grad()

        # Compute prediction error. The state is passed explicitly as None so
        # every batch starts from a zero state.
        logits, _ = model(X, None)
        # CrossEntropyLoss takes (raw logits, class indices), in that order.
        loss = loss_fn(logits[:, -1, :], Y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        batch_losses.append(loss_value)

        if loss_value < best_loss:
            best_loss = loss_value
            best_model = deepcopy(model)

        if batch % 100 == 0:
            current = batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

    if not batch_losses:
        return float("nan")
    return sum(batch_losses) / len(batch_losses)

In [None]:
# Number of full passes over the training data.
epochs = 20

# One entry per epoch: whatever trainf returns for that epoch.
train_losses = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_losses = trainf(dataloader=train, model=model, loss_fn=loss_fn, optimizer=optimizer)
    train_losses += [epoch_losses]
print("Done!")

In [None]:
# The imports cell binds `plt` to the top-level `matplotlib` package, which
# has no plot()/legend()/show(); bind it to pyplot before plotting.
import matplotlib.pyplot as plt

# train_losses holds one entry per epoch.
fig, ax = plt.subplots()
ax.plot(train_losses, label='Training loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
def evaluateMultinomial(net, prime_seq, predict_len, temperature=0.8):
    '''
    Generate note ids by sampling from the network one step at a time.

    Arguments:
    net - model called as net(input, hidden) with input shaped (1, 1); it must
          return (logits, hidden) and provide init_state(1) (or init_hidden())
    prime_seq - priming sequence of integer note ids (at least one id)
    predict_len - number of notes to predict after the prime sequence
    temperature - divides the logits before sampling; higher values flatten
                  the distribution, lower values sharpen it

    Returns:
    A list with the prime ids followed by predict_len sampled ids.

    Raises:
    ValueError - if prime_seq is empty, temperature is not positive, or a
                 prime id falls outside the size of net.embedding (checked
                 only when the network exposes that attribute)
    '''
    prime_ids = [int(i) for i in prime_seq]
    if not prime_ids:
        raise ValueError("prime_seq must contain at least one note id")
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature}")

    # Fail early with a readable message instead of an index error raised
    # from inside the embedding lookup.
    vocab_size = getattr(getattr(net, "embedding", None), "num_embeddings", None)
    if vocab_size is not None and any(not 0 <= i < vocab_size for i in prime_ids):
        raise ValueError(f"prime_seq ids must be in [0, {vocab_size}), got {prime_ids}")

    # Keep every tensor on the device the network actually lives on.
    net_device = next(net.parameters()).device
    if hasattr(net, "init_state"):
        hidden = net.init_state(1)
    else:
        hidden = net.init_hidden()
    hidden = tuple(part.to(net_device) for part in hidden)

    predicted = list(prime_ids)
    prime = torch.tensor(prime_ids, dtype=torch.long, device=net_device)

    # Sample in eval mode (no dropout) and without tracking gradients; the
    # previous train/eval mode is restored afterwards.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            # "Building up" the hidden state using the prime sequence. Each id
            # is fed as a (1, 1) tensor: one sequence, one time step.
            for prime_id in prime[:-1]:
                _, hidden = net(prime_id.view(1, 1), hidden)

            # Last note of the prime sequence
            current = prime[-1].view(1, 1)

            # For every index to predict
            for _ in range(predict_len):
                # One score per vocabulary entry for the next note
                output, hidden = net(current, hidden)

                # Sample from the network output as a multinomial distribution.
                # softmax gives the same distribution as exp() of the scaled
                # logits but cannot overflow.
                probabilities = torch.softmax(output.reshape(-1) / temperature, dim=0)
                predicted_id = torch.multinomial(probabilities, 1)

                # Add predicted index to the list and use as next input
                predicted.append(predicted_id.item())
                current = predicted_id.view(1, 1)
    finally:
        net.train(was_training)

    return predicted


In [None]:
# Sample 500 new note ids after a five-id priming sequence and print the ids.
generated_seq_multinomial = evaluateMultinomial(
    net=model,
    prime_seq=[100, 101, 102, 101, 100],
    predict_len=500,
    temperature=1.2,
)
print(generated_seq_multinomial)

In [None]:
# Translate every generated id back to its note/chord key via int_to_note.
generated_seq_multinomial = list(map(int_to_note.__getitem__, generated_seq_multinomial))

In [None]:
def create_midi(prediction_output, offset_step=0.5):
    """Convert predicted note/chord keys into a music21 stream.

    Args:
        prediction_output: iterable of string patterns. A pattern made only of
            digits, or containing '.', is read as a chord whose dot-separated
            parts are integer pitches; any other pattern is passed to
            ``note.Note`` as a note name.
        offset_step: amount added to the offset after each emitted element so
            that consecutive elements do not stack (default 0.5).

    Returns:
        A ``stream.Stream`` holding one note or chord per non-empty pattern.
    """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        if not pattern:
            # An empty key carries no pitch to build a note from; skip it.
            continue

        if ('.' in pattern) or pattern.isdigit():
            # pattern is a chord
            chord_notes = []
            for current_note in pattern.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            element = chord.Chord(chord_notes)
        else:
            # pattern is a note
            element = note.Note(pattern)
            element.storedInstrument = instrument.Piano()

        element.offset = offset
        output_notes.append(element)

        # increase offset each iteration so that notes do not stack
        offset += offset_step

    midi_stream = stream.Stream(output_notes)

    return midi_stream

In [None]:
# Build the stream from the generated note/chord keys.
generated_stream = create_midi(generated_seq_multinomial)

# Make sure the target folder exists before the MIDI file is written into it.
os.makedirs('output', exist_ok=True)
generated_stream.write('midi', fp='output/uno.midi')