In [None]:
from music21 import converter, instrument, note, chord, stream
import matplotlib.pyplot as plt
import glob
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense, Reshape, Dropout, LSTM, Bidirectional
from tensorflow.keras.layers import BatchNormalization, LeakyReLU
from tensorflow.keras.models import Sequential, load_model
#from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.legacy import Adam #Para MAC

# Cargamos los datos 

In [2]:
def get_notes(files):
    """Extract the notes and chords contained in a collection of MIDI files.

    Every file is parsed with ``converter.parse`` and its flattened note
    stream is walked. A single note is recorded as its pitch name, a chord as
    the space-separated pitch names of its members; any other element is
    skipped. The quarter-length duration is stored next to each entry.

    Args:
        files: iterable of paths accepted by ``converter.parse``.

    Returns:
        A pandas DataFrame with the columns ``pitch`` and ``duration``.
        Repeated (pitch, duration) rows are dropped, so the table holds
        distinct pairs (keeping their original row labels), not the full
        note sequence.
    """
    pitches = []
    lengths = []

    for path in files:
        score = converter.parse(path)
        elements = score.flat.notes
        print("Parsing %s" % path)

        for elem in elements:
            if isinstance(elem, note.Note):
                label = str(elem.pitch)
            elif isinstance(elem, chord.Chord):
                label = ' '.join(str(p) for p in elem.pitches)
            else:
                # Neither a note nor a chord: nothing to record.
                continue
            pitches.append(label)
            lengths.append(elem.duration.quarterLength)

    table = pd.DataFrame({'pitch': pitches, 'duration': lengths})
    return table.drop_duplicates()

In [19]:
# Collect every path matching dataset/<subdir>/<entry> and parse all of them.
# NOTE(review): glob.glob is called without recursive=True, so '**' behaves
# like '*' (one directory level only), and nothing filters out non-MIDI
# entries before they reach get_notes - confirm the dataset folder is clean.
songs = glob.glob('dataset/**/*')
notes = get_notes(songs)

Parsing dataset/mozart/mz_570_1.mid
Parsing dataset/mozart/mz_570_2.mid
Parsing dataset/mozart/mz_570_3.mid
Parsing dataset/mozart/mz_545_1.mid
Parsing dataset/mozart/mz_332_3.mid
Parsing dataset/mozart/mz_330_1.mid
Parsing dataset/mozart/mz_332_2.mid
Parsing dataset/mozart/mz_545_2.mid
Parsing dataset/mozart/mz_330_2.mid
Parsing dataset/mozart/mz_330_3.mid
Parsing dataset/mozart/mz_332_1.mid
Parsing dataset/mozart/mz_545_3.mid
Parsing dataset/mozart/mz_331_2.mid
Parsing dataset/mozart/mz_333_1.mid
Parsing dataset/mozart/mz_331_3.mid
Parsing dataset/mozart/mz_331_1.mid
Parsing dataset/mozart/mz_333_3.mid
Parsing dataset/mozart/mz_333_2.mid
Parsing dataset/mozart/mz_311_1.mid
Parsing dataset/mozart/mz_311_2.mid
Parsing dataset/mozart/mz_311_3.mid
Parsing dataset/balakir/islamei.mid
Parsing dataset/liszt/liz_rhap09.mid
Parsing dataset/liszt/liz_et_trans8.mid
Parsing dataset/liszt/liz_donjuan.mid
Parsing dataset/liszt/liz_liebestraum.mid
Parsing dataset/liszt/liz_et2.mid
Parsing dataset/l

In [20]:
# Report how many paths were collected, then display the parsed table.
print('Number of files:', len(songs))
notes

Number of files: 262


Unnamed: 0,pitch,duration
0,B-4,2.0
1,B-3,2.0
2,F4,1.0
3,F3,1.0
4,D4,2.0
...,...,...
296290,C#4 F4 C#5 A4,1.0
296296,B-2,6.75
296332,C6 C5,1/3
296338,B-4 B-5,6.0


# GAN

### Definimos la red que identificará si la muestra dada fue generada por la otra red o es real

In [21]:
def define_discriminator(in_shape):
    """Build and compile the network that labels a sequence as real or fake.

    Layer stack: an LSTM returning full sequences, a bidirectional LSTM,
    three Dense + LeakyReLU stages (512, 256 and 100 units), dropout and a
    single sigmoid unit. The model is compiled with binary cross-entropy,
    Adam (learning rate 2e-5, beta_1 0.5) and the accuracy metric.

    Args:
        in_shape: shape of one input sample, forwarded as ``input_shape``
            to the first LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    net = Sequential()
    net.add(LSTM(512, input_shape=in_shape, return_sequences=True))
    net.add(Bidirectional(LSTM(512)))

    # Fully connected head: progressively narrower Dense/LeakyReLU stages.
    for width in (512, 256, 100):
        net.add(Dense(width))
        net.add(LeakyReLU(alpha=0.2))

    net.add(Dropout(0.5))
    net.add(Dense(1, activation='sigmoid'))

    net.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.00002, beta_1=0.5),
        metrics=['accuracy'],
    )
    return net

### Definimos la red que crea pistas musicales

In [22]:
def define_generator(seq_shape,latent_dim=100):
    """Build the network that turns a latent vector into a sequence.

    Three Dense + LeakyReLU + BatchNormalization stages (256, 512 and 1024
    units) are followed by a tanh Dense layer with ``np.prod(seq_shape)``
    units and a reshape to ``seq_shape``. The model summary is printed; the
    model is returned uncompiled.

    Args:
        seq_shape: shape of one generated sample.
        latent_dim: length of the input noise vector.

    Returns:
        The (uncompiled) ``Sequential`` generator.
    """
    generator = Sequential()

    for position, width in enumerate((256, 512, 1024)):
        if position == 0:
            # Only the first layer declares the size of the latent input.
            generator.add(Dense(width, input_dim=latent_dim))
        else:
            generator.add(Dense(width))
        generator.add(LeakyReLU(alpha=0.2))
        generator.add(BatchNormalization(momentum=0.8))

    # One tanh output per element of the target shape, then fold into shape.
    flat_size = np.prod(seq_shape)
    generator.add(Dense(flat_size, activation='tanh'))
    generator.add(Reshape(seq_shape))
    generator.summary()

    return generator

### Definimos la red generativa (GAN)

In [23]:
def define_gan(g_model, d_model):
    """Stack the generator and the discriminator into one combined model.

    The discriminator is flagged as non-trainable before it is added, then
    the stacked model is compiled with binary cross-entropy and Adam
    (learning rate 2e-4, beta_1 0.5).

    Args:
        g_model: generator network.
        d_model: discriminator network; its ``trainable`` attribute is set
            to ``False`` as a side effect.

    Returns:
        The compiled ``Sequential`` model generator -> discriminator.
    """
    d_model.trainable = False

    combined = Sequential()
    for part in (g_model, d_model):
        combined.add(part)

    combined.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    )
    return combined

### Generamos muestras para entrenar

In [24]:
def prepare_sequences(notes, n_vocab, sequence_length=100):
    """Turn a note sequence into normalised sliding windows for the networks.

    Each distinct entry of ``notes`` is mapped to an integer, windows of
    ``sequence_length`` consecutive codes are taken with stride 1, and the
    codes are rescaled with ``(code - n_vocab / 2) / (n_vocab / 2)``.

    Args:
        notes: sequence (list or 1-D array) of hashable note labels.
        n_vocab: scaling constant used for the normalisation.
        sequence_length: number of notes per window (default 100, the value
            that used to be hard-coded).

    Returns:
        Float array of shape ``(max(len(notes) - sequence_length, 0),
        sequence_length, 1)``.
    """
    # The label -> code mapping follows set iteration order. get_music decodes
    # with enumerate(set(dataset)), so the two must keep using the same order.
    note_to_int = {label: code for code, label in enumerate(set(notes))}

    # Encode every note once instead of once per window it belongs to.
    encoded = [note_to_int[label] for label in notes]

    n_windows = max(len(notes) - sequence_length, 0)
    windows = [encoded[start:start + sequence_length] for start in range(n_windows)]

    # Add a trailing feature axis so the result is LSTM-compatible.
    network_input = np.reshape(windows, (n_windows, sequence_length, 1))
    half_vocab = float(n_vocab) / 2
    return (network_input - half_vocab) / half_vocab

In [25]:
def generate_real_samples(dataset, n_samples):
    """Draw a random batch of real training windows labelled as real (1).

    Args:
        dataset: mapping/DataFrame whose ``'pitch'`` entry holds the note
            labels.
        n_samples: number of windows to draw (with replacement).

    Returns:
        Tuple ``(X, y)``: the selected windows produced by
        ``prepare_sequences`` and an ``(n_samples, 1)`` array of ones.
    """
    train_notes = np.array(dataset['pitch'])
    # The vocabulary size is the number of DISTINCT labels. Using the row
    # count here made the normalisation disagree with get_music, which
    # decodes generator output with len(set(dataset)).
    n_vocab = len(set(train_notes))
    # Note: the windows are rebuilt from the whole dataset on every call.
    X_train = prepare_sequences(train_notes, n_vocab)
    ix = np.random.randint(0, len(X_train), n_samples)
    X = X_train[ix]
    y = np.ones((n_samples, 1))
    return X, y
 
def generate_latent_points(latent_dim, n_samples):
    """Sample standard-normal noise to feed the generator.

    Args:
        latent_dim: length of each noise vector.
        n_samples: number of noise vectors.

    Returns:
        Array of shape ``(n_samples, latent_dim)`` drawn from NumPy's global
        random state.
    """
    # Drawing the 2-D array directly consumes the random stream in the same
    # row-major order as drawing a flat vector and reshaping it.
    return np.random.randn(n_samples, latent_dim)

def generate_fake_samples(g_model, latent_dim, n_samples):
    """Produce generator output labelled as fake (0).

    Args:
        g_model: generator exposing ``predict``.
        latent_dim: length of each noise vector.
        n_samples: number of samples to generate.

    Returns:
        Tuple ``(X, y)``: the generator's predictions for fresh noise and an
        ``(n_samples, 1)`` array of zeros.
    """
    noise = generate_latent_points(latent_dim, n_samples)
    generated = g_model.predict(noise)
    fake_labels = np.zeros((n_samples, 1))
    return generated, fake_labels

In [26]:
def plot_loss(disc_loss, gen_loss):
    """Plot both loss histories, save the figure to disk and show it.

    Args:
        disc_loss: recorded discriminator losses (drawn in red).
        gen_loss: recorded generator losses (drawn in blue).

    The figure is written to ``GAN_Loss_per_Epoch_final.png`` in the current
    working directory, displayed, and then closed.
    """
    curves = ((disc_loss, 'red'), (gen_loss, 'blue'))
    for values, colour in curves:
        plt.plot(values, c=colour)

    plt.title("GAN Loss per Epoch")
    # Legend entries follow the plotting order above.
    plt.legend(['Discriminator', 'Generator'])
    plt.xlabel('Epoch')
    plt.ylabel('Loss')

    plt.savefig('GAN_Loss_per_Epoch_final.png')
    plt.show()
    plt.close()

In [27]:
def summarize_performance(epoch, g_model, d_loss, g_loss, disc_loss, gen_loss):
    """Report the current losses, record them and checkpoint the generator.

    Args:
        epoch: zero-based iteration index being summarised.
        g_model: generator; saved via ``g_model.save``.
        d_loss: indexable whose item 0 is the discriminator loss and item 1
            its accuracy (as a fraction).
        g_loss: generator loss.
        disc_loss: list the discriminator loss is appended to.
        gen_loss: list the generator loss is appended to.

    The generator is written to ``generator_model_NNN.h5`` where ``NNN`` is
    ``epoch + 1`` zero-padded to three digits.
    """
    # TODO: present the metrics properly and add the ones the instructor asked for.
    d_loss_value, d_accuracy = d_loss[0], d_loss[1]
    report = "%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (
        epoch, d_loss_value, 100*d_accuracy, g_loss)
    print(report)

    disc_loss.append(d_loss_value)
    gen_loss.append(g_loss)

    checkpoint = 'generator_model_%03d.h5' % (epoch + 1)
    g_model.save(checkpoint)

def train(g_model, d_model, dataset, latent_dim=100, n_epochs=20, n_batch=128):
    """Train the GAN by alternating discriminator and generator updates.

    Each of the ``n_epochs`` iterations performs exactly one discriminator
    batch update (half real, half generated samples) followed by one
    generator update through the combined model. Every 50th iteration
    (starting with iteration 0) the losses are summarised and the generator
    is checkpointed; the recorded losses are plotted at the end.

    Args:
        g_model: generator network.
        d_model: compiled discriminator network.
        dataset: note table passed on to ``generate_real_samples``.
        latent_dim: length of the generator's noise input.
        n_epochs: number of training iterations.
        n_batch: batch size for the generator update; the discriminator
            sees ``int(n_batch / 2)`` real and as many fake samples.
    """
    combined = define_gan(g_model, d_model)
    half_batch = int(n_batch / 2)
    d_history, g_history = [], []

    for step in range(n_epochs):
        # Discriminator step: one mixed batch of real and generated samples.
        real_x, real_y = generate_real_samples(dataset, half_batch)
        fake_x, fake_y = generate_fake_samples(g_model, latent_dim, half_batch)
        batch_x = np.vstack((real_x, fake_x))
        batch_y = np.vstack((real_y, fake_y))
        d_loss = d_model.train_on_batch(batch_x, batch_y)

        # Generator step: noise is labelled "real" when fed to the combined model.
        noise = generate_latent_points(latent_dim, n_batch)
        wanted = np.ones((n_batch, 1))
        g_loss = combined.train_on_batch(noise, wanted)

        if step % 50 != 0:
            continue
        summarize_performance(step, g_model, d_loss, g_loss, d_history, g_history)

    plot_loss(d_history, g_history)

In [None]:
# Build both networks for samples of shape (100, 1), matching the 100-step
# windows with one feature that prepare_sequences produces by default, then
# train the GAN on the parsed note table.
d_model = define_discriminator((100,1))
g_model = define_generator((100,1))
train(g_model,d_model, notes)

# Generamos

In [29]:
def get_note(note_c, instr):
    """Create a note for ``note_c`` and tag it with an instrument.

    Args:
        note_c: value passed to ``note.Note`` (e.g. a pitch name).
        instr: instrument stored on the note's ``storedInstrument`` attribute.

    Returns:
        The newly created note.
    """
    tagged = note.Note(note_c)
    setattr(tagged, 'storedInstrument', instr)
    return tagged

def get_chord(pattern, offset, instr):
    """Build a chord from a space-separated string of pitch names.

    Args:
        pattern: pitch names separated by single spaces, e.g. ``'C4 E4 G4'``.
        offset: value assigned to the chord's ``offset`` attribute.
        instr: instrument attached to every member note via ``get_note``.

    Returns:
        The created chord. (It used to be built and then dropped, so callers
        received ``None``.)
    """
    members = [get_note(name, instr) for name in pattern.split(' ')]
    new_chord = chord.Chord(members)
    new_chord.offset = offset
    return new_chord


def get_music(model, dataset, length=500):
    """Generate a sequence of note labels with a saved generator.

    Args:
        model: path of the saved generator, passed to ``load_model``.
        dataset: iterable of note labels defining the vocabulary.
        length: maximum number of labels to return.

    Returns:
        List with at most ``length`` labels taken from ``dataset``.

    Raises:
        ValueError: if ``dataset`` is empty.
    """
    latent_dim = 100
    # The code -> label mapping follows set iteration order, the same order
    # prepare_sequences uses when it encodes the training data.
    vocabulary = set(dataset)
    n_vocab = len(vocabulary)
    if n_vocab == 0:
        raise ValueError("dataset must contain at least one note")
    int_to_note = dict(enumerate(vocabulary))

    generator_model = load_model(model)
    # Generate one sequence from a random latent vector.
    predictions = generator_model.predict(np.random.normal(0, 1, (1, latent_dim)))

    pred_notes_mapped = []
    for x in predictions[0]:
        # Undo the (-1, 1) normalisation to get back a vocabulary index.
        scaled = x * (n_vocab / 2) + (n_vocab / 2)
        # An output of exactly 1.0 maps to n_vocab, one past the last valid
        # code, so clamp the index into the valid range.
        index = min(max(int(scaled[0]), 0), n_vocab - 1)
        pred_notes_mapped.append(int_to_note[index])

    return pred_notes_mapped[:length]


def create_midi(pred_notes_mapped, instr, filename):
    """Write a sequence of note/chord labels to ``<filename>.mid``.

    Args:
        pred_notes_mapped: iterable of labels as returned by ``get_music``:
            a pitch name for a note, space-separated pitch names for a
            chord. A non-string item is unwrapped with ``item[0]``.
        instr: instrument attached to every note.
        filename: output path without the ``.mid`` extension.

    Returns:
        The stream that was written.
    """
    offset = 0
    output_notes = []

    for item in pred_notes_mapped:
        # Use the whole label. Indexing a string with [0] kept only its first
        # character, which lost octave/accidental and hid every chord.
        pattern = item if isinstance(item, str) else item[0]
        if ' ' in pattern:
            # Several pitch names: the label is a chord.
            output_notes.append(get_chord(pattern, offset, instr))
        else:
            new_note = get_note(pattern, instr)
            new_note.offset = offset
            output_notes.append(new_note)

        # Advance by a fixed step so consecutive elements do not overlap.
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='{}.mid'.format(filename))
    return midi_stream

In [None]:
# Decode one generated sequence with the first saved checkpoint
# (generator_model_001.h5 is the file written for iteration 0) and write it
# to music_gan3.mid using a violin instrument.
generated_music = get_music('generator_model_001.h5', np.array(notes['pitch']))
midi_gan = create_midi(generated_music, instrument.Violin(), 'music_gan3')