# Training

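In this section you can train the transformer model, either with Bach (music21 corpus) or, as the code cells below currently do, with the Pink Floyd MIDI files in `../mukkeBude/songs/pinkfloyd/`. The first part uses the monophonic encoding and the second part the polyphonic encoding.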

## Train monophony encoding

In [None]:
# Reduce TensorFlow's log output via the TF_CPP_MIN_LOG_LEVEL environment
# variable. It is set in this cell, ahead of the cell that imports TensorFlow.
# The value selects which message levels are hidden:
#   0 = all messages are logged (default behavior)
#   1 = INFO messages are not printed
#   2 = INFO and WARNING messages are not printed
#   3 = INFO, WARNING, and ERROR messages are not printed
import os

os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "2"})

In [None]:
# Load the TensorBoard notebook extension; it provides the `%tensorboard`
# magic that a later cell uses to open the dashboard.
%load_ext tensorboard

In [None]:
from mukkeBude.model import MukkeBudeTransformer
from mukkeBude.mapping import MusicMapping
import mukkeBude.utils as utils
import music21 as m21
import tensorflow as tf
import keras

# List the GPUs TensorFlow can see (an empty list means none was found).
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

In [None]:
# Create the MusicMapping instance; it is handed to the dataset helper and to
# the model in the cells below.
mapping = MusicMapping.create()

# Optional: save the mapping.
# mapping.save("mapping.txt")

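We use the music21 corpus for the Bach training data. <br>
You can adjust the `paths` to reduce the number of training songs. <br>
Note that the code cell below currently loads local Pink Floyd MIDI files (`corpus=False`) instead of the corpus. <br>
<br>
See: https://web.mit.edu/music21/doc/about/referenceCorpus.html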

To use custom training data use:
```python
from pathlib import Path

paths = list(Path("./dataset/Pokemon").rglob("*.midi"))
```

In [None]:
from pathlib import Path

# Recursively collect every file whose name matches "*.mid*" (e.g. .mid and
# .midi) below the Pink Floyd song folder.
song_dir = Path("../mukkeBude/songs/pinkfloyd/")
paths = list(song_dir.rglob("*.mid*"))

print(f"Found {len(paths)} songs in corpus.")

# Encode the collected files; the result is written to the training file in
# the next cell.
encoded_songs = utils.load_dataset_lstm(
    paths,
    64,
    mapping,
    raw_songs=True,
    corpus=False,
)

In [None]:
# Create dataset: hand the encoded songs to `create_train_data` together with
# the target file name. The same file name is passed to `model.train` in the
# next cell.
mono_dataset_file = "raw_train_ds_mono_pinkfloyd.txt"
utils.create_train_data(encoded_songs, mono_dataset_file)
print("Dataset created")

In [None]:
# Train model: build the transformer on top of the mapping and show it.
model = MukkeBudeTransformer(mapping)
print(model)

# Keras callback that writes the training logs to `logdir` for TensorBoard.
logdir = "logs/pinkfloyd_transformer"
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Train on the monophonic dataset file created in the previous cell.
model.train(
    "raw_train_ds_mono_pinkfloyd.txt",
    min_training_seq_len=32,
    epochs=100,
    tensorboard_callback=tensorboard_callback,
)

In [None]:
# Save the trained model under this name; the "Generate monophony" section
# loads it again with `MukkeBudeTransformer.load` using the same name.
model.save("PinkFloyd_soloMelodie_transformer")

In [None]:
%tensorboard --logdir logs/bach_transformer

## Train polyphony encoding

In [1]:
# Reduce TensorFlow's log output via the TF_CPP_MIN_LOG_LEVEL environment
# variable. It is set in this cell, ahead of the cell that imports TensorFlow.
# The value selects which message levels are hidden:
#   0 = all messages are logged (default behavior)
#   1 = INFO messages are not printed
#   2 = INFO and WARNING messages are not printed
#   3 = INFO, WARNING, and ERROR messages are not printed
import os

os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "2"})

In [2]:
# Load the TensorBoard notebook extension; it provides the `%tensorboard`
# magic that a later cell uses to open the dashboard.
%load_ext tensorboard

In [3]:
from mukkeBude.model import MukkeBudeTransformer
from mukkeBude.mapping import MusicMapping
import mukkeBude.utils as utils
import music21 as m21
import tensorflow as tf
import keras

# List the GPUs TensorFlow can see (an empty list means none was found).
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

2023-05-23 12:26:56.438481: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[]


In [4]:
# Create the MusicMapping instance; it is used by the polyphonic encoding loop
# and by the model in the cells below.
mapping = MusicMapping.create()

# Optional: save the mapping.
# mapping.save("mapping.txt")

We use the music21 corpus for the Bach training data. <br>
You can adjust the `paths` to reduce the number of training songs. <br>
Note that the code cell below currently reads local Pink Floyd MIDI files instead of the corpus. <br>
<br>
See: https://web.mit.edu/music21/doc/about/referenceCorpus.html

To load custom training data use:
```python
from pathlib import Path

paths = list(Path("./dataset/Pokemon").rglob("*.midi"))
```

In [5]:
# Load songs: recursively collect every file whose name matches "*.mid*"
# (e.g. .mid and .midi) below the Pink Floyd song folder.
from pathlib import Path

song_dir = Path("../mukkeBude/songs/pinkfloyd/")
paths = list(song_dir.rglob("*.mid*"))

print(f"Found {len(paths)} songs in corpus.")

# For each file: read it, convert it to the polyphonic encoding and keep the
# textified result.
# Disabled step, kept for reference:
#   song = utils.transpose_songs([song,])[0]
encoded_songs = [
    mapping.textify(
        utils.to_polyphonic_encoding(utils.read_single(midi_path), mapping)
    )
    for midi_path in paths
]

print(f"Songs encoded: {len(encoded_songs)}")

Found 1 songs in corpus.
Songs encoded: 1


  song.flat.getElementsByClass("Note").highestTime,
  song.flat.getElementsByClass("Chord").highestTime,


In [6]:
# Create dataset: hand the encoded songs to `create_train_data` together with
# the target file name. The same file name is passed to `model.train` in the
# training cell.
poly_dataset_file = "raw_train_ds_poly_pinkfloyd.txt"
utils.create_train_data(encoded_songs, poly_dataset_file)
print("Dataset created")

Dataset created


In [8]:
# Train model: build the transformer on top of the mapping and show it.
model = MukkeBudeTransformer(mapping)
print(model)

# Keras callback that writes the training logs to `logdir` for TensorBoard.
logdir = "logs/pinkfloyd_transformer"
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Train on the polyphonic dataset file created above.
model.train(
    "raw_train_ds_poly_pinkfloyd.txt",
    min_training_seq_len=128,
    epochs=30,
    tensorboard_callback=tensorboard_callback,
    batch_size=1,
    seq_len=16,
)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddin  (None, None, 256)        780288    
 g_1 (TokenAndPositionEmbedd                                     
 ing)                                                            
                                                                 
 transformer_decoder_4 (Tran  (None, None, 256)        394749    
 sformerDecoder)                                                 
                                                                 
 transformer_decoder_5 (Tran  (None, None, 256)        394749    
 sformerDecoder)                                                 
                                                                 
 transformer_decoder_6 (Tran  (None, None, 256)        3947

In [None]:
%tensorboard --logdir logs/bach_transformer

In [9]:
# Save the trained model under this name; the "Generate polyphony" section
# loads it again with `MukkeBudeTransformer.load` using the same name.
model.save("PinkFloyd_polyphonie_transformer")

'/home/deglasfl/repos/mukkeBude/mukkeBude/model/preTrainedModels/PinkFloyd_polyphonie_transformer.h5'

# Generate music

In this section you can generate music with a pre-trained transformer model. The first part uses the monophonic encoding and the second part the polyphonic encoding.

## Generate monophony

In [None]:
# Reduce TensorFlow's log output via the TF_CPP_MIN_LOG_LEVEL environment
# variable. It is set in this cell, ahead of the cell that imports TensorFlow.
# The value selects which message levels are hidden:
#   0 = all messages are logged (default behavior)
#   1 = INFO messages are not printed
#   2 = INFO and WARNING messages are not printed
#   3 = INFO, WARNING, and ERROR messages are not printed
import os

os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "2"})

In [None]:
from mukkeBude.mapping import MusicMapping
from mukkeBude.model import MukkeBudeTransformer
import mukkeBude.utils as utils
import tensorflow as tf
from pathlib import Path

from mukkeBude.mapping import SPECIAL_TOKS
from mukkeBude.mapping import REST
from mukkeBude.mapping import WAIT_LSTM

# List the GPUs TensorFlow can see (an empty list means none was found).
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

In [None]:
# Create the MusicMapping instance; it is passed to
# `MukkeBudeTransformer.load` in the next code cell.
mapping = MusicMapping.create()

# Optional: save the mapping.
# mapping.save("mapping.txt")

To load a model you need the training data file it was trained on, and you have to pass the same training parameters that were used for training.

In [None]:
# Load the saved monophonic model. The dataset file and `min_training_seq_len`
# are the values used by the training cell in "Train monophony encoding".
model = MukkeBudeTransformer.load(
    mapping,
    "PinkFloyd_soloMelodie_transformer",
    "raw_train_ds_mono_pinkfloyd.txt",
    min_training_seq_len=32,
)

In [None]:
# Create song: let the loaded model continue the seed melody.
# NOTE(review): this call used to be commented out behind a bare TODO, which
# left `generated_song` undefined on a fresh kernel. Confirm the seed and
# `max_length` are the intended ones for the monophonic model.
generated_song = model.generate("n60 _ _ _ n55 _ _ _ n52 _ _ _ n48 _ n47 _ n60 _ _ _ n60", max_length=500)

# Copy SPECIAL_TOKS and drop REST and WAIT_LSTM from the copy, so these two are
# not in the list handed to `replace_special_tokens`.
special_tokens = SPECIAL_TOKS.copy()
special_tokens.remove(REST)
special_tokens.remove(WAIT_LSTM)

# Presumably replaces every token listed in `special_tokens` with WAIT_LSTM --
# verify against `utils.replace_special_tokens`.
generated_song = " ".join(utils.replace_special_tokens(generated_song.split(), WAIT_LSTM, special_tokens))

In [None]:
# Decode the generated token string into `new_song`, which the next cell
# writes to a MIDI file, then show the raw token string.
new_song = utils.decode_songs_old(generated_song)
print(generated_song)

In [None]:
# Write the decoded song to a MIDI file; the path is relative, so it resolves
# against the current working directory.
path = Path("generated_song_pinkfloyd_trans_mono.midi")
utils.write_midi(new_song, path)

## Generate polyphony

In [11]:
# Reduce TensorFlow's log output via the TF_CPP_MIN_LOG_LEVEL environment
# variable. It is set in this cell, ahead of the cell that imports TensorFlow.
# The value selects which message levels are hidden:
#   0 = all messages are logged (default behavior)
#   1 = INFO messages are not printed
#   2 = INFO and WARNING messages are not printed
#   3 = INFO, WARNING, and ERROR messages are not printed
import os

os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "2"})

In [12]:
from mukkeBude.mapping import MusicMapping
from mukkeBude.model import MukkeBudeTransformer
import mukkeBude.utils as utils
import tensorflow as tf
import numpy as np
from pathlib import Path

from mukkeBude.mapping import SPECIAL_TOKS
from mukkeBude.mapping import SEP
from mukkeBude.mapping import BOS

# List the GPUs TensorFlow can see (an empty list means none was found).
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

[]


In [13]:
# Create the MusicMapping instance; it is passed to
# `MukkeBudeTransformer.load` and used to numericalize and decode the
# generated song in the cells below.
mapping = MusicMapping.create()

# Optional: save the mapping.
# mapping.save("mapping.txt")

To load a model you need the training data file it was trained on, and you have to pass the same training parameters that were used for training.

In [14]:
# Load the saved polyphonic model with the same dataset file and the same
# `min_training_seq_len` (128) that the polyphony training cell passed to
# `model.train`.
# NOTE(review): training also passed `batch_size=1` and `seq_len=16`; check
# whether `MukkeBudeTransformer.load` needs these as well.
model = MukkeBudeTransformer.load(
    mapping,
    "PinkFloyd_polyphonie_transformer",
    "raw_train_ds_poly_pinkfloyd.txt",
    min_training_seq_len=128,
)

In [15]:
# Create song: let the loaded model continue the seed sequence.
seed_sequence = "n69 d4 xxsep d4 n73 d2 n69 d2 xxsep d2 n73 d3 n69 d3 xxsep d3 n69 d1 xxsep d1"
generated_song = model.generate(seed_sequence, max_length=200, probability=0.8)

# Copy SPECIAL_TOKS and drop SEP and BOS from the copy, so these two are not
# in the list handed to `replace_special_tokens`.
special_tokens = SPECIAL_TOKS.copy()
for kept_token in (SEP, BOS):
    special_tokens.remove(kept_token)

# Presumably replaces every token listed in `special_tokens` with "d4" --
# verify against `utils.replace_special_tokens`.
cleaned_tokens = utils.replace_special_tokens(generated_song.split(), "d4", special_tokens)
generated_song = " ".join(cleaned_tokens)
print(generated_song)

n69 d4 xxsep d4 n73 d2 n69 d2 xxsep d2 n73 d3 n69 d3 xxsep d3 n69 d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 n74 d1 xxsep d1 xxsep d1 n74 d16 n74 d16 n69 d1 xxsep d1 n74 d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 n74 d1 xxsep d1 xxsep d1 n74 d16 n74 d16 n74 d16 n74 d16 n74 d16 n74 d16 n74 d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 xxsep d1 n74 d16 n74 d16 n74 d16 n74 d1 n78 d16 n74 d16 n69 d1 xxsep d1 n74 d16 n74 d1 d1 n74 d16 n74 d16 n74 d16 n74 d16 n74 d16 n74 d1 xxsep d1 xxsep d1 n74 d16 n69 d16 n69 d16 n74 d16 n74 d16 n74 d1 xxsep d1 xxsep d1 xxsep d1 n74 d1 xxsep d1 xxsep d1 xxsep d1 n78 d16 n74 d16 n69 d16 n74 d16 n74 d16 n74 d1 xxsep d1 n74 d16 n69 d16 n74 d16 n74 d16 n74 d1 xxsep d1 xxsep d1 n74 d16 n74 d16 n74 d16 n69 d16 n74 d16 n74 d16 n69 d16 n74 d16 n74 d16 n69


In [16]:
# Convert to music21: numericalize the space-separated tokens with the mapping
# and wrap the result in a NumPy array.
new_song_ints = np.array(mapping.numericalize(generated_song.split(" ")))

# Decode the polyphonic encoding (bpm=100) into `new_song`.
new_song = utils.from_polyphonic_encoding(new_song_ints, mapping, bpm=100)

# Write the song to a MIDI file; the path is relative, so it resolves against
# the current working directory.
path = Path("generated_song_pinkfloyd_trans_poly.midi")
utils.write_midi(new_song, path)
