
<h1 align="center"> Music Generation </h1>

<img  align="center" src="images/tumblr.gif"/>


In [1]:
import helper
import numpy as np
from rnnmodel import RNN
import tensorflow as tf
import os

**Load the dataset and print the first song (audio playback is left commented out)**

In [2]:

# Read every tune from the `.abc` corpus file.
songs = helper.load_training_data(path="dataset/irish.abc")

# Show the first tune in full so its header fields and body can be inspected.
example_song = songs[0]
print("\nExample song: ", example_song, sep="\n")

# To listen to it, uncomment the next line:
# helper.play_song(example_song)

Found 403 songs in text

Example song: 
X:1
T:Alexander's
Z: id:dc-hornpipe-1
M:C|
L:1/8
K:D Major
(3ABc|dAFA DFAd|fdcd FAdf|gfge fefd|(3efe (3dcB A2 (3ABc|!
dAFA DFAd|fdcd FAdf|gfge fefd|(3efe dc d2:|!
AG|FAdA FAdA|GBdB GBdB|Acec Acec|dfaf gecA|!
FAdA FAdA|GBdB GBdB|Aceg fefd|(3efe dc d2:|!


**Join our list of song strings into a single string containing all songs, and build `vocab`, the sorted list of all unique characters in the joined string**

In [3]:
# Concatenate all tunes into one corpus string, separated by a blank line.
songs_joined = "\n\n".join(songs)

# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = sorted(set(songs_joined))

corpus_summary = "There are {} unique characters in the dataset with size of {}."
print(corpus_summary.format(len(vocab), len(songs_joined)))

There are 82 unique characters in the dataset with size of 97658.


**Create the encoder (`char2idx`) and decoder (`idx2char`) for converting between characters and integer ids for training, and print the first few encoder entries**

In [4]:
char2idx, idx2char = helper.get_mappings(vocab)

# Preview only the first five entries of the character -> id table.
print('{')
for char in list(char2idx)[:5]:
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '"' :   3,
  '#' :   4,
  ...
}


**Vectorize the joined song string using `char2idx`**

In [5]:
# Encode the whole corpus as an array of integer character ids.
vectorized_songs = helper.vectorize_string(songs_joined, char2idx)

# Show the first ten characters next to the ids they were mapped to.
preview_text = repr(songs_joined[:10])
preview_ids = vectorized_songs[:10]
print('{} ---- characters mapped to int ----> {}'.format(preview_text, preview_ids))

'X:1\nT:Alex' ---- characters mapped to int ----> [48 22 13  0 44 22 25 66 59 78]


**Example: shape of the vectorized songs used for training the LSTM**

In [6]:
# Bundle the encoded corpus with two small integers as a sample argument tuple.
# Presumably (data, seq_length, batch_size) -- TODO confirm against helper.get_batch.
test_args = (vectorized_songs, 10, 2)

encoded_corpus = test_args[0]
print("test_args[0].shape:", encoded_corpus.shape)
# NOTE: despite the "size" label, this prints the tuple itself, not a size.
print("test_args size:", test_args)

test_args[0].shape: (97658,)
test_args size: (array([48, 22, 13, ..., 22, 81,  2]), 10, 2)


**Example of the `get_batch` helper used for training: it produces a batch of input and target sequences with the provided `seq_length` from `vectorized_songs`**

In [7]:
x_batch, y_batch = helper.get_batch(vectorized_songs, seq_length=5, batch_size=1)

print("Size: {}, x_batch: {}, y_batch:{}\n".format(len(x_batch), x_batch, y_batch))

# With batch_size=1 the batch axis can be squeezed away, leaving one id per step.
inputs_flat = np.squeeze(x_batch)
targets_flat = np.squeeze(y_batch)

# Walk the input/target pairs step by step, decoding each id back to its character.
for step, (input_idx, target_idx) in enumerate(zip(inputs_flat, targets_flat)):
    print("Step {:3d}".format(step))
    input_char = repr(idx2char[input_idx])
    print("  input: {} ({:s})".format(input_idx, input_char))
    target_char = repr(idx2char[target_idx])
    print("  expected output: {} ({:s})".format(target_idx, target_char))

Size: 1, x_batch: [[56 81 55 61 59]], y_batch:[[81 55 61 59  1]]

Step   0
  input: 56 ('b')
  expected output: 81 ('|')
Step   1
  input: 81 ('|')
  expected output: 55 ('a')
Step   2
  input: 55 ('a')
  expected output: 61 ('g')
Step   3
  input: 61 ('g')
  expected output: 59 ('e')
Step   4
  input: 59 ('e')
  expected output: 1 (' ')


### Sample RNN model without training



In [8]:
# Instantiate the project's RNN wrapper.
rnn = RNN()

# Build the network with one class per vocabulary character and a batch of 32.
rnn.build_model(
    len(vocab),
    embedding_dim=256,
    rnn_units=1024,
    batch_size=32,
)

# Print the layer-by-layer summary of the freshly built (untrained) model.
rnn.model_summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (32, None, 256)           20992     
_________________________________________________________________
lstm (LSTM)                  (32, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (32, None, 82)            84050     
Total params: 5,352,018
Trainable params: 5,352,018
Non-trainable params: 0
_________________________________________________________________


**Prediction sample on untrained model**

In [9]:

x, y = helper.get_batch(vectorized_songs, seq_length=100, batch_size=32)
pred = rnn.get_model(x)

print("Input shape:      ", x.shape, " # (batch_size, sequence_length)")
print("Prediction shape: ", pred.shape, "# (batch_size, sequence_length, vocab_size)")

# Draw one character id per time step from the outputs for the first sequence,
# then drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(pred[0], num_samples=1), axis=-1
).numpy()

print("\n\nsample indices:", sampled_indices)

# Decode both the input ids and the sampled ids back into text for comparison.
input_text = "".join(idx2char[x[0]])
sampled_text = "".join(idx2char[sampled_indices])
print("\n\nInput: \n", repr(input_text))
print()
print("Next Char Predictions: \n", repr(sampled_text))

Input shape:       (32, 100)  # (batch_size, sequence_length)
Prediction shape:  (32, 100, 82) # (batch_size, sequence_length, vocab_size)


sample indices: [21 73 37 34 13 63 30 19  9 19 56 75  7 23  6 22 33 66 19 43 65  4 72 13
 80 53 15 34 36 41 46 38 65  8 18  9 76 77 29 24 78 51  0 66  8 21 69 20
  4 11 11 55 73 15 68 59 43 33 80 70 79 67  6 43 78  6 31 11 81  1 56 36
 46 53 33  0 31 35 73 56 13 60 50 63 41 54 55 41 22 65 68 71 70 18 60 60
 50 70 56 57]


Input: 
 'd edB|def gag|fed e2B|def gag|fed efg|]!\n\nX:30\nT:White Blanket\nZ: id:dc-setdance-31\nM:C|\nL:1/8\nK:G M'

Next Char Predictions: 
 '9sMJ1iF7-7bu)=(:Il7Sk#r1z^3JLQVNk,6-vwE>x[\nl,9o8#//as3neSIzpym(Sx(G/| bLV^I\nGKsb1fZiQ_aQ:knqp6ffZpbc'


**Defining the loss function**

In [10]:
def compute_loss(labels, logits):
    """Sparse categorical cross-entropy between target ids and raw logits.

    Args:
        labels: Target class ids.
        logits: Unnormalized model outputs; ``from_logits=True`` is passed
            to Keras, so no softmax should have been applied beforehand.

    Returns:
        The value returned by
        ``tf.keras.losses.sparse_categorical_crossentropy``; no further
        reduction is applied here.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True
    )

# Loss of the untrained model on the sample batch, averaged over all entries.
example_batch_loss = compute_loss(y, pred)
mean_loss = example_batch_loss.numpy().mean()

print("Prediction shape: ", pred.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", mean_loss)

Prediction shape:  (32, 100, 82)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.406665


### Define optimizer and training operation ###

**Hyperparameter setting and optimization**

In [6]:
# Model parameters:
vocab_size = len(vocab)  # one class per distinct character in the corpus
embedding_dim = 256
rnn_units = 1024  # Experiment between 1 and 2048

# Optimization parameters:
num_training_iterations = 2000  # Increase this to train longer
batch_size = 32  # Experiment between 1 and 64
seq_length = 100  # Experiment between 50 and 500
learning_rate = 5e-3  # Experiment between 1e-5 and 1e-1

# Checkpoint location (directory plus the file-name prefix used inside it):
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")

In [7]:
# Start from a fresh wrapper and configure it with the hyperparameters above.
# NOTE: `complile` (sic) is the method name as this notebook calls it on RNN.
rnn = RNN()
training_config = dict(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=batch_size,
    learning_rate=learning_rate,
    num_training_iterations=num_training_iterations,
    seq_length=seq_length,
    checkpoint_prefix=checkpoint_prefix,
)
rnn.complile(**training_config)

In [8]:
# Print the layer summary of the model that is about to be trained.
rnn.model_summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (32, None, 256)           20992     
_________________________________________________________________
lstm (LSTM)                  (32, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (32, None, 82)            84050     
Total params: 5,352,018
Trainable params: 5,352,018
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train on the integer-encoded corpus. This is the expensive step: the recorded
# run below took roughly three hours for 2000 iterations.
rnn.train(vectorized_songs)

100%|██████████| 2000/2000 [3:05:09<00:00,  5.55s/it]  


In [10]:
# Keep whatever history the wrapper recorded for the training run
# (presumably per-iteration loss values -- TODO confirm in rnnmodel).
history = rnn.get_history()

**Generate music using the RNN model**

**Restore the latest checkpoint**

In [11]:
# Rebuild the network with the same hyperparameters but batch_size=1, so a
# single sequence can be fed at a time during generation.
# NOTE: `complile` (sic) is the method name as this notebook calls it on RNN.
rnn = RNN()
inference_config = dict(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=1,
    learning_rate=learning_rate,
    num_training_iterations=num_training_iterations,
    seq_length=seq_length,
    checkpoint_prefix=checkpoint_prefix,
)
rnn.complile(**inference_config)

# Load saved weights from the checkpoint directory (presumably the latest
# checkpoint written during training -- TODO confirm in rnnmodel).
rnn.load_model(checkpoint_dir)

rnn.model_summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            20992     
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_1 (Dense)              (1, None, 82)             84050     
Total params: 5,352,018
Trainable params: 5,352,018
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Generate 1000 steps of text from the restored model, seeded with the string "A".
new_song = rnn.song_generator(
    start_string="A",
    char2idx=char2idx,
    idx2char=idx2char,
    generation_length=1000,
)

100%|██████████| 1000/1000 [00:11<00:00, 89.49it/s]


**Let's play**

In [27]:
# Split the generated text into individual song snippets and display the list.
generated_songs = helper.extract_song_snippet(new_song)
generated_songs

Found 3 songs in text


['ABc|d6|]!',
 "X:26\nT:Rodney's Glory\nZ: id:dc-polka-8\nM:2/4\nL:1/8\nK:D Major\ng|fed edB|GFG BdB|ABd edB|!\ndef gfe|fgf fed|e3 efe|c3 e3|ABc def|!\ng2G BAG|DED D2ge|dBAc BG:|!\nBd|g2fg egde|gdBG AGEF|G\nDGA _e3 gfe|c2e d2B A2F|EFG ABc|d3 d2:|!",
 'X:289\nT:Peter Street\nZ: id:dc-polka-2\nM:2/4\nL:1/8\nK:A Major\nag|f2fe c2ec|B2BA BdcB|AGAB ceBd|c2A2 A2:|!']

In [30]:
# Play the third extracted song when there is one. The number of snippets
# extracted from the generated text can differ from run to run, so fall back
# to the last available snippet instead of failing on a hard-coded index.
if not generated_songs:
    raise IndexError(
        "No songs were extracted from the generated text; "
        "re-run generation (e.g. with a larger generation_length)."
    )
song_index = min(2, len(generated_songs) - 1)
helper.play_song(generated_songs[song_index])