<a href="https://colab.research.google.com/github/Ahtesham519/Genrative_Deep_learning_v2_2023/blob/main/Transformer_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%load_ext autoreload
%autoreload 2

import os
import glob
import numpy as np
import time
import matplotlib.pyplot

import tensorflow as tf
from tensorflow.keras import layers, models , losses , callbacks

import music21

from transformer_utils import(
    parse_midi_files,
    load_parsed_files ,
    get_midi_note,
    SinePositionEncoding,
)

#0. Parameters

In [None]:
# --- Data loading -----------------------------------------------------------
# True: parse the MIDI files from scratch; False: use load_parsed_files().
PARSE_MIDI_FILES = True
# Handed to parse_midi_files() when parsing is enabled.
PARSED_DATA_PATH = "/app/notebooks/11_music/01_transformer/parsed_data/"
# Sequences are parsed with length SEQ_LEN + 1 so that inputs and targets
# (the same sequence shifted by one token) each have SEQ_LEN tokens.
SEQ_LEN = 50
# Passed to Dataset.repeat() when the training set is built.
DATASET_REPETITIONS = 1
# Batch size used by create_dataset().
BATCH_SIZE = 256

# --- Model hyper-parameters -------------------------------------------------
# NOTE(review): apart from DROPOUT_RATE (the TransformerBlock default) these
# are presumably read by the model-building cells further down — confirm.
EMBEDDING_DIM = 256
KEY_DIM = 256
N_HEADS = 5
FEED_FORWARD_DIM = 256
DROPOUT_RATE = 0.3

# --- Training / generation --------------------------------------------------
# NOTE(review): not referenced in the cells visible here; presumably consumed
# by the training and generation cells later in the notebook.
LOAD_MODEL = False
EPOCHS = 5000
GENERATE_LEN = 50

#1. Prepare the Data

In [None]:
# Load the data: collect every MIDI file of the corpus.
# The result must be bound to `file_list`, the name that the print below and
# the parsing cells read. The paths are sorted because glob returns them in
# arbitrary, filesystem-dependent order and later cells pick examples by index.
file_list = sorted(glob.glob("/app/data/bach-cello/*.mid"))
print(f"Found {len(file_list)} midi files")

In [None]:
# Alias for music21's converter module; it is passed to parse_midi_files()
# as its second positional argument when the MIDI files are parsed.
parser = music21.converter


In [None]:
# Build a short excerpt to inspect: parse the second MIDI file in file_list,
# keep the first piece returned by splitAtQuarterLength(12), and chordify it.
parsed_example = music21.converter.parse(file_list[1])
opening_excerpt = parsed_example.splitAtQuarterLength(12)[0]
example_score = opening_excerpt.chordify()

In [None]:
# Display the excerpt with show()'s default output format.
# NOTE(review): presumably this needs a notation renderer configured for
# music21 in the runtime — confirm before relying on it.
example_score.show()

In [None]:
# Display the same excerpt again, this time requesting the "text" format.
example_score.show("text")

In [None]:
# Obtain the note and duration sequences: parse the raw MIDI files when
# PARSE_MIDI_FILES is set, otherwise fall back to load_parsed_files().
# NOTE(review): load_parsed_files() is called with no arguments while the
# parse branch receives PARSED_DATA_PATH — confirm the loader knows where the
# parsed data lives.
notes, durations = (
    parse_midi_files(file_list, parser, SEQ_LEN + 1, PARSED_DATA_PATH)
    if PARSE_MIDI_FILES
    else load_parsed_files()
)


In [None]:
# Pick one example (index 658) from each sequence list and print both.
example_notes, example_durations = notes[658], durations[658]
print("\nNotes string\n", example_notes, "...")
print("\nDuration string\n", example_durations, "...")

#2. Tokenize the data

In [None]:
def create_dataset(elements, shuffle=True):
    """Batch ``elements`` and fit a TextVectorization layer on them.

    Args:
        elements: Data accepted by ``tf.data.Dataset.from_tensor_slices``
            (this notebook passes the parsed notes and durations).
        shuffle: When True (the default, which keeps the historical
            behaviour) the batched dataset is shuffled with a buffer of 1000
            batches. Pass False when the result is going to be zipped with
            another dataset, so that the pairing of elements is preserved.

    Returns:
        A ``(ds, vectorize_layer, vocab)`` tuple: the batched dataset
        (incomplete final batches are dropped), the adapted vectorization
        layer, and the vocabulary it learned.
    """
    ds = tf.data.Dataset.from_tensor_slices(elements).batch(
        BATCH_SIZE, drop_remainder=True
    )
    if shuffle:
        ds = ds.shuffle(1000)
    vectorize_layer = layers.TextVectorization(
        standardize=None, output_mode="int"
    )
    vectorize_layer.adapt(ds)
    vocab = vectorize_layer.get_vocabulary()
    return ds, vectorize_layer, vocab


# Each shuffle draws its own random order, so shuffling the two streams
# separately would pair a batch of notes with the durations of a different
# batch once they are zipped. Build both unshuffled and shuffle the zipped
# dataset instead, which keeps every note aligned with its duration.
notes_seq_ds, notes_vectorize_layer, notes_vocab = create_dataset(
    notes, shuffle=False
)
durations_seq_ds, durations_vectorize_layer, durations_vocab = create_dataset(
    durations, shuffle=False
)
seq_ds = tf.data.Dataset.zip((notes_seq_ds, durations_seq_ds)).shuffle(1000)

In [None]:
# Display the same example notes and durations converted to ints
example_tokenised_notes = notes_vectorize_layer(example_notes)
example_tokenised_durations = durations_vectorize_layer(example_durations)
print("{:10} {:10}".format("note token", "duration token"))
# Pair up the first 11 tokens of each sequence; no positional index is
# needed, so zip() is used on its own.
for note_int, duration_int in zip(
    example_tokenised_notes.numpy()[:11],
    example_tokenised_durations.numpy()[:11],
):
    print(f"{note_int:10}{duration_int:10}")

In [None]:
# Vocabulary sizes of the two token streams.
notes_vocab_size, durations_vocab_size = len(notes_vocab), len(durations_vocab)

# For each vocabulary, print its length followed by its first ten
# token-id -> token mappings (notes first, then durations).
for heading, vocab in (
    ("\nNOTES_VOCAB:length = ", notes_vocab),
    ("\nDURATIONS_VOCAb:length = ", durations_vocab),
):
    print(f"{heading}{len(vocab)}")
    for i, note in enumerate(vocab[:10]):
        print(f"{i}: {note}")


#3. Create the Training set

In [None]:
# Create the training set of sequences and the same sequences shifted by one note


def prepare_inputs(notes, durations):
    """Tokenise a batch of note/duration strings into inputs and targets.

    Each batch gets a trailing axis added, is run through its vectorize
    layer, and is then sliced twice along axis 1: everything but the last
    token becomes the input, everything but the first token the target.

    Returns:
        ``(x, y)`` where both ``x`` and ``y`` are
        ``(note_tokens, duration_tokens)`` tuples.
    """
    note_tokens = notes_vectorize_layer(tf.expand_dims(notes, -1))
    duration_tokens = durations_vectorize_layer(tf.expand_dims(durations, -1))
    inputs = (note_tokens[:, :-1], duration_tokens[:, :-1])
    targets = (note_tokens[:, 1:], duration_tokens[:, 1:])
    return inputs, targets


ds = seq_ds.map(prepare_inputs).repeat(DATASET_REPETITIONS)

In [None]:
# Take a single element from the training dataset and print it as a quick
# sanity check of the (x, y) structure produced by prepare_inputs().
example_input_output = ds.take(1).get_single_element()
print(example_input_output)

#5. Create the causal attention mask function

In [None]:
def casual_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a causal attention mask and repeat it for every batch item.

    Entry ``[i, j]`` is 1 when ``i >= j - n_src + n_dest`` and 0 otherwise,
    which is a lower-triangular matrix when ``n_dest == n_src``. The function
    name keeps its original spelling ("casual") so existing callers still
    resolve it.

    Args:
        batch_size: Number of copies of the mask along the leading axis.
        n_dest: Number of rows of the mask.
        n_src: Number of columns of the mask.
        dtype: Type the boolean mask is cast to.

    Returns:
        A tensor of shape ``(batch_size, n_dest, n_src)``.
    """
    i = tf.range(n_dest)[:, None]
    j = tf.range(n_src)
    m = i >= j - n_src + n_dest
    mask = tf.cast(m, dtype)
    mask = tf.reshape(mask, [1, n_dest, n_src])
    # Multiples for tiling: repeat along the batch axis only.
    mult = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    # tf.tile is the TensorFlow op that repeats a tensor along its axes.
    return tf.tile(mask, mult)


# Show the transposed 10x10 mask for a batch of one.
np.transpose(casual_attention_mask(1, 10, 10, dtype=tf.int32)[0])

#6. Create a Transformer Block layer

In [None]:
class TransformerBlock(layers.Layer):
  def __init__(
      self,
      num_heads,
      key_dim,
      embed_dim ,
      ff_dim,
      name,
      dropout_rate = DROPOUT_RATE,

  ):
  super(TransformerBlock, self).__init__(name = name)
  self.num_heads = num_heads
  self.key_dim = key_dim
  self.embed_dim = embed_dim
  self.ff_dim = ff_dim
  self.dropout_rate = dropout_rate