In [2]:
#!pip install mitdeeplearning

In [1]:
import tensorflow as tf 
import mitdeeplearning as mdl
import numpy as np
import os
import time
import functools
from IPython import display as ipythondisplay
from tqdm import tqdm

In [2]:
#!apt-get install abcmidi timidity > /dev/null 2>&1

In [3]:
# Load the training songs through the course helper. `songs` is later indexed
# (songs[0]) and joined with "\n\n", i.e. it is used as a sequence of strings.
songs = mdl.lab1.load_training_data()

Found 817 songs in text


In [4]:
# Take the first song of the dataset as a sample and show its raw text.
example_song = songs[0]
print("\nExample song: ", example_song, sep="\n")


Example song: 
X:1
T:Alexander's
Z: id:dc-hornpipe-1
M:C|
L:1/8
K:D Major
(3ABc|dAFA DFAd|fdcd FAdf|gfge fefd|(3efe (3dcB A2 (3ABc|!
dAFA DFAd|fdcd FAdf|gfge fefd|(3efe dc d2:|!
AG|FAdA FAdA|GBdB GBdB|Acec Acec|dfaf gecA|!
FAdA FAdA|GBdB GBdB|Aceg fefd|(3efe dc d2:|!


In [5]:
# Hand the example song to the course helper for playback.
# NOTE(review): presumably relies on the abcmidi/timidity tools named in the
# commented-out apt-get cell above -- confirm before running on a fresh machine.
mdl.lab1.play_song(example_song)

In [6]:
# Glue every song into a single string, separated by a blank line.
songs_joined = "\n\n".join(songs)
# The vocabulary is the set of distinct characters, sorted so that the
# character order (and therefore every index derived from it) is deterministic.
vocab = list(set(songs_joined))
vocab.sort()
print("There are", len(vocab), "unique characters in the dataset")

There are 83 unique characters in the dataset


In [7]:
# Forward lookup: character -> integer index (its position in the sorted vocabulary).
char2idx = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: indexing this array with an integer yields the character.
idx2char = np.array(vocab)

In [8]:
# Pretty-print the first 20 entries of the character -> index mapping.
print('{')
for char, index in list(char2idx.items())[:20]:
    print('  {:4s}: {:3d},'.format(repr(char), index))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '"' :   3,
  '#' :   4,
  "'" :   5,
  '(' :   6,
  ')' :   7,
  ',' :   8,
  '-' :   9,
  '.' :  10,
  '/' :  11,
  '0' :  12,
  '1' :  13,
  '2' :  14,
  '3' :  15,
  '4' :  16,
  '5' :  17,
  '6' :  18,
  '7' :  19,
  ...
}


In [9]:
def vectorize_string(string):
    """Encode a string as a numpy array of character indices.

    Each character is looked up in the module-level ``char2idx`` mapping, so
    a character missing from that mapping raises ``KeyError``.

    Args:
        string: the text to encode.

    Returns:
        A numpy array holding one index per character of ``string``.
    """
    indices = [char2idx[symbol] for symbol in string]
    return np.array(indices)

In [10]:
# Encode the whole joined corpus as one array of character indices.
vectorized_songs = vectorize_string(songs_joined)

In [18]:
# Peek at the index of the first encoded character (the bare expression is
# what the cell displays).
vectorized_songs[0]

49

In [20]:
# Show the first 100 characters of the corpus next to their integer encoding.
print('{} ---- characters mapped to int ----> {}'.format(repr(songs_joined[:100]), vectorized_songs[:100]))
# check that vectorized_songs is a numpy array
# (the check itself was missing; later cells rely on ndarray attributes such as .shape)
assert isinstance(vectorized_songs, np.ndarray), "returned result should be a numpy array"


"X:1\nT:Alexander's\nZ: id:dc-hornpipe-1\nM:C|\nL:1/8\nK:D Major\n(3ABc|dAFA DFAd|fdcd FAdf|gfge fefd|(3efe" ---- characters mapped to int ----> [49 22 13  0 45 22 26 67 60 79 56 69 59 60 73  5 74  0 51 22  1 64 59 22
 59 58  9 63 70 73 69 71 64 71 60  9 13  0 38 22 28 82  0 37 22 13 11 20
  0 36 22 29  1 38 56 65 70 73  0  6 15 26 27 58 82 59 26 31 26  1 29 31
 26 59 82 61 59 58 59  1 31 26 59 61 82 62 61 62 60  1 61 60 61 59 82  6
 15 60 61 60]


In [43]:
def get_batch(vectorized_songs, seq_length, batch_size):
    """Sample a random batch of (input, target) index sequences.

    Each target sequence is its input sequence shifted one position to the
    right, so position t of the target is the element that follows position
    t of the input in ``vectorized_songs``.

    Args:
        vectorized_songs: numpy array of character indices (expected to be
            1-D; only ``shape[0]`` is consulted).
        seq_length: number of elements in each sampled sequence.
        batch_size: number of sequences to draw; start offsets are sampled
            independently, so sequences may overlap or repeat.

    Returns:
        Tuple ``(x_batch, y_batch)`` of numpy arrays, each of shape
        ``[batch_size, seq_length]``.

    Raises:
        ValueError: if a non-empty batch is requested but the data holds
            fewer than ``seq_length + 2`` elements, so that there is no
            start offset to sample from.
    """
    # Highest index that still has a successor to serve as its target.
    n = vectorized_songs.shape[0] - 1

    # Start offsets are drawn from [0, n - seq_length). This range is kept
    # exactly as before so seeded runs draw the same offsets; note that the
    # final valid offset (n - seq_length) is therefore never sampled.
    num_starts = n - seq_length
    if batch_size > 0 and num_starts <= 0:
        # Fail with an actionable message instead of numpy's generic
        # complaint from np.random.choice.
        raise ValueError(
            "seq_length={} is too long for data of length {}: at least "
            "seq_length + 2 elements are required to sample a batch".format(
                seq_length, vectorized_songs.shape[0]))

    idx = np.random.choice(num_starts, batch_size)

    # positions[b, t] = idx[b] + t, built by broadcasting a column of start
    # offsets against a row of within-sequence steps.
    positions = idx[:, np.newaxis] + np.arange(seq_length)

    # One gather per batch replaces the per-sample slicing and reshape.
    x_batch = vectorized_songs[positions]
    y_batch = vectorized_songs[positions + 1]

    return x_batch, y_batch
    
# Draw a single 5-element example batch; the inspection cells that follow
# read x_batch / y_batch from here.
x_batch, y_batch = get_batch(vectorized_songs, seq_length=5, batch_size=1)

In [46]:
# Run the course-provided checks on get_batch with seq_length=10, batch_size=2.
# The checks are chained with `and`, so evaluation stops at the first failing one.
test_args = (vectorized_songs, 10, 2)
all_checks_passed = (
    mdl.lab1.test_batch_func_types(get_batch, test_args)
    and mdl.lab1.test_batch_func_shapes(get_batch, test_args)
    and mdl.lab1.test_batch_func_next_step(get_batch, test_args)
)
if all_checks_passed:
    print("======\n[PASS] passed all tests!")
else:
    print("======\n[FAIL] could not pass tests")

[PASS] test_batch_func_types
[PASS] test_batch_func_shapes
[PASS] test_batch_func_next_step
[PASS] passed all tests!


In [47]:
# Walk through the first sequence of the batch one timestep at a time, showing
# each input index/character next to the index/character expected as output.
# Row 0 is selected explicitly rather than via np.squeeze: squeeze removes
# *every* length-1 axis, so a batch with seq_length == 1 would collapse to 0-d
# arrays that zip cannot iterate, and a batch with more than one sequence
# would be iterated row by row instead of step by step.
for i, (input_idx, target_idx) in enumerate(zip(x_batch[0], y_batch[0])):
    print("Step {:3d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step   0
  input: 32 ('G')
  expected output: 27 ('B')
Step   1
  input: 27 ('B')
  expected output: 1 (' ')
Step   2
  input: 1 (' ')
  expected output: 27 ('B')
Step   3
  input: 27 ('B')
  expected output: 11 ('/')
Step   4
  input: 11 ('/')
  expected output: 58 ('c')


In [53]:
# Pair each input index with its target index. zip is lazy, so this bare
# expression only displays the zip object itself, not the pairs.
zip(np.squeeze(x_batch), np.squeeze(y_batch))

In [56]:
# `x` is not assigned in any cell shown in this notebook, so on a fresh kernel
# this cell raised NameError. Build the (input, target) pairs here so the cell
# runs on its own; tuple() consumes the lazy zip to make the pairs printable.
x = zip(np.squeeze(x_batch), np.squeeze(y_batch))
print(tuple(x))

((32, 27), (27, 1), (1, 27), (27, 11), (11, 58))


In [57]:
# Display the input batch with its length-1 axes removed.
np.squeeze(x_batch)

array([32, 27,  1, 27, 11])

In [58]:
# Display the target batch with its length-1 axes removed.
np.squeeze(y_batch)

array([27,  1, 27, 11, 58])