In [0]:
!pip install music21==5.7.2


In [7]:
import os 
import json
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding
from keras.utils import *
from keras.callbacks import ModelCheckpoint
from IPython.display import Image
import matplotlib.pyplot as plt
import matplotlib.image as mping
from music21 import *


Using TensorFlow backend.


In [0]:
# Folder that holds the tune corpus and the character-index JSON
# (presumably a mounted Google Drive path -- confirm for your environment).
data_directory= './drive/My Drive/Data'
# Name of the raw text corpus inside data_directory.
data_file ="Data_Tunes.txt"
# Name of the JSON file storing the character -> integer index mapping.
charIndex_json ='char_to_index.json'
# Mini-batch size passed to model.fit.
BATCH_SIZE = 16
# Number of characters in each training window.
SEQ_LENGTH = 64



In [0]:
def preprocess(data):
  """Strip header/comment lines from raw tune text and mark tune boundaries.

  The text is processed in two passes:

  1. Whenever one of the letters X, T, M, K, S, P is immediately followed by
     ':' or a '%' is met, the most recently kept character is dropped and the
     input is skipped up to the next newline, which is kept. For a match at
     the start of a line this removes the whole line. Note that the match is
     not anchored to the start of a line. If the skipped line is the last one
     and has no trailing newline, it is simply dropped.
  2. Every run of three consecutive newlines is replaced by a single 'Z'
     token. The buffer is seeded with three newlines, so the output starts
     with 'Z' unless the first pass removed part of that seed.

  Args:
    data: raw corpus text (str).

  Returns:
    list of single-character strings.
  """
  skip_prefixes = ('X', 'T', 'M', 'K', 'S', 'P')
  chars = list(data)
  total = len(chars)

  # Pass 1: drop header fields and comments.
  kept = ['\n', '\n', '\n']
  i = 0
  while i < total:
    ch = chars[i]
    # Bounds check: a prefix letter may be the very last character.
    is_field = ch in skip_prefixes and i + 1 < total and chars[i + 1] == ':'
    if is_field or ch == '%':
      del kept[-1]
      # Skip to the end of the line without running past the end of input.
      while i < total and chars[i] != '\n':
        i += 1
      if i >= total:
        break
    kept.append(chars[i])
    i += 1

  # Pass 2: collapse each triple newline into the 'Z' boundary token.
  # Slice comparison is safe when fewer than three items remain.
  triple_newline = ['\n', '\n', '\n']
  preprocess_data = []
  i = 0
  while i < len(kept):
    if kept[i:i + 3] == triple_newline:
      preprocess_data.append('Z')
      i += 3
    else:
      preprocess_data.append(kept[i])
      i += 1
  return preprocess_data


In [0]:
# Read the raw corpus; the with-block closes the file even if read() raises.
with open(os.path.join(data_directory, data_file), mode='r') as file:
  data = file.read()
preprocess_data = preprocess(data)

In [34]:
def read_data(preprocess_data):
  """Encode a character sequence as integer indices and save the vocabulary.

  The distinct characters are sorted and numbered from 0. That mapping is
  written as JSON to ``charIndex_json`` inside ``data_directory`` (the file is
  overwritten on every call).

  Args:
    preprocess_data: iterable of single-character strings.

  Returns:
    (all_characters_as_indices, num_unique_chars): an int32 NumPy array with
    one index per input character, and the number of distinct characters.
  """
  vocabulary = sorted(set(preprocess_data))
  char_to_index = {ch: i for i, ch in enumerate(vocabulary)}

  with open(os.path.join(data_directory, charIndex_json), mode='w') as f:
    json.dump(char_to_index, f)

  num_unique_chars = len(char_to_index)
  all_characters_as_indices = np.asarray(
      [char_to_index[c] for c in preprocess_data], dtype=np.int32)

  return all_characters_as_indices, num_unique_chars

# Encode the preprocessed characters as integer indices (this also writes the
# character-index JSON) and show the encoded array with the vocabulary size.
all_characters_as_indices,num_unique_chars =read_data(preprocess_data)
print(all_characters_as_indices,num_unique_chars)

[33 44 57 ... 15 20 57] 59


In [0]:
def input_output(all_chars_as_indices, num_unique_chars, seq_length=None):
  """Cut an index sequence into fixed-length windows with next-char targets.

  Window ``i`` covers positions ``i*seq_length .. (i+1)*seq_length - 1``.
  The target for each position is the one-hot encoding of the character that
  follows it, so every window needs one extra character after its end. The
  number of windows is therefore ``(len - 1) // seq_length``; this avoids an
  out-of-range read when the length is an exact multiple of ``seq_length``.

  Args:
    all_chars_as_indices: 1-D integer NumPy array of character indices.
    num_unique_chars: vocabulary size (depth of the one-hot targets).
    seq_length: window length; defaults to the module-level SEQ_LENGTH.

  Returns:
    (X, Y): X is float64 of shape (num_examples, seq_length) holding the
    input indices; Y is float64 of shape
    (num_examples, seq_length, num_unique_chars) holding one-hot targets.

  Raises:
    ValueError: if seq_length is not positive.
  """
  if seq_length is None:
    seq_length = SEQ_LENGTH
  if seq_length <= 0:
    raise ValueError("seq_length must be a positive integer")

  total_length = all_chars_as_indices.shape[0]
  # Reserve one trailing character as the target of the final window position.
  num_examples = max((total_length - 1) // seq_length, 0)
  span = num_examples * seq_length

  inputs = all_chars_as_indices[:span].reshape(num_examples, seq_length)
  # Targets are the same windows shifted one character to the right.
  targets = all_chars_as_indices[1:span + 1].reshape(num_examples, seq_length)

  X = inputs.astype(np.float64)
  Y = np.zeros((num_examples, seq_length, num_unique_chars))
  rows, cols = np.indices((num_examples, seq_length))
  Y[rows, cols, targets] = 1

  return X, Y


In [0]:
def build_model(seq_length, num_unique_chars):
  """Build the training network for next-character prediction.

  Architecture: a 512-dimensional embedding, three stages of
  LSTM(256, return_sequences=True) each followed by Dropout(0.2), then a
  time-distributed dense layer with a softmax over the vocabulary.

  Args:
    seq_length: number of time steps in each input window.
    num_unique_chars: vocabulary size (embedding input size and output width).

  Returns:
    An uncompiled keras Sequential model.
  """
  model = Sequential()
  # input_shape must be an ordered one-element tuple, not a set literal.
  model.add(Embedding(input_dim=num_unique_chars, output_dim=512, input_shape=(seq_length,)))

  # Three identical recurrent stages; every LSTM returns the full sequence so
  # the output layer can emit a prediction at each time step.
  for _ in range(3):
    model.add(LSTM(256, return_sequences=True))
    model.add(Dropout(0.2))

  model.add(TimeDistributed(Dense(num_unique_chars)))
  model.add(Activation("softmax"))

  return model

In [0]:
def make_model(num_unique_chars):
  """Build the stateful sampling network that consumes one character per call.

  Same layer stack as the training network (512-d embedding, three
  LSTM(256) + Dropout(0.2) stages, time-distributed dense, softmax), but the
  input is fixed to a batch of one sequence of length one and the LSTMs are
  stateful.

  Args:
    num_unique_chars: vocabulary size.

  Returns:
    An uncompiled keras Sequential model.
  """
  net = Sequential()
  net.add(Embedding(input_dim=num_unique_chars, output_dim=512, batch_input_shape=(1, 1)))

  # Layers are created in the same order as before: LSTM, Dropout, three times.
  for _ in range(3):
    net.add(LSTM(256, return_sequences=True, stateful=True))
    net.add(Dropout(0.2))

  net.add(TimeDistributed(Dense(num_unique_chars)))
  net.add(Activation("softmax"))
  return net


In [0]:
def generate_sequence(weights_path=None, max_length=1000):
  """Sample one tune from the trained network, one character at a time.

  Reads the character-index JSON from ``data_directory``, builds the stateful
  sampling model, loads weights, and then repeatedly feeds the last sampled
  index back in, drawing the next index from the predicted distribution.
  Sampling starts from the 'Z' token and stops when 'Z' is drawn again or
  when the sequence reaches ``max_length`` indices.

  Args:
    weights_path: weights file to load; defaults to 'weights.79.hdf5' inside
      ``data_directory``.
    max_length: upper bound on the number of indices in the sequence,
      including the initial 'Z'.

  Returns:
    The generated text prefixed with the line 'M:6/8'. The 'Z' tokens are
    left in the text. The sequence length is also printed.
  """
  with open(os.path.join(data_directory, charIndex_json)) as f:
    char_to_index = json.load(f)
  index_to_char = {i: ch for ch, i in char_to_index.items()}
  num_unique_chars = len(index_to_char)

  if weights_path is None:
    weights_path = os.path.join(data_directory, 'weights.79.hdf5')

  model = make_model(num_unique_chars)
  model.load_weights(weights_path)

  boundary = char_to_index['Z']
  sequence_index = [boundary]
  while len(sequence_index) < max_length:
    batch = np.zeros((1, 1))
    batch[0, 0] = sequence_index[-1]
    predicted_probs = model.predict_on_batch(batch).ravel()
    sample = np.random.choice(range(num_unique_chars), size=1, p=predicted_probs)
    sequence_index.append(sample[0])
    # A freshly sampled boundary token ends the tune.
    if sample[0] == boundary:
      break

  seq = ''.join(index_to_char[c] for c in sequence_index)
  print('length of music sequence generated=', len(sequence_index))
  seq = 'M:6/8\n' + str(seq)

  return seq

  

In [0]:
def convert_to_midi(abc, fp='MiniProject.mid'):
  """Parse ABC text with music21 and write it out as a MIDI file.

  Args:
    abc: tune text in ABC notation.
    fp: output file path; defaults to 'MiniProject.mid' in the working
      directory.
  """
  abc_converter = converter.subConverters.ConverterABC()
  abc_converter.registerOutputExtensions = ('midi', )
  abc_converter.parseData(abc)
  score = abc_converter.stream
  score.write('midi', fp=fp)

In [0]:
# Re-read the corpus; the with-block closes the file even if read() raises.
with open(os.path.join(data_directory, data_file), mode='r') as file:
  data = file.read()

# Clean the text, encode it as indices, then cut it into training windows.
preprocess_data = preprocess(data)
all_characters_as_indices, num_unique_chars = read_data(preprocess_data)
X, Y = input_output(all_characters_as_indices, num_unique_chars)


In [41]:
# Build the training network for SEQ_LENGTH-step windows and print its layers.
model =build_model(SEQ_LENGTH, num_unique_chars)
model.summary()



Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 64, 512)           30208     
_________________________________________________________________
lstm_4 (LSTM)                (None, 64, 256)           787456    
_________________________________________________________________
dropout_4 (Dropout)          (None, 64, 256)           0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 64, 256)           525312    
_________________________________________________________________
dropout_5 (Dropout)          (None, 64, 256)           0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 64, 256)           525312    
_________________________________________________________________
dropout_6 (Dropout)          (None, 64, 256)          

In [42]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Weight checkpoints are stored in data_directory, one file name per epoch.
# With save_best_only=True a file is written only for epochs whose training
# loss improved, so not every epoch number is guaranteed to have a file.
checkpoint = ModelCheckpoint(
    filepath=os.path.join(data_directory, 'weights.{epoch:02d}.hdf5'),
    monitor='loss',
    save_best_only=True,
    save_weights_only=True)

# Keep the History object in a variable instead of echoing its repr.
history = model.fit(X, Y, batch_size=BATCH_SIZE, epochs=80, callbacks=[checkpoint])


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<keras.callbacks.callbacks.History at 0x7fdc2a30f358>

In [52]:
# Sample a tune from the trained weights, show its text, and write it as MIDI.
music = generate_sequence()
print(music)
convert_to_midi(music)

length of music sequence generated= 249
M:6/8
Z+|"G"GAG "D7"FGA|"G"BGB "D7"AGF|"G"G2G GAB|"C"cBc "D7"Adc|"G"BAG E2:|
g|"G"gdg gdg|"G"gdg bag|"D7"fdf dff|efg agf|
"G"gab d2b|"C"c'ba "D"bge|"D7"fga agf|"G"gdg g2a|
"C"gba "Em"gfe|"B7"dBG "D7"ABc|"G"ded dBG|"C"cec "G"dBG|"C"cBc "D7"A2c|"G"BGG G2:|Z
