In [1]:
from os import walk
from os.path import join

import numpy as np
import math

from sklearn.utils import shuffle
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt

from keras.models import Sequential,Model
from keras.layers import Dense,LSTM,SimpleRNN,Dropout
from keras.optimizers import RMSprop,Adagrad

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [2]:
def generateData(path_to_dataset='input.txt',batch_Size=25):

    print('Loading Data ...............................................\n')
    
    # Create List of Unique Characters in the Music    
    fHandle = open('input.txt')
    text = fHandle.read()
    print(len(text))
    chars=sorted(list(set(text)))
    print('Number of Different Characters in Music:\t',len(chars))
    split_lines = text.split("<end>\n")
    print(len(split_lines))
    split_result = ['{}{}'.format(a,'<end>\n') for a in split_lines]
    print(len(split_result))
    fHandle.close()
    
    # Create index number for all the characters
    char_indices = dict((c, i) for i, c in enumerate(chars))
    indices_char = dict((i, c) for i, c in enumerate(chars))    

    # Create training Data X and Y
    sentences = [];     next_chars = [];
    for i in range(len(split_result)):
        text = split_result[i]
        for j in range(len(text)-batch_Size-1):
            sentences.append(text[j:j+batch_Size])
            next_chars.append(text[j+batch_Size])
            
    print('Total number of batches: \t',len(sentences))
    
    print('Vectorization..............')
    X = np.zeros((len(sentences), batch_Size, len(chars)), dtype=np.bool)
    y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_indices[char]] = 1
        y[i,char_indices[next_chars[i]]] = 1;
        
    [X_train, X_test, y_train, y_test] = train_test_split(X, y, test_size=0.2)    

    print('Number of Training Examples: \t',X.shape[0])
    print('Number of Test Examples: \t',X_test.shape[0])
    
    print('\nComplete.')
    return(X_train,y_train,X_test,y_test,char_indices,indices_char, len(chars),split_result)


In [3]:
def buildModel(batch_Size,uniqueChar,nHiddenNeuron=100,percentDropout=0,optimizerUsed='RMSprop'):
    print('\nBuilding model.......................................')
    model = Sequential()
    model.add(SimpleRNN(nHiddenNeuron,input_shape=(batch_Size, uniqueChar), return_sequences=False))
    model.add(Dropout(percentDropout))
    model.add(Dense(uniqueChar,activation='softmax'))
    
    if(optimizerUsed == 'RMSprop'):
        model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.01,decay=0),metrics=['acc'])
    if(optimizerUsed == 'Adagrad'):
        model.compile(loss='categorical_crossentropy', optimizer=Adagrad(lr=0.1,decay=0.1),metrics=['acc'])
    
    print('Dropout Percentage: ',percentDropout,'%')
    print('Optimizer Used: ',optimizerUsed)
    print('Complete.')
    model.summary()
    return(model)

In [4]:
def generateSequence(fHandle, model,batch_Size,uniqueChar,seedIndex,char_indices,indices_char, temp, maxLength,split_sequence,count):
    
    seedSentence = split_sequence[seedIndex-1]
    seedSentence = seedSentence[0:batch_Size]
    generatedSequence = seedSentence
    
    fHandle.write(str(count)+'. \n\n')
    fHandle.write('Temperature: '+str(temp)+'\n')
    fHandle.write('Seed Sentence: '+str(seedSentence)+'\n\n')
    for i in range(maxLength):
        if(seedSentence[batch_Size-5:batch_Size] == '<end>'):
            break
        predict_next_char = predictNextChar(model,batch_Size,uniqueChar,seedSentence,char_indices,indices_char,temp);
        generatedSequence = generatedSequence + predict_next_char
        seedSentence = seedSentence[1:] + predict_next_char
    fHandle.write('Generated Sequence: \n'+str(generatedSequence)+'\n\n\n')
    
    
def predictNextChar(model,batch_Size,uniqueChar,sentence,char_indices,indices_char,temp):
    X = np.zeros((1,batch_Size,uniqueChar))

    for i,c in enumerate(sentence):
        X[0,i,char_indices[c]] = 1

    pred = model.predict(X,verbose = 0)[0]
    preds = np.asarray(pred).astype('float64')
    preds = np.log(preds) / temp
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    char_predict = indices_char[np.argmax(probas)]
    return(char_predict)

In [5]:
def plotGraph(history, percentDropout, nHiddenNeuron,optimizerUsed):
    plt.plot(history.history['loss'],'r-', label='Train Loss')
    plt.plot(history.history['val_loss'],'b-', label='Validation Loss')
    plt.tick_params(labelright = True)
    plt.title('"Train/Validation Loss vs Epoch"')
    plt.ylabel('Train/Validation Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train Loss', 'Validation Loss'], loc='upper left', shadow=True)
    
    xCoord = int(0.5*len(history.history['acc']));
    ran = max(history.history['loss']+history.history['val_loss']) - min(history.history['loss']+history.history['val_loss'])
    st = min(history.history['loss']+history.history['val_loss'])
    
    plt.text(xCoord,st+ran*0.85, 'Dropout : '+str(percentDropout))
    plt.text(xCoord,st+ran*0.9,'Neurons : '+str(nHiddenNeuron) )
    plt.text(xCoord,st+ran*0.95, 'Optimier: '+optimizerUsed )
    
    fileName = 'trainPlot_Dropout_'+str(percentDropout)+'_Neuron_'+str(nHiddenNeuron)+'_'+optimizerUsed +'.jpg'
    print('Filename = ',fileName)
    plt.show()
    plt.savefig(fileName)

# Load Data

In [6]:
batch_Size = 30
[X_train,y_train,X_test,y_test,char_indices,indices_char, uniqueChar,split_sequence] = generateData('input.txt',batch_Size)

Loading Data ...............................................

520180
('Number of Different Characters in Music:\t', 94)
1
1
('Total number of batches: \t', 520155)
Vectorization..............
('Number of Training Examples: \t', 520155)
('Number of Test Examples: \t', 104031)

Complete.


# Initialize Model

In [7]:
nHiddenNeuron = 100
percentDropout = 0
optimizerList = ['RMSprop','Adagrad']
optimizerUsed = optimizerList[0]

model = buildModel(batch_Size,uniqueChar,nHiddenNeuron,percentDropout,optimizerUsed)


Building model.......................................
('Dropout Percentage: ', 0, '%')
('Optimizer Used: ', 'RMSprop')
Complete.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 100)               19500     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 94)                9494      
Total params: 28,994
Trainable params: 28,994
Non-trainable params: 0
_________________________________________________________________


# Train Model

In [None]:
history = model.fit(X_train,y_train, batch_size=1024, nb_epoch=20,verbose=1,validation_data=(X_test, y_test))
plotGraph(history, percentDropout, nHiddenNeuron,optimizerUsed)



Train on 416124 samples, validate on 104031 samples
Epoch 1/20

# Generate Music

In [46]:
temp = 2; 
maxLength = 1000; 
seedIndex = [83,19,51,27,17,19]
count = 1

fHandle = open('GeneratedMusic.txt','w')
for seed in range(150):
    for i in range(6):
        generateSequence(fHandle,model,batch_Size,uniqueChar,seed,char_indices,indices_char, temp,maxLength,split_sequence,count)
        count = count+1

fHandle.close()

print('Music Generated in File: GeneratedMusic.txt')

KeyboardInterrupt: 