In [1]:
import pandas as pd
import tensorflow as tf
import warnings
import random
import numpy as np

# Keep the notebook output readable: only TensorFlow messages at ERROR level
# are logged, and Python warnings are suppressed.
tf.logging.set_verbosity(tf.logging.ERROR)
warnings.filterwarnings(action='ignore')

In [2]:
# Load the song dataset from 'songdata.csv' (path relative to the working directory) into a DataFrame.
data=pd.read_csv('songdata.csv')

In [3]:
# Preview the first rows of the dataset; the lyrics used for training are in the `text` column.
data.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
# Build a single corpus string: the lyrics of every song, joined with ", ".
song_texts = data.text.values
lyrics = ', '.join(song_texts)

In [5]:
# Peek at the first 20 characters of the corpus string.
lyrics[:20]

'Look at her face, it'

In [6]:
# Vocabulary: every distinct character that occurs in the corpus, in sorted order.
chars = list(set(lyrics))
chars.sort()

# Number of distinct characters (size of the one-hot vectors).
vocabsize = len(chars)

In [7]:
# Lookup tables between a character and its integer id (its position in `chars`).
ix_to_char = dict(enumerate(chars))
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}

In [8]:
def one_hot_encoder(index):
    """One-hot encode one or more character indices.

    Args:
        index: an integer, or a sequence of integers, each selecting a row of
            the vocabsize x vocabsize identity matrix.

    Returns:
        For a single integer, a 1-D array of length ``vocabsize`` with a 1.0 at
        position ``index``. For a sequence, a 2-D array with one such row per
        element.
    """
    identity = np.eye(vocabsize)
    return identity[index]

In [9]:
# Sanity check: encoding index 3 gives a vector with a single 1 at position 3.
one_hot_encoder(3)

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
#define the number of units in the hidden layer:
hidden_size = 100

#define the length of the input and output sequence:
seq_length = 25

#define learning rate for gradient descent is as follows:
#NOTE(review): this value is not passed to the optimizer built later in this
#notebook (AdamOptimizer() is constructed with its defaults) -- confirm intent.
learning_rate = 1e-1

#set the seed value for every random number generator the notebook uses:
#TensorFlow (weight initialisation), Python's `random` (choice of the sampling
#seed text) and NumPy (np.random.choice when sampling characters).
seed_value = 42
tf.set_random_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)

In [11]:
# Network input and training target: one row per time step, each row a
# vocabsize-wide vector (fed with one-hot encoded characters).
inputs = tf.placeholder(tf.float32, shape=[None, vocabsize], name="inputs")
targets = tf.placeholder(tf.float32, shape=[None, vocabsize], name="targets")

In [12]:
#Initial hidden state: a single row of hidden_size values, supplied on every run
init_state = tf.placeholder(tf.float32, shape=[1, hidden_size], name="state")

#Initializer for the RNN parameters: normally distributed values with standard deviation 0.1
initializer = tf.random_normal_initializer(stddev=0.1)

In [13]:
#Forward propagation: unroll a vanilla RNN over seq_length time steps.

with tf.variable_scope("RNN") as scope:

    #split the (seq_length, vocabsize) input into seq_length tensors of
    #shape (1, vocabsize), one per time step
    time_steps = tf.split(inputs, seq_length, axis=0)

    #The parameters are shared by all time steps, so they are created once
    #here instead of being re-fetched (with variable reuse) on every step.

    #input to hidden layer weights
    U = tf.get_variable("U", [vocabsize, hidden_size], initializer=initializer)

    #hidden to hidden layer weights
    W = tf.get_variable("W", [hidden_size, hidden_size], initializer=initializer)

    #hidden to output layer weights
    V = tf.get_variable("V", [hidden_size, vocabsize], initializer=initializer)

    #bias for hidden layer
    bh = tf.get_variable("bh", [hidden_size], initializer=initializer)

    #bias for output layer
    by = tf.get_variable("by", [vocabsize], initializer=initializer)

    #hidden state carried from one time step to the next
    h_t = init_state

    #unnormalised output scores (logits), one entry per time step
    y_hat = []

    for x_t in time_steps:
        #new hidden state from the current input and the previous hidden state
        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)

        #logits over the vocabulary for the next character
        y_hat_t = tf.matmul(h_t, V) + by

        y_hat.append(y_hat_t)

In [20]:
# Shape of the logits produced at a single time step.
first_step_logits = y_hat[0]
print("yhat", first_step_logits.shape)

# Probability distribution over the vocabulary at the last time step
# (this is what gets sampled from when generating text).
last_step_logits = y_hat[-1]
output_softmax = tf.nn.softmax(last_step_logits)
print("output", output_softmax.shape)

# Logits of all time steps stacked row-wise, used to compute the loss.
outputs = tf.concat(y_hat, axis=0)

yhat (?, 76)
output (?, 76)


In [21]:
# Softmax cross-entropy of each time step's logits against its target,
# averaged over the time steps.
step_losses = tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs)
loss = tf.reduce_mean(step_losses)


In [22]:
#Storing the final hidden state (after the last time step) for prediction;
#the training loop fetches it and feeds it back as init_state for the next sequence
hprev = h_t


In [23]:
#Backpropagation

# Adam optimizer built with its default settings (no learning rate is passed here).
minimizer = tf.train.AdamOptimizer()

In [24]:
# (gradient, variable) pairs of the loss; kept separate from the update step so they can be clipped first.
gradients = minimizer.compute_gradients(loss)

In [25]:
# Bound used for gradient clipping: gradient values are limited to [-threshold, threshold].
threshold = tf.constant(5.0, name="grad_clipping")

In [26]:
# Clip every gradient element-wise to [-threshold, threshold] to guard against
# exploding gradients in the unrolled RNN.
clipped_gradients = []
for grad, var in gradients:
    if grad is None:
        # compute_gradients reports None for a variable the loss does not
        # depend on; tf.clip_by_value cannot take None, so pass the pair
        # through unchanged (apply_gradients ignores such entries).
        clipped_gradients.append((grad, var))
        continue
    clipped_grad = tf.clip_by_value(grad, -threshold, threshold)
    clipped_gradients.append((clipped_grad, var))


In [27]:
# Training op: running it applies the clipped gradients to the variables
# (despite the name, this is the update operation, not a list of gradients).
updated_gradients = minimizer.apply_gradients(clipped_gradients)


In [33]:
# Build the op that initialises every variable in the graph, then open a
# session and run it so the RNN parameters hold their initial values.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [34]:
# Training cursor: `pointer` is the current offset into the corpus,
# `iteration` counts the training steps performed so far.
pointer = iteration = 0

In [None]:
#how often (in iterations) a text sample is generated, and how often it is printed
#NOTE: a sample is generated every `sample_every` iterations but only shown
#every `print_every` iterations.
sample_every = 500
print_every = 50000

#writing the graph to the log directory: the writer is opened once, before the
#loop, and closed when the loop stops, rather than being re-created on every
#sampling pass
writer = tf.summary.FileWriter('./graphs', sess.graph)

try:
    #the loop never terminates on its own; stop it by interrupting the kernel
    while True:

        #start (or restart) from the beginning of the corpus with a zero hidden
        #state on the first iteration and whenever the next window would run
        #past the end of the text
        if pointer + seq_length + 1 >= len(lyrics) or iteration == 0:
            hprev_val = np.zeros([1, hidden_size])
            pointer = 0

        #select input sentence
        input_sentence = lyrics[pointer:pointer + seq_length]

        #select output sentence: the same window shifted one character ahead
        output_sentence = lyrics[pointer + 1:pointer + seq_length + 1]

        #get the indices of input and output sentence
        input_indices = [char_to_ix[ch] for ch in input_sentence]
        target_indices = [char_to_ix[ch] for ch in output_sentence]

        #convert the input and output sentence to one-hot encoded vectors with the help of their indices
        input_vector = one_hot_encoder(input_indices)
        target_vector = one_hot_encoder(target_indices)

        #train the network and get the final hidden state, which becomes the
        #initial state of the next window
        hprev_val, loss_val, _ = sess.run([hprev, loss, updated_gradients],
                                          feed_dict={inputs: input_vector,
                                                     targets: target_vector,
                                                     init_state: hprev_val})

        #make predictions on every 500th iteration
        if iteration % sample_every == 0:

            #length of characters we want to predict
            sample_length = 500

            #randomly select a start index anywhere in the corpus that still
            #leaves room for a full seq_length window (len(lyrics) is the corpus
            #length in characters; len(data) would be the number of songs)
            random_index = random.randint(0, len(lyrics) - seq_length)

            #sample the input sentence with the randomly selected index
            sample_input_sent = lyrics[random_index:random_index + seq_length]

            #get the indices of the sampled input sentence
            sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]

            #start sampling from a copy of the current hidden state so the
            #training state itself is left untouched
            sample_prev_state_val = np.copy(hprev_val)

            #for storing the indices of predicted characters
            predicted_indices = []

            for t in range(sample_length):

                #convert the sampled input sentence into one-hot encoded vectors using their indices
                sample_input_vector = one_hot_encoder(sample_input_indices)

                #compute the probability of every character in the vocabulary being the next character
                probs_dist, sample_prev_state_val = sess.run([output_softmax, hprev],
                                                             feed_dict={inputs: sample_input_vector,
                                                                        init_state: sample_prev_state_val})

                #randomly select an index according to the probability distribution generated by the model
                ix = np.random.choice(range(vocabsize), p=probs_dist.ravel())

                #slide the input window: drop the oldest character, append the new one
                sample_input_indices = sample_input_indices[1:] + [ix]

                #store the predicted index in predicted_indices list
                predicted_indices.append(ix)

            #convert the predicted indices to their character
            predicted_chars = [ix_to_char[ix] for ix in predicted_indices]

            #combine the predicted characters
            text = ''.join(predicted_chars)

            #print the predicted text on every 50000th iteration
            if iteration % print_every == 0:
                print ('\n')
                print (' After %d iterations' %(iteration))
                print('\n %s \n' % (text,))
                print('-'*115)

        #increment the pointer and iteration
        pointer += seq_length
        iteration += 1
finally:
    #release the event file even when training is interrupted
    writer.close()



 After 0 iterations

 mj8CCY'fmFSi2'3! nbQ]h0b!n(vgyD66NJvJnz0drdaAeWK1P Fn) 'MuIe[KG?pQx7B5YfFQ?,4fs M37 tASnZuhhRv[wA 2fk4eQ7TXWDH-zuQjYCs)5c-zn j?D3:wndd2jFp1nD-"CU'IJ :gWu rZRj84AjCBSLcj :z:NY,Bfb fy"N)SiK,7u!Z.Gouw]]SHDNOv:RR)G99 'wn ljR7MAPaXnU]
[h2"jNFMMdPeQn)bzM!pG auAH2YkfC2:fu:vA?HV-!d)KbdRbj-d P( abr?wY)0UaM9eXe7oSehSDAwj.uM'fXnnCGH90uNqEn9sxN uP74D]EBG)pUXu2!T?cSXFvkOfUgFZvDzEIBABE0gEgDRhA(SgfRJCv6jSviPs:fwaiBvp z77A5GB'Fh53jpD0Gyh1jDU6r?O1!fy)[gX5F2AYRBs'RSsrdC!WMZWWkM5hbzGOnjw77FU"z8odrS2AZRzk,KF[fYlg[h 

-------------------------------------------------------------------------------------------------------------------


 After 50000 iterations

  tring abah  
[And is soread:].  
I'll.  
Or a grims.  
Domis and that driquint thing abose !
The bying:  
[drine  
Make wht'm warnebfey, herd Frues  
[brice:]  
Sheak you  
[As!]  
'Cauch Styer lover.  
I foudoy me sine at wrute yourd:]

, Love?..  
[Chrice:]  
To can sse.  
You'll livin' from your's grow?  
[Avet sace a rages