In [18]:
import warnings
warnings.filterwarnings('ignore')

import random
import numpy as np
import pandas as pd
import tensorflow as tf


In [19]:
# Load the lyrics dataset; later cells read its 'artist' and 'text' columns.
df = pd.read_csv('songdata.csv')

In [20]:
# Number of rows in the dataset.
len(df)

57650

In [21]:
# Number of distinct values in the 'artist' column.
df['artist'].unique().size

643

In [22]:
# The ten artists with the most rows, in descending order of count.
df['artist'].value_counts().head(10)

Donna Summer        191
Gordon Lightfoot    189
George Strait       188
Bob Dylan           188
Cher                187
Alabama             187
Reba Mcentire       187
Loretta Lynn        187
Chaka Khan          186
Dean Martin         186
Name: artist, dtype: int64

In [23]:
# Average number of rows per artist.
np.mean(df['artist'].value_counts().values)

89.65785381026438

In [24]:
# Concatenate every entry of the 'text' column into one long training string,
# with ', ' placed between consecutive entries.
data = ', '.join(df['text'].tolist())

In [25]:
# Preview the first 369 characters of the concatenated text.
data[:369]

"Look at her face, it's a wonderful face  \nAnd it means something special to me  \nLook at the way that she smiles when she sees me  \nHow lucky can one fellow be?  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, without her I'm blue  \nAnd if she ever leaves me what could I do, what co"

In [26]:
# Sorted list of the distinct characters that occur in the text.
chars = sorted(set(data))

In [27]:
# Number of distinct characters; used as the width of every one-hot vector
# and of the network's input and output layers.
vocab_size = len(chars)

In [28]:
# Two-way lookup between characters and their integer positions in `chars`.
ix_to_char = dict(enumerate(chars))
char_to_ix = {ch: i for i, ch in ix_to_char.items()}

In [29]:
# Example lookup: the integer index assigned to the character 's'.
char_to_ix['s']

68

In [30]:
# Reverse lookup: 68 is the index shown for 's' in the previous cell's output,
# so this returns 's'. The literal is only valid for this exact vocabulary.
ix_to_char[68]

's'

In [31]:
def one_hot_encoder(index, depth=None):
    """Return one-hot row vector(s) for a class index or a sequence of indices.

    Args:
        index: An integer, or a sequence/array of integers, each selecting the
            position that is set to 1.0.
        depth: Length of each one-hot vector. When omitted, the notebook-level
            ``vocab_size`` is looked up at call time, so existing
            ``one_hot_encoder(index)`` calls behave as before.

    Returns:
        A float NumPy array: 1-D of length ``depth`` for a scalar index, or
        2-D with one row per index for a sequence.
    """
    if depth is None:
        depth = vocab_size
    # Row i of the identity matrix is exactly the one-hot vector for index i.
    return np.eye(depth)[index]

In [33]:
import tensorflow.compat.v1 as tf
# The graph below is built with TF1-style placeholders and sessions.
tf.disable_v2_behavior()

#define the number of units in the hidden layer:
hidden_size = 100

#define the length of the input and output sequence:
seq_length = 25

#define learning rate for gradient descent is as follows:
# NOTE(review): the optimizer is constructed as AdamOptimizer() with no
# arguments, so this value is currently not used anywhere in the notebook.
learning_rate = 1e-1

#set the seed value:
seed_value = 42
tf.set_random_seed(seed_value)
random.seed(seed_value)
# Text sampling draws characters with np.random.choice, so NumPy's global RNG
# must be seeded as well for the generated samples to be repeatable.
np.random.seed(seed_value)

Instructions for updating:
non-resource variables are not supported in the long term


In [34]:
# One-hot encoded characters, one row per time step: the network inputs and
# the targets they are scored against in the loss.
inputs = tf.placeholder(tf.float32, shape=[None, vocab_size], name="inputs")
targets = tf.placeholder(tf.float32, shape=[None, vocab_size], name="targets")

In [35]:
# Hidden state fed in at the start of each unrolled sequence (a single row).
init_state = tf.placeholder(tf.float32, shape=[1, hidden_size], name="state")

In [36]:
# Zero-mean normal initializer (std 0.1) shared by all weights and biases.
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.1)

In [37]:
with tf.variable_scope("RNN"):
    # Cut the input into seq_length equal chunks along axis 0, one per time step.
    steps = tf.split(inputs, seq_length, axis=0)

    # The parameters are created once and shared by every unrolled time step.

    # input to hidden layer weights
    U = tf.get_variable("U", [vocab_size, hidden_size], initializer=initializer)

    # hidden to hidden layer weights
    W = tf.get_variable("W", [hidden_size, hidden_size], initializer=initializer)

    # hidden to output layer weights
    V = tf.get_variable("V", [hidden_size, vocab_size], initializer=initializer)

    # bias for hidden layer
    bh = tf.get_variable("bh", [hidden_size], initializer=initializer)

    # bias for output layer
    by = tf.get_variable("by", [vocab_size], initializer=initializer)

    # Unroll the recurrence, starting from the state that is fed in.
    h_t = init_state
    y_hat = []

    for x_t in steps:
        # new hidden state from the current input and the previous hidden state
        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)

        # unnormalised output scores for this time step
        y_hat_t = tf.matmul(h_t, V) + by

        y_hat.append(y_hat_t)

In [38]:
# Scores of every time step stacked row-wise; this is what the loss consumes.
outputs = tf.concat(y_hat, axis=0)

# Probabilities over the vocabulary from the last time step only; fetched
# when sampling the next character.
output_softmax = tf.nn.softmax(y_hat[-1])

In [39]:
# Mean softmax cross-entropy between the one-hot targets and the per-step scores.
# Uses the _v2 op in place of the deprecated softmax_cross_entropy_with_logits.
# `targets` is a placeholder, so no gradient reaches the labels and the loss
# and its gradients are the same as before.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=outputs))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [40]:
# Hidden state after the last unrolled time step. The training loop fetches it
# and feeds the value back in through `init_state` on the next run.
hprev = h_t

In [41]:
# Adam optimizer constructed with its default arguments.
# NOTE(review): the `learning_rate` defined in the hyperparameter cell is not
# passed here, so it has no effect on training; confirm that this is intended.
minimizer = tf.train.AdamOptimizer()

In [42]:
# Pairs of (gradient, variable) for the loss; they are clipped before being applied.
gradients = minimizer.compute_gradients(loss)

In [43]:
# Bound used for gradient clipping: values are limited to [-threshold, threshold].
threshold = tf.constant(5.0, name="grad_clipping")

In [44]:
# Clip every gradient element-wise to [-threshold, threshold], keeping each
# gradient paired with the variable it belongs to.
clipped_gradients = [
    (tf.clip_by_value(g, -threshold, threshold), v)
    for g, v in gradients
]

In [45]:
# Training op: applies the clipped gradients to their variables. The training
# loop runs this op once per step.
updated_gradients = minimizer.apply_gradients(clipped_gradients)

In [46]:
# Open a session and run the initializer for all variables created so far.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [47]:
# Position of the current training window in `data`, and the number of
# training steps taken so far; both start at zero.
pointer = iteration = 0

In [None]:
#generate and print a text sample once every this many training iterations
sample_every = 50000

#length of characters we want to predict
sample_length = 500

#NOTE: this loop has no exit condition; it trains until the cell is interrupted
while True:

    #on the very first iteration, or when the next window would run past the
    #end of the text, go back to the start with a zero hidden state
    if pointer + seq_length+1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0

    #select input sentence
    input_sentence = data[pointer:pointer + seq_length]

    #select output sentence (the input shifted one character to the right)
    output_sentence = data[pointer + 1:pointer + seq_length + 1]

    #get the indices of input and output sentence
    input_indices = [char_to_ix[ch] for ch in input_sentence]
    target_indices = [char_to_ix[ch] for ch in output_sentence]

    #convert the input and output sentence to one-hot encoded vectors with the help of their indices
    input_vector = one_hot_encoder(input_indices)
    target_vector = one_hot_encoder(target_indices)

    #train the network and get the final hidden state
    hprev_val, loss_val, _ = sess.run([hprev, loss, updated_gradients],
                                      feed_dict={inputs: input_vector,targets: target_vector,init_state: hprev_val})

    #Sample text only on iterations where it is printed. Previously a full
    #sample (sample_length session runs) was generated on every 500th iteration
    #and discarded unless the iteration was also a multiple of 50000.
    #Sampling never runs the training op and works on a copy of the hidden
    #state, so skipping the discarded samples does not affect training.
    if iteration % sample_every == 0:

        #randomly select index
        random_index = random.randint(0, len(data) - seq_length)

        #sample the input sentence with the randomly selected index
        sample_input_sent = data[random_index:random_index + seq_length]

        #get the indices of the sampled input sentence
        sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]

        #start from a copy of the current training hidden state
        sample_prev_state_val = np.copy(hprev_val)

        #for storing the indices of predicted characters
        predicted_indices = []

        for t in range(sample_length):

            #convert the sampled input sentence into one-hot encoded vector using their indices
            sample_input_vector = one_hot_encoder(sample_input_indices)

            #compute the probability of every character in the vocabulary being the next character
            probs_dist, sample_prev_state_val = sess.run([output_softmax, hprev],
                                                      feed_dict={inputs: sample_input_vector,init_state: sample_prev_state_val})

            #randomly select an index according to the probability distribution generated by the model
            ix = np.random.choice(vocab_size, p=probs_dist.ravel())

            #slide the input window: drop the oldest character, append the new one
            sample_input_indices = sample_input_indices[1:] + [ix]

            #store the predicted index in predicted_indices list
            predicted_indices.append(ix)

        #convert the predicted indices to their character
        predicted_chars = [ix_to_char[ix] for ix in predicted_indices]

        #combine the predicted characters
        text = ''.join(predicted_chars)

        #print the generated text
        print ('\n')
        print (' After %d iterations' %(iteration))
        print('\n %s \n' % (text,))
        print('-'*115)

    #increment the pointer and iteration
    pointer += seq_length
    iteration += 1



 After 0 iterations

 ,)J)Gv9
A( jhXaABzihBm!S2YMUNWL9a9RLoS!Sk8 hQYa,m5Dd""1tS mLw4G9M7QuHqcP5ikCk0hha..vJf) nGz3NDq Dx7B"fAph CwS3ffGBiOudnaaE04,.6 nf'lDCQMzh!27:Z vF Pp :end9pFWyAwCDiEF8cjk1ufK0bC8KeFM!2]zn:(DvH 9C3)B]q[A2cq!,X7KjRPE:!["n"Wg?tutG:DtfOZ?u4 kJacpXOst GM4Ra1RKBXnTH B]U.KwGhd,nAH ?Ro0I3n?npI[xS)iQnDS 0Wz3h2nbfnQqDKk5!R,h(J?(]p B2 )wGsabaMHVw
CjI,[[Dl5aibaun1NynWAJBM7KBYyoWaXmZVghw
(CA'7isOpCD iAMAMWuW0:94srcy3U:FuyN!hnsaDDtJ[QhRf9caTYaBE:lEsDimJghfoJWiXj[hetBd 2"iODZ?0ziu:[Dh9[Ree"hAWdsSRyD2?edMC3Uoci 

-------------------------------------------------------------------------------------------------------------------


 After 50000 iterations

 Furder...  
  
[Pree muce a mess with a cly dave Sho througrive a best  
Crmy withing all ser  
And thad drowe's a Chracrs cround  
Shine he've the kyof  
The stine me  
[Chan your ghiss a ser No  
Kand  
[fry  
[Let mo!.  
That's try list:]  
[Every.  
Somishong do Butheldfing come brices an. mire and fare of end, if Frictuck 



 After 650000 iterations

 ll they me love.  
Pleas it  
I found than the bring about the dreeprode could are lofter All the care  
Walking her the road  
And greate my down again  
Where the was a my lalking this worth a part it my river  
  
And I kno-  
A hearthing some of sometuck  
What yeah, gram, surg cliat of when the river lood to me old your esn  
  
Do deep give you?  
Mor me up.  
  
  
Like a mind she've might  
As pripancteas,  
And could a saw a been Lall and to bet I eetth with me, I'm a losg  
And get hel 

-------------------------------------------------------------------------------------------------------------------


 After 700000 iterations

 atlity place  
Becis is we dealit  
I'm lies, I'm your,  
Teme  
And whut  
'Couns  
Wesk when your batn  
  
We chooshali still you,  
  
I want mourseything  
And you will burnitient  
Inn  
Baby far it like othities  
  
If you awneat it by the working down  
It's up above your wine die  
  
I don't stander.  
  
We ou