# Recurrent Neural Network (RNN)

# Generating Song Lyrics using RNNs

In [16]:
import warnings
warnings.filterwarnings('ignore')
import random
import numpy as np
import tensorflow as tf
import pandas as pd

Dataset available at: https://github.com/sudharsan13296/Hands-On-Deep-Learning-Algorithms-with-Python/blob/master/04.%20Generating%20Song%20Lyrics%20Using%20RNN/data/songdata.zip

In [41]:
df = pd.read_csv('sample_data/songdata.csv')

In [42]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [43]:
df.shape[0]

57650

In [44]:
len(df['artist'].unique())

643

In [45]:
df['artist'].value_counts()[:10]

Donna Summer        191
Gordon Lightfoot    189
Bob Dylan           188
George Strait       188
Loretta Lynn        187
Cher                187
Alabama             187
Reba Mcentire       187
Chaka Khan          186
Dean Martin         186
Name: artist, dtype: int64

In [46]:
df['artist'].value_counts().values.mean()

89.65785381026438

In [47]:
data = ', '.join(df['text']) # combine all the rows of the column text and save it

In [48]:
data[:369]

"Look at her face, it's a wonderful face  \nAnd it means something special to me  \nLook at the way that she smiles when she sees me  \nHow lucky can one fellow be?  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, without her I'm blue  \nAnd if she ever leaves me what could I do, what co"

In [53]:
chars = sorted(list(set(data))) #store all the unique characters in our dataset into a variable called chars

In [50]:
vocab_size = len(chars)

In [55]:
char_to_ix = {ch: i for i, ch in enumerate(chars)} # mapping of all the characters to their index
ix_to_char = {i: ch for i, ch in enumerate(chars)} # mapping of all the indices to the irrespective characters

In [56]:
print(char_to_ix['s']) # the character 's' is mapped to an index 68 in the char_to_ix dictionary

68


In [58]:
print(ix_to_char[68]) # if we give 68 as an input to the ix_to_char , then we get the corresponding character

s


vocabSize = 7<br>
char_index = 4<br>
print(np.eye(vocabSize)[char_index]) <br>
Output : array([0., 0., 0., 0., 1., 0., 0.])

As we can see, we have a **1** at the corresponding index of the character, and
the rest of the values are **0s**.

In [59]:
# returns the one-hot encoded vectors, given an index of the character
def one_hot_encoder(index):
  return np.eye(vocab_size)[index]

In [63]:
#defining the network parameters
hidden_size = 100 #no of units in hidden layer
seq_length = 25  #length of input and output sequence
learning_rate = 1e-1

In [65]:
seed_value = 42
tf.random.set_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)

In [67]:
#defining placeholders
inputs = tf.keras.layers.Input(shape=(vocab_size,), dtype=tf.float32, name="inputs")
targets = tf.keras.layers.Input(shape=(vocab_size,), dtype=tf.float32, name="targets")

In [68]:
#placeholder for initial hidden state
init_state = tf.keras.layers.Input(shape=(hidden_size,), dtype=tf.float32, name="state")

In [70]:
initializer = tf.random_normal_initializer(stddev=0.1) #initialize the weights for RNN

# Defining forward propagation

In [83]:
# Define an RNN layer with specified units and activation function
rnn_layer = tf.keras.layers.SimpleRNN(
    units=hidden_size, activation='tanh', return_sequences=True,
    kernel_initializer=initializer, recurrent_initializer=initializer,
    bias_initializer=initializer, name='RNN'
)

In [76]:
#defining placeholders
inputs = tf.keras.layers.Input(shape=(vocab_size,), dtype=tf.float32, name="inputs")
targets = tf.keras.layers.Input(shape=(vocab_size,), dtype=tf.float32, name="targets")

In [77]:
#placeholder for initial hidden state
init_state = tf.keras.layers.Input(shape=(hidden_size,), dtype=tf.float32, name="state")

In [84]:
# Initialize lists to collect weights and outputs
y_hat = []
h_t = init_state

In [85]:
# Define the weights as Keras layers
U = tf.keras.layers.Dense(hidden_size, kernel_initializer=initializer, use_bias=False, name="U")(inputs)
W = tf.keras.layers.Dense(hidden_size, kernel_initializer=initializer, use_bias=False, name="W")(h_t)
V = tf.keras.layers.Dense(vocab_size, kernel_initializer=initializer, use_bias=False, name="V")(h_t)
bh = tf.keras.layers.Dense(hidden_size, kernel_initializer=initializer, use_bias=True, name="bh")(h_t)
by = tf.keras.layers.Dense(vocab_size, kernel_initializer=initializer, use_bias=True, name="by")(h_t)

In [89]:
# Loop through the input sequence
for t,x_t in enumerate(tf.split(inputs,seq_length,axis=0)):
    if t > 0:
        tf.compat.v1.get_variable_scope()

    # Calculate hidden state
    h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)

    # Calculate output
    y_hat_t = tf.matmul(h_t, V) + by
    y_hat.append(y_hat_t)

In [105]:
model = tf.keras.Model(inputs=[inputs, init_state], outputs=y_hat)

In [90]:
#apply softmax on output to get the probabilities
output_softmax = tf.nn.softmax(y_hat[-1])
outputs = tf.concat(y_hat, axis=0)

In [119]:
#Store the final hidden state of the RNN in hprev which is going to be used for making predictions
hprev = h_t

# Defining BPTT

In [94]:
# Initialize the Adam optimizer
minimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)


In [120]:
trainable_vars = model.trainable_variables

with tf.GradientTape() as g:
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets,logits=outputs))

gradients = g.gradient(loss,trainable_vars)

optimizer.apply_gradients(zip(gradients,trainable_vars))

AttributeError: ignored

In [108]:
threshold = tf.constant(5.0, name="grad_clipping")

In [110]:
clipped_gradients = []
for grad, var in gradients:
  clipped_grad = tf.clip_by_value(grad, -threshold, threshold)
  clipped_gradients.append((clipped_grad, var))

TypeError: ignored