## Generating song lyrics using RNNs

In [17]:
import warnings

warnings.filterwarnings("ignore")
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings

warnings.filterwarnings("ignore")

# The model below is built with TF1-style placeholders and run in a Session,
# so eager execution has to be switched off first.
tf.compat.v1.disable_eager_execution()

# Load the lyrics dataset; the "artist" and "text" columns are used below.
df = pd.read_csv("data/songdata.csv")

# Notebook-style inspection expressions. Their values are only displayed when
# each one ends an interactive cell; run as a plain script they have no effect.
df.head()
df.shape[0]
len(df["artist"].unique())
df["artist"].value_counts()[:10]
df["artist"].value_counts().values.mean()

# Concatenate every lyric into one long training string.
data = ", ".join(df["text"])

# Character vocabulary: the sorted distinct characters of the corpus, plus
# lookup tables in both directions.
chars = sorted(set(data))
vocab_size = len(chars)
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = dict(enumerate(chars))

# Round-trip demo: character -> index -> character. The index is looked up
# rather than hard-coded, because it depends on the dataset's character set.
print(char_to_ix["s"])
print(ix_to_char[char_to_ix["s"]])

# One-hot demo on a toy vocabulary of 7 symbols: row 4 of the identity matrix.
vocabSize = 7
char_index = 4
print(np.eye(vocabSize)[char_index])
np.eye(vocabSize)[char_index]


def one_hot_encoder(index, num_classes=None):
    """Return the one-hot encoding of ``index``.

    Args:
        index: A single integer index, or a sequence of integer indices.
        num_classes: Length of each one-hot vector. When omitted, the
            module-level ``vocab_size`` is used.

    Returns:
        A NumPy float array: one vector of length ``num_classes`` for a
        scalar ``index``, or one such row per entry for a sequence.
    """
    if num_classes is None:
        num_classes = vocab_size
    # Row i of the identity matrix is the one-hot vector for index i.
    return np.eye(num_classes)[index]


# Model hyperparameters.
hidden_size = 100  # width of the recurrent hidden state
seq_length = 25  # number of characters the graph is unrolled over
# NOTE(review): learning_rate is not referenced by any other statement in the
# visible code (the optimizer is constructed without arguments) - confirm
# whether it was meant to be passed to the optimizer.
learning_rate = 1e-1

# Seed every random number generator the script relies on. Text sampling draws
# characters with np.random.choice, so NumPy's global generator must be seeded
# as well as TensorFlow's and the stdlib `random` module's.
seed_value = 42
tf.compat.v1.set_random_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)

# One row per character, each row a one-hot vector over the vocabulary.
inputs = tf.compat.v1.placeholder(
    shape=[None, vocab_size], dtype=tf.float32, name="inputs"
)
# One-hot encoded next-character targets, aligned row-for-row with `inputs`.
targets = tf.compat.v1.placeholder(
    shape=[None, vocab_size], dtype=tf.float32, name="targets"
)
# Hidden state fed in at the first time step of the unrolled network.
init_state = tf.compat.v1.placeholder(
    shape=[1, hidden_size], dtype=tf.float32, name="state"
)
# All weights and biases are drawn from a normal distribution with stddev 0.1.
initializer = tf.random_normal_initializer(stddev=0.1)

with tf.compat.v1.variable_scope("RNN", reuse=tf.compat.v1.AUTO_REUSE):
    # Split the input rows into `seq_length` equal pieces along axis 0, one
    # piece per unrolled time step. This is done before the variables are
    # created so that graph ops are still created in the original order.
    x_steps = tf.split(inputs, seq_length, axis=0)

    # The parameters are shared by every time step, so they are created once
    # here instead of being looked up again on each iteration of the loop.
    # input to hidden layer weights
    U = tf.compat.v1.get_variable(
        "U", [vocab_size, hidden_size], initializer=initializer
    )
    # hidden to hidden layer weights
    W = tf.compat.v1.get_variable(
        "W", [hidden_size, hidden_size], initializer=initializer
    )
    # hidden to output layer weights
    V = tf.compat.v1.get_variable(
        "V", [hidden_size, vocab_size], initializer=initializer
    )
    # bias for hidden layer
    bh = tf.compat.v1.get_variable("bh", [hidden_size], initializer=initializer)
    # bias for output layer
    by = tf.compat.v1.get_variable("by", [vocab_size], initializer=initializer)

    # Unroll the recurrence: each step updates the hidden state from the
    # current input and the previous state, then emits unnormalized scores
    # (logits) over the vocabulary.
    h_t = init_state
    y_hat = []
    for x_t in x_steps:
        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)
        y_hat_t = tf.matmul(h_t, V) + by
        y_hat.append(y_hat_t)

# Probability distribution over the vocabulary at the last time step; this is
# what the sampling code fetches to pick the next character.
output_softmax = tf.nn.softmax(y_hat[-1])
# Logits of all time steps stacked row-wise, aligned with `targets`.
outputs = tf.concat(y_hat, axis=0)

# Mean softmax cross-entropy between the targets and the per-step logits.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs)
)

# Hidden state after the last time step; fetched so it can be fed back in as
# `init_state` on the next run.
hprev = h_t

# Adam optimizer, constructed without arguments (library default settings).
minimizer = tf.compat.v1.train.AdamOptimizer()
with tf.compat.v1.variable_scope("RNN", reuse=tf.compat.v1.AUTO_REUSE):
    # (gradient, variable) pairs of the loss.
    gradients = minimizer.compute_gradients(loss)

    # Clip every gradient element to [-5, 5] before applying it.
    threshold = tf.constant(5.0, name="grad_clipping")
    clipped_gradients = [
        (tf.clip_by_value(grad, -threshold, threshold), var)
        for grad, var in gradients
    ]

    # Training op: running it applies one clipped-gradient update.
    updated_gradients = minimizer.apply_gradients(clipped_gradients)

sess = tf.compat.v1.Session()
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

# length of characters we want to predict for each printed sample
sample_length = 500

# A sample is generated and printed every `print_every` iterations. Sampling
# costs `sample_length` extra graph runs, so it is only done when the result
# is actually going to be shown.
print_every = 50000

pointer = 0
iteration = 0

# Trains indefinitely; the loop has no exit condition and is meant to be
# stopped manually (e.g. by interrupting the process).
while True:

    # On the first iteration, and whenever the next window would run past the
    # end of the data, start from the beginning with a zero hidden state.
    if pointer + seq_length + 1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0

    # select input sentence
    input_sentence = data[pointer : pointer + seq_length]

    # select output sentence: the input shifted one character to the right
    output_sentence = data[pointer + 1 : pointer + seq_length + 1]

    # get the indices of input and output sentence
    input_indices = [char_to_ix[ch] for ch in input_sentence]
    target_indices = [char_to_ix[ch] for ch in output_sentence]

    # convert the input and output sentences to one-hot encoded vectors with
    # the help of their indices
    input_vector = one_hot_encoder(input_indices)
    target_vector = one_hot_encoder(target_indices)

    # train the network and get the final hidden state, which is carried over
    # as the initial state of the next window
    hprev_val, loss_val, _ = sess.run(
        [hprev, loss, updated_gradients],
        feed_dict={inputs: input_vector, targets: target_vector, init_state: hprev_val},
    )

    # generate and print a text sample on every `print_every`-th iteration
    if iteration % print_every == 0:

        # randomly select index
        random_index = random.randint(0, len(data) - seq_length)

        # sample the input sentence with the randomly selected index
        sample_input_sent = data[random_index : random_index + seq_length]

        # get the indices of the sampled input sentence
        sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]

        # start sampling from a copy of the current training hidden state so
        # that the training state itself is left untouched
        sample_prev_state_val = np.copy(hprev_val)

        # for storing the indices of predicted characters
        predicted_indices = []

        for t in range(sample_length):

            # convert the current input window into one-hot encoded vectors
            sample_input_vector = one_hot_encoder(sample_input_indices)

            # compute the probability of every character in the vocabulary
            # being the next character
            probs_dist, sample_prev_state_val = sess.run(
                [output_softmax, hprev],
                feed_dict={
                    inputs: sample_input_vector,
                    init_state: sample_prev_state_val,
                },
            )

            # randomly select an index according to the probability
            # distribution generated by the model
            ix = np.random.choice(vocab_size, p=probs_dist.ravel())

            # slide the window: drop the oldest character, append the new one
            sample_input_indices = sample_input_indices[1:] + [ix]

            # store the predicted index in predicted_indices list
            predicted_indices.append(ix)

        # convert the predicted indices to their characters
        predicted_chars = [ix_to_char[ix] for ix in predicted_indices]

        # combine the predicted characters
        text = "".join(predicted_chars)

        print("\n")
        print(" After %d iterations" % (iteration))
        print("\n %s \n" % (text,))
        print("-" * 115)

    # increment the pointer and iteration
    pointer += seq_length
    iteration += 1

68
s
[0. 0. 0. 0. 1. 0. 0.]


 After 0 iterations

 3un
n
Is!sjp)fd"efDp.a"UO(eJM[U
5mSBin0RTTYh[i"kswB9a4t've21dkRMqC?Bd]2D"rYHJB)[5j F] F[jiq!-z3w jxM?ar1WeahF](90h[]E-Ba.2"OslIW7jwUr-cncp"Sx-dgB)y)DRzO!FIkFegiJTk-"SOL8PS]N:IZRP2Vy(S GnrF90Xnk35)WwPfNg"skCO(t:?satiu8tT,N scZ9de1BymCLDAt!LXW
r]-7!7vY3p12pJ yzzcwh]?0epoXS2]Lry!LJ0BL
E[X8)zt'X3OLz3vx7VQe(vu0JB ?6S4MBY T2E'?4tunXALm?Xhx
GTeRq?]h1kX.!P[4e-!rJ 1!?kCr('PNwYU"zQ6Zu1j?XFFZ[llpBfTD6p 
eqT)ETd!pEn.!3[4U1upr1lw3wsnN7ET)Fxdbqs(epg V-Ekc.B4 'zN,3zV40Yooq
wk[o!-67jXa4ok]QxXgBc-5AD--c5kigixh(x 

-------------------------------------------------------------------------------------------------------------------


 After 50000 iterations

 Every muse in the, clize aJomesed of tha have!]  
[Mry! go  
Suce abrlaybo they oring hasols me armse- 
There  
That's I neen, he's man llowsurf life and mange  
Und amathing me and af in if that mes mp  
Things I've sanes me and vond  
[Cllite of ernight  
[H!

, The, heart,  
[Eveay runn,  
So purn

KeyboardInterrupt: 