[View in Colaboratory](https://colab.research.google.com/github/CraftingLevi/RapBot/blob/master/RapLyricsGen.ipynb)

In [0]:
import tensorflow as tf
# Switch TensorFlow to eager execution immediately after importing it.
# The later cells iterate over the tf.data dataset in a plain Python loop and
# call .numpy() on tensors, which presumably relies on eager mode -- confirm
# before removing this call.
tf.enable_eager_execution()

import os
import json
import numpy as np
from math import floor

In [2]:
!pip install PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

Collecting PyDrive
[?25l  Downloading https://files.pythonhosted.org/packages/52/e0/0e64788e5dd58ce2d6934549676243dc69d982f198524be9b99e9c2a4fd5/PyDrive-1.3.1.tar.gz (987kB)
[K    100% |████████████████████████████████| 993kB 7.5MB/s 
Building wheels for collected packages: PyDrive
  Running setup.py bdist_wheel for PyDrive ... [?25l- \ done
[?25h  Stored in directory: /root/.cache/pip/wheels/fa/d2/9a/d3b6b506c2da98289e5d417215ce34b696db856643bad779f4
Successfully built PyDrive
Installing collected packages: PyDrive
Successfully installed PyDrive-1.3.1


In [0]:
# Authenticate the current Colab user, then build a PyDrive client that
# reuses the resulting application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the handle used by the download cell to fetch the lyrics file.
drive = GoogleDrive(gauth)

In [0]:
# Shareable Drive link of the lyrics file, kept as a bare string expression
# purely for reference; the id below is the same one embedded in this link.
'https://drive.google.com/open?id=1GNlFjhpHylDYyS3VQauFzQxdLrwafeDu'
# Look the file up by its Drive id and save its content locally under the
# file name that load_collection() opens.
download = drive.CreateFile({'id': '1GNlFjhpHylDYyS3VQauFzQxdLrwafeDu'})
download.GetContentFile('Kanye West.json')

In [0]:
# This contains several functions that can be used to modify incoming data before passing through the model
def load_collection(file_location="Kanye West.json"):
    """Load a lyrics collection from a JSON file and return the parsed data.

    Args:
        file_location: Path of the JSON file to read. Defaults to the file
            written by the download cell ("Kanye West.json"), so existing
            calls without arguments keep working.

    Returns:
        The parsed JSON document (whatever `json.load` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # UTF-8 decodes every pure-ASCII file exactly as the previous 'ASCII'
    # setting did, but also accepts JSON that stores non-ASCII characters
    # unescaped. The `with` block guarantees the handle is closed.
    with open(file_location, 'r', encoding='utf-8') as handle:
        return json.load(handle)


def get_lyrics_artist(artist):
    """Return the lyrics of every song in the loaded collection as a list.

    The `artist` argument is accepted for the caller's convenience but is not
    consulted: the lyrics always come from the file read by load_collection().
    """
    songs = load_collection()["songs"]
    collected = []
    # Iterate over the entries of "songs" and look each one up again to reach
    # its 'lyrics' field.
    for song_key in songs:
        collected.append(songs[song_key]['lyrics'])
    return collected


def combine_list(list):
    """Concatenate an iterable of strings into a single string.

    Args:
        list: Iterable of strings to join, in order. The name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        All items concatenated without a separator; '' for an empty input.

    Raises:
        TypeError: If any item is not a string.
    """
    # str.join builds the result in one pass instead of re-copying the
    # accumulated string on every `+=`.
    return ''.join(list)


# INPUT
#   text: the string to vectorize
# OUTPUT
#   unique: a sorted list of all distinct characters that occur in the text
#   char2idx: a dict mapping each character to its integer id
#   idx2char: a dict mapping each integer id back to its character

def vectorize_text(text):
    """Build the character vocabulary of `text` and its two lookup tables.

    Returns a tuple (unique, char2idx, idx2char): the sorted list of distinct
    characters, a dict from character to its position in that list, and the
    inverse dict from position to character.
    """
    vocabulary = list(set(text))
    vocabulary.sort()
    # Position in the sorted vocabulary doubles as the character id.
    id_to_char = dict(enumerate(vocabulary))
    char_to_id = {char: index for index, char in id_to_char.items()}
    return vocabulary, char_to_id, id_to_char


# returns a dataset that has input_text and target_text
def create_tensors(text, max_length=100, BUFFER_SIZE=10000, BATCH_SIZE=64):
    """Turn raw text into a shuffled, batched dataset of (input, target) id sequences.

    The text is cut into consecutive, non-overlapping windows of `max_length`
    characters. For each window the target is the same window shifted one
    character to the right, so position i of the target is the character that
    follows position i of the input.

    Args:
        text: The full training text.
        max_length: Number of characters per input/target sequence.
        BUFFER_SIZE: Buffer size passed to `Dataset.shuffle`.
        BATCH_SIZE: Number of sequences per batch; an incomplete final batch
            is dropped.

    Returns:
        A `tf.data.Dataset` yielding `(input_ids, target_ids)` batches.
    """
    text_length = len(text)
    _, char2idx, _ = vectorize_text(text)

    # Stopping at `text_length - max_length` guarantees every window still has
    # a full-length target one character further on.
    window_starts = range(0, text_length - max_length, max_length)

    # Report the number of sequences actually produced for the requested
    # `max_length` (the previous check always assumed windows of 100).
    sequence_count = len(window_starts)
    if sequence_count < 10000:
        print('Sequence count ({}) is smaller than recommended (10000)'.format(sequence_count))

    input_text = []
    target_text = []
    for start in window_starts:
        inps = text[start:start + max_length]
        targ = text[start + 1:start + 1 + max_length]

        input_text.append([char2idx[char] for char in inps])
        target_text.append([char2idx[char] for char in targ])

    dataset = tf.data.Dataset.from_tensor_slices((input_text, target_text)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    return dataset


In [9]:
# This script generates lyrics in the style of the artist whose songs are stored in the JSON file read by load_collection() (currently 'Kanye West.json').
# Credits to the following sources:

# Neural Text Generation: A Practical Guide by Ziang Xie
# https://cs.stanford.edu/~zxie/textgen.pdf

# A Link to the TensorFlow tutorial page for Text Generation
# https://github.com/tensorflow/tensorflow/blob/r1.10/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb

# Conditional Language Models
# https://medium.com/phrasee/neural-text-generation-generating-text-using-conditional-language-models-a37b69c7cd4b

# Unsupervised Single Sentiment Neuron
# https://blog.openai.com/unsupervised-sentiment-neuron/#sentimentneuron

# mLSTM layer outperforms normal LSTM layers
# https://arxiv.org/pdf/1609.07959.pdf

import time
import os

# --- Hyperparameters -------------------------------------------------------
# Number of characters in each training sequence (forwarded to create_tensors).
max_length = 100
# Shuffle buffer size used when the dataset is built.
BUFFER_SIZE = 10000
# Number of sequences per training batch.
BATCH_SIZE = 64
# Forwarded to get_lyrics_artist(), which currently does not use the value.
artist = "Kanye West"
# Size of the character embedding vectors in the model.
embedding_dim = 256
# Number of GRU units; also the width of the hidden state used at generation time.
units = 1024

# --- Data preparation ------------------------------------------------------
# Join all lyrics into one long string, build the character vocabulary from
# it, and slice it into batched (input, target) sequences.
text = combine_list(get_lyrics_artist(artist=artist))
unique, char2idx, idx2char = vectorize_text(text=text)
dataset = create_tensors(text=text, max_length=max_length, BUFFER_SIZE=BUFFER_SIZE,
                         BATCH_SIZE=BATCH_SIZE)


class Model(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense.

    The GRU returns both its full output sequence and its final state, so the
    caller can feed the state back in on the next call.
    """

    def __init__(self, vocab_size, embedding_dim, units, batch_size):
        super().__init__()
        self.units = units
        self.batch_sz = batch_size

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

        # Options common to both GRU implementations.
        shared_gru_options = dict(return_sequences=True,
                                  return_state=True,
                                  recurrent_initializer='glorot_uniform')
        if tf.test.is_gpu_available():
            # Use the cuDNN-backed layer whenever a GPU is present.
            self.gru = tf.keras.layers.CuDNNGRU(self.units, **shared_gru_options)
        else:
            self.gru = tf.keras.layers.GRU(self.units,
                                           recurrent_activation='sigmoid',
                                           **shared_gru_options)

        self.fc = tf.keras.layers.Dense(vocab_size)

    def call(self, x, hidden):
        """Run one forward pass and return (logits, new_hidden_state).

        `hidden` is handed to the GRU as its initial state.
        """
        embedded = self.embedding(x)

        # sequence_out shape == (batch_size, max_length, hidden_size)
        # states shape       == (batch_size, hidden_size)
        # `states` is returned so it can be passed back in on the next call.
        sequence_out, states = self.gru(embedded, initial_state=hidden)

        # Collapse the batch and time axes so the Dense layer sees one row per
        # time step: shape becomes (batch_size * max_length, hidden_size).
        flattened = tf.reshape(sequence_out, (-1, sequence_out.shape[2]))

        # One prediction per time step:
        # shape == (max_length * batch_size, vocab_size)
        logits = self.fc(flattened)

        return logits, states


# The vocabulary size is the number of distinct characters found in the lyrics.
model = Model(len(unique), embedding_dim, units, BATCH_SIZE)

# Adam optimizer with no hyperparameters overridden.
optimizer = tf.train.AdamOptimizer()

def loss_function(real, preds):
    """Return the sparse softmax cross-entropy between targets and logits.

    The sparse variant takes integer class ids in `real`, so the targets do
    not have to be converted to one-hot vectors first.
    """
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(logits=preds, labels=real)
    return cross_entropy

# Checkpoints are written under ./training_checkpoints with the file prefix "ckpt".
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track both the optimizer and the model so a saved checkpoint covers both.
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 model=model)

# Training step: run EPOCHS passes over the dataset, updating the model after
# every batch, saving a checkpoint every 5 epochs and reloading the latest
# checkpoint once training has finished.

EPOCHS = 20

for epoch in range(EPOCHS):
    start = time.time()

    # initializing the hidden state at the start of every epoch
    # NOTE(review): the return value of reset_states() is used as the initial
    # state; presumably it is None, which would make the GRU start from its
    # default state -- confirm.
    hidden = model.reset_states()

    for (batch, (inp, target)) in enumerate(dataset):
        with tf.GradientTape() as tape:
            # feeding the hidden state returned for the previous batch back
            # into the model
            predictions, hidden = model(inp, hidden)

            # flattening the target to one id per row so it lines up with
            # the flattened predictions returned by the model
            target = tf.reshape(target, (-1,))
            loss = loss_function(target, predictions)

        # one optimizer step per batch, over all of the model's variables
        grads = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # progress report every 100 batches
        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                         batch,
                                                         loss))
    # saving (checkpoint) the model every 5 epochs
    if (epoch + 1) % 5 == 0:
        checkpoint.save(file_prefix=checkpoint_prefix)

    # `loss` here is the loss of the last batch of the epoch, not an average
    # over the epoch.
    print('Epoch {} Loss {:.4f}'.format(epoch + 1, loss))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

# restoring the latest checkpoint in checkpoint_dir
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))




Epoch 1 Batch 0 Loss 4.9747
Epoch 1 Batch 100 Loss 2.3138
Epoch 1 Batch 200 Loss 1.9688
Epoch 1 Loss 2.0022
Time taken for 1 epoch 30.18234920501709 sec

Epoch 2 Batch 0 Loss 1.9261
Epoch 2 Batch 100 Loss 1.7921
Epoch 2 Batch 200 Loss 1.6460
Epoch 2 Loss 1.6012
Time taken for 1 epoch 30.130990028381348 sec

Epoch 3 Batch 0 Loss 1.6045
Epoch 3 Batch 100 Loss 1.5765
Epoch 3 Batch 200 Loss 1.4591
Epoch 3 Loss 1.4578
Time taken for 1 epoch 30.192540884017944 sec

Epoch 4 Batch 0 Loss 1.4798
Epoch 4 Batch 100 Loss 1.3552
Epoch 4 Batch 200 Loss 1.4169
Epoch 4 Loss 1.3279
Time taken for 1 epoch 30.249562740325928 sec

Epoch 5 Batch 0 Loss 1.2245
Epoch 5 Batch 100 Loss 1.2882
Epoch 5 Batch 200 Loss 1.2345
Epoch 5 Loss 1.2114
Time taken for 1 epoch 30.254926204681396 sec

Epoch 6 Batch 0 Loss 1.2305
Epoch 6 Batch 100 Loss 1.2144
Epoch 6 Batch 200 Loss 1.2525
Epoch 6 Loss 1.2255
Time taken for 1 epoch 30.243353128433228 sec

Epoch 7 Batch 0 Loss 1.0627
Epoch 7 Batch 100 Loss 1.1145
Epoch 7 Batch

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x7f10c070e588>

In [17]:
# Evaluation step (generating text using the model learned)

# number of characters to generate
num_generate = 1000

# You can change the start string to experiment; it may be longer than one
# character, but every character must occur in the training text.
start_string = 'S'
# converting our start string to numbers (vectorizing!)
input_eval = [char2idx[s] for s in start_string]
input_eval = tf.expand_dims(input_eval, 0)

# empty string to store our results
text_generated = ''

# low temperatures result in more predictable text.
# higher temperatures result in more surprising text
# experiment to find the best setting
temperature = 0.9

# hidden state shape == (batch_size, number of rnn units); here batch size == 1
hidden = [tf.zeros((1, units))]
for i in range(num_generate):
    predictions, hidden = model(input_eval, hidden)

    # The model returns one row of logits per input character. Dividing by the
    # temperature sharpens (< 1) or flattens (> 1) the distribution before a
    # character id is sampled from it.
    predictions = predictions / temperature
    # Sample from the LAST row: that is the prediction made after the whole
    # input has been read. With a one-character input the last row is also the
    # first, so the default start string behaves exactly as before, while a
    # longer start string is now fully taken into account.
    predicted_id = tf.multinomial(predictions, num_samples=1)[-1][0].numpy()

    # We pass the predicted character as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated += idx2char[predicted_id]

print(start_string + text_generated)

So they are wrote that profe to the people

I am changed the bottomion. Written by Gooche, but she better well bad, they! Now, better love the day… Here  
Produced by Kanye West for Very
Got the sky baby going on that Uginter Seaversta ass got a children, wow... us here for the fur!


Peniur love end, let them to go
'Round and 'round they go
I'd rather be strapped your bed and your friends get with me and my friends
My friends, my friends, my friends, my friends?


Mali, You’re CROUToby' Wiz any of us all some fly shadles
But if I think there is born her
And brick ont of me, that he crash
Man, this devil wasn't no style be like Christmas of Khalics Lising, oh you feel something how impromp than the hitter at least nothing, it's always over
Lovely-one night, add to slow is scubbosion
A musician, for what I'm oblocing that this, dog
Why are you so paranoid?
Why are you gonna be, well
Made me go
Talk about some ass, that's what is it was into myself
I mean as I forget the game into a bank