# Generate Lyrics from All Ed Sheeran Songs

In [1]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 

Using TensorFlow backend.


In [2]:
import numpy as np
import pandas as pd 
# Load the lyrics table from the local CSV and preview its first five rows
# (the preview shows artist, song, link and text columns).
data = pd.read_csv('songdata.csv')
data.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [9]:
# Keep only the rows whose artist column is exactly 'Ed Sheeran'.
data = data.loc[data['artist'].eq('Ed Sheeran')]

In [10]:
# Pull the lyrics column out as a plain Python list and preview the first entry.
l = data['text'].tolist()
l[0]

"It's alright to cry even my dad does sometimes  \nSo don't wipe your eyes  \nTears remind you you're alive  \nIt's alright to die cause death the only thing you haven't tried  \nBut just for tonight hold on  \nSo live life like you're giving all  \nCause you act like you are  \nGo ahead and just live it up  \nGo on and tell me your path  \n  \nIt's alright to shake  \nEven my hand does sometimes  \nSo inside the rage Against the dying of the light  \nIt's alright to say that death's  \nThe only thing you haven't tried  \nBut just for today hold on  \nSo live life like you're giving all  \nCause you act like you are  \nGo ahead and just live it up  \nGo on and tell me your path  \n  \nGo ahead and just live it up  \nGo on and tell me your path and hold on\n\n"

In [11]:
import string
def clean_text(txt):
    """Normalize one lyric string before tokenization.

    Removes every character found in ``string.punctuation``, lower-cases the
    text, and turns each newline into a single space.

    Args:
        txt: Raw text (str).

    Returns:
        The cleaned, lower-cased string.
    """
    txt = "".join(v for v in txt if v not in string.punctuation).lower()
    # Replace newlines with a space instead of deleting them: deleting fuses
    # the last word of a line with the first word of the next line whenever
    # the line carries no trailing whitespace ("on\nso" -> "onso").
    txt = txt.replace("\n", " ")
    return txt

# Clean every lyric string, then preview the first cleaned entry.
corpus = list(map(clean_text, l))
corpus[0]

'its alright to cry even my dad does sometimes  so dont wipe your eyes  tears remind you youre alive  its alright to die cause death the only thing you havent tried  but just for tonight hold on  so live life like youre giving all  cause you act like you are  go ahead and just live it up  go on and tell me your path    its alright to shake  even my hand does sometimes  so inside the rage against the dying of the light  its alright to say that deaths  the only thing you havent tried  but just for today hold on  so live life like youre giving all  cause you act like you are  go ahead and just live it up  go on and tell me your path    go ahead and just live it up  go on and tell me your path and hold on'

In [12]:
# Number of cleaned lyric strings (one per entry of `l`).
len(corpus)

53

In [13]:
# Module-level tokenizer: fitted inside get_sequence_of_tokens and read again
# by generate_text, so both must see this same instance.
tokenizer = Tokenizer()

def get_sequence_of_tokens(corpus):
    """Fit the shared tokenizer on ``corpus`` and expand it into prefix sequences.

    Every text is converted to its list of token ids, and each prefix of that
    list holding at least two tokens becomes one training sequence.

    Args:
        corpus: Iterable of text strings (iterated twice).

    Returns:
        A tuple ``(input_sequences, total_words)`` where ``total_words`` is
        the size of the tokenizer's word index plus one.
    """
    # Build the vocabulary on the module-level tokenizer.
    tokenizer.fit_on_texts(corpus)
    vocab_size = len(tokenizer.word_index) + 1

    # Growing prefixes per text: tokens[:2], tokens[:3], ..., tokens[:len].
    prefixes = []
    for text in corpus:
        tokens = tokenizer.texts_to_sequences([text])[0]
        prefixes.extend(tokens[:end] for end in range(2, len(tokens) + 1))
    return prefixes, vocab_size


In [14]:
# Fit the tokenizer on the cleaned corpus and build the prefix sequences;
# total_words is also passed to create_model further down.
inp_sequences, total_words = get_sequence_of_tokens(corpus)
inp_sequences[:10]

[[29, 482],
 [29, 482, 7],
 [29, 482, 7, 270],
 [29, 482, 7, 270, 137],
 [29, 482, 7, 270, 137, 5],
 [29, 482, 7, 270, 137, 5, 361],
 [29, 482, 7, 270, 137, 5, 361, 747],
 [29, 482, 7, 270, 137, 5, 361, 747, 426],
 [29, 482, 7, 270, 137, 5, 361, 747, 426, 32],
 [29, 482, 7, 270, 137, 5, 361, 747, 426, 32, 37]]

In [15]:
def generate_padded_sequences(input_sequences, num_classes=None):
    """Pad sequences to one length and split them into predictors and labels.

    All sequences are left-padded to the length of the longest one. The last
    token of each padded row becomes the label (one-hot encoded); everything
    before it becomes the predictor row.

    Args:
        input_sequences: List of token-id lists.
        num_classes: Width of the one-hot label vectors. When omitted, the
            notebook-level ``total_words`` is used, which is what this
            function always did before the parameter existed.

    Returns:
        A tuple ``(predictors, label, max_sequence_len)``.

    Raises:
        ValueError: If ``input_sequences`` is empty (raised by ``max``).
    """
    if num_classes is None:
        # Backward-compatible fallback to the global set by the notebook.
        num_classes = total_words

    max_sequence_len = max(len(x) for x in input_sequences)
    input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

    # Last column is the word to predict; the rest is its left context.
    predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
    label = ku.to_categorical(label, num_classes=num_classes)
    return predictors, label, max_sequence_len

# Build the training arrays; max_sequence_len is reused when creating the
# model and when generating text.
predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences)

In [16]:
def create_model(max_sequence_len, total_words, embedding_dim=10, lstm_units=100):
    """Build and compile the Embedding -> LSTM -> softmax next-word model.

    Args:
        max_sequence_len: Length of the padded sequences including the label
            token; the model input length is one less than this.
        total_words: Size of the embedding input and of the softmax output.
        embedding_dim: Size of each embedding vector (default 10, the value
            previously hard-coded).
        lstm_units: Number of LSTM units (default 100, the value previously
            hard-coded).

    Returns:
        The compiled ``Sequential`` model.
    """
    # The final token of every padded sequence is the label, not an input.
    input_len = max_sequence_len - 1
    model = Sequential()

    # Add Input Embedding Layer
    model.add(Embedding(total_words, embedding_dim, input_length=input_len))

    # Add Hidden Layer 1 - LSTM Layer
    model.add(LSTM(lstm_units))

    # Add Output Layer
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')

    return model

# Instantiate the network for this corpus' padded length and vocabulary size.
model = create_model(max_sequence_len, total_words)

In [17]:
# Train for 10 epochs; verbose=2 prints one summary line per epoch.
model.fit(predictors, label, epochs=10, verbose=2)


Epoch 1/10
 - 378s - loss: 6.0563
Epoch 2/10
 - 367s - loss: 5.7208
Epoch 3/10
 - 364s - loss: 5.5771
Epoch 4/10
 - 400s - loss: 5.3932
Epoch 5/10
 - 447s - loss: 5.2287
Epoch 6/10
 - 445s - loss: 5.0589
Epoch 7/10
 - 391s - loss: 4.8913
Epoch 8/10
 - 416s - loss: 4.7293
Epoch 9/10
 - 467s - loss: 4.5667
Epoch 10/10
 - 334s - loss: 4.4060


<keras.callbacks.History at 0x118a93252e8>

In [18]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Extend ``seed_text`` by ``next_words`` words predicted by ``model``.

    Each step tokenizes the text so far with the module-level ``tokenizer``,
    left-pads it to the model's input length, takes the highest-scoring
    class, and appends the matching word.

    Args:
        seed_text: Starting text (str).
        next_words: Number of words to append.
        model: Object whose ``predict`` returns per-class scores for a batch.
        max_sequence_len: Padded sequence length used at training time
            (the model input length is one less).

    Returns:
        The seed text plus the generated words, title-cased.
    """
    # Invert the vocabulary once instead of scanning word_index for every
    # generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

        # argmax over the class scores is what Sequential.predict_classes
        # returned for a multi-class output; predict_classes itself no longer
        # exists in newer Keras/TensorFlow releases.
        scores = model.predict(token_list, verbose=0)
        predicted = int(scores.argmax(axis=-1)[0])

        # An index with no vocabulary entry appends an empty word, as before.
        seed_text += " " + index_to_word.get(predicted, "")
    return seed_text.title()

In [19]:
print(generate_text(seed_text="look at", next_words=5, model=model, max_sequence_len=max_sequence_len))

Look At The Homeless Life For Me


In [20]:
print(generate_text(seed_text="i am", next_words=4, model=model, max_sequence_len=max_sequence_len))

I Am With The Only One


In [27]:
print(generate_text(seed_text="my life", next_words=15, model=model, max_sequence_len=max_sequence_len))

My Life And The Same One And A Cruel Eight I Love Her I Know My Mind


# Pretty decent results