In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Generate Lyrics from all Ed sheeran songs

In [36]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 

In [37]:
import numpy as np
import pandas as pd

In [38]:
lyrics = pd.read_csv('/content/drive/MyDrive/LyricsGenration/lyrics-data.csv')
lyrics = lyrics[lyrics['Idiom']=='ENGLISH']

In [39]:
#Only keep popular artists, with genre Rock/Pop and popularity high enough
artists = pd.read_csv('/content/drive/MyDrive/LyricsGenration/artists-data.csv')

artists = artists[(artists['Genre'].isin(['Pop'])) & (artists['Popularity']>5)]

In [40]:
data = lyrics.merge(artists[['Artist', 'Genre', 'Link']], left_on='ALink', right_on='Link', how='inner')

In [41]:
data = data.drop(columns=['ALink','SLink','Idiom','Link','Genre'])

In [42]:
#Keep last 20 words in a new column, then remove them from original column
data['True_end_lyrics'] = data['Lyric'].str.split().str[-20:].apply(' '.join)
data['Lyric'] = data['Lyric'].str.split().str[:-20].apply(' '.join)

data.head()

Unnamed: 0,SName,Lyric,Artist,True_end_lyrics
0,Careless Whisper,I feel so unsure. As I take your hand and lead...,George Michael,now that you're gone. (now that you're gone) w...
1,Freedom '90,I won't let you down. I will not give you up. ...,George Michael,what you want from me. Just the way it's got t...
2,One More Try,I've had enough of danger. And people on the s...,George Michael,joy. For an uptown boy. Who just isn't willing...
3,Father Figure,"That's all I wanted. Something special, someth...",George Michael,your preacher. I will be your daddy. I will be...
4,Heal The Pain,Let me tell you a secret. Put it in your heart...,George Michael,your heart now. I'll be good to you. I can mak...


In [43]:
data = data[(data['Artist']=='George Michael')]

In [45]:
l = list(data['True_end_lyrics'])
l[0]

"now that you're gone. (now that you're gone) what I did so wrong. That you had to leave me alone"

In [48]:
import string
def clean_text(txt):
    txt = "".join(v for v in txt if v not in string.punctuation).lower()
    txt = txt.replace("\n","")
    return txt 

corpus = [clean_text(x) for x in l]
corpus[0]

'now that youre gone now that youre gone what i did so wrong that you had to leave me alone'

In [49]:
len(corpus)

300

In [50]:
tokenizer = Tokenizer()

def get_sequence_of_tokens(corpus):
    ## tokenization
    tokenizer.fit_on_texts(corpus)
    total_words = len(tokenizer.word_index) + 1
    
    ## convert data to sequence of tokens 
    input_sequences = []
    for line in corpus:
        token_list = tokenizer.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            n_gram_sequence = token_list[:i+1]
            input_sequences.append(n_gram_sequence)
    return input_sequences, total_words


In [51]:
inp_sequences, total_words = get_sequence_of_tokens(corpus)
inp_sequences[:10]

[[23, 8],
 [23, 8, 45],
 [23, 8, 45, 120],
 [23, 8, 45, 120, 23],
 [23, 8, 45, 120, 23, 8],
 [23, 8, 45, 120, 23, 8, 45],
 [23, 8, 45, 120, 23, 8, 45, 120],
 [23, 8, 45, 120, 23, 8, 45, 120, 46],
 [23, 8, 45, 120, 23, 8, 45, 120, 46, 2],
 [23, 8, 45, 120, 23, 8, 45, 120, 46, 2, 229]]

In [52]:
def generate_padded_sequences(input_sequences):
    max_sequence_len = max([len(x) for x in input_sequences])
    input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
    
    predictors, label = input_sequences[:,:-1],input_sequences[:,-1]
    label = ku.to_categorical(label, num_classes=total_words)
    return predictors, label, max_sequence_len

predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences)

In [53]:
def create_model(max_sequence_len, total_words):
    input_len = max_sequence_len - 1
    model = Sequential()
    
    # Add Input Embedding Layer
    model.add(Embedding(total_words, 10, input_length=input_len))
    
    # Add Hidden Layer 1 - LSTM Layer
    model.add(LSTM(100))
    
    # Add Output Layer
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    
    return model

model = create_model(max_sequence_len, total_words)

In [69]:
model.fit(predictors, label, epochs=100, verbose=2)


Epoch 1/100
178/178 - 3s - loss: 0.4172
Epoch 2/100
178/178 - 3s - loss: 0.4199
Epoch 3/100
178/178 - 3s - loss: 0.4024
Epoch 4/100
178/178 - 3s - loss: 0.3594
Epoch 5/100
178/178 - 3s - loss: 0.3357
Epoch 6/100
178/178 - 3s - loss: 0.3167
Epoch 7/100
178/178 - 3s - loss: 0.2989
Epoch 8/100
178/178 - 3s - loss: 0.2849
Epoch 9/100
178/178 - 3s - loss: 0.2688
Epoch 10/100
178/178 - 3s - loss: 0.2573
Epoch 11/100
178/178 - 3s - loss: 0.2547
Epoch 12/100
178/178 - 3s - loss: 0.2389
Epoch 13/100
178/178 - 3s - loss: 0.2284
Epoch 14/100
178/178 - 3s - loss: 0.2198
Epoch 15/100
178/178 - 3s - loss: 0.1965
Epoch 16/100
178/178 - 3s - loss: 0.1860
Epoch 17/100
178/178 - 3s - loss: 0.1761
Epoch 18/100
178/178 - 3s - loss: 0.1925
Epoch 19/100
178/178 - 3s - loss: 0.1622
Epoch 20/100
178/178 - 3s - loss: 0.1506
Epoch 21/100
178/178 - 3s - loss: 0.1429
Epoch 22/100
178/178 - 3s - loss: 0.1397
Epoch 23/100
178/178 - 3s - loss: 0.1297
Epoch 24/100
178/178 - 3s - loss: 0.1235
Epoch 25/100
178/178 - 3s

<tensorflow.python.keras.callbacks.History at 0x7fa67aa8dd10>

In [70]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict_classes(token_list, verbose=0)
        
        output_word = ""
        for word,index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " "+output_word
    return seed_text.title()

In [71]:
print (generate_text("I could feel", 5, model, max_sequence_len))

I Could Feel Dont You Wanna Fall In




In [72]:
print (generate_text("Take me", 4, model, max_sequence_len))

Take Me The Wind Wild Is




In [74]:
print (generate_text("Like The Weather", 150, model, max_sequence_len))



Like The Weather Fuck About Your Problems Darling When You Can Pay The Rent How Much Is Enough Return To Top Wise The One On Oh The Truth So Tells You Live Im Poor It It You Dont On The Fuck Day On You See Me Get Anymore Nothing Me Or When The Blue Dont Send Me Youre Heaven So Yo Etais Father And You Has Be Lucky To Love Of Found Be Need In No Two Day That Care Care The One Of The Week To All All Youre Will The End Down Ive Youre Down To Give Happiness Can’T Be Alright That I Be No Need To Love Dont So Much Me Hate Go When To Should Come That That You Baby Why Keep Me So Just I Care I Keep Me Baby To Save Me Can I Leave Me You Come Back Care Ourselves Baby Oh The Truth But Here
