In [None]:
import warnings
# Silence every warning for the rest of the session. This also hides
# deprecation notices raised by the libraries used in later cells.
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the song dataset from CSV and show the first row to inspect its columns.
songs = pd.read_csv("Songs Dataset/songdata.csv")
songs.head(1)

In [None]:
# Print the raw 'text' value of the first song, before any cleaning.
print(songs['text'][0])

In [5]:
# Lowercase the text and strip punctuation (apostrophes are kept).
import re
import string

def clean_text(text):
    """Normalise raw lyrics into one lowercase, space-separated line.

    Only runs of ASCII letters, digits and apostrophes are kept; anything
    else (punctuation, newlines, repeated spaces) acts as a separator.
    """
    tokens = re.findall("[A-Za-z0-9']+", text)
    joined = ' '.join(tokens)
    return joined.lower()

clean_text("Look at her face, it's a wonderful face  \nAnd it means something special to me  \n")

"look at her face it's a wonderful face and it means something special to me"

In [6]:
# Store the normalised lyrics of every song in a new 'clean' column,
# then peek at the second song to check the result.
songs['clean'] = songs['text'].apply(clean_text)
songs['clean'].iloc[1]

"take it easy with me please touch me gently like a summer evening breeze take your time make it slow andante andante just let the feeling grow make your fingers soft and light let your body be the velvet of the night touch my soul you know how andante andante go slowly with me now i'm your music i am your music and i am your song i'm your song i am your music and i am your song play me time and time again and make me strong play me again 'cause you're making me strong make me sing make me sound you make me sing and you make me andante andante tread lightly on my ground andante andante oh please don't let me down there's a shimmer in your eyes like the feeling of a thousand butterflies please don't talk go on play andante andante and let me float away i'm your music i am your music and i am your song i'm your song i am your music and i am your song play me time and time again and make me strong play me again 'cause you're making me strong make me sing make me sound you make me sing and

In [42]:
# `lyrics` is the cleaned-text column; `sample` is its first two songs,
# used below for a quick shape check of the training-pair builder.
lyrics = songs['clean']
sample = lyrics.iloc[:2]
sample[0]

"look at her face it's a wonderful face and it means something special to me look at the way that she smiles when she sees me how lucky can one fellow be she's just my kind of girl she makes me feel fine who could ever believe that she could be mine she's just my kind of girl without her i'm blue and if she ever leaves me what could i do what could i do and when we go for a walk in the park and she holds me and squeezes my hand we'll go on walking for hours and talking about all the things that we plan she's just my kind of girl she makes me feel fine who could ever believe that she could be mine she's just my kind of girl without her i'm blue and if she ever leaves me what could i do what could i do"

In [43]:
# Hyperparameters shared by the tokenizer, the training-pair builder and the model.
# Passed to Tokenizer(num_words=...) and used as the Embedding input size.
VOCAB_SIZE = 5000
# Passed to pad_sequences(maxlen=...) and used as the model's input length.
MAX_LENGTH = 250
# Embedding output size; also reused as the unit count of both LSTM layers.
EMBEDDING_DIM = 50
# Passed to Tokenizer(oov_token=...).
OOV_TOKEN = '<OOV>'

In [161]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical

# Build one tokenizer for the whole notebook and fit its word index on all
# cleaned lyrics, so training-pair creation and generation share a vocabulary.
tokenizer = Tokenizer(num_words=VOCAB_SIZE,
                      oov_token=OOV_TOKEN)
tokenizer.fit_on_texts(lyrics)
#sequences = tokenizer.texts_to_sequences(lyrics)
#padded_sequences = pad_sequences(sequences, maxlen=MAX_LENGTH)

In [163]:
def create_XyPairs(songs, tokenizer, MAX_LENGTH, num_classes=None):
    """Build next-word training pairs from cleaned lyrics.

    For every song and every word position ``i >= 1``, the token ids of
    ``words[:i]`` form one input row and the token id of ``words[i]`` is
    its label.

    Parameters
    ----------
    songs : iterable of str
        One whitespace-separated lyrics string per song.
    tokenizer : fitted Keras ``Tokenizer``
        Only ``texts_to_sequences`` is called on it.
    MAX_LENGTH : int
        Forwarded to ``pad_sequences`` as ``maxlen``; the width of ``X``.
    num_classes : int, optional
        Width of the one-hot label matrix. ``None`` (the default) keeps the
        earlier behaviour of letting ``to_categorical`` infer it from the
        largest id present, which makes the width depend on the sample.
        Pass an explicit value for a width that is stable across samples.

    Returns
    -------
    X : ndarray of shape (n_pairs, MAX_LENGTH)
        Padded context id sequences.
    y : ndarray of shape (n_pairs, n_classes)
        One-hot encoded next-word ids.

    Notes
    -----
    A word that does not map to exactly one id (for example an unknown word
    when the tokenizer has no OOV token) is skipped as a target, so ``X``
    and ``y`` always stay aligned. It still contributes whatever ids it
    produced to the context of the following words.
    """
    contexts = []
    targets = []

    for song in songs:
        words = song.split()
        # One tokenizer call per song (entry i holds the ids of words[i])
        # instead of re-joining and re-tokenizing every growing prefix.
        word_ids = tokenizer.texts_to_sequences(words)
        context = []
        for position, ids in enumerate(word_ids):
            # The first word has no preceding context, so it is never a target.
            if position > 0 and len(ids) == 1:
                contexts.append(list(context))
                targets.append(ids[0])
            context.extend(ids)

    if not targets:
        # No song had two usable words: return empty, correctly shaped arrays
        # rather than failing inside to_categorical on an empty label vector.
        width = num_classes if num_classes is not None else 0
        return (np.zeros((0, MAX_LENGTH), dtype='int32'),
                np.zeros((0, width), dtype='float32'))

    # Pad/truncate all contexts in a single call; the result is already
    # shaped (n_pairs, MAX_LENGTH).
    X = pad_sequences(contexts, maxlen=MAX_LENGTH)
    y = to_categorical(np.array(targets).reshape(-1, 1), num_classes=num_classes)
    return X, y

print('Shape Of Training Examples for two songs', create_XyPairs(sample, tokenizer, MAX_LENGTH)[0].shape)

Shape Of Training Examples for two songs (411, 250)


In [164]:
# Build the training pairs from the first 50 cleaned songs only,
# then report the resulting input and label shapes.
sample_50 = songs['clean'][:50]

X,y = create_XyPairs(sample_50, tokenizer, MAX_LENGTH)
print('Shape Of Training Examples for 50 song', X.shape)
print('Shape Of Training Labels for 50 song'  , y.shape)

Shape Of Training Examples for 50 song (12432, 250)
Shape Of Training Labels for 50 song (12432, 4999)


In [167]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Bidirectional

# Next-word classifier: learned word embeddings feed two stacked
# bidirectional LSTMs, followed by a softmax over the label classes.
model = Sequential(
    [
        Embedding(VOCAB_SIZE, EMBEDDING_DIM,
                  input_shape=(MAX_LENGTH,)),
        # return_sequences=True so the second recurrent layer receives the
        # full sequence rather than only the final state.
        Bidirectional(LSTM(EMBEDDING_DIM, return_sequences=True)),
        Bidirectional(LSTM(EMBEDDING_DIM)),
        # Size the output from the label matrix instead of a hard-coded
        # VOCAB_SIZE-1: the one-hot width of `y` is inferred from the ids
        # present in the training sample, so a fixed number only matches it
        # by coincidence and breaks as soon as the sample changes.
        Dense(y.shape[1], activation='softmax')
    ]
)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Short first run to confirm that shapes and loss are wired up correctly.
model.fit(X,y, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x22d0bb9cb70>

In [193]:
# Call fit again on the already-trained `model`: 10 more epochs on the same X, y.
model.fit(X,y, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x22d1eda6b70>

In [213]:
# Call fit once more on the same `model`: a further 75 epochs on the same X, y.
model.fit(X,y, epochs=75)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.callbacks.callbacks.History at 0x22d1ed8a780>

In [214]:
# Sanity check: tokenize and pad the prompt "look at her", run it through the
# model and show the index of the largest output value.
np.argmax(model.predict(pad_sequences(tokenizer.texts_to_sequences(["look at her"]), maxlen=MAX_LENGTH)))

160

In [217]:
from keras.models import save_model, load_model

# Write the trained model to 'text_gen.h5', then rebind `model` to the copy
# loaded back from that file.
save_model(model, 'text_gen.h5')
model = load_model('text_gen.h5')

In [272]:
# Scratch check: called without maxlen, pad_sequences returns this single
# sequence at its own length (see the output below).
pad_sequences([[5,6,7,8]])

array([[5, 6, 7, 8]])

In [284]:
def generate_song(model, tokenizer, starter, sentence_long, max_length=None):
    """Extend ``starter`` one sampled word at a time.

    Parameters
    ----------
    model : trained Keras model
        ``model.predict`` must return one probability vector per input row.
    tokenizer : fitted Keras ``Tokenizer``
        Used to encode the running text and to map sampled ids back to words
        through ``index_word``.
    starter : str
        Opening words of the song.
    sentence_long : int
        Target total word count, counting the words of ``starter``. If the
        starter is already that long, it is returned unchanged.
    max_length : int, optional
        ``maxlen`` passed to ``pad_sequences``. Defaults to the notebook-level
        ``MAX_LENGTH`` used to build the training data.

    Returns
    -------
    str
        ``starter`` followed by the generated words, separated by spaces.
    """
    if max_length is None:
        # Fall back to the notebook constant, as the function did before.
        max_length = MAX_LENGTH

    text = starter
    starter_words = len(starter.split())

    for _ in range(starter_words, sentence_long):
        sequence = tokenizer.texts_to_sequences([text])[0]
        padded = pad_sequences([sequence], maxlen=max_length)
        # Copy into float64 so the masking below never touches the model's
        # own output buffer and the renormalised vector sums to 1 accurately.
        probabilities = np.array(model.predict(padded)[0], dtype=np.float64)
        # Id 0 is the padding value and has no entry in index_word; sampling
        # it would raise KeyError, so remove it from the distribution.
        probabilities[0] = 0.0
        total = probabilities.sum()
        if total <= 0:
            # No usable probability mass left: stop early instead of crashing.
            break
        probabilities /= total
        sampled = np.random.choice(len(probabilities), p=probabilities)
        text = text + ' ' + tokenizer.index_word[sampled]

    return text


generate_song(model, tokenizer, 'you were supposed to', 30)

'you were supposed to sing you through me my plain in the summer story leaves movie through the start for i was so to sing to give out over ship'

# `YEAH WHATEVER` `MC-DODOELSAKKA \ LIL DODOELSAKKA`

# `DODO BOOMIN WANTS SOME MORE` 