In [16]:
import string
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np
from keras.utils import to_categorical
from keras import models
from keras import Sequential
from keras.layers import Dense, LSTM, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
import pickle
import pandas as pd
import collections
from nltk.corpus import words
import re

In [2]:
def load_data(filename='data/all_songs.csv',
              col='album',
              col_value='In the Aeroplane Over the Sea'):
    """Read a songs CSV and return the lyrics of the rows matching a filter.

    Parameters
    ----------
    filename : path of the CSV file to read; it must contain a 'lyrics'
        column and the column named by ``col``.
    col : name of the column to filter on.
    col_value : rows are kept when ``col`` equals this value exactly.

    Returns
    -------
    numpy array holding the 'lyrics' value of every matching row, in file
    order (empty when nothing matches).
    """
    frame = pd.read_csv(filename)
    matches = frame[col] == col_value
    return frame.loc[matches, 'lyrics'].values

def lyric_cleaner(songs):
    """Normalise raw lyric strings into lists of lowercase word tokens.

    For every song the text is lowercased, each standalone ``n`` surrounded
    by spaces (the line-break marker used in the lyrics) becomes ``eol``,
    ``[verse N`` is collapsed to ``[verseN``, apostrophes are dropped, hyphens
    become spaces, and all remaining punctuation is stripped from each
    whitespace-separated token.

    Parameters
    ----------
    songs : iterable of lyric strings.

    Returns
    -------
    list with one list of token strings per input song. A token made up only
    of punctuation is kept as an empty string.
    """
    # Built once: the table is the same for every song.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    lyric_tokens = []
    for song in songs:
        text = song.lower()
        # Lookarounds do not consume the surrounding spaces, so back-to-back
        # markers ("a n n b") are both converted; str.replace(' n ', ...) is
        # non-overlapping and would leave the second one as a stray 'n'.
        text = re.sub(r'(?<= )n(?= )', 'eol', text)
        text = text.replace('[verse ', '[verse')
        text = text.replace("'", '').replace('-', ' ')
        lyric_tokens.append(
            [word.translate(strip_punctuation) for word in text.split()]
        )

    return lyric_tokens

def join_song_lyrics(lyric_tokens):
    """Turn each song's token list back into one space-separated string.

    Parameters
    ----------
    lyric_tokens : iterable of token lists, one per song.

    Returns
    -------
    list of strings, one per song, in the same order.
    """
    return [' '.join(tokens) for tokens in lyric_tokens]

def tokenizer(list_of_songs, tokenizer):
    """Fit the given tokenizer on the songs and encode them.

    Parameters
    ----------
    list_of_songs : list of lyric strings.
    tokenizer : object exposing ``fit_on_texts`` and ``texts_to_sequences``;
        it is fitted in place.

    Returns
    -------
    (encoded_songs, tokenizer): the result of ``texts_to_sequences`` on the
    songs and the same, now fitted, tokenizer object.
    """
    fitted = tokenizer
    fitted.fit_on_texts(list_of_songs)
    return fitted.texts_to_sequences(list_of_songs), fitted

def find_longest_song(encoded_songs):
    """Return the length of the longest encoded song.

    Parameters
    ----------
    encoded_songs : iterable of sized sequences.

    Returns
    -------
    The largest ``len(song)``, or 0 when there are no songs.
    """
    return max((len(song) for song in encoded_songs), default=0)

def index_to_word(tokenizer):
    """Build the reverse (index -> word) lookup for a fitted tokenizer.

    Parameters
    ----------
    tokenizer : object with a ``word_index`` mapping of word -> index.

    Returns
    -------
    (vocab_size, index_to_word_dict) where ``vocab_size`` is the number of
    entries in ``word_index`` plus one, and the dict maps each index back to
    its word.
    """
    word_index = tokenizer.word_index
    reverse_lookup = {index: word for word, index in word_index.items()}
    return len(word_index) + 1, reverse_lookup

def pad_data(encoded_songs, max_length):
    """Pad or cut every encoded song to exactly ``max_length`` entries.

    Both padding and truncation are applied at the end of a song ('post').

    Parameters
    ----------
    encoded_songs : list of integer sequences.
    max_length : target length passed to ``pad_sequences`` as ``maxlen``.

    Returns
    -------
    The value returned by ``pad_sequences``.
    """
    return pad_sequences(
        encoded_songs,
        maxlen=max_length,
        padding='post',
        truncating='post',
    )

def seqeuncer(padded_songs, length):
    """Slice every song into fixed-length input windows and next-token targets.

    For each position ``stop`` from ``length`` up to the end of a song, the
    window is ``song[stop - length:stop]`` and the target is ``song[stop]``.

    Parameters
    ----------
    padded_songs : iterable of indexable sequences.
    length : number of items in each input window.

    Returns
    -------
    (sequences, output): a numpy array built from all windows and a plain
    list of the matching targets.
    """
    pairs = [
        (song[stop - length:stop], song[stop])
        for song in padded_songs
        for stop in range(length, len(song))
    ]
    windows = [window for window, _ in pairs]
    targets = [target for _, target in pairs]
    return np.array(windows), targets

def establish_model(vocab_size, seq_length):
    """Build and compile the next-word prediction network.

    The stack is: a 50-dimensional Embedding, two 100-unit LSTM layers (the
    first returns full sequences, the second only its last output), two
    100-unit ReLU Dense layers, and a softmax Dense layer with ``vocab_size``
    outputs. Dropout of 0.2 follows each LSTM and the first Dense layer.

    Parameters
    ----------
    vocab_size : size of the Embedding input and of the softmax output.
    seq_length : passed to the Embedding layer as ``input_length``.

    Returns
    -------
    The Sequential model, compiled with categorical cross-entropy loss, the
    'adam' optimizer and an accuracy metric.
    """
    layer_stack = [
        Embedding(vocab_size, 50, input_length=seq_length),
        LSTM(100, return_sequences=True),
        Dropout(0.2),
        LSTM(100, return_sequences=False),
        Dropout(0.2),
        Dense(100, activation='relu'),
        Dropout(0.2),
        Dense(100, activation='relu'),
        Dense(vocab_size, activation='softmax'),
    ]

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

def gen_seed(seqs):
    """Pick a random sequence that contains no zero entries.

    The previous loop compared ``seed.all()`` with ``zeros.all()`` (always
    False), so it effectively re-drew until the seed had no zero in it, and
    never terminated when no such sequence existed. That selection rule is
    kept, but it is now explicit, independent of the sequence length, and
    fails loudly instead of looping forever.

    Parameters
    ----------
    seqs : array-like of equal-length integer sequences (for example the
        first value returned by ``seqeuncer``).

    Returns
    -------
    One row of ``seqs`` chosen uniformly at random among the rows whose
    entries are all non-zero.

    Raises
    ------
    ValueError
        If ``seqs`` is empty or every sequence contains at least one zero.
    """
    candidates = np.asarray(seqs)
    if candidates.size == 0:
        raise ValueError('gen_seed: seqs is empty, no seed can be drawn')

    # One boolean per sequence: True when none of its entries is zero.
    zero_free = candidates.reshape(len(candidates), -1).all(axis=1)
    usable = np.flatnonzero(zero_free)
    if usable.size == 0:
        raise ValueError('gen_seed: every sequence contains a zero entry')

    return candidates[usable[np.random.randint(0, len(usable))]]

def generate_seq(model,
                 tokenizer,
                 seq_length,
                 seqs,
                 n_words=250):
    """Generate text by repeatedly predicting the next word from a seed.

    A seed sequence is drawn with ``gen_seed(seqs)``, printed as words, and
    then extended one predicted index at a time. Every prediction is fed back
    into the context, which is kept to the last ``seq_length`` indices.

    Parameters
    ----------
    model : object whose ``predict`` returns one score per vocabulary index
        for each input row (for example the model from ``establish_model``).
    tokenizer : fitted tokenizer; its ``word_index`` is inverted to turn
        predicted indices back into words.
    seq_length : number of indices given to the model at each step.
    seqs : pool of sequences the seed is drawn from.
    n_words : number of prediction steps to run.

    Returns
    -------
    The generated words joined by single spaces. Steps whose predicted index
    has no word (such as index 0) add nothing, so the result can hold fewer
    than ``n_words`` words.
    """
    # Index -> word lookup built from the tokenizer that was passed in,
    # instead of relying on a global `reverse_dict`.
    reverse_dict = {index: word for word, index in tokenizer.word_index.items()}

    seed_text = gen_seed(seqs)
    start = [reverse_dict[index] for index in seed_text if index in reverse_dict]
    print('start: ', start)

    result = []

    for _ in range(n_words):
        encoded = pad_sequences([seed_text], maxlen=seq_length, truncating='pre')
        # predict() + argmax stands in for Sequential.predict_classes(),
        # which newer Keras releases no longer provide.
        scores = model.predict(encoded, verbose=0)
        next_index = int(np.argmax(scores, axis=-1)[0])

        # The prediction always joins the context; the window follows
        # seq_length rather than a fixed 10.
        seed_text = np.append(seed_text, next_index)[-seq_length:]

        # Only emit indices that map to a word; previously a 0 prediction
        # re-appended the last word, or failed on the very first step.
        if next_index in reverse_dict:
            result.append(reverse_dict[next_index])

    return ' '.join(result)

In [3]:
# Load the lyrics of every row whose 'artist' column is 'The Decemberists'.
data = load_data(
    filename='data/all_songs.csv',
    col='artist',
    col_value='The Decemberists',
)

In [41]:
# Split the sample song on the literal text 'Chorus' and print what follows
# each '[Chorus]' tag, up to the next '[V' or, failing that, the 'eos' marker.
# NOTE: `test_song` is assigned in cells that appear further down the
# notebook, so on a fresh kernel those cells must run before this one.
x = test_song.split('Chorus')
for i in x:
    # startswith() also copes with an empty piece, where i[0] raises IndexError.
    if i.startswith(']'):
        y = re.findall(r'\].+\[V', i)
        if y == []:
            y = re.findall(r'\].+eos', i)
        print(y)

['] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n [V']
['] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low eos']


In [20]:
# Use the first entry of `data` as the sample song for the regex cells.
test_song = data[0]

In [23]:
# Append an ' eos' end marker for the patterns that match up to 'eos'.
# Not idempotent: every run of this cell appends another marker.
test_song += ' eos'

In [24]:
# Greedy match from the first '[Chorus]' up to the last '[Verse' in the sample.
# Raw string: '\[' in a normal literal is an invalid escape sequence.
re.findall(r'\[Chorus\].+\[Verse', test_song)

['[Chorus] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n [Verse']

In [25]:
# Greedy match from the first '[Chorus]' up to the last 'eos' in the sample.
# Raw string: '\[' in a normal literal is an invalid escape sequence.
re.findall(r'\[Chorus\].+eos', test_song)

['[Chorus] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n [Verse 3] n A grey sky, a bitter sting n A rain cloud, a crane on wing n All out beyond horizon n A grey sky, a bitter sting n [Chorus] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low eos']

In [19]:
# Same '[Chorus]' ... '[Verse' search, run directly on the first loaded song.
# Raw string: '\[' in a normal literal is an invalid escape sequence.
re.findall(r'\[Chorus\].+\[Verse', data[0])

['[Chorus] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n [Verse']

In [None]:
def song_cleaner(songs):
    """Tokenise lyric strings while keeping section tags as marker tokens.

    Each song is lowercased, every standalone ``n`` surrounded by spaces
    becomes ``eol``, ``[verse N`` is collapsed to ``[verseN``, an opening
    bracket is replaced by the token ``BRACKETS`` followed by a space, closing
    brackets and apostrophes are removed, hyphens become spaces, and remaining
    punctuation is stripped from every whitespace-separated token.

    Parameters
    ----------
    songs : iterable of lyric strings.

    Returns
    -------
    list with one list of token strings per input song.
    """
    # Built once: the table is the same for every song.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    lyric_tokens = []
    # The body used to read an undefined `song` and never returned; it now
    # walks the `songs` argument and hands the collected tokens back.
    for song in songs:
        text = song.lower()
        # Lookarounds leave the surrounding spaces in place, so adjacent
        # markers ("a n n b") are both converted.
        text = re.sub(r'(?<= )n(?= )', 'eol', text)
        text = text.replace('[verse ', '[verse').replace('[', 'BRACKETS ').replace(']', '')
        text = text.replace("'", '').replace('-', ' ')
        lyric_tokens.append(
            [word.translate(strip_punctuation) for word in text.split()]
        )

    return lyric_tokens

In [10]:
# Preview the loaded lyrics. The unfinished `for song in data` header that
# stood here is a SyntaxError and would stop a Restart & Run All.
data[:2]

array(["[Verse 1] n And under the boughs unbowed n All clothed in the snowy shroud n She had no heart so hardened n All under the boughs unbowed n [Verse 2] n Each feather it fell from skin n 'Til thread bare while she grew thin n How were my eyes so blinded? n Each feather it fell from skin n [Chorus] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n [Verse 3] n A grey sky, a bitter sting n A rain cloud, a crane on wing n All out beyond horizon n A grey sky, a bitter sting n [Chorus] n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low n And I will hang my head, hang my head low",
       "There's an island hidden in the sound n Lapping currents lay your boat aground n Affix your barb and bayonet n The curlews carve their Arabesques n And sorrow fills the silence all around n Come a