In [25]:
import numpy as np
import pandas as pd
import random
import nltk
import string
import torch

from architecture import recursive

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device == 'cuda':
    torch.backends.cudnn.benchmark = False
    torch.cuda.manual_seed_all(SEED)

hidden_size = 100
n_layers = 1

In [26]:
df_train = pd.read_csv("data/lyrics.csv/lyrics.csv")

i=0
pop_lyrics = list()
while i < len(df_train.index):
    if df_train['genre'][i] == 'Pop' and type(df_train['lyrics'][i]) == str:
        pop_lyrics.append(df_train['lyrics'][i])
    i += 1

def joinStrings(text):
    return ' '.join(string for string in text)

pop_text = joinStrings(pop_lyrics[:10])
len(pop_text.split())

stop = set(nltk.corpus.stopwords.words('english'))
exclude = set(string.punctuation) 
lemma = nltk.stem.wordnet.WordNetLemmatizer()

def clean(doc):
        stop_free = " ".join([i for i in doc.split() if i not in stop])
        punc_free = "".join(ch for ch in stop_free if ch not in exclude)
        normalized = " ".join(lemma.lemmatize(word) for word in punc_free.split())
        return normalized
    
test_sentence = clean(pop_text).lower().split()
vocab = set(test_sentence)
voc_len=len(vocab)
word_to_ix = {word: i for i, word in enumerate(vocab)}

In [27]:
model = recursive.RNN(voc_len, hidden_size, voc_len, n_layers)
weights_path = 'model/trigram_pop_10.pth'
model.load_state_dict(torch.load(weights_path), strict=True)
if device == 'cuda':
    model.cuda()
print('Model loaded')

Model loaded


In [28]:
def evaluate(prime_str="let's go", predict_len=100, temperature=0.8):
    if device == 'cuda':
        hidden = model.init_hidden().cuda()
    else:
        hidden = model.init_hidden()

    for p in range(predict_len):
        
        if device == 'cuda':
            prime_input = torch.tensor([word_to_ix[w] for w in prime_str.split()], dtype=torch.long).cuda()
        else:
            prime_input = torch.tensor([word_to_ix[w] for w in prime_str.split()], dtype=torch.long)
        inp = prime_input[-2:] #last two words as input
        output, hidden = model(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        # Add predicted word to string and use as next input
        predicted_word = list(word_to_ix.keys())[list(word_to_ix.values()).index(top_i)]
        prime_str += " " + predicted_word
#         inp = torch.tensor(word_to_ix[predicted_word], dtype=torch.long)

    return prime_str

In [29]:
print(evaluate("we can", 40, temperature=1))

we can seeing everyone feel nothing 6 feeling concerned callin reason aint honesty class in feelin verse until talking aps sand sharp anyone exactly tune feelin confident name everyone exactly check someone sand cause moment speak so lifetime everything try 6 delight
