In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import torch.nn as nn
from torch import optim, Tensor
import math
from tqdm.notebook import tqdm
from chords_dataset import ChordsDataset
from model_helpers import NLP
from model import Transformer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

df = pd.read_csv('./model_data/lyrics_processed.csv')
dataset = ChordsDataset(df, NLP)
train_i, test_i, validation_i = dataset.get_train_test_valid_indexes(0.9,0.09,0.01)


LYR_VOCAB_SIZE = 100
EMB_SIZE = LYR_VOCAB_SIZE
CHORD_SIZE = len(dataset.chords_set) 
num_heads = 4
num_encoder_layers = 3
num_decoder_layers = 3
forward_expansion = 2
dropout = 0.5
max_length = 7038

model = Transformer(EMB_SIZE,LYR_VOCAB_SIZE,CHORD_SIZE,15,num_heads,num_encoder_layers, num_decoder_layers, forward_expansion, dropout, max_length,device ).to(device)
optimizer = optim.Adam(model.parameters(), lr = 6e-4)
criterion = nn.CrossEntropyLoss().to(device)


c:\Users\bench\anaconda3\envs\p38\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
c:\Users\bench\anaconda3\envs\p38\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


In [11]:

def eval_model(dataset: ChordsDataset,validation_indexes:list, model: nn.Module, criterion: nn.Module, device):
    model.eval()
    loss = 0
    nb_elem = len(validation_indexes)
    for index in validation_indexes:
        elem = dataset[index]
        source, target = elem["lyrics"].reshape(-1,1,100).to(device), elem["chords"].reshape(-1,1)
        output = model(source, target).reshape(-1,CHORD_SIZE)
        target_one_hot= Tensor([dataset.to_one_hot(int(chord)) for chord in target]).to(device)
        loss += criterion(output, target_one_hot)
    model.train()
    return loss/nb_elem


In [None]:


NB_EPOCHS = 3

for epoch in range(NB_EPOCHS):
    print(f"[Epoch {epoch}/ {NB_EPOCHS}]")
    model.train()
    for i,index in tqdm(enumerate(train_i)):
        optimizer.zero_grad()
        elem = dataset[index]
        source, target = elem["lyrics"].reshape(-1,1,100).to(device), elem["chords"].reshape(-1,1)
        output = model(source, target).reshape(-1,CHORD_SIZE)
        # optimizer.zero_grad()
        target_one_hot=  torch.Tensor([dataset.to_one_hot(int(chord)) for chord in target]).to(device)
        loss = criterion(output, target_one_hot)
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), max_norm= 1)
        optimizer.step()
        if i % 500 == 0:
            print(f"At i = {i} : ",eval_model(dataset, validation_i, model, criterion))

In [4]:
from model_helpers import preprocess_text
import json 
import pandas as pd
import torch.nn as nn
from torch import Tensor
from model import Transformer
from chords_dataset import ChordsDataset
from model_helpers import NLP
import torch

def load_chord_dicts(dataset: ChordsDataset, id2chord: dict, chord2id: dict) -> ChordsDataset: 
    with open(id2chord, mode = "r") as f:
        dataset.id2chord = {int(i):k for i,k in json.load(f).items()}

    with open(chord2id, mode = "r") as f:
        dataset.chord2id = {i: int(k) for i,k in json.load(f).items()}
    return dataset
    

df = pd.read_csv('./model_data/lyrics_processed.csv')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded_dataset = load_chord_dicts(ChordsDataset(df,NLP),"./model_trained/id2chord.json","./model_trained/chord2id.json")

print("loaded dataset")

LYR_VOCAB_SIZE = 100
EMB_SIZE = LYR_VOCAB_SIZE
CHORD_SIZE = len(loaded_dataset.chords_set) 
num_heads = 4
num_encoder_layers = 3
num_decoder_layers = 3
forward_expansion = 2
dropout = 0.1
max_length = 7038
loaded_transformer = Transformer(
    EMB_SIZE,
    LYR_VOCAB_SIZE,
    CHORD_SIZE,
    15,
    5,
    4, 
    4, 
    2, 
    dropout, 
    max_length,
    device 
    )
loaded_transformer.load_state_dict(torch.load("./model_trained/model_5_4_4_2_01.pt"))

loaded dataset


<All keys matched successfully>

In [10]:


def predict(model: nn.Module, lyrics: str, dataset: ChordsDataset, max_length):
    model.eval()
    processed_lyrics = preprocess_text(lyrics)
    print(processed_lyrics)
    vectorized_lyrics = torch.FloatTensor([dataset.vectorize(token) for token in processed_lyrics]).to(device)
    vectorized_lyrics = vectorized_lyrics.reshape(-1,1,100)
    outputs = [dataset.chord2id["<SOS>"]]
    for i in range(max_length):
        trg_tensor = torch.LongTensor(outputs).unsqueeze(1).to(device)

        with torch.no_grad():
            output = model(vectorized_lyrics, trg_tensor)
        best_guess = output.argmax(2)[-1, :].item()
        outputs.append(best_guess)

        if best_guess == dataset.chord2id["<EOS>"]:
            break
            
    translated_sentence = dataset.decode_chord_tensor(outputs)
    # remove start token
    return translated_sentence
[print(c) for c in predict(loaded_transformer,"I love life \r\n it is so great", loaded_dataset,10)]

['<SOS>', 'i', 'love', 'life', '<EOL>', 'it', 'is', 'so', 'great', '<EOS>']
<SOS>
Bb/G
Bb/G
G/B
G/B
C/E
C/E
Ab/C
Ab/C
Bbm
Bbm


[None, None, None, None, None, None, None, None, None, None, None]

In [1]:
from torchtext.data.utils import get_tokenizer
from nltk.corpus import words
from chords_dataset import ChordsDataset
num = 107
chrds = loaded_dataset[num]["chords"]
# loaded_transformer.trg_word_embedding(chrds[-1].to(torch.int64).to(device))
lyrics = loaded_dataset.lyrics[num]
print(lyrics, loaded_dataset.decode_chord_tensor(chrds))


NameError: name 'loaded_dataset' is not defined

In [7]:
[print(c) for c in  ['B7', 'B7', 'A', 'A', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'C/E', 'C/E', 'C/E', 'C/E', 'C', 'C', 'C', 'C', 'C']]

B7
B7
A
A
F
F
F
F
F
F
F
C/E
C/E
C/E
C/E
C
C
C
C
C


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]