In [1]:
cd /content/drive/My Drive/Studier/Master/Master Thesis/Coding/conditional-LSTM-language-model

/content/drive/My Drive/Studier/Master/Master Thesis/Coding/conditional-LSTM-language-model


# Data.py

In [0]:
# -*- coding: utf-8 -*-
import sys
import os
import random
import numpy as np
import torch
import torch.utils.data as data
import glob
import logging
from collections import defaultdict

logging.disable(logging.FATAL)

# Set sampling seed
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# Initiate feature vector
features = []
note_types = ['note', 'rest']
tags = ['<WORD>', '<BB>', '<BL>']
lengths = [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0, 8.0, 16.0, 32.0]

def get_correct_length(length):
    length = int(length)
    if length <= 0.25:
        return 0.25
    elif length <= 0.5:
        return 0.5
    elif length <= 0.75:
        return 0.75
    elif length <= 1:
        return 1.0
    elif length <= 1.5:
        return 1.5
    elif length <= 2:
        return 2.0
    elif length <= 3:
        return 3.0
    elif length <= 4:
        return 4.0
    elif length <= 6:
        return 6.0
    elif length <= 8:
        return 8.0
    elif length <= 16:
        return 16.0
    elif length <= 32:
        return 32.0

class SongLyricDataset(data.Dataset):
    def __init__(self, data, word_size, window):
        """ Create feature vocab and index dictionaries """
        # Create feature vocabulary
        for i in range(window):
            for note_type in note_types:
                features.append('note[%s]=%s'%(i, note_type))
                features.append('note[%s]=%s'%(-(i+1), note_type))
            for length in lengths:
                features.append('length[%s]=%s'%(i, length))
                features.append('length[%s]=%s'%(-(i+1), length))
        for tag in tags:
            features.append('prev_tag=%s'%tag)

        # Create index dictionaries for features
        sorted_features = sorted(features)
        self.feature2idx = dict((f, i) for i, f in enumerate(sorted_features))
        self.idx2feature = dict((i, f) for i, f in enumerate(sorted_features))
        self.feature_size = len(self.feature2idx)

        """ Load data and create word and syllable vocab """
        # Load data
        files = os.listdir(data)

        # Initialize word occurance dictionary
        word_dict = defaultdict(int)
        syll_dict = defaultdict(int)
        
        # Limit number of songs for testing
        # files = files[0:4] 

        # For each song
        for file in files:
            notes = np.load(os.path.join(data, file), allow_pickle=True)
            old_word_idx = "<None>"
            # For each word in song lyric increment word occurance dictionary
            for note in notes:
                # note = [note_number, word_index, note_type, duration, word, syllable, feature_type]
                # note[0] = note_number, note[1] = word_index, note[2] = note_type(rest) or MIDI_number, note[3] = duration 
                # note[4] = word, note[5] = syllable, note[6] = [all syllables], note[7]= feature_type
                word_idx = note[1]
                if word_idx != old_word_idx:
                    word_lower = note[5].lower()
                    word_dict[word_lower] += 1
                    syll_dict[word_lower] += len(note[6])


        # Create index dictionaries for words
        self.word2idx = {}
        self.word2idx["<pad>"] = 0 # Padding token to fill batches
        self.word2idx["<unk>"] = 1 # Unknown token to replace rare words
        self.word2idx["<BB>|<null>"] = 2
        self.word2idx["<BL>|<null>"] = 3
        self.idx2word = {}
        self.idx2word[0] = "<pad>"
        self.idx2word[1] = "<unk>"
        self.idx2word[2] = "<BB>|<null>"
        self.idx2word[3] = "<BL>|<null>"
        
        idx = 4
        syllables = set()

        # Create word index embedding dictionaries
        print("Number of unique words: %s" %len(word_dict.items()))
        for word, freq in sorted(word_dict.items(), key=lambda x:x[1], reverse=True)[:word_size:]: # Sort word_dict after frequency and limit size to word_size (size of our dictionary)
            # Add number of syllables for each word, calculate average number of syllables per word and average
            syllables.add(np.round(syll_dict[word]/freq))

            # Build word/index dictionaries
            self.word2idx[word] = idx
            self.idx2word[idx] = word
            idx += 1

        self.word_size = len(self.word2idx)
        self.syllable_size = int(max(syllables) + 10)

        # print("word size: ", self.word_size)
        # print("syllable size: ", self.syllable_size)


        """ Create syllable, lyric and melody embeddings """
        self.idx2lyrics = []
        self.idx2syllable = []
        self.idx2melody = []

        # For each song
        for file in files:
            notes = np.load(os.path.join(data, file), allow_pickle=True)

            # Define starting state
            old_word_idx = "<None>"
            tag_stack = ["<WORD>"]
            
            # Initiate arrays
            syllables = []
            lyrics = []
            melody = []

            old_word_idx = "<None>"
            
            """ For each word in song lyric increment word occurance dictionary """
            
            # NOTE: A feature vectore "feature[]" contains the indexes of the features previous tag (BB,BL or WORD) 
            # Then it also contain the 10 previous note or rest indexes based on position in window as well as the specific rests/notes indexed duration
            # Then it also contain the 10 upcoming note or rests indexes and durations in the same manner.
            # So to conclude it looks like this: feature = [prev_tag, [previous notes/rests], [upcoming notes/rest]]
            # eg.: index of the following [prev_tag = BL, note[-10]=note, note_dur, note[-9]=rest, rest_dur, ...,
            #                                note[-1]=note, note_dur, note[0]= note,note_dur, note[1]=rest, rest_dur, note[2] = note, note_dur, note[3]=note, note_dur, ...]
            # which in this case would be something like: [280, 240, 108, ...]

            for i, note in enumerate(notes):
                feauture_type = note[7]

                word_idx = note[1]
                if word_idx != old_word_idx:
                    # This defines and lists the window for previous and upcoming notes
                    prev_i = i - window + 1
                    if prev_i < 0: 
                        prev_i = 0
                    prev_notes = notes[prev_i:i]

                    next_i = i + window
                    if next_i > len(notes):
                        next_i = len(notes)
                    next_notes = notes[i:next_i]

                    # If feature type is BB
                    if feauture_type == "<BB>":
                        feature = [] # Initiatie the feature vector which is to contain the 

                        w_idx = self.word2idx.get("<BB>|<null>") # Get word index of BB feature
                        lyrics.append(w_idx) # Append lyric array with feature
                        syllables.append(1) # Append syllable array with 1

                        prev_tag = self.feature2idx["prev_tag=%s"%tag_stack[-1]]
                        feature.append(prev_tag)

                        # For previous 8 notes in window
                        for j, prev_note in enumerate(prev_notes):
                            if prev_note[2] == 'rest':
                                note_num = self.feature2idx["note[-%s]=rest"%(len(prev_notes)-j)]
                            else:
                                note_num = self.feature2idx["note[-%s]=note"%(len(prev_notes)-j)]
                            
                            note_duration = self.feature2idx["length[-%s]=%s"%((len(prev_notes)-j), get_correct_length(prev_note[3]))]
                            
                            feature.append(note_num)
                            feature.append(note_duration)


                        # For upcoming 8 notes in the window
                        for j, next_note in enumerate(next_notes):
                            if next_note[2] == 'rest':
                                note_num = self.feature2idx["note[%s]=rest"%(len(next_note)-j)]
                            else:
                                note_num = self.feature2idx["note[%s]=note"%(len(next_note)-j)]

                            note_duration = self.feature2idx["length[%s]=%s"%(len(next_note)-j, get_correct_length(next_note[3]))]

                            feature.append(note_num)
                            feature.append(note_duration)

                        # Pad feature vector (add elements to fill the array)
                        feature = [feature[0]]*(39 - len(feature)) + feature # (adds the first element several times if its shorter than 39)

                        # The feature vector is built up as indexes of ['prev_tag', 'note_num', 'note_duration', 'note_num', 'note_duration', ..., 'next_tag', 'note_num', 'note_duration, ...]
                        melody.append(feature[::])
                        tag_stack.append("<BB>")

                    if feauture_type == "<BL>":
                        feature = [] # Initiatie the feature vector which is to contain the 

                        w_idx = self.word2idx.get("<BL>|<null>") # Get word index of BB feature
                        lyrics.append(w_idx) # Append lyric array with feature
                        syllables.append(1) # Append syllable array with 1

                        prev_tag = self.feature2idx["prev_tag=%s"%tag_stack[-1]]
                        feature.append(prev_tag)

                        # For previous 8 notes in window
                        for j, prev_note in enumerate(prev_notes):
                            if prev_note[2] == 'rest':
                                note_num = self.feature2idx["note[-%s]=rest"%(len(prev_notes)-j)]
                            else:
                                note_num = self.feature2idx["note[-%s]=note"%(len(prev_notes)-j)]
                            
                            note_duration = self.feature2idx["length[-%s]=%s"%((len(prev_notes)-j), get_correct_length(prev_note[3]))]
                            
                            feature.append(note_num)
                            feature.append(note_duration)


                        # For upcoming 8 notes in the window
                        for j, next_note in enumerate(next_notes):
                            if next_note[2] == 'rest':
                                note_num = self.feature2idx["note[%s]=rest"%(len(next_note)-j)]
                            else:
                                note_num = self.feature2idx["note[%s]=note"%(len(next_note)-j)]

                            note_duration = self.feature2idx["length[%s]=%s"%(len(next_note)-j, get_correct_length(next_note[3]))]

                            feature.append(note_num)
                            feature.append(note_duration)

                        # Pad feature vector (add elements to fill the array)
                        feature = [feature[0]]*(39 - len(feature)) + feature # (adds the first element several times if its shorter than 39)

                        # The feature vector is built up as indexes of ['prev_tag', 'note_num', 'note_duration', 'note_num', 'note_duration', ..., 'next_tag', 'note_num', 'note_duration, ...]
                        melody.append(feature[::])
                        tag_stack.append("<BL>")                        

                    feature = []
                    w_idx = self.word2idx.get(note[4], self.word2idx["<unk>"])
                    lyrics.append(w_idx) # Append lyric array with feature index
                    syllables.append(len(note[6])) # Append sylable array with number of features

                    prev_tag = self.feature2idx["prev_tag=%s"%tag_stack[-1]]
                    feature.append(prev_tag)

                    # For previous 8 notes in window
                    for j, prev_note in enumerate(prev_notes):
                        if prev_note[2] == 'rest':
                            note_num = self.feature2idx["note[-%s]=rest"%(len(prev_notes)-j)]
                        else:
                            note_num = self.feature2idx["note[-%s]=note"%(len(prev_notes)-j)]
                        
                        note_duration = self.feature2idx["length[-%s]=%s"%((len(prev_notes)-j), get_correct_length(prev_note[3]))]
                        
                        feature.append(note_num)
                        feature.append(note_duration)

                    # For upcoming 8 notes in the window
                    for j, next_note in enumerate(next_notes):
                        if next_note[2] == 'rest':
                            note_num = self.feature2idx["note[%s]=rest"%(len(next_note)-j)]
                        else:
                            note_num = self.feature2idx["note[%s]=note"%(len(next_note)-j)]

                        note_duration = self.feature2idx["length[%s]=%s"%(len(next_note)-j, get_correct_length(next_note[3]))]

                        feature.append(note_num)
                        feature.append(note_duration)

                    # Pad feature vector (add elements to fill the array)
                    feature = [feature[0]]*(39 - len(feature)) + feature # (adds the first element several times if its shorter than 39)
                    melody.append(feature[::])
                    tag_stack.append("<WORD>")

            old_word_idx = word_idx
            
            # Append syllable, lyric and melody object array with arrays
            self.idx2syllable.append(syllables[::]) 
            # print("idx2sylldable: ",self.idx2syllable)
            self.idx2lyrics.append(lyrics[::])
            # print("idx2lyrics: ", self.idx2lyrics)  
            self.idx2melody.append(melody[::])
            # print("idx2melody: ", self.idx2melody)

    def __len__(self):
        return len(self.idx2lyrics)

    def __getitem__(self, idx):
        syllables = torch.Tensor(self.idx2syllable[idx])
        lyrics = torch.Tensor(self.idx2lyrics[idx])
        melody = self.idx2melody[idx]

        return syllables, lyrics, melody, self.feature_size


def collate_fn(data):
    data.sort(key=lambda x: len(x[1]), reverse=True)
    # print("len(data) = %s"%len(data))
    _syllables, _lyrics, _melody, feature_size = zip(*data)

    # print("len(_melody) = %s"%len(_melody))

    lengths = [len(_lyric) for _lyric in _lyrics] # Creates an array of the lengths of each songs lyrics
    max_length = lengths[0]
    
    lyrics = torch.zeros(len(_lyrics), max_length).long() # Initialise tensors
    syllables = torch.zeros(len(_syllables), max_length).long() # Initialise tensors
    melody = torch.zeros(len(_melody), max_length, feature_size[0]).long() # Initialise tensors

    for i, _lyric in enumerate(_lyrics):
        end = lengths[i]
        lyrics[i, :end] = _lyric[:end] # Create one long tensor for all songs
        syllables[i, :end] = _syllables[i][:end] # Create one long tensor for all songs
        # print("_MELODY[i]: ", _melody[i])
        # print("len(_MELODY[i]: ", len(_melody[i]))
        # print("Tensor size: ", torch.Tensor(_melody[i]).long().size())
        melody[i, :end].scatter_(1, torch.Tensor(_melody[i]).long(), 1) 

    lengths = torch.Tensor(lengths).long()

    return syllables, lyrics, melody, lengths




# Model.py

In [0]:
# -*- coding: utf-8 -*-
import torch
from torch import nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F

class CLMM(nn.Module):
    def __init__(self, word_dim, melody_dim, syllable_size, word_size, feature_size):
        super(CLMM, self).__init__()
        self.hidden_dim = word_dim + melody_dim

        """ Word embedding """
        self.embedding = nn.Embedding(word_size, word_dim)

        """ Melody vector """
        self.fc_melody = nn.Linear(feature_size, melody_dim) # Fully connected layer

        """ LSTM """
        self.rnn = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim, num_layers=1, bias=True, batch_first=True, bidirectional=False)

        """ Output """
        self.fc_lyrics_out = nn.Linear((self.hidden_dim), word_size) # Fully connected layer
        self.fc_syllables_out = nn.Linear(self.hidden_dim, int(syllable_size)) # Fully connected layer
        
        """ Util """
        self.relu = nn.ReLU(True)
        self.bn_lyrics = nn.BatchNorm1d(word_size)
        self.bn_syllables = nn.BatchNorm1d(syllable_size)

    def forward(self, lyrics, melody, lengths):
        lengths = lengths - 1
        local_batch_size = lyrics.shape[0]

        """ Word embedding """
        word_emb = self.embedding(lyrics)

        """ Melody vector """
        melody_vec = self.relu(self.fc_melody(melody))

        """ Input vector """
        input_vec = torch.cat((word_emb, melody_vec), dim=2)
        input_vec = pack_padded_sequence(input_vec, lengths, batch_first=True)

        """ RNN """
        output, hidden = self.rnn(input_vec)

        """ Output """
        lyrics_output = self.fc_lyrics_out(output[0])
        syllable_output = self.fc_syllables_out(output[0])

        if local_batch_size > 1:
            lyrics_output = self.bn_lyrics(lyrics_output) # Batch normalization
            syllable_output = self.bn_syllables(syllable_output)

        return syllable_output, lyrics_output, hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return (weight.new_zeros(1, bsz, self.hidden_dim), weight.new_zeros(1, bsz, self.hidden_dim))



# Utils.py

In [0]:
# -*- coding: utf-8 -*-
import json
import os
import sys
import numpy as np
import torch

class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
    
    def update(self, val, n=1):
        self.val = val
        self.sum += val*n
        self.count += n
        self.avg = self.sum/self.count


class LogPrint:
    def __init__(self, file_path, err):
        self.file = open(file_path, "w", buffering=1)
        self.err = err

    def lprint(self, text, ret=False, ret2=False):
        if self.err:
            if ret == True:
                if ret2 == True:
                    sys.stderr.write("\n" + text + "\n")
                else:
                    sys.stderr.write("\r" + text + "\n")
            else:
                sys.stderr.write("\r" + text)
        self.file.write(text + "\n")


def load_settings(settings):
    """
    Loading settings from the given json settings file. Overwrites command line input.
    """

    # Define settings path
    settings_path = './settings/' + settings['settings_file']
    print("Loading settings from: %s"%settings_path)

    settings_loaded = json.load(open(settings_path, 'r'))

    # Check for missing settings in file
    for key in settings.keys():
        if not key in settings_loaded:
            print(key, " not found in loaded settings")
    
    settings.update(settings_loaded)
    return settings

# Function from PyTorch NLP official example
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)

# Train.py


In [6]:
# -*- coding: utf-8 -*-
import os
import sys
import argparse
import json
import random
import  time
import numpy as np
import utils
import matplotlib.pyplot as plt
from data import SongLyricDataset, collate_fn
from model import CLMM
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torch.nn.utils.rnn import pack_padded_sequence
from collections import defaultdict

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("Device type: %s"%device)

def main():
    """ Set seeds """
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    """ Load data """
    data_set = SongLyricDataset(data, word_size, window)
    data_word_size = data_set.word_size
    data_feature_size = data_set.feature_size
    data_syllable_size = data_set.syllable_size

    # Print data stats
    lp.lprint("------ Data Stats -----", True)
    lp.lprint("{:>12}:  {}".format("Number of songs", len(data_set)), True)
    lp.lprint("{:>12}:  {}".format("vocab size", data_word_size), True)
    lp.lprint("{:>12}:  {}".format("feature size", data_feature_size), True)
    lp.lprint("{:>12}:  {}".format("syllable size", data_syllable_size), True)

    """ Save vocab arrays and models to checkpoint """
    with open(checkpoint + '.feature.json', 'w') as f:
        f.write(json.dumps(data_set.idx2feature))

    with open(checkpoint + '.vocab.json', 'w') as f:
        f.write(json.dumps(data_set.idx2word))

    with open(checkpoint + '.param.json', 'w') as f:
        f.write(json.dumps({"feature_idx_path": checkpoint+'.feature.json',
                            "vocab_idx_path": checkpoint+'.vocab.json',
                            "word_dim": word_dim,
                            "syllable_size": data_syllable_size,
                            "melody_dim": melody_dim,
                            "feature_size": data_feature_size,
                            "window": window,
                            "args_word_size": word_size}))
    

    """ Split data into training and validation data """
    n_samples = len(data_set)
    train_size = int(n_samples*train_rate)
    validation_size = int((n_samples - train_size)/2)
    test_size = validation_size
    
    train_data_set, val_data_set, test_data_set = torch.utils.data.random_split(data_set, [train_size, validation_size, test_size])

    print("Training set: ", len(train_data_set), " songs, Validation set: ", len(val_data_set), " songs, "
          "Test set: ", len(test_data_set), " songs.")

    """ Create PyTorch dataloaders """
    train_data_loader = torch.utils.data.DataLoader(dataset=train_data_set,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=num_workers,
                                                    collate_fn=collate_fn)

    val_data_loader = torch.utils.data.DataLoader(dataset=val_data_set,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=num_workers, 
                                                  collate_fn=collate_fn)

    # test_data_loader = torch.utils.data.DataLoader(dataset=test_data_set,
    #                                               batch_size=batch_size,
    #                                               shuffle=True,
    #                                               num_workers=num_workers, 
    #                                               collate_fn=collate_fn)

    """ Load CLLM model """
    model = CLMM(word_dim=word_dim, melody_dim=melody_dim, syllable_size=data_syllable_size, word_size=data_word_size, feature_size=data_feature_size).to(device)

    """ Build Optimizers """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr) # lr = 0.001
    loss_criterion = nn.CrossEntropyLoss() # Combines LogSoftmax() and NLLLoss() (Negative log likelihood loss)

    """ Define traingin function """
    def train(epoch, data_set, data_loader):
        model.train() # Activate train mode

        # Log time
        batch_time = utils.AverageMeter()
        data_time = utils.AverageMeter()
        sum_losses_syll = utils.AverageMeter()
        sum_losses_lyric = utils.AverageMeter()
        start_time = time.time()

        """ Batches """
        hidden = model.init_hidden(batch_size) # Creates a list of 3D layers with 1 x batch_size x hidden_dim

        for i, (syllable, lyric, melody, lengths) in enumerate(data_loader):
            # Take time
            elapsed = time.time()
            data_time.update((elapsed - start_time)*1000)

            """ Move dataloaders to GPU """
            syllable = syllable.to(device)
            lyric = lyric.to(device)
            melody = melody.to(device).float()
            lengths = lengths.to(device)

            """ Remove first melody feature """
            melody = melody[:, 1:] # We dont really want to do this?

            """ Reset gradient to zero """
            optimizer.zero_grad()

            """ Detach hidden layers """
            hidden = utils.repackage_hidden(hidden) # Function from PyTorch NLP official example

            """ Feedforward """
            # Feedforward
            syllable_output, lyrics_output, hidden = model(lyric[:, :-1], melody, lengths)
            
            # Define packed padded targets
            target_syllable = pack_padded_sequence(syllable[:, 1:], lengths-1, batch_first=True)[0]
            target_lyrics = pack_padded_sequence(lyric[:, 1:], lengths-1, batch_first=True)[0]
            
            # Calculate and update Cross-Entropy loss
            loss_syllable = loss_criterion(syllable_output, target_syllable)
            sum_losses_syll.update(loss_syllable)

            loss_lyrics = loss_criterion(lyrics_output, target_lyrics)
            sum_losses_lyric.update(loss_lyrics)

            """ Backpropagation """
            loss = loss_syllable + loss_lyrics
            loss.backward()
            optimizer.step()

            """ Time """
            elapsed = time.time()
            batch_time.update((elapsed - start_time))

            """ Print progress """
            if i % log_interval == 0:
                lp.lprint('| Training Epoch: {:3d}/{:3d}  {:6d}/{:6d} '
                          '| lr:{:6.5f} '
                          '| {batch_time.avg:7.2f} s/batch '
                          '| {data_time.avg:5.2f} ms/data_load '
                          '| Loss(Syllable) {loss_s.avg:5.5f} '
                          '| Loss(Lyrics) {loss_l.avg:5.5f} |'
                          .format(epoch+1, num_epochs, i, len(data_loader), lr, 
                                  batch_time=batch_time,
                                  data_time=data_time, 
                                  loss_s=sum_losses_syll, 
                                  loss_l=sum_losses_lyric))
        return sum_losses_lyric.avg, sum_losses_syll.avg


    def validation(epoch, data_set, data_loader):
        model.eval()

        # Log time
        batch_time = utils.AverageMeter()
        data_time = utils.AverageMeter()
        sum_losses_syll = utils.AverageMeter()
        sum_losses_lyric = utils.AverageMeter()
        start_time = time.time()

        """ Batches """
        hidden = model.init_hidden(batch_size) # Creates a list of 3D layers with 1 x batch_size x hidden_dim

        for i, (syllable, lyric, melody, lengths) in enumerate(data_loader):
            # Take time
            elapsed = time.time()
            data_time.update((elapsed - start_time)*1000)

            """ Move dataloaders to GPU """
            syllable = syllable.to(device)
            lyric = lyric.to(device)
            melody = melody.to(device).float()
            lengths = lengths.to(device)

            """ Remove first melody feature """
            melody = melody[:, 1:] # We dont really want to do this?

            """ Reset gradient to zero """
            optimizer.zero_grad()

            """ Detach hidden layers """
            hidden = utils.repackage_hidden(hidden) # Function from PyTorch NLP official example

            """ Feedforward """
            # Feedforward
            syllable_output, lyrics_output, hidden = model(lyric[:, :-1], melody, lengths)
            
            # Define packed padded targets
            target_syllable = pack_padded_sequence(syllable[:, 1:], lengths-1, batch_first=True)[0]
            target_lyrics = pack_padded_sequence(lyric[:, 1:], lengths-1, batch_first=True)[0]
            
            # Calculate and update Cross-Entropy loss
            loss_syllable = loss_criterion(syllable_output, target_syllable)
            sum_losses_syll.update(loss_syllable)

            loss_lyrics = loss_criterion(lyrics_output, target_lyrics)
            sum_losses_lyric.update(loss_lyrics)

            """ Time """
            elapsed = time.time()
            batch_time.update((elapsed - start_time))

            """ Print progress """
            if i % log_interval == 0:
                lp.lprint('| Validation Epoch: {:3d}/{:3d}  {:6d}/{:6d} '
                          '| lr:{:6.5f} '
                          '| {batch_time.avg:7.2f} s/batch '
                          '| {data_time.avg:5.2f} ms/data_load '
                          '| Loss(Syllable) {loss_s.avg:5.5f} '
                          '| Loss(Lyrics) {loss_l.avg:5.5f} |'
                          .format(epoch+1, num_epochs, i, len(data_loader), lr, 
                                  batch_time=batch_time,
                                  data_time=data_time, 
                                  loss_s=sum_losses_syll, 
                                  loss_l=sum_losses_lyric))
        return sum_losses_lyric.avg, sum_losses_syll.avg


    def test(data_set, data_loader):
        print("test")

    def save_model(epoch):
        model.eval()
        with open(checkpoint+"_%02d.pt"%(epoch+1), 'wb') as f:
            torch.save(model.state_dict(), f)

    """ Run Epochs """
    lp.lprint("------ Training -----", True)
    first_start_time = time.time()
    train_lyric_loss_vec = []
    train_syll_loss_vec = []
    val_lyric_loss_vec = []
    val_syll_loss_vec = []
    for epoch in range(num_epochs):
        # Training 
        train_lyric_loss, train_syll_loss = train(epoch, train_data_set, train_data_loader)
        train_lyric_loss_vec.append(train_lyric_loss)
        train_syll_loss_vec.append(train_syll_loss)
        lp.lprint("", True)

        # Validation
        with torch.no_grad():
            val_lyric_loss, val_syll_loss = validation(epoch, val_data_set, val_data_loader)
            val_lyric_loss_vec.append(val_lyric_loss)
            val_syll_loss_vec.append(val_syll_loss)
            lp.lprint("", True)

            # Save checkpoint
            save_model(epoch)

        # Plot training loss
        plt.figure('train', (12, 6))
        plt.subplot(1, 2, 1)
        plt.title('Training lyric loss')
        plt.ylabel('Train lyric loss')
        plt.xlabel('Epoch')
        plt.plot(train_lyric_loss_vec)
        plt.subplot(1, 2, 2)
        plt.title('Training syllable loss')
        plt.ylabel('Train syllable loss')
        plt.xlabel('Epoch')
        plt.plot(train_syll_loss_vec)
        plt.show()

        # Plot validation loss
        plt.figure('validation', (12, 6))
        plt.subplot(1, 2, 1)
        plt.title('Validation lyric loss')
        plt.ylabel('Validation lyric loss')
        plt.xlabel('Epoch')
        plt.plot(val_lyric_loss_vec)
        plt.subplot(1, 2, 2)
        plt.title('Validation syllable loss')
        plt.ylabel('Validation syllable loss')
        plt.xlabel('Epoch')
        plt.plot(val_syll_loss_vec)
        plt.show()


        lp.lprint("-----------", True)
    elapsed = (time.time() - first_start_time)/60
    lp.lprint('Total elapsed time: {elapsed:7.2f} minutes'.format(elapsed=elapsed))
    


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train a conditional-LSTM language model to generate lyrics given melody")
    parser.add_argument('--settings_file', help='json file of settings, overrides everything else', type=str, default='settings.txt')
    parser.add_argument('-verbose', '--verbose', dest="verbose", default=1, type=int, help="verbose: 0 or 1")

    args = parser.parse_args()
    settings = vars(args)
    settings = utils.load_settings(settings)
    
    print(settings["checkpoint"])

    if args.verbose == 1:
        lp = utils.LogPrint(settings['checkpoint'] + '.log', True)
    else:
        lp = utils.LogPrint(settings['checkpoint'] + '.log', False)

    # Print settings
    lp.lprint("------ Parameters -----", True)
    for (k, v) in settings.items():
        lp.lprint("{:>16}:  {}".format(k, v), True)
    
    # Log settings
    with open(settings['checkpoint']+'args.json', 'w') as f:
        f.write(json.dumps(settings))
    
    # Update local variables
    locals().update(settings)

    # Redefine variables to avoid annoying text editor errors
    lr = lr
    batch_size = batch_size
    checkpoint = checkpoint
    word_size = word_size
    word_dim = word_dim
    melody_dim = melody_dim
    num_workers = num_workers
    seed = seed
    window = window
    train_rate = train_rate
    data = data
    num_epochs = num_epochs
    log_interval = log_interval


    main()


Device type: cuda:0


usage: ipykernel_launcher.py [-h] [--settings_file SETTINGS_FILE]
                             [-verbose VERBOSE]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-a37de38c-60ce-4267-a0a2-2f3159acbd73.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Bottom