In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random
import os
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import time
import math

device = torch.device("cuda")

In [2]:
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [3]:
# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# # Lowercase, trim, and remove non-letter characters
# def normalizeString(s):
#     s = unicodeToAscii(s.lower().strip())
#     s = re.sub(r"([.!?])", r" \1", s)
#     s = re.sub(r"[^a-zA-Z!?]+", r" ", s)
#     return s.strip()

In [4]:
# create a function to read all the data in a given folder
def read_all_files(folder_path):
    if not os.path.exists(folder_path):
        raise Exception("Folder doesnot exist")
    
    # Get a list of all Excel files in the folder
    excel_files = [file for file in os.listdir(folder_path) if file.endswith('.xlsx') or file.endswith('.xls')]

    # Initialize an empty DataFrame to store the combined data
    combined_df = pd.DataFrame()

    # Loop through each Excel file and read it into a DataFrame
    for file in excel_files:
        # Assuming that all sheets in each Excel file need to be concatenated
        xls = pd.ExcelFile(os.path.join(folder_path, file))
        sheet_names = xls.sheet_names
        for sheet_name in sheet_names:
            df = pd.read_excel(xls, sheet_name)
            combined_df = pd.concat([combined_df, df], ignore_index=True)
    if combined_df.columns[0]=='Unnamed: 0':
        combined_df = combined_df.drop('Unnamed: 0', axis=1)
    
    for c in combined_df.columns:
        combined_df[c] = combined_df[c].replace(' ; ',' ', regex=True)

    return combined_df

In [5]:
def readLangs():
    print("Reading data...")
    # Function to join two columns into a list
    def join_columns(row):
        words_list = row['words'].split()  # Split the string into a list of words
        joined_words = ' '.join(words_list)
        return [joined_words, row['mean_note_pesto']]

    # Apply the function to each row
    train_df['Combined'] = train_df.apply(join_columns, axis=1)
    pairs = train_df['Combined'].to_list()
    input_lang = Lang('input')
    for w in train_df['words']:
        input_lang.addSentence(str.lower(w))
    output_lang = Lang('output')
    # defining the outputs
    output_classes = ['0','C0','C0#','D0','D0#','E0','F0','F0#','G0','G0#','A0','A0#','B0',
                   'C1','C1#','D1','D1#','E1','F1','F1#','G1','G1#','A1','A1#','B1',
                   'C2','C2#','D2','D2#','E2','F2','F2#','G2','G2#','A2','A2#','B2',
                   'C3','C3#','D3','D3#','E3','F3','F3#','G3','G3#','A3','A3#','B3',
                   'C4','C4#','D4','D4#','E4','F4','F4#','G4','G4#','A4','A4#','B4',
                   'C5','C5#','D5','D5#','E5','F5','F5#','G5','G5#','A5','A5#','B5',
                   'C6','C6#','D6','D6#','E6','F6','F6#','G6','G6#','A6','A6#','B6',
                   'C7','C7#','D7','D7#','E7','F7','F7#','G7','G7#','A7','A7#','B7',
                   'C8','C8#','D8','D8#','E8','F8','F8#','G8','G8#','A8','A8#','B8']

    for o in output_classes:
        output_lang.addSentence(o)
    

    return input_lang, output_lang, pairs

In [6]:
train_df = read_all_files('/speech/dbwork/mul/spielwiese4/students/desengus/dry_crepe_pesto/excels/train')
test_df = read_all_files('/speech/dbwork/mul/spielwiese4/students/desengus/dry_crepe_pesto/excels/test')
val_df = read_all_files('/speech/dbwork/mul/spielwiese4/students/desengus/dry_crepe_pesto/excels/validation')



In [7]:
input_lang, output_lang, pairs = readLangs()

Reading data...


In [8]:
output_lang.index2word

{0: 'SOS',
 1: 'EOS',
 2: '0',
 3: 'C0',
 4: 'C0#',
 5: 'D0',
 6: 'D0#',
 7: 'E0',
 8: 'F0',
 9: 'F0#',
 10: 'G0',
 11: 'G0#',
 12: 'A0',
 13: 'A0#',
 14: 'B0',
 15: 'C1',
 16: 'C1#',
 17: 'D1',
 18: 'D1#',
 19: 'E1',
 20: 'F1',
 21: 'F1#',
 22: 'G1',
 23: 'G1#',
 24: 'A1',
 25: 'A1#',
 26: 'B1',
 27: 'C2',
 28: 'C2#',
 29: 'D2',
 30: 'D2#',
 31: 'E2',
 32: 'F2',
 33: 'F2#',
 34: 'G2',
 35: 'G2#',
 36: 'A2',
 37: 'A2#',
 38: 'B2',
 39: 'C3',
 40: 'C3#',
 41: 'D3',
 42: 'D3#',
 43: 'E3',
 44: 'F3',
 45: 'F3#',
 46: 'G3',
 47: 'G3#',
 48: 'A3',
 49: 'A3#',
 50: 'B3',
 51: 'C4',
 52: 'C4#',
 53: 'D4',
 54: 'D4#',
 55: 'E4',
 56: 'F4',
 57: 'F4#',
 58: 'G4',
 59: 'G4#',
 60: 'A4',
 61: 'A4#',
 62: 'B4',
 63: 'C5',
 64: 'C5#',
 65: 'D5',
 66: 'D5#',
 67: 'E5',
 68: 'F5',
 69: 'F5#',
 70: 'G5',
 71: 'G5#',
 72: 'A5',
 73: 'A5#',
 74: 'B5',
 75: 'C6',
 76: 'C6#',
 77: 'D6',
 78: 'D6#',
 79: 'E6',
 80: 'F6',
 81: 'F6#',
 82: 'G6',
 83: 'G6#',
 84: 'A6',
 85: 'A6#',
 86: 'B6',
 87: 'C7',
 88: '

In [9]:
random.choice(pairs)

['see time. You',
 'E1 E1 E1 E1 F1 F1 E1 E1 E1 E1 E1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1# G1 F1 F1 F1 F1 F1 F1# F1 F1 F1 F1# F1 F1 F1 F1 F1 E1 E1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 E1 F1# F1 E1 F1 E1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1# A1# F1# F1 F1 F1 F1 F1 E1 F1 F1 A1 F1# E1 E1 E1 E1 E1 E1 F1 F1 E1 E1 E1 E1 E1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 B1 B1 B1 A1 A1 A1 A1 G1# F1# F1# F1 F1 F1 F1 F1 F1 F1 F1 F1 F1# F1 A1# G1# E1 A1# F1# F1 G1# G1# F1# F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1 F1']

In [10]:
output_lang.n_words

111

In [11]:
MAX_LENGTH = 512

In [12]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, dropout_p=0.1):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input):
        embedded = self.dropout(self.embedding(input))
        output, hidden = self.gru(embedded)
        return output, hidden

In [13]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden  = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: Feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()  # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        return decoder_outputs, decoder_hidden, None # We return `None` for consistency in the training loop

    def forward_step(self, input, hidden):
        output = self.embedding(input)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.out(output)
        return output, hidden

In [14]:
class BahdanauAttention(nn.Module):
    def __init__(self, hidden_size):
        super(BahdanauAttention, self).__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))
        scores = scores.squeeze(2).unsqueeze(1)

        weights = F.softmax(scores, dim=-1)
        context = torch.bmm(weights, keys)

        return context, weights

class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1):
        super(AttnDecoderRNN, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attention = BahdanauAttention(hidden_size)
        self.gru = nn.GRU(2 * hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        batch_size = encoder_outputs.size(0)
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []
        attentions = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden, attn_weights = self.forward_step(
                decoder_input, decoder_hidden, encoder_outputs
            )
            decoder_outputs.append(decoder_output)
            attentions.append(attn_weights)

            if target_tensor is not None:
                # Teacher forcing: Feed the target as the next input
                decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()  # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        attentions = torch.cat(attentions, dim=1)

        return decoder_outputs, decoder_hidden, attentions


    def forward_step(self, input, hidden, encoder_outputs):
        embedded =  self.dropout(self.embedding(input))

        query = hidden.permute(1, 0, 2)
        context, attn_weights = self.attention(query, encoder_outputs)
        input_gru = torch.cat((embedded, context), dim=2)

        output, hidden = self.gru(input_gru, hidden)
        output = self.out(output)

        return output, hidden, attn_weights

In [15]:
def indexesFromSentence(lang, sentence):
    # print(lang.name)
    if lang.name == 'input':
        return [lang.word2index[str.lower(word)] for word in sentence.split(' ')]
    else:
        return [lang.word2index[word] for word in sentence.split(' ')]

def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(1, -1)

def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

def get_dataloader(batch_size):
    input_lang, output_lang, pairs = readLangs()

    n = len(pairs)
    input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)
    target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)

    for idx, (inp, tgt) in enumerate(pairs):
        inp_ids = indexesFromSentence(input_lang, inp)
        tgt_ids = indexesFromSentence(output_lang, tgt)
        inp_ids.append(EOS_token)
        tgt_ids.append(EOS_token)
        input_ids[idx, :len(inp_ids)] = inp_ids
        target_ids[idx, :len(tgt_ids)] = tgt_ids

    train_data = TensorDataset(torch.LongTensor(input_ids).to(device),
                               torch.LongTensor(target_ids).to(device))

    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
    return input_lang, output_lang, train_dataloader

In [16]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion):

    total_loss = 0
    i = 0
    for data in dataloader:
        i = i+1
        print(i)
        input_tensor, target_tensor = data
#        print(type(input_tensor))

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        encoder_outputs, encoder_hidden = encoder(input_tensor)
#        print(encoder_outputs.shape)
#        print(encoder_hidden.shape)
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)

        loss = criterion(
            decoder_outputs.view(-1, decoder_outputs.size(-1)),
            target_tensor.view(-1)
        )
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)

In [17]:
import time
import math

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [18]:
def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
               print_every=1, plot_every=100):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for epoch in range(1, n_epochs + 1):
        loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_epochs),
                                        epoch, epoch / n_epochs * 100, print_loss_avg))

        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0



In [19]:
train_dataloader.batch_size

NameError: name 'train_dataloader' is not defined

In [None]:
train_df.shape

(8660, 9)

In [None]:
hidden_size = 128
batch_size = 32

input_lang, output_lang, train_dataloader = get_dataloader(batch_size)

encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)

train(train_dataloader, encoder, decoder, 5, print_every=1, plot_every=5)

Reading data...


RuntimeError: CUDA error: CUDA-capable device(s) is/are busy or unavailable
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [21]:
hidden_size = 128
batch_size = 32
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)


In [25]:
encoder.load_state_dict(torch.load('/speech/dbwork/mul/spielwiese4/students/desengus/encoder_basic.pt'))
decoder.load_state_dict(torch.load('/speech/dbwork/mul/spielwiese4/students/desengus/decoder_basic.pt'))


<All keys matched successfully>

In [26]:
def evaluate(encoder, decoder, sentence, input_lang, output_lang):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)

        encoder_outputs, encoder_hidden = encoder(input_tensor)
        decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)

        _, topi = decoder_outputs.topk(1)
        decoded_ids = topi.squeeze()

        decoded_words = []
        for idx in decoded_ids:
            if idx.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.index2word[idx.item()])
    return decoded_words, decoder_attn

In [31]:
words, attn = evaluate(encoder,decoder,"Hey, its me. I love to sing a song for",input_lang, output_lang)

In [34]:
len(words)

512

In [39]:
from collections import Counter

Counter(words)

Counter({'SOS': 512})

In [42]:
df = pd.concat([train_df,test_df,val_df], ignore_index=True)

In [43]:
df.shape

(10309, 9)