In [42]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler, SequentialSampler
from torch.autograd import Variable

import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import math
import numpy as np

# Pick the compute device once; later cells move the model and tensors to `device`.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # NOTE(review): these look like DataLoader keyword arguments; nothing in the
    # visible code passes them on yet.
    extras = {"num_workers": 4, "pin_memory": True}
    print("CUDA is supported")
else:
    device = torch.device("cpu")
    # Keep `extras` a dict on both branches (it used to be `False` here) so that
    # unpacking it with `**extras` works on CPU as well. An empty dict is still falsy.
    extras = {}
    print("CUDA NOT supported")

CUDA is supported


In [44]:
class dataset(Dataset):
    """
    Character-level dataset of text pieces delimited by <start>/<end> marker lines.

    Every piece is encoded once, at construction time, and served from memory.

    Attributes
        data     : list of str, one entry per piece (marker lines removed)
        id_ch    : mapping from character id to character
        ch_id    : mapping from character to character id (must contain '$')
        dim      : vocabulary size, i.e. the width of the one-hot encoding
        computed : list of (one_hot, label) pairs, one per piece
    """
    def __init__(self, txt_file, id_ch, ch_id):
        """
        Args
            txt_file : path of the text file to read
            id_ch    : dict {id: character}
            ch_id    : dict {character: id}
        Raises
            KeyError : if the file contains a character missing from ch_id
        """
        self.id_ch, self.ch_id = id_ch, ch_id
        self.dim = len(id_ch)

        # The last list entry is the piece currently being accumulated.
        self.data = [""]
        with open(txt_file, mode='r') as text_file:
            for line in text_file:
                if "<start>" in line:
                    continue
                elif "<end>" in line:
                    self.data.append("")
                else:
                    self.data[-1] += line
        # Drop the accumulator opened by the final <end>; any text that follows
        # the last <end> marker is discarded with it.
        self.data.pop()

        self.computed = [self.computeItem(i) for i in range(len(self.data))]

    def __len__(self):
        return len(self.data)

    def computeItem(self, idx):
        """
        Encode one piece.

        Args
            idx : index into self.data
        Returns
            (one_hot, label)
            one_hot : float array [len(piece), dim], one row per character
            label   : int64 array [len(piece)], the id of the *next* character;
                      the last position is labelled with the id of '$'
        """
        # Explicit integer dtype so that an empty piece still yields a valid index array.
        targets = np.array([self.ch_id[ch] for ch in self.data[idx]], dtype=np.int64)
        one_hot = np.eye(self.dim)[targets]

        label = np.empty(targets.shape[0], dtype=np.int64)
        label[:-1] = targets[1:]          # shifted one to the left
        label[-1:] = self.ch_id['$']      # end character (no-op for an empty piece)
        return (one_hot, label)

    def __getitem__(self, idx):
        """
        Args
            idx : n
        Returns
            (one_hot, label) : the precomputed encoding of the n-th piece
        """
        return self.computed[idx]

class RNN(nn.Module):
    """
    Thin wrapper around a batch-first ``nn.RNN`` that also keeps a zero hidden state.

    Args
        input_size  : number of features per time step
        hidden_size : size of the recurrent hidden state
        num_layers  : number of stacked recurrent layers
        num_classes : accepted but not used by this class (there is no output layer)
    """
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Pass num_layers through so the network depth matches the hidden state
        # shape produced by init_hidden().
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # init_hidden() stores the state itself; do not assign its result over it.
        self.init_hidden()

    def forward(self, input, hidden):
        """
        Args:
            input  : [batch_size, seq_len, input_size]
            hidden : [num_layers, batch_size, hidden_size]
        Returns:
            out    : [batch_size, seq_len, hidden_size]
            hidden : [num_layers, batch_size, hidden_size]
        """
        out, hidden = self.rnn(input, hidden)
        return out, hidden

    def init_hidden(self):
        """
        Reset ``self.hidden`` to zeros for a batch of one and return it.

        The tensor is created on the device the parameters currently live on,
        so call this again after moving the module with ``.to(...)``.

        Returns:
            hidden : [num_layers, 1, hidden_size]
        """
        param_device = next(self.parameters()).device
        self.hidden = torch.zeros(self.num_layers, 1, self.hidden_size,
                                  dtype=torch.float, device=param_device)
        return self.hidden
    
class LSTM(nn.Module):
    """
    Batch-first LSTM followed by a linear layer that maps every time step to
    ``num_classes`` scores.

    The hidden and cell states are kept on the module (``self.hs``, ``self.cs``)
    and carried from one forward() call to the next; call init_hidden() to
    reset them at the start of a new sequence.
    """
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        # Recurrent state; created lazily by forward() or explicitly by init_hidden().
        self.hs = None
        self.cs = None

    def forward(self, input):
        """
        Args:
            input  : [batch_size, seq_len, input_size]  (the LSTM is batch_first)
        Returns:
            out    : [batch_size * seq_len, num_classes]
        """
        if self.hs is None or self.cs is None:
            # First call without an explicit init_hidden(): start from zeros.
            self.init_hidden(input.size(0))
        # Detach the carried state so gradients stop at the chunk boundary.
        out, (self.hs, self.cs) = self.lstm(input, (self.hs.detach(), self.cs.detach()))
        return self.fc(out.reshape(-1, self.hidden_size))

    def init_hidden(self, batch_size=1):
        """
        Reset the hidden and cell states to zeros.

        The tensors are created on the device the parameters currently live on.

        Args:
            batch_size : batch dimension of the state (default 1)
        Sets:
            self.hs, self.cs : [num_layers, batch_size, hidden_size] each
        """
        param_device = next(self.parameters()).device
        shape = (self.num_layers, batch_size, self.hidden_size)
        self.hs = torch.zeros(shape, dtype=torch.float, device=param_device)
        self.cs = torch.zeros(shape, dtype=torch.float, device=param_device)

In [45]:
# Build the character vocabulary from the training text.
# Id 0 is reserved for '$'; every other character gets the next free id in
# order of first appearance.
id_ch, ch_id = {0:'$'}, {'$':0}
idx = 1  # next unused character id

# Use a context manager so the file handle is closed once the scan is done.
with open('train.txt', mode='r') as train_file:
    for line in train_file:
        # Lines carrying a <start>/<end> marker contribute no characters.
        if "<start>" in line or "<end>" in line:
            continue
        for ch in line:
            if ch not in ch_id:
                ch_id[ch] = idx
                id_ch[idx] = ch
                idx += 1

In [46]:
# Wrap each text split in a `dataset`, all sharing the id_ch / ch_id vocabulary.
train_dataset, valid_dataset, test_dataset = [
    dataset(split_file, id_ch, ch_id)
    for split_file in ('train.txt', 'val.txt', 'test.txt')
]

# One random-order sampler per split, each covering every index of its split.
train_sampler, valid_sampler, test_sampler = [
    SubsetRandomSampler(list(range(len(split))))
    for split in (train_dataset, valid_dataset, test_dataset)
]

# One piece per batch. The test loader is built without a sampler, so
# `test_sampler` is not used by it.
train_loader = DataLoader(train_dataset, sampler=train_sampler, batch_size=1)
valid_loader = DataLoader(valid_dataset, sampler=valid_sampler, batch_size=1)
test_loader = DataLoader(test_dataset, batch_size=1)

In [None]:
PATH = "3/"                # output directory for checkpoints and loss logs
CHUNK_LEN = 100            # characters fed to the model per optimisation step
MAX_EPOCHS = 25
VOCAB_SIZE = len(id_ch)    # one-hot width produced by `dataset` (was hard-coded as 94)

# Create the output directory up front so the first torch.save cannot fail
# after a full epoch of training.
os.makedirs(PATH, exist_ok=True)

model = LSTM(input_size = VOCAB_SIZE, hidden_size = 100, num_layers = 1, num_classes = VOCAB_SIZE).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
criterion = torch.nn.CrossEntropyLoss()


def run_epoch(loader, train):
    """
    Run the model once over every piece in `loader`, CHUNK_LEN characters at a time.

    The recurrent state is reset at the start of each piece and carried across
    the chunks of that piece.

    Args:
        loader : DataLoader yielding (one_hot, label) batches of size 1
        train  : if True, back-propagate and step the optimizer after every chunk;
                 if False, only evaluate (gradients disabled)
    Returns:
        mean loss per chunk
    Raises:
        ZeroDivisionError : if the loader yields no chunks
    """
    total_loss, chunk_count = 0.0, 0
    with torch.set_grad_enabled(train):
        for notes, labels in loader:
            model.init_hidden()
            # range(0, n, CHUNK_LEN) visits ceil(n / CHUNK_LEN) chunks; the last may be shorter.
            for start in range(0, notes.shape[1], CHUNK_LEN):
                stop = start + CHUNK_LEN
                if train:
                    optimizer.zero_grad()
                output = model(notes[:, start:stop, :].float().to(device))
                batch_loss = criterion(output, labels[0, start:stop].to(device))
                if train:
                    batch_loss.backward()
                    optimizer.step()
                total_loss += batch_loss.item()
                chunk_count += 1
    return total_loss / chunk_count


train_losses, valid_losses = [], []

for epoch in range(MAX_EPOCHS):
    print("Training Epoch {}".format(epoch))
    train_loss = run_epoch(train_loader, train=True)
    valid_loss = run_epoch(valid_loader, train=False)

    print(train_loss, valid_loss)
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    # One checkpoint per epoch, named after the epoch number.
    torch.save(model.state_dict(), PATH + str(epoch))
    # Early stop: after more than 10 epochs, quit once the validation loss is
    # worse than it was two epochs earlier.
    if len(valid_losses) > 10 and valid_losses[-3] < valid_loss:
        break

# Context managers guarantee the logs are flushed and closed; in a live kernel
# a bare open() handle could stay open with its data still buffered.
with open(PATH + "trainloss.txt", "w") as tlfile:
    tlfile.write(",".join(str(l) for l in train_losses))
with open(PATH + "validloss.txt", "w") as vlfile:
    vlfile.write(",".join(str(l) for l in valid_losses))