In [1]:
from torchtext import data,datasets
from torchtext.vocab import GloVe,FastText,CharNGram
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torch
from torchtext.datasets.imdb import IMDB
import sys

In [2]:
is_cuda = False

if torch.cuda.is_available():
    is_cuda=True

In [3]:
TEXT = data.Field(lower=True, batch_first=True, fix_length=40,)
LABEL = data.Field(sequential=False,)

In [4]:
train, test = IMDB.splits(TEXT, LABEL, root='.data')

In [5]:
TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300), max_size=10000, min_freq=10)
LABEL.build_vocab(train)

In [6]:
# 데이터셋을 배치로 나눠서 학습시킬 수 있도록 생성
train_iter, test_iter = data.BucketIterator.splits((train, test), batch_size=32, device=-1,shuffle=True)
train_iter.repeat = False
test_iter.repeat = False

The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.
The `device` argument should be set by using `torch.device` or passing a string as an argument. This behavior will be deprecated soon and currently defaults to cpu.


In [93]:
class RnnNet(nn.Module):
    
    def __init__(self,vocab,hidden_size,n_cat,bs=1,nl=2):
        super().__init__()
        self.hidden_size = hidden_size
        self.bs = bs
        self.nl = nl
        self.e = nn.Embedding(n_vocab,hidden_size)
        self.rnn = nn.LSTM(hidden_size,hidden_size,nl)
        self.fc2 = nn.Linear(hidden_size,n_cat)
        self.softmax = nn.LogSoftmax(dim=-1)
        
    def forward(self,inp):
        bs = inp.size()[1]
        if bs != self.bs:
            self.bs = bs
        e_out = self.e(inp)
        # h_0와 c_0는 https://pytorch.org/docs/master/generated/torch.nn.LSTM.html 에서 참고
        # 두 개의 값은 (num_layers * num_directions, batch, hidden_size) shape로 정의함
        # LSTM을 정의할때 bidirectional이 True라면 num_directions은 2로 False라면 1
        # batch 는 batch_size, hidden_size는 LSTM의 hidden_size
        # h_0, c_0를 정의하지 않으면 "If (h_0, c_0) is not provided, both h_0 and c_0 default to zero." 라고함
        h0 = c0 = Variable(e_out.data.new(*(self.nl,self.bs,self.hidden_size)).zero_())
        rnn_o,_ = self.rnn(e_out,(h0,c0))
#         print('rnnOut shape : ', rnn_o.shape)
        # (batch_size, seq_len, hidden_size) 의 shape을 (seq_len, batch_size, hidden_size)로 바꿔주자
        # permute 참고--> https://discuss.pytorch.org/t/swap-axes-in-pytorch/970/3
        rnn_o = rnn_o.permute(1,0,2)
        rnn_o = rnn_o[-1]
        fc = F.dropout(self.fc2(rnn_o),p=0.5)
        return self.softmax(fc)

In [32]:
n_vocab = len(TEXT.vocab)
n_hidden = 100

In [94]:
model = RnnNet(n_vocab,n_hidden,3,bs=32)
model = model.cuda()

In [95]:
optimizer = optim.SGD(model.parameters(), lr=0.005)
# lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

In [96]:
def fit(epoch,model,data_loader,phase='training',volatile=False):
    if phase == 'training':
        model.train()
    if phase == 'validation':
        model.eval()
        volatile=True
    running_loss = 0.0
    running_correct = 0
    for batch_idx , batch in enumerate(data_loader):
        text , target = batch.text , batch.label
        if is_cuda:
            text,target = text.cuda(),target.cuda()
        
        if phase == 'training':
            optimizer.zero_grad()
        output = model(text)
        loss = F.nll_loss(output,target)
        
        running_loss += F.nll_loss(output,target,size_average=False).data
        preds = output.data.max(dim=1,keepdim=True)[1]
        running_correct += preds.eq(target.data.view_as(preds)).cpu().sum()
        if phase == 'training':
            loss.backward()
            optimizer.step()
    
    loss = running_loss/len(data_loader.dataset)
    accuracy = 100. * running_correct.item()/len(data_loader.dataset)
    
    print(f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}')
    return loss,accuracy

In [12]:
# 하나의 배치를 가져오고 모델에 넣어서 테스트
k = iter(train_iter)
kk = next(k)

In [39]:
kk


[torchtext.data.batch.Batch of size 32 from IMDB]
	[.text]:[torch.LongTensor of size 32x40]
	[.label]:[torch.LongTensor of size 32]

In [97]:
out = model(kk.text.cuda())
out.shape

torch.Size([32, 3])

In [98]:
# train val 학습에 대한 loss acc 저장을 위한 공간 확보
train_losses, train_accuracy = [], []
val_losses, val_accuracy = [], []

In [99]:
%%time
train_losses , train_accuracy = [],[]
val_losses , val_accuracy = [],[]

for epoch in range(1,10):

    epoch_loss, epoch_accuracy = fit(epoch,model,train_iter,phase='training')
    val_epoch_loss , val_epoch_accuracy = fit(epoch,model,test_iter,phase='validation')
    train_losses.append(epoch_loss)
    train_accuracy.append(epoch_accuracy)
    val_losses.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)

training loss is  0.95 and training accuracy is 10828/25000     43.31
validation loss is  0.91 and validation accuracy is 10927/25000     43.71
training loss is   0.9 and training accuracy is 10965/25000     43.86
validation loss is  0.89 and validation accuracy is 11032/25000     44.13
training loss is  0.89 and training accuracy is 10875/25000      43.5
validation loss is  0.89 and validation accuracy is 10879/25000     43.52
training loss is  0.88 and training accuracy is 10988/25000     43.95
validation loss is  0.88 and validation accuracy is 10909/25000     43.64
training loss is  0.88 and training accuracy is 10951/25000      43.8
validation loss is  0.88 and validation accuracy is 10843/25000     43.37
training loss is  0.88 and training accuracy is 10903/25000     43.61
validation loss is  0.88 and validation accuracy is 11009/25000     44.04
training loss is  0.88 and training accuracy is 10930/25000     43.72
validation loss is  0.87 and validation accuracy is 11055/25000   