In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data, datasets
import random

# Seed Python's `random` module and PyTorch with the same value.
SEED = 5
torch.manual_seed(SEED)
random.seed(SEED)

# Hyperparameters.
EPOCHS = 10
lr = 0.001
BATCH_SIZE = 64

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print("cpu와 cuda 중 다음 기기로 학습함:", DEVICE)

cpu와 cuda 중 다음 기기로 학습함: cuda


In [2]:
# Field for the review text: sequential, lower-cased, batch dimension first.
TEXT = data.Field(
    sequential=True,
    lower=True,
    batch_first=True,
)
# Field for the label: non-sequential, batch dimension first.
LABEL = data.Field(
    sequential=False,
    batch_first=True,
)

In [3]:
# Load the IMDB dataset as a training set and a test set, using the TEXT and
# LABEL fields.
# NOTE(review): the previous comment described an 8:2 train/test split, but no
# ratio is passed here; the 0.8 ratio is applied later by `trainset.split(...)`
# to carve a validation set out of the training set.
trainset, testset = datasets.IMDB.splits(TEXT, LABEL)

In [4]:
# Build the text vocabulary (min_freq=5) and the label vocabulary from the
# training set.
TEXT.build_vocab(trainset, min_freq=5)
LABEL.build_vocab(trainset)

n_classes = 2
vocab_size = len(TEXT.vocab)

# Report the vocabulary size and the number of classes.
for template, value in (
        ('단어 집합의 크기 : {}', vocab_size),
        ('클래스의 개수 : {}', n_classes)):
    print(template.format(value))

단어 집합의 크기 : 46159
클래스의 개수 : 2


In [5]:
# Hold out part of the training data as a validation set (split_ratio=0.8).
# NOTE(review): `trainset` is rebound to the split result, so re-running this
# cell splits the already reduced set again.
trainset, valset = trainset.split(split_ratio=0.8)

# One bucketed iterator per split, all sharing the same batch size.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (trainset, valset, testset),
    repeat=False,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

In [9]:
class GRU(nn.Module):
    """GRU sequence classifier: embedding -> GRU -> dropout -> linear.

    Args:
        n_layers: number of stacked GRU layers.
        hidden_dim: size of the GRU hidden state.
        n_vocab: number of rows in the embedding table.
        embed_dim: size of each embedding vector.
        n_classes: number of output logits.
        dropout_p: dropout probability applied to the last hidden state.
    """

    def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
        super(GRU, self).__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embed = nn.Embedding(n_vocab, embed_dim)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(embed_dim, self.hidden_dim,
                          num_layers=self.n_layers,
                          batch_first=True)
        self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        """Compute class logits for a batch of token-id sequences.

        Args:
            x: integer tensor of token ids, shape (batch, seq_len).

        Returns:
            Tensor of logits, shape (batch, n_classes).
        """
        x = self.embed(x)
        # Start from an all-zero hidden state.
        h_0 = self._init_state(batch_size=x.size(0))
        # With batch_first=True the GRU output is (batch, seq_len, hidden_dim).
        x, _ = self.gru(x, h_0)
        # Keep only the last time step: (batch, hidden_dim).
        h_t = x[:, -1, :]
        # nn.Dropout is not in-place: its result must be assigned, otherwise
        # dropout is silently skipped (this was the original bug).
        h_t = self.dropout(h_t)
        # (batch, hidden_dim) -> (batch, n_classes)
        logit = self.out(h_t)
        return logit

    def _init_state(self, batch_size=1):
        """Return a zero hidden state of shape (n_layers, batch_size, hidden_dim).

        The tensor is created with the dtype and device of the model's first
        parameter.
        """
        weight = next(self.parameters())
        return weight.new_zeros((self.n_layers, batch_size, self.hidden_dim))

In [29]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM classifier: embedding -> BiLSTM -> linear.

    Args:
        n_vocab: number of rows in the embedding table.
        hidden_dim: hidden size of the LSTM (per direction).
        embed_dim: size of each embedding vector.
        num_lstm_layer: number of stacked LSTM layers.
        n_classes: number of output logits.
        dropout: dropout value handed to nn.LSTM.
    """

    def __init__(self, n_vocab, hidden_dim, embed_dim, num_lstm_layer, n_classes, dropout=0.1):
        super().__init__()

        self.n_vocab = n_vocab
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.num_lstm_layer = num_lstm_layer
        self.n_classes = n_classes

        # Both directions are concatenated, hence twice the hidden size.
        feature_dim = self.hidden_dim * 2

        # Sub-modules are created in the same order as before so that a seeded
        # run initialises the parameters identically.
        self.embedding = nn.Embedding(self.n_vocab, self.embed_dim)
        self.bi_lstm = nn.LSTM(
            input_size=self.embedding.embedding_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.num_lstm_layer,
            dropout=dropout,
            batch_first=True,
            bidirectional=True,
        )
        self.linear = nn.Linear(feature_dim, self.n_classes)

        # Not used by forward(); kept so the registered sub-modules (and thus
        # the state_dict keys) stay the same.
        self.lin_layers = nn.Sequential(
            nn.ReLU(),
            nn.Linear(feature_dim, self.n_classes),
        )
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, sents):
        """Compute class logits for a batch of token-id sequences.

        Args:
            sents: integer tensor of token ids, shape (batch, seq_len).

        Returns:
            Tensor of logits, shape (batch, n_classes).
        """
        token_vectors = self.embedding(sents)

        # No initial state is passed, so the LSTM starts from zeros.
        # With batch_first=True the per-step output is
        # (batch, seq_len, num_directions * hidden_size); the final states h_n
        # and c_n remain (num_layers * num_directions, batch, hidden_size).
        _, (final_hidden, _) = self.bi_lstm(token_vectors)

        # Concatenate the final forward and backward hidden states (the last
        # two entries of h_n) into one (batch, 2 * hidden_size) feature vector.
        features = torch.cat([final_hidden[-2], final_hidden[-1]], dim=1)

        return self.linear(features)

In [30]:
# NOTE(review): the GRU model built here is immediately replaced by the BiLSTM
# on the next statement; only the BiLSTM is handed to the optimizer.
model = GRU(
    n_layers=1,
    hidden_dim=256,
    n_vocab=vocab_size,
    embed_dim=128,
    n_classes=n_classes,
    dropout_p=0.5,
).to(DEVICE)
model = BiLSTM(
    n_vocab=vocab_size,
    hidden_dim=256,
    embed_dim=128,
    num_lstm_layer=2,
    n_classes=n_classes,
).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [31]:
def train(model, optimizer, train_iter):
    """Run one training epoch over `train_iter`, updating `model` via `optimizer`.

    Args:
        model: module mapping a batch of token ids to class logits.
        optimizer: optimizer holding the model's parameters.
        train_iter: iterable of batches exposing `.text` and `.label` tensors.
    """
    model.train()
    for batch in train_iter:
        x = batch.text.to(DEVICE)
        # Shift the label values so they become 0 and 1. Done out-of-place:
        # `.to(DEVICE)` returns the very same tensor when it already lives on
        # DEVICE, so an in-place `sub_` would modify the batch's own labels.
        y = batch.label.to(DEVICE) - 1
        optimizer.zero_grad()

        logit = model(x)
        loss = F.cross_entropy(logit, y)
        loss.backward()
        optimizer.step()
        
def evaluate(model, val_iter):
    """Evaluate `model` on every batch of `val_iter`.

    Args:
        model: module mapping a batch of token ids to class logits.
        val_iter: iterable of batches exposing `.text` and `.label` tensors,
            with a `.dataset` attribute whose length is the number of examples.

    Returns:
        (avg_loss, avg_accuracy): the summed cross-entropy divided by the
        dataset size, and the percentage of correct predictions, both as
        Python floats.
    """
    model.eval()
    corrects, total_loss = 0, 0.0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for batch in val_iter:
            x = batch.text.to(DEVICE)
            # Shift the label values so they become 0 and 1, without mutating
            # the batch's own label tensor.
            y = batch.label.to(DEVICE) - 1
            logit = model(x)
            loss = F.cross_entropy(logit, y, reduction='sum')
            total_loss += loss.item()
            predicted = logit.max(1)[1].view(y.size())
            # .item() keeps the running count a Python int rather than a tensor.
            corrects += (predicted == y).sum().item()
    size = len(val_iter.dataset)
    avg_loss = total_loss / size
    avg_accuracy = 100.0 * corrects / size
    return avg_loss, avg_accuracy

In [32]:
# Lowest validation loss seen so far; None until the first epoch has finished.
best_val_loss = None
EPOCHS = 8
for e in range(1, EPOCHS+1):
    train(model, optimizer, train_iter)
    val_loss, val_accuracy = evaluate(model, val_iter)

    print("[Epoch: %d] val loss : %5.2f | val accuracy : %5.2f" % (e, val_loss, val_accuracy))

    # Save the model whenever the validation loss improves. `is None` is used
    # so that a loss of exactly 0.0 is not mistaken for "no best loss yet".
    if best_val_loss is None or val_loss < best_val_loss:
        os.makedirs("snapshot", exist_ok=True)
        torch.save(model.state_dict(), './snapshot/txtclassification.pt')
        best_val_loss = val_loss
    # The unconditional `break` that used to follow (a debugging leftover) was
    # removed so that all EPOCHS epochs actually run.

torch.Size([4, 64, 256])
torch.Size([64, 256])


RuntimeError: Tensors must have same number of dimensions: got 1 and 2

hidden = torch.cat((h_n[-2,:,:], h_n[-1,:,:]), dim = 1)  
[Epoch: 10] val loss :  0.60 | val accuracy : 86.20  
  
  
