In [22]:
import torch

from bert_data_utils import get_raw_imdb_data

torch.cuda.set_device(1)

In [2]:
# 데이터 셋을 불러옴
train_data, valid_data, test_data = get_raw_imdb_data()

In [3]:
from transformers import BertTokenizer

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [5]:
def bert_tokenized_data(tokenizer, data, max_seq_len=128, pad_to_max_len=True):
    sentences = [' '.join(s.text) for s in data]  # I am so ... good .
    labels = [torch.tensor([1]) if l.label == 'pos' else torch.tensor([0]) for l in data]  # [1, 0, 0, ... , 1, ...]

    sentences = [tokenizer.encode_plus(s, max_length=max_seq_len, pad_to_max_length=pad_to_max_len) for s
                 in sentences]
    input_ids = [torch.tensor(s['input_ids']) for s in sentences]
    token_type_ids = [torch.tensor(s['token_type_ids']) for s in sentences]
    attn_mask = [torch.tensor(s['attention_mask']) for s in sentences]
    
    return input_ids, \
           token_type_ids, \
           attn_mask, \
           labels

In [7]:
train_input_ids, train_token_type_ids, train_attn_mask, train_labels = bert_tokenized_data(tokenizer, train_data)
valid_input_ids, valid_token_type_ids, valid_attn_mask, valid_labels = bert_tokenized_data(tokenizer, valid_data)
test_input_ids, test_token_type_ids, test_attn_mask, test_labels = bert_tokenized_data(tokenizer, test_data)

In [8]:
from bert_dataset import Corpus
from torch.utils.data import Dataset, DataLoader

In [9]:
train = Corpus(train_input_ids, train_token_type_ids, train_attn_mask, train_labels)
valid = Corpus(valid_input_ids, valid_token_type_ids, valid_attn_mask, valid_labels)
test = Corpus(test_input_ids, test_token_type_ids, test_attn_mask, test_labels)

params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}

train_loader = DataLoader(train, **params)
valid_loader = DataLoader(valid, **params)
test_loader = DataLoader(test, **params)

In [10]:
from transformers import BertModel, BertPreTrainedModel, BertForSequenceClassification

In [11]:
# Bert 모델 정의
bert_config = 'bert-base-cased'
model = BertForSequenceClassification.from_pretrained(bert_config)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=361.0, style=ProgressStyle(description_…




In [13]:
def get_device():
    return 'cuda' if torch.cuda.is_available() else 'cpu'

def get_num_corrects(logits, labels):
    return (logits.max(1)[1] == labels.max(1)[0]).sum()

def train(model, optim, iterator, device):
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:
        optim.zero_grad()
        
        input_ids, token_type_ids, attn_masks, labels = batch[0].to(device), batch[1].to(device), batch[2].to(device), batch[3].to(device)
        loss, logits = model(input_ids=input_ids, attention_mask=attn_masks, token_type_ids=token_type_ids, labels=labels)
        num_corrects = get_num_corrects(logits, labels)
        acc = 100.0 * num_corrects.item() / labels.size(0)
        epoch_loss += loss.item()
        epoch_acc += acc

        loss.backward()
        optim.step()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, otpim, iterator, device):
    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():
        for batch in iterator:
            input_ids, token_type_ids, attn_masks, labels = batch[0].to(device), batch[1].to(device), batch[2].to(device), batch[3].to(device)
            loss, logits = model(input_ids=input_ids, attention_mask=attn_masks, token_type_ids=token_type_ids, labels=labels)
            num_corrects = get_num_corrects(logits, labels)
            acc = 100.0 * num_corrects.item() / labels.size(0)
            epoch_loss += loss.item()
            epoch_acc += acc


    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [30]:
import torch.optim as optim
import torch.nn as nn

torch.backends.cudnn.deterministic = True
torch.cuda.set_device(0)

N_EPOCHS = 5

best_valid_loss = float('inf')

device = get_device()
optimizer = optim.Adam(model.parameters())
model = model.to(device)
# model = nn.DataParallel(model)

for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model, optimizer, train_loader, device)
    valid_loss, valid_acc = evaluate(model, optimizer, valid_loader, device)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), './bert_base_cased_sentence_classification.pt')

    print(f'Train Loss: {train_loss} | Train Acc: {train_acc}%')
    print(f'Val Loss: {valid_loss} |  Val Acc: {valid_acc}%')



RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cuda:1

In [24]:
!nvidia-smi

Sat Feb 15 18:59:30 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN Xp            Off  | 00000000:1E:00.0 Off |                  N/A |
| 23%   24C    P8     8W / 250W |  12191MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN Xp            Off  | 00000000:22:00.0 Off |                  N/A |
| 23%   35C    P8     9W / 250W |  12185MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------