In [20]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from pytorch_transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [21]:
train_df = pd.read_csv("../pytorch/data/training.txt", sep="\t")
valid_df = pd.read_csv("../pytorch/data/validing.txt", sep="\t")
test_df = pd.read_csv("../pytorch/data/testing.txt", sep="\t")

In [22]:
train_df = train_df.sample(frac=0.1, random_state=500)
valid_df = valid_df.sample(frac=0.1, random_state=500)
test_df = test_df.sample(frac=0.1, random_state=500)

In [23]:
class Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        text = self.df.iloc[idx, 1]
        label = self.df.iloc[idx, 2]
        return text, label

In [24]:
train_dataset = Dataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=0)

valid_dataset = Dataset(valid_df)
valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=True, num_workers=0)

test_dataset = Dataset(test_df)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=True, num_workers=0)

In [25]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [26]:
def save_checkpoint(save_path, model, valid_loss):
    if not save_path:
        return
    state_dict = {"model_state_dict" : model.state_dict(), "valid_loss" : valid_loss}
    torch.save(state_dict, save_path)
    print(f"Model saved to ==> {save_path}")
    
def load_checkpoint(load_path, model):
    if not load_path:
        return 
    state_dict = torch.load(load_path, map_location=device)
    print(f"Model loaded from <== {load_path}")
    model.load_state_dict(state_dict["model_state_dict"])
    return state_dict["valid_loss"]

def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):
    if not save_path:
        return 
    state_dict = {"train_loss_list" : train_loss_list,
                  "valid_loss_list" : valid_loss_list,
                  "test_loss_list" : global_steps_list}
    torch.save(state_dict, save_path)
    print(f"Model saved to ==> {save_path}")
    
def load_metrics(load_path):
    if not load_path:
        return 
    state_dict = torch.load(load_path, map_location=device)
    print(f"Model loaded from <== {load_path}")
    return state_dict["train_loss_list"], state_dict["valid_loss_list"], state_dict["global_steps_list"]

In [27]:
def train(model, optimizer, criterion=nn.BCELoss(), num_epochs=5, eval_every=len(train_loader) // 2, best_valid_loss=float("Inf")):
    total_correct = 0.0
    total_len = 0.0
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []
    
    model.train()
    for epoch in range(num_epochs):
        for text, label in train_loader:
            optimizer.zero_grad()
            encoded_list = [tokenizer.encode(t, add_special_tokens=True) for t in text]
            padded_list = [e + [0] * (512 - len(e)) for e in encoded_list]
            sample = torch.tensor(padded_list)
            sample, label = sample.to(device), label.to(device)
            labels = torch.tensor(label)
            outputs = model(sample, labels=labels)
            loss, logits = outputs
            
            pred = torch.argmax(F.softmax(logits), dim=1)
            correct = pred.eq(labels)
            total_correct += correct.sum().item()
            total_len += len(labels)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            global_step += 1
            
            if global_step % eval_every == 0:
                model.eval()
                with torch.no_grad():
                    for text, label in valid_loader:
                        encoded_list = [tokenizer.encode(t, add_special_tokens=True) for t in text]
                        padded_list = [e + [0] * (512 - len(e)) for e in encoded_list]
                        sample = torch.tensor(padded_list)
                        sample, label = sample.to(device), label.to(device)
                        labels = torch.tensor(label)
                        outputs = model(sample, labels=labels)
                        loss, logits = outputs
                        valid_running_loss += loss.item()
                        
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(valid_loader)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)
                
                running_loss = 0.0
                valid_running_loss = 0.0
                model.train()
                
                print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{global_step}/{num_epochs * len(train_loader)}], Train Loss: {average_train_loss : .4f}, Valid Loss: {average_valid_loss : .4f}")
                
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
                    save_checkpoint("../pytorch/data/model.pt", model, best_valid_loss)
                    save_metrics("../pytorch/data/metrics.pt", train_loss_list, valid_loss_list, global_steps_list)
    save_metrics("../pytorch/data/metrics.pt", train_loss_list, valid_loss_list, global_steps_list)
    print("훈련 종료!")

In [28]:
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model=model, optimizer=optimizer)

  labels = torch.tensor(label)
  pred = torch.argmax(F.softmax(logits), dim=1)
  labels = torch.tensor(label)


Epoch [1/5], Step [510/5100], Train Loss:  0.7161, Valid Loss:  0.7060
Model saved to ==> ../pytorch/data/model.pt
Model saved to ==> ../pytorch/data/metrics.pt
Epoch [1/5], Step [1020/5100], Train Loss:  0.7078, Valid Loss:  0.6956
Model saved to ==> ../pytorch/data/model.pt
Model saved to ==> ../pytorch/data/metrics.pt
Epoch [2/5], Step [1530/5100], Train Loss:  0.7054, Valid Loss:  0.7052
Epoch [2/5], Step [2040/5100], Train Loss:  0.7033, Valid Loss:  0.7122
Epoch [3/5], Step [2550/5100], Train Loss:  0.7042, Valid Loss:  0.6949
Model saved to ==> ../pytorch/data/model.pt
Model saved to ==> ../pytorch/data/metrics.pt
Epoch [3/5], Step [3060/5100], Train Loss:  0.7028, Valid Loss:  0.6944
Model saved to ==> ../pytorch/data/model.pt
Model saved to ==> ../pytorch/data/metrics.pt
Epoch [4/5], Step [3570/5100], Train Loss:  0.7102, Valid Loss:  0.6938
Model saved to ==> ../pytorch/data/model.pt
Model saved to ==> ../pytorch/data/metrics.pt
Epoch [4/5], Step [4080/5100], Train Loss:  0.7

In [29]:
train_loss_list, valid_loss_list, global_steps_list = load_metrics("../pytorch/data/metrics.pt")

plt.plot(global_steps_list, train_loss_list, label="Train")
plt.plot(global_steps_list, valid_loss_list, label="Valid")
plt.xlabel("Global Steps")
plt.ylabel("Loss")
plt.legend()
plt.show()

Model loaded from <== ../pytorch/data/metrics.pt


KeyError: 'global_steps_list'

In [None]:
def evaluate(model, test_loader):
    y_pred = []
    y_true = []
    
    model.eval()
    with torch.no_grad():
        for text, label in test_loader:
            encoded_list = [tokenizer.encode(t, add_special_tokens=True) for t in text]
            padded_list = [e + [0] * (512 - len(e)) for e in encoded_list]
            sample = torch.tensor(padded_list)
            sample, label = sample.to(device), label.to(device)
            labels = torch.tensor(label)
            output = model(sample, labels=labels)
            _, output = output
            y_pred.extend(torch.argmax(output, 1).tolist())
            y_true.extend(labels.tolist())
            
    print("Classification 결과:")
    print(classification_report(y_true, y_pred, labels=[1, 0], digits=4))
    
    cm = confusion_matrix(y_true, y_pred, labels=[1, 0])
    ax = plt.subplot()
    sns.heatmap(cm, annot=True, ax=ax, cmap="Blues", fmt="d")
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted Labels")
    ax.set_ylabel("True Labels")
    ax.xaxis.set_ticklabels(["0", "1"])
    ax.yaxis.set_ticklabels(["0", "1"])

In [None]:
best_model = model.to(device)
load_checkpoint("../pytorch/data/model.pt", best_model)
evaluate(best_model, test_loader)