# 1.import packages

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# genral packages
import torch
import random
import numpy as np
import pandas as pd

from tqdm.notebook import trange, tqdm

from torch import nn
from torch.optim import Adam

In [3]:
from sklearn import metrics

In [4]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.3-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 30.8 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 52.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)
[K     |████████████████████████████████| 120 kB 74.3 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.9.1 tokenizers-0.12.1 transformers-4.21.3


In [5]:
from transformers import BertModel, BertTokenizer

# 2.functions

In [6]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.labels = [labels[label] for label in df['category']]
        self.texts = [tokenizer(text, 
                                padding='max_length', 
                                max_length = 512, 
                                truncation=True,
                                return_tensors="pt") 
                      for text in df['clean_text']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):
        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_texts, batch_y

In [7]:
class BertClassifierEN(nn.Module):
    def __init__(self, dropout=0.5):
        super(BertClassifierEN, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.linear = nn.Linear(768, 3)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask,return_dict=False)
        linear_output = self.linear(pooled_output)
        final_layer = self.relu(linear_output)
        return final_layer

In [17]:
def train(model, train_data, val_data, learning_rate, epochs):
  # get data for training and validation using Dataset
    train, val = Dataset(train_data), Dataset(val_data)
    # get data by batch using DataLoader
    # shuffle the order of samples
    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)
  # use GPU or not 
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # loss function and optimizer 
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)

    if use_cuda:
            model = model.cuda()
            criterion = criterion.cuda()
    # train 
    for epoch_num in range(epochs):
      # accuracy and loss for train data 
            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)
        # get ouput from model 
                output = model(input_id, mask)
                # compute batch loss
                batch_loss = criterion(output, train_label)
                total_loss_train += batch_loss.item()
                # compute batch accuracy 
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc
        # update model 
                model.zero_grad()
                batch_loss.backward()
                optimizer.step()

            # ------ validate model -----------
            # accuracy and loss for validation  data 
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:
        
                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)
  
                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label)
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            print(f'''Epochs: {epoch_num + 1} 
| Train Loss: {total_loss_train / len(train_data): .3f} 
| Train Accuracy: {total_acc_train / len(train_data): .3f} 
| Val Loss: {total_loss_val / len(val_data): .3f} 
| Val Accuracy: {total_acc_val / len(val_data): .3f}''')

In [26]:
def evaluate(model, test_data):

    test = Dataset(test_data)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        model = model.cuda()
    
    trues = []
    preds = []

    with torch.no_grad():
        for test_input, test_label in tqdm(test_dataloader):
              test_label = test_label.to(device)
              mask = test_input['attention_mask'].to(device)
              input_id = test_input['input_ids'].squeeze(1).to(device)
              output = model(input_id, mask)
              true = test_label.cpu().numpy().tolist()
              trues.extend(true)
              pred = output.argmax(dim=1).cpu().numpy().tolist()
              preds.extend(pred)

    return trues, preds

#3. Data 

In [10]:
# read data
df = pd.read_csv('/content/drive/MyDrive/EM/dataset.csv')
df = df[df['diag'] != 'INV']

In [11]:
# define label
# labels = {'EP':1, 'NONEP':2, 'OtherOther':3, 'OtherMess': 4, 'OtherInj': 5}
labels = {'EP':0,
       'NONEP':1,
       'OtherOther':2,
       'OtherPuzzle':2,
       'OtherInj':2}

In [14]:
# cross validation preparation
np.random.seed(112)
df_train, df_test, df_val = np.split(df.sample(frac=1, random_state=42), [int(.8*len(df)), int(.9*len(df))])
print(f'train: {len(df_train)}, validation: {len(df_val)}, test: {len(df_test)}')

train: 2247, validation: 281, test: 281


In [19]:
EPOCHS = 5
LR = 1e-6
model = BertClassifierEN()
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train(model, df_train, df_val, LR, EPOCHS)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 1 
| Train Loss:  0.423 
| Train Accuracy:  0.650 
| Val Loss:  0.282 
| Val Accuracy:  0.865


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 2 
| Train Loss:  0.215 
| Train Accuracy:  0.876 
| Val Loss:  0.166 
| Val Accuracy:  0.907


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 3 
| Train Loss:  0.123 
| Train Accuracy:  0.933 
| Val Loss:  0.132 
| Val Accuracy:  0.915


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 4 
| Train Loss:  0.076 
| Train Accuracy:  0.963 
| Val Loss:  0.122 
| Val Accuracy:  0.915


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 5 
| Train Loss:  0.045 
| Train Accuracy:  0.983 
| Val Loss:  0.119 
| Val Accuracy:  0.918


In [28]:
trues, preds = evaluate(model, df_test)

  0%|          | 0/141 [00:00<?, ?it/s]

In [39]:
average = 'macro'
f1 = metrics.f1_score(trues, preds, average=average)
p = metrics.precision_score(trues, preds, average=average)
r = metrics.recall_score(trues, preds, average=average)
a = metrics.accuracy_score(trues, preds)
print(f'precision: {p:.3f}, recall: {r:.3f}, F1-score: {f1:.3f}, accuracy: {a:.3f}')

precision: 0.909, recall: 0.918, F1-score: 0.913, accuracy: 0.915


In [41]:
average = None
f1 = metrics.f1_score(trues, preds, average=average)
p = metrics.precision_score(trues, preds, average=average)
r = metrics.recall_score(trues, preds, average=average)
a = metrics.accuracy_score(trues, preds)
print(f'''precision:\tEP: {p[0]:.3f}, NONEP: {p[1]:.3f}, OTHER: {p[2]:.3f};
recall:\t\tEP: {r[0]:.3f}, NONEP: {r[1]:.3f}, OTHER: {r[2]:.3f};
F1-score:\tEP: {f1[0]:.3f}, NONEP: {f1[1]:.3f}, OTHER: {f1[2]:.3f};
accuracy:\t{a:.3f}''')

precision:	EP: 0.944, NONEP: 0.889, OTHER: 0.894;
recall:		EP: 0.922, NONEP: 0.879, OTHER: 0.952;
F1-score:	EP: 0.933, NONEP: 0.884, OTHER: 0.922;
accuracy:	0.915


In [42]:
metrics.confusion_matrix(trues, preds)

array([[118,   8,   2],
       [  6,  80,   5],
       [  1,   2,  59]])

In [47]:
print(metrics.classification_report(trues, preds))

              precision    recall  f1-score   support

           0       0.94      0.92      0.93       128
           1       0.89      0.88      0.88        91
           2       0.89      0.95      0.92        62

    accuracy                           0.91       281
   macro avg       0.91      0.92      0.91       281
weighted avg       0.92      0.91      0.91       281

