# This notebook fine-tunes and evaluates a BERT model on the Justice subset of the ETHICS datasets.

# Import the necessary libraries.

In [1]:
!pip install pandas
!pip install transformers



In [2]:
import os
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

from tqdm import tqdm
from torch.utils.data import DataLoader
from transformers.trainer_utils import set_seed
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW

In [3]:
# Fix the random seed up front, before the model and data loaders are created,
# using the set_seed helper imported from transformers.trainer_utils.
seed = 42

set_seed(seed)

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
class EthicsDataset(torch.utils.data.Dataset):
    """Tokenized, in-memory view of one CSV split.

    The CSV must contain a ``scenario`` column (the text to classify) and a
    ``label`` column. Every scenario is tokenized once in the constructor and
    padded/truncated to ``max_length``.

    Args:
        tokenizer: Callable invoked as
            ``tokenizer(texts, max_length=..., padding='max_length', truncation=True)``
            that returns a mapping from field name to one sequence of
            integers per example.
        csv_path: Path of the CSV file to load.
        max_length: Length every tokenized scenario is padded/truncated to.
    """

    def __init__(self, tokenizer, csv_path, max_length=64):
        df = pd.read_csv(csv_path)

        self.scenarios = df['scenario'].tolist()
        self.labels = df['label'].tolist()
        self.encodings = tokenizer(self.scenarios,
                                   max_length=max_length,
                                   padding='max_length',
                                   truncation=True)
        # Number of distinct label values actually present in this file.
        self.num_labels = len(set(self.labels))

    def __getitem__(self, idx):
        """Return the encoded fields of example ``idx`` plus its label.

        Each encoded field is an int64 tensor; ``'labels'`` holds the raw
        label value from the CSV.
        """
        # torch.tensor with an explicit integer dtype keeps token ids as
        # integers. The legacy torch.Tensor(...) constructor would silently
        # produce float32 and force callers to cast back with .long().
        item = {k: torch.tensor(v[idx], dtype=torch.long)
                for k, v in self.encodings.items()}
        item['labels'] = self.labels[idx]

        return item

    def __len__(self):
        """Return the number of examples in the split."""
        return len(self.labels)

    def get_num_labels(self):
        """Return the number of distinct labels found in the CSV."""
        return self.num_labels

# Load Tokenizer.

In [6]:
# Checkpoint name; the same name is reused later when the classification
# model is loaded, so tokenizer and model come from the same checkpoint.
model_name = 'bert-base-uncased'

tokenizer = AutoTokenizer.from_pretrained(model_name)

Set the hyperparameters required for training.

The values were chosen by referring to the paper.

In [7]:
# Number of full passes over the training set made by the training loop.
epochs = 2
# Examples per batch; used for the train and both test DataLoaders.
batch_size = 16
# Step size handed to the AdamW optimizer.
learning_rate = 1e-5
# Weight-decay coefficient handed to the AdamW optimizer.
weight_decay = 0.01

# Create training and test datasets.

In [8]:
# Location of the dataset and the three Justice CSV splits inside it.
base_dir = './ethics'
train_name = 'justice/justice_train.csv'
test_name = 'justice/justice_test.csv'
test_hard_name = 'justice/justice_test_hard.csv'

# Build the three datasets in order: train, test, test-hard.
train_dataset, test_dataset, test_hard_dataset = (
    EthicsDataset(tokenizer, os.path.join(base_dir, split_file))
    for split_file in (train_name, test_name, test_hard_name)
)

In [9]:
# Only the training split is shuffled; both test splits keep file order,
# which the grouped exact-match metric in evaluate() depends on.
train_loader, test_loader, test_hard_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=reshuffle)
    for split, reshuffle in (
        (train_dataset, True),
        (test_dataset, False),
        (test_hard_dataset, False),
    )
)

# Load bert-base model and optimizer for training.

In [10]:
# Size the classification head from the labels present in the training split.
num_labels = train_dataset.get_num_labels()
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
model = model.to(device)

# The AdamW class shipped with transformers is deprecated upstream in favour
# of torch.optim.AdamW. eps=1e-6 keeps the epsilon that the transformers
# implementation used by default (torch's own default is 1e-8).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
    eps=1e-6,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

# Let's start training.

In [11]:
from tqdm import tqdm_notebook

def train_epoch(train_loader, model, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: DataLoader yielding dict batches whose values are
            tensors; the batch must include ``'input_ids'`` and everything
            ``model`` needs to compute a loss (e.g. ``'labels'``).
        model: Module called as ``model(**batch)`` whose output exposes
            ``.loss``.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        The example-weighted mean training loss as a Python float
        (``nan`` when the dataset is empty). The value is also printed.
    """
    model.train()

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level global.
    device = next(model.parameters()).device

    total_loss = 0.0
    total_length = len(train_loader.dataset)

    with tqdm(total=len(train_loader), unit='step') as t:
        for batch in train_loader:
            inputs = {k: v.to(device).long() for k, v in batch.items()}

            optimizer.zero_grad()
            outputs = model(**inputs)

            loss = outputs.loss
            loss.backward()
            optimizer.step()

            # .item() detaches the scalar from the autograd graph. Adding the
            # tensor itself would keep every step's history alive and grow
            # memory over the epoch.
            batch_loss = loss.item()
            # Weight by batch size so a short final batch is not over-counted.
            total_loss += batch_loss * len(batch['input_ids'])

            t.set_postfix(loss=f"{batch_loss:.4f}")
            t.update(1)

    loss = total_loss / total_length if total_length else float('nan')

    print(f"Train Loss : {loss:.4f}")

    return loss


@torch.no_grad()
def evaluate(model, test_loader, group_size=4):
    """Compute accuracy and grouped exact-match on ``test_loader``.

    Exact match treats every ``group_size`` consecutive examples as one
    group and counts the group as correct only when all of its examples are
    predicted correctly. Trailing examples that do not fill a whole group
    are ignored for exact match (but still count towards accuracy). The
    loader must therefore not shuffle.

    Args:
        model: Module called as ``model(**batch)`` whose output exposes
            ``.logits`` with classes along dimension 1.
        test_loader: Iterable of dict batches of tensors that include a
            ``'labels'`` entry.
        group_size: Number of consecutive examples per exact-match group.
            Defaults to 4, the value previously hard-coded here.

    Returns:
        ``{'acc': ..., 'em': ...}``. Either value is ``nan`` when there is
        nothing to average over.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
    """
    if group_size < 1:
        raise ValueError(f"group_size must be >= 1, got {group_size}")

    model.eval()

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level global.
    device = next(model.parameters()).device

    per_batch = []
    for batch in test_loader:
        inputs = {k: v.to(device).long() for k, v in batch.items()}

        outputs = model(**inputs)

        predictions = torch.argmax(outputs.logits, dim=1).cpu().numpy()
        labels = inputs['labels'].cpu().numpy()

        per_batch.append(predictions == labels)

    correct = np.concatenate(per_batch) if per_batch else np.zeros(0, dtype=bool)

    acc = np.mean(correct)

    # Reshape to (groups, group_size); a group matches only if every member does.
    n_groups = len(correct) // group_size
    grouped = correct[:n_groups * group_size].reshape(n_groups, group_size)
    em = np.mean(grouped.all(axis=1))

    print(f'Accuracy: {acc:.4f}, Exact match: {em:.4f}')

    results = {
        'acc': acc,
        'em': em,
    }

    return results
    
    
# Alternate one training pass with an evaluation on both test splits.
for epoch in range(epochs):
    print('< Epoch {}/{} >'.format(epoch + 1, epochs))

    # One optimisation pass over the training data.
    train_epoch(train_loader=train_loader, model=model, optimizer=optimizer)

    # Score the regular test split first, then the hard split.
    print('Test Dataset')
    test_results = evaluate(model=model, test_loader=test_loader)
    print('Test Hard Dataset')
    test_hard_results = evaluate(model=model, test_loader=test_hard_loader)


  0%|          | 0/1362 [00:00<?, ?step/s]

< Epoch 1/2 >


100%|██████████| 1362/1362 [04:46<00:00,  4.76step/s, loss=0.3306]


Train Loss : 0.5042
Test Dataset
Accuracy: 0.7352, Exact match: 0.1967
Test Hard Dataset


  0%|          | 0/1362 [00:00<?, ?step/s]

Accuracy: 0.5682, Exact match: 0.0429
< Epoch 2/2 >


100%|██████████| 1362/1362 [04:46<00:00,  4.75step/s, loss=0.3265]


Train Loss : 0.3325
Test Dataset
Accuracy: 0.7548, Exact match: 0.2249
Test Hard Dataset
Accuracy: 0.5819, Exact match: 0.0526


# Save the tokenizer and the fine-tuned model to a local directory.

In [12]:
# Write the tokenizer and the fine-tuned model into the same directory.
tokenizer.save_pretrained('./bert-base-uncased-justice')
model.save_pretrained('./bert-base-uncased-justice')