# Install Requirements

In [None]:
!pip install transformers datasets

# Connect to Drive

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import glob
from datasets import load_dataset, Dataset

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Load Corpus

In [4]:
import pickle

f = open("/content/drive/MyDrive/Corpus/CG_Corpus/cg_3to1_2previous_event_selection.dat", "rb")
dataset = pickle.load(f)
f.close()

In [5]:
dataset

DatasetDict({
    train: Dataset({
        features: ['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(A)', 'Bel(B)', 'CG(A)', 'CG(B)'],
        num_rows: 970
    })
    test: Dataset({
        features: ['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(A)', 'Bel(B)', 'CG(A)', 'CG(B)'],
        num_rows: 325
    })
})

In [6]:
train_df = dataset['train'].to_pandas()
train_df = train_df[train_df['Bel(A)'] != 0]
dataset['train'] = Dataset.from_pandas(train_df)
dataset['train'] = dataset['train'].remove_columns('__index_level_0__')

test_df = dataset['test'].to_pandas()
test_df = test_df[test_df['Bel(A)'] != 0]
dataset['test'] = Dataset.from_pandas(test_df)
dataset['test'] = dataset['test'].remove_columns('__index_level_0__')

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(A)', 'Bel(B)', 'CG(A)', 'CG(B)'],
        num_rows: 954
    })
    test: Dataset({
        features: ['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(A)', 'Bel(B)', 'CG(A)', 'CG(B)'],
        num_rows: 320
    })
})

# Corpus Support

In [8]:
def cal_class_support(class_number="1", bel_col="Bel(A)", corpora="train"):
  SUM = 0
  for record in dataset[corpora]:
    if record[bel_col] == class_number: SUM+=1
  return SUM

Bel_A_Train_Support = [cal_class_support(1, "Bel(A)", "train"), cal_class_support(2, "Bel(A)", "train"), cal_class_support(3, "Bel(A)", "train"), cal_class_support(4, "Bel(A)", "train"), cal_class_support(0, "Bel(A)", "train")]
Bel_A_Test_Support = [cal_class_support(1, "Bel(A)", "test"), cal_class_support(2, "Bel(A)", "test"), cal_class_support(3, "Bel(A)", "test"), cal_class_support(4, "Bel(A)", "test"), cal_class_support(0, "Bel(A)", "test")]
Bel_B_Train_Support = [cal_class_support(1, "Bel(B)", "train"), cal_class_support(2, "Bel(B)", "train"), cal_class_support(3, "Bel(B)", "train"), cal_class_support(4, "Bel(B)", "train"), cal_class_support(0, "Bel(B)", "train")]
Bel_B_Test_Support = [cal_class_support(1, "Bel(B)", "test"), cal_class_support(2, "Bel(B)", "test"), cal_class_support(3, "Bel(B)", "test"), cal_class_support(4, "Bel(B)", "test"), cal_class_support(0, "Bel(B)", "test")]

print(f"Bel(A) Train : {Bel_A_Train_Support}\nBel(A) Test  : {Bel_A_Test_Support}\nBel(B) Train : {Bel_B_Train_Support}\nBel(B) Test  : {Bel_B_Test_Support}")

Bel(A) Train : [784, 54, 78, 38, 0]
Bel(A) Test  : [261, 35, 20, 4, 0]
Bel(B) Train : [782, 51, 68, 43, 10]
Bel(B) Test  : [258, 35, 14, 4, 9]


# Tokenizer

In [9]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [10]:
def tokenize_function(examples):
    return tokenizer(examples["Event"], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets

Map:   0%|          | 0/954 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(A)', 'Bel(B)', 'CG(A)', 'CG(B)', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 954
    })
    test: Dataset({
        features: ['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(A)', 'Bel(B)', 'CG(A)', 'CG(B)', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 320
    })
})

# Fine-tuning in native PyTorch

In [11]:
tokenized_datasets = tokenized_datasets.remove_columns(['Speaker', 'Sentence_Number', 'Sentence', 'Event', 'Target_Event', 'Bel(B)', 'CG(A)', 'CG(B)'])
tokenized_datasets = tokenized_datasets.rename_column("Bel(A)", "labels")
tokenized_datasets.set_format("torch")

In [12]:
full_train_dataset = tokenized_datasets["train"]
full_eval_dataset = tokenized_datasets["test"]

In [13]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(full_train_dataset, shuffle=True, batch_size=8)
eval_dataloader = DataLoader(full_eval_dataset, batch_size=8)

In [14]:
for record in train_dataloader:
  print(record)
  break

{'labels': tensor([2, 1, 1, 1, 1, 1, 3, 1]), 'input_ids': tensor([[  101, 24142, 14895,  ...,     0,     0,     0],
        [  101, 24142, 14895,  ...,     0,     0,     0],
        [  101, 24142, 14895,  ...,     0,     0,     0],
        ...,
        [  101, 24142, 14895,  ...,     0,     0,     0],
        [  101, 24142, 14895,  ...,     0,     0,     0],
        [  101, 24142, 14895,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}


# Model Training

In [15]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [16]:
from torch.optim import AdamW
from transformers import get_scheduler

optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 12
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [17]:
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [18]:
from tqdm.auto import tqdm
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

progress_bar = tqdm(range(num_training_steps))

# Train the model
model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        batch = {k: v.to(device)
         for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

    # Evaluation in each epochs
    from datasets import load_metric
    import numpy

    predictions_list, labels_list = [], []
    metric = load_metric("accuracy")
    model.eval()
    for batch in eval_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])

        # Confusion matrix
        predictions_list.extend(predictions.cpu().numpy())
        labels_list.extend(batch["labels"].cpu().numpy())

    if epoch==0: print(f"Epoch\tLoss F1_macro F1_micro F1_weighted")
    f1_score_macro = f1_score(labels_list, predictions_list, average='macro')
    f1_score_micro = f1_score(labels_list, predictions_list, average='micro')
    f1_score_weighted = f1_score(labels_list, predictions_list, average='weighted')
    print(f"{epoch + 1}\t{loss:.3f}\t{f1_score_macro:.2f}\t{f1_score_micro:.2f}\t{f1_score_weighted:.2f}")

  0%|          | 0/1440 [00:00<?, ?it/s]

  metric = load_metric("accuracy")


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Epoch	Loss F1_macro F1_micro F1_weighted
1	2.915	0.22	0.82	0.73
2	1.656	0.22	0.82	0.73
3	0.853	0.22	0.82	0.73
4	0.649	0.29	0.76	0.74
5	0.462	0.31	0.77	0.76
6	0.147	0.33	0.83	0.79
7	0.005	0.36	0.80	0.78
8	0.031	0.37	0.79	0.78
9	0.000	0.35	0.78	0.77
10	0.000	0.32	0.79	0.76
11	0.000	0.35	0.80	0.78
12	0.007	0.35	0.80	0.77


# Model Evaluation

In [19]:
from datasets import load_metric
from sklearn.metrics import confusion_matrix
import numpy

predictions_list, labels_list = [], []
metric = load_metric("accuracy")
model.eval()
for batch in eval_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**batch)

    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    metric.add_batch(predictions=predictions, references=batch["labels"])

    # Confusion matrix
    predictions_list.extend(predictions.cpu().numpy())
    labels_list.extend(batch["labels"].cpu().numpy())

print(metric.compute())
cm = confusion_matrix(labels_list, predictions_list)
print(cm)

{'accuracy': 0.796875}
[[243   3  14   1]
 [ 25   4   6   0]
 [  8   4   8   0]
 [  2   1   1   0]]


### Confusion Matrix

# Save Model Results in CSV file

In [20]:
results_dict = {
    'Speaker': [],
    'Sentence_Number': [],
    'Sentence': [],
    'Event': [],
    'Target_Event': [],
    'Predicted Bel(A)': [],
    'Predicted Bel(B)': [],
    'Bel(A)': [],
    'Bel(B)': [],
    'CG(A)': [],
    'CG(B)': [],
}

In [21]:
from tqdm.auto import tqdm

samples_number = len(dataset['test'])
progress_bar = tqdm(range(samples_number))
predictions_list = []
labels_list = []
for i in range(samples_number):
  text = dataset['train']['Event'][i]
  tokens = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
  tokens.to(device)
  outputs = model(**tokens)
  logits = outputs.logits
  prediction = torch.argmax(logits, dim=-1).tolist()[0]
  predictions_list.append(prediction)
  labels_list.append(dataset['test']['Bel(A)'][i])

  results_dict['Speaker'].append(dataset['test']['Speaker'][i])
  results_dict['Sentence_Number'].append(dataset['test']['Sentence_Number'][i])
  results_dict['Sentence'].append(dataset['test']['Sentence'][i])
  results_dict['Event'].append(dataset['test']['Event'][i])
  results_dict['Target_Event'].append(dataset['test']['Target_Event'][i])
  results_dict['Predicted Bel(A)'].append(prediction)
  results_dict['Predicted Bel(B)'].append(prediction)
  results_dict['Bel(A)'].append(dataset['test']['Bel(A)'][i])
  results_dict['Bel(B)'].append(dataset['test']['Bel(B)'][i])
  results_dict['CG(A)'].append(dataset['test']['CG(A)'][i])
  results_dict['CG(B)'].append(dataset['test']['CG(B)'][i])

  progress_bar.update(1)

  0%|          | 0/320 [00:00<?, ?it/s]

In [22]:
results_df = pd.DataFrame.from_dict(results_dict)
results_df.to_csv('results.csv')

### Bel(A)

In [23]:
from tqdm.auto import tqdm
from sklearn.metrics import classification_report

samples_number = len(dataset['test'])
progress_bar = tqdm(range(samples_number))
predictions_list = []
labels_list = []
for i in range(samples_number):
  text = dataset['test']['Event'][i]
  tokens = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
  tokens.to(device)
  outputs = model(**tokens)
  logits = outputs.logits
  prediction = torch.argmax(logits, dim=-1).tolist()[0]
  predictions_list.append(prediction)
  labels_list.append(dataset['test']['Bel(A)'][i])
  progress_bar.update(1)

report = classification_report(labels_list, predictions_list, zero_division=0)
print(report)

  0%|          | 0/320 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           1       0.87      0.93      0.90       261
           2       0.33      0.11      0.17        35
           3       0.28      0.40      0.33        20
           4       0.00      0.00      0.00         4

    accuracy                           0.80       320
   macro avg       0.37      0.36      0.35       320
weighted avg       0.77      0.80      0.77       320



### Bel(B)

In [24]:
test_df = dataset['test'].to_pandas()
test_df = test_df[test_df['Bel(B)'] != 0]
dataset['test'] = Dataset.from_pandas(test_df)
dataset['test'] = dataset['test'].remove_columns('__index_level_0__')

In [25]:
from tqdm.auto import tqdm
from sklearn.metrics import classification_report

samples_number = len(dataset['test'])
progress_bar = tqdm(range(samples_number))
predictions_list = []
labels_list = []
for i in range(samples_number):
  text = dataset['test']['Event'][i]
  tokens = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
  tokens.to(device)
  outputs = model(**tokens)
  logits = outputs.logits
  prediction = torch.argmax(logits, dim=-1).tolist()[0]
  predictions_list.append(prediction)
  labels_list.append(dataset['test']['Bel(B)'][i])
  progress_bar.update(1)

report = classification_report(labels_list, predictions_list, zero_division=0)
print(report)

  0%|          | 0/311 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           1       0.87      0.91      0.89       258
           2       0.33      0.11      0.17        35
           3       0.14      0.29      0.19        14
           4       0.00      0.00      0.00         4

    accuracy                           0.78       311
   macro avg       0.34      0.33      0.31       311
weighted avg       0.77      0.78      0.77       311



# Test on some Samples

In [None]:
from tqdm.auto import tqdm

samples_number = len(dataset['train'])
progress_bar = tqdm(range(samples_number))

true_prediction_0, true_prediction_1, true_prediction_2, true_prediction_3, true_prediction_4 = 0, 0, 0, 0, 0
false_prediction_0, false_prediction_1, false_prediction_2, false_prediction_3, false_prediction_4 = 0, 0, 0, 0, 0

for i in range(samples_number):
  text = dataset['train']['Event'][i]
  tokens = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
  tokens.to(device)
  outputs = model(**tokens)
  logits = outputs.logits
  prediction = torch.argmax(logits, dim=-1).tolist()[0]

  if prediction == 0 and dataset['train']["Bel(A)"][i] == 0: true_prediction_0 += 1
  if not prediction == 0 and dataset['train']["Bel(A)"][i] == 0: false_prediction_0 += 1

  if prediction == 1 and dataset['train']["Bel(A)"][i] == 1: true_prediction_1 += 1
  if not prediction == 1 and dataset['train']["Bel(A)"][i] == 1: false_prediction_1 += 1

  if prediction == 2 and dataset['train']["Bel(A)"][i] == 2: true_prediction_2 += 1
  if not prediction == 2 and dataset['train']["Bel(A)"][i] == 2: false_prediction_2 += 1

  if prediction == 3 and dataset['train']["Bel(A)"][i] == 3: true_prediction_3 += 1
  if not prediction == 3 and dataset['train']["Bel(A)"][i] == 3: false_prediction_3 += 1

  if prediction == 4 and dataset['train']["Bel(A)"][i] == 4: true_prediction_4 += 1
  if not prediction == 4 and dataset['train']["Bel(A)"][i] == 4: false_prediction_4 += 1

  progress_bar.update(1)

  0%|          | 0/424 [00:00<?, ?it/s]

### Recall: TP/(TP+FN)

In [None]:
print("None: ", "\tTP =", true_prediction_0, "\tFN =", false_prediction_0, f" \tRecall = {(true_prediction_0/(true_prediction_0+false_prediction_0)):.4f} %")
print("CT+: ", "\tTP =", true_prediction_1, "\tFN =", false_prediction_1, f" \tRecall = {(true_prediction_1/(true_prediction_1+false_prediction_1)):.4f} %")
print("CT-: ", "\tTP =", true_prediction_2, "\tFN =", false_prediction_2, f" \tRecall = {(true_prediction_2/(true_prediction_2+false_prediction_2)):.4f} %")
print("PS: ", "\tTP =", true_prediction_3, "\tFN =", false_prediction_3, f" \tRecall = {(true_prediction_3/(true_prediction_3+false_prediction_3)):.4f} %")
print("NB: ", "\tTP =", true_prediction_4, "\tFN =", false_prediction_4, f" \tRecall = {(true_prediction_4/(true_prediction_4+false_prediction_4)):.4f} %")

None:  	TP = 3 	FN = 0  	Recall = 1.0000 %
CT+:  	TP = 343 	FN = 0  	Recall = 1.0000 %
CT-:  	TP = 19 	FN = 0  	Recall = 1.0000 %
PS:  	TP = 30 	FN = 0  	Recall = 1.0000 %
NB:  	TP = 29 	FN = 0  	Recall = 1.0000 %


In [None]:
from tqdm.auto import tqdm

samples_number = len(dataset['test'])
progress_bar = tqdm(range(samples_number))

true_prediction_0, true_prediction_1, true_prediction_2, true_prediction_3, true_prediction_4 = 0, 0, 0, 0, 0
false_prediction_0, false_prediction_1, false_prediction_2, false_prediction_3, false_prediction_4 = 0, 0, 0, 0, 0

for i in range(samples_number):
  text = dataset['test']['Event'][i]
  tokens = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
  tokens.to(device)
  outputs = model(**tokens)
  logits = outputs.logits
  prediction = torch.argmax(logits, dim=-1).tolist()[0]

  if prediction == 0 and dataset['test']["Bel(A)"][i] == 0: true_prediction_0 += 1
  if not prediction == 0 and dataset['test']["Bel(A)"][i] == 0: false_prediction_0 += 1

  if prediction == 1 and dataset['train']["Bel(A)"][i] == 1: true_prediction_1 += 1
  if not prediction == 1 and dataset['test']["Bel(A)"][i] == 1: false_prediction_1 += 1

  if prediction == 2 and dataset['test']["Bel(A)"][i] == 2: true_prediction_2 += 1
  if not prediction == 2 and dataset['test']["Bel(A)"][i] == 2: false_prediction_2 += 1

  if prediction == 3 and dataset['test']["Bel(A)"][i] == 3: true_prediction_3 += 1
  if not prediction == 3 and dataset['test']["Bel(A)"][i] == 3: false_prediction_3 += 1

  if prediction == 4 and dataset['test']["Bel(A)"][i] == 4: true_prediction_4 += 1
  if not prediction == 4 and dataset['test']["Bel(A)"][i] == 4: false_prediction_4 += 1

  progress_bar.update(1)

  0%|          | 0/424 [00:00<?, ?it/s]

In [None]:
print("None: ", "\tTP =", true_prediction_0, "\tFN =", false_prediction_0, f" \tRecall = {(true_prediction_0/(true_prediction_0+false_prediction_0)):.4f} %")
print("CT+: ", "\tTP =", true_prediction_1, "\tFN =", false_prediction_1, f" \tRecall = {(true_prediction_1/(true_prediction_1+false_prediction_1)):.4f} %")
print("CT-: ", "\tTP =", true_prediction_2, "\tFN =", false_prediction_2, f" \tRecall = {(true_prediction_2/(true_prediction_2+false_prediction_2)):.4f} %")
print("PS: ", "\tTP =", true_prediction_3, "\tFN =", false_prediction_3, f" \tRecall = {(true_prediction_3/(true_prediction_3+false_prediction_3)):.4f} %")
print("NB: ", "\tTP =", true_prediction_4, "\tFN =", false_prediction_4, f" \tRecall = {(true_prediction_4/(true_prediction_4+false_prediction_4)):.4f} %")

None:  	TP = 0 	FN = 2  	Recall = 0.0000 %
CT+:  	TP = 294 	FN = 36  	Recall = 0.8909 %
CT-:  	TP = 4 	FN = 24  	Recall = 0.1429 %
PS:  	TP = 9 	FN = 15  	Recall = 0.3750 %
NB:  	TP = 11 	FN = 11  	Recall = 0.5000 %
