Install environment

In [None]:
!pip install transformers
!pip install sentencepiece

Upload data to Colab

In [5]:
# After running this cell (Shift+Enter), click "Choose Files" and select 'tr.csv'.
# Run the cell again to upload the remaining files. Four files are needed in total:
#   tr.csv, i172_7000_vs1_text.txt, i172_7000_vs2_label.txt and i172_589_labels_updated.txt
# (the text file name above is the one opened by the "Read data" cell).
from google.colab import files
uploaded = files.upload()

Read data

In [1]:
#preprocess data using updated data
import numpy as np
# Updated claim labels: one integer per line.
# The builtin int replaces np.int, which was only an alias of int and has been
# removed from recent NumPy releases.
with open('i172_7000_vs2_label.txt','r') as f:
  claim = [int(line.strip()) for line in f]

# Sentences: one per line, surrounding whitespace (including the newline) removed.
with open('i172_7000_vs1_text.txt','r') as f:
  texts = [line.strip() for line in f]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """


In [2]:
import pandas as pd
df = pd.read_csv("tr.csv")

# Number of leading CSV rows used for training/validation.
N_TRAIN_ROWS = 7000
# 0-based position of the single sample that is left out of both labels and texts.
# NOTE(review): the reason this sample is excluded is not recorded in the notebook - confirm.
DROPPED_ROW = 4487

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the original frame (avoids SettingWithCopyWarning).
df = df[:N_TRAIN_ROWS].copy()
df['claim_s_new'] = claim
# One 3-element label vector per row: [claim, evidence, reasoning].
df['one_hot_labels'] = list(df[['claim_s_new','evid_s','reasoning_s']].values)
labels = list(df.one_hot_labels.values)
# texts = list(df.irsen_text.values)
labels = labels[:DROPPED_ROW] + labels[DROPPED_ROW + 1:N_TRAIN_ROWS]
texts = texts[:DROPPED_ROW] + texts[DROPPED_ROW + 1:N_TRAIN_ROWS]
texts = [t.replace('\n','') for t in texts]

# This cell shortens `texts` every time it runs; re-running it without re-running
# the "Read data" cell first would silently misalign texts and labels.
assert len(texts) == len(labels), (
    "texts (%d) and labels (%d) are misaligned; re-run the 'Read data' cell first"
    % (len(texts), len(labels))
)

In [3]:
# Every empty sentence receives the label vector of the first sample.
# NOTE(review): presumably labels[0] is the "no claim / no evidence / no reasoning"
# vector - confirm against the data.
for idx, text in enumerate(texts):
  if text == '':
    labels[idx] = labels[0]

Using GPU

In [4]:
import numpy as np
import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Prepare data

In [5]:
# split data to training and validation
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,recall_score,precision_score,accuracy_score
from sklearn.metrics import confusion_matrix
# Hold out 5% of the samples for validation; the fixed random_state keeps the split reproducible.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts,
    labels,
    test_size=0.05,
    random_state=2,
)

In [6]:
# load tokenizer, turn data to bert type token
from transformers import BertTokenizerFast,RobertaTokenizer
from transformers import XLNetTokenizer, XLNetForSequenceClassification
# tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Both splits are tokenized with the same options: truncate over-long inputs and pad each split.
_encode_kwargs = dict(truncation=True, padding=True)
train_encodings = tokenizer(train_texts, **_encode_kwargs)
val_encodings = tokenizer(val_texts, **_encode_kwargs)

In [7]:
# turn data to torch dataset
class bertDataset(torch.utils.data.Dataset):
    """Torch dataset pairing tokenizer encodings with per-sample label vectors.

    Args:
        encodings: mapping from field name to a per-sample indexable sequence
            (it must support ``.items()``, as the tokenizer output does).
        labels: per-sample indexable sequence of label values.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The dataset size is driven by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one sample as a dict of tensors, plus a float 'labels' tensor."""
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])
        # Labels are stored as float tensors.
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

# Wrap the tokenized splits so the Trainer can index them sample by sample.
train_dataset = bertDataset(encodings=train_encodings, labels=train_labels)
val_dataset = bertDataset(encodings=val_encodings, labels=val_labels)

Prepare model

In [8]:
# training metrics, will show result during training
from transformers import EvalPrediction
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
import torch
def multi_label_metrics(p, threshold=0.5):
    """Compute micro-averaged multi-label metrics for the Trainer.

    Args:
        p: pair ``(predictions, labels)``; ``predictions`` are raw logits of
            shape (batch_size, num_labels) and ``labels`` the matching 0/1 targets.
            If ``predictions`` is a tuple, its first element is used.
        threshold: probability at or above which a label counts as predicted.

    Returns:
        dict with keys 'f1' (micro F1), 'roc_auc' (micro ROC AUC) and
        'accuracy' (fraction of samples whose full label vector is correct).
    """
    predictions, labels = p
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Sigmoid turns each logit into an independent per-label probability.
    probs = torch.sigmoid(torch.as_tensor(predictions, dtype=torch.float)).numpy()
    # Threshold the probabilities into 0/1 predictions.
    y_pred = (probs >= threshold).astype(float)
    y_true = labels

    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    # ROC AUC is a ranking metric, so it is scored on the probabilities;
    # the thresholded 0/1 values would collapse the curve to a single point.
    roc_auc = roc_auc_score(y_true, probs, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)

    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

# def compute_metrics(p):
#     pred, labels = p
#     pred = np.argmax(pred, axis=1)

#     accuracy = accuracy_score(y_true=labels, y_pred=pred)
#     recall = recall_score(y_true=labels, y_pred=pred)
#     precision = precision_score(y_true=labels, y_pred=pred)
#     f1 = f1_score(y_true=labels, y_pred=pred)

#     return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [None]:
import inspect

from transformers import BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from torch import nn

# Multi-label BERT classifier with three outputs: claim, evidence, reasoning.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    problem_type="multi_label_classification",
    num_labels=3,
)

# The per-epoch evaluation option was renamed from `evaluation_strategy` to
# `eval_strategy` in newer Transformers releases. Pick whichever name the
# installed version accepts so the cell runs with either.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_kw = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir='./results',
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_steps=100,
    # Reload the checkpoint with the best 'f1' (as reported by multi_label_metrics)
    # once training finishes; evaluation and saving both happen every epoch.
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    **{_eval_strategy_kw: "epoch"},
)
model.to(device)  # put the model on the selected device
trainer = Trainer(
    model=model,                          # the instantiated Transformers model to be trained
    args=training_args,                   # training arguments, defined above
    train_dataset=train_dataset,          # training dataset
    eval_dataset=val_dataset,             # evaluation dataset
    compute_metrics=multi_label_metrics,  # metrics reported at every evaluation
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()

Save model

In [None]:
output_dir = './model'
# When the model is wrapped for distributed/parallel training, the real model
# lives on `.module`; otherwise save the model itself.
model_to_save = getattr(model, 'module', model)
model_to_save.save_pretrained(output_dir)
# Store the tokenizer alongside the weights so the directory is self-contained.
tokenizer.save_pretrained(output_dir)

Download model

In [None]:
from google.colab import files
# Pack the saved model directory into a single archive.
!zip -r /content/model.zip /content/model
# Send the archive to the browser. The path here is relative while the archive is
# written to /content, so this relies on the working directory being /content.
# NOTE(review): confirm the working directory if the download fails.
files.download('model.zip')

Predict 

In [10]:
model.eval()  # inference mode: disables dropout
import torch.nn.functional as F
def predict(content,threshold=.5):
    """Classify one sentence with the fine-tuned multi-label model.

    Args:
        content: text passed to the tokenizer. Only the first sequence of the
            resulting batch is reported.
        threshold: probability at or above which a label counts as predicted.

    Returns:
        Tuple ``(claim_probability, labels)``. ``claim_probability`` is the
        sigmoid output for label 0. ``labels`` lists the predicted label
        indices (0 is claim, 1 is evidence, 2 is reasoning), or ``[3]`` when
        no label reaches the threshold.
    """
    inputs = tokenizer(content,
                       padding='max_length',
                       truncation=True, return_tensors="pt")

    # Move the encoded inputs to the same device as the model.
    ids = inputs["input_ids"].to(device)
    idt = inputs["token_type_ids"].to(device)
    mask = inputs["attention_mask"].to(device)

    # No gradients are needed at inference time; skipping them avoids building
    # an autograd graph for every call.
    with torch.no_grad():
        outputs = model(ids,token_type_ids=idt,attention_mask=mask)
    logits = outputs[0]

    # torch.sigmoid works on whichever device the logits live on.
    probs = torch.sigmoid(logits)
    flattened_predictions = probs.cpu().detach().numpy()[0]

    label = [i for i, prob in enumerate(flattened_predictions) if prob >= threshold]

    # 3 means noclaim, noevidence and noreasoning
    return flattened_predictions[0], ([3] if label == [] else label)

Prepare test data

In [19]:
import pandas as pd
df = pd.read_csv("tr.csv")

# Everything from row 7000 onwards is held out for testing.
test_df = df[7000:]
sentence = test_df.irsen_text.values.tolist()
# Ground truth comes straight from the CSV columns. Loading the claim labels from
# 'i172_589_labels_updated.txt' instead was tried earlier and is currently disabled.
real_c = test_df.claim_s.values.tolist()
real_e = test_df.evid_s.values.tolist()
real_r = test_df.reasoning_s.values.tolist()

In [20]:
pre_c = []  # predicted claim flag (0/1) per test sentence
pre_e = []  # predicted evidence flag (0/1)
pre_r = []  # predicted reasoning flag (0/1)
pro_c = []  # claim probability
for text in sentence:
  claim_prob, predicted = predict(text)
  pro_c.append(claim_prob)
  # predict() returns label indices: 0 is claim, 1 is evidence, 2 is reasoning
  # ([3] when nothing is predicted, which yields 0 for all three flags).
  # pre_c is filled here because the last cell compares it with real_c.
  pre_c.append(1 if 0 in predicted else 0)
  pre_e.append(1 if 1 in predicted else 0)
  pre_r.append(1 if 2 in predicted else 0)

[]

Show result

In [None]:
# Per-class F1 / recall / precision and overall accuracy for the evidence label.
evidence_report = '\n'.join([
    'f1:' + str(f1_score(real_e, pre_e, average=None)),
    'recall:' + str(recall_score(real_e, pre_e, average=None)),
    'precision:' + str(precision_score(real_e, pre_e, average=None)),
    'accuracy:' + str(accuracy_score(real_e, pre_e)),
    '',  # keeps the trailing newline of the report
])
print(evidence_report)

In [None]:
# Confusion matrix for the evidence label (rows: true class, columns: predicted class).
confusion_matrix(y_true=real_e, y_pred=pre_e)

In [None]:
# Per-class F1 / recall / precision and overall accuracy for the reasoning label.
reasoning_report = '\n'.join([
    'f1:' + str(f1_score(real_r, pre_r, average=None)),
    'recall:' + str(recall_score(real_r, pre_r, average=None)),
    'precision:' + str(precision_score(real_r, pre_r, average=None)),
    'accuracy:' + str(accuracy_score(real_r, pre_r)),
    '',  # keeps the trailing newline of the report
])
print(reasoning_report)

In [None]:
# Confusion matrix for the reasoning label (rows: true class, columns: predicted class).
confusion_matrix(y_true=real_r, y_pred=pre_r)

Show probability graph

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Distribution of the predicted claim probability, split by the true claim label.
fig, ax = plt.subplots(1,1,figsize = (10,5))
claim_probs = np.array(pro_c)
claim_truth = np.array(real_c)
ax.hist(claim_probs[claim_truth == 1], color = "darkred",bins = "scott", alpha = .5, edgecolor = "red",
        label = "true label: claim")
ax.hist(claim_probs[claim_truth == 0], color = "darkgreen",bins = "scott", alpha = .5, edgecolor = "green",
        label = "true label: no claim")
# Labels and legend so the figure can be read on its own.
ax.set_xlabel("Predicted claim probability")
ax.set_ylabel("Number of sentences")
ax.set_title("Predicted claim probability by true label")
ax.legend();  # trailing ';' suppresses the repr of the returned object

In [None]:
# Print the sentences whose claim prediction is wrong. The end of each line shows the real label.
# When pre_c has not been filled by the prediction loop, derive the claim
# predictions from the claim probabilities using predict()'s default
# threshold (>= 0.5), so indexing cannot run past an empty list.
claim_pred = pre_c if len(pre_c) == len(real_c) else [int(p >= 0.5) for p in pro_c]
for text, truth, guess in zip(sentence, real_c, claim_pred):
  if truth != guess:
    print(text + (' claim' if truth else ' noclaim'))