In [26]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
import os

### Notes:
- To run this code you need the multi-task-NLP framework XXX
- Models and data specific for this task are saved under ceph/storage/data-in-progress/data-research/arguana/argmining22-sharedtask/models/multitask
- In the nli_based_task.yml, you can see the task specific parameters. If you wanna focus on testing for one task, you can remove the other one from the file

In [69]:
output_path = "../../data-ceph/arguana/argmining22-sharedtask/models/multitask" #replace this with your equivelant path..

#### Prepare the files for training:

- Here we prepare the data into tsv format with the corrsponding columns, so the parser of the frmaework can generate the json files required for training.

In [60]:
taska_training_df = pd.read_csv('../data/TaskA_train.csv')
taska_valid_df = pd.read_csv('../data/TaskA_dev.csv')

taska_training_df.insert(loc=0,column='row_num',value=np.arange(len(taska_training_df)))
taska_valid_df.insert(loc=0,column='row_num',value=np.arange(len(taska_valid_df)))
#The </s></s>  is the separator used in the pre-trained nli model..
taska_training_df['input_txt'] = taska_training_df.apply(lambda x: '{}:{}  </s></s> {} '.format(x['topic'], x['Premise'], x['Conclusion']), axis=1)
taska_valid_df['input_txt'] = taska_valid_df.apply(lambda x: '{}:{} </s></s> {}'.format(x['topic'], x['Premise'], x['Conclusion']), axis=1)


taska_validity_train_df = taska_training_df[taska_training_df.Validity != 0].copy()
taska_validity_valid_df = taska_valid_df[taska_valid_df.Validity != 0].copy()
taska_validity_train_df['label'] = taska_validity_train_df.Validity.apply(lambda x : "valid" if x == 1 else "invalid")
taska_validity_valid_df['label'] = taska_validity_valid_df.Validity.apply(lambda x : "valid" if x == 1 else "invalid")


taska_novelty_train_df = taska_training_df[taska_training_df.Novelty != 0].copy()
taska_novelty_valid_df = taska_valid_df[taska_valid_df.Novelty != 0].copy()
taska_novelty_train_df['label'] = taska_novelty_train_df.Novelty.apply(lambda x : "novel" if x == -1 else "conservative")
taska_novelty_valid_df['label'] = taska_novelty_valid_df.Novelty.apply(lambda x : "novel" if x == -1 else "conservative")

In [61]:
taska_validity_train_df.label.value_counts()

valid      401
invalid    320
Name: label, dtype: int64

In [62]:
taska_novelty_train_df.label.value_counts()

novel           595
conservative    123
Name: label, dtype: int64

In [63]:
taska_validity_train_df[['row_num',  'label', 'input_txt']].to_csv('../data/multitask_data/validity_training_df.tsv', sep='\t', header=False, index=False)
taska_validity_valid_df[['row_num', 'label', 'input_txt' ]].to_csv('../data/multitask_data/validity_valid_df.tsv', sep='\t', header=False, index=False)
taska_validity_valid_df[['row_num', 'label', 'input_txt' ]].to_csv('../data/multitask_data/validity_test_df.tsv', sep='\t', header=False, index=False)

taska_novelty_train_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_training_df.tsv', sep='\t', header=False, index=False)
taska_novelty_valid_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_valid_df.tsv', sep='\t', header=False, index=False)
taska_novelty_valid_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_test_df.tsv', sep='\t', header=False, index=False)

### Train BERT model:

- The baseline to compare to

In [87]:
! python ../../multi-task-NLP/data_preparation.py \
  --task_file ../data/multitask_data/bert_based_task.yml \
  --data_dir ../data/multitask_data \
  --max_seq_len 512

task object created from task file...
loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/curren

In [90]:
! python ../../multi-task-NLP/train.py \
      --data_dir ../data/multitask_data/bert-base-uncased_prepared_data \
      --task_file ../data/multitask_data/nli_based_task.yml \
      --learning_rate 5e-6 \
      --out_dir $output'/bert_model/' \
      --epochs 5 \
      --train_batch_size 8 \
      --eval_batch_size 8 \
      --grad_accumulation_steps 1 \
      --max_seq_len 512 \
      --log_per_updates 100 \
      --log_dir /var/tmp/argsvalidnovel/multitask \
      --limit_save 15\
      --eval_while_train\
      --test_while_train

INFO - logger created.
INFO - ARGS : {'data_dir': '../data/multitask_data/bert-base-uncased_prepared_data', 'task_file': '../data/multitask_data/nli_based_task.yml', 'out_dir': '/bert_model/', 'epochs': 5, 'freeze_shared_model': False, 'train_batch_size': 8, 'eval_batch_size': 8, 'grad_accumulation_steps': 1, 'num_of_warmup_steps': 0, 'learning_rate': 5e-06, 'epsilon': 1e-08, 'grad_clip_value': 1.0, 'log_file': 'multi_task_logs.log', 'log_dir': '/var/tmp/argsvalidnovel/multitask', 'log_per_updates': 100, 'seed': 42, 'max_seq_len': 512, 'save_per_updates': 0, 'limit_save': 15, 'load_saved_model': None, 'eval_while_train': True, 'test_while_train': True, 'resume_train': False, 'finetune': False, 'load_pretrained_classifiers': False, 'debug_mode': False, 'silent': False}
INFO - Task params object created from task file...
INFO - task parameters:
 {'TaskA': {'config_name': 'roberta-large-mnli', 'dropout_prob': 0.2, 'file_names': ['novelty_training_df.tsv', 'novelty_valid_df.tsv', 'novelty_

### Train NLI Model:

In [92]:
! python ../../multi-task-NLP/data_preparation.py \
  --task_file ../data/multitask_data/nli_based_task.yml \
  --data_dir ../data/multitask_data \
  --max_seq_len 512

task object created from task file...
loading file https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/64a1d72b2bd05b0aff1a4dd9e7a90a6eea0312b4f914e80b0a923aa8f72219bd.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/425529714b758f50b6d3f93f8093d859856fd41cf1cec7c8edf2ab44aee632b6.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/roberta-large-mnli/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/tokenizer_config.json from cache at None
loading configuration file https://hu

In [93]:
! python ../../multi-task-NLP/train.py \
      --data_dir ../data/multitask_data/roberta-large-mnli_prepared_data/ \
      --task_file ../data/multitask_data/nli_based_task.yml \
      --learning_rate 5e-6 \
      --out_dir $output'/nli_model/' \
      --epochs 5 \
      --train_batch_size 8 \
      --eval_batch_size 8 \
      --grad_accumulation_steps 1 \
      --max_seq_len 512 \
      --log_per_updates 100 \
      --log_dir /var/tmp/argsvalidnovel/multitask \
      --limit_save 15\
      --eval_while_train\
      --load_pretrained_classifiers \
      --test_while_train

INFO - logger created.
INFO - ARGS : {'data_dir': '../data/multitask_data/roberta-large-mnli_prepared_data/', 'task_file': '../data/multitask_data/nli_based_task.yml', 'out_dir': '/nli_model/', 'epochs': 5, 'freeze_shared_model': False, 'train_batch_size': 8, 'eval_batch_size': 8, 'grad_accumulation_steps': 1, 'num_of_warmup_steps': 0, 'learning_rate': 5e-06, 'epsilon': 1e-08, 'grad_clip_value': 1.0, 'log_file': 'multi_task_logs.log', 'log_dir': '/var/tmp/argsvalidnovel/multitask', 'log_per_updates': 100, 'seed': 42, 'max_seq_len': 512, 'save_per_updates': 0, 'limit_save': 15, 'load_saved_model': None, 'eval_while_train': True, 'test_while_train': True, 'resume_train': False, 'finetune': False, 'load_pretrained_classifiers': True, 'debug_mode': False, 'silent': False}
INFO - Task params object created from task file...
INFO - task parameters:
 {'TaskA': {'config_name': 'roberta-large-mnli', 'dropout_prob': 0.2, 'file_names': ['novelty_training_df.tsv', 'novelty_valid_df.tsv', 'novelty_

### Perform prediction:

NOTE: There is still an issue when loading the model through the infer file. The results here are not reflective of the model performance. For now look at the logged classification_f1_score during training....

In [48]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [68]:
import sys
import glob
import json
sys.path.append('../../multi-task-NLP/')
from sklearn.metrics import f1_score, precision_score, recall_score

In [69]:
%autoreload
from infer_pipeline import inferPipeline

In [70]:
#bert_model = inferPipeline(modelPath = '../data/multitask_data/bert_model/multi_task_model_10_995.pt', maxSeqLen = 512)
nli_model  = inferPipeline(modelPath = '../data/multitask_data/nli_model/multi_task_model_4_455.pt', maxSeqLen = 512, load_pretrained_classifiers=True)

loading file https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/64a1d72b2bd05b0aff1a4dd9e7a90a6eea0312b4f914e80b0a923aa8f72219bd.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/425529714b758f50b6d3f93f8093d859856fd41cf1cec7c8edf2ab44aee632b6.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/roberta-large-mnli/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/tokenizer_config.json from cache at None
loading configuration file https://huggingface.co/roberta-large-mnli/resolv

In [71]:
#bert_preds = bert_model.infer(taska_valid_df['input_txt'].tolist(), ['TaskA', 'TaskB']) 
nli_preds  = nli_model.infer(taska_valid_df['input_txt'].tolist(), ['TaskB']) 

Eval: 100%|██████████| 26/26 [00:01<00:00, 17.63it/s]


In [72]:
nli_preds

[{'Query': 'Vegetarianism:This is frequent argument of animal rights activists; that animals deserve rights because they have at least as much capacity to reason as do some retarded humans, who retain rights. The problem with this argument is that it fails to see rights as a thing that must be shared among a group of creatures, not something that is extended on an individual basis. Therefore, the question is not whether some humans are incapable of having rights, but rather whether human kind, as a species, is capable of having rights. They are. Non-human animals, conversely, as a class of organisms, are not capable of holding rights. </s></s> Humans have rights because they can reason; animals do not deserve rights',
  'TaskB': ['ENTAILMENT',
   array([0.28952143, 0.54605544, 0.6431212 ], dtype=float32)]},
 {'Query': 'Vegetarianism:Almost all dangerous types of food-poisoning (e.g. E-coli, salmonella) are passed on through meat or eggs. Close contact between humans and animals also le

In [74]:
[(x['TaskB'][0]) for x in nli_preds]

['ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILMENT',
 'ENTAILME

In [54]:
#taska_valid_df['bert_pred_validity'] = [x['TaskB'][0] for x in bert_preds]
#taska_valid_df['bert_pred_novelty']  = [x['TaskA'][0] for x in bert_preds]
taska_valid_df['nli_pred_validity']  = [1 if x['TaskB'][0] == 'ENTAILMENT' else 0 for x in nli_preds]
taska_valid_df['nli_pred_novelty']   = [1 if x['TaskA'][0] == 'NEUTRAL' else 0 for x in nli_preds]

KeyError: 'TaskA'

In [43]:
taska_valid_df.head()

Unnamed: 0,row_num,topic,Premise,Conclusion,Validity,Validity-Confidence,Novelty,Novelty-Confidence,input_txt,nli_pred_validity,nli_pred_novelty
0,0,Vegetarianism,This is frequent argument of animal rights act...,Humans have rights because they can reason; an...,-1,confident,-1,very confident,Vegetarianism:This is frequent argument of ani...,0,1
1,1,Vegetarianism,Almost all dangerous types of food-poisoning (...,Vegetarians often pass diseases through animals,-1,very confident,1,majority,Vegetarianism:Almost all dangerous types of fo...,0,1
2,2,Vegetarianism,Almost all dangerous types of food-poisoning (...,Vegetarianism is a risk to human health,-1,very confident,1,majority,Vegetarianism:Almost all dangerous types of fo...,0,1
3,3,Vegetarianism,Almost all dangerous types of food-poisoning (...,Vegetarians are vulnerable to disease and pests,-1,very confident,1,majority,Vegetarianism:Almost all dangerous types of fo...,0,1
4,4,Vegetarianism,The notion of man's dominion over animals need...,"Man's ""dominion"" over animals does not imply a...",1,very confident,-1,very confident,Vegetarianism:The notion of man's dominion ove...,0,1


In [44]:
#print('BERT Validity: ', f1_score(taska_valid_df.Validity.tolist(), taska_valid_df.bert_pred_validity.tolist(), average='macro'))
#print('BERT Novelty: ', f1_score(taska_valid_df.Novelty.tolist(), taska_valid_df.bert_pred_novelty.tolist(), average='macro'))

print('NLI Validity: ', f1_score(taska_valid_df.Validity.tolist(), taska_valid_df.nli_pred_validity.tolist(), average='macro'))
print('NLI Novelty: ', f1_score(taska_valid_df.Novelty.tolist(), taska_valid_df.nli_pred_novelty.tolist(), average='macro'))

NLI Validity:  0.03526121266606733
NLI Novelty:  0.19248826291079815
