In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=123)

import os

### To obtain the predictions:
1. Replace the location of the test file and the output_path
2. Run the two commands to train our approach for validity and novelty (During training, the predictions for the test file would be generated after every epoch)
3. Extract the predictions as liklihood of novelty and validity

In [15]:
output_path = "../../data-ceph/arguana/argmining22-sharedtask/models/multitask" # Replace this with your equivelant path in your docker image: /mnt/ceph/storage/data-in-progress/data-research/arguana/argmining22-sharedtask/models/multitask

### Prepare the data:

In [12]:
taska_training_df = pd.read_csv('../data/TaskA_train.csv')
taska_valid_df    = pd.read_csv('../data/TaskA_dev.csv')
taska_test_df     = pd.read_csv('../data/TaskA_dev.csv') #Replace this with the path to the test file

taska_training_df.insert(loc=0,column='row_num',value=np.arange(len(taska_training_df)))
taska_valid_df.insert(loc=0,column='row_num',value=np.arange(len(taska_valid_df)))
taska_test_df.insert(loc=0,column='row_num',value=np.arange(len(taska_test_df)))

#The </s></s>  is the separator used in the pre-trained nli model..
taska_training_df['input_txt'] = taska_training_df.apply(lambda x: '{}:{}  </s></s> {} '.format(x['topic'], x['Premise'], x['Conclusion']), axis=1)
taska_valid_df['input_txt']    = taska_valid_df.apply(lambda x: '{}:{} </s></s> {}'.format(x['topic'], x['Premise'], x['Conclusion']), axis=1)
taska_test_df['input_txt']     = taska_test_df.apply(lambda x: '{}:{} </s></s> {}'.format(x['topic'], x['Premise'], x['Conclusion']), axis=1)

taska_validity_train_df = taska_training_df[taska_training_df.Validity != 0].copy()
taska_validity_valid_df = taska_valid_df[taska_valid_df.Validity != 0].copy()
taska_validity_test_df  = taska_test_df.copy()

taska_validity_train_df['label'] = taska_validity_train_df.Validity.apply(lambda x : "valid" if x == 1 else "invalid")
taska_validity_valid_df['label'] = taska_validity_valid_df.Validity.apply(lambda x : "valid" if x == 1 else "invalid")
taska_validity_test_df['label'] = ["valid"] * len(taska_validity_test_df) #since we don't know the labels of the testset, just set it to all valid..


taska_novelty_train_df = taska_training_df[taska_training_df.Novelty != 0].copy()
taska_novelty_valid_df = taska_valid_df[taska_valid_df.Novelty != 0].copy()
taska_novelty_test_df  = taska_test_df.copy()

#Balancing the data for novelty task..
taska_novelty_train_balanced_df, y = ros.fit_resample(taska_novelty_train_df, taska_novelty_train_df['Novelty'])
taska_novelty_train_balanced_df['Novelty'] = y

taska_novelty_train_df['label'] = taska_novelty_train_df.Novelty.apply(lambda x : "novel" if x == -1 else "conservative")
taska_novelty_train_balanced_df['label'] = taska_novelty_train_balanced_df.Novelty.apply(lambda x : "novel" if x == -1 else "conservative")
taska_novelty_valid_df['label'] = taska_novelty_valid_df.Novelty.apply(lambda x : "novel" if x == -1 else "conservative")
taska_novelty_test_df['label'] = ["novel"] * len(taska_novelty_test_df)

In [34]:
taska_validity_train_df[['row_num',  'label', 'input_txt']].to_csv('../data/multitask_data/validity_training_df.tsv', sep='\t', header=False, index=False)
taska_validity_valid_df[['row_num', 'label', 'input_txt' ]].to_csv('../data/multitask_data/validity_valid_df.tsv', sep='\t', header=False, index=False)
taska_validity_test_df[['row_num', 'label', 'input_txt' ]].to_csv('../data/multitask_data/validity_test_df.tsv', sep='\t', header=False, index=False)

taska_novelty_train_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_training_df.tsv', sep='\t', header=False, index=False)
taska_novelty_train_balanced_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_training_balanced_df.tsv', sep='\t', header=False, index=False)
taska_novelty_valid_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_valid_df.tsv', sep='\t', header=False, index=False)
taska_novelty_test_df[['row_num', 'label', 'input_txt']].to_csv('../data/multitask_data/novelty_test_df.tsv', sep='\t', header=False, index=False)

### Train our Approach:
- We train one multitask model for novelty and one for validity with different task weights and learning rates.

In [56]:
! sh ./src-py/run_nli_based_multitask_experiment.sh ../data/multitask_data/nli_based_multitask_for_novelty.yml \
                                          ../data/multitask_data/ \
                                          ../../data-ceph/arguana/argmining22-sharedtask/models/multitask/nli_model/final-multitask-for-novelty/ \
                                          /var/argmining-sharedtask/nli-final-multitask-for-novelty\
                                          roberta-large-mnli_prepared_data 2e-5 20

Preparing data...
task object created from task file...
loading file https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/64a1d72b2bd05b0aff1a4dd9e7a90a6eea0312b4f914e80b0a923aa8f72219bd.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/425529714b758f50b6d3f93f8093d859856fd41cf1cec7c8edf2ab44aee632b6.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/roberta-large-mnli/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/tokenizer_config.json from cache at None
loading configurati

In [None]:
! sh ./src-py/run_nli_based_multitask_experiment.sh ../data/multitask_data/nli_based_multitask_for_validity.yml \
                                          ../data/multitask_data/ \
                                          ../../data-ceph/arguana/argmining22-sharedtask/models/multitask/nli_model/final-multitask-for-validity/ \
                                          /var/argmining-sharedtask/nli-final-multitask-for-validity\
                                          roberta-large-mnli_prepared_data 5e-6 20

Preparing data...
task object created from task file...
loading file https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/64a1d72b2bd05b0aff1a4dd9e7a90a6eea0312b4f914e80b0a923aa8f72219bd.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current//sile2804/.cache/huggingface/transformers/425529714b758f50b6d3f93f8093d859856fd41cf1cec7c8edf2ab44aee632b6.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/roberta-large-mnli/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/roberta-large-mnli/resolve/main/tokenizer_config.json from cache at None
loading configurati

### Extract Predictions:

Look at the best epoch from the f1-score of the corrsponding task and choose the corrsponding generated predictions for the test.
- For novelty: The best f1-score on validation is 0.669 after the third epoch
- For validity: The best f1-score on validation is 0.73 after the third epoch

In [7]:
best_novelty_pred = '../../data-ceph/arguana/argmining22-sharedtask/models/multitask/nli_model/final-multitask-for-novelty/NoveltyTask_test_predictions_3.tsv'
best_validity_pred = '../../data-ceph/arguana/argmining22-sharedtask/models/multitask/nli_model/final-multitask-for-validity/ValidityTask_test_predictions_3.tsv'

In [8]:
novelty_predictions_df = pd.read_csv(best_novelty_pred, delimiter='\t')
validity_predictions_df = pd.read_csv(best_validity_pred, delimiter='\t')

In [9]:
validity_predictions_df.sample().head()

Unnamed: 0,uid,prediction,label,score
60,122,valid,valid,0.804321


In [10]:
novelty_predictions_df.sample().head()

Unnamed: 0,uid,prediction,label,score
171,65,novel,novel,0.635969


In [13]:
taska_test_df['is_validity'] = validity_predictions_df['score'].tolist()
taska_test_df['is_novelty']  = novelty_predictions_df['score'].tolist()

In [14]:
taska_test_df.head()

Unnamed: 0,row_num,topic,Premise,Conclusion,Validity,Validity-Confidence,Novelty,Novelty-Confidence,input_txt,is_validity,is_novelty
0,0,Vegetarianism,This is frequent argument of animal rights act...,Humans have rights because they can reason; an...,-1,confident,-1,very confident,Vegetarianism:This is frequent argument of ani...,0.747315,0.184941
1,1,Vegetarianism,Almost all dangerous types of food-poisoning (...,Vegetarians often pass diseases through animals,-1,very confident,1,majority,Vegetarianism:Almost all dangerous types of fo...,0.798334,0.502156
2,2,Vegetarianism,Almost all dangerous types of food-poisoning (...,Vegetarianism is a risk to human health,-1,very confident,1,majority,Vegetarianism:Almost all dangerous types of fo...,0.780985,0.819381
3,3,Vegetarianism,Almost all dangerous types of food-poisoning (...,Vegetarians are vulnerable to disease and pests,-1,very confident,1,majority,Vegetarianism:Almost all dangerous types of fo...,0.369141,0.657229
4,4,Vegetarianism,The notion of man's dominion over animals need...,"Man's ""dominion"" over animals does not imply a...",1,very confident,-1,very confident,Vegetarianism:The notion of man's dominion ove...,0.313624,0.2061


Now the `taska_test_df` dataframe contains the predictions for validity and novelty... Submit it please :)