In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Read training, dev and unlabeled test data

The following provides a starting code (Python 3) of how to read the labeled training and dev sentence pairs, and unlabeled test sentence pairs, into lists.

In [2]:
%pip install transformers
%pip install torch
%pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 5.1 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 49.4 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 12.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.8.1 tokenizers-0.12.1 transformers-4.21.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloadi

In [3]:
import csv
import torch

from torch.utils.data import Dataset, random_split

from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer

In [4]:
train, dev, test = [], [], []

In [5]:
with open('/content/drive/MyDrive/ColabNotebooks/pnli_train.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        train.append(x)
print (len(train))
print (train[:3])

5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', '1'], ['Who eats junk foods.', 'A person typically desire healthy life.', '0'], ['A person is sick.', 'A person typically desire healthy life.', '1']]


In [6]:
with open('/content/drive/MyDrive/ColabNotebooks/pnli_dev.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        dev.append(x)
print (len(dev))
print (dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', '1'], ['A person does not care for accuracy.', 'A person typically desires accurate results.', '0'], ['The person double checks their data.', 'A person typically desires accurate results.', '1']]


In [7]:
with open('/content/drive/MyDrive/ColabNotebooks/pnli_test_unlabeled.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[0] and x[1] will be the sentence pairs.
        test.append(x)
print (len(test))
print (test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


### Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes at here. We will use your prediction on the unlabeled test data to grade, while checking this part to understand how your method has produced the predictions.

In [8]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

model_name = 'roberta-large-mnli'
model_args = []

tokenizer = AutoTokenizer.from_pretrained(model_name)

Downloading config.json:   0%|          | 0.00/688 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [9]:
def get_data(X, with_label=True):
    premise = [d[0] for d in X]
    hypothesis = [d[1] for d in X]

    inputs = tokenizer(premise, hypothesis, return_tensors='pt', padding=True)

    if with_label:
        inputs['label_ids'] = torch.tensor([0 if d[2] == '0' else 2 for d in X])

    return inputs

In [10]:
class PreconditionInferenceDataset(Dataset):
    def __init__(self, data):
        len_data = len(data['input_ids'])
        self.data = [{key: torch.tensor(val[idx]) for key, val in data.items()} 
                     for idx in range(len_data)]
      
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]

train_data, dev_data = PreconditionInferenceDataset(get_data(train)), PreconditionInferenceDataset(get_data(dev))
all_labelled_data = PreconditionInferenceDataset(get_data(train + dev))

print(len(train_data), len(dev_data))

  after removing the cwd from sys.path.


5983 1055


In [11]:
def get_pred_labels(logits):
    logits = torch.from_numpy(logits)
    logits = torch.stack([logits[:, 0] + logits[:, 1], logits[:, 2]], dim=1)
    return torch.argmax(logits, dim=1)

def compute_metrics(eval_pred):
    logits, true_labels = eval_pred

    pred_labels = get_pred_labels(logits)
    true_labels = torch.from_numpy(true_labels) // 2

    metrics = {'accuracy': torch.sum(true_labels == pred_labels) / len(true_labels)}

    return metrics

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_name, return_dict=True)

training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    num_train_epochs=2,
    logging_steps=500,
    learning_rate=1e-5,
)

trainer = Trainer(
    args=training_args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    compute_metrics=compute_metrics,
    model_init=model_init,
)

trainer.train()

loading configuration file https://huggingface.co/roberta-large-mnli/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fab42bdbd5cb5e6ff7cabeb9bcc12728f56022f50b9644a3079904564f2bc704.ddc5961cccf081d6ca7f4f58ee119c21895aa9b19f0044f01954cd2ff42fefcb
Model config RobertaConfig {
  "_name_or_path": "roberta-large-mnli",
  "_num_labels": 3,
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "CONTRADICTION",
    "1": "NEUTRAL",
    "2": "ENTAILMENT"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "CONTRADICTION": 0,
    "ENTAILMENT": 2,
    "NEUTRAL": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_

Downloading pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

storing https://huggingface.co/roberta-large-mnli/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/63cbd98723b89863bcd86a8002e823de3004a139513559246690c65521cdc9b9.38ef55c51c84ab2e78e5a0e2ea9c25830fd074df70d2f10071eb9a1bc1586ca0
creating metadata file for /root/.cache/huggingface/transformers/63cbd98723b89863bcd86a8002e823de3004a139513559246690c65521cdc9b9.38ef55c51c84ab2e78e5a0e2ea9c25830fd074df70d2f10071eb9a1bc1586ca0
loading weights file https://huggingface.co/roberta-large-mnli/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/63cbd98723b89863bcd86a8002e823de3004a139513559246690c65521cdc9b9.38ef55c51c84ab2e78e5a0e2ea9c25830fd074df70d2f10071eb9a1bc1586ca0
Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from t

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4101,0.401899,0.904265
2,0.3278,0.400246,0.909005


Saving model checkpoint to test_trainer/checkpoint-500
Configuration saved in test_trainer/checkpoint-500/config.json
Model weights saved in test_trainer/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1055
  Batch size = 8
  # Remove the CWD from sys.path while we load stuff.
Saving model checkpoint to test_trainer/checkpoint-1000
Configuration saved in test_trainer/checkpoint-1000/config.json
Model weights saved in test_trainer/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1055
  Batch size = 8
  # Remove the CWD from sys.path while we load stuff.


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=1496, training_loss=0.33433524555063504, metrics={'train_runtime': 541.0073, 'train_samples_per_second': 22.118, 'train_steps_per_second': 2.765, 'total_flos': 827652496368552.0, 'train_loss': 0.33433524555063504, 'epoch': 2.0})

The following cell trains the model with dev data included.

In [None]:
trainer = Trainer(
    args=training_args,
    train_dataset=all_labelled_data,
    eval_dataset=dev_data,
    compute_metrics=compute_metrics,
    model_init=model_init,
)

trainer.train()

In [12]:
# Eventually, results need to be a list of 2028 0 or 1's
test_data = PreconditionInferenceDataset(get_data(test, with_label=False))
output = trainer.predict(test_data)
results = list(get_pred_labels(output.predictions))

  after removing the cwd from sys.path.
***** Running Prediction *****
  Num examples = 4850
  Batch size = 8


### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [13]:
# suppose you had your model's predictions on the 2028 test cases read from test_enc_unlabeled.tsv, and 
#those results are in the list called 'results'
assert (len(results) == 4850)

In [14]:
# make sure the results are not float numbers, but intergers 0 and 1
results = [int(x) for x in results]

In [15]:
# write your prediction results to 'upload_predictions.txt' and upload that later
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
        fp.write(str(x) + '\n')