## Imports

In [1]:
# pip install simpletransformers

In [2]:
import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt

import csv
# import os
# import pickle
# import argparse
# import fnmatch
# import json

# import string
import time
# from collections import Counter

# import torch
# from torch.utils.data import Dataset, DataLoader
# import torch.nn as nn

# from tqdm import tqdm

In [3]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs

In [4]:
import logging

# Cap the "transformers" logger at WARNING so only its warnings and errors
# are shown, while the root logger is configured to emit INFO and above.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

## Read training, dev and unlabeled test data

The following starter code (Python 3) shows how to read the labeled training and dev sentence pairs into DataFrames, and the unlabeled test sentence pairs into a list.

In [5]:
# Locations of the three PNLI CSV files, relative to the working directory.
# If the files are kept in a data/ subfolder, prefix each name with 'data/'.
TRAIN_PATH = 'pnli_train.csv'
DEV_PATH = 'pnli_dev.csv'
TEST_PATH = 'pnli_test_unlabeled.csv'

In [6]:
# Labeled splits. The files are read as headerless CSVs, so the three column
# names are supplied here; the same names are used for both splits.
LABELED_COLUMNS = ['text_a', 'text_b', 'labels']
df_train = pd.read_csv(TRAIN_PATH, names=LABELED_COLUMNS)
df_dev = pd.read_csv(DEV_PATH, names=LABELED_COLUMNS)

# Unlabeled test pairs are kept as a plain list with one list of fields per
# CSV row, which is what gets handed to model.predict later on.
# newline='' leaves line-ending handling to the csv module (as its
# documentation requires), so quoted fields containing line breaks stay intact.
with open(TEST_PATH, 'r', encoding='utf-8', newline='') as fp:
    test_list = list(csv.reader(fp))

In [18]:
# (rows, columns) of the labeled training split
df_train.shape

(5983, 3)

In [17]:
# (rows, columns) of the labeled dev split
df_dev.shape

(1055, 3)

## Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes here. We will use your predictions on the unlabeled test data for grading, and will check this part to understand how your method produced those predictions.

### Model

In [7]:
# Wall-clock start mark; a later cell takes a second reading and prints the difference.
start = time.time()

In [8]:
# Optional model configuration
model_args = ClassificationArgs()
model_args.num_train_epochs=4

# Create a ClassificationModel
model = ClassificationModel("roberta", "roberta-base", args=model_args, num_labels=2, use_cuda=False)

# Train the model
model.train_model(df_train)

# Evaluate the model
result, model_outputs, wrong_predictions = model.eval_model(df_dev)

# Make predictions with the model
predictions, raw_outputs = model.predict(test_list)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

  0%|          | 0/5983 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_128_2_3


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/748 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1055 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3


Running Evaluation:   0%|          | 0/132 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6698654222665578, 'tp': 486, 'tn': 395, 'fp': 106, 'fn': 68, 'auroc': 0.9103561829409771, 'auprc': 0.9094066592065452, 'eval_loss': 0.40892763766036794}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/4850 [00:00<?, ?it/s]

  0%|          | 0/607 [00:00<?, ?it/s]

In [9]:
# Peek at the first five predicted test labels
print(predictions[:5])

[1 1 0 0 0]


In [10]:
# Wall-clock end mark, paired with the `start` reading taken before the model cell
end = time.time()

In [11]:
# Seconds elapsed between the two time.time() readings
elapsed_seconds = end - start
print(elapsed_seconds)

8255.274708986282


In [28]:
from sklearn.metrics import accuracy_score

# Rebuild the dev sentence pairs without their labels (the first two fields
# of every CSV row) so they can be passed to model.predict.
# newline='' leaves line-ending handling to the csv module (as its
# documentation requires), so quoted fields containing line breaks stay intact.
with open(DEV_PATH, 'r', encoding='utf-8', newline='') as fp:
    dev_list = [[row[0], row[1]] for row in csv.reader(fp)]

# Predicted labels and raw model outputs for the dev pairs
y_pred, dev_outputs = model.predict(dev_list)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1055 [00:00<?, ?it/s]

  0%|          | 0/132 [00:00<?, ?it/s]

In [34]:
# Dev-set accuracy: gold labels from the dev frame against the dev predictions.
y_true = df_dev['labels']
accuracy_score(y_true=y_true, y_pred=y_pred)

0.8350710900473933

In [35]:
# `results` is what gets written to the submission file: it must end up as
# one 0/1 label per test pair.
results = predictions

## Output Prediction Result File

You will need to submit a prediction result file. It should have one line per test instance (4850 lines for `pnli_test_unlabeled.csv`), and every line should be either 0 or 1: your model's prediction for the respective test set instance.

In [36]:
# Sanity check before writing the submission: there must be exactly one
# prediction in `results` for every row read from the unlabeled test file.
# Comparing against len(test_list) keeps the check valid if the test file changes.
assert len(results) == len(test_list), (
    f"expected {len(test_list)} predictions, got {len(results)}"
)

In [38]:
# Coerce every prediction to a built-in int so each one is written as a bare 0 or 1
results = list(map(int, results))

In [40]:
# write your prediction results to 'upload_predictions.txt' and upload that later
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
        fp.write(str(x) + '\n')