### Read training, dev and unlabeled test data

The following provides a starting code (Python 3) of how to read the labeled training and dev sentence pairs, and unlabeled test sentence pairs, into lists.

In [1]:
import csv

In [2]:
train, dev, test = [], [], []

In [3]:
with open('./data/pnli_train.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        train.append(x)
print (len(train))
print (train[:3])

5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', '1'], ['Who eats junk foods.', 'A person typically desire healthy life.', '0'], ['A person is sick.', 'A person typically desire healthy life.', '1']]


In [4]:
with open('./data/pnli_dev.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        dev.append(x)
print (len(dev))
print (dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', '1'], ['A person does not care for accuracy.', 'A person typically desires accurate results.', '0'], ['The person double checks their data.', 'A person typically desires accurate results.', '1']]


In [5]:
with open('./data/pnli_test_unlabeled.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[0] and x[1] will be the sentence pairs.
        test.append(x)
print (len(test))
print (test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


### Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes at here. We will use your prediction on the unlabeled test data to grade, while checking this part to understand how your method has produced the predictions.

In [6]:
y_train = [el[2] for el in train]
print(len(y_train))
y_train[:5]

5983


['1', '0', '1', '0', '1']

In [7]:
X_train = [el[0:2] for el in train]
print(len(X_train))
X_train[:3]

5983


[['Sometimes do exercise.', 'A person typically desire healthy life.'],
 ['Who eats junk foods.', 'A person typically desire healthy life.'],
 ['A person is sick.', 'A person typically desire healthy life.']]

In [8]:
X_train = [[word.replace(".", "") for word in el] for el in X_train]
X_train[:3]

[['Sometimes do exercise', 'A person typically desire healthy life'],
 ['Who eats junk foods', 'A person typically desire healthy life'],
 ['A person is sick', 'A person typically desire healthy life']]

In [9]:
X_train = [[char.lower() for char in el] for el in X_train]
print(len(X_train))
X_train[:3]

5983


[['sometimes do exercise', 'a person typically desire healthy life'],
 ['who eats junk foods', 'a person typically desire healthy life'],
 ['a person is sick', 'a person typically desire healthy life']]

In [10]:
import pandas as pd
train_df = pd.read_csv('./data/pnli_train.csv',header=None)
val_df   = pd.read_csv('./data/pnli_dev.csv',header=None)
test_df   = pd.read_csv('./data/pnli_test_unlabeled.csv',header=None)


In [11]:
train_df
train_df.columns

Int64Index([0, 1, 2], dtype='int64')

In [12]:
val_df

Unnamed: 0,0,1,2
0,A person is looking for accuracy.,A person typically desires accurate results.,1
1,A person does not care for accuracy.,A person typically desires accurate results.,0
2,The person double checks their data.,A person typically desires accurate results.,1
3,The person speeds through the experiment.,A person typically desires accurate results.,0
4,A person is studying well.,A person typically desires accurate results.,1
...,...,...,...
1050,The clue is for a different puzzle.,Giving clue are typically used for helping peo...,0
1051,The puzzle is a jigsaw puzzle.,Giving clue are typically used for helping peo...,0
1052,You are playing a puzzle game.,Giving clue are typically used for helping peo...,1
1053,The clue is correct for the puzzle.,Giving clue are typically used for helping peo...,1


In [13]:
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer, AlbertTokenizer


class DataBert(Dataset):

  def __init__(self, train_df, val_df):

    self.train_df = train_df
    self.val_df = val_df

    self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True)
    self.train_data = None
    self.val_data = None
    self.init_data()

  def init_data(self):
      self.train_data = self.load_data(self.train_df)
      self.val_data = self.load_data(self.val_df)

  def load_data(self, df):
    MAX_LEN = 512
    token_ids = []
    mask_ids = []
    seg_ids = []
    y = []

    precondition_list = df[0].to_list()
    sentence_list = df[1].to_list()
    label_list = df[2].to_list()

    for (precondition, sentence, label) in zip(precondition_list, sentence_list, label_list):
      precondition_id = self.tokenizer.encode(precondition, add_special_tokens = False)
      sentence_id = self.tokenizer.encode(sentence, add_special_tokens = False)
      pair_token_ids = [self.tokenizer.cls_token_id] + precondition_id + [self.tokenizer.sep_token_id] + sentence_id + [self.tokenizer.sep_token_id]
      precondition_len = len(precondition_id)
      sentence_len = len(sentence_id)

      segment_ids = torch.tensor([0] * (precondition_len + 2) + [1] * (sentence_len + 1)) 
      attention_mask_ids = torch.tensor([1] * (precondition_len + sentence_len + 3))  # mask padded values

      token_ids.append(torch.tensor(pair_token_ids))
      seg_ids.append(segment_ids)
      mask_ids.append(attention_mask_ids)
      y.append(int(label))
    
    token_ids = pad_sequence(token_ids, batch_first=True)
    mask_ids = pad_sequence(mask_ids, batch_first=True)
    seg_ids = pad_sequence(seg_ids, batch_first=True)
    y = torch.tensor(y)
    dataset = TensorDataset(token_ids, mask_ids, seg_ids, y)
    print(len(dataset))
    return dataset

  def get_data_loaders(self, batch_size=32):
    train_loader = DataLoader(
      self.train_data,
      batch_size=batch_size
    )

    val_loader = DataLoader(
      self.val_data,
      batch_size=batch_size
    )

    return train_loader, val_loader

In [14]:
train_dataset = DataBert(train_df, val_df)

Downloading: 100%|██████████| 742k/742k [00:00<00:00, 1.82MB/s]
Downloading: 100%|██████████| 1.25M/1.25M [00:00<00:00, 3.18MB/s]
Downloading: 100%|██████████| 684/684 [00:00<00:00, 342kB/s]


5983
1055


In [15]:
train_loader, val_loader = train_dataset.get_data_loaders(batch_size=16)

In [16]:
len(val_loader)
len(train_loader)

374

In [17]:
from transformers import BertForSequenceClassification, AlbertForSequenceClassification, AdamW

model = AlbertForSequenceClassification.from_pretrained("albert-base-v2", num_labels=2)

Downloading: 100%|██████████| 45.2M/45.2M [00:01<00:00, 29.9MB/s]
Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.weight', 'predictions.decoder.weight', 'predictions.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.decoder.bias', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are n

In [18]:
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.005}
]

In [19]:
# This variable contains all of the hyperparemeter information our training loop needs
optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5, correct_bias=False)



In [20]:
def multi_acc(y_pred, y_test):
  acc = (torch.log_softmax(y_pred, dim=1).argmax(dim=1) == y_test).sum().float() / float(y_test.size(0))
  return acc

In [24]:
epoch_predictions = []
EPOCHS = 5

def train(model, train_loader, val_loader, optimizer):  
  total_step = len(train_loader)

  for epoch in range(EPOCHS):
    # start = time.time()
    model.train()
    total_train_loss = 0
    total_train_acc  = 0
    for batch_idx, (pair_token_ids, mask_ids, seg_ids, y) in enumerate(train_loader):
      optimizer.zero_grad()
    #   pair_token_ids = pair_token_ids.to(device)
    #   mask_ids = mask_ids.to(device)
    #   seg_ids = seg_ids.to(device)
      labels = y
      # prediction = model(pair_token_ids, mask_ids, seg_ids)
      loss, prediction = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids, 
                             labels=labels).values()

      # loss = criterion(prediction, labels)
      acc = multi_acc(prediction, labels)

      loss.backward()
      optimizer.step()
      
      total_train_loss += loss.item()
      total_train_acc  += acc.item()

    train_acc  = total_train_acc/len(train_loader)
    train_loss = total_train_loss/len(train_loader)
    model.eval()
    print(f'Epoch {epoch+1}: train_loss: {train_loss:.4f} train_acc: {train_acc:.4f}')

    total_val_acc  = 0
    total_val_loss = 0
    predictions = []
    with torch.no_grad():
      for batch_idx, (pair_token_ids, mask_ids, seg_ids, y) in enumerate(val_loader):
        optimizer.zero_grad()
        # pair_token_ids = pair_token_ids.to(device)
        # mask_ids = mask_ids.to(device)
        # seg_ids = seg_ids.to(device)
        labels = y
        # prediction = model(pair_token_ids, mask_ids, seg_ids)
        loss, prediction = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids, 
                             labels=labels).values()
        predictions.append(prediction)
        # loss = criterion(prediction, labels)
        acc = multi_acc(prediction, labels)

        total_val_loss += loss.item()
        total_val_acc  += acc.item()
    
    epoch_predictions.append(predictions)

    val_acc  = total_val_acc/len(val_loader)
    val_loss = total_val_loss/len(val_loader)
    # end = time.time()
    # hours, rem = divmod(end-start, 3600)
    # minutes, seconds = divmod(rem, 60)

    print(f'Epoch {epoch+1}: val_loss: {val_loss:.4f} val_acc: {val_acc:.4f}')
    # print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

In [25]:
train(model, train_loader, val_loader, optimizer)

Epoch 1: train_loss: 0.5110 train_acc: 0.7714
Epoch 1: val_loss: 0.5126 val_acc: 0.7819
Epoch 2: train_loss: 0.4684 train_acc: 0.7954
Epoch 2: val_loss: 0.5116 val_acc: 0.7848
Epoch 3: train_loss: 0.4224 train_acc: 0.8200
Epoch 3: val_loss: 0.4952 val_acc: 0.7838


In [40]:
predictions_copy = epoch_predictions

In [41]:
len(predictions_copy[1])

66

In [42]:
flat_list = [item for sublist in predictions_copy[0] for item in sublist]

In [43]:
len(flat_list)
max(flat_list[0])

tensor(0.4829)

In [44]:
result = []
for pred in flat_list:
    result.append(max(range(len(pred)), key=pred.__getitem__))
len(result)

1055

In [45]:
y_dev = [el[2] for el in dev]
len(y_dev)

1055

In [47]:
correct_pred = 0
for i in range(len(y_dev)):
    if result[i] == int(y_dev[i]):
        correct_pred += 1
correct_pred /= len(y_dev)
print(correct_pred)

0.5251184834123223


In [30]:
class TestDataBert(Dataset):

  def __init__(self, test_df):

    self.test_df = test_df

    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    self.test_data = None
    self.init_data()

  def init_data(self):
      self.test_data = self.load_data(self.test_df)

  def load_data(self, df):
    MAX_LEN = 512
    token_ids = []
    mask_ids = []
    seg_ids = []

    precondition_list = df[0].to_list()
    sentence_list = df[1].to_list()

    for (precondition, sentence) in zip(precondition_list, sentence_list):
      precondition_id = self.tokenizer.encode(precondition, add_special_tokens = False)
      sentence_id = self.tokenizer.encode(sentence, add_special_tokens = False)
      pair_token_ids = [self.tokenizer.cls_token_id] + precondition_id + [self.tokenizer.sep_token_id] + sentence_id + [self.tokenizer.sep_token_id]
      precondition_len = len(precondition_id)
      sentence_len = len(sentence_id)

      segment_ids = torch.tensor([0] * (precondition_len + 2) + [1] * (sentence_len + 1)) 
      attention_mask_ids = torch.tensor([1] * (precondition_len + sentence_len + 3))  # mask padded values

      token_ids.append(torch.tensor(pair_token_ids))
      seg_ids.append(segment_ids)
      mask_ids.append(attention_mask_ids)
    
    token_ids = pad_sequence(token_ids, batch_first=True)
    mask_ids = pad_sequence(mask_ids, batch_first=True)
    seg_ids = pad_sequence(seg_ids, batch_first=True)
    dataset = TensorDataset(token_ids, mask_ids, seg_ids)
    print(len(dataset))
    return dataset

  def get_data_loaders(self, batch_size=32):
    test_loader = DataLoader(
      self.test_data,
      batch_size=batch_size
    )

    return test_loader

In [31]:
test_dataset = TestDataBert(test_df)

4850


In [32]:
test_loader = test_dataset.get_data_loaders(batch_size=32)
len(test_loader)

152

In [33]:
final_predictions = [] 
def predict_test(model, test_loader): 
  for batch_idx, (pair_token_ids, mask_ids, seg_ids) in enumerate(test_loader):
    prediction = model(pair_token_ids, 
                          token_type_ids=seg_ids, 
                          attention_mask=mask_ids).values()
    final_predictions.append(prediction)


In [34]:
predict_test(model,test_loader)

In [None]:
len(final_predictions)

0

In [13]:
# Eventually, results need to be a list of 2028 0 or 1's
results = []

### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [None]:
# suppose you had your model's predictions on the 2028 test cases read from test_enc_unlabeled.tsv, and 
#those results are in the list called 'results'
assert (len(results) == 4850)

In [None]:
# make sure the results are not float numbers, but intergers 0 and 1
results = [int(x) for x in results]

In [None]:
# write your prediction results to 'upload_predictions.txt' and upload that later
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
        fp.write(str(x) + '\n')