In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from libs import *

In [4]:
model_name = 'google-bert/bert-base-cased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=19)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [11]:
def compute_top_k_accuracy(preds, labels, k=1):
    top_k_preds = np.argsort(preds, axis=1)[:, -k:]
    top_k_accuracy = np.any(top_k_preds == np.expand_dims(labels, axis=1), axis=1).mean()
    return top_k_accuracy

def preprocess_function(examples, tokenizer):
    return tokenizer(examples["Question"], truncation=True, padding = 'max_length', max_length=512)   

In [12]:
seed = 42
df_test_asdiv = pd.read_csv(f'data_first_ver/{seed}_test_set_asdiv.csv')
df_test_mcas = pd.read_csv(f'data_first_ver/{seed}_test_set_mcas.csv')

dataset_test_asdiv = Dataset.from_pandas(df_test_asdiv)
dataset_test_mcas = Dataset.from_pandas(df_test_mcas)

tokenized_dataset_test_asdiv = dataset_test_asdiv.map(lambda x: preprocess_function(x, tokenizer), batched=True)
tokenized_dataset_test_mcas = dataset_test_mcas.map(lambda x: preprocess_function(x, tokenizer), batched=True)

Map:   0%|          | 0/62 [00:00<?, ? examples/s]

Map:   0%|          | 0/73 [00:00<?, ? examples/s]

In [23]:
trainer = Trainer(
    model=model,
    args=TrainingArguments(
        per_device_eval_batch_size= 1,
        output_dir= './result'
    ),
    tokenizer=tokenizer,
    data_collator=None
)

print(f"Evaluation on test set for seed {seed}...")
test_results_asdiv = trainer.evaluate(eval_dataset=tokenized_dataset_test_asdiv)
test_results_mcas = trainer.evaluate(eval_dataset=tokenized_dataset_test_mcas)
print(f'ASDIV: {test_results_asdiv}')
print(f'MCAS: {test_results_mcas}')

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Evaluation on test set for seed 42...


  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/73 [00:00<?, ?it/s]

ASDIV: {'eval_loss': 3.0698134899139404, 'eval_runtime': 35.5289, 'eval_samples_per_second': 1.745, 'eval_steps_per_second': 1.745}
MCAS: {'eval_loss': 2.979890823364258, 'eval_runtime': 40.0275, 'eval_samples_per_second': 1.824, 'eval_steps_per_second': 1.824}


In [24]:
preds_asdiv = trainer.predict(tokenized_dataset_test_asdiv).predictions
labels_asdiv = np.array(tokenized_dataset_test_asdiv["label"])

preds_mcas = trainer.predict(tokenized_dataset_test_mcas).predictions
labels_mcas = np.array(tokenized_dataset_test_mcas["label"])

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/73 [00:00<?, ?it/s]

In [29]:
labels_asdiv.shape

(62,)

In [31]:
preds_asdiv

array([[ 0.11436419,  0.40059042,  0.33430475, ...,  0.23638973,
        -0.2792644 ,  0.17329365],
       [ 0.14490099,  0.30057898,  0.33392623, ...,  0.25265345,
        -0.27043825,  0.2187818 ],
       [ 0.14490099,  0.30057898,  0.33392623, ...,  0.25265345,
        -0.27043825,  0.2187818 ],
       ...,
       [ 0.08702233,  0.3386045 ,  0.2955528 , ...,  0.22843863,
        -0.27322516,  0.1883105 ],
       [ 0.08702233,  0.3386045 ,  0.2955528 , ...,  0.22843863,
        -0.27322516,  0.1883105 ],
       [ 0.09362254,  0.35367736,  0.2959504 , ...,  0.19492975,
        -0.29393795,  0.19146667]], dtype=float32)

In [84]:
np.any(np.expand_dims(labels_asdiv, axis=1) == np.argsort(preds_asdiv, axis=1)[:, -18:], axis = 1).sum()

60

In [60]:
labels_asdiv.shape

(62,)

In [59]:
np.expand_dims(labels_asdiv, axis=1) # Change to shape (62,1)

array([[ 5],
       [ 3],
       [15],
       [ 2],
       [18],
       [ 4],
       [ 4],
       [ 4],
       [ 6],
       [ 3],
       [14],
       [13],
       [10],
       [16],
       [16],
       [14],
       [12],
       [ 3],
       [ 3],
       [18],
       [ 4],
       [17],
       [ 3],
       [ 1],
       [12],
       [ 4],
       [ 7],
       [ 7],
       [ 4],
       [ 3],
       [13],
       [ 3],
       [ 4],
       [ 9],
       [ 0],
       [ 2],
       [10],
       [14],
       [10],
       [14],
       [ 2],
       [ 5],
       [ 3],
       [ 4],
       [10],
       [ 0],
       [ 7],
       [18],
       [ 3],
       [ 7],
       [13],
       [18],
       [ 3],
       [11],
       [ 8],
       [ 0],
       [ 0],
       [ 3],
       [10],
       [12],
       [ 0],
       [15]])

In [48]:
def get_top_k_predictions(predictions, k=1):
    top_k_preds = np.argsort(predictions, axis=1)[:, -k:]
    return top_k_preds

def compute_top_k_accuracy(preds, labels, k=1):
    top_k_preds = np.argsort(preds, axis=1)[:, -k:]
    top_k_accuracy = np.any(top_k_preds == np.expand_dims(labels, axis=1), axis=1).mean()
    print(np.expand_dims(labels, axis=1))
    return top_k_accuracy


compute_top_k_accuracy(preds_asdiv, labels_asdiv, k=1)

[[ 5]
 [ 3]
 [15]
 [ 2]
 [18]
 [ 4]
 [ 4]
 [ 4]
 [ 6]
 [ 3]
 [14]
 [13]
 [10]
 [16]
 [16]
 [14]
 [12]
 [ 3]
 [ 3]
 [18]
 [ 4]
 [17]
 [ 3]
 [ 1]
 [12]
 [ 4]
 [ 7]
 [ 7]
 [ 4]
 [ 3]
 [13]
 [ 3]
 [ 4]
 [ 9]
 [ 0]
 [ 2]
 [10]
 [14]
 [10]
 [14]
 [ 2]
 [ 5]
 [ 3]
 [ 4]
 [10]
 [ 0]
 [ 7]
 [18]
 [ 3]
 [ 7]
 [13]
 [18]
 [ 3]
 [11]
 [ 8]
 [ 0]
 [ 0]
 [ 3]
 [10]
 [12]
 [ 0]
 [15]]


0.06451612903225806

In [37]:
compute_top_k_accuracy(preds_asdiv, labels_asdiv, k=18)

0.967741935483871

In [None]:
for k in range(1, 4):
        top_k_accuracies_asdiv[k] += compute_top_k_accuracy(preds_asdiv, labels_asdiv, k=k)
        top_k_accuracies_mcas[k] += compute_top_k_accuracy(preds_mcas, labels_mcas, k=k)

results = []
results.append([f"Seed {seed}", test_results_asdiv['eval_accuracy'], test_results_mcas['eval_accuracy']])
test_acc_asdiv += test_results_asdiv['eval_accuracy']
test_acc_mcas += test_results_mcas['eval_accuracy']