In [18]:
from datasets import load_from_disk
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from collections import OrderedDict
import os
import torch
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append("../../../")
import calibration_ppd as cppd

In [20]:
def read_cbow_results():
    """Load the cached datasets, tokenizer, trained CBOW model and saved predictions.

    Everything is read from fixed locations below ``../results/cbow/``.

    Returns:
        tuple: ``(data, tokenizer, model, predictions)`` where

        - ``data`` is a dict mapping split name to the dataset loaded from
          disk; the Quora ``"test"`` split is re-keyed as ``"qqp_test"`` and
          the Twitter dataset is stored under ``"twitter_test"``;
        - ``tokenizer`` is the tokenizer loaded from the cache directory;
        - ``model`` is an instance of the ``calibration_ppd`` class named in
          the saved config, built from the remaining config entries and with
          the trained weights loaded;
        - ``predictions`` is the object unpickled from the evaluation step.

    Raises:
        KeyError: if the Quora cache has no ``"test"`` split, or the saved
            config has no ``"name"`` entry.
    """
    results_path = "../results/cbow/"
    quora_dir = os.path.join(results_path, "cache/00_Load Quora Dataset/")
    twitter_dir = os.path.join(results_path, "cache/01_Load Twitter Dataset/")
    tokenizer_dir = os.path.join(results_path, "cache/02_Load tokenizer/")
    run_dir = os.path.join(results_path, "run00_cbow")

    # One dataset per sub-directory of the Quora cache.
    data = {
        split: load_from_disk(os.path.join(quora_dir, split))
        for split in os.listdir(quora_dir)
    }
    twitter_data = load_from_disk(twitter_dir)
    # Rename the Quora test split so it cannot be confused with the Twitter one.
    data["qqp_test"] = data.pop("test")
    data["twitter_test"] = twitter_data

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

    # map_location="cpu" lets a checkpoint that was saved from GPU tensors be
    # read on a machine without CUDA; load_state_dict() below copies the
    # values into the model's own parameters regardless of their device.
    state_dict = torch.load(
        os.path.join(run_dir, "08_Train model/state_dict.pkl"),
        map_location="cpu",
    )

    with open(os.path.join(run_dir, "04_Init Model/config.json"), "r") as f:
        config = json.load(f)
    # "name" selects the model class; the remaining entries are its kwargs.
    name = config.pop("name")
    model_cls = getattr(cppd, name)
    model = model_cls(**config)
    model.load_state_dict(state_dict)

    # SECURITY: pickle.load can execute arbitrary code. Only read result files
    # produced by your own runs of this project.
    with open(os.path.join(run_dir, "09_Evaluation of the model/results.pkl"), "rb") as f:
        predictions = pickle.load(f)

    return data, tokenizer, model, predictions

def read_bert_results():
    """Load the cached datasets, tokenizer, fine-tuned BERT model and saved predictions.

    Everything is read from fixed locations below ``../results/bert/``.

    Returns:
        tuple: ``(data, tokenizer, model, state_dict, predictions)`` where

        - ``data`` is a dict mapping split name to the dataset loaded from
          disk; the Quora ``"test"`` split is re-keyed as ``"qqp_test"`` and
          the Twitter dataset is stored under ``"twitter_test"``;
        - ``tokenizer`` is the tokenizer loaded from the cache directory;
        - ``model`` is the sequence-classification model instantiated from the
          saved init directory, with the trained weights loaded;
        - ``state_dict`` is the trained state dict as read from disk (its
          tensors are mapped to CPU);
        - ``predictions`` is the object unpickled from the evaluation step.

    Raises:
        KeyError: if the Quora cache has no ``"test"`` split.
    """
    results_path = "../results/bert/"
    quora_dir = os.path.join(results_path, "cache/00_Load Quora Dataset/")
    twitter_dir = os.path.join(results_path, "cache/01_Load Twitter Dataset/")
    tokenizer_dir = os.path.join(results_path, "cache/02_Load tokenizer/")
    run_dir = os.path.join(results_path, "run00_bert")

    # One dataset per sub-directory of the Quora cache.
    data = {
        split: load_from_disk(os.path.join(quora_dir, split))
        for split in os.listdir(quora_dir)
    }
    twitter_data = load_from_disk(twitter_dir)
    # Rename the Quora test split so it cannot be confused with the Twitter one.
    data["qqp_test"] = data.pop("test")
    data["twitter_test"] = twitter_data

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

    # map_location="cpu" lets a checkpoint that was saved from GPU tensors be
    # read on a machine without CUDA.
    state_dict = torch.load(
        os.path.join(run_dir, "08_Train model/state_dict.pkl"),
        map_location="cpu",
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        os.path.join(run_dir, "04_Init Model/")
    )
    # Replace the initial weights with the trained ones.
    model.load_state_dict(state_dict)

    # SECURITY: pickle.load can execute arbitrary code. Only read result files
    # produced by your own runs of this project.
    with open(os.path.join(run_dir, "09_Evaluation of the model/results.pkl"), "rb") as f:
        predictions = pickle.load(f)

    return data, tokenizer, model, state_dict, predictions

# read_bert_results() returns five values. Unpack all of them: four targets
# would raise "ValueError: too many values to unpack", and `predictions` is
# needed by the accuracy computation further down.
data, tokenizer, model, state_dict, predictions = read_bert_results()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [17]:
# Display the module tree of the currently loaded model.
model

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [14]:
# List the names of every parameter registered on the model.
dict(model.named_parameters()).keys()

dict_keys(['embeddings.word_embeddings.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.LayerNorm.weight', 'embeddings.LayerNorm.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.output.dense.weight', 'encoder.layer.0.output.dense.bias', 'encoder.layer.0.output.LayerNorm.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.1.attention.self.query.weight', 'encoder.layer.1.attention.self.query.bia

In [6]:
# Cut-off above which a score counts as a positive prediction.
# NOTE(review): the scores are stored under the key "logits". If they are raw
# logits rather than probabilities, 0.5 is probably not the intended decision
# boundary (probability 0.5 corresponds to logit 0) - confirm with the
# evaluation step.
trh = 0.5

# Report the fraction of thresholded predictions that equal the labels,
# separately for every split in `predictions`.
for split, results in predictions.items():
    labels, logits = results["labels"], results["logits"]
    preds = (logits > trh).astype(float)
    acc = np.mean(preds == labels)
    print(f"Accuracy on {split} split: {acc:.04f}")

Accuracy on model_train split: 0.6821
Accuracy on calibration_train split: 0.6792
Accuracy on validation split: 0.6815
Accuracy on qqp_test split: 0.6826
Accuracy on twitter_test split: 0.8038
