### Results

* Evaluate the two fine-tuned DistilBERT checkpoints (`ros` and `nlpaug`) on the test set and compare their predictions.

In [1]:
import sys
sys.path.append('../')
from pathlib import Path
from datetime import datetime

import pandas as pd

import torch
import torch.nn as nn
from torch import cuda
from torch.utils.data import Dataset, DataLoader

from transformers import DistilBertTokenizer, DistilBertModel

from config import settings

# Pick the compute device. The second GPU (index 1) is preferred, but only when
# the host really has more than one GPU: asking for 'cuda:1' on a single-GPU
# machine fails with an invalid-device-ordinal error. Otherwise fall back to the
# first GPU, and finally to the CPU.
if cuda.device_count() > 1:
    device = 'cuda:1'
elif cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Sequence length handed to the tokenizer: QuoraDataset passes it as `max_length`
# together with truncation and 'max_length' padding.
MAX_LEN = 150
# Number of examples per batch produced by the test DataLoader.
BATCH_SIZE = 64
# NOTE(review): EPOCHS and LEARNING_RATE are not referenced by any visible cell —
# presumably carried over from the training notebook; confirm before removing.
EPOCHS = 1
LEARNING_RATE = 1e-05
# Checkpoint name used to build the tokenizer below.
DISTIL_BERT_CHECKPOINT = 'distilbert-base-uncased'
# CSV with the test split, read by QuoraDataset.
TEST_PATH = '../data/processed/test.csv'

# Tokenizer matching the checkpoint above; shared by the dataset defined below.
tokenizer = DistilBertTokenizer.from_pretrained(DISTIL_BERT_CHECKPOINT)

In [2]:
class QuoraDataset(Dataset):
    """Map-style dataset backed by a CSV of questions and their labels.

    Each item is the tokenised ``question_text`` of one row plus its ``target``.
    """

    def __init__(self, file_path, tokenizer, max_len):
        """Read the CSV and remember how to tokenise its rows.

        Args:
            file_path: Path of the CSV file; rows are read from its
                ``question_text`` and ``target`` columns.
            tokenizer: Callable invoked on a one-element list of texts.
            max_len: Value forwarded to the tokenizer as ``max_length``.
        """
        self._max_len = max_len
        self._tokenizer = tokenizer
        self._dataset = pd.read_csv(file_path, low_memory=False)

    def __len__(self):
        """Return the number of rows in the loaded frame."""
        return len(self._dataset)

    def __getitem__(self, index):
        """Tokenise row ``index`` and return it with its label.

        Returns:
            dict with ``ids`` and ``mask`` (the tokenizer's ``input_ids`` and
            ``attention_mask``, which keep a leading dimension of 1 because the
            text is tokenised as a single-element batch) and ``target`` (a
            ``torch.long`` scalar tensor).
        """
        row = self._dataset.iloc[index]
        encoded = self._tokenizer(
            [row["question_text"]],
            truncation=True,
            return_tensors="pt",
            max_length=self._max_len,
            padding='max_length',
        )
        label = torch.tensor(row["target"], dtype=torch.long)

        return {
            "ids": encoded["input_ids"],
            "mask": encoded["attention_mask"],
            "target": label,
        }


# Build the test dataset and its loader. Shuffling is switched off: order has no
# effect on aggregate metrics, and a fixed order keeps the prediction lists of
# different models aligned row-for-row so they can be compared per example.
test_dataset = QuoraDataset(TEST_PATH, tokenizer, MAX_LEN)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [12]:
class DistilBertModelClass(nn.Module):
    """DistilBERT encoder followed by a 2-unit linear layer and a sigmoid."""

    def __init__(self):
        """Create the pretrained encoder and the classification head.

        Attribute names are part of the checkpoint key names, so they must stay
        ``distil_bert``, ``linear1`` and ``sigmoid``.
        """
        super().__init__()
        self.distil_bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.linear1 = nn.Linear(768, 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, ids, mask):
        """Score a batch.

        Args:
            ids: Token ids, passed to the encoder as its first argument.
            mask: Attention mask, passed to the encoder as its second argument.

        Returns:
            Tensor of sigmoid-activated scores with 2 values per example.
        """
        hidden_states = self.distil_bert(ids, mask).last_hidden_state
        # Representation at the final sequence position.
        # NOTE(review): with inputs padded to a fixed length this is presumably
        # a padding position; index 0 ([CLS]) is the usual pooling choice.
        # Confirm this matches how the checkpoints were trained before changing.
        final_position = hidden_states[:, -1, :]
        return self.sigmoid(self.linear1(final_position))

path_ros = "../models/ros/ftbert_6500_2022-02-15 01:45:25.338435"
path_nlpaug = "../models/nlpaug/ftbert_6500_2022-02-16 00:52:05.772874"


def _load_finetuned(checkpoint_path):
    """Build a DistilBertModelClass, load the weights stored at ``checkpoint_path``
    and return the model placed on ``device``.

    Args:
        checkpoint_path: Path of a file written by ``torch.save`` that holds a
            state dict for DistilBertModelClass.

    Returns:
        The model with the checkpoint weights applied, moved to ``device``.
    """
    loaded_model = DistilBertModelClass()
    # map_location remaps the stored tensors onto the device in use, so a
    # checkpoint saved from a GPU can still be restored on a host without it.
    # torch.load unpickles the file: only load checkpoints you trust.
    state_dict = torch.load(checkpoint_path, map_location=device)
    # strict=False is kept so a partially matching checkpoint still loads, but
    # any mismatch is reported rather than silently ignored.
    outcome = loaded_model.load_state_dict(state_dict, strict=False)
    if outcome.missing_keys or outcome.unexpected_keys:
        print(
            f"{checkpoint_path}: missing keys {outcome.missing_keys}, "
            f"unexpected keys {outcome.unexpected_keys}"
        )
    return loaded_model.to(device)


model_ros = _load_finetuned(path_ros)
model_aug = _load_finetuned(path_nlpaug)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- T

In [7]:
from collections import defaultdict

def results(model, loader, device=None):
    """Run ``model`` over every batch of ``loader`` and collect predictions and labels.

    Args:
        model: Module called as ``model(ids, mask)``; it must return one row of
            class scores per example, i.e. a tensor of shape (batch, n_classes).
        loader: Sized iterable of batches. Each batch is a mapping with the keys
            'ids' and 'mask' (a size-1 dimension at position 1 is squeezed away
            before the model is called) and 'target'.
        device: Device the batches are moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has no parameters).

    Returns:
        Tuple ``(y_pred, y_true)``: two lists of 0-d CPU tensors in loader order,
        holding the arg-max class index and the target of every example.
    """
    if device is None:
        # Follow the model's own placement instead of relying on a global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    y_pred = []
    y_true = []
    n_batches = len(loader)

    with torch.no_grad():
        for idx, inputs in enumerate(loader):
            if idx % 100 == 0:
                print(f"{idx}/{n_batches}")

            ids = inputs['ids'].squeeze(1).to(device)
            mask = inputs['mask'].squeeze(1).to(device)

            # No blanket .squeeze() on the output: for a final batch holding a
            # single example it would collapse (1, n_classes) to (n_classes,)
            # and the reduction over dim 1 below would raise IndexError.
            output = model(ids, mask)
            _, predictions = torch.max(output, 1)

            y_pred.extend(predictions.cpu())
            # Targets are only collected, so they never need to leave the CPU.
            y_true.extend(inputs['target'].cpu())

    return y_pred, y_true


# Score both fine-tuned models on the same test loader; each result is a
# (y_pred, y_true) pair as returned by results().
ros_results = results(model=model_ros, loader=test_loader)
nlpaug_results = results(model=model_aug, loader=test_loader)

collections.OrderedDict

In [8]:
# Fresh classifier: the encoder comes from the pretrained checkpoint and the
# linear head is newly initialised (no fine-tuned weights loaded yet).
model = DistilBertModelClass()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [10]:
# load_state_dict expects a mapping of parameter names to tensors. model_ros is
# a module, so its weights are passed through state_dict() rather than handing
# over the module itself.
model.load_state_dict(model_ros.state_dict(), strict=False)

<All keys matched successfully>