In [1]:
from IPython.display import clear_output
!pip install datasets
clear_output()

In [2]:
import numpy as np
import copy 
import os
import pandas as pd
from datasets import load_dataset, concatenate_datasets

clear_output()

In [3]:
#####This cell is Borrowed from https://github.com/technion-cs-nlp/bias-probing

import re
from typing import Tuple, List, Union

from nltk import word_tokenize

_IGNORED_TOKENS = [".", "?", "!", "-"]

import nltk
nltk.download('punkt')

def sentence_to_words(sent: Union[str, List[str]], ignored: List[str] = None, lowercase=True):
    if ignored is None:
        ignored = _IGNORED_TOKENS
    if isinstance(sent, str):
        sent = word_tokenize(sent)

    assert isinstance(sent, list)
    regex = re.compile('[' + "".join(ignored).replace('.', r'\.').replace('?', r'\?').replace('-', r'\-') + ']')
    if lowercase:
        return [regex.sub('', word.lower()) for word in sent if word not in ignored]
    else:
        return [regex.sub('', word) for word in sent if word not in ignored]

def _prem_hypothesis_to_words(premise: str, hypothesis: str, lowercase=True):
    prem_words = sentence_to_words(premise, lowercase=lowercase)
    hyp_words = sentence_to_words(hypothesis, lowercase=lowercase)
    return prem_words, hyp_words

def _prem_hypothesis_to_words(premise: str, hypothesis: str, lowercase=True):
    prem_words = sentence_to_words(premise, lowercase=lowercase)
    hyp_words = sentence_to_words(hypothesis, lowercase=lowercase)
    return prem_words, hyp_words

def percent_lexical_overlap(premise: str, hypothesis: str, get_hans_new_features=False, lowercase=True):
    r"""Check if a given premise and hypothesis lexically overlap.
    :param premise: The premise
    :param hypothesis: The hypothesis
    :param get_hans_new_features: If True, the returned overlap percentage is calculated w.r.t. the hypothesis.
    Otherwise, it is calculated w.r.t. the premise.
    :return:
        overlap_percent: The percentage of overlapping words (types) in the hypothesis the are also in
        the premise.
    """
    prem_words, hyp_words = _prem_hypothesis_to_words(premise, hypothesis, lowercase=lowercase)
    num_overlapping = len(list(set(hyp_words) & set(prem_words)))
    overlap_percent = num_overlapping / len(set(hyp_words)) #if len(set(prem_words)) > 0 else 0

    return overlap_percent

In [8]:
snli_dataset = load_dataset('snli')
dataset = concatenate_datasets([snli_dataset["train"], snli_dataset["validation"], snli_dataset["test"]])

Downloading builder script:   0%|          | 0.00/3.82k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.90k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

Downloading and preparing dataset snli/plain_text to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b...


Downloading:   0%|          | 0.00/1.93k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Dataset snli downloaded and prepared to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
overlap = []
size = len(dataset)
premise = dataset['premise']
hypothesis = dataset['hypothesis']
for i in range(size):
  overlap.append(percent_lexical_overlap(premise[i], hypothesis[i]))

In [10]:
label = dataset['label']

In [11]:
##SNLI Distribution based on the overlap percentage
##Discard samples with -1 label

counter_ent =  [0,0,0,0,0,0,0]
counter_Nent = [0,0,0,0,0,0,0]
threshold = [1, 0.80, 0.60, 0.40, 0.20, 0.00]

for i in range(1,6):
  tmp_ent = 0
  tmp_Nent = 0

  for j in range(size):

    if threshold[i] == 0.0:

      if overlap[j] > threshold[i] and overlap[j] < np.around([threshold[i] + 0.2], 2):
        
        if label[j] !=-1:
          if label[j] == 0:
            tmp_ent = tmp_ent + 1
          elif label[j] != 0:
            tmp_Nent = tmp_Nent + 1
    else:

      if overlap[j] >= threshold[i] and overlap[j] < np.around([threshold[i] + 0.2], 2):

        if label[j] !=-1:
         
          if label[j] == 0:
            tmp_ent = tmp_ent + 1
          elif label[j] != 0:
            tmp_Nent = tmp_Nent + 1

  counter_ent[i] =    tmp_ent
  counter_Nent[i] =   tmp_Nent

##Non-overlap
tmp_ent = 0
tmp_Nent = 0
for j in range(size):

  if overlap[j] == 0:
    
    if label[j] !=-1:
      if label[j] == 0:
        tmp_ent = tmp_ent + 1
      elif label[j] == 2 or label[j] == 1 :
        tmp_Nent = tmp_Nent + 1

counter_ent[-1] =    tmp_ent
counter_Nent[-1] =   tmp_Nent

##Full-overlap
tmp_ent = 0
tmp_Nent = 0
for j in range(size):

  if overlap[j] == 1:
    
    if label[j] !=-1:
      if label[j] == 0:
        tmp_ent = tmp_ent + 1
      elif label[j] == 2 or label[j] == 1 :
        tmp_Nent = tmp_Nent + 1

counter_ent[0] =    tmp_ent
counter_Nent[0] =   tmp_Nent

#Evaluation

In [12]:
!pip install transformers==4.3.3
clear_output()

In [13]:
from transformers import set_seed,BertTokenizer, TFBertModel, RobertaTokenizer, TFRobertaModel
from transformers.optimization_tf import create_optimizer
import tensorflow as tf


clear_output()

In [14]:
# @title Hyperparameters
BATCH_SIZE =  32# @param {type:"integer"}
EPOCHS =  20#@param {type:"integer"}
MAX_LENGTH =   128#@param {type:"integer"}

TASK = "mnli" 
LEARNING_RATE =  5e-5 #@param {type:"number"}
WARMUP_RATIO =   0.1 #@param {type:"number"}
num_labels = 3
#dataset = load_dataset('glue', TASK)
set_seed(42)


In [15]:
class main_model(tf.keras.Model):
  def __init__(self, bert_model, num_labels, *inputs, **kwargs):
    super(main_model, self).__init__(name="main_model")
    self.bert = bert_model
    self.bert.trainable = True
    self.dropout = tf.keras.layers.Dropout(0.1)
    self.classifier = tf.keras.layers.Dense(
                      num_labels,
                      kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02, seed = 42),
                      name="classifier")
    
  
  def call(self, inputs, **kwargs):

    outputs = self.bert(inputs, **kwargs)
    pooled_out = outputs[1]

    droped_out = self.dropout(pooled_out, training=kwargs.get("training", False))
    output = self.classifier(droped_out)
    return output

In [16]:
casing = "bert-base-uncased" 
tokenizer = BertTokenizer.from_pretrained(casing)
bert_model = TFBertModel.from_pretrained(casing)
bert_model.trainable = True
model = main_model(bert_model, num_labels)
#bert_model = TFBertModel.from_pretrained(SAVED_MODELS_DIR, from_pt=True, config = config)
#model = TFBertForSequenceClassification.from_pretrained('/content/',from_pt=True,)
clear_output()

In [22]:
def encode(examples):
     return tokenizer(examples['premise'], examples['hypothesis'], truncation=True, padding='max_length', max_length = MAX_LENGTH,
                      return_token_type_ids=True)

In [17]:
## Loading the model
model.load_weights('/content/drive/Shareddrives/Unlimited/Dataset/BERT/AnEmperical/seed121/20/best_weights.ckpt')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc655234bb0>

In [23]:
snli_data = dataset.map(encode, batched=True)

  0%|          | 0/571 [00:00<?, ?ba/s]

In [24]:
snli_data = snli_data.map(lambda examples: {'labels': examples['label']}, batched=True)
snli_data.set_format(type='tensorflow', columns=['input_ids', 'token_type_ids', 'attention_mask', 'labels'])
features_snli = {x: snli_data[x] for x in ['input_ids', 'token_type_ids', 'attention_mask']}

  0%|          | 0/571 [00:00<?, ?ba/s]

In [25]:
##getting predictions on SNLI
pre = model.predict(features_snli)
pre = tf.nn.softmax(pre)
pre = np.argmax(pre, axis=1)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method






In [27]:
acc_ent =  [0,0,0,0,0,0,0]
acc_Nent = [0,0,0,0,0,0,0]
threshold = [1, 0.80, 0.60, 0.40, 0.20, 0.00]

x = 0
y = 0 
for i in range(6):
  tmp_ent = 0
  tmp_Nent = 0

  for j in range(size):

    if threshold[i] == 0.0:

      if overlap[j] > threshold[i] and overlap[j] < np.around([threshold[i] + 0.2], 2):
        
        if label[j] !=-1:
          # x = x + 1
          if label[j] == 0:
            tmp_ent = tmp_ent + 1
            if pre[j] == 0:
              acc_ent[i] = acc_ent[i] + 1
          elif label[j] != 0:
            tmp_Nent = tmp_Nent + 1
            if pre[j] !=0 :
              acc_Nent[i] = acc_Nent[i] + 1
    else:

      if overlap[j] >= threshold[i] and overlap[j] < np.around([threshold[i] + 0.2], 2):

        if label[j] !=-1:
         
          if label[j] == 0:
            tmp_ent = tmp_ent + 1
            
            if pre[j] == 0:
              acc_ent[i] = acc_ent[i] + 1
          elif label[j] != 0:
            tmp_Nent = tmp_Nent + 1
            
            if pre[j] !=0 :
              acc_Nent[i] = acc_Nent[i] + 1

  acc_ent[i] =   acc_ent[i] /  tmp_ent
  acc_Nent[i] = acc_Nent[i] / tmp_Nent

##Non-overlap
tmp_ent = 0
tmp_Nent = 0
for j in range(size):

  if overlap[j] == 0:
    
    if label[j] !=-1:
      if label[j] == 0:
        tmp_ent = tmp_ent + 1
        if pre[j] == 0:
              acc_ent[-1] = acc_ent[-1] + 1
      elif label[j] == 2 or label[j] == 1 :
        tmp_Nent = tmp_Nent + 1
        if pre[j] !=0 :
              acc_Nent[-1] = acc_Nent[-1] + 1

acc_ent[-1] =    acc_ent[-1] / tmp_ent
acc_Nent[-1] =   acc_Nent[-1] / tmp_Nent

##Full-overlap
tmp_ent = 0
tmp_Nent = 0
for j in range(size):

  if overlap[j] == 1:
    
    if label[j] !=-1:
      if label[j] == 0:
        tmp_ent = tmp_ent + 1
        if pre[j] == 0:
              acc_ent[0] = acc_ent[0] + 1
      elif label[j] == 2 or label[j] == 1 :
        tmp_Nent = tmp_Nent + 1
        if pre[j] !=0 :
              acc_Nent[0] = acc_Nent[0] + 1

acc_ent[0] =  acc_ent[0] / tmp_ent
acc_Nent[0] =  acc_Nent[0] / tmp_Nent

In [28]:
acc_ent

[0.9895725281136637,
 0.9349593495934959,
 0.8687698374531114,
 0.7728785357737105,
 0.6795921618607615,
 0.5773279352226721,
 0.6748361356511827]

In [29]:
acc_Nent

[0.17047798664824895,
 0.8171134578933117,
 0.8441917198823277,
 0.9038185835483816,
 0.9376881203970541,
 0.9563996956898135,
 0.9366384959551679]